diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/rocksdb/db/compaction/compaction_picker_level.cc | |
parent | Initial commit. (diff) | |
download | ceph-upstream/18.2.2.tar.xz ceph-upstream/18.2.2.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rocksdb/db/compaction/compaction_picker_level.cc')
-rw-r--r-- | src/rocksdb/db/compaction/compaction_picker_level.cc | 841 |
1 files changed, 841 insertions, 0 deletions
diff --git a/src/rocksdb/db/compaction/compaction_picker_level.cc b/src/rocksdb/db/compaction/compaction_picker_level.cc new file mode 100644 index 000000000..b689b6add --- /dev/null +++ b/src/rocksdb/db/compaction/compaction_picker_level.cc @@ -0,0 +1,841 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/compaction/compaction_picker_level.h" + +#include <string> +#include <utility> +#include <vector> + +#include "db/version_edit.h" +#include "logging/log_buffer.h" +#include "test_util/sync_point.h" + +namespace ROCKSDB_NAMESPACE { + +bool LevelCompactionPicker::NeedsCompaction( + const VersionStorageInfo* vstorage) const { + if (!vstorage->ExpiredTtlFiles().empty()) { + return true; + } + if (!vstorage->FilesMarkedForPeriodicCompaction().empty()) { + return true; + } + if (!vstorage->BottommostFilesMarkedForCompaction().empty()) { + return true; + } + if (!vstorage->FilesMarkedForCompaction().empty()) { + return true; + } + if (!vstorage->FilesMarkedForForcedBlobGC().empty()) { + return true; + } + for (int i = 0; i <= vstorage->MaxInputLevel(); i++) { + if (vstorage->CompactionScore(i) >= 1) { + return true; + } + } + return false; +} + +namespace { +// A class to build a leveled compaction step-by-step. +class LevelCompactionBuilder { + public: + LevelCompactionBuilder(const std::string& cf_name, + VersionStorageInfo* vstorage, + SequenceNumber earliest_mem_seqno, + CompactionPicker* compaction_picker, + LogBuffer* log_buffer, + const MutableCFOptions& mutable_cf_options, + const ImmutableOptions& ioptions, + const MutableDBOptions& mutable_db_options) + : cf_name_(cf_name), + vstorage_(vstorage), + earliest_mem_seqno_(earliest_mem_seqno), + compaction_picker_(compaction_picker), + log_buffer_(log_buffer), + mutable_cf_options_(mutable_cf_options), + ioptions_(ioptions), + mutable_db_options_(mutable_db_options) {} + + // Pick and return a compaction. + Compaction* PickCompaction(); + + // Pick the initial files to compact to the next level. (or together + // in Intra-L0 compactions) + void SetupInitialFiles(); + + // If the initial files are from L0 level, pick other L0 + // files if needed. + bool SetupOtherL0FilesIfNeeded(); + + // Compaction with round-robin compaction priority allows more files to be + // picked to form a large compaction + void SetupOtherFilesWithRoundRobinExpansion(); + // Based on initial files, setup other files need to be compacted + // in this compaction, accordingly. + bool SetupOtherInputsIfNeeded(); + + Compaction* GetCompaction(); + + // For the specfied level, pick a file that we want to compact. + // Returns false if there is no file to compact. + // If it returns true, inputs->files.size() will be exactly one for + // all compaction priorities except round-robin. For round-robin, + // multiple consecutive files may be put into inputs->files. + // If level is 0 and there is already a compaction on that level, this + // function will return false. + bool PickFileToCompact(); + + // Return true if a L0 trivial move is picked up. + bool TryPickL0TrivialMove(); + + // For L0->L0, picks the longest span of files that aren't currently + // undergoing compaction for which work-per-deleted-file decreases. The span + // always starts from the newest L0 file. + // + // Intra-L0 compaction is independent of all other files, so it can be + // performed even when L0->base_level compactions are blocked. + // + // Returns true if `inputs` is populated with a span of files to be compacted; + // otherwise, returns false. + bool PickIntraL0Compaction(); + + // Return true if TrivialMove is extended. `start_index` is the index of + // the intiial file picked, which should already be in `start_level_inputs_`. + bool TryExtendNonL0TrivialMove(int start_index); + + // Picks a file from level_files to compact. + // level_files is a vector of (level, file metadata) in ascending order of + // level. If compact_to_next_level is true, compact the file to the next + // level, otherwise, compact to the same level as the input file. + void PickFileToCompact( + const autovector<std::pair<int, FileMetaData*>>& level_files, + bool compact_to_next_level); + + const std::string& cf_name_; + VersionStorageInfo* vstorage_; + SequenceNumber earliest_mem_seqno_; + CompactionPicker* compaction_picker_; + LogBuffer* log_buffer_; + int start_level_ = -1; + int output_level_ = -1; + int parent_index_ = -1; + int base_index_ = -1; + double start_level_score_ = 0; + bool is_manual_ = false; + bool is_l0_trivial_move_ = false; + CompactionInputFiles start_level_inputs_; + std::vector<CompactionInputFiles> compaction_inputs_; + CompactionInputFiles output_level_inputs_; + std::vector<FileMetaData*> grandparents_; + CompactionReason compaction_reason_ = CompactionReason::kUnknown; + + const MutableCFOptions& mutable_cf_options_; + const ImmutableOptions& ioptions_; + const MutableDBOptions& mutable_db_options_; + // Pick a path ID to place a newly generated file, with its level + static uint32_t GetPathId(const ImmutableCFOptions& ioptions, + const MutableCFOptions& mutable_cf_options, + int level); + + static const int kMinFilesForIntraL0Compaction = 4; +}; + +void LevelCompactionBuilder::PickFileToCompact( + const autovector<std::pair<int, FileMetaData*>>& level_files, + bool compact_to_next_level) { + for (auto& level_file : level_files) { + // If it's being compacted it has nothing to do here. + // If this assert() fails that means that some function marked some + // files as being_compacted, but didn't call ComputeCompactionScore() + assert(!level_file.second->being_compacted); + start_level_ = level_file.first; + if ((compact_to_next_level && + start_level_ == vstorage_->num_non_empty_levels() - 1) || + (start_level_ == 0 && + !compaction_picker_->level0_compactions_in_progress()->empty())) { + continue; + } + if (compact_to_next_level) { + output_level_ = + (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1; + } else { + output_level_ = start_level_; + } + start_level_inputs_.files = {level_file.second}; + start_level_inputs_.level = start_level_; + if (compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &start_level_inputs_)) { + return; + } + } + start_level_inputs_.files.clear(); +} + +void LevelCompactionBuilder::SetupInitialFiles() { + // Find the compactions by size on all levels. + bool skipped_l0_to_base = false; + for (int i = 0; i < compaction_picker_->NumberLevels() - 1; i++) { + start_level_score_ = vstorage_->CompactionScore(i); + start_level_ = vstorage_->CompactionScoreLevel(i); + assert(i == 0 || start_level_score_ <= vstorage_->CompactionScore(i - 1)); + if (start_level_score_ >= 1) { + if (skipped_l0_to_base && start_level_ == vstorage_->base_level()) { + // If L0->base_level compaction is pending, don't schedule further + // compaction from base level. Otherwise L0->base_level compaction + // may starve. + continue; + } + output_level_ = + (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1; + if (PickFileToCompact()) { + // found the compaction! + if (start_level_ == 0) { + // L0 score = `num L0 files` / `level0_file_num_compaction_trigger` + compaction_reason_ = CompactionReason::kLevelL0FilesNum; + } else { + // L1+ score = `Level files size` / `MaxBytesForLevel` + compaction_reason_ = CompactionReason::kLevelMaxLevelSize; + } + break; + } else { + // didn't find the compaction, clear the inputs + start_level_inputs_.clear(); + if (start_level_ == 0) { + skipped_l0_to_base = true; + // L0->base_level may be blocked due to ongoing L0->base_level + // compactions. It may also be blocked by an ongoing compaction from + // base_level downwards. + // + // In these cases, to reduce L0 file count and thus reduce likelihood + // of write stalls, we can attempt compacting a span of files within + // L0. + if (PickIntraL0Compaction()) { + output_level_ = 0; + compaction_reason_ = CompactionReason::kLevelL0FilesNum; + break; + } + } + } + } else { + // Compaction scores are sorted in descending order, no further scores + // will be >= 1. + break; + } + } + if (!start_level_inputs_.empty()) { + return; + } + + // if we didn't find a compaction, check if there are any files marked for + // compaction + parent_index_ = base_index_ = -1; + + compaction_picker_->PickFilesMarkedForCompaction( + cf_name_, vstorage_, &start_level_, &output_level_, &start_level_inputs_); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kFilesMarkedForCompaction; + return; + } + + // Bottommost Files Compaction on deleting tombstones + PickFileToCompact(vstorage_->BottommostFilesMarkedForCompaction(), false); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kBottommostFiles; + return; + } + + // TTL Compaction + if (ioptions_.compaction_pri == kRoundRobin && + !vstorage_->ExpiredTtlFiles().empty()) { + auto expired_files = vstorage_->ExpiredTtlFiles(); + // the expired files list should already be sorted by level + start_level_ = expired_files.front().first; +#ifndef NDEBUG + for (const auto& file : expired_files) { + assert(start_level_ <= file.first); + } +#endif + if (start_level_ > 0) { + output_level_ = start_level_ + 1; + if (PickFileToCompact()) { + compaction_reason_ = CompactionReason::kRoundRobinTtl; + return; + } + } + } + + PickFileToCompact(vstorage_->ExpiredTtlFiles(), true); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kTtl; + return; + } + + // Periodic Compaction + PickFileToCompact(vstorage_->FilesMarkedForPeriodicCompaction(), false); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kPeriodicCompaction; + return; + } + + // Forced blob garbage collection + PickFileToCompact(vstorage_->FilesMarkedForForcedBlobGC(), false); + if (!start_level_inputs_.empty()) { + compaction_reason_ = CompactionReason::kForcedBlobGC; + return; + } +} + +bool LevelCompactionBuilder::SetupOtherL0FilesIfNeeded() { + if (start_level_ == 0 && output_level_ != 0 && !is_l0_trivial_move_) { + return compaction_picker_->GetOverlappingL0Files( + vstorage_, &start_level_inputs_, output_level_, &parent_index_); + } + return true; +} + +void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() { + // We only expand when the start level is not L0 under round robin + assert(start_level_ >= 1); + + // For round-robin compaction priority, we have 3 constraints when picking + // multiple files. + // Constraint 1: We can only pick consecutive files + // -> Constraint 1a: When a file is being compacted (or some input files + // are being compacted after expanding, we cannot + // choose it and have to stop choosing more files + // -> Constraint 1b: When we reach the last file (with largest keys), we + // cannot choose more files (the next file will be the + // first one) + // Constraint 2: We should ensure the total compaction bytes (including the + // overlapped files from the next level) is no more than + // mutable_cf_options_.max_compaction_bytes + // Constraint 3: We try our best to pick as many files as possible so that + // the post-compaction level size is less than + // MaxBytesForLevel(start_level_) + // Constraint 4: We do not expand if it is possible to apply a trivial move + // Constraint 5 (TODO): Try to pick minimal files to split into the target + // number of subcompactions + TEST_SYNC_POINT("LevelCompactionPicker::RoundRobin"); + + // Only expand the inputs when we have selected a file in start_level_inputs_ + if (start_level_inputs_.size() == 0) return; + + uint64_t start_lvl_bytes_no_compacting = 0; + uint64_t curr_bytes_to_compact = 0; + uint64_t start_lvl_max_bytes_to_compact = 0; + const std::vector<FileMetaData*>& level_files = + vstorage_->LevelFiles(start_level_); + // Constraint 3 (pre-calculate the ideal max bytes to compact) + for (auto f : level_files) { + if (!f->being_compacted) { + start_lvl_bytes_no_compacting += f->fd.GetFileSize(); + } + } + if (start_lvl_bytes_no_compacting > + vstorage_->MaxBytesForLevel(start_level_)) { + start_lvl_max_bytes_to_compact = start_lvl_bytes_no_compacting - + vstorage_->MaxBytesForLevel(start_level_); + } + + size_t start_index = vstorage_->FilesByCompactionPri(start_level_)[0]; + InternalKey smallest, largest; + // Constraint 4 (No need to check again later) + compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest); + CompactionInputFiles output_level_inputs; + output_level_inputs.level = output_level_; + vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, + &output_level_inputs.files); + if (output_level_inputs.empty()) { + if (TryExtendNonL0TrivialMove((int)start_index)) { + return; + } + } + // Constraint 3 + if (start_level_inputs_[0]->fd.GetFileSize() >= + start_lvl_max_bytes_to_compact) { + return; + } + CompactionInputFiles tmp_start_level_inputs; + tmp_start_level_inputs = start_level_inputs_; + // TODO (zichen): Future parallel round-robin may also need to update this + // Constraint 1b (only expand till the end) + for (size_t i = start_index + 1; i < level_files.size(); i++) { + auto* f = level_files[i]; + if (f->being_compacted) { + // Constraint 1a + return; + } + + tmp_start_level_inputs.files.push_back(f); + if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &tmp_start_level_inputs) || + compaction_picker_->FilesRangeOverlapWithCompaction( + {tmp_start_level_inputs}, output_level_, + Compaction::EvaluatePenultimateLevel( + vstorage_, ioptions_, start_level_, output_level_))) { + // Constraint 1a + tmp_start_level_inputs.clear(); + return; + } + + curr_bytes_to_compact = 0; + for (auto start_lvl_f : tmp_start_level_inputs.files) { + curr_bytes_to_compact += start_lvl_f->fd.GetFileSize(); + } + + // Check whether any output level files are locked + compaction_picker_->GetRange(tmp_start_level_inputs, &smallest, &largest); + vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, + &output_level_inputs.files); + if (!output_level_inputs.empty() && + !compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &output_level_inputs)) { + // Constraint 1a + tmp_start_level_inputs.clear(); + return; + } + + uint64_t start_lvl_curr_bytes_to_compact = curr_bytes_to_compact; + for (auto output_lvl_f : output_level_inputs.files) { + curr_bytes_to_compact += output_lvl_f->fd.GetFileSize(); + } + if (curr_bytes_to_compact > mutable_cf_options_.max_compaction_bytes) { + // Constraint 2 + tmp_start_level_inputs.clear(); + return; + } + + start_level_inputs_.files = tmp_start_level_inputs.files; + // Constraint 3 + if (start_lvl_curr_bytes_to_compact > start_lvl_max_bytes_to_compact) { + return; + } + } +} + +bool LevelCompactionBuilder::SetupOtherInputsIfNeeded() { + // Setup input files from output level. For output to L0, we only compact + // spans of files that do not interact with any pending compactions, so don't + // need to consider other levels. + if (output_level_ != 0) { + output_level_inputs_.level = output_level_; + bool round_robin_expanding = + ioptions_.compaction_pri == kRoundRobin && + compaction_reason_ == CompactionReason::kLevelMaxLevelSize; + if (round_robin_expanding) { + SetupOtherFilesWithRoundRobinExpansion(); + } + if (!is_l0_trivial_move_ && + !compaction_picker_->SetupOtherInputs( + cf_name_, mutable_cf_options_, vstorage_, &start_level_inputs_, + &output_level_inputs_, &parent_index_, base_index_, + round_robin_expanding)) { + return false; + } + + compaction_inputs_.push_back(start_level_inputs_); + if (!output_level_inputs_.empty()) { + compaction_inputs_.push_back(output_level_inputs_); + } + + if (!is_l0_trivial_move_) { + // In some edge cases we could pick a compaction that will be compacting + // a key range that overlap with another running compaction, and both + // of them have the same output level. This could happen if + // (1) we are running a non-exclusive manual compaction + // (2) AddFile ingest a new file into the LSM tree + // We need to disallow this from happening. + if (compaction_picker_->FilesRangeOverlapWithCompaction( + compaction_inputs_, output_level_, + Compaction::EvaluatePenultimateLevel( + vstorage_, ioptions_, start_level_, output_level_))) { + // This compaction output could potentially conflict with the output + // of a currently running compaction, we cannot run it. + return false; + } + compaction_picker_->GetGrandparents(vstorage_, start_level_inputs_, + output_level_inputs_, &grandparents_); + } + } else { + compaction_inputs_.push_back(start_level_inputs_); + } + return true; +} + +Compaction* LevelCompactionBuilder::PickCompaction() { + // Pick up the first file to start compaction. It may have been extended + // to a clean cut. + SetupInitialFiles(); + if (start_level_inputs_.empty()) { + return nullptr; + } + assert(start_level_ >= 0 && output_level_ >= 0); + + // If it is a L0 -> base level compaction, we need to set up other L0 + // files if needed. + if (!SetupOtherL0FilesIfNeeded()) { + return nullptr; + } + + // Pick files in the output level and expand more files in the start level + // if needed. + if (!SetupOtherInputsIfNeeded()) { + return nullptr; + } + + // Form a compaction object containing the files we picked. + Compaction* c = GetCompaction(); + + TEST_SYNC_POINT_CALLBACK("LevelCompactionPicker::PickCompaction:Return", c); + + return c; +} + +Compaction* LevelCompactionBuilder::GetCompaction() { + auto c = new Compaction( + vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_, + std::move(compaction_inputs_), output_level_, + MaxFileSizeForLevel(mutable_cf_options_, output_level_, + ioptions_.compaction_style, vstorage_->base_level(), + ioptions_.level_compaction_dynamic_level_bytes), + mutable_cf_options_.max_compaction_bytes, + GetPathId(ioptions_, mutable_cf_options_, output_level_), + GetCompressionType(vstorage_, mutable_cf_options_, output_level_, + vstorage_->base_level()), + GetCompressionOptions(mutable_cf_options_, vstorage_, output_level_), + Temperature::kUnknown, + /* max_subcompactions */ 0, std::move(grandparents_), is_manual_, + /* trim_ts */ "", start_level_score_, false /* deletion_compaction */, + /* l0_files_might_overlap */ start_level_ == 0 && !is_l0_trivial_move_, + compaction_reason_); + + // If it's level 0 compaction, make sure we don't execute any other level 0 + // compactions in parallel + compaction_picker_->RegisterCompaction(c); + + // Creating a compaction influences the compaction score because the score + // takes running compactions into account (by skipping files that are already + // being compacted). Since we just changed compaction score, we recalculate it + // here + vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_); + return c; +} + +/* + * Find the optimal path to place a file + * Given a level, finds the path where levels up to it will fit in levels + * up to and including this path + */ +uint32_t LevelCompactionBuilder::GetPathId( + const ImmutableCFOptions& ioptions, + const MutableCFOptions& mutable_cf_options, int level) { + uint32_t p = 0; + assert(!ioptions.cf_paths.empty()); + + // size remaining in the most recent path + uint64_t current_path_size = ioptions.cf_paths[0].target_size; + + uint64_t level_size; + int cur_level = 0; + + // max_bytes_for_level_base denotes L1 size. + // We estimate L0 size to be the same as L1. + level_size = mutable_cf_options.max_bytes_for_level_base; + + // Last path is the fallback + while (p < ioptions.cf_paths.size() - 1) { + if (level_size <= current_path_size) { + if (cur_level == level) { + // Does desired level fit in this path? + return p; + } else { + current_path_size -= level_size; + if (cur_level > 0) { + if (ioptions.level_compaction_dynamic_level_bytes) { + // Currently, level_compaction_dynamic_level_bytes is ignored when + // multiple db paths are specified. https://github.com/facebook/ + // rocksdb/blob/main/db/column_family.cc. + // Still, adding this check to avoid accidentally using + // max_bytes_for_level_multiplier_additional + level_size = static_cast<uint64_t>( + level_size * mutable_cf_options.max_bytes_for_level_multiplier); + } else { + level_size = static_cast<uint64_t>( + level_size * mutable_cf_options.max_bytes_for_level_multiplier * + mutable_cf_options.MaxBytesMultiplerAdditional(cur_level)); + } + } + cur_level++; + continue; + } + } + p++; + current_path_size = ioptions.cf_paths[p].target_size; + } + return p; +} + +bool LevelCompactionBuilder::TryPickL0TrivialMove() { + if (vstorage_->base_level() <= 0) { + return false; + } + if (start_level_ == 0 && mutable_cf_options_.compression_per_level.empty() && + !vstorage_->LevelFiles(output_level_).empty() && + ioptions_.db_paths.size() <= 1) { + // Try to pick trivial move from L0 to L1. We start from the oldest + // file. We keep expanding to newer files if it would form a + // trivial move. + // For now we don't support it with + // mutable_cf_options_.compression_per_level to prevent the logic + // of determining whether L0 can be trivial moved to the next level. + // We skip the case where output level is empty, since in this case, at + // least the oldest file would qualify for trivial move, and this would + // be a surprising behavior with few benefits. + + // We search from the oldest file from the newest. In theory, there are + // files in the middle can form trivial move too, but it is probably + // uncommon and we ignore these cases for simplicity. + const std::vector<FileMetaData*>& level_files = + vstorage_->LevelFiles(start_level_); + + InternalKey my_smallest, my_largest; + for (auto it = level_files.rbegin(); it != level_files.rend(); ++it) { + CompactionInputFiles output_level_inputs; + output_level_inputs.level = output_level_; + FileMetaData* file = *it; + if (it == level_files.rbegin()) { + my_smallest = file->smallest; + my_largest = file->largest; + } else { + if (compaction_picker_->icmp()->Compare(file->largest, my_smallest) < + 0) { + my_smallest = file->smallest; + } else if (compaction_picker_->icmp()->Compare(file->smallest, + my_largest) > 0) { + my_largest = file->largest; + } else { + break; + } + } + vstorage_->GetOverlappingInputs(output_level_, &my_smallest, &my_largest, + &output_level_inputs.files); + if (output_level_inputs.empty()) { + assert(!file->being_compacted); + start_level_inputs_.files.push_back(file); + } else { + break; + } + } + } + + if (!start_level_inputs_.empty()) { + // Sort files by key range. Not sure it's 100% necessary but it's cleaner + // to always keep files sorted by key the key ranges don't overlap. + std::sort(start_level_inputs_.files.begin(), + start_level_inputs_.files.end(), + [icmp = compaction_picker_->icmp()](FileMetaData* f1, + FileMetaData* f2) -> bool { + return (icmp->Compare(f1->smallest, f2->smallest) < 0); + }); + + is_l0_trivial_move_ = true; + return true; + } + return false; +} + +bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index) { + if (start_level_inputs_.size() == 1 && + (ioptions_.db_paths.empty() || ioptions_.db_paths.size() == 1) && + (mutable_cf_options_.compression_per_level.empty())) { + // Only file of `index`, and it is likely a trivial move. Try to + // expand if it is still a trivial move, but not beyond + // max_compaction_bytes or 4 files, so that we don't create too + // much compaction pressure for the next level. + // Ignore if there are more than one DB path, as it would be hard + // to predict whether it is a trivial move. + const std::vector<FileMetaData*>& level_files = + vstorage_->LevelFiles(start_level_); + const size_t kMaxMultiTrivialMove = 4; + FileMetaData* initial_file = start_level_inputs_.files[0]; + size_t total_size = initial_file->fd.GetFileSize(); + CompactionInputFiles output_level_inputs; + output_level_inputs.level = output_level_; + for (int i = start_index + 1; + i < static_cast<int>(level_files.size()) && + start_level_inputs_.size() < kMaxMultiTrivialMove; + i++) { + FileMetaData* next_file = level_files[i]; + if (next_file->being_compacted) { + break; + } + vstorage_->GetOverlappingInputs(output_level_, &(initial_file->smallest), + &(next_file->largest), + &output_level_inputs.files); + if (!output_level_inputs.empty()) { + break; + } + if (i < static_cast<int>(level_files.size()) - 1 && + compaction_picker_->icmp() + ->user_comparator() + ->CompareWithoutTimestamp( + next_file->largest.user_key(), + level_files[i + 1]->smallest.user_key()) == 0) { + TEST_SYNC_POINT_CALLBACK( + "LevelCompactionBuilder::TryExtendNonL0TrivialMove:NoCleanCut", + nullptr); + // Not a clean up after adding the next file. Skip. + break; + } + total_size += next_file->fd.GetFileSize(); + if (total_size > mutable_cf_options_.max_compaction_bytes) { + break; + } + start_level_inputs_.files.push_back(next_file); + } + return start_level_inputs_.size() > 1; + } + return false; +} + +bool LevelCompactionBuilder::PickFileToCompact() { + // level 0 files are overlapping. So we cannot pick more + // than one concurrent compactions at this level. This + // could be made better by looking at key-ranges that are + // being compacted at level 0. + if (start_level_ == 0 && + !compaction_picker_->level0_compactions_in_progress()->empty()) { + TEST_SYNC_POINT("LevelCompactionPicker::PickCompactionBySize:0"); + return false; + } + + start_level_inputs_.clear(); + start_level_inputs_.level = start_level_; + + assert(start_level_ >= 0); + + if (TryPickL0TrivialMove()) { + return true; + } + + const std::vector<FileMetaData*>& level_files = + vstorage_->LevelFiles(start_level_); + + // Pick the file with the highest score in this level that is not already + // being compacted. + const std::vector<int>& file_scores = + vstorage_->FilesByCompactionPri(start_level_); + + unsigned int cmp_idx; + for (cmp_idx = vstorage_->NextCompactionIndex(start_level_); + cmp_idx < file_scores.size(); cmp_idx++) { + int index = file_scores[cmp_idx]; + auto* f = level_files[index]; + + // do not pick a file to compact if it is being compacted + // from n-1 level. + if (f->being_compacted) { + if (ioptions_.compaction_pri == kRoundRobin) { + // TODO(zichen): this file may be involved in one compaction from + // an upper level, cannot advance the cursor for round-robin policy. + // Currently, we do not pick any file to compact in this case. We + // should fix this later to ensure a compaction is picked but the + // cursor shall not be advanced. + return false; + } + continue; + } + + start_level_inputs_.files.push_back(f); + if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &start_level_inputs_) || + compaction_picker_->FilesRangeOverlapWithCompaction( + {start_level_inputs_}, output_level_, + Compaction::EvaluatePenultimateLevel( + vstorage_, ioptions_, start_level_, output_level_))) { + // A locked (pending compaction) input-level file was pulled in due to + // user-key overlap. + start_level_inputs_.clear(); + + if (ioptions_.compaction_pri == kRoundRobin) { + return false; + } + continue; + } + + // Now that input level is fully expanded, we check whether any output + // files are locked due to pending compaction. + // + // Note we rely on ExpandInputsToCleanCut() to tell us whether any output- + // level files are locked, not just the extra ones pulled in for user-key + // overlap. + InternalKey smallest, largest; + compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest); + CompactionInputFiles output_level_inputs; + output_level_inputs.level = output_level_; + vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest, + &output_level_inputs.files); + if (output_level_inputs.empty()) { + if (TryExtendNonL0TrivialMove(index)) { + break; + } + } else { + if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_, + &output_level_inputs)) { + start_level_inputs_.clear(); + if (ioptions_.compaction_pri == kRoundRobin) { + return false; + } + continue; + } + } + + base_index_ = index; + break; + } + + // store where to start the iteration in the next call to PickCompaction + if (ioptions_.compaction_pri != kRoundRobin) { + vstorage_->SetNextCompactionIndex(start_level_, cmp_idx); + } + return start_level_inputs_.size() > 0; +} + +bool LevelCompactionBuilder::PickIntraL0Compaction() { + start_level_inputs_.clear(); + const std::vector<FileMetaData*>& level_files = + vstorage_->LevelFiles(0 /* level */); + if (level_files.size() < + static_cast<size_t>( + mutable_cf_options_.level0_file_num_compaction_trigger + 2) || + level_files[0]->being_compacted) { + // If L0 isn't accumulating much files beyond the regular trigger, don't + // resort to L0->L0 compaction yet. + return false; + } + return FindIntraL0Compaction(level_files, kMinFilesForIntraL0Compaction, + std::numeric_limits<uint64_t>::max(), + mutable_cf_options_.max_compaction_bytes, + &start_level_inputs_, earliest_mem_seqno_); +} +} // namespace + +Compaction* LevelCompactionPicker::PickCompaction( + const std::string& cf_name, const MutableCFOptions& mutable_cf_options, + const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage, + LogBuffer* log_buffer, SequenceNumber earliest_mem_seqno) { + LevelCompactionBuilder builder(cf_name, vstorage, earliest_mem_seqno, this, + log_buffer, mutable_cf_options, ioptions_, + mutable_db_options); + return builder.PickCompaction(); +} +} // namespace ROCKSDB_NAMESPACE |