summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/db/compaction/file_pri.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/rocksdb/db/compaction/file_pri.h
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rocksdb/db/compaction/file_pri.h')
-rw-r--r--src/rocksdb/db/compaction/file_pri.h92
1 files changed, 92 insertions, 0 deletions
diff --git a/src/rocksdb/db/compaction/file_pri.h b/src/rocksdb/db/compaction/file_pri.h
new file mode 100644
index 000000000..82dddcf93
--- /dev/null
+++ b/src/rocksdb/db/compaction/file_pri.h
@@ -0,0 +1,92 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+
+#pragma once
+#include <algorithm>
+
+#include "db/version_edit.h"
+
+namespace ROCKSDB_NAMESPACE {
+// We boost files that are closer to TTL limit. This boosting could be
+// through FileMetaData.compensated_file_size but this compensated size
+// is widely used as something similar to file size so dramatically boost
+// the value might cause unintended consequences.
+//
+// This boosting algorithm can go very fancy, but here we use a simple
+// formula which can satisify:
+// (1) Different levels are triggered slightly differently to avoid
+// too many cascading cases
+// (2) Files in the same level get boosting more when TTL gets closer.
+//
+// Don't do any boosting before TTL has past by half. This is to make
+// sure lower write amp for most of the case. And all levels should be
+// fully boosted when total TTL compaction threshold triggers.
+// Differientiate boosting ranges of each level by 1/2. This will make
+// range for each level exponentially increasing. We could do it by
+// having them to be equal, or go even fancier. We can adjust it after
+// we observe the behavior in production.
+// The threshold starting boosting:
+// +------------------------------------------------------------------ +
+// ^ ^ ^ ^ ^ ^
+// Age 0 ... | | second last level thresold
+// | |
+// | third last level
+// |
+// forth last level
+//
+// We arbitrarily set with 0 when a file is aged boost_age_start and
+// grow linearly. The ratio is arbitrarily set so that when the next level
+// starts to boost, the previous level's boosting amount is 16.
+class FileTtlBooster {
+ public:
+ FileTtlBooster(uint64_t current_time, uint64_t ttl, int num_non_empty_levels,
+ int level)
+ : current_time_(current_time) {
+ if (ttl == 0 || level == 0 || level >= num_non_empty_levels - 1) {
+ enabled_ = false;
+ boost_age_start_ = 0;
+ boost_step_ = 1;
+ } else {
+ enabled_ = true;
+ uint64_t all_boost_start_age = ttl / 2;
+ uint64_t all_boost_age_range = (ttl / 32) * 31 - all_boost_start_age;
+ uint64_t boost_age_range =
+ all_boost_age_range >> (num_non_empty_levels - level - 1);
+ boost_age_start_ = all_boost_start_age + boost_age_range;
+ const uint64_t kBoostRatio = 16;
+ // prevent 0 value to avoid divide 0 error.
+ boost_step_ = std::max(boost_age_range / kBoostRatio, uint64_t{1});
+ }
+ }
+
+ uint64_t GetBoostScore(FileMetaData* f) {
+ if (!enabled_) {
+ return 1;
+ }
+ uint64_t oldest_ancester_time = f->TryGetOldestAncesterTime();
+ if (oldest_ancester_time >= current_time_) {
+ return 1;
+ }
+ uint64_t age = current_time_ - oldest_ancester_time;
+ if (age > boost_age_start_) {
+ // Use integer just for convenience.
+ // We could make all file_to_order double if we want.
+ // Technically this can overflow if users override timing and
+ // give a very high current time. Ignore the case for simplicity.
+ // Boosting is addition to current value, so +1. This will effectively
+ // make boosting to kick in after the first boost_step_ is reached.
+ return (age - boost_age_start_) / boost_step_ + 1;
+ }
+ return 1;
+ }
+
+ private:
+ bool enabled_;
+ uint64_t current_time_;
+ uint64_t boost_age_start_;
+ uint64_t boost_step_;
+};
+} // namespace ROCKSDB_NAMESPACE