summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/db/seqno_to_time_mapping.h
blob: 4ffc9c1992656e35ac5f81ed46b90763e8f5b304 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
//  Copyright (c) Meta Platforms, Inc. and affiliates.
//
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <algorithm>
#include <cinttypes>
#include <deque>
#include <functional>
#include <iterator>
#include <string>

#include "rocksdb/status.h"
#include "rocksdb/types.h"

namespace ROCKSDB_NAMESPACE {

constexpr uint64_t kUnknownSeqnoTime = 0;

// SeqnoToTimeMapping stores the sequence number to time mapping, so given a
// sequence number it can estimate the oldest possible time for that sequence
// number. For example:
//   10 -> 100
//   50 -> 300
// then if a key has seqno 19, the OldestApproximateTime would be 100, for 51 it
// would be 300.
// As it's a sorted list, the new entry is inserted from the back. The old data
// will be popped from the front if they're no longer used.
//
// Note: the data struct is not thread safe, both read and write need to be
//  synchronized by caller.
class SeqnoToTimeMapping {
 public:
  // Maximum number of entries can be encoded into SST. The data is delta encode
  // so the maximum data usage for each SST is < 0.3K
  static constexpr uint64_t kMaxSeqnoTimePairsPerSST = 100;

  // Maximum number of entries per CF. If there's only CF with this feature on,
  // the max duration divided by this number, so for example, if
  // preclude_last_level_data_seconds = 100000 (~1day), then it will sample the
  // seqno -> time every 1000 seconds (~17minutes). Then the maximum entry it
  // needs is 100.
  // When there are multiple CFs having this feature on, the sampling cadence is
  // determined by the smallest setting, the capacity is determined the largest
  // setting, also it's caped by kMaxSeqnoTimePairsPerCF * 10.
  static constexpr uint64_t kMaxSeqnoTimePairsPerCF = 100;

  // A simple struct for sequence number to time pair
  struct SeqnoTimePair {
    SequenceNumber seqno = 0;
    uint64_t time = 0;

    SeqnoTimePair() = default;
    SeqnoTimePair(SequenceNumber _seqno, uint64_t _time)
        : seqno(_seqno), time(_time) {}

    // Encode to dest string
    void Encode(std::string& dest) const;

    // Decode the value from input Slice and remove it from the input
    Status Decode(Slice& input);

    // subtraction of 2 SeqnoTimePair
    SeqnoTimePair operator-(const SeqnoTimePair& other) const;

    // Add 2 values together
    void Add(const SeqnoTimePair& obj) {
      seqno += obj.seqno;
      time += obj.time;
    }

    // Compare SeqnoTimePair with a sequence number, used for binary search a
    // sequence number in a list of SeqnoTimePair
    bool operator<(const SequenceNumber& other) const { return seqno < other; }

    // Compare 2 SeqnoTimePair
    bool operator<(const SeqnoTimePair& other) const {
      return std::tie(seqno, time) < std::tie(other.seqno, other.time);
    }

    // Check if 2 SeqnoTimePair is the same
    bool operator==(const SeqnoTimePair& other) const {
      return std::tie(seqno, time) == std::tie(other.seqno, other.time);
    }
  };

  // constractor of SeqnoToTimeMapping
  // max_time_duration is the maximum time it should track. For example, if
  // preclude_last_level_data_seconds is 1 day, then if an entry is older than 1
  // day, then it can be removed.
  // max_capacity is the maximum number of entry it can hold. For single CF,
  // it's caped at 100 (kMaxSeqnoTimePairsPerCF), otherwise
  // kMaxSeqnoTimePairsPerCF * 10.
  // If it's set to 0, means it won't truncate any old data.
  explicit SeqnoToTimeMapping(uint64_t max_time_duration = 0,
                              uint64_t max_capacity = 0)
      : max_time_duration_(max_time_duration), max_capacity_(max_capacity) {}

  // Append a new entry to the list. The new entry should be newer than the
  // existing ones. It maintains the internal sorted status.
  bool Append(SequenceNumber seqno, uint64_t time);

  // Given a sequence number, estimate it's oldest time
  uint64_t GetOldestApproximateTime(SequenceNumber seqno) const;

  // Truncate the old entries based on the current time and max_time_duration_
  void TruncateOldEntries(uint64_t now);

  // Given a time, return it's oldest possible sequence number
  SequenceNumber GetOldestSequenceNum(uint64_t time);

  // Encode to a binary string
  void Encode(std::string& des, SequenceNumber start, SequenceNumber end,
              uint64_t now,
              uint64_t output_size = kMaxSeqnoTimePairsPerSST) const;

  // Add a new random entry, unlike Append(), it can be any data, but also makes
  // the list un-sorted.
  void Add(SequenceNumber seqno, uint64_t time);

  // Decode and add the entries to the current obj. The list will be unsorted
  Status Add(const std::string& seqno_time_mapping_str);

  // Return the number of entries
  size_t Size() const { return seqno_time_mapping_.size(); }

  // Reduce the size of internal list
  bool Resize(uint64_t min_time_duration, uint64_t max_time_duration);

  // Override the max_time_duration_
  void SetMaxTimeDuration(uint64_t max_time_duration) {
    max_time_duration_ = max_time_duration;
  }

  uint64_t GetCapacity() const { return max_capacity_; }

  // Sort the list, which also remove the redundant entries, useless entries,
  // which makes sure the seqno is sorted, but also the time
  Status Sort();

  // copy the current obj from the given smallest_seqno.
  SeqnoToTimeMapping Copy(SequenceNumber smallest_seqno) const;

  // If the internal list is empty
  bool Empty() const { return seqno_time_mapping_.empty(); }

  // clear all entries
  void Clear() { seqno_time_mapping_.clear(); }

  // return the string for user message
  // Note: Not efficient, okay for print
  std::string ToHumanString() const;

#ifndef NDEBUG
  const std::deque<SeqnoTimePair>& TEST_GetInternalMapping() const {
    return seqno_time_mapping_;
  }
#endif

 private:
  static constexpr uint64_t kMaxSeqnoToTimeEntries =
      kMaxSeqnoTimePairsPerCF * 10;

  uint64_t max_time_duration_;
  uint64_t max_capacity_;

  std::deque<SeqnoTimePair> seqno_time_mapping_;

  bool is_sorted_ = true;

  static uint64_t CalculateMaxCapacity(uint64_t min_time_duration,
                                       uint64_t max_time_duration);

  SeqnoTimePair& Last() {
    assert(!Empty());
    return seqno_time_mapping_.back();
  }
};

// for searching the sequence number from SeqnoToTimeMapping
inline bool operator<(const SequenceNumber& seqno,
                      const SeqnoToTimeMapping::SeqnoTimePair& other) {
  return seqno < other.seqno;
}

}  // namespace ROCKSDB_NAMESPACE