summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/db/version_edit.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/rocksdb/db/version_edit.h')
-rw-r--r--src/rocksdb/db/version_edit.h438
1 files changed, 438 insertions, 0 deletions
diff --git a/src/rocksdb/db/version_edit.h b/src/rocksdb/db/version_edit.h
new file mode 100644
index 000000000..6d1893f2a
--- /dev/null
+++ b/src/rocksdb/db/version_edit.h
@@ -0,0 +1,438 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#include <algorithm>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+#include "db/dbformat.h"
+#include "memory/arena.h"
+#include "rocksdb/cache.h"
+#include "table/table_reader.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class VersionSet;
+
+constexpr uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF;
+constexpr uint64_t kInvalidBlobFileNumber = 0;
+constexpr uint64_t kUnknownOldestAncesterTime = 0;
+constexpr uint64_t kUnknownFileCreationTime = 0;
+
+extern const std::string kUnknownFileChecksum;
+extern const std::string kUnknownFileChecksumFuncName;
+
+extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id);
+
+// A copyable structure contains information needed to read data from an SST
+// file. It can contain a pointer to a table reader opened for the file, or
+// file number and size, which can be used to create a new table reader for it.
+// The behavior is undefined when a copied of the structure is used when the
+// file is not in any live version any more.
+struct FileDescriptor {
+ // Table reader in table_reader_handle
+ TableReader* table_reader;
+ uint64_t packed_number_and_path_id;
+ uint64_t file_size; // File size in bytes
+ SequenceNumber smallest_seqno; // The smallest seqno in this file
+ SequenceNumber largest_seqno; // The largest seqno in this file
+
+ FileDescriptor() : FileDescriptor(0, 0, 0) {}
+
+ FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size)
+ : FileDescriptor(number, path_id, _file_size, kMaxSequenceNumber, 0) {}
+
+ FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size,
+ SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno)
+ : table_reader(nullptr),
+ packed_number_and_path_id(PackFileNumberAndPathId(number, path_id)),
+ file_size(_file_size),
+ smallest_seqno(_smallest_seqno),
+ largest_seqno(_largest_seqno) {}
+
+ FileDescriptor(const FileDescriptor& fd) { *this = fd; }
+
+ FileDescriptor& operator=(const FileDescriptor& fd) {
+ table_reader = fd.table_reader;
+ packed_number_and_path_id = fd.packed_number_and_path_id;
+ file_size = fd.file_size;
+ smallest_seqno = fd.smallest_seqno;
+ largest_seqno = fd.largest_seqno;
+ return *this;
+ }
+
+ uint64_t GetNumber() const {
+ return packed_number_and_path_id & kFileNumberMask;
+ }
+ uint32_t GetPathId() const {
+ return static_cast<uint32_t>(
+ packed_number_and_path_id / (kFileNumberMask + 1));
+ }
+ uint64_t GetFileSize() const { return file_size; }
+};
+
+struct FileSampledStats {
+ FileSampledStats() : num_reads_sampled(0) {}
+ FileSampledStats(const FileSampledStats& other) { *this = other; }
+ FileSampledStats& operator=(const FileSampledStats& other) {
+ num_reads_sampled = other.num_reads_sampled.load();
+ return *this;
+ }
+
+ // number of user reads to this file.
+ mutable std::atomic<uint64_t> num_reads_sampled;
+};
+
+struct FileMetaData {
+ FileDescriptor fd;
+ InternalKey smallest; // Smallest internal key served by table
+ InternalKey largest; // Largest internal key served by table
+
+ // Needs to be disposed when refs becomes 0.
+ Cache::Handle* table_reader_handle = nullptr;
+
+ FileSampledStats stats;
+
+ // Stats for compensating deletion entries during compaction
+
+ // File size compensated by deletion entry.
+ // This is updated in Version::UpdateAccumulatedStats() first time when the
+ // file is created or loaded. After it is updated (!= 0), it is immutable.
+ uint64_t compensated_file_size = 0;
+ // These values can mutate, but they can only be read or written from
+ // single-threaded LogAndApply thread
+ uint64_t num_entries = 0; // the number of entries.
+ uint64_t num_deletions = 0; // the number of deletion entries.
+ uint64_t raw_key_size = 0; // total uncompressed key size.
+ uint64_t raw_value_size = 0; // total uncompressed value size.
+
+ int refs = 0; // Reference count
+
+ bool being_compacted = false; // Is this file undergoing compaction?
+ bool init_stats_from_file = false; // true if the data-entry stats of this
+ // file has initialized from file.
+
+ bool marked_for_compaction = false; // True if client asked us nicely to
+ // compact this file.
+
+ // Used only in BlobDB. The file number of the oldest blob file this SST file
+ // refers to. 0 is an invalid value; BlobDB numbers the files starting from 1.
+ uint64_t oldest_blob_file_number = kInvalidBlobFileNumber;
+
+ // The file could be the compaction output from other SST files, which could
+ // in turn be outputs for compact older SST files. We track the memtable
+ // flush timestamp for the oldest SST file that eventaully contribute data
+ // to this file. 0 means the information is not available.
+ uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
+
+ // Unix time when the SST file is created.
+ uint64_t file_creation_time = kUnknownFileCreationTime;
+
+ // File checksum
+ std::string file_checksum = kUnknownFileChecksum;
+
+ // File checksum function name
+ std::string file_checksum_func_name = kUnknownFileChecksumFuncName;
+
+ FileMetaData() = default;
+
+ FileMetaData(uint64_t file, uint32_t file_path_id, uint64_t file_size,
+ const InternalKey& smallest_key, const InternalKey& largest_key,
+ const SequenceNumber& smallest_seq,
+ const SequenceNumber& largest_seq, bool marked_for_compact,
+ uint64_t oldest_blob_file, uint64_t _oldest_ancester_time,
+ uint64_t _file_creation_time, const std::string& _file_checksum,
+ const std::string& _file_checksum_func_name)
+ : fd(file, file_path_id, file_size, smallest_seq, largest_seq),
+ smallest(smallest_key),
+ largest(largest_key),
+ marked_for_compaction(marked_for_compact),
+ oldest_blob_file_number(oldest_blob_file),
+ oldest_ancester_time(_oldest_ancester_time),
+ file_creation_time(_file_creation_time),
+ file_checksum(_file_checksum),
+ file_checksum_func_name(_file_checksum_func_name) {
+ TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this);
+ }
+
+ // REQUIRED: Keys must be given to the function in sorted order (it expects
+ // the last key to be the largest).
+ void UpdateBoundaries(const Slice& key, const Slice& value,
+ SequenceNumber seqno, ValueType value_type);
+
+ // Unlike UpdateBoundaries, ranges do not need to be presented in any
+ // particular order.
+ void UpdateBoundariesForRange(const InternalKey& start,
+ const InternalKey& end, SequenceNumber seqno,
+ const InternalKeyComparator& icmp) {
+ if (smallest.size() == 0 || icmp.Compare(start, smallest) < 0) {
+ smallest = start;
+ }
+ if (largest.size() == 0 || icmp.Compare(largest, end) < 0) {
+ largest = end;
+ }
+ fd.smallest_seqno = std::min(fd.smallest_seqno, seqno);
+ fd.largest_seqno = std::max(fd.largest_seqno, seqno);
+ }
+
+ // Try to get oldest ancester time from the class itself or table properties
+ // if table reader is already pinned.
+ // 0 means the information is not available.
+ uint64_t TryGetOldestAncesterTime() {
+ if (oldest_ancester_time != kUnknownOldestAncesterTime) {
+ return oldest_ancester_time;
+ } else if (fd.table_reader != nullptr &&
+ fd.table_reader->GetTableProperties() != nullptr) {
+ return fd.table_reader->GetTableProperties()->creation_time;
+ }
+ return kUnknownOldestAncesterTime;
+ }
+
+ uint64_t TryGetFileCreationTime() {
+ if (file_creation_time != kUnknownFileCreationTime) {
+ return file_creation_time;
+ } else if (fd.table_reader != nullptr &&
+ fd.table_reader->GetTableProperties() != nullptr) {
+ return fd.table_reader->GetTableProperties()->file_creation_time;
+ }
+ return kUnknownFileCreationTime;
+ }
+};
+
+// A compressed copy of file meta data that just contain minimum data needed
+// to server read operations, while still keeping the pointer to full metadata
+// of the file in case it is needed.
+struct FdWithKeyRange {
+ FileDescriptor fd;
+ FileMetaData* file_metadata; // Point to all metadata
+ Slice smallest_key; // slice that contain smallest key
+ Slice largest_key; // slice that contain largest key
+
+ FdWithKeyRange()
+ : fd(),
+ file_metadata(nullptr),
+ smallest_key(),
+ largest_key() {
+ }
+
+ FdWithKeyRange(FileDescriptor _fd, Slice _smallest_key, Slice _largest_key,
+ FileMetaData* _file_metadata)
+ : fd(_fd),
+ file_metadata(_file_metadata),
+ smallest_key(_smallest_key),
+ largest_key(_largest_key) {}
+};
+
+// Data structure to store an array of FdWithKeyRange in one level
+// Actual data is guaranteed to be stored closely
+struct LevelFilesBrief {
+ size_t num_files;
+ FdWithKeyRange* files;
+ LevelFilesBrief() {
+ num_files = 0;
+ files = nullptr;
+ }
+};
+
+// The state of a DB at any given time is referred to as a Version.
+// Any modification to the Version is considered a Version Edit. A Version is
+// constructed by joining a sequence of Version Edits. Version Edits are written
+// to the MANIFEST file.
+class VersionEdit {
+ public:
+ void Clear();
+
+ void SetDBId(const std::string& db_id) {
+ has_db_id_ = true;
+ db_id_ = db_id;
+ }
+ bool HasDbId() const { return has_db_id_; }
+ const std::string& GetDbId() const { return db_id_; }
+
+ void SetComparatorName(const Slice& name) {
+ has_comparator_ = true;
+ comparator_ = name.ToString();
+ }
+ bool HasComparatorName() const { return has_comparator_; }
+ const std::string& GetComparatorName() const { return comparator_; }
+
+ void SetLogNumber(uint64_t num) {
+ has_log_number_ = true;
+ log_number_ = num;
+ }
+ bool HasLogNumber() const { return has_log_number_; }
+ uint64_t GetLogNumber() const { return log_number_; }
+
+ void SetPrevLogNumber(uint64_t num) {
+ has_prev_log_number_ = true;
+ prev_log_number_ = num;
+ }
+ bool HasPrevLogNumber() const { return has_prev_log_number_; }
+ uint64_t GetPrevLogNumber() const { return prev_log_number_; }
+
+ void SetNextFile(uint64_t num) {
+ has_next_file_number_ = true;
+ next_file_number_ = num;
+ }
+ bool HasNextFile() const { return has_next_file_number_; }
+ uint64_t GetNextFile() const { return next_file_number_; }
+
+ void SetMaxColumnFamily(uint32_t max_column_family) {
+ has_max_column_family_ = true;
+ max_column_family_ = max_column_family;
+ }
+ bool HasMaxColumnFamily() const { return has_max_column_family_; }
+ uint32_t GetMaxColumnFamily() const { return max_column_family_; }
+
+ void SetMinLogNumberToKeep(uint64_t num) {
+ has_min_log_number_to_keep_ = true;
+ min_log_number_to_keep_ = num;
+ }
+ bool HasMinLogNumberToKeep() const { return has_min_log_number_to_keep_; }
+ uint64_t GetMinLogNumberToKeep() const { return min_log_number_to_keep_; }
+
+ void SetLastSequence(SequenceNumber seq) {
+ has_last_sequence_ = true;
+ last_sequence_ = seq;
+ }
+ bool HasLastSequence() const { return has_last_sequence_; }
+ SequenceNumber GetLastSequence() const { return last_sequence_; }
+
+ // Delete the specified "file" from the specified "level".
+ void DeleteFile(int level, uint64_t file) {
+ deleted_files_.emplace(level, file);
+ }
+
+ // Retrieve the files deleted as well as their associated levels.
+ using DeletedFiles = std::set<std::pair<int, uint64_t>>;
+ const DeletedFiles& GetDeletedFiles() const { return deleted_files_; }
+
+ // Add the specified file at the specified level.
+ // REQUIRES: This version has not been saved (see VersionSet::SaveTo)
+ // REQUIRES: "smallest" and "largest" are smallest and largest keys in file
+ // REQUIRES: "oldest_blob_file_number" is the number of the oldest blob file
+ // referred to by this file if any, kInvalidBlobFileNumber otherwise.
+ void AddFile(int level, uint64_t file, uint32_t file_path_id,
+ uint64_t file_size, const InternalKey& smallest,
+ const InternalKey& largest, const SequenceNumber& smallest_seqno,
+ const SequenceNumber& largest_seqno, bool marked_for_compaction,
+ uint64_t oldest_blob_file_number, uint64_t oldest_ancester_time,
+ uint64_t file_creation_time, const std::string& file_checksum,
+ const std::string& file_checksum_func_name) {
+ assert(smallest_seqno <= largest_seqno);
+ new_files_.emplace_back(
+ level, FileMetaData(file, file_path_id, file_size, smallest, largest,
+ smallest_seqno, largest_seqno,
+ marked_for_compaction, oldest_blob_file_number,
+ oldest_ancester_time, file_creation_time,
+ file_checksum, file_checksum_func_name));
+ }
+
+ void AddFile(int level, const FileMetaData& f) {
+ assert(f.fd.smallest_seqno <= f.fd.largest_seqno);
+ new_files_.emplace_back(level, f);
+ }
+
+ // Retrieve the files added as well as their associated levels.
+ using NewFiles = std::vector<std::pair<int, FileMetaData>>;
+ const NewFiles& GetNewFiles() const { return new_files_; }
+
+ // Number of edits
+ size_t NumEntries() const { return new_files_.size() + deleted_files_.size(); }
+
+ void SetColumnFamily(uint32_t column_family_id) {
+ column_family_ = column_family_id;
+ }
+ uint32_t GetColumnFamily() const { return column_family_; }
+
+ // set column family ID by calling SetColumnFamily()
+ void AddColumnFamily(const std::string& name) {
+ assert(!is_column_family_drop_);
+ assert(!is_column_family_add_);
+ assert(NumEntries() == 0);
+ is_column_family_add_ = true;
+ column_family_name_ = name;
+ }
+
+ // set column family ID by calling SetColumnFamily()
+ void DropColumnFamily() {
+ assert(!is_column_family_drop_);
+ assert(!is_column_family_add_);
+ assert(NumEntries() == 0);
+ is_column_family_drop_ = true;
+ }
+
+ bool IsColumnFamilyManipulation() const {
+ return is_column_family_add_ || is_column_family_drop_;
+ }
+
+ void MarkAtomicGroup(uint32_t remaining_entries) {
+ is_in_atomic_group_ = true;
+ remaining_entries_ = remaining_entries;
+ }
+ bool IsInAtomicGroup() const { return is_in_atomic_group_; }
+ uint32_t GetRemainingEntries() const { return remaining_entries_; }
+
+ // return true on success.
+ bool EncodeTo(std::string* dst) const;
+ Status DecodeFrom(const Slice& src);
+
+ std::string DebugString(bool hex_key = false) const;
+ std::string DebugJSON(int edit_num, bool hex_key = false) const;
+
+ private:
+ friend class ReactiveVersionSet;
+ friend class VersionSet;
+ friend class Version;
+ friend class AtomicGroupReadBuffer;
+
+ bool GetLevel(Slice* input, int* level, const char** msg);
+
+ const char* DecodeNewFile4From(Slice* input);
+
+ int max_level_ = 0;
+ std::string db_id_;
+ std::string comparator_;
+ uint64_t log_number_ = 0;
+ uint64_t prev_log_number_ = 0;
+ uint64_t next_file_number_ = 0;
+ uint32_t max_column_family_ = 0;
+ // The most recent WAL log number that is deleted
+ uint64_t min_log_number_to_keep_ = 0;
+ SequenceNumber last_sequence_ = 0;
+ bool has_db_id_ = false;
+ bool has_comparator_ = false;
+ bool has_log_number_ = false;
+ bool has_prev_log_number_ = false;
+ bool has_next_file_number_ = false;
+ bool has_max_column_family_ = false;
+ bool has_min_log_number_to_keep_ = false;
+ bool has_last_sequence_ = false;
+
+ DeletedFiles deleted_files_;
+ NewFiles new_files_;
+
+ // Each version edit record should have column_family_ set
+ // If it's not set, it is default (0)
+ uint32_t column_family_ = 0;
+ // a version edit can be either column_family add or
+ // column_family drop. If it's column family add,
+ // it also includes column family name.
+ bool is_column_family_drop_ = false;
+ bool is_column_family_add_ = false;
+ std::string column_family_name_;
+
+ bool is_in_atomic_group_ = false;
+ uint32_t remaining_entries_ = 0;
+};
+
+} // namespace ROCKSDB_NAMESPACE