diff options
Diffstat (limited to 'src/rocksdb/db/version_edit.cc')
-rw-r--r-- | src/rocksdb/db/version_edit.cc | 708 |
1 files changed, 708 insertions, 0 deletions
diff --git a/src/rocksdb/db/version_edit.cc b/src/rocksdb/db/version_edit.cc new file mode 100644 index 00000000..bf5f178f --- /dev/null +++ b/src/rocksdb/db/version_edit.cc @@ -0,0 +1,708 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "db/version_edit.h" + +#include "db/version_set.h" +#include "rocksdb/slice.h" +#include "util/coding.h" +#include "util/event_logger.h" +#include "util/string_util.h" +#include "util/sync_point.h" + +namespace rocksdb { + +// Tag numbers for serialized VersionEdit. These numbers are written to +// disk and should not be changed. +enum Tag : uint32_t { + kComparator = 1, + kLogNumber = 2, + kNextFileNumber = 3, + kLastSequence = 4, + kCompactPointer = 5, + kDeletedFile = 6, + kNewFile = 7, + // 8 was used for large value refs + kPrevLogNumber = 9, + kMinLogNumberToKeep = 10, + + // these are new formats divergent from open source leveldb + kNewFile2 = 100, + kNewFile3 = 102, + kNewFile4 = 103, // 4th (the latest) format version of adding files + kColumnFamily = 200, // specify column family for version edit + kColumnFamilyAdd = 201, + kColumnFamilyDrop = 202, + kMaxColumnFamily = 203, + + kInAtomicGroup = 300, +}; + +// Mask for an identified tag from the future which can be safely ignored. +uint32_t kTagSafeIgnoreMask = 1 << 13; + +enum CustomTag : uint32_t { + kTerminate = 1, // The end of customized fields + kNeedCompaction = 2, + // Since Manifest is not entirely currently forward-compatible, and the only + // forward-compatible part is the CutsomtTag of kNewFile, we currently encode + // kMinLogNumberToKeep as part of a CustomTag as a hack. This should be + // removed when manifest becomes forward-comptabile. + kMinLogNumberToKeepHack = 3, + kPathId = 65, +}; +// If this bit for the custom tag is set, opening DB should fail if +// we don't know this field. +uint32_t kCustomTagNonSafeIgnoreMask = 1 << 6; + +uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id) { + assert(number <= kFileNumberMask); + return number | (path_id * (kFileNumberMask + 1)); +} + +void VersionEdit::Clear() { + comparator_.clear(); + max_level_ = 0; + log_number_ = 0; + prev_log_number_ = 0; + last_sequence_ = 0; + next_file_number_ = 0; + max_column_family_ = 0; + min_log_number_to_keep_ = 0; + has_comparator_ = false; + has_log_number_ = false; + has_prev_log_number_ = false; + has_next_file_number_ = false; + has_last_sequence_ = false; + has_max_column_family_ = false; + has_min_log_number_to_keep_ = false; + deleted_files_.clear(); + new_files_.clear(); + column_family_ = 0; + is_column_family_add_ = 0; + is_column_family_drop_ = 0; + column_family_name_.clear(); + is_in_atomic_group_ = false; + remaining_entries_ = 0; +} + +bool VersionEdit::EncodeTo(std::string* dst) const { + if (has_comparator_) { + PutVarint32(dst, kComparator); + PutLengthPrefixedSlice(dst, comparator_); + } + if (has_log_number_) { + PutVarint32Varint64(dst, kLogNumber, log_number_); + } + if (has_prev_log_number_) { + PutVarint32Varint64(dst, kPrevLogNumber, prev_log_number_); + } + if (has_next_file_number_) { + PutVarint32Varint64(dst, kNextFileNumber, next_file_number_); + } + if (has_last_sequence_) { + PutVarint32Varint64(dst, kLastSequence, last_sequence_); + } + if (has_max_column_family_) { + PutVarint32Varint32(dst, kMaxColumnFamily, max_column_family_); + } + for (const auto& deleted : deleted_files_) { + PutVarint32Varint32Varint64(dst, kDeletedFile, deleted.first /* level */, + deleted.second /* file number */); + } + + bool min_log_num_written = false; + for (size_t i = 0; i < new_files_.size(); i++) { + const FileMetaData& f = new_files_[i].second; + if (!f.smallest.Valid() || !f.largest.Valid()) { + return false; + } + bool has_customized_fields = false; + if (f.marked_for_compaction || has_min_log_number_to_keep_) { + PutVarint32(dst, kNewFile4); + has_customized_fields = true; + } else if (f.fd.GetPathId() == 0) { + // Use older format to make sure user can roll back the build if they + // don't config multiple DB paths. + PutVarint32(dst, kNewFile2); + } else { + PutVarint32(dst, kNewFile3); + } + PutVarint32Varint64(dst, new_files_[i].first /* level */, f.fd.GetNumber()); + if (f.fd.GetPathId() != 0 && !has_customized_fields) { + // kNewFile3 + PutVarint32(dst, f.fd.GetPathId()); + } + PutVarint64(dst, f.fd.GetFileSize()); + PutLengthPrefixedSlice(dst, f.smallest.Encode()); + PutLengthPrefixedSlice(dst, f.largest.Encode()); + PutVarint64Varint64(dst, f.fd.smallest_seqno, f.fd.largest_seqno); + if (has_customized_fields) { + // Customized fields' format: + // +-----------------------------+ + // | 1st field's tag (varint32) | + // +-----------------------------+ + // | 1st field's size (varint32) | + // +-----------------------------+ + // | bytes for 1st field | + // | (based on size decoded) | + // +-----------------------------+ + // | | + // | ...... | + // | | + // +-----------------------------+ + // | last field's size (varint32)| + // +-----------------------------+ + // | bytes for last field | + // | (based on size decoded) | + // +-----------------------------+ + // | terminating tag (varint32) | + // +-----------------------------+ + // + // Customized encoding for fields: + // tag kPathId: 1 byte as path_id + // tag kNeedCompaction: + // now only can take one char value 1 indicating need-compaction + // + if (f.fd.GetPathId() != 0) { + PutVarint32(dst, CustomTag::kPathId); + char p = static_cast<char>(f.fd.GetPathId()); + PutLengthPrefixedSlice(dst, Slice(&p, 1)); + } + if (f.marked_for_compaction) { + PutVarint32(dst, CustomTag::kNeedCompaction); + char p = static_cast<char>(1); + PutLengthPrefixedSlice(dst, Slice(&p, 1)); + } + if (has_min_log_number_to_keep_ && !min_log_num_written) { + PutVarint32(dst, CustomTag::kMinLogNumberToKeepHack); + std::string varint_log_number; + PutFixed64(&varint_log_number, min_log_number_to_keep_); + PutLengthPrefixedSlice(dst, Slice(varint_log_number)); + min_log_num_written = true; + } + TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields", + dst); + + PutVarint32(dst, CustomTag::kTerminate); + } + } + + // 0 is default and does not need to be explicitly written + if (column_family_ != 0) { + PutVarint32Varint32(dst, kColumnFamily, column_family_); + } + + if (is_column_family_add_) { + PutVarint32(dst, kColumnFamilyAdd); + PutLengthPrefixedSlice(dst, Slice(column_family_name_)); + } + + if (is_column_family_drop_) { + PutVarint32(dst, kColumnFamilyDrop); + } + + if (is_in_atomic_group_) { + PutVarint32(dst, kInAtomicGroup); + PutVarint32(dst, remaining_entries_); + } + return true; +} + +static bool GetInternalKey(Slice* input, InternalKey* dst) { + Slice str; + if (GetLengthPrefixedSlice(input, &str)) { + dst->DecodeFrom(str); + return dst->Valid(); + } else { + return false; + } +} + +bool VersionEdit::GetLevel(Slice* input, int* level, const char** /*msg*/) { + uint32_t v; + if (GetVarint32(input, &v)) { + *level = v; + if (max_level_ < *level) { + max_level_ = *level; + } + return true; + } else { + return false; + } +} + +static bool is_pseudo_new_file_record_pr3488( + const int level, + const uint64_t number, + const uint64_t file_size, + InternalKey& smallest, + InternalKey& largest, + const bool has_min_log_number_to_keep_) { + + if (level == 0 && number == 0 && file_size == 0 && + has_min_log_number_to_keep_) { + InternalKey dummy_key(Slice("dummy_key"), 0ull, ValueType::kTypeValue); + return (*smallest.rep() == *dummy_key.rep() && + *largest.rep() == *dummy_key.rep()); + } else { + return false; + } +} + +const char* VersionEdit::DecodeNewFile4From(Slice* input) { + const char* msg = nullptr; + int level; + FileMetaData f; + uint64_t number; + uint32_t path_id = 0; + uint64_t file_size; + SequenceNumber smallest_seqno; + SequenceNumber largest_seqno; + // Since this is the only forward-compatible part of the code, we hack new + // extension into this record. When we do, we set this boolean to distinguish + // the record from the normal NewFile records. + if (GetLevel(input, &level, &msg) && GetVarint64(input, &number) && + GetVarint64(input, &file_size) && GetInternalKey(input, &f.smallest) && + GetInternalKey(input, &f.largest) && + GetVarint64(input, &smallest_seqno) && + GetVarint64(input, &largest_seqno)) { + // See comments in VersionEdit::EncodeTo() for format of customized fields + while (true) { + uint32_t custom_tag; + Slice field; + if (!GetVarint32(input, &custom_tag)) { + return "new-file4 custom field"; + } + if (custom_tag == kTerminate) { + break; + } + if (!GetLengthPrefixedSlice(input, &field)) { + return "new-file4 custom field length prefixed slice error"; + } + switch (custom_tag) { + case kPathId: + if (field.size() != 1) { + return "path_id field wrong size"; + } + path_id = field[0]; + if (path_id > 3) { + return "path_id wrong vaue"; + } + break; + case kNeedCompaction: + if (field.size() != 1) { + return "need_compaction field wrong size"; + } + f.marked_for_compaction = (field[0] == 1); + break; + case kMinLogNumberToKeepHack: + // This is a hack to encode kMinLogNumberToKeep in a + // forward-compatible fashion. + if (!GetFixed64(&field, &min_log_number_to_keep_)) { + return "deleted log number malformatted"; + } + has_min_log_number_to_keep_ = true; + break; + default: + if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) { + // Should not proceed if cannot understand it + return "new-file4 custom field not supported"; + } + break; + } + } + } else { + return "new-file4 entry"; + } + if (is_pseudo_new_file_record_pr3488(level, number, file_size, + f.smallest, f.largest, + has_min_log_number_to_keep_)) { + // Since this has nothing to do with NewFile, return immediately. + return nullptr; + } + f.fd = + FileDescriptor(number, path_id, file_size, smallest_seqno, largest_seqno); + new_files_.push_back(std::make_pair(level, f)); + return nullptr; +} + +Status VersionEdit::DecodeFrom(const Slice& src) { + Clear(); + Slice input = src; + const char* msg = nullptr; + uint32_t tag; + + // Temporary storage for parsing + int level; + FileMetaData f; + Slice str; + InternalKey key; + + while (msg == nullptr && GetVarint32(&input, &tag)) { + switch (tag) { + case kComparator: + if (GetLengthPrefixedSlice(&input, &str)) { + comparator_ = str.ToString(); + has_comparator_ = true; + } else { + msg = "comparator name"; + } + break; + + case kLogNumber: + if (GetVarint64(&input, &log_number_)) { + has_log_number_ = true; + } else { + msg = "log number"; + } + break; + + case kPrevLogNumber: + if (GetVarint64(&input, &prev_log_number_)) { + has_prev_log_number_ = true; + } else { + msg = "previous log number"; + } + break; + + case kNextFileNumber: + if (GetVarint64(&input, &next_file_number_)) { + has_next_file_number_ = true; + } else { + msg = "next file number"; + } + break; + + case kLastSequence: + if (GetVarint64(&input, &last_sequence_)) { + has_last_sequence_ = true; + } else { + msg = "last sequence number"; + } + break; + + case kMaxColumnFamily: + if (GetVarint32(&input, &max_column_family_)) { + has_max_column_family_ = true; + } else { + msg = "max column family"; + } + break; + + case kMinLogNumberToKeep: + if (GetVarint64(&input, &min_log_number_to_keep_)) { + has_min_log_number_to_keep_ = true; + } else { + msg = "min log number to kee"; + } + break; + + case kCompactPointer: + if (GetLevel(&input, &level, &msg) && + GetInternalKey(&input, &key)) { + // we don't use compact pointers anymore, + // but we should not fail if they are still + // in manifest + } else { + if (!msg) { + msg = "compaction pointer"; + } + } + break; + + case kDeletedFile: { + uint64_t number; + if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number)) { + deleted_files_.insert(std::make_pair(level, number)); + } else { + if (!msg) { + msg = "deleted file"; + } + } + break; + } + + case kNewFile: { + uint64_t number; + uint64_t file_size; + if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) && + GetVarint64(&input, &file_size) && + GetInternalKey(&input, &f.smallest) && + GetInternalKey(&input, &f.largest)) { + f.fd = FileDescriptor(number, 0, file_size); + new_files_.push_back(std::make_pair(level, f)); + } else { + if (!msg) { + msg = "new-file entry"; + } + } + break; + } + case kNewFile2: { + uint64_t number; + uint64_t file_size; + SequenceNumber smallest_seqno; + SequenceNumber largest_seqno; + if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) && + GetVarint64(&input, &file_size) && + GetInternalKey(&input, &f.smallest) && + GetInternalKey(&input, &f.largest) && + GetVarint64(&input, &smallest_seqno) && + GetVarint64(&input, &largest_seqno)) { + f.fd = FileDescriptor(number, 0, file_size, smallest_seqno, + largest_seqno); + new_files_.push_back(std::make_pair(level, f)); + } else { + if (!msg) { + msg = "new-file2 entry"; + } + } + break; + } + + case kNewFile3: { + uint64_t number; + uint32_t path_id; + uint64_t file_size; + SequenceNumber smallest_seqno; + SequenceNumber largest_seqno; + if (GetLevel(&input, &level, &msg) && GetVarint64(&input, &number) && + GetVarint32(&input, &path_id) && GetVarint64(&input, &file_size) && + GetInternalKey(&input, &f.smallest) && + GetInternalKey(&input, &f.largest) && + GetVarint64(&input, &smallest_seqno) && + GetVarint64(&input, &largest_seqno)) { + f.fd = FileDescriptor(number, path_id, file_size, smallest_seqno, + largest_seqno); + new_files_.push_back(std::make_pair(level, f)); + } else { + if (!msg) { + msg = "new-file3 entry"; + } + } + break; + } + + case kNewFile4: { + msg = DecodeNewFile4From(&input); + break; + } + + case kColumnFamily: + if (!GetVarint32(&input, &column_family_)) { + if (!msg) { + msg = "set column family id"; + } + } + break; + + case kColumnFamilyAdd: + if (GetLengthPrefixedSlice(&input, &str)) { + is_column_family_add_ = true; + column_family_name_ = str.ToString(); + } else { + if (!msg) { + msg = "column family add"; + } + } + break; + + case kColumnFamilyDrop: + is_column_family_drop_ = true; + break; + + case kInAtomicGroup: + is_in_atomic_group_ = true; + if (!GetVarint32(&input, &remaining_entries_)) { + if (!msg) { + msg = "remaining entries"; + } + } + break; + + default: + if (tag & kTagSafeIgnoreMask) { + // Tag from future which can be safely ignored. + // The next field must be the length of the entry. + uint32_t field_len; + if (!GetVarint32(&input, &field_len) || + static_cast<size_t>(field_len) > input.size()) { + if (!msg) { + msg = "safely ignoreable tag length error"; + } + } else { + input.remove_prefix(static_cast<size_t>(field_len)); + } + } else { + msg = "unknown tag"; + } + break; + } + } + + if (msg == nullptr && !input.empty()) { + msg = "invalid tag"; + } + + Status result; + if (msg != nullptr) { + result = Status::Corruption("VersionEdit", msg); + } + return result; +} + +std::string VersionEdit::DebugString(bool hex_key) const { + std::string r; + r.append("VersionEdit {"); + if (has_comparator_) { + r.append("\n Comparator: "); + r.append(comparator_); + } + if (has_log_number_) { + r.append("\n LogNumber: "); + AppendNumberTo(&r, log_number_); + } + if (has_prev_log_number_) { + r.append("\n PrevLogNumber: "); + AppendNumberTo(&r, prev_log_number_); + } + if (has_next_file_number_) { + r.append("\n NextFileNumber: "); + AppendNumberTo(&r, next_file_number_); + } + if (has_min_log_number_to_keep_) { + r.append("\n MinLogNumberToKeep: "); + AppendNumberTo(&r, min_log_number_to_keep_); + } + if (has_last_sequence_) { + r.append("\n LastSeq: "); + AppendNumberTo(&r, last_sequence_); + } + for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); + iter != deleted_files_.end(); + ++iter) { + r.append("\n DeleteFile: "); + AppendNumberTo(&r, iter->first); + r.append(" "); + AppendNumberTo(&r, iter->second); + } + for (size_t i = 0; i < new_files_.size(); i++) { + const FileMetaData& f = new_files_[i].second; + r.append("\n AddFile: "); + AppendNumberTo(&r, new_files_[i].first); + r.append(" "); + AppendNumberTo(&r, f.fd.GetNumber()); + r.append(" "); + AppendNumberTo(&r, f.fd.GetFileSize()); + r.append(" "); + r.append(f.smallest.DebugString(hex_key)); + r.append(" .. "); + r.append(f.largest.DebugString(hex_key)); + } + r.append("\n ColumnFamily: "); + AppendNumberTo(&r, column_family_); + if (is_column_family_add_) { + r.append("\n ColumnFamilyAdd: "); + r.append(column_family_name_); + } + if (is_column_family_drop_) { + r.append("\n ColumnFamilyDrop"); + } + if (has_max_column_family_) { + r.append("\n MaxColumnFamily: "); + AppendNumberTo(&r, max_column_family_); + } + if (is_in_atomic_group_) { + r.append("\n AtomicGroup: "); + AppendNumberTo(&r, remaining_entries_); + r.append(" entries remains"); + } + r.append("\n}\n"); + return r; +} + +std::string VersionEdit::DebugJSON(int edit_num, bool hex_key) const { + JSONWriter jw; + jw << "EditNumber" << edit_num; + + if (has_comparator_) { + jw << "Comparator" << comparator_; + } + if (has_log_number_) { + jw << "LogNumber" << log_number_; + } + if (has_prev_log_number_) { + jw << "PrevLogNumber" << prev_log_number_; + } + if (has_next_file_number_) { + jw << "NextFileNumber" << next_file_number_; + } + if (has_last_sequence_) { + jw << "LastSeq" << last_sequence_; + } + + if (!deleted_files_.empty()) { + jw << "DeletedFiles"; + jw.StartArray(); + + for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); + iter != deleted_files_.end(); + ++iter) { + jw.StartArrayedObject(); + jw << "Level" << iter->first; + jw << "FileNumber" << iter->second; + jw.EndArrayedObject(); + } + + jw.EndArray(); + } + + if (!new_files_.empty()) { + jw << "AddedFiles"; + jw.StartArray(); + + for (size_t i = 0; i < new_files_.size(); i++) { + jw.StartArrayedObject(); + jw << "Level" << new_files_[i].first; + const FileMetaData& f = new_files_[i].second; + jw << "FileNumber" << f.fd.GetNumber(); + jw << "FileSize" << f.fd.GetFileSize(); + jw << "SmallestIKey" << f.smallest.DebugString(hex_key); + jw << "LargestIKey" << f.largest.DebugString(hex_key); + jw.EndArrayedObject(); + } + + jw.EndArray(); + } + + jw << "ColumnFamily" << column_family_; + + if (is_column_family_add_) { + jw << "ColumnFamilyAdd" << column_family_name_; + } + if (is_column_family_drop_) { + jw << "ColumnFamilyDrop" << column_family_name_; + } + if (has_max_column_family_) { + jw << "MaxColumnFamily" << max_column_family_; + } + if (has_min_log_number_to_keep_) { + jw << "MinLogNumberToKeep" << min_log_number_to_keep_; + } + if (is_in_atomic_group_) { + jw << "AtomicGroup" << remaining_entries_; + } + + jw.EndObject(); + + return jw.Get(); +} + +} // namespace rocksdb |