From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/rocksdb/db/snapshot_impl.h | 239 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 src/rocksdb/db/snapshot_impl.h (limited to 'src/rocksdb/db/snapshot_impl.h') diff --git a/src/rocksdb/db/snapshot_impl.h b/src/rocksdb/db/snapshot_impl.h new file mode 100644 index 000000000..23e5e98cd --- /dev/null +++ b/src/rocksdb/db/snapshot_impl.h @@ -0,0 +1,239 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#pragma once +#include + +#include "db/dbformat.h" +#include "rocksdb/db.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +class SnapshotList; + +// Snapshots are kept in a doubly-linked list in the DB. +// Each SnapshotImpl corresponds to a particular sequence number. +class SnapshotImpl : public Snapshot { + public: + SequenceNumber number_; // const after creation + // It indicates the smallest uncommitted data at the time the snapshot was + // taken. This is currently used by WritePrepared transactions to limit the + // scope of queries to IsInSnapshot. + SequenceNumber min_uncommitted_ = kMinUnCommittedSeq; + + SequenceNumber GetSequenceNumber() const override { return number_; } + + int64_t GetUnixTime() const override { return unix_time_; } + + uint64_t GetTimestamp() const override { return timestamp_; } + + private: + friend class SnapshotList; + + // SnapshotImpl is kept in a doubly-linked circular list + SnapshotImpl* prev_; + SnapshotImpl* next_; + + SnapshotList* list_; // just for sanity checks + + int64_t unix_time_; + + uint64_t timestamp_; + + // Will this snapshot be used by a Transaction to do write-conflict checking? + bool is_write_conflict_boundary_; +}; + +class SnapshotList { + public: + SnapshotList() { + list_.prev_ = &list_; + list_.next_ = &list_; + list_.number_ = 0xFFFFFFFFL; // placeholder marker, for debugging + // Set all the variables to make UBSAN happy. + list_.list_ = nullptr; + list_.unix_time_ = 0; + list_.timestamp_ = 0; + list_.is_write_conflict_boundary_ = false; + count_ = 0; + } + + // No copy-construct. + SnapshotList(const SnapshotList&) = delete; + + bool empty() const { + assert(list_.next_ != &list_ || 0 == count_); + return list_.next_ == &list_; + } + SnapshotImpl* oldest() const { + assert(!empty()); + return list_.next_; + } + SnapshotImpl* newest() const { + assert(!empty()); + return list_.prev_; + } + + SnapshotImpl* New(SnapshotImpl* s, SequenceNumber seq, uint64_t unix_time, + bool is_write_conflict_boundary, + uint64_t ts = std::numeric_limits::max()) { + s->number_ = seq; + s->unix_time_ = unix_time; + s->timestamp_ = ts; + s->is_write_conflict_boundary_ = is_write_conflict_boundary; + s->list_ = this; + s->next_ = &list_; + s->prev_ = list_.prev_; + s->prev_->next_ = s; + s->next_->prev_ = s; + count_++; + return s; + } + + // Do not responsible to free the object. + void Delete(const SnapshotImpl* s) { + assert(s->list_ == this); + s->prev_->next_ = s->next_; + s->next_->prev_ = s->prev_; + count_--; + } + + // retrieve all snapshot numbers up until max_seq. They are sorted in + // ascending order (with no duplicates). + std::vector GetAll( + SequenceNumber* oldest_write_conflict_snapshot = nullptr, + const SequenceNumber& max_seq = kMaxSequenceNumber) const { + std::vector ret; + GetAll(&ret, oldest_write_conflict_snapshot, max_seq); + return ret; + } + + void GetAll(std::vector* snap_vector, + SequenceNumber* oldest_write_conflict_snapshot = nullptr, + const SequenceNumber& max_seq = kMaxSequenceNumber) const { + std::vector& ret = *snap_vector; + // So far we have no use case that would pass a non-empty vector + assert(ret.size() == 0); + + if (oldest_write_conflict_snapshot != nullptr) { + *oldest_write_conflict_snapshot = kMaxSequenceNumber; + } + + if (empty()) { + return; + } + const SnapshotImpl* s = &list_; + while (s->next_ != &list_) { + if (s->next_->number_ > max_seq) { + break; + } + // Avoid duplicates + if (ret.empty() || ret.back() != s->next_->number_) { + ret.push_back(s->next_->number_); + } + + if (oldest_write_conflict_snapshot != nullptr && + *oldest_write_conflict_snapshot == kMaxSequenceNumber && + s->next_->is_write_conflict_boundary_) { + // If this is the first write-conflict boundary snapshot in the list, + // it is the oldest + *oldest_write_conflict_snapshot = s->next_->number_; + } + + s = s->next_; + } + return; + } + + // get the sequence number of the most recent snapshot + SequenceNumber GetNewest() { + if (empty()) { + return 0; + } + return newest()->number_; + } + + int64_t GetOldestSnapshotTime() const { + if (empty()) { + return 0; + } else { + return oldest()->unix_time_; + } + } + + int64_t GetOldestSnapshotSequence() const { + if (empty()) { + return 0; + } else { + return oldest()->GetSequenceNumber(); + } + } + + uint64_t count() const { return count_; } + + private: + // Dummy head of doubly-linked list of snapshots + SnapshotImpl list_; + uint64_t count_; +}; + +// All operations on TimestampedSnapshotList must be protected by db mutex. +class TimestampedSnapshotList { + public: + explicit TimestampedSnapshotList() = default; + + std::shared_ptr GetSnapshot(uint64_t ts) const { + if (ts == std::numeric_limits::max() && !snapshots_.empty()) { + auto it = snapshots_.rbegin(); + assert(it != snapshots_.rend()); + return it->second; + } + auto it = snapshots_.find(ts); + if (it == snapshots_.end()) { + return std::shared_ptr(); + } + return it->second; + } + + void GetSnapshots( + uint64_t ts_lb, uint64_t ts_ub, + std::vector>& snapshots) const { + assert(ts_lb < ts_ub); + auto it_low = snapshots_.lower_bound(ts_lb); + auto it_high = snapshots_.lower_bound(ts_ub); + for (auto it = it_low; it != it_high; ++it) { + snapshots.emplace_back(it->second); + } + } + + void AddSnapshot(const std::shared_ptr& snapshot) { + assert(snapshot); + snapshots_.try_emplace(snapshot->GetTimestamp(), snapshot); + } + + // snapshots_to_release: the container to where the timestamped snapshots will + // be moved so that it retains the last reference to the snapshots and the + // snapshots won't be actually released which requires db mutex. The + // snapshots will be released by caller of ReleaseSnapshotsOlderThan(). + void ReleaseSnapshotsOlderThan( + uint64_t ts, + autovector>& snapshots_to_release) { + auto ub = snapshots_.lower_bound(ts); + for (auto it = snapshots_.begin(); it != ub; ++it) { + snapshots_to_release.emplace_back(it->second); + } + snapshots_.erase(snapshots_.begin(), ub); + } + + private: + std::map> snapshots_; +}; + +} // namespace ROCKSDB_NAMESPACE -- cgit v1.2.3