summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/utilities/transactions/pessimistic_transaction.h
blob: 8f2c844058ce8cfe0a557d9757b03a646179ec9c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#ifndef ROCKSDB_LITE

#include <algorithm>
#include <atomic>
#include <mutex>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>

#include "db/write_callback.h"
#include "rocksdb/db.h"
#include "rocksdb/slice.h"
#include "rocksdb/snapshot.h"
#include "rocksdb/status.h"
#include "rocksdb/types.h"
#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/transaction_db.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "util/autovector.h"
#include "utilities/transactions/transaction_base.h"
#include "utilities/transactions/transaction_util.h"

namespace ROCKSDB_NAMESPACE {

class PessimisticTransactionDB;

// A transaction under pessimistic concurrency control. This class implements
// the locking API and interfaces with the lock manager as well as the
// pessimistic transactional db.
class PessimisticTransaction : public TransactionBaseImpl {
 public:
  PessimisticTransaction(TransactionDB* db, const WriteOptions& write_options,
                         const TransactionOptions& txn_options,
                         const bool init = true);
  // No copying allowed
  PessimisticTransaction(const PessimisticTransaction&) = delete;
  void operator=(const PessimisticTransaction&) = delete;

  virtual ~PessimisticTransaction();

  void Reinitialize(TransactionDB* txn_db, const WriteOptions& write_options,
                    const TransactionOptions& txn_options);

  Status Prepare() override;

  Status Commit() override;

  // It is basically Commit without going through Prepare phase. The write batch
  // is also directly provided instead of expecting txn to gradually batch the
  // transactions writes to an internal write batch.
  Status CommitBatch(WriteBatch* batch);

  Status Rollback() override;

  Status RollbackToSavePoint() override;

  Status SetName(const TransactionName& name) override;

  // Generate a new unique transaction identifier
  static TransactionID GenTxnID();

  TransactionID GetID() const override { return txn_id_; }

  std::vector<TransactionID> GetWaitingTxns(uint32_t* column_family_id,
                                            std::string* key) const override {
    std::lock_guard<std::mutex> lock(wait_mutex_);
    std::vector<TransactionID> ids(waiting_txn_ids_.size());
    if (key) *key = waiting_key_ ? *waiting_key_ : "";
    if (column_family_id) *column_family_id = waiting_cf_id_;
    std::copy(waiting_txn_ids_.begin(), waiting_txn_ids_.end(), ids.begin());
    return ids;
  }

  void SetWaitingTxn(autovector<TransactionID> ids, uint32_t column_family_id,
                     const std::string* key) {
    std::lock_guard<std::mutex> lock(wait_mutex_);
    waiting_txn_ids_ = ids;
    waiting_cf_id_ = column_family_id;
    waiting_key_ = key;
  }

  void ClearWaitingTxn() {
    std::lock_guard<std::mutex> lock(wait_mutex_);
    waiting_txn_ids_.clear();
    waiting_cf_id_ = 0;
    waiting_key_ = nullptr;
  }

  // Returns the time (in microseconds according to Env->GetMicros())
  // that this transaction will be expired.  Returns 0 if this transaction does
  // not expire.
  uint64_t GetExpirationTime() const { return expiration_time_; }

  // returns true if this transaction has an expiration_time and has expired.
  bool IsExpired() const;

  // Returns the number of microseconds a transaction can wait on acquiring a
  // lock or -1 if there is no timeout.
  int64_t GetLockTimeout() const { return lock_timeout_; }
  void SetLockTimeout(int64_t timeout) override {
    lock_timeout_ = timeout * 1000;
  }

  // Returns true if locks were stolen successfully, false otherwise.
  bool TryStealingLocks();

  bool IsDeadlockDetect() const override { return deadlock_detect_; }

  int64_t GetDeadlockDetectDepth() const { return deadlock_detect_depth_; }

 protected:
  // Refer to
  // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery
  bool use_only_the_last_commit_time_batch_for_recovery_ = false;

  virtual Status PrepareInternal() = 0;

  virtual Status CommitWithoutPrepareInternal() = 0;

  // batch_cnt if non-zero is the number of sub-batches. A sub-batch is a batch
  // with no duplicate keys. If zero, then the number of sub-batches is unknown.
  virtual Status CommitBatchInternal(WriteBatch* batch,
                                     size_t batch_cnt = 0) = 0;

  virtual Status CommitInternal() = 0;

  virtual Status RollbackInternal() = 0;

  virtual void Initialize(const TransactionOptions& txn_options);

  Status LockBatch(WriteBatch* batch, TransactionKeyMap* keys_to_unlock);

  Status TryLock(ColumnFamilyHandle* column_family, const Slice& key,
                 bool read_only, bool exclusive, const bool do_validate = true,
                 const bool assume_tracked = false) override;

  void Clear() override;

  PessimisticTransactionDB* txn_db_impl_;
  DBImpl* db_impl_;

  // If non-zero, this transaction should not be committed after this time (in
  // microseconds according to Env->NowMicros())
  uint64_t expiration_time_;

 private:
  friend class TransactionTest_ValidateSnapshotTest_Test;
  // Used to create unique ids for transactions.
  static std::atomic<TransactionID> txn_id_counter_;

  // Unique ID for this transaction
  TransactionID txn_id_;

  // IDs for the transactions that are blocking the current transaction.
  //
  // empty if current transaction is not waiting.
  autovector<TransactionID> waiting_txn_ids_;

  // The following two represents the (cf, key) that a transaction is waiting
  // on.
  //
  // If waiting_key_ is not null, then the pointer should always point to
  // a valid string object. The reason is that it is only non-null when the
  // transaction is blocked in the TransactionLockMgr::AcquireWithTimeout
  // function. At that point, the key string object is one of the function
  // parameters.
  uint32_t waiting_cf_id_;
  const std::string* waiting_key_;

  // Mutex protecting waiting_txn_ids_, waiting_cf_id_ and waiting_key_.
  mutable std::mutex wait_mutex_;

  // Timeout in microseconds when locking a key or -1 if there is no timeout.
  int64_t lock_timeout_;

  // Whether to perform deadlock detection or not.
  bool deadlock_detect_;

  // Whether to perform deadlock detection or not.
  int64_t deadlock_detect_depth_;

  // Refer to TransactionOptions::skip_concurrency_control
  bool skip_concurrency_control_;

  virtual Status ValidateSnapshot(ColumnFamilyHandle* column_family,
                                  const Slice& key,
                                  SequenceNumber* tracked_at_seq);

  void UnlockGetForUpdate(ColumnFamilyHandle* column_family,
                          const Slice& key) override;
};

class WriteCommittedTxn : public PessimisticTransaction {
 public:
  WriteCommittedTxn(TransactionDB* db, const WriteOptions& write_options,
                    const TransactionOptions& txn_options);
  // No copying allowed
  WriteCommittedTxn(const WriteCommittedTxn&) = delete;
  void operator=(const WriteCommittedTxn&) = delete;

  virtual ~WriteCommittedTxn() {}

 private:
  Status PrepareInternal() override;

  Status CommitWithoutPrepareInternal() override;

  Status CommitBatchInternal(WriteBatch* batch, size_t batch_cnt) override;

  Status CommitInternal() override;

  Status RollbackInternal() override;
};

}  // namespace ROCKSDB_NAMESPACE

#endif  // ROCKSDB_LITE