summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/include/rocksdb/write_batch.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/rocksdb/include/rocksdb/write_batch.h')
-rw-r--r--src/rocksdb/include/rocksdb/write_batch.h377
1 files changed, 377 insertions, 0 deletions
diff --git a/src/rocksdb/include/rocksdb/write_batch.h b/src/rocksdb/include/rocksdb/write_batch.h
new file mode 100644
index 000000000..51fd4d8ac
--- /dev/null
+++ b/src/rocksdb/include/rocksdb/write_batch.h
@@ -0,0 +1,377 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// WriteBatch holds a collection of updates to apply atomically to a DB.
+//
+// The updates are applied in the order in which they are added
+// to the WriteBatch. For example, the value of "key" will be "v3"
+// after the following batch is written:
+//
+// batch.Put("key", "v1");
+// batch.Delete("key");
+// batch.Put("key", "v2");
+// batch.Put("key", "v3");
+//
+// Multiple threads can invoke const methods on a WriteBatch without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same WriteBatch must use
+// external synchronization.
+
+#pragma once
+
+#include <stdint.h>
+#include <atomic>
+#include <memory>
+#include <string>
+#include <vector>
+#include "rocksdb/status.h"
+#include "rocksdb/write_batch_base.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Slice;
+class ColumnFamilyHandle;
+struct SavePoints;
+struct SliceParts;
+
+struct SavePoint {
+ size_t size; // size of rep_
+ int count; // count of elements in rep_
+ uint32_t content_flags;
+
+ SavePoint() : size(0), count(0), content_flags(0) {}
+
+ SavePoint(size_t _size, int _count, uint32_t _flags)
+ : size(_size), count(_count), content_flags(_flags) {}
+
+ void clear() {
+ size = 0;
+ count = 0;
+ content_flags = 0;
+ }
+
+ bool is_cleared() const { return (size | count | content_flags) == 0; }
+};
+
+class WriteBatch : public WriteBatchBase {
+ public:
+ explicit WriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0);
+ explicit WriteBatch(size_t reserved_bytes, size_t max_bytes, size_t ts_sz);
+ ~WriteBatch() override;
+
+ using WriteBatchBase::Put;
+ // Store the mapping "key->value" in the database.
+ Status Put(ColumnFamilyHandle* column_family, const Slice& key,
+ const Slice& value) override;
+ Status Put(const Slice& key, const Slice& value) override {
+ return Put(nullptr, key, value);
+ }
+
+ // Variant of Put() that gathers output like writev(2). The key and value
+ // that will be written to the database are concatenations of arrays of
+ // slices.
+ Status Put(ColumnFamilyHandle* column_family, const SliceParts& key,
+ const SliceParts& value) override;
+ Status Put(const SliceParts& key, const SliceParts& value) override {
+ return Put(nullptr, key, value);
+ }
+
+ using WriteBatchBase::Delete;
+ // If the database contains a mapping for "key", erase it. Else do nothing.
+ Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
+ Status Delete(const Slice& key) override { return Delete(nullptr, key); }
+
+ // variant that takes SliceParts
+ Status Delete(ColumnFamilyHandle* column_family,
+ const SliceParts& key) override;
+ Status Delete(const SliceParts& key) override { return Delete(nullptr, key); }
+
+ using WriteBatchBase::SingleDelete;
+ // WriteBatch implementation of DB::SingleDelete(). See db.h.
+ Status SingleDelete(ColumnFamilyHandle* column_family,
+ const Slice& key) override;
+ Status SingleDelete(const Slice& key) override {
+ return SingleDelete(nullptr, key);
+ }
+
+ // variant that takes SliceParts
+ Status SingleDelete(ColumnFamilyHandle* column_family,
+ const SliceParts& key) override;
+ Status SingleDelete(const SliceParts& key) override {
+ return SingleDelete(nullptr, key);
+ }
+
+ using WriteBatchBase::DeleteRange;
+ // WriteBatch implementation of DB::DeleteRange(). See db.h.
+ Status DeleteRange(ColumnFamilyHandle* column_family, const Slice& begin_key,
+ const Slice& end_key) override;
+ Status DeleteRange(const Slice& begin_key, const Slice& end_key) override {
+ return DeleteRange(nullptr, begin_key, end_key);
+ }
+
+ // variant that takes SliceParts
+ Status DeleteRange(ColumnFamilyHandle* column_family,
+ const SliceParts& begin_key,
+ const SliceParts& end_key) override;
+ Status DeleteRange(const SliceParts& begin_key,
+ const SliceParts& end_key) override {
+ return DeleteRange(nullptr, begin_key, end_key);
+ }
+
+ using WriteBatchBase::Merge;
+ // Merge "value" with the existing value of "key" in the database.
+ // "key->merge(existing, value)"
+ Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
+ const Slice& value) override;
+ Status Merge(const Slice& key, const Slice& value) override {
+ return Merge(nullptr, key, value);
+ }
+
+ // variant that takes SliceParts
+ Status Merge(ColumnFamilyHandle* column_family, const SliceParts& key,
+ const SliceParts& value) override;
+ Status Merge(const SliceParts& key, const SliceParts& value) override {
+ return Merge(nullptr, key, value);
+ }
+
+ using WriteBatchBase::PutLogData;
+ // Append a blob of arbitrary size to the records in this batch. The blob will
+ // be stored in the transaction log but not in any other file. In particular,
+ // it will not be persisted to the SST files. When iterating over this
+ // WriteBatch, WriteBatch::Handler::LogData will be called with the contents
+ // of the blob as it is encountered. Blobs, puts, deletes, and merges will be
+ // encountered in the same order in which they were inserted. The blob will
+ // NOT consume sequence number(s) and will NOT increase the count of the batch
+ //
+ // Example application: add timestamps to the transaction log for use in
+ // replication.
+ Status PutLogData(const Slice& blob) override;
+
+ using WriteBatchBase::Clear;
+ // Clear all updates buffered in this batch.
+ void Clear() override;
+
+ // Records the state of the batch for future calls to RollbackToSavePoint().
+ // May be called multiple times to set multiple save points.
+ void SetSavePoint() override;
+
+ // Remove all entries in this batch (Put, Merge, Delete, PutLogData) since the
+ // most recent call to SetSavePoint() and removes the most recent save point.
+ // If there is no previous call to SetSavePoint(), Status::NotFound()
+ // will be returned.
+ // Otherwise returns Status::OK().
+ Status RollbackToSavePoint() override;
+
+ // Pop the most recent save point.
+ // If there is no previous call to SetSavePoint(), Status::NotFound()
+ // will be returned.
+ // Otherwise returns Status::OK().
+ Status PopSavePoint() override;
+
+ // Support for iterating over the contents of a batch.
+ class Handler {
+ public:
+ virtual ~Handler();
+ // All handler functions in this class provide default implementations so
+ // we won't break existing clients of Handler on a source code level when
+ // adding a new member function.
+
+ // default implementation will just call Put without column family for
+ // backwards compatibility. If the column family is not default,
+ // the function is noop
+ virtual Status PutCF(uint32_t column_family_id, const Slice& key,
+ const Slice& value) {
+ if (column_family_id == 0) {
+ // Put() historically doesn't return status. We didn't want to be
+ // backwards incompatible so we didn't change the return status
+ // (this is a public API). We do an ordinary get and return Status::OK()
+ Put(key, value);
+ return Status::OK();
+ }
+ return Status::InvalidArgument(
+ "non-default column family and PutCF not implemented");
+ }
+ virtual void Put(const Slice& /*key*/, const Slice& /*value*/) {}
+
+ virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) {
+ if (column_family_id == 0) {
+ Delete(key);
+ return Status::OK();
+ }
+ return Status::InvalidArgument(
+ "non-default column family and DeleteCF not implemented");
+ }
+ virtual void Delete(const Slice& /*key*/) {}
+
+ virtual Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) {
+ if (column_family_id == 0) {
+ SingleDelete(key);
+ return Status::OK();
+ }
+ return Status::InvalidArgument(
+ "non-default column family and SingleDeleteCF not implemented");
+ }
+ virtual void SingleDelete(const Slice& /*key*/) {}
+
+ virtual Status DeleteRangeCF(uint32_t /*column_family_id*/,
+ const Slice& /*begin_key*/,
+ const Slice& /*end_key*/) {
+ return Status::InvalidArgument("DeleteRangeCF not implemented");
+ }
+
+ virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
+ const Slice& value) {
+ if (column_family_id == 0) {
+ Merge(key, value);
+ return Status::OK();
+ }
+ return Status::InvalidArgument(
+ "non-default column family and MergeCF not implemented");
+ }
+ virtual void Merge(const Slice& /*key*/, const Slice& /*value*/) {}
+
+ virtual Status PutBlobIndexCF(uint32_t /*column_family_id*/,
+ const Slice& /*key*/,
+ const Slice& /*value*/) {
+ return Status::InvalidArgument("PutBlobIndexCF not implemented");
+ }
+
+ // The default implementation of LogData does nothing.
+ virtual void LogData(const Slice& blob);
+
+ virtual Status MarkBeginPrepare(bool = false) {
+ return Status::InvalidArgument("MarkBeginPrepare() handler not defined.");
+ }
+
+ virtual Status MarkEndPrepare(const Slice& /*xid*/) {
+ return Status::InvalidArgument("MarkEndPrepare() handler not defined.");
+ }
+
+ virtual Status MarkNoop(bool /*empty_batch*/) {
+ return Status::InvalidArgument("MarkNoop() handler not defined.");
+ }
+
+ virtual Status MarkRollback(const Slice& /*xid*/) {
+ return Status::InvalidArgument(
+ "MarkRollbackPrepare() handler not defined.");
+ }
+
+ virtual Status MarkCommit(const Slice& /*xid*/) {
+ return Status::InvalidArgument("MarkCommit() handler not defined.");
+ }
+
+ // Continue is called by WriteBatch::Iterate. If it returns false,
+ // iteration is halted. Otherwise, it continues iterating. The default
+ // implementation always returns true.
+ virtual bool Continue();
+
+ protected:
+ friend class WriteBatchInternal;
+ virtual bool WriteAfterCommit() const { return true; }
+ virtual bool WriteBeforePrepare() const { return false; }
+ };
+ Status Iterate(Handler* handler) const;
+
+ // Retrieve the serialized version of this batch.
+ const std::string& Data() const { return rep_; }
+
+ // Retrieve data size of the batch.
+ size_t GetDataSize() const { return rep_.size(); }
+
+ // Returns the number of updates in the batch
+ uint32_t Count() const;
+
+ // Returns true if PutCF will be called during Iterate
+ bool HasPut() const;
+
+ // Returns true if DeleteCF will be called during Iterate
+ bool HasDelete() const;
+
+ // Returns true if SingleDeleteCF will be called during Iterate
+ bool HasSingleDelete() const;
+
+ // Returns true if DeleteRangeCF will be called during Iterate
+ bool HasDeleteRange() const;
+
+ // Returns true if MergeCF will be called during Iterate
+ bool HasMerge() const;
+
+ // Returns true if MarkBeginPrepare will be called during Iterate
+ bool HasBeginPrepare() const;
+
+ // Returns true if MarkEndPrepare will be called during Iterate
+ bool HasEndPrepare() const;
+
+ // Returns trie if MarkCommit will be called during Iterate
+ bool HasCommit() const;
+
+ // Returns trie if MarkRollback will be called during Iterate
+ bool HasRollback() const;
+
+ // Assign timestamp to write batch
+ Status AssignTimestamp(const Slice& ts);
+
+ // Assign timestamps to write batch
+ Status AssignTimestamps(const std::vector<Slice>& ts_list);
+
+ using WriteBatchBase::GetWriteBatch;
+ WriteBatch* GetWriteBatch() override { return this; }
+
+ // Constructor with a serialized string object
+ explicit WriteBatch(const std::string& rep);
+ explicit WriteBatch(std::string&& rep);
+
+ WriteBatch(const WriteBatch& src);
+ WriteBatch(WriteBatch&& src) noexcept;
+ WriteBatch& operator=(const WriteBatch& src);
+ WriteBatch& operator=(WriteBatch&& src);
+
+ // marks this point in the WriteBatch as the last record to
+ // be inserted into the WAL, provided the WAL is enabled
+ void MarkWalTerminationPoint();
+ const SavePoint& GetWalTerminationPoint() const { return wal_term_point_; }
+
+ void SetMaxBytes(size_t max_bytes) override { max_bytes_ = max_bytes; }
+
+ private:
+ friend class WriteBatchInternal;
+ friend class LocalSavePoint;
+ // TODO(myabandeh): this is needed for a hack to collapse the write batch and
+ // remove duplicate keys. Remove it when the hack is replaced with a proper
+ // solution.
+ friend class WriteBatchWithIndex;
+ std::unique_ptr<SavePoints> save_points_;
+
+ // When sending a WriteBatch through WriteImpl we might want to
+ // specify that only the first x records of the batch be written to
+ // the WAL.
+ SavePoint wal_term_point_;
+
+ // For HasXYZ. Mutable to allow lazy computation of results
+ mutable std::atomic<uint32_t> content_flags_;
+
+ // Performs deferred computation of content_flags if necessary
+ uint32_t ComputeContentFlags() const;
+
+ // Maximum size of rep_.
+ size_t max_bytes_;
+
+ // Is the content of the batch the application's latest state that meant only
+ // to be used for recovery? Refer to
+ // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery for
+ // more details.
+ bool is_latest_persistent_state_ = false;
+
+ protected:
+ std::string rep_; // See comment in write_batch.cc for the format of rep_
+ const size_t timestamp_size_;
+
+ // Intentionally copyable
+};
+
+} // namespace ROCKSDB_NAMESPACE