summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/trace_replay/trace_replay.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/rocksdb/trace_replay/trace_replay.h183
1 files changed, 183 insertions, 0 deletions
diff --git a/src/rocksdb/trace_replay/trace_replay.h b/src/rocksdb/trace_replay/trace_replay.h
new file mode 100644
index 000000000..9aba5ceb7
--- /dev/null
+++ b/src/rocksdb/trace_replay/trace_replay.h
@@ -0,0 +1,183 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+
+#include "rocksdb/options.h"
+#include "rocksdb/rocksdb_namespace.h"
+#include "rocksdb/status.h"
+#include "rocksdb/trace_record.h"
+#include "rocksdb/utilities/replayer.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// This file contains Tracer and Replayer classes that enable capturing and
+// replaying RocksDB traces.
+
+class ColumnFamilyHandle;
+class ColumnFamilyData;
+class DB;
+class DBImpl;
+class Env;
+class Slice;
+class SystemClock;
+class TraceReader;
+class TraceWriter;
+class WriteBatch;
+
+struct ReadOptions;
+struct TraceOptions;
+struct WriteOptions;
+
+extern const std::string kTraceMagic; // declaration only; not defined here
+const unsigned int kTraceTimestampSize = 8; // NOTE(review): presumably bytes
+const unsigned int kTraceTypeSize = 1;
+const unsigned int kTracePayloadLengthSize = 4;
+const unsigned int kTraceMetadataSize = // sum of the three sizes above
+ kTraceTimestampSize + kTraceTypeSize + kTracePayloadLengthSize;
+
+static const int kTraceFileMajorVersion = 0; // trace file version, major part
+static const int kTraceFileMinorVersion = 2; // trace file version, minor part
+
+// The data structure that defines a single trace.
+struct Trace {
+ uint64_t ts = 0; // timestamp
+ TraceType type = kTraceMax; // same value that reset() assigns
+ // Each bit in payload_map records whether the corresponding struct member was
+ // added to the payload. Each TraceType has its corresponding payload struct.
+ // For example, if the bit at position 0 is set in a write payload, then the
+ // write batch will be added.
+ uint64_t payload_map = 0;
+ // Each trace type has its own payload struct, which will be serialized into
+ // the payload.
+ std::string payload;
+
+ void reset() { // put every field back to its default-constructed value
+ ts = 0;
+ type = kTraceMax;
+ payload_map = 0;
+ payload.clear();
+ }
+};
+
+enum TracePayloadType : char {
+ // Each member of all query payload structs should have a corresponding flag
+ // here. Make sure to add them sequentially, in the order they are added.
+ kEmptyPayload = 0,
+ kWriteBatchData = 1,
+ kGetCFID = 2,
+ kGetKey = 3,
+ kIterCFID = 4,
+ kIterKey = 5,
+ kIterLowerBound = 6,
+ kIterUpperBound = 7,
+ kMultiGetSize = 8,
+ kMultiGetCFIDs = 9,
+ kMultiGetKeys = 10,
+};
+
+class TracerHelper {
+ public:
+ // Parse a version string that contains only the major and minor version.
+ static Status ParseVersionStr(std::string& v_string, int* v_num);
+
+ // Parse the trace file version and DB version from the trace header.
+ static Status ParseTraceHeader(const Trace& header, int* trace_version,
+ int* db_version);
+
+ // Encode a version 0.1 trace object into the given string.
+ static void EncodeTrace(const Trace& trace, std::string* encoded_trace);
+
+ // Decode a string into the given trace object.
+ static Status DecodeTrace(const std::string& encoded_trace, Trace* trace);
+
+ // Decode a string into the given trace header.
+ static Status DecodeHeader(const std::string& encoded_trace, Trace* header);
+
+ // Set the payload map based on the payload type.
+ static bool SetPayloadMap(uint64_t& payload_map,
+ const TracePayloadType payload_type);
+
+ // Decode a Trace object into the corresponding TraceRecord.
+ // Returns Status::OK() if nothing is wrong; record will be set accordingly.
+ // Returns Status::NotSupported() if the trace type is not supported, or the
+ // corresponding error status; record will be set to nullptr.
+ static Status DecodeTraceRecord(Trace* trace, int trace_file_version,
+ std::unique_ptr<TraceRecord>* record);
+};
+
+// Tracer captures all RocksDB operations using a user-provided TraceWriter.
+// Every RocksDB operation is written as a single trace. Each trace will have a
+// timestamp and type, followed by the trace payload.
+class Tracer {
+ public:
+ Tracer(SystemClock* clock, const TraceOptions& trace_options,
+ std::unique_ptr<TraceWriter>&& trace_writer);
+ ~Tracer();
+
+ // Trace all write operations -- Put, Merge, Delete, SingleDelete, Write.
+ Status Write(WriteBatch* write_batch);
+
+ // Trace Get operations.
+ Status Get(ColumnFamilyHandle* cfname, const Slice& key);
+
+ // Trace iterator Seek / SeekForPrev. Note: upper_bound is passed by value.
+ Status IteratorSeek(const uint32_t& cf_id, const Slice& key,
+ const Slice& lower_bound, const Slice upper_bound);
+ Status IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key,
+ const Slice& lower_bound, const Slice upper_bound);
+
+ // Trace MultiGet operations. The three overloads below differ only in how
+ // the column families and keys are passed in.
+ Status MultiGet(const size_t num_keys, ColumnFamilyHandle** column_families,
+ const Slice* keys);
+
+ Status MultiGet(const size_t num_keys, ColumnFamilyHandle* column_family,
+ const Slice* keys);
+
+ Status MultiGet(const std::vector<ColumnFamilyHandle*>& column_family,
+ const std::vector<Slice>& keys);
+
+ // Returns true if the trace is over the configured max trace file limit.
+ // False otherwise.
+ bool IsTraceFileOverMax();
+
+ // Returns true if the order of write trace records must match the order of
+ // the corresponding records logged to WAL and applied to the DB.
+ bool IsWriteOrderPreserved() const {
+ return trace_options_.preserve_write_order;
+ }
+
+ // Writes a trace footer at the end of the tracing.
+ Status Close();
+
+ private:
+ // Write a trace header at the beginning, typically on initiating a trace,
+ // with some metadata like a magic number, trace version, RocksDB version, and
+ // trace format.
+ Status WriteHeader();
+
+ // Write a trace footer, typically on ending a trace, with some metadata.
+ Status WriteFooter();
+
+ // Write a single trace using the provided TraceWriter to the underlying
+ // system, say, a filesystem or a streaming service.
+ Status WriteTrace(const Trace& trace);
+
+ // Helps in filtering and sampling of traces.
+ // Returns true if a trace should be skipped, false otherwise.
+ bool ShouldSkipTrace(const TraceType& type);
+
+ SystemClock* clock_;
+ TraceOptions trace_options_;
+ std::unique_ptr<TraceWriter> trace_writer_;
+ uint64_t trace_request_count_;
+};
+
+} // namespace ROCKSDB_NAMESPACE