summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/trace_replay/io_tracer.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/rocksdb/trace_replay/io_tracer.h185
1 files changed, 185 insertions, 0 deletions
diff --git a/src/rocksdb/trace_replay/io_tracer.h b/src/rocksdb/trace_replay/io_tracer.h
new file mode 100644
index 000000000..3fc7cdba0
--- /dev/null
+++ b/src/rocksdb/trace_replay/io_tracer.h
@@ -0,0 +1,185 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <atomic>
+#include <fstream>
+
+#include "monitoring/instrumented_mutex.h"
+#include "port/lang.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/options.h"
+#include "rocksdb/trace_record.h"
+#include "trace_replay/trace_replay.h"
+
+namespace ROCKSDB_NAMESPACE {
+class SystemClock;
+class TraceReader;
+class TraceWriter;
+
+/* In order to log new data in trace record for specified operations, do
+ following:
+ 1. Add new data in IOTraceOP (say kIONewData= 3)
+ 2. Log it in IOTraceWriter::WriteIOOp, and read that in
+ IOTraceReader::ReadIOOp and
+ IOTraceRecordParser::PrintHumanReadableIOTraceRecord in the switch case.
+ 3. In the FileSystemTracer APIs where this data will be logged with, update
+ io_op_data |= (1 << IOTraceOp::kIONewData).
+*/
+enum IOTraceOp : char {
+ // The value of each enum represents the bitwise position for
+ // IOTraceRecord.io_op_data.
+ kIOFileSize = 0,
+ kIOLen = 1,
+ kIOOffset = 2,
+};
+
+struct IOTraceRecord {
+ // Required fields for all accesses.
+ uint64_t access_timestamp = 0;
+ TraceType trace_type = TraceType::kTraceMax;
+ // Each bit in io_op_data stores which corresponding info from IOTraceOp will
+ // be added in the trace. Foreg, if bit at position 1 is set then
+ // IOTraceOp::kIOLen (length) will be logged in the record.
+ uint64_t io_op_data = 0;
+ std::string file_operation;
+ uint64_t latency = 0;
+ std::string io_status;
+ // Stores file name instead of full path.
+ std::string file_name;
+
+ // Fields added to record based on IO operation.
+ uint64_t len = 0;
+ uint64_t offset = 0;
+ uint64_t file_size = 0;
+
+ // Additional information passed in IODebugContext.
+ uint64_t trace_data = 0;
+ std::string request_id;
+
+ IOTraceRecord() {}
+
+ IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type,
+ const uint64_t& _io_op_data, const std::string& _file_operation,
+ const uint64_t& _latency, const std::string& _io_status,
+ const std::string& _file_name, const uint64_t& _file_size = 0)
+ : access_timestamp(_access_timestamp),
+ trace_type(_trace_type),
+ io_op_data(_io_op_data),
+ file_operation(_file_operation),
+ latency(_latency),
+ io_status(_io_status),
+ file_name(_file_name),
+ file_size(_file_size) {}
+
+ IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type,
+ const uint64_t& _io_op_data, const std::string& _file_operation,
+ const uint64_t& _latency, const std::string& _io_status,
+ const std::string& _file_name, const uint64_t& _len,
+ const uint64_t& _offset)
+ : access_timestamp(_access_timestamp),
+ trace_type(_trace_type),
+ io_op_data(_io_op_data),
+ file_operation(_file_operation),
+ latency(_latency),
+ io_status(_io_status),
+ file_name(_file_name),
+ len(_len),
+ offset(_offset) {}
+};
+
+struct IOTraceHeader {
+ uint64_t start_time;
+ uint32_t rocksdb_major_version;
+ uint32_t rocksdb_minor_version;
+};
+
+// IOTraceWriter writes IO operation as a single trace. Each trace will have a
+// timestamp and type, followed by the trace payload.
+class IOTraceWriter {
+ public:
+ IOTraceWriter(SystemClock* clock, const TraceOptions& trace_options,
+ std::unique_ptr<TraceWriter>&& trace_writer);
+ ~IOTraceWriter() = default;
+ // No copy and move.
+ IOTraceWriter(const IOTraceWriter&) = delete;
+ IOTraceWriter& operator=(const IOTraceWriter&) = delete;
+ IOTraceWriter(IOTraceWriter&&) = delete;
+ IOTraceWriter& operator=(IOTraceWriter&&) = delete;
+
+ Status WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg);
+
+ // Write a trace header at the beginning, typically on initiating a trace,
+ // with some metadata like a magic number and RocksDB version.
+ Status WriteHeader();
+
+ private:
+ SystemClock* clock_;
+ TraceOptions trace_options_;
+ std::unique_ptr<TraceWriter> trace_writer_;
+};
+
+// IOTraceReader helps read the trace file generated by IOTraceWriter.
+class IOTraceReader {
+ public:
+ explicit IOTraceReader(std::unique_ptr<TraceReader>&& reader);
+ ~IOTraceReader() = default;
+ // No copy and move.
+ IOTraceReader(const IOTraceReader&) = delete;
+ IOTraceReader& operator=(const IOTraceReader&) = delete;
+ IOTraceReader(IOTraceReader&&) = delete;
+ IOTraceReader& operator=(IOTraceReader&&) = delete;
+
+ Status ReadHeader(IOTraceHeader* header);
+
+ Status ReadIOOp(IOTraceRecord* record);
+
+ private:
+ std::unique_ptr<TraceReader> trace_reader_;
+};
+
+// An IO tracer. It uses IOTraceWriter to write the access record to the
+// trace file.
+class IOTracer {
+ public:
+ IOTracer();
+ ~IOTracer();
+ // No copy and move.
+ IOTracer(const IOTracer&) = delete;
+ IOTracer& operator=(const IOTracer&) = delete;
+ IOTracer(IOTracer&&) = delete;
+ IOTracer& operator=(IOTracer&&) = delete;
+
+ // no_sanitize is added for tracing_enabled. writer_ is protected under mutex
+ // so even if user call Start/EndIOTrace and tracing_enabled is not updated in
+ // the meanwhile, WriteIOOp will anyways check the writer_ protected under
+ // mutex and ignore the operation if writer_is null. So its ok if
+ // tracing_enabled shows non updated value.
+
+ // Start writing IO operations to the trace_writer.
+ TSAN_SUPPRESSION Status
+ StartIOTrace(SystemClock* clock, const TraceOptions& trace_options,
+ std::unique_ptr<TraceWriter>&& trace_writer);
+
+ // Stop writing IO operations to the trace_writer.
+ TSAN_SUPPRESSION void EndIOTrace();
+
+ TSAN_SUPPRESSION bool is_tracing_enabled() const { return tracing_enabled; }
+
+ void WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg);
+
+ private:
+ TraceOptions trace_options_;
+ // A mutex protects the writer_.
+ InstrumentedMutex trace_writer_mutex_;
+ std::atomic<IOTraceWriter*> writer_;
+ // bool tracing_enabled is added to avoid costly operation of checking atomic
+ // variable 'writer_' is nullptr or not in is_tracing_enabled().
+ // is_tracing_enabled() is invoked multiple times by FileSystem classes.
+ bool tracing_enabled;
+};
+
+} // namespace ROCKSDB_NAMESPACE