summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/trace_replay/block_cache_tracer.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/rocksdb/trace_replay/block_cache_tracer.h')
-rw-r--r--src/rocksdb/trace_replay/block_cache_tracer.h294
1 files changed, 294 insertions, 0 deletions
diff --git a/src/rocksdb/trace_replay/block_cache_tracer.h b/src/rocksdb/trace_replay/block_cache_tracer.h
new file mode 100644
index 000000000..5849273dc
--- /dev/null
+++ b/src/rocksdb/trace_replay/block_cache_tracer.h
@@ -0,0 +1,294 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <atomic>
+#include <fstream>
+
+#include "monitoring/instrumented_mutex.h"
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "rocksdb/trace_reader_writer.h"
+#include "table/table_reader_caller.h"
+#include "trace_replay/trace_replay.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+extern const uint64_t kMicrosInSecond;
+extern const uint64_t kSecondInMinute;
+extern const uint64_t kSecondInHour;
+
+struct BlockCacheTraceRecord;
+
+class BlockCacheTraceHelper {
+ public:
+ static bool IsGetOrMultiGetOnDataBlock(TraceType block_type,
+ TableReaderCaller caller);
+ static bool IsGetOrMultiGet(TableReaderCaller caller);
+ static bool IsUserAccess(TableReaderCaller caller);
+ // Row key is a concatenation of the access's fd_number and the referenced
+ // user key.
+ static std::string ComputeRowKey(const BlockCacheTraceRecord& access);
+ // The first four bytes of the referenced key in a Get request is the table
+ // id.
+ static uint64_t GetTableId(const BlockCacheTraceRecord& access);
+ // The sequence number of a get request is the last part of the referenced
+ // key.
+ static uint64_t GetSequenceNumber(const BlockCacheTraceRecord& access);
+ // Block offset in a file is the last varint64 in the block key.
+ static uint64_t GetBlockOffsetInFile(const BlockCacheTraceRecord& access);
+
+ static const std::string kUnknownColumnFamilyName;
+ static const uint64_t kReservedGetId;
+};
+
+// Lookup context for tracing block cache accesses.
+// We trace block accesses at five places:
+// 1. BlockBasedTable::GetFilter
+// 2. BlockBasedTable::GetUncompressedDict.
+// 3. BlockBasedTable::MaybeReadAndLoadToCache. (To trace access on data, index,
+// and range deletion block.)
+// 4. BlockBasedTable::Get. (To trace the referenced key and whether the
+// referenced key exists in a fetched data block.)
+// 5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the
+// referenced key exists in a fetched data block.)
+// The context is created at:
+// 1. BlockBasedTable::Get. (kUserGet)
+// 2. BlockBasedTable::MultiGet. (kUserMGet)
+// 3. BlockBasedTable::NewIterator. (either kUserIterator, kCompaction, or
+// external SST ingestion calls this function.)
+// 4. BlockBasedTable::Open. (kPrefetch)
+// 5. Index/Filter::CacheDependencies. (kPrefetch)
+// 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or
+// kUserApproximateSize).
+struct BlockCacheLookupContext {
+ BlockCacheLookupContext(const TableReaderCaller& _caller) : caller(_caller) {}
+ BlockCacheLookupContext(const TableReaderCaller& _caller, uint64_t _get_id,
+ bool _get_from_user_specified_snapshot)
+ : caller(_caller),
+ get_id(_get_id),
+ get_from_user_specified_snapshot(_get_from_user_specified_snapshot) {}
+ const TableReaderCaller caller;
+ // These are populated when we perform lookup/insert on block cache. The block
+ // cache tracer uses these inforation when logging the block access at
+ // BlockBasedTable::GET and BlockBasedTable::MultiGet.
+ bool is_cache_hit = false;
+ bool no_insert = false;
+ TraceType block_type = TraceType::kTraceMax;
+ uint64_t block_size = 0;
+ std::string block_key;
+ uint64_t num_keys_in_block = 0;
+ // The unique id associated with Get and MultiGet. This enables us to track
+ // how many blocks a Get/MultiGet request accesses. We can also measure the
+ // impact of row cache vs block cache.
+ uint64_t get_id = 0;
+ std::string referenced_key;
+ bool get_from_user_specified_snapshot = false;
+
+ void FillLookupContext(bool _is_cache_hit, bool _no_insert,
+ TraceType _block_type, uint64_t _block_size,
+ const std::string& _block_key,
+ uint64_t _num_keys_in_block) {
+ is_cache_hit = _is_cache_hit;
+ no_insert = _no_insert;
+ block_type = _block_type;
+ block_size = _block_size;
+ block_key = _block_key;
+ num_keys_in_block = _num_keys_in_block;
+ }
+};
+
+enum Boolean : char { kTrue = 1, kFalse = 0 };
+
+struct BlockCacheTraceRecord {
+ // Required fields for all accesses.
+ uint64_t access_timestamp = 0;
+ std::string block_key;
+ TraceType block_type = TraceType::kTraceMax;
+ uint64_t block_size = 0;
+ uint64_t cf_id = 0;
+ std::string cf_name;
+ uint32_t level = 0;
+ uint64_t sst_fd_number = 0;
+ TableReaderCaller caller = TableReaderCaller::kMaxBlockCacheLookupCaller;
+ Boolean is_cache_hit = Boolean::kFalse;
+ Boolean no_insert = Boolean::kFalse;
+ // Required field for Get and MultiGet
+ uint64_t get_id = BlockCacheTraceHelper::kReservedGetId;
+ Boolean get_from_user_specified_snapshot = Boolean::kFalse;
+ std::string referenced_key;
+ // Required fields for data block and user Get/Multi-Get only.
+ uint64_t referenced_data_size = 0;
+ uint64_t num_keys_in_block = 0;
+ Boolean referenced_key_exist_in_block = Boolean::kFalse;
+
+ BlockCacheTraceRecord() {}
+
+ BlockCacheTraceRecord(
+ uint64_t _access_timestamp, std::string _block_key, TraceType _block_type,
+ uint64_t _block_size, uint64_t _cf_id, std::string _cf_name,
+ uint32_t _level, uint64_t _sst_fd_number, TableReaderCaller _caller,
+ bool _is_cache_hit, bool _no_insert,
+ uint64_t _get_id = BlockCacheTraceHelper::kReservedGetId,
+ bool _get_from_user_specified_snapshot = false,
+ std::string _referenced_key = "", uint64_t _referenced_data_size = 0,
+ uint64_t _num_keys_in_block = 0,
+ bool _referenced_key_exist_in_block = false)
+ : access_timestamp(_access_timestamp),
+ block_key(_block_key),
+ block_type(_block_type),
+ block_size(_block_size),
+ cf_id(_cf_id),
+ cf_name(_cf_name),
+ level(_level),
+ sst_fd_number(_sst_fd_number),
+ caller(_caller),
+ is_cache_hit(_is_cache_hit ? Boolean::kTrue : Boolean::kFalse),
+ no_insert(_no_insert ? Boolean::kTrue : Boolean::kFalse),
+ get_id(_get_id),
+ get_from_user_specified_snapshot(_get_from_user_specified_snapshot
+ ? Boolean::kTrue
+ : Boolean::kFalse),
+ referenced_key(_referenced_key),
+ referenced_data_size(_referenced_data_size),
+ num_keys_in_block(_num_keys_in_block),
+ referenced_key_exist_in_block(
+ _referenced_key_exist_in_block ? Boolean::kTrue : Boolean::kFalse) {
+ }
+};
+
+struct BlockCacheTraceHeader {
+ uint64_t start_time;
+ uint32_t rocksdb_major_version;
+ uint32_t rocksdb_minor_version;
+};
+
+// BlockCacheTraceWriter captures all RocksDB block cache accesses using a
+// user-provided TraceWriter. Every RocksDB operation is written as a single
+// trace. Each trace will have a timestamp and type, followed by the trace
+// payload.
+class BlockCacheTraceWriter {
+ public:
+ BlockCacheTraceWriter(Env* env, const TraceOptions& trace_options,
+ std::unique_ptr<TraceWriter>&& trace_writer);
+ ~BlockCacheTraceWriter() = default;
+ // No copy and move.
+ BlockCacheTraceWriter(const BlockCacheTraceWriter&) = delete;
+ BlockCacheTraceWriter& operator=(const BlockCacheTraceWriter&) = delete;
+ BlockCacheTraceWriter(BlockCacheTraceWriter&&) = delete;
+ BlockCacheTraceWriter& operator=(BlockCacheTraceWriter&&) = delete;
+
+ // Pass Slice references to avoid copy.
+ Status WriteBlockAccess(const BlockCacheTraceRecord& record,
+ const Slice& block_key, const Slice& cf_name,
+ const Slice& referenced_key);
+
+ // Write a trace header at the beginning, typically on initiating a trace,
+ // with some metadata like a magic number and RocksDB version.
+ Status WriteHeader();
+
+ private:
+ Env* env_;
+ TraceOptions trace_options_;
+ std::unique_ptr<TraceWriter> trace_writer_;
+};
+
+// Write a trace record in human readable format, see
+// https://github.com/facebook/rocksdb/wiki/Block-cache-analysis-and-simulation-tools#trace-format
+// for details.
+class BlockCacheHumanReadableTraceWriter {
+ public:
+ ~BlockCacheHumanReadableTraceWriter();
+
+ Status NewWritableFile(const std::string& human_readable_trace_file_path,
+ ROCKSDB_NAMESPACE::Env* env);
+
+ Status WriteHumanReadableTraceRecord(const BlockCacheTraceRecord& access,
+ uint64_t block_id, uint64_t get_key_id);
+
+ private:
+ char trace_record_buffer_[1024 * 1024];
+ std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>
+ human_readable_trace_file_writer_;
+};
+
+// BlockCacheTraceReader helps read the trace file generated by
+// BlockCacheTraceWriter using a user provided TraceReader.
+class BlockCacheTraceReader {
+ public:
+ BlockCacheTraceReader(std::unique_ptr<TraceReader>&& reader);
+ ~BlockCacheTraceReader() = default;
+ // No copy and move.
+ BlockCacheTraceReader(const BlockCacheTraceReader&) = delete;
+ BlockCacheTraceReader& operator=(const BlockCacheTraceReader&) = delete;
+ BlockCacheTraceReader(BlockCacheTraceReader&&) = delete;
+ BlockCacheTraceReader& operator=(BlockCacheTraceReader&&) = delete;
+
+ Status ReadHeader(BlockCacheTraceHeader* header);
+
+ Status ReadAccess(BlockCacheTraceRecord* record);
+
+ private:
+ std::unique_ptr<TraceReader> trace_reader_;
+};
+
+// Read a trace record in human readable format, see
+// https://github.com/facebook/rocksdb/wiki/Block-cache-analysis-and-simulation-tools#trace-format
+// for detailed.
+class BlockCacheHumanReadableTraceReader : public BlockCacheTraceReader {
+ public:
+ BlockCacheHumanReadableTraceReader(const std::string& trace_file_path);
+
+ ~BlockCacheHumanReadableTraceReader();
+
+ Status ReadHeader(BlockCacheTraceHeader* header);
+
+ Status ReadAccess(BlockCacheTraceRecord* record);
+
+ private:
+ std::ifstream human_readable_trace_reader_;
+};
+
+// A block cache tracer. It downsamples the accesses according to
+// trace_options and uses BlockCacheTraceWriter to write the access record to
+// the trace file.
+class BlockCacheTracer {
+ public:
+ BlockCacheTracer();
+ ~BlockCacheTracer();
+ // No copy and move.
+ BlockCacheTracer(const BlockCacheTracer&) = delete;
+ BlockCacheTracer& operator=(const BlockCacheTracer&) = delete;
+ BlockCacheTracer(BlockCacheTracer&&) = delete;
+ BlockCacheTracer& operator=(BlockCacheTracer&&) = delete;
+
+ // Start writing block cache accesses to the trace_writer.
+ Status StartTrace(Env* env, const TraceOptions& trace_options,
+ std::unique_ptr<TraceWriter>&& trace_writer);
+
+ // Stop writing block cache accesses to the trace_writer.
+ void EndTrace();
+
+ bool is_tracing_enabled() const {
+ return writer_.load(std::memory_order_relaxed);
+ }
+
+ Status WriteBlockAccess(const BlockCacheTraceRecord& record,
+ const Slice& block_key, const Slice& cf_name,
+ const Slice& referenced_key);
+
+ // GetId cycles from 1 to port::kMaxUint64.
+ uint64_t NextGetId();
+
+ private:
+ TraceOptions trace_options_;
+ // A mutex protects the writer_.
+ InstrumentedMutex trace_writer_mutex_;
+ std::atomic<BlockCacheTraceWriter*> writer_;
+ std::atomic<uint64_t> get_id_counter_;
+};
+
+} // namespace ROCKSDB_NAMESPACE