summaryrefslogtreecommitdiffstats
path: root/storage/rocksdb/rocksdb/db/log_writer.h
diff options
context:
space:
mode:
Diffstat (limited to 'storage/rocksdb/rocksdb/db/log_writer.h')
-rw-r--r--storage/rocksdb/rocksdb/db/log_writer.h114
1 files changed, 114 insertions, 0 deletions
diff --git a/storage/rocksdb/rocksdb/db/log_writer.h b/storage/rocksdb/rocksdb/db/log_writer.h
new file mode 100644
index 00000000..a7f952ed
--- /dev/null
+++ b/storage/rocksdb/rocksdb/db/log_writer.h
@@ -0,0 +1,114 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <stdint.h>
+
+#include <memory>
+
+#include "db/log_format.h"
+#include "rocksdb/slice.h"
+#include "rocksdb/status.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class WritableFileWriter;
+
+namespace log {
+
+/**
+ * Writer is a general purpose log stream writer. It provides an append-only
+ * abstraction for writing data. The details of how the data is written are
+ * handled by the WritableFile sub-class implementation.
+ *
+ * File format:
+ *
+ * File is broken down into variable sized records. The format of each record
+ * is described below.
+ * +-----+-------------+--+----+----------+------+-- ... ----+
+ * File | r0 | r1 |P | r2 | r3 | r4 | |
+ * +-----+-------------+--+----+----------+------+-- ... ----+
+ * <--- kBlockSize ------>|<-- kBlockSize ------>|
+ * rn = variable size records
+ * P = Padding
+ *
+ * Data is written out in kBlockSize chunks. If the next record does not fit
+ * into the space left, the leftover space will be padded with \0.
+ *
+ * Legacy record format:
+ *
+ * +---------+-----------+-----------+--- ... ---+
+ * |CRC (4B) | Size (2B) | Type (1B) | Payload |
+ * +---------+-----------+-----------+--- ... ---+
+ *
+ * CRC = 32bit hash computed over the record type and payload using CRC
+ * Size = Length of the payload data
+ * Type = Type of record
+ * (kZeroType, kFullType, kFirstType, kLastType, kMiddleType )
+ * The type is used to group a bunch of records together to represent
+ * blocks that are larger than kBlockSize
+ * Payload = Byte stream as long as specified by the payload size
+ *
+ * Recyclable record format:
+ *
+ * +---------+-----------+-----------+----------------+--- ... ---+
+ * |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload |
+ * +---------+-----------+-----------+----------------+--- ... ---+
+ *
+ * Same as above, with the addition of
+ * Log number = 32bit log file number, so that we can distinguish between
+ * records written by the most recent log writer vs a previous one.
+ */
+class Writer {  // Append-only log stream writer; non-copyable.
+ public:
+ // Create a writer that will append data to "*dest".
+ // "*dest" must be initially empty.
+ // NOTE(review): "dest" is a unique_ptr&&, presumably moved into dest_ - confirm.
+ explicit Writer(std::unique_ptr<WritableFileWriter>&& dest,
+ uint64_t log_number, bool recycle_log_files,
+ bool manual_flush = false);  // manual_flush: see manual_flush_ below
+ // No copying allowed (copy constructor and copy assignment are deleted)
+ Writer(const Writer&) = delete;
+ void operator=(const Writer&) = delete;
+
+ ~Writer();  // Defined out of line; not visible in this header.
+
+ Status AddRecord(const Slice& slice);  // Presumably appends "slice" as a record.
+
+ WritableFileWriter* file() { return dest_.get(); }  // Non-owning; may be null.
+ const WritableFileWriter* file() const { return dest_.get(); }
+
+ uint64_t get_log_number() const { return log_number_; }  // Returns log_number_.
+
+ Status WriteBuffer();  // Manual flush entry point; see manual_flush_ below.
+
+ Status Close();  // NOTE(review): presumably closes the file in dest_ - confirm.
+
+ bool TEST_BufferIsEmpty();  // TEST_ prefix: presumably a test-only hook.
+
+ private:
+ std::unique_ptr<WritableFileWriter> dest_;  // Destination exposed by file().
+ size_t block_offset_; // Current offset in block
+ uint64_t log_number_;  // Value returned by get_log_number().
+ bool recycle_log_files_;  // Presumably selects the recyclable record format.
+
+ // crc32c values for all supported record types. These are
+ // pre-computed to reduce the overhead of computing the crc of the
+ // record type stored in the header. Indexed by record type (0..kMaxRecordType).
+ uint32_t type_crc_[kMaxRecordType + 1];
+
+ Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
+
+ // If true, it does not flush after each write. Instead it relies on the upper
+ // layer to manually do the flush by calling ::WriteBuffer()
+ bool manual_flush_;
+};
+
+} // namespace log
+} // namespace ROCKSDB_NAMESPACE