1 files changed, 201 insertions, 0 deletions
diff --git a/src/rocksdb/table/plain/plain_table_key_coding.h b/src/rocksdb/table/plain/plain_table_key_coding.h
new file mode 100644
index 000000000..9cda7df32
--- /dev/null
+++ b/src/rocksdb/table/plain/plain_table_key_coding.h
@@ -0,0 +1,201 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#ifndef ROCKSDB_LITE
+
+#include <array>
+
+#include "rocksdb/slice.h"
+#include "table/plain/plain_table_reader.h"
+
+// The file contains three helper classes of PlainTable format,
+// PlainTableKeyEncoder, PlainTableKeyDecoder and PlainTableFileReader.
+// These classes issue the lowest level of operations of PlainTable.
+// Actual data format of the key is documented in comments of class
+// PlainTableFactory.
+namespace ROCKSDB_NAMESPACE {
+
+class WritableFile;  // NOTE(review): not referenced in this header - confirm
+struct ParsedInternalKey;  // parsed-key output type of PlainTableKeyDecoder
+struct PlainTableReaderFileInfo;  // consumed by PlainTableFileReader
+enum PlainTableEntryType : unsigned char;  // output of DecodeSize
+
+// Helper used by PlainTableBuilder to write a key to an output file. Falls
+// back to kPlain without a prefix extractor; index sparseness is at least 1.
+class PlainTableKeyEncoder {
+ public:
+  explicit PlainTableKeyEncoder(EncodingType encoding_type,
+                                uint32_t user_key_len,
+                                const SliceTransform* prefix_extractor,
+                                size_t index_sparseness)
+      : encoding_type_((prefix_extractor != nullptr) ? encoding_type : kPlain),
+        fixed_user_key_len_(user_key_len),
+        prefix_extractor_(prefix_extractor),
+        index_sparseness_((index_sparseness > 1) ? index_sparseness : 1),
+        key_count_for_prefix_(0) {}
+  // Appends `key` to `file`; the returned IOStatus reports the outcome.
+  // key: the key to write out, in the format of internal key.
+  // file: the output file to write to.
+  // offset: in/out offset in the file; updated after the key is appended.
+  // meta_bytes_buf: buffer for extra meta bytes.
+  // meta_bytes_buf_size: in/out offset at which extra meta bytes are
+  //                      appended; updated if meta_bytes_buf is updated.
+  IOStatus AppendKey(const Slice& key, WritableFileWriter* file,
+                     uint64_t* offset, char* meta_bytes_buf,
+                     size_t* meta_bytes_buf_size);
+
+  // Returns the encoding type actually in use (kPlain if no prefix extractor).
+  EncodingType GetEncodingType() const { return encoding_type_; }
+
+ private:
+  EncodingType encoding_type_;
+  uint32_t fixed_user_key_len_;
+  const SliceTransform* prefix_extractor_;
+  const size_t index_sparseness_;  // always >= 1 (clamped by the constructor)
+  size_t key_count_for_prefix_;
+  IterKey pre_prefix_;
+};
+
+// Performs the raw file reads for PlainTableReader, hiding whether a read is
+// served from the mmaped file or from internal buffers (non-mmap mode).
+// The class is implemented in a way to favor the performance of mmap case.
+// Failures are signalled by a false return value and recorded in status_.
+class PlainTableFileReader {
+ public:
+  explicit PlainTableFileReader(const PlainTableReaderFileInfo* _file_info)
+      : file_info_(_file_info), num_buf_(0) {}
+
+  ~PlainTableFileReader() {
+    // Should fix: status_ is allowed to go unchecked when the reader dies.
+    status_.PermitUncheckedError();
+  }
+
+  // Reads `len` bytes starting at `file_offset` and points *out at them.
+  // In mmaped mode, the result points into the mmaped area of the file, so
+  // it stays valid until the file is closed.
+  // In non-mmap mode, the result points to an internal buffer. If the caller
+  // makes another read call, the result may no longer be valid, so callers
+  // should make a copy when needed.
+  // In order to save read calls to files, we keep two internal buffers:
+  // the first read and the most recent read. This is efficient because it
+  // covers these two common use cases:
+  // (1) the hash index identifies only one location; we read the key to
+  //     verify the location, and read key and value if it is the right one.
+  // (2) after hash index checking, we identify two locations (because of
+  //     hash bucket conflicts); we binary search the two locations to see
+  //     which one is what we need and start to read from that location.
+  // Both cases are covered by the two buffers, so the same location is not
+  // re-read. Buffers have a fixed size; if a read doesn't exactly fit the
+  // buffer, the second buffer is replaced with the location the user reads.
+  // The mmap bounds assert widens to 64 bits so offset + len cannot wrap.
+  // Returns false on failure, in which case the status is stored in status_.
+  bool Read(uint32_t file_offset, uint32_t len, Slice* out) {
+    if (file_info_->is_mmap_mode) {
+      assert(static_cast<uint64_t>(file_offset) + len <=
+             file_info_->data_end_offset);
+      *out = Slice(file_info_->file_data.data() + file_offset, len);
+      return true;
+    }
+    return ReadNonMmap(file_offset, len, out);
+  }
+
+  // Non-mmap read path. If return false, status code is stored in status_.
+  bool ReadNonMmap(uint32_t file_offset, uint32_t len, Slice* output);
+
+  // *bytes_read = 0 means eof. false means failure and status is saved
+  // in status_. Status is not returned directly, to avoid copying a status
+  // object and so match the previous performance of mmap mode.
+  inline bool ReadVarint32(uint32_t offset, uint32_t* output,
+                           uint32_t* bytes_read);
+
+  bool ReadVarint32NonMmap(uint32_t offset, uint32_t* output,
+                           uint32_t* bytes_read);
+
+  Status status() const { return status_; }
+
+  const PlainTableReaderFileInfo* file_info() const { return file_info_; }
+
+ private:
+  const PlainTableReaderFileInfo* file_info_;
+
+  struct Buffer {
+    Buffer() : buf_start_offset(0), buf_len(0), buf_capacity(0) {}
+    std::unique_ptr<char[]> buf;
+    uint32_t buf_start_offset;
+    uint32_t buf_len;
+    uint32_t buf_capacity;
+  };
+
+  // Keep buffers for two recent reads.
+  std::array<std::unique_ptr<Buffer>, 2> buffers_;
+  uint32_t num_buf_;
+  Status status_;
+
+  Slice GetFromBuffer(Buffer* buf, uint32_t file_offset, uint32_t len);
+};
+
+// A helper class to decode keys, read from the file via file_reader_.
+// NOTE(review): said to be used by PlainTableBuilder; likely PlainTableReader.
+class PlainTableKeyDecoder {
+ public:
+  explicit PlainTableKeyDecoder(const PlainTableReaderFileInfo* file_info,
+                                EncodingType encoding_type,
+                                uint32_t user_key_len,
+                                const SliceTransform* prefix_extractor)
+      : file_reader_(file_info),
+        encoding_type_(encoding_type),
+        prefix_len_(0),
+        fixed_user_key_len_(user_key_len),
+        prefix_extractor_(prefix_extractor),
+        in_prefix_(false) {}
+
+  // Find the next key.
+  // start_offset: offset at which the key starts (presumably a file offset).
+  // parsed_key: the output of the result key
+  // internal_key: if not null, fill with the output of the result key in
+  //               un-parsed format
+  // value: output slice, presumably the entry's value - TODO confirm.
+  // bytes_read: how many bytes read from start_offset. Output
+  // seekable: whether key can be read from this place. Used when building
+  //           indexes. Output.
+  Status NextKey(uint32_t start_offset, ParsedInternalKey* parsed_key,
+                 Slice* internal_key, Slice* value, uint32_t* bytes_read,
+                 bool* seekable = nullptr);
+
+  Status NextKeyNoValue(uint32_t start_offset, ParsedInternalKey* parsed_key,
+                        Slice* internal_key, uint32_t* bytes_read,
+                        bool* seekable = nullptr);
+
+  PlainTableFileReader file_reader_;  // built from the ctor's file_info
+  EncodingType encoding_type_;
+  uint32_t prefix_len_;  // starts at 0
+  uint32_t fixed_user_key_len_;
+  Slice saved_user_key_;
+  IterKey cur_key_;
+  const SliceTransform* prefix_extractor_;
+  bool in_prefix_;  // starts as false
+
+ private:
+  Status NextPlainEncodingKey(uint32_t start_offset,
+                              ParsedInternalKey* parsed_key,
+                              Slice* internal_key, uint32_t* bytes_read,
+                              bool* seekable = nullptr);
+  Status NextPrefixEncodingKey(uint32_t start_offset,
+                               ParsedInternalKey* parsed_key,
+                               Slice* internal_key, uint32_t* bytes_read,
+                               bool* seekable = nullptr);
+  Status ReadInternalKey(uint32_t file_offset, uint32_t user_key_size,
+                         ParsedInternalKey* parsed_key, uint32_t* bytes_read,
+                         bool* internal_key_valid, Slice* internal_key);
+  inline Status DecodeSize(uint32_t start_offset,
+                           PlainTableEntryType* entry_type, uint32_t* key_size,
+                           uint32_t* bytes_read);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+
+#endif  // ROCKSDB_LITE