summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/table/plain/plain_table_builder.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/rocksdb/table/plain/plain_table_builder.cc337
1 files changed, 337 insertions, 0 deletions
diff --git a/src/rocksdb/table/plain/plain_table_builder.cc b/src/rocksdb/table/plain/plain_table_builder.cc
new file mode 100644
index 000000000..04723955c
--- /dev/null
+++ b/src/rocksdb/table/plain/plain_table_builder.cc
@@ -0,0 +1,337 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#ifndef ROCKSDB_LITE
+#include "table/plain/plain_table_builder.h"
+
+#include <assert.h>
+
+#include <limits>
+#include <map>
+#include <string>
+
+#include "db/dbformat.h"
+#include "file/writable_file_writer.h"
+#include "logging/logging.h"
+#include "rocksdb/comparator.h"
+#include "rocksdb/env.h"
+#include "rocksdb/filter_policy.h"
+#include "rocksdb/options.h"
+#include "rocksdb/table.h"
+#include "table/block_based/block_builder.h"
+#include "table/format.h"
+#include "table/meta_blocks.h"
+#include "table/plain/plain_table_bloom.h"
+#include "table/plain/plain_table_factory.h"
+#include "table/plain/plain_table_index.h"
+#include "util/coding.h"
+#include "util/crc32c.h"
+#include "util/stop_watch.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+
+// a utility that helps writing block content to the file
+// @offset will advance if @block_contents was successfully written.
+// @block_handle the block handle this particular block.
+IOStatus WriteBlock(const Slice& block_contents, WritableFileWriter* file,
+ uint64_t* offset, BlockHandle* block_handle) {
+ block_handle->set_offset(*offset);
+ block_handle->set_size(block_contents.size());
+ IOStatus io_s = file->Append(block_contents);
+
+ if (io_s.ok()) {
+ *offset += block_contents.size();
+ }
+ return io_s;
+}
+
+} // namespace
+
+// kPlainTableMagicNumber was picked by running
+// echo rocksdb.table.plain | sha1sum
+// and taking the leading 64 bits.
+extern const uint64_t kPlainTableMagicNumber = 0x8242229663bf9564ull;
+extern const uint64_t kLegacyPlainTableMagicNumber = 0x4f3418eb7a8f13b8ull;
+
+PlainTableBuilder::PlainTableBuilder(
+ const ImmutableOptions& ioptions, const MutableCFOptions& moptions,
+ const IntTblPropCollectorFactories* int_tbl_prop_collector_factories,
+ uint32_t column_family_id, int level_at_creation, WritableFileWriter* file,
+ uint32_t user_key_len, EncodingType encoding_type, size_t index_sparseness,
+ uint32_t bloom_bits_per_key, const std::string& column_family_name,
+ uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio,
+ bool store_index_in_file, const std::string& db_id,
+ const std::string& db_session_id, uint64_t file_number)
+ : ioptions_(ioptions),
+ moptions_(moptions),
+ bloom_block_(num_probes),
+ file_(file),
+ bloom_bits_per_key_(bloom_bits_per_key),
+ huge_page_tlb_size_(huge_page_tlb_size),
+ encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(),
+ index_sparseness),
+ store_index_in_file_(store_index_in_file),
+ prefix_extractor_(moptions.prefix_extractor.get()) {
+ // Build index block and save it in the file if hash_table_ratio > 0
+ if (store_index_in_file_) {
+ assert(hash_table_ratio > 0 || IsTotalOrderMode());
+ index_builder_.reset(new PlainTableIndexBuilder(
+ &arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness,
+ hash_table_ratio, huge_page_tlb_size_));
+ properties_
+ .user_collected_properties[PlainTablePropertyNames::kBloomVersion] =
+ "1"; // For future use
+ }
+
+ properties_.fixed_key_len = user_key_len;
+
+ // for plain table, we put all the data in a big chuck.
+ properties_.num_data_blocks = 1;
+ // Fill it later if store_index_in_file_ == true
+ properties_.index_size = 0;
+ properties_.filter_size = 0;
+ // To support roll-back to previous version, now still use version 0 for
+ // plain encoding.
+ properties_.format_version = (encoding_type == kPlain) ? 0 : 1;
+ properties_.column_family_id = column_family_id;
+ properties_.column_family_name = column_family_name;
+ properties_.db_id = db_id;
+ properties_.db_session_id = db_session_id;
+ properties_.db_host_id = ioptions.db_host_id;
+ if (!ReifyDbHostIdProperty(ioptions_.env, &properties_.db_host_id).ok()) {
+ ROCKS_LOG_INFO(ioptions_.logger, "db_host_id property will not be set");
+ }
+ properties_.orig_file_number = file_number;
+ properties_.prefix_extractor_name =
+ moptions_.prefix_extractor != nullptr
+ ? moptions_.prefix_extractor->AsString()
+ : "nullptr";
+
+ std::string val;
+ PutFixed32(&val, static_cast<uint32_t>(encoder_.GetEncodingType()));
+ properties_
+ .user_collected_properties[PlainTablePropertyNames::kEncodingType] = val;
+
+ assert(int_tbl_prop_collector_factories);
+ for (auto& factory : *int_tbl_prop_collector_factories) {
+ assert(factory);
+
+ table_properties_collectors_.emplace_back(
+ factory->CreateIntTblPropCollector(column_family_id,
+ level_at_creation));
+ }
+}
+
+PlainTableBuilder::~PlainTableBuilder() {
+ // They are supposed to have been passed to users through Finish()
+ // if the file succeeds.
+ status_.PermitUncheckedError();
+ io_status_.PermitUncheckedError();
+}
+
+void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
+ // temp buffer for metadata bytes between key and value.
+ char meta_bytes_buf[6];
+ size_t meta_bytes_buf_size = 0;
+
+ ParsedInternalKey internal_key;
+ if (!ParseInternalKey(key, &internal_key, false /* log_err_key */)
+ .ok()) { // TODO
+ assert(false);
+ return;
+ }
+ if (internal_key.type == kTypeRangeDeletion) {
+ status_ = Status::NotSupported("Range deletion unsupported");
+ return;
+ }
+
+ // Store key hash
+ if (store_index_in_file_) {
+ if (moptions_.prefix_extractor == nullptr) {
+ keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key));
+ } else {
+ Slice prefix =
+ moptions_.prefix_extractor->Transform(internal_key.user_key);
+ keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix));
+ }
+ }
+
+ // Write value
+ assert(offset_ <= std::numeric_limits<uint32_t>::max());
+ auto prev_offset = static_cast<uint32_t>(offset_);
+ // Write out the key
+ io_status_ = encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf,
+ &meta_bytes_buf_size);
+ if (SaveIndexInFile()) {
+ index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset);
+ }
+
+ // Write value length
+ uint32_t value_size = static_cast<uint32_t>(value.size());
+ if (io_status_.ok()) {
+ char* end_ptr =
+ EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size);
+ assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf));
+ meta_bytes_buf_size = end_ptr - meta_bytes_buf;
+ io_status_ = file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size));
+ }
+
+ // Write value
+ if (io_status_.ok()) {
+ io_status_ = file_->Append(value);
+ offset_ += value_size + meta_bytes_buf_size;
+ }
+
+ if (io_status_.ok()) {
+ properties_.num_entries++;
+ properties_.raw_key_size += key.size();
+ properties_.raw_value_size += value.size();
+ if (internal_key.type == kTypeDeletion ||
+ internal_key.type == kTypeSingleDeletion) {
+ properties_.num_deletions++;
+ } else if (internal_key.type == kTypeMerge) {
+ properties_.num_merge_operands++;
+ }
+ }
+
+ // notify property collectors
+ NotifyCollectTableCollectorsOnAdd(
+ key, value, offset_, table_properties_collectors_, ioptions_.logger);
+ status_ = io_status_;
+}
+
+Status PlainTableBuilder::Finish() {
+ assert(!closed_);
+ closed_ = true;
+
+ properties_.data_size = offset_;
+
+ // Write the following blocks
+ // 1. [meta block: bloom] - optional
+ // 2. [meta block: index] - optional
+ // 3. [meta block: properties]
+ // 4. [metaindex block]
+ // 5. [footer]
+
+ MetaIndexBuilder meta_index_builer;
+
+ if (store_index_in_file_ && (properties_.num_entries > 0)) {
+ assert(properties_.num_entries <= std::numeric_limits<uint32_t>::max());
+ BlockHandle bloom_block_handle;
+ if (bloom_bits_per_key_ > 0) {
+ bloom_block_.SetTotalBits(
+ &arena_,
+ static_cast<uint32_t>(properties_.num_entries) * bloom_bits_per_key_,
+ ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.logger);
+
+ PutVarint32(&properties_.user_collected_properties
+ [PlainTablePropertyNames::kNumBloomBlocks],
+ bloom_block_.GetNumBlocks());
+
+ bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_);
+
+ Slice bloom_finish_result = bloom_block_.Finish();
+
+ properties_.filter_size = bloom_finish_result.size();
+ io_status_ =
+ WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle);
+
+ if (!io_status_.ok()) {
+ status_ = io_status_;
+ return status_;
+ }
+ meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle);
+ }
+ BlockHandle index_block_handle;
+ Slice index_finish_result = index_builder_->Finish();
+
+ properties_.index_size = index_finish_result.size();
+ io_status_ =
+ WriteBlock(index_finish_result, file_, &offset_, &index_block_handle);
+
+ if (!io_status_.ok()) {
+ status_ = io_status_;
+ return status_;
+ }
+
+ meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock,
+ index_block_handle);
+ }
+
+ // Calculate bloom block size and index block size
+ PropertyBlockBuilder property_block_builder;
+ // -- Add basic properties
+ property_block_builder.AddTableProperty(properties_);
+
+ property_block_builder.Add(properties_.user_collected_properties);
+
+ // -- Add user collected properties
+ NotifyCollectTableCollectorsOnFinish(
+ table_properties_collectors_, ioptions_.logger, &property_block_builder);
+
+ // -- Write property block
+ BlockHandle property_block_handle;
+ IOStatus s = WriteBlock(property_block_builder.Finish(), file_, &offset_,
+ &property_block_handle);
+ if (!s.ok()) {
+ return static_cast<Status>(s);
+ }
+ meta_index_builer.Add(kPropertiesBlockName, property_block_handle);
+
+ // -- write metaindex block
+ BlockHandle metaindex_block_handle;
+ io_status_ = WriteBlock(meta_index_builer.Finish(), file_, &offset_,
+ &metaindex_block_handle);
+ if (!io_status_.ok()) {
+ status_ = io_status_;
+ return status_;
+ }
+
+ // Write Footer
+ // no need to write out new footer if we're using default checksum
+ FooterBuilder footer;
+ footer.Build(kPlainTableMagicNumber, /* format_version */ 0, offset_,
+ kNoChecksum, metaindex_block_handle);
+ io_status_ = file_->Append(footer.GetSlice());
+ if (io_status_.ok()) {
+ offset_ += footer.GetSlice().size();
+ }
+ status_ = io_status_;
+ return status_;
+}
+
+void PlainTableBuilder::Abandon() { closed_ = true; }
+
+uint64_t PlainTableBuilder::NumEntries() const {
+ return properties_.num_entries;
+}
+
+uint64_t PlainTableBuilder::FileSize() const { return offset_; }
+
+std::string PlainTableBuilder::GetFileChecksum() const {
+ if (file_ != nullptr) {
+ return file_->GetFileChecksum();
+ } else {
+ return kUnknownFileChecksum;
+ }
+}
+
+const char* PlainTableBuilder::GetFileChecksumFuncName() const {
+ if (file_ != nullptr) {
+ return file_->GetFileChecksumFuncName();
+ } else {
+ return kUnknownFileChecksumFuncName;
+ }
+}
+void PlainTableBuilder::SetSeqnoTimeTableProperties(const std::string& string,
+ uint64_t uint_64) {
+ // TODO: storing seqno to time mapping is not yet support for plain table.
+ TableBuilder::SetSeqnoTimeTableProperties(string, uint_64);
+}
+
+} // namespace ROCKSDB_NAMESPACE
+#endif // ROCKSDB_LITE