From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/rocksdb/table/cuckoo/cuckoo_table_builder.cc | 553 ++++++++++++++++++ src/rocksdb/table/cuckoo/cuckoo_table_builder.h | 138 +++++ .../table/cuckoo/cuckoo_table_builder_test.cc | 640 +++++++++++++++++++++ src/rocksdb/table/cuckoo/cuckoo_table_factory.cc | 104 ++++ src/rocksdb/table/cuckoo/cuckoo_table_factory.h | 82 +++ src/rocksdb/table/cuckoo/cuckoo_table_reader.cc | 411 +++++++++++++ src/rocksdb/table/cuckoo/cuckoo_table_reader.h | 100 ++++ .../table/cuckoo/cuckoo_table_reader_test.cc | 584 +++++++++++++++++++ 8 files changed, 2612 insertions(+) create mode 100644 src/rocksdb/table/cuckoo/cuckoo_table_builder.cc create mode 100644 src/rocksdb/table/cuckoo/cuckoo_table_builder.h create mode 100644 src/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc create mode 100644 src/rocksdb/table/cuckoo/cuckoo_table_factory.cc create mode 100644 src/rocksdb/table/cuckoo/cuckoo_table_factory.h create mode 100644 src/rocksdb/table/cuckoo/cuckoo_table_reader.cc create mode 100644 src/rocksdb/table/cuckoo/cuckoo_table_reader.h create mode 100644 src/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_builder.cc b/src/rocksdb/table/cuckoo/cuckoo_table_builder.cc new file mode 100644 index 000000000..296825d94 --- /dev/null +++ b/src/rocksdb/table/cuckoo/cuckoo_table_builder.cc @@ -0,0 +1,553 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#ifndef ROCKSDB_LITE +#include "table/cuckoo/cuckoo_table_builder.h" + +#include <assert.h> + +#include <algorithm> +#include <limits> +#include <string> +#include <vector> + +#include "db/dbformat.h" +#include "file/writable_file_writer.h" +#include "rocksdb/env.h" +#include "rocksdb/table.h" +#include "table/block_based/block_builder.h" +#include "table/cuckoo/cuckoo_table_factory.h" +#include "table/format.h" +#include "table/meta_blocks.h" +#include "util/autovector.h" +#include "util/random.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +const std::string CuckooTablePropertyNames::kEmptyKey = + "rocksdb.cuckoo.bucket.empty.key"; +const std::string CuckooTablePropertyNames::kNumHashFunc = + "rocksdb.cuckoo.hash.num"; +const std::string CuckooTablePropertyNames::kHashTableSize = + "rocksdb.cuckoo.hash.size"; +const std::string CuckooTablePropertyNames::kValueLength = + "rocksdb.cuckoo.value.length"; +const std::string CuckooTablePropertyNames::kIsLastLevel = + "rocksdb.cuckoo.file.islastlevel"; +const std::string CuckooTablePropertyNames::kCuckooBlockSize = + "rocksdb.cuckoo.hash.cuckooblocksize"; +const std::string CuckooTablePropertyNames::kIdentityAsFirstHash = + "rocksdb.cuckoo.hash.identityfirst"; +const std::string CuckooTablePropertyNames::kUseModuleHash = + "rocksdb.cuckoo.hash.usemodule"; +const std::string CuckooTablePropertyNames::kUserKeyLength = + "rocksdb.cuckoo.hash.userkeylength"; + +// Obtained by running echo rocksdb.table.cuckoo | sha1sum +extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; + +CuckooTableBuilder::CuckooTableBuilder( + WritableFileWriter* file, double max_hash_table_ratio, + uint32_t max_num_hash_table, uint32_t max_search_depth, + const Comparator* user_comparator, uint32_t cuckoo_block_size, + bool use_module_hash, bool identity_as_first_hash, + uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t), + uint32_t column_family_id, const std::string& column_family_name, + const std::string& db_id, const std::string& db_session_id, + uint64_t file_number) + : num_hash_func_(2), + file_(file), + max_hash_table_ratio_(max_hash_table_ratio), + max_num_hash_func_(max_num_hash_table), + max_search_depth_(max_search_depth), + cuckoo_block_size_(std::max(1U, cuckoo_block_size)), + hash_table_size_(use_module_hash ? 0 : 2), + is_last_level_file_(false), + has_seen_first_key_(false), + has_seen_first_value_(false), + key_size_(0), + value_size_(0), + num_entries_(0), + num_values_(0), + ucomp_(user_comparator), + use_module_hash_(use_module_hash), + identity_as_first_hash_(identity_as_first_hash), + get_slice_hash_(get_slice_hash), + closed_(false) { + // Data is in a huge block. + properties_.num_data_blocks = 1; + properties_.index_size = 0; + properties_.filter_size = 0; + properties_.column_family_id = column_family_id; + properties_.column_family_name = column_family_name; + properties_.db_id = db_id; + properties_.db_session_id = db_session_id; + properties_.orig_file_number = file_number; + status_.PermitUncheckedError(); + io_status_.PermitUncheckedError(); +} + +void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { + if (num_entries_ >= kMaxVectorIdx - 1) { + status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1"); + return; + } + ParsedInternalKey ikey; + Status pik_status = + ParseInternalKey(key, &ikey, false /* log_err_key */); // TODO + if (!pik_status.ok()) { + status_ = Status::Corruption("Unable to parse key into internal key. 
", + pik_status.getState()); + return; + } + if (ikey.type != kTypeDeletion && ikey.type != kTypeValue) { + status_ = Status::NotSupported("Unsupported key type " + + std::to_string(ikey.type)); + return; + } + + // Determine if we can ignore the sequence number and value type from + // internal keys by looking at sequence number from first key. We assume + // that if first key has a zero sequence number, then all the remaining + // keys will have zero seq. no. + if (!has_seen_first_key_) { + is_last_level_file_ = ikey.sequence == 0; + has_seen_first_key_ = true; + smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); + largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); + key_size_ = is_last_level_file_ ? ikey.user_key.size() : key.size(); + } + if (key_size_ != (is_last_level_file_ ? ikey.user_key.size() : key.size())) { + status_ = Status::NotSupported("all keys have to be the same size"); + return; + } + + if (ikey.type == kTypeValue) { + if (!has_seen_first_value_) { + has_seen_first_value_ = true; + value_size_ = value.size(); + } + if (value_size_ != value.size()) { + status_ = Status::NotSupported("all values have to be the same size"); + return; + } + + if (is_last_level_file_) { + kvs_.append(ikey.user_key.data(), ikey.user_key.size()); + } else { + kvs_.append(key.data(), key.size()); + } + kvs_.append(value.data(), value.size()); + ++num_values_; + } else { + if (is_last_level_file_) { + deleted_keys_.append(ikey.user_key.data(), ikey.user_key.size()); + } else { + deleted_keys_.append(key.data(), key.size()); + } + } + ++num_entries_; + + // In order to fill the empty buckets in the hash table, we identify a + // key which is not used so far (unused_user_key). We determine this by + // maintaining smallest and largest keys inserted so far in bytewise order + // and use them to find a key outside this range in Finish() operation. + // Note that this strategy is independent of user comparator used here. + if (ikey.user_key.compare(smallest_user_key_) < 0) { + smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); + } else if (ikey.user_key.compare(largest_user_key_) > 0) { + largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); + } + if (!use_module_hash_) { + if (hash_table_size_ < num_entries_ / max_hash_table_ratio_) { + hash_table_size_ *= 2; + } + } +} + +bool CuckooTableBuilder::IsDeletedKey(uint64_t idx) const { + assert(closed_); + return idx >= num_values_; +} + +Slice CuckooTableBuilder::GetKey(uint64_t idx) const { + assert(closed_); + if (IsDeletedKey(idx)) { + return Slice( + &deleted_keys_[static_cast((idx - num_values_) * key_size_)], + static_cast(key_size_)); + } + return Slice(&kvs_[static_cast(idx * (key_size_ + value_size_))], + static_cast(key_size_)); +} + +Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const { + assert(closed_); + return is_last_level_file_ ? 
GetKey(idx) : ExtractUserKey(GetKey(idx)); } + +Slice CuckooTableBuilder::GetValue(uint64_t idx) const { + assert(closed_); + if (IsDeletedKey(idx)) { + static std::string empty_value(static_cast<size_t>(value_size_), 'a'); + return Slice(empty_value); + } + return Slice( + &kvs_[static_cast<size_t>(idx * (key_size_ + value_size_) + key_size_)], + static_cast<size_t>(value_size_)); +} + +Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) { + buckets->resize( + static_cast<size_t>(hash_table_size_ + cuckoo_block_size_ - 1)); + uint32_t make_space_for_key_call_id = 0; + for (uint32_t vector_idx = 0; vector_idx < num_entries_; vector_idx++) { + uint64_t bucket_id = 0; + bool bucket_found = false; + autovector<uint64_t> hash_vals; + Slice user_key = GetUserKey(vector_idx); + for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found; + ++hash_cnt) { + uint64_t hash_val = + CuckooHash(user_key, hash_cnt, use_module_hash_, hash_table_size_, + identity_as_first_hash_, get_slice_hash_); + // If there is a collision, check the next cuckoo_block_size_ locations + // for an empty location. While checking, if we reach the end of the + // hash table, stop searching and proceed to the next hash function. + for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; + ++block_idx, ++hash_val) { + if ((*buckets)[static_cast<size_t>(hash_val)].vector_idx == + kMaxVectorIdx) { + bucket_id = hash_val; + bucket_found = true; + break; + } else { + if (ucomp_->Compare( + user_key, GetUserKey((*buckets)[static_cast<size_t>(hash_val)] + .vector_idx)) == 0) { + return Status::NotSupported("Same key is being inserted again."); + } + hash_vals.push_back(hash_val); + } + } + } + while (!bucket_found && + !MakeSpaceForKey(hash_vals, ++make_space_for_key_call_id, buckets, + &bucket_id)) { + // Rehash by increasing the number of hash functions. + if (num_hash_func_ >= max_num_hash_func_) { + return Status::NotSupported("Too many collisions. Unable to hash."); + } + // We don't really need to rehash the entire table because old hashes are + // still valid and we only increased the number of hash functions. + uint64_t hash_val = CuckooHash(user_key, num_hash_func_, use_module_hash_, + hash_table_size_, identity_as_first_hash_, + get_slice_hash_); + ++num_hash_func_; + for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; + ++block_idx, ++hash_val) { + if ((*buckets)[static_cast<size_t>(hash_val)].vector_idx == + kMaxVectorIdx) { + bucket_found = true; + bucket_id = hash_val; + break; + } else { + hash_vals.push_back(hash_val); + } + } + } + (*buckets)[static_cast<size_t>(bucket_id)].vector_idx = vector_idx; + } + return Status::OK(); +} + +Status CuckooTableBuilder::Finish() { + assert(!closed_); + closed_ = true; + std::vector<CuckooBucket> buckets; + std::string unused_bucket; + if (num_entries_ > 0) { + // Calculate the real hash table size if module hash is enabled. + if (use_module_hash_) { + hash_table_size_ = + static_cast<uint64_t>(num_entries_ / max_hash_table_ratio_); + } + status_ = MakeHashTable(&buckets); + if (!status_.ok()) { + return status_; + } + // Determine unused_user_key to fill empty buckets. + std::string unused_user_key = smallest_user_key_; + int curr_pos = static_cast<int>(unused_user_key.size()) - 1; + while (curr_pos >= 0) { + --unused_user_key[curr_pos]; + if (Slice(unused_user_key).compare(smallest_user_key_) < 0) { + break; + } + --curr_pos; + } + if (curr_pos < 0) { + // Try using the largest key to identify an unused key. 
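+ // Worked example of the search above: if the smallest user key is + // "key01", decrementing its last byte yields "key00", which sorts below + // "key01", so "key00" becomes the unused key (the unit tests in this + // patch expect exactly "key00"). The fallback below instead increments + // bytes of the largest key, e.g. "key99" -> "key9:" in byte order.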
+ unused_user_key = largest_user_key_; + curr_pos = static_cast(unused_user_key.size()) - 1; + while (curr_pos >= 0) { + ++unused_user_key[curr_pos]; + if (Slice(unused_user_key).compare(largest_user_key_) > 0) { + break; + } + --curr_pos; + } + } + if (curr_pos < 0) { + return Status::Corruption("Unable to find unused key"); + } + if (is_last_level_file_) { + unused_bucket = unused_user_key; + } else { + ParsedInternalKey ikey(unused_user_key, 0, kTypeValue); + AppendInternalKey(&unused_bucket, ikey); + } + } + properties_.num_entries = num_entries_; + properties_.num_deletions = num_entries_ - num_values_; + properties_.fixed_key_len = key_size_; + properties_.user_collected_properties[CuckooTablePropertyNames::kValueLength] + .assign(reinterpret_cast(&value_size_), sizeof(value_size_)); + + uint64_t bucket_size = key_size_ + value_size_; + unused_bucket.resize(static_cast(bucket_size), 'a'); + // Write the table. + uint32_t num_added = 0; + for (auto& bucket : buckets) { + if (bucket.vector_idx == kMaxVectorIdx) { + io_status_ = file_->Append(Slice(unused_bucket)); + } else { + ++num_added; + io_status_ = file_->Append(GetKey(bucket.vector_idx)); + if (io_status_.ok()) { + if (value_size_ > 0) { + io_status_ = file_->Append(GetValue(bucket.vector_idx)); + } + } + } + if (!io_status_.ok()) { + status_ = io_status_; + return status_; + } + } + assert(num_added == NumEntries()); + properties_.raw_key_size = num_added * properties_.fixed_key_len; + properties_.raw_value_size = num_added * value_size_; + + uint64_t offset = buckets.size() * bucket_size; + properties_.data_size = offset; + unused_bucket.resize(static_cast(properties_.fixed_key_len)); + properties_.user_collected_properties[CuckooTablePropertyNames::kEmptyKey] = + unused_bucket; + properties_.user_collected_properties[CuckooTablePropertyNames::kNumHashFunc] + .assign(reinterpret_cast(&num_hash_func_), sizeof(num_hash_func_)); + + properties_ + .user_collected_properties[CuckooTablePropertyNames::kHashTableSize] + .assign(reinterpret_cast(&hash_table_size_), + sizeof(hash_table_size_)); + properties_.user_collected_properties[CuckooTablePropertyNames::kIsLastLevel] + .assign(reinterpret_cast(&is_last_level_file_), + sizeof(is_last_level_file_)); + properties_ + .user_collected_properties[CuckooTablePropertyNames::kCuckooBlockSize] + .assign(reinterpret_cast(&cuckoo_block_size_), + sizeof(cuckoo_block_size_)); + properties_ + .user_collected_properties[CuckooTablePropertyNames::kIdentityAsFirstHash] + .assign(reinterpret_cast(&identity_as_first_hash_), + sizeof(identity_as_first_hash_)); + properties_ + .user_collected_properties[CuckooTablePropertyNames::kUseModuleHash] + .assign(reinterpret_cast(&use_module_hash_), + sizeof(use_module_hash_)); + uint32_t user_key_len = static_cast(smallest_user_key_.size()); + properties_ + .user_collected_properties[CuckooTablePropertyNames::kUserKeyLength] + .assign(reinterpret_cast(&user_key_len), + sizeof(user_key_len)); + + // Write meta blocks. 
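+ // Resulting file layout: the fixed-size buckets written above form the + // entire data section, followed by the properties block, the meta index + // block, and the footer: + // | bucket 0 | bucket 1 | ... | properties | meta index | footer |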
+ MetaIndexBuilder meta_index_builder; + PropertyBlockBuilder property_block_builder; + + property_block_builder.AddTableProperty(properties_); + property_block_builder.Add(properties_.user_collected_properties); + Slice property_block = property_block_builder.Finish(); + BlockHandle property_block_handle; + property_block_handle.set_offset(offset); + property_block_handle.set_size(property_block.size()); + io_status_ = file_->Append(property_block); + offset += property_block.size(); + if (!io_status_.ok()) { + status_ = io_status_; + return status_; + } + + meta_index_builder.Add(kPropertiesBlockName, property_block_handle); + Slice meta_index_block = meta_index_builder.Finish(); + + BlockHandle meta_index_block_handle; + meta_index_block_handle.set_offset(offset); + meta_index_block_handle.set_size(meta_index_block.size()); + io_status_ = file_->Append(meta_index_block); + if (!io_status_.ok()) { + status_ = io_status_; + return status_; + } + + FooterBuilder footer; + footer.Build(kCuckooTableMagicNumber, /* format_version */ 1, offset, + kNoChecksum, meta_index_block_handle); + io_status_ = file_->Append(footer.GetSlice()); + status_ = io_status_; + return status_; +} + +void CuckooTableBuilder::Abandon() { + assert(!closed_); + closed_ = true; +} + +uint64_t CuckooTableBuilder::NumEntries() const { return num_entries_; } + +uint64_t CuckooTableBuilder::FileSize() const { + if (closed_) { + return file_->GetFileSize(); + } else if (num_entries_ == 0) { + return 0; + } + + if (use_module_hash_) { + return static_cast<uint64_t>((key_size_ + value_size_) * num_entries_ / + max_hash_table_ratio_); + } else { + // Account for the number of buckets being a power of two. + // As elements are added, the file size remains constant for a while and + // then doubles. Since the compaction algorithm stops adding elements + // only after it exceeds the file limit, we account for the extra element + // being added here. + uint64_t expected_hash_table_size = hash_table_size_; + if (expected_hash_table_size < (num_entries_ + 1) / max_hash_table_ratio_) { + expected_hash_table_size *= 2; + } + return (key_size_ + value_size_) * expected_hash_table_size - 1; + } +} + +// This method is invoked when there is no place to insert the target key. +// It searches for a set of elements that can be moved to accommodate the +// target key. The search is a BFS graph traversal with the first level +// (hash_vals) being all the buckets the target key could go to. +// Then, from each node (curr_node), we find all the buckets that curr_node +// could go to. They form the children of curr_node in the tree. +// We continue the traversal until we find an empty bucket, in which case, we +// move all elements along the path from first level to this empty bucket, to +// make space for the target key, which is inserted at the first level +// (*bucket_id). If the tree depth exceeds the max depth, we return false, +// indicating failure. +bool CuckooTableBuilder::MakeSpaceForKey( + const autovector<uint64_t>& hash_vals, + const uint32_t make_space_for_key_call_id, + std::vector<CuckooBucket>* buckets, uint64_t* bucket_id) { + struct CuckooNode { + uint64_t bucket_id; + uint32_t depth; + uint32_t parent_pos; + CuckooNode(uint64_t _bucket_id, uint32_t _depth, int _parent_pos) + : bucket_id(_bucket_id), depth(_depth), parent_pos(_parent_pos) {} + }; + // This is the BFS search tree, stored simply as a vector. + // Each node stores the index of its parent node in the vector. 
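+ // Illustration: if the BFS finds a path A -> B -> C where bucket C is + // empty, the loop at the end of this method copies B's occupant into C, + // then A's occupant into B, and finally returns A's bucket id so the + // target key can be inserted at A.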
+ std::vector tree; + // We want to identify already visited buckets in the current method call so + // that we don't add same buckets again for exploration in the tree. + // We do this by maintaining a count of current method call in + // make_space_for_key_call_id, which acts as a unique id for this invocation + // of the method. We store this number into the nodes that we explore in + // current method call. + // It is unlikely for the increment operation to overflow because the maximum + // no. of times this will be called is <= max_num_hash_func_ + num_entries_. + for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { + uint64_t bid = hash_vals[hash_cnt]; + (*buckets)[static_cast(bid)].make_space_for_key_call_id = + make_space_for_key_call_id; + tree.push_back(CuckooNode(bid, 0, 0)); + } + bool null_found = false; + uint32_t curr_pos = 0; + while (!null_found && curr_pos < tree.size()) { + CuckooNode& curr_node = tree[curr_pos]; + uint32_t curr_depth = curr_node.depth; + if (curr_depth >= max_search_depth_) { + break; + } + CuckooBucket& curr_bucket = + (*buckets)[static_cast(curr_node.bucket_id)]; + for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !null_found; + ++hash_cnt) { + uint64_t child_bucket_id = CuckooHash( + GetUserKey(curr_bucket.vector_idx), hash_cnt, use_module_hash_, + hash_table_size_, identity_as_first_hash_, get_slice_hash_); + // Iterate inside Cuckoo Block. + for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; + ++block_idx, ++child_bucket_id) { + if ((*buckets)[static_cast(child_bucket_id)] + .make_space_for_key_call_id == make_space_for_key_call_id) { + continue; + } + (*buckets)[static_cast(child_bucket_id)] + .make_space_for_key_call_id = make_space_for_key_call_id; + tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1, curr_pos)); + if ((*buckets)[static_cast(child_bucket_id)].vector_idx == + kMaxVectorIdx) { + null_found = true; + break; + } + } + } + ++curr_pos; + } + + if (null_found) { + // There is an empty node in tree.back(). Now, traverse the path from this + // empty node to top of the tree and at every node in the path, replace + // child with the parent. Stop when first level is reached in the tree + // (happens when 0 <= bucket_to_replace_pos < num_hash_func_) and return + // this location in first level for target key to be inserted. + uint32_t bucket_to_replace_pos = static_cast(tree.size()) - 1; + while (bucket_to_replace_pos >= num_hash_func_) { + CuckooNode& curr_node = tree[bucket_to_replace_pos]; + (*buckets)[static_cast(curr_node.bucket_id)] = + (*buckets)[static_cast(tree[curr_node.parent_pos].bucket_id)]; + bucket_to_replace_pos = curr_node.parent_pos; + } + *bucket_id = tree[bucket_to_replace_pos].bucket_id; + } + return null_found; +} + +std::string CuckooTableBuilder::GetFileChecksum() const { + if (file_ != nullptr) { + return file_->GetFileChecksum(); + } else { + return kUnknownFileChecksum; + } +} + +const char* CuckooTableBuilder::GetFileChecksumFuncName() const { + if (file_ != nullptr) { + return file_->GetFileChecksumFuncName(); + } else { + return kUnknownFileChecksumFuncName; + } +} + +} // namespace ROCKSDB_NAMESPACE +#endif // ROCKSDB_LITE diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_builder.h b/src/rocksdb/table/cuckoo/cuckoo_table_builder.h new file mode 100644 index 000000000..a125e1f4c --- /dev/null +++ b/src/rocksdb/table/cuckoo/cuckoo_table_builder.h @@ -0,0 +1,138 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#ifndef ROCKSDB_LITE +#include + +#include +#include +#include +#include + +#include "db/version_edit.h" +#include "port/port.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "rocksdb/table_properties.h" +#include "table/table_builder.h" +#include "util/autovector.h" + +namespace ROCKSDB_NAMESPACE { + +class CuckooTableBuilder : public TableBuilder { + public: + CuckooTableBuilder( + WritableFileWriter* file, double max_hash_table_ratio, + uint32_t max_num_hash_func, uint32_t max_search_depth, + const Comparator* user_comparator, uint32_t cuckoo_block_size, + bool use_module_hash, bool identity_as_first_hash, + uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t), + uint32_t column_family_id, const std::string& column_family_name, + const std::string& db_id = "", const std::string& db_session_id = "", + uint64_t file_number = 0); + // No copying allowed + CuckooTableBuilder(const CuckooTableBuilder&) = delete; + void operator=(const CuckooTableBuilder&) = delete; + + // REQUIRES: Either Finish() or Abandon() has been called. + ~CuckooTableBuilder() {} + + // Add key,value to the table being constructed. + // REQUIRES: key is after any previously added key according to comparator. + // REQUIRES: Finish(), Abandon() have not been called + void Add(const Slice& key, const Slice& value) override; + + // Return non-ok iff some error has been detected. + Status status() const override { return status_; } + + // Return non-ok iff some error happens during IO. + IOStatus io_status() const override { return io_status_; } + + // Finish building the table. Stops using the file passed to the + // constructor after this function returns. + // REQUIRES: Finish(), Abandon() have not been called + Status Finish() override; + + // Indicate that the contents of this builder should be abandoned. Stops + // using the file passed to the constructor after this function returns. + // If the caller is not going to call Finish(), it must call Abandon() + // before destroying this builder. + // REQUIRES: Finish(), Abandon() have not been called + void Abandon() override; + + // Number of calls to Add() so far. + uint64_t NumEntries() const override; + + // Size of the file generated so far. If invoked after a successful + // Finish() call, returns the size of the final generated file. + uint64_t FileSize() const override; + + TableProperties GetTableProperties() const override { return properties_; } + + // Get file checksum + std::string GetFileChecksum() const override; + + // Get file checksum function name + const char* GetFileChecksumFuncName() const override; + + private: + struct CuckooBucket { + CuckooBucket() : vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {} + uint32_t vector_idx; + // This number will not exceed kvs_.size() + max_num_hash_func_. + // We assume number of items is <= 2^32. 
+ uint32_t make_space_for_key_call_id; + }; + static const uint32_t kMaxVectorIdx = std::numeric_limits::max(); + + bool MakeSpaceForKey(const autovector& hash_vals, + const uint32_t call_id, + std::vector* buckets, uint64_t* bucket_id); + Status MakeHashTable(std::vector* buckets); + + inline bool IsDeletedKey(uint64_t idx) const; + inline Slice GetKey(uint64_t idx) const; + inline Slice GetUserKey(uint64_t idx) const; + inline Slice GetValue(uint64_t idx) const; + + uint32_t num_hash_func_; + WritableFileWriter* file_; + const double max_hash_table_ratio_; + const uint32_t max_num_hash_func_; + const uint32_t max_search_depth_; + const uint32_t cuckoo_block_size_; + uint64_t hash_table_size_; + bool is_last_level_file_; + bool has_seen_first_key_; + bool has_seen_first_value_; + uint64_t key_size_; + uint64_t value_size_; + // A list of fixed-size key-value pairs concatenating into a string. + // Use GetKey(), GetUserKey(), and GetValue() to retrieve a specific + // key / value given an index + std::string kvs_; + std::string deleted_keys_; + // Number of key-value pairs stored in kvs_ + number of deleted keys + uint64_t num_entries_; + // Number of keys that contain value (non-deletion op) + uint64_t num_values_; + Status status_; + IOStatus io_status_; + TableProperties properties_; + const Comparator* ucomp_; + bool use_module_hash_; + bool identity_as_first_hash_; + uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index, + uint64_t max_num_buckets); + std::string largest_user_key_ = ""; + std::string smallest_user_key_ = ""; + + bool closed_; // Either Finish() or Abandon() has been called. +}; + +} // namespace ROCKSDB_NAMESPACE + +#endif // ROCKSDB_LITE diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc b/src/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc new file mode 100644 index 000000000..be1c62117 --- /dev/null +++ b/src/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc @@ -0,0 +1,640 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
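+ +// Note: these tests make bucket placement fully deterministic by supplying +// a GetSliceHash callback that looks up precomputed values in the global +// hash_map below instead of computing a real hash; CuckooHash() honors such +// a callback in debug builds (and always on Windows).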
+ +#ifndef ROCKSDB_LITE + +#include "table/cuckoo/cuckoo_table_builder.h" + +#include +#include +#include +#include + +#include "file/random_access_file_reader.h" +#include "file/writable_file_writer.h" +#include "rocksdb/db.h" +#include "rocksdb/file_system.h" +#include "table/meta_blocks.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" + +namespace ROCKSDB_NAMESPACE { +extern const uint64_t kCuckooTableMagicNumber; + +namespace { +std::unordered_map> hash_map; + +uint64_t GetSliceHash(const Slice& s, uint32_t index, + uint64_t /*max_num_buckets*/) { + return hash_map[s.ToString()][index]; +} +} // namespace + +class CuckooBuilderTest : public testing::Test { + public: + CuckooBuilderTest() { + env_ = Env::Default(); + Options options; + options.allow_mmap_reads = true; + file_options_ = FileOptions(options); + } + + void CheckFileContents(const std::vector& keys, + const std::vector& values, + const std::vector& expected_locations, + std::string expected_unused_bucket, + uint64_t expected_table_size, + uint32_t expected_num_hash_func, + bool expected_is_last_level, + uint32_t expected_cuckoo_block_size = 1) { + uint64_t num_deletions = 0; + for (const auto& key : keys) { + ParsedInternalKey parsed; + Status pik_status = + ParseInternalKey(key, &parsed, true /* log_err_key */); + if (pik_status.ok() && parsed.type == kTypeDeletion) { + num_deletions++; + } + } + // Read file + uint64_t read_file_size; + ASSERT_OK(env_->GetFileSize(fname, &read_file_size)); + std::unique_ptr file_reader; + ASSERT_OK(RandomAccessFileReader::Create( + env_->GetFileSystem(), fname, file_options_, &file_reader, nullptr)); + + Options options; + options.allow_mmap_reads = true; + ImmutableOptions ioptions(options); + + // Assert Table Properties. + std::unique_ptr props; + ASSERT_OK(ReadTableProperties(file_reader.get(), read_file_size, + kCuckooTableMagicNumber, ioptions, &props)); + // Check unused bucket. + std::string unused_key = + props->user_collected_properties[CuckooTablePropertyNames::kEmptyKey]; + ASSERT_EQ(expected_unused_bucket.substr(0, props->fixed_key_len), + unused_key); + + uint64_t value_len_found = *reinterpret_cast( + props->user_collected_properties[CuckooTablePropertyNames::kValueLength] + .data()); + ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found); + ASSERT_EQ(props->raw_value_size, values.size() * value_len_found); + const uint64_t table_size = *reinterpret_cast( + props + ->user_collected_properties + [CuckooTablePropertyNames::kHashTableSize] + .data()); + ASSERT_EQ(expected_table_size, table_size); + const uint32_t num_hash_func_found = *reinterpret_cast( + props->user_collected_properties[CuckooTablePropertyNames::kNumHashFunc] + .data()); + ASSERT_EQ(expected_num_hash_func, num_hash_func_found); + const uint32_t cuckoo_block_size = *reinterpret_cast( + props + ->user_collected_properties + [CuckooTablePropertyNames::kCuckooBlockSize] + .data()); + ASSERT_EQ(expected_cuckoo_block_size, cuckoo_block_size); + const bool is_last_level_found = *reinterpret_cast( + props->user_collected_properties[CuckooTablePropertyNames::kIsLastLevel] + .data()); + ASSERT_EQ(expected_is_last_level, is_last_level_found); + + ASSERT_EQ(props->num_entries, keys.size()); + ASSERT_EQ(props->num_deletions, num_deletions); + ASSERT_EQ(props->fixed_key_len, keys.empty() ? 
0 : keys[0].size()); + ASSERT_EQ(props->data_size, + expected_unused_bucket.size() * + (expected_table_size + expected_cuckoo_block_size - 1)); + ASSERT_EQ(props->raw_key_size, keys.size() * props->fixed_key_len); + ASSERT_EQ(props->column_family_id, 0); + ASSERT_EQ(props->column_family_name, kDefaultColumnFamilyName); + + // Check contents of the buckets. + std::vector<bool> keys_found(keys.size(), false); + size_t bucket_size = expected_unused_bucket.size(); + for (uint32_t i = 0; i + 1 < table_size + cuckoo_block_size; ++i) { + Slice read_slice; + ASSERT_OK(file_reader->Read(IOOptions(), i * bucket_size, bucket_size, + &read_slice, nullptr, nullptr, + Env::IO_TOTAL /* rate_limiter_priority */)); + size_t key_idx = + std::find(expected_locations.begin(), expected_locations.end(), i) - + expected_locations.begin(); + if (key_idx == keys.size()) { + // i is not one of the expected locations. Empty bucket. + if (read_slice.data() == nullptr) { + ASSERT_EQ(0, expected_unused_bucket.size()); + } else { + ASSERT_EQ(read_slice.compare(expected_unused_bucket), 0); + } + } else { + keys_found[key_idx] = true; + ASSERT_EQ(read_slice.compare(keys[key_idx] + values[key_idx]), 0); + } + } + for (auto key_found : keys_found) { + // Check that all keys were found. + ASSERT_TRUE(key_found); + } + } + + std::string GetInternalKey(Slice user_key, bool zero_seqno, + ValueType type = kTypeValue) { + IterKey ikey; + ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, type); + return ikey.GetInternalKey().ToString(); + } + + uint64_t NextPowOf2(uint64_t num) { + uint64_t n = 2; + while (n <= num) { + n *= 2; + } + return n; + } + + uint64_t GetExpectedTableSize(uint64_t num) { + return NextPowOf2(static_cast<uint64_t>(num / kHashTableRatio)); + } + + Env* env_; + FileOptions file_options_; + std::string fname; + const double kHashTableRatio = 0.9; +}; + +TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) { + std::unique_ptr<WritableFile> writable_file; + fname = test::PerThreadDBPath("EmptyFile"); + std::unique_ptr<WritableFileWriter> file_writer; + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, 4, 100, + BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + ASSERT_EQ(0UL, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + CheckFileContents({}, {}, {}, "", 2, 2, false); +} + +TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { + for (auto type : {kTypeValue, kTypeDeletion}) { + uint32_t num_hash_fun = 4; + std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"}; + std::vector<std::string> values; + if (type == kTypeValue) { + values = {"v01", "v02", "v03", "v04"}; + } else { + values = {"", "", "", ""}; + } + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map<std::string, std::vector<uint64_t>> hm = { + {user_keys[0], {0, 1, 2, 3}}, + {user_keys[1], {1, 2, 3, 4}}, + {user_keys[2], {2, 3, 4, 5}}, + {user_keys[3], {3, 4, 5, 6}}}; + hash_map = std::move(hm); + + std::vector<uint64_t> expected_locations = {0, 1, 2, 3}; + std::vector<std::string> keys; + for (auto& user_key : user_keys) { + keys.push_back(GetInternalKey(user_key, false, type)); + } + uint64_t expected_table_size = GetExpectedTableSize(keys.size()); + + fname = test::PerThreadDBPath("NoCollisionFullKey"); + std::unique_ptr<WritableFileWriter> file_writer; + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), 
fname, + file_options_, &file_writer, nullptr)); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, expected_unused_bucket, + expected_table_size, 2, false); + } +} + +TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { + uint32_t num_hash_fun = 4; + std::vector user_keys = {"key01", "key02", "key03", "key04"}; + std::vector values = {"v01", "v02", "v03", "v04"}; + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1, 2, 3}}, + {user_keys[1], {0, 1, 2, 3}}, + {user_keys[2], {0, 1, 2, 3}}, + {user_keys[3], {0, 1, 2, 3}}, + }; + hash_map = std::move(hm); + + std::vector expected_locations = {0, 1, 2, 3}; + std::vector keys; + for (auto& user_key : user_keys) { + keys.push_back(GetInternalKey(user_key, false)); + } + uint64_t expected_table_size = GetExpectedTableSize(keys.size()); + + fname = test::PerThreadDBPath("WithCollisionFullKey"); + std::unique_ptr file_writer; + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, expected_unused_bucket, + expected_table_size, 4, false); +} + +TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) { + uint32_t num_hash_fun = 4; + std::vector user_keys = {"key01", "key02", "key03", "key04"}; + std::vector values = {"v01", "v02", "v03", "v04"}; + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1, 2, 3}}, + {user_keys[1], {0, 1, 2, 3}}, + {user_keys[2], {0, 1, 2, 3}}, + {user_keys[3], {0, 1, 2, 3}}, + }; + hash_map = std::move(hm); + + std::vector expected_locations = {0, 1, 2, 3}; + std::vector keys; + for (auto& user_key : user_keys) { + 
keys.push_back(GetInternalKey(user_key, false)); + } + uint64_t expected_table_size = GetExpectedTableSize(keys.size()); + + std::unique_ptr file_writer; + uint32_t cuckoo_block_size = 2; + fname = test::PerThreadDBPath("WithCollisionFullKey2"); + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + CuckooTableBuilder builder( + file_writer.get(), kHashTableRatio, num_hash_fun, 100, + BytewiseComparator(), cuckoo_block_size, false, false, GetSliceHash, + 0 /* column_family_id */, kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, expected_unused_bucket, + expected_table_size, 3, false, cuckoo_block_size); +} + +TEST_F(CuckooBuilderTest, WithCollisionPathFullKey) { + // Have two hash functions. Insert elements with overlapping hashes. + // Finally insert an element with hash value somewhere in the middle + // so that it displaces all the elements after that. + uint32_t num_hash_fun = 2; + std::vector user_keys = {"key01", "key02", "key03", "key04", + "key05"}; + std::vector values = {"v01", "v02", "v03", "v04", "v05"}; + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}}, + {user_keys[3], {3, 4}}, {user_keys[4], {0, 2}}, + }; + hash_map = std::move(hm); + + std::vector expected_locations = {0, 1, 3, 4, 2}; + std::vector keys; + for (auto& user_key : user_keys) { + keys.push_back(GetInternalKey(user_key, false)); + } + uint64_t expected_table_size = GetExpectedTableSize(keys.size()); + + std::unique_ptr file_writer; + fname = test::PerThreadDBPath("WithCollisionPathFullKey"); + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, expected_unused_bucket, + expected_table_size, 2, false); +} + +TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) { + uint32_t num_hash_fun = 2; + std::vector user_keys = {"key01", 
"key02", "key03", "key04", + "key05"}; + std::vector values = {"v01", "v02", "v03", "v04", "v05"}; + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {3, 4}}, + {user_keys[3], {4, 5}}, {user_keys[4], {0, 3}}, + }; + hash_map = std::move(hm); + + std::vector expected_locations = {2, 1, 3, 4, 0}; + std::vector keys; + for (auto& user_key : user_keys) { + keys.push_back(GetInternalKey(user_key, false)); + } + uint64_t expected_table_size = GetExpectedTableSize(keys.size()); + + std::unique_ptr file_writer; + fname = test::PerThreadDBPath("WithCollisionPathFullKeyAndCuckooBlock"); + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 2, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(keys[i]), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = GetInternalKey("key00", true); + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(keys, values, expected_locations, expected_unused_bucket, + expected_table_size, 2, false, 2); +} + +TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { + uint32_t num_hash_fun = 4; + std::vector user_keys = {"key01", "key02", "key03", "key04"}; + std::vector values = {"v01", "v02", "v03", "v04"}; + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1, 2, 3}}, + {user_keys[1], {1, 2, 3, 4}}, + {user_keys[2], {2, 3, 4, 5}}, + {user_keys[3], {3, 4, 5, 6}}}; + hash_map = std::move(hm); + + std::vector expected_locations = {0, 1, 2, 3}; + uint64_t expected_table_size = GetExpectedTableSize(user_keys.size()); + + std::unique_ptr file_writer; + fname = test::PerThreadDBPath("NoCollisionUserKey"); + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = user_keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = "key00"; + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(user_keys, values, expected_locations, + 
expected_unused_bucket, expected_table_size, 2, true); +} + +TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { + uint32_t num_hash_fun = 4; + std::vector user_keys = {"key01", "key02", "key03", "key04"}; + std::vector values = {"v01", "v02", "v03", "v04"}; + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1, 2, 3}}, + {user_keys[1], {0, 1, 2, 3}}, + {user_keys[2], {0, 1, 2, 3}}, + {user_keys[3], {0, 1, 2, 3}}, + }; + hash_map = std::move(hm); + + std::vector expected_locations = {0, 1, 2, 3}; + uint64_t expected_table_size = GetExpectedTableSize(user_keys.size()); + + std::unique_ptr file_writer; + fname = test::PerThreadDBPath("WithCollisionUserKey"); + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = user_keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = "key00"; + expected_unused_bucket += std::string(values[0].size(), 'a'); + CheckFileContents(user_keys, values, expected_locations, + expected_unused_bucket, expected_table_size, 4, true); +} + +TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) { + uint32_t num_hash_fun = 2; + std::vector user_keys = {"key01", "key02", "key03", "key04", + "key05"}; + std::vector values = {"v01", "v02", "v03", "v04", "v05"}; + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}}, + {user_keys[3], {3, 4}}, {user_keys[4], {0, 2}}, + }; + hash_map = std::move(hm); + + std::vector expected_locations = {0, 1, 3, 4, 2}; + uint64_t expected_table_size = GetExpectedTableSize(user_keys.size()); + + std::unique_ptr file_writer; + fname = test::PerThreadDBPath("WithCollisionPathUserKey"); + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 2, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + size_t bucket_size = user_keys[0].size() + values[0].size(); + ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); + ASSERT_OK(builder.Finish()); + ASSERT_OK(file_writer->Close()); + ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); + + std::string expected_unused_bucket = "key00"; + expected_unused_bucket += std::string(values[0].size(), 'a'); + 
CheckFileContents(user_keys, values, expected_locations, + expected_unused_bucket, expected_table_size, 2, true); +} + +TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) { + // Have two hash functions. Insert elements with overlapping hashes. + // Finally try inserting an element with hash value somewhere in the middle + // and it should fail because the no. of elements to displace is too high. + uint32_t num_hash_fun = 2; + std::vector user_keys = {"key01", "key02", "key03", "key04", + "key05"}; + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}}, + {user_keys[3], {3, 4}}, {user_keys[4], {0, 1}}, + }; + hash_map = std::move(hm); + + std::unique_ptr file_writer; + fname = test::PerThreadDBPath("WithCollisionPathUserKey"); + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 2, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + for (uint32_t i = 0; i < user_keys.size(); i++) { + builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value")); + ASSERT_EQ(builder.NumEntries(), i + 1); + ASSERT_OK(builder.status()); + } + ASSERT_TRUE(builder.Finish().IsNotSupported()); + ASSERT_OK(file_writer->Close()); +} + +TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) { + // Need to have a temporary variable here as VS compiler does not currently + // support operator= with initializer_list as a parameter + std::unordered_map> hm = { + {"repeatedkey", {0, 1, 2, 3}}}; + hash_map = std::move(hm); + uint32_t num_hash_fun = 4; + std::string user_key = "repeatedkey"; + + std::unique_ptr file_writer; + fname = test::PerThreadDBPath("FailWhenSameKeyInserted"); + ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname, + file_options_, &file_writer, nullptr)); + CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun, + 100, BytewiseComparator(), 1, false, false, + GetSliceHash, 0 /* column_family_id */, + kDefaultColumnFamilyName); + ASSERT_OK(builder.status()); + + builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1")); + ASSERT_EQ(builder.NumEntries(), 1u); + ASSERT_OK(builder.status()); + builder.Add(Slice(GetInternalKey(user_key, true)), Slice("value2")); + ASSERT_EQ(builder.NumEntries(), 2u); + ASSERT_OK(builder.status()); + + ASSERT_TRUE(builder.Finish().IsNotSupported()); + ASSERT_OK(file_writer->Close()); +} +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +#else +#include + +int main(int /*argc*/, char** /*argv*/) { + fprintf(stderr, "SKIPPED as Cuckoo table is not supported in ROCKSDB_LITE\n"); + return 0; +} + +#endif // ROCKSDB_LITE diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_factory.cc b/src/rocksdb/table/cuckoo/cuckoo_table_factory.cc new file mode 100644 index 000000000..1253c92dd --- /dev/null +++ b/src/rocksdb/table/cuckoo/cuckoo_table_factory.cc @@ -0,0 +1,104 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifndef ROCKSDB_LITE +#include "table/cuckoo/cuckoo_table_factory.h" + +#include "db/dbformat.h" +#include "options/configurable_helper.h" +#include "rocksdb/utilities/options_type.h" +#include "table/cuckoo/cuckoo_table_builder.h" +#include "table/cuckoo/cuckoo_table_reader.h" + +namespace ROCKSDB_NAMESPACE { + +Status CuckooTableFactory::NewTableReader( + const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table, + bool /*prefetch_index_and_filter_in_cache*/) const { + std::unique_ptr new_reader(new CuckooTableReader( + table_reader_options.ioptions, std::move(file), file_size, + table_reader_options.internal_comparator.user_comparator(), nullptr)); + Status s = new_reader->status(); + if (s.ok()) { + *table = std::move(new_reader); + } + return s; +} + +TableBuilder* CuckooTableFactory::NewTableBuilder( + const TableBuilderOptions& table_builder_options, + WritableFileWriter* file) const { + // TODO: change builder to take the option struct + return new CuckooTableBuilder( + file, table_options_.hash_table_ratio, 64, + table_options_.max_search_depth, + table_builder_options.internal_comparator.user_comparator(), + table_options_.cuckoo_block_size, table_options_.use_module_hash, + table_options_.identity_as_first_hash, nullptr /* get_slice_hash */, + table_builder_options.column_family_id, + table_builder_options.column_family_name, table_builder_options.db_id, + table_builder_options.db_session_id, table_builder_options.cur_file_num); +} + +std::string CuckooTableFactory::GetPrintableOptions() const { + std::string ret; + ret.reserve(2000); + const int kBufferSize = 200; + char buffer[kBufferSize]; + + snprintf(buffer, kBufferSize, " hash_table_ratio: %lf\n", + table_options_.hash_table_ratio); + ret.append(buffer); + snprintf(buffer, kBufferSize, " max_search_depth: %u\n", + table_options_.max_search_depth); + ret.append(buffer); + snprintf(buffer, kBufferSize, " cuckoo_block_size: %u\n", + table_options_.cuckoo_block_size); + ret.append(buffer); + snprintf(buffer, kBufferSize, " identity_as_first_hash: %d\n", + table_options_.identity_as_first_hash); + ret.append(buffer); + return ret; +} + +static std::unordered_map cuckoo_table_type_info = + { +#ifndef ROCKSDB_LITE + {"hash_table_ratio", + {offsetof(struct CuckooTableOptions, hash_table_ratio), + OptionType::kDouble, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"max_search_depth", + {offsetof(struct CuckooTableOptions, max_search_depth), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"cuckoo_block_size", + {offsetof(struct CuckooTableOptions, cuckoo_block_size), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"identity_as_first_hash", + {offsetof(struct CuckooTableOptions, identity_as_first_hash), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, + {"use_module_hash", + {offsetof(struct CuckooTableOptions, use_module_hash), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, +#endif // ROCKSDB_LITE +}; + +CuckooTableFactory::CuckooTableFactory(const CuckooTableOptions& table_options) + : table_options_(table_options) { + RegisterOptions(&table_options_, &cuckoo_table_type_info); +} + 
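+ +// Illustrative usage sketch (an editorial note, not part of the vendored +// file): the factory is normally installed through Options::table_factory, +// e.g.: +// +// CuckooTableOptions cuckoo_opts; +// cuckoo_opts.hash_table_ratio = 0.9; // fraction of buckets occupied +// Options options; +// options.table_factory.reset(NewCuckooTableFactory(cuckoo_opts)); +// options.allow_mmap_reads = true; // CuckooTableReader requires mmap reads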
+TableFactory* NewCuckooTableFactory(const CuckooTableOptions& table_options) { + return new CuckooTableFactory(table_options); +} + +} // namespace ROCKSDB_NAMESPACE +#endif // ROCKSDB_LITE diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_factory.h b/src/rocksdb/table/cuckoo/cuckoo_table_factory.h new file mode 100644 index 000000000..9937c28dd --- /dev/null +++ b/src/rocksdb/table/cuckoo/cuckoo_table_factory.h @@ -0,0 +1,82 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#ifndef ROCKSDB_LITE + +#include + +#include "rocksdb/options.h" +#include "rocksdb/table.h" +#include "util/murmurhash.h" + +namespace ROCKSDB_NAMESPACE { + +const uint32_t kCuckooMurmurSeedMultiplier = 816922183; +static inline uint64_t CuckooHash( + const Slice& user_key, uint32_t hash_cnt, bool use_module_hash, + uint64_t table_size_, bool identity_as_first_hash, + uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) { +#if !defined NDEBUG || defined OS_WIN + // This part is used only in unit tests but we have to keep it for Windows + // build as we run test in both debug and release modes under Windows. + if (get_slice_hash != nullptr) { + return get_slice_hash(user_key, hash_cnt, table_size_); + } +#else + (void)get_slice_hash; +#endif + + uint64_t value = 0; + if (hash_cnt == 0 && identity_as_first_hash) { + value = (*reinterpret_cast(user_key.data())); + } else { + value = MurmurHash(user_key.data(), static_cast(user_key.size()), + kCuckooMurmurSeedMultiplier * hash_cnt); + } + if (use_module_hash) { + return value % table_size_; + } else { + return value & (table_size_ - 1); + } +} + +// Cuckoo Table is designed for applications that require fast point lookups +// but not fast range scans. +// +// Some assumptions: +// - Key length and Value length are fixed. +// - Does not support Snapshot. +// - Does not support Merge operations. +// - Does not support prefix bloom filters. +class CuckooTableFactory : public TableFactory { + public: + explicit CuckooTableFactory( + const CuckooTableOptions& table_option = CuckooTableOptions()); + ~CuckooTableFactory() {} + + // Method to allow CheckedCast to work for this class + static const char* kClassName() { return kCuckooTableName(); } + const char* Name() const override { return kCuckooTableName(); } + + using TableFactory::NewTableReader; + Status NewTableReader( + const ReadOptions& ro, const TableReaderOptions& table_reader_options, + std::unique_ptr&& file, uint64_t file_size, + std::unique_ptr* table, + bool prefetch_index_and_filter_in_cache = true) const override; + + TableBuilder* NewTableBuilder( + const TableBuilderOptions& table_builder_options, + WritableFileWriter* file) const override; + + std::string GetPrintableOptions() const override; + + private: + CuckooTableOptions table_options_; +}; + +} // namespace ROCKSDB_NAMESPACE +#endif // ROCKSDB_LITE diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_reader.cc b/src/rocksdb/table/cuckoo/cuckoo_table_reader.cc new file mode 100644 index 000000000..1d70909a6 --- /dev/null +++ b/src/rocksdb/table/cuckoo/cuckoo_table_reader.cc @@ -0,0 +1,411 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 
+
+// Cuckoo Table is designed for applications that require fast point lookups
+// but not fast range scans.
+//
+// Some assumptions:
+// - Key length and Value length are fixed.
+// - Does not support Snapshot.
+// - Does not support Merge operations.
+// - Does not support prefix bloom filters.
+class CuckooTableFactory : public TableFactory {
+ public:
+  explicit CuckooTableFactory(
+      const CuckooTableOptions& table_option = CuckooTableOptions());
+  ~CuckooTableFactory() {}
+
+  // Method to allow CheckedCast to work for this class
+  static const char* kClassName() { return kCuckooTableName(); }
+  const char* Name() const override { return kCuckooTableName(); }
+
+  using TableFactory::NewTableReader;
+  Status NewTableReader(
+      const ReadOptions& ro, const TableReaderOptions& table_reader_options,
+      std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+      std::unique_ptr<TableReader>* table,
+      bool prefetch_index_and_filter_in_cache = true) const override;
+
+  TableBuilder* NewTableBuilder(
+      const TableBuilderOptions& table_builder_options,
+      WritableFileWriter* file) const override;
+
+  std::string GetPrintableOptions() const override;
+
+ private:
+  CuckooTableOptions table_options_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+#endif  // ROCKSDB_LITE
diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_reader.cc b/src/rocksdb/table/cuckoo/cuckoo_table_reader.cc
new file mode 100644
index 000000000..1d70909a6
--- /dev/null
+++ b/src/rocksdb/table/cuckoo/cuckoo_table_reader.cc
@@ -0,0 +1,411 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef ROCKSDB_LITE
+#include "table/cuckoo/cuckoo_table_reader.h"
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "memory/arena.h"
+#include "options/cf_options.h"
+#include "rocksdb/iterator.h"
+#include "rocksdb/table.h"
+#include "table/cuckoo/cuckoo_table_factory.h"
+#include "table/get_context.h"
+#include "table/internal_iterator.h"
+#include "table/meta_blocks.h"
+#include "util/coding.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1);
+const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
+}  // namespace
+
+extern const uint64_t kCuckooTableMagicNumber;
+
+CuckooTableReader::CuckooTableReader(
+    const ImmutableOptions& ioptions,
+    std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
+    const Comparator* comparator,
+    uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
+    : file_(std::move(file)),
+      is_last_level_(false),
+      identity_as_first_hash_(false),
+      use_module_hash_(false),
+      num_hash_func_(0),
+      unused_key_(""),
+      key_length_(0),
+      user_key_length_(0),
+      value_length_(0),
+      bucket_length_(0),
+      cuckoo_block_size_(0),
+      cuckoo_block_bytes_minus_one_(0),
+      table_size_(0),
+      ucomp_(comparator),
+      get_slice_hash_(get_slice_hash) {
+  if (!ioptions.allow_mmap_reads) {
+    status_ = Status::InvalidArgument("File is not mmaped");
+    return;
+  }
+  {
+    std::unique_ptr<TableProperties> props;
+    status_ = ReadTableProperties(file_.get(), file_size,
+                                  kCuckooTableMagicNumber, ioptions, &props);
+    if (!status_.ok()) {
+      return;
+    }
+    table_props_ = std::move(props);
+  }
+  auto& user_props = table_props_->user_collected_properties;
+  auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc);
+  if (hash_funs == user_props.end()) {
+    status_ = Status::Corruption("Number of hash functions not found");
+    return;
+  }
+  num_hash_func_ =
+      *reinterpret_cast<const uint32_t*>(hash_funs->second.data());
+  auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey);
+  if (unused_key == user_props.end()) {
+    status_ = Status::Corruption("Empty bucket value not found");
+    return;
+  }
+  unused_key_ = unused_key->second;
+
+  key_length_ = static_cast<uint32_t>(table_props_->fixed_key_len);
+  auto user_key_len = user_props.find(CuckooTablePropertyNames::kUserKeyLength);
+  if (user_key_len == user_props.end()) {
+    status_ = Status::Corruption("User key length not found");
+    return;
+  }
+  user_key_length_ =
+      *reinterpret_cast<const uint32_t*>(user_key_len->second.data());
+
+  auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength);
+  if (value_length == user_props.end()) {
+    status_ = Status::Corruption("Value length not found");
+    return;
+  }
+  value_length_ =
+      *reinterpret_cast<const uint32_t*>(value_length->second.data());
+  bucket_length_ = key_length_ + value_length_;
+
+  auto hash_table_size =
+      user_props.find(CuckooTablePropertyNames::kHashTableSize);
+  if (hash_table_size == user_props.end()) {
+    status_ = Status::Corruption("Hash table size not found");
+    return;
+  }
+  table_size_ =
+      *reinterpret_cast<const uint64_t*>(hash_table_size->second.data());
+
+  auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel);
+  if (is_last_level == user_props.end()) {
+    status_ = Status::Corruption("Is last level not found");
+    return;
+  }
+  is_last_level_ =
+      *reinterpret_cast<const bool*>(is_last_level->second.data());
+
+  auto identity_as_first_hash =
+      user_props.find(CuckooTablePropertyNames::kIdentityAsFirstHash);
+  if (identity_as_first_hash == user_props.end()) {
+    status_ = Status::Corruption("Identity as first hash not found");
+    return;
+  }
+  identity_as_first_hash_ =
+      *reinterpret_cast<const bool*>(identity_as_first_hash->second.data());
+
+  auto use_module_hash =
+      user_props.find(CuckooTablePropertyNames::kUseModuleHash);
+  if (use_module_hash == user_props.end()) {
+    status_ = Status::Corruption("Hash type not found");
+    return;
+  }
+  use_module_hash_ =
+      *reinterpret_cast<const bool*>(use_module_hash->second.data());
+  auto cuckoo_block_size =
+      user_props.find(CuckooTablePropertyNames::kCuckooBlockSize);
+  if (cuckoo_block_size == user_props.end()) {
+    status_ = Status::Corruption("Cuckoo block size not found");
+    return;
+  }
+  cuckoo_block_size_ =
+      *reinterpret_cast<const uint32_t*>(cuckoo_block_size->second.data());
+  cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1;
+  // TODO: rate limit reads of whole cuckoo tables.
+  status_ =
+      file_->Read(IOOptions(), 0, static_cast<size_t>(file_size), &file_data_,
+                  nullptr, nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
+}
+
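+// Point lookups replay the builder's probe sequence: for each hash function
+// in turn, scan one cuckoo block of consecutive buckets. In outline (the
+// helper names in this sketch are illustrative only, not real functions):
+//
+//   for (uint32_t h = 0; h < num_hash_func_; ++h) {
+//     const char* bucket = data + bucket_length_ * CuckooHash(user_key, h);
+//     for (uint32_t b = 0; b < cuckoo_block_size_; ++b, bucket += length) {
+//       if (is_unused(bucket)) return NotFound;  // empty bucket ends search
+//       if (user_key_matches(bucket)) return Found(bucket);
+//     }
+//   }
+//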
+Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,
+                              const Slice& key, GetContext* get_context,
+                              const SliceTransform* /* prefix_extractor */,
+                              bool /*skip_filters*/) {
+  assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0));
+  Slice user_key = ExtractUserKey(key);
+  for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
+    uint64_t offset =
+        bucket_length_ * CuckooHash(user_key, hash_cnt, use_module_hash_,
+                                    table_size_, identity_as_first_hash_,
+                                    get_slice_hash_);
+    const char* bucket = &file_data_.data()[offset];
+    for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
+         ++block_idx, bucket += bucket_length_) {
+      if (ucomp_->Equal(Slice(unused_key_.data(), user_key.size()),
+                        Slice(bucket, user_key.size()))) {
+        return Status::OK();
+      }
+      // Here we compare only the user key part, as we support only one entry
+      // per user key and we don't support snapshots.
+      if (ucomp_->Equal(user_key, Slice(bucket, user_key.size()))) {
+        Slice value(bucket + key_length_, value_length_);
+        if (is_last_level_) {
+          // The sequence number is not stored at the last level, so we use
+          // kMaxSequenceNumber since it is unknown. This could cause some
+          // transactions to fail to lock a key due to the unknown sequence
+          // number; however, nobody is expected to use a CuckooTable in a
+          // TransactionDB.
+          get_context->SaveValue(value, kMaxSequenceNumber);
+        } else {
+          Slice full_key(bucket, key_length_);
+          ParsedInternalKey found_ikey;
+          Status s = ParseInternalKey(full_key, &found_ikey,
+                                      false /* log_err_key */);  // TODO
+          if (!s.ok()) return s;
+          bool dont_care __attribute__((__unused__));
+          get_context->SaveValue(found_ikey, value, &dont_care);
+        }
+        // We don't support merge operations, so we return here.
+        return Status::OK();
+      }
+    }
+  }
+  return Status::OK();
+}
+
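+// Prepare() lets batched callers hide memory latency: the first cuckoo block
+// for a key is prefetched cache line by cache line before Get() touches it.
+// CACHE_LINE_MASK rounds the bucket address down to a cache-line boundary,
+// e.g. with CACHE_LINE_SIZE 64, a bucket at address 0x1234 is prefetched
+// starting from 0x1200 until the whole cuckoo block is covered.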
+void CuckooTableReader::Prepare(const Slice& key) {
+  // Prefetch the first Cuckoo Block.
+  Slice user_key = ExtractUserKey(key);
+  uint64_t addr =
+      reinterpret_cast<uint64_t>(file_data_.data()) +
+      bucket_length_ * CuckooHash(user_key, 0, use_module_hash_, table_size_,
+                                  identity_as_first_hash_, nullptr);
+  uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_;
+  for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) {
+    PREFETCH(reinterpret_cast<const char*>(addr), 0, 3);
+  }
+}
+
+class CuckooTableIterator : public InternalIterator {
+ public:
+  explicit CuckooTableIterator(CuckooTableReader* reader);
+  // No copying allowed
+  CuckooTableIterator(const CuckooTableIterator&) = delete;
+  void operator=(const CuckooTableIterator&) = delete;
+  ~CuckooTableIterator() override {}
+  bool Valid() const override;
+  void SeekToFirst() override;
+  void SeekToLast() override;
+  void Seek(const Slice& target) override;
+  void SeekForPrev(const Slice& target) override;
+  void Next() override;
+  void Prev() override;
+  Slice key() const override;
+  Slice value() const override;
+  Status status() const override { return Status::OK(); }
+  void InitIfNeeded();
+
+ private:
+  struct BucketComparator {
+    BucketComparator(const Slice& file_data, const Comparator* ucomp,
+                     uint32_t bucket_len, uint32_t user_key_len,
+                     const Slice& target = Slice())
+        : file_data_(file_data),
+          ucomp_(ucomp),
+          bucket_len_(bucket_len),
+          user_key_len_(user_key_len),
+          target_(target) {}
+    bool operator()(const uint32_t first, const uint32_t second) const {
+      const char* first_bucket = (first == kInvalidIndex)
+                                     ? target_.data()
+                                     : &file_data_.data()[first * bucket_len_];
+      const char* second_bucket =
+          (second == kInvalidIndex) ? target_.data()
+                                    : &file_data_.data()[second * bucket_len_];
+      return ucomp_->Compare(Slice(first_bucket, user_key_len_),
+                             Slice(second_bucket, user_key_len_)) < 0;
+    }
+
+   private:
+    const Slice file_data_;
+    const Comparator* ucomp_;
+    const uint32_t bucket_len_;
+    const uint32_t user_key_len_;
+    const Slice target_;
+  };
+
+  const BucketComparator bucket_comparator_;
+  void PrepareKVAtCurrIdx();
+  CuckooTableReader* reader_;
+  bool initialized_;
+  // Contains the ids of non-empty buckets, sorted in key order.
+  std::vector<uint32_t> sorted_bucket_ids_;
+  // We assume that the number of items can be stored in uint32 (4 billion).
+  uint32_t curr_key_idx_;
+  Slice curr_value_;
+  IterKey curr_key_;
+};
+
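+// The iterator is lazy: nothing is sorted until the first positioning call.
+// InitIfNeeded() then scans every bucket once, records the ids of non-empty
+// buckets, and sorts them by user key, so the first seek costs O(n log n)
+// time and about four bytes of memory per entry; after that, iteration is a
+// plain walk over sorted_bucket_ids_.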
+CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
+    : bucket_comparator_(reader->file_data_, reader->ucomp_,
+                         reader->bucket_length_, reader->user_key_length_),
+      reader_(reader),
+      initialized_(false),
+      curr_key_idx_(kInvalidIndex) {
+  sorted_bucket_ids_.clear();
+  curr_value_.clear();
+  curr_key_.Clear();
+}
+
+void CuckooTableIterator::InitIfNeeded() {
+  if (initialized_) {
+    return;
+  }
+  sorted_bucket_ids_.reserve(
+      static_cast<size_t>(reader_->GetTableProperties()->num_entries));
+  uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1;
+  assert(num_buckets < kInvalidIndex);
+  const char* bucket = reader_->file_data_.data();
+  for (uint32_t bucket_id = 0; bucket_id < num_buckets; ++bucket_id) {
+    if (Slice(bucket, reader_->key_length_) != Slice(reader_->unused_key_)) {
+      sorted_bucket_ids_.push_back(bucket_id);
+    }
+    bucket += reader_->bucket_length_;
+  }
+  assert(sorted_bucket_ids_.size() ==
+         reader_->GetTableProperties()->num_entries);
+  std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
+            bucket_comparator_);
+  curr_key_idx_ = kInvalidIndex;
+  initialized_ = true;
+}
+
+void CuckooTableIterator::SeekToFirst() {
+  InitIfNeeded();
+  curr_key_idx_ = 0;
+  PrepareKVAtCurrIdx();
+}
+
+void CuckooTableIterator::SeekToLast() {
+  InitIfNeeded();
+  curr_key_idx_ = static_cast<uint32_t>(sorted_bucket_ids_.size()) - 1;
+  PrepareKVAtCurrIdx();
+}
+
+void CuckooTableIterator::Seek(const Slice& target) {
+  InitIfNeeded();
+  const BucketComparator seek_comparator(
+      reader_->file_data_, reader_->ucomp_, reader_->bucket_length_,
+      reader_->user_key_length_, ExtractUserKey(target));
+  auto seek_it =
+      std::lower_bound(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
+                       kInvalidIndex, seek_comparator);
+  curr_key_idx_ =
+      static_cast<uint32_t>(std::distance(sorted_bucket_ids_.begin(), seek_it));
+  PrepareKVAtCurrIdx();
+}
+
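+// Seek() passes kInvalidIndex as the value to std::lower_bound:
+// BucketComparator treats that sentinel as "compare against target_" rather
+// than as a real bucket id, so a binary search over bucket ids can be driven
+// by a key that does not live in the file. For example, seeking to user key
+// "k2" among sorted buckets holding "k1", "k2", "k3" compares the sentinel
+// against each probed bucket and lands on the bucket that stores "k2".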
+void CuckooTableIterator::SeekForPrev(const Slice& /*target*/) {
+  // Not supported.
+  assert(false);
+}
+
+bool CuckooTableIterator::Valid() const {
+  return curr_key_idx_ < sorted_bucket_ids_.size();
+}
+
+void CuckooTableIterator::PrepareKVAtCurrIdx() {
+  if (!Valid()) {
+    curr_value_.clear();
+    curr_key_.Clear();
+    return;
+  }
+  uint32_t id = sorted_bucket_ids_[curr_key_idx_];
+  const char* offset =
+      reader_->file_data_.data() + id * reader_->bucket_length_;
+  if (reader_->is_last_level_) {
+    // Always return an internal key.
+    curr_key_.SetInternalKey(Slice(offset, reader_->user_key_length_), 0,
+                             kTypeValue);
+  } else {
+    curr_key_.SetInternalKey(Slice(offset, reader_->key_length_));
+  }
+  curr_value_ = Slice(offset + reader_->key_length_, reader_->value_length_);
+}
+
+void CuckooTableIterator::Next() {
+  if (!Valid()) {
+    curr_value_.clear();
+    curr_key_.Clear();
+    return;
+  }
+  ++curr_key_idx_;
+  PrepareKVAtCurrIdx();
+}
+
+void CuckooTableIterator::Prev() {
+  if (curr_key_idx_ == 0) {
+    curr_key_idx_ = static_cast<uint32_t>(sorted_bucket_ids_.size());
+  }
+  if (!Valid()) {
+    curr_value_.clear();
+    curr_key_.Clear();
+    return;
+  }
+  --curr_key_idx_;
+  PrepareKVAtCurrIdx();
+}
+
+Slice CuckooTableIterator::key() const {
+  assert(Valid());
+  return curr_key_.GetInternalKey();
+}
+
+Slice CuckooTableIterator::value() const {
+  assert(Valid());
+  return curr_value_;
+}
+
+InternalIterator* CuckooTableReader::NewIterator(
+    const ReadOptions& /*read_options*/,
+    const SliceTransform* /* prefix_extractor */, Arena* arena,
+    bool /*skip_filters*/, TableReaderCaller /*caller*/,
+    size_t /*compaction_readahead_size*/, bool /* allow_unprepared_value */) {
+  if (!status().ok()) {
+    return NewErrorInternalIterator<Slice>(
+        Status::Corruption("CuckooTableReader status is not okay."), arena);
+  }
+  CuckooTableIterator* iter;
+  if (arena == nullptr) {
+    iter = new CuckooTableIterator(this);
+  } else {
+    auto iter_mem = arena->AllocateAligned(sizeof(CuckooTableIterator));
+    iter = new (iter_mem) CuckooTableIterator(this);
+  }
+  return iter;
+}
+
+size_t CuckooTableReader::ApproximateMemoryUsage() const { return 0; }
+
+}  // namespace ROCKSDB_NAMESPACE
+#endif
diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_reader.h b/src/rocksdb/table/cuckoo/cuckoo_table_reader.h
new file mode 100644
index 000000000..f6c599ae8
--- /dev/null
+++ b/src/rocksdb/table/cuckoo/cuckoo_table_reader.h
@@ -0,0 +1,100 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+#ifndef ROCKSDB_LITE
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "file/random_access_file_reader.h"
+#include "rocksdb/env.h"
+#include "rocksdb/options.h"
+#include "table/table_reader.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class Arena;
+class TableReader;
+struct ImmutableOptions;
+
+class CuckooTableReader : public TableReader {
+ public:
+  CuckooTableReader(const ImmutableOptions& ioptions,
+                    std::unique_ptr<RandomAccessFileReader>&& file,
+                    uint64_t file_size, const Comparator* user_comparator,
+                    uint64_t (*get_slice_hash)(const Slice&, uint32_t,
+                                               uint64_t));
+  ~CuckooTableReader() {}
+
+  std::shared_ptr<const TableProperties> GetTableProperties() const override {
+    return table_props_;
+  }
+
+  Status status() const { return status_; }
+
+  Status Get(const ReadOptions& readOptions, const Slice& key,
+             GetContext* get_context, const SliceTransform* prefix_extractor,
+             bool skip_filters = false) override;
+
+  // Returns a new iterator over the table contents.
+  // compaction_readahead_size: its value will only be used if
+  // for_compaction = true.
+  InternalIterator* NewIterator(const ReadOptions&,
+                                const SliceTransform* prefix_extractor,
+                                Arena* arena, bool skip_filters,
+                                TableReaderCaller caller,
+                                size_t compaction_readahead_size = 0,
+                                bool allow_unprepared_value = false) override;
+  void Prepare(const Slice& target) override;
+
+  // Report an approximation of how much memory has been used.
+  size_t ApproximateMemoryUsage() const override;
+
+  // The following methods are not implemented for the Cuckoo Table reader.
+  uint64_t ApproximateOffsetOf(const Slice& /*key*/,
+                               TableReaderCaller /*caller*/) override {
+    return 0;
+  }
+
+  uint64_t ApproximateSize(const Slice& /*start*/, const Slice& /*end*/,
+                           TableReaderCaller /*caller*/) override {
+    return 0;
+  }
+
+  void SetupForCompaction() override {}
+  // End of methods not implemented.
+
+ private:
+  friend class CuckooTableIterator;
+  void LoadAllKeys(std::vector<std::pair<Slice, uint32_t>>* key_to_bucket_id);
+  std::unique_ptr<RandomAccessFileReader> file_;
+  Slice file_data_;
+  bool is_last_level_;
+  bool identity_as_first_hash_;
+  bool use_module_hash_;
+  std::shared_ptr<const TableProperties> table_props_;
+  Status status_;
+  uint32_t num_hash_func_;
+  std::string unused_key_;
+  uint32_t key_length_;
+  uint32_t user_key_length_;
+  uint32_t value_length_;
+  uint32_t bucket_length_;
+  uint32_t cuckoo_block_size_;
+  uint32_t cuckoo_block_bytes_minus_one_;
+  uint64_t table_size_;
+  const Comparator* ucomp_;
+  uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
+                              uint64_t max_num_buckets);
+};
+
+}  // namespace ROCKSDB_NAMESPACE
+#endif  // ROCKSDB_LITE
diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc b/src/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc
new file mode 100644
index 000000000..d3d1490c6
--- /dev/null
+++ b/src/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc
@@ -0,0 +1,584 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#ifndef ROCKSDB_LITE
+
+#ifndef GFLAGS
+#include <cstdio>
+int main() {
+  fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
+  return 0;
+}
+#else
+
+#include <cinttypes>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "memory/arena.h"
+#include "rocksdb/db.h"
+#include "table/cuckoo/cuckoo_table_builder.h"
+#include "table/cuckoo/cuckoo_table_factory.h"
+#include "table/cuckoo/cuckoo_table_reader.h"
+#include "table/get_context.h"
+#include "table/meta_blocks.h"
+#include "test_util/testharness.h"
+#include "test_util/testutil.h"
+#include "util/gflags_compat.h"
+#include "util/random.h"
+#include "util/string_util.h"
+
+using GFLAGS_NAMESPACE::ParseCommandLineFlags;
+
+DEFINE_string(file_dir, "",
+              "Directory where the files will be created"
+              " for benchmark. Added for using tmpfs.");
+DEFINE_bool(enable_perf, false, "Run Benchmark Tests too.");
+DEFINE_bool(write, false,
+            "Should write new values to file in performance tests?");
+DEFINE_bool(identity_as_first_hash, true, "use identity as first hash");
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
+const uint32_t kNumHashFunc = 10;
+// Methods and variables related to hash functions.
+std::unordered_map<std::string, std::vector<uint64_t>> hash_map;
+
+void AddHashLookups(const std::string& s, uint64_t bucket_id,
+                    uint32_t num_hash_fun) {
+  std::vector<uint64_t> v;
+  for (uint32_t i = 0; i < num_hash_fun; i++) {
+    v.push_back(bucket_id + i);
+  }
+  hash_map[s] = v;
+}
+
+uint64_t GetSliceHash(const Slice& s, uint32_t index,
+                      uint64_t /*max_num_buckets*/) {
+  return hash_map[s.ToString()][index];
+}
+}  // namespace
+
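+// The tests below stub out hashing entirely: AddHashLookups pre-seeds
+// hash_map so that GetSliceHash returns bucket_id + index for a key's
+// index-th hash function. Seeding every key with the same bucket_id forces
+// worst-case collisions, while disjoint bucket_ids exercise the
+// collision-free path.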
+class CuckooReaderTest : public testing::Test {
+ public:
+  using testing::Test::SetUp;
+
+  CuckooReaderTest() {
+    options.allow_mmap_reads = true;
+    env = options.env;
+    file_options = FileOptions(options);
+  }
+
+  void SetUp(int num) {
+    num_items = num;
+    hash_map.clear();
+    keys.clear();
+    keys.resize(num_items);
+    user_keys.clear();
+    user_keys.resize(num_items);
+    values.clear();
+    values.resize(num_items);
+  }
+
+  std::string NumToStr(int64_t i) {
+    return std::string(reinterpret_cast<char*>(&i), sizeof(i));
+  }
+
+  void CreateCuckooFileAndCheckReader(
+      const Comparator* ucomp = BytewiseComparator()) {
+    std::unique_ptr<WritableFileWriter> file_writer;
+    ASSERT_OK(WritableFileWriter::Create(env->GetFileSystem(), fname,
+                                         file_options, &file_writer, nullptr));
+    CuckooTableBuilder builder(
+        file_writer.get(), 0.9, kNumHashFunc, 100, ucomp, 2, false, false,
+        GetSliceHash, 0 /* column_family_id */, kDefaultColumnFamilyName);
+    ASSERT_OK(builder.status());
+    for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
+      builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
+      ASSERT_OK(builder.status());
+      ASSERT_EQ(builder.NumEntries(), key_idx + 1);
+    }
+    ASSERT_OK(builder.Finish());
+    ASSERT_EQ(num_items, builder.NumEntries());
+    file_size = builder.FileSize();
+    ASSERT_OK(file_writer->Close());
+
+    // Check the reader now.
+    std::unique_ptr<RandomAccessFileReader> file_reader;
+    ASSERT_OK(RandomAccessFileReader::Create(
+        env->GetFileSystem(), fname, file_options, &file_reader, nullptr));
+    const ImmutableOptions ioptions(options);
+    CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucomp,
+                             GetSliceHash);
+    ASSERT_OK(reader.status());
+    // Assume no merges/deletions.
+    for (uint32_t i = 0; i < num_items; ++i) {
+      PinnableSlice value;
+      GetContext get_context(ucomp, nullptr, nullptr, nullptr,
+                             GetContext::kNotFound, Slice(user_keys[i]), &value,
+                             nullptr, nullptr, nullptr, nullptr, true, nullptr,
+                             nullptr);
+      ASSERT_OK(
+          reader.Get(ReadOptions(), Slice(keys[i]), &get_context, nullptr));
+      ASSERT_STREQ(values[i].c_str(), value.data());
+    }
+  }
+
+  void UpdateKeys(bool with_zero_seqno) {
+    for (uint32_t i = 0; i < num_items; i++) {
+      ParsedInternalKey ikey(user_keys[i], with_zero_seqno ? 0 : i + 1000,
+                             kTypeValue);
+      keys[i].clear();
+      AppendInternalKey(&keys[i], ikey);
+    }
+  }
+
+  void CheckIterator(const Comparator* ucomp = BytewiseComparator()) {
+    std::unique_ptr<RandomAccessFileReader> file_reader;
+    ASSERT_OK(RandomAccessFileReader::Create(
+        env->GetFileSystem(), fname, file_options, &file_reader, nullptr));
+    const ImmutableOptions ioptions(options);
+    CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucomp,
+                             GetSliceHash);
+    ASSERT_OK(reader.status());
+    InternalIterator* it = reader.NewIterator(
+        ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
+        /*skip_filters=*/false, TableReaderCaller::kUncategorized);
+    ASSERT_OK(it->status());
+    ASSERT_TRUE(!it->Valid());
+    it->SeekToFirst();
+    int cnt = 0;
+    while (it->Valid()) {
+      ASSERT_OK(it->status());
+      ASSERT_TRUE(Slice(keys[cnt]) == it->key());
+      ASSERT_TRUE(Slice(values[cnt]) == it->value());
+      ++cnt;
+      it->Next();
+    }
+    ASSERT_EQ(static_cast<uint64_t>(cnt), num_items);
+
+    it->SeekToLast();
+    cnt = static_cast<int>(num_items) - 1;
+    ASSERT_TRUE(it->Valid());
+    while (it->Valid()) {
+      ASSERT_OK(it->status());
+      ASSERT_TRUE(Slice(keys[cnt]) == it->key());
+      ASSERT_TRUE(Slice(values[cnt]) == it->value());
+      --cnt;
+      it->Prev();
+    }
+    ASSERT_EQ(cnt, -1);
+
+    cnt = static_cast<int>(num_items) / 2;
+    it->Seek(keys[cnt]);
+    while (it->Valid()) {
+      ASSERT_OK(it->status());
+      ASSERT_TRUE(Slice(keys[cnt]) == it->key());
+      ASSERT_TRUE(Slice(values[cnt]) == it->value());
+      ++cnt;
+      it->Next();
+    }
+    ASSERT_EQ(static_cast<uint64_t>(cnt), num_items);
+    delete it;
+
+    Arena arena;
+    it = reader.NewIterator(ReadOptions(), /*prefix_extractor=*/nullptr, &arena,
+                            /*skip_filters=*/false,
+                            TableReaderCaller::kUncategorized);
+    ASSERT_OK(it->status());
+    ASSERT_TRUE(!it->Valid());
+    it->Seek(keys[num_items / 2]);
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_TRUE(keys[num_items / 2] == it->key());
+    ASSERT_TRUE(values[num_items / 2] == it->value());
+    ASSERT_OK(it->status());
+    it->~InternalIterator();
+  }
+
+  std::vector<std::string> keys;
+  std::vector<std::string> user_keys;
+  std::vector<std::string> values;
+  uint64_t num_items;
+  std::string fname;
+  uint64_t file_size;
+  Options options;
+  Env* env;
+  FileOptions file_options;
+};
+
+TEST_F(CuckooReaderTest, FileNotMmaped) {
+  options.allow_mmap_reads = false;
+  ImmutableOptions ioptions(options);
+  CuckooTableReader reader(ioptions, nullptr, 0, nullptr, nullptr);
+  ASSERT_TRUE(reader.status().IsInvalidArgument());
+  ASSERT_STREQ("File is not mmaped", reader.status().getState());
+}
+
+TEST_F(CuckooReaderTest, WhenKeyExists) {
+  SetUp(kNumHashFunc);
+  fname = test::PerThreadDBPath("CuckooReader_WhenKeyExists");
+  for (uint64_t i = 0; i < num_items; i++) {
+    user_keys[i] = "key" + NumToStr(i);
+    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + NumToStr(i);
+    // Give disjoint hash values.
+    AddHashLookups(user_keys[i], i, kNumHashFunc);
+  }
+  CreateCuckooFileAndCheckReader();
+  // Last-level file.
+  UpdateKeys(true);
+  CreateCuckooFileAndCheckReader();
+  // Test with collisions. Make all hash values collide.
+  hash_map.clear();
+  for (uint32_t i = 0; i < num_items; i++) {
+    AddHashLookups(user_keys[i], 0, kNumHashFunc);
+  }
+  UpdateKeys(false);
+  CreateCuckooFileAndCheckReader();
+  // Last-level file.
+  UpdateKeys(true);
+  CreateCuckooFileAndCheckReader();
+}
+
+TEST_F(CuckooReaderTest, WhenKeyExistsWithUint64Comparator) {
+  SetUp(kNumHashFunc);
+  fname = test::PerThreadDBPath("CuckooReaderUint64_WhenKeyExists");
+  for (uint64_t i = 0; i < num_items; i++) {
+    user_keys[i].resize(8);
+    memcpy(&user_keys[i][0], static_cast<void*>(&i), 8);
+    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + NumToStr(i);
+    // Give disjoint hash values.
+    AddHashLookups(user_keys[i], i, kNumHashFunc);
+  }
+  CreateCuckooFileAndCheckReader(test::Uint64Comparator());
+  // Last-level file.
+  UpdateKeys(true);
+  CreateCuckooFileAndCheckReader(test::Uint64Comparator());
+  // Test with collisions. Make all hash values collide.
+  hash_map.clear();
+  for (uint32_t i = 0; i < num_items; i++) {
+    AddHashLookups(user_keys[i], 0, kNumHashFunc);
+  }
+  UpdateKeys(false);
+  CreateCuckooFileAndCheckReader(test::Uint64Comparator());
+  // Last-level file.
+  UpdateKeys(true);
+  CreateCuckooFileAndCheckReader(test::Uint64Comparator());
+}
+
+TEST_F(CuckooReaderTest, CheckIterator) {
+  SetUp(2 * kNumHashFunc);
+  fname = test::PerThreadDBPath("CuckooReader_CheckIterator");
+  for (uint64_t i = 0; i < num_items; i++) {
+    user_keys[i] = "key" + NumToStr(i);
+    ParsedInternalKey ikey(user_keys[i], 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + NumToStr(i);
+    // Give disjoint hash values, in reverse order.
+    AddHashLookups(user_keys[i], num_items - i - 1, kNumHashFunc);
+  }
+  CreateCuckooFileAndCheckReader();
+  CheckIterator();
+  // Last-level file.
+  UpdateKeys(true);
+  CreateCuckooFileAndCheckReader();
+  CheckIterator();
+}
+
+TEST_F(CuckooReaderTest, CheckIteratorUint64) {
+  SetUp(2 * kNumHashFunc);
+  fname = test::PerThreadDBPath("CuckooReader_CheckIterator");
+  for (uint64_t i = 0; i < num_items; i++) {
+    user_keys[i].resize(8);
+    memcpy(&user_keys[i][0], static_cast<void*>(&i), 8);
+    ParsedInternalKey ikey(user_keys[i], 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + NumToStr(i);
+    // Give disjoint hash values, in reverse order.
+    AddHashLookups(user_keys[i], num_items - i - 1, kNumHashFunc);
+  }
+  CreateCuckooFileAndCheckReader(test::Uint64Comparator());
+  CheckIterator(test::Uint64Comparator());
+  // Last-level file.
+  UpdateKeys(true);
+  CreateCuckooFileAndCheckReader(test::Uint64Comparator());
+  CheckIterator(test::Uint64Comparator());
+}
+
+TEST_F(CuckooReaderTest, WhenKeyNotFound) {
+  // Add keys with colliding hash values.
+  SetUp(kNumHashFunc);
+  fname = test::PerThreadDBPath("CuckooReader_WhenKeyNotFound");
+  for (uint64_t i = 0; i < num_items; i++) {
+    user_keys[i] = "key" + NumToStr(i);
+    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
+    AppendInternalKey(&keys[i], ikey);
+    values[i] = "value" + NumToStr(i);
+    // Make all hash values collide.
+    AddHashLookups(user_keys[i], 0, kNumHashFunc);
+  }
+  auto* ucmp = BytewiseComparator();
+  CreateCuckooFileAndCheckReader();
+
+  std::unique_ptr<RandomAccessFileReader> file_reader;
+  ASSERT_OK(RandomAccessFileReader::Create(
+      env->GetFileSystem(), fname, file_options, &file_reader, nullptr));
+
+  const ImmutableOptions ioptions(options);
+  CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucmp,
+                           GetSliceHash);
+  ASSERT_OK(reader.status());
+  // Search for a key with colliding hash values.
+  std::string not_found_user_key = "key" + NumToStr(num_items);
+  std::string not_found_key;
+  AddHashLookups(not_found_user_key, 0, kNumHashFunc);
+  ParsedInternalKey ikey(not_found_user_key, 1000, kTypeValue);
+  AppendInternalKey(&not_found_key, ikey);
+  PinnableSlice value;
+  GetContext get_context(ucmp, nullptr, nullptr, nullptr, GetContext::kNotFound,
+                         Slice(not_found_key), &value, nullptr, nullptr,
+                         nullptr, nullptr, true, nullptr, nullptr);
+  ASSERT_OK(
+      reader.Get(ReadOptions(), Slice(not_found_key), &get_context, nullptr));
+  ASSERT_TRUE(value.empty());
+  ASSERT_OK(reader.status());
+  // Search for a key with an independent hash value.
+  std::string not_found_user_key2 = "key" + NumToStr(num_items + 1);
+  AddHashLookups(not_found_user_key2, kNumHashFunc, kNumHashFunc);
+  ParsedInternalKey ikey2(not_found_user_key2, 1000, kTypeValue);
+  std::string not_found_key2;
+  AppendInternalKey(&not_found_key2, ikey2);
+  value.Reset();
+  GetContext get_context2(ucmp, nullptr, nullptr, nullptr,
+                          GetContext::kNotFound, Slice(not_found_key2), &value,
+                          nullptr, nullptr, nullptr, nullptr, true, nullptr,
+                          nullptr);
+  ASSERT_OK(
+      reader.Get(ReadOptions(), Slice(not_found_key2), &get_context2, nullptr));
+  ASSERT_TRUE(value.empty());
+  ASSERT_OK(reader.status());
+
+  // Test a read where the searched-for key is the unused (empty-bucket) key.
+  std::string unused_key =
+      reader.GetTableProperties()->user_collected_properties.at(
+          CuckooTablePropertyNames::kEmptyKey);
+  // Add hash values that map to empty buckets.
+  AddHashLookups(ExtractUserKey(unused_key).ToString(), kNumHashFunc,
+                 kNumHashFunc);
+  value.Reset();
+  GetContext get_context3(
+      ucmp, nullptr, nullptr, nullptr, GetContext::kNotFound, Slice(unused_key),
+      &value, nullptr, nullptr, nullptr, nullptr, true, nullptr, nullptr);
+  ASSERT_OK(
+      reader.Get(ReadOptions(), Slice(unused_key), &get_context3, nullptr));
+  ASSERT_TRUE(value.empty());
+  ASSERT_OK(reader.status());
+}
+
+// Performance tests
+namespace {
+void GetKeys(uint64_t num, std::vector<std::string>* keys) {
+  keys->clear();
+  IterKey k;
+  k.SetInternalKey("", 0, kTypeValue);
+  std::string internal_key_suffix = k.GetInternalKey().ToString();
+  ASSERT_EQ(static_cast<size_t>(8), internal_key_suffix.size());
+  for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
+    uint64_t value = 2 * key_idx;
+    std::string new_key(reinterpret_cast<char*>(&value), sizeof(value));
+    new_key += internal_key_suffix;
+    keys->push_back(new_key);
+  }
+}
+
+std::string GetFileName(uint64_t num) {
+  if (FLAGS_file_dir.empty()) {
+    FLAGS_file_dir = test::TmpDir();
+  }
+  return test::PerThreadDBPath(FLAGS_file_dir, "cuckoo_read_benchmark") +
+         std::to_string(num / 1000000) + "Mkeys";
+}
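+
+// Each benchmark key is 16 bytes: an 8-byte user key holding 2 * key_idx
+// followed by the fixed 8-byte internal-key suffix (sequence number 0,
+// kTypeValue); the zero sequence numbers are what let the builder emit a
+// last-level file. ReadKeys below hands the reader 16-byte slices whose
+// first 8 bytes are the user key; only that prefix is examined on the
+// last-level fast path.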
+
+// Create a last-level file, as we are interested in measuring the
+// performance of last-level files only.
+void WriteFile(const std::vector<std::string>& keys, const uint64_t num,
+               double hash_ratio) {
+  Options options;
+  options.allow_mmap_reads = true;
+  const auto& fs = options.env->GetFileSystem();
+  FileOptions file_options(options);
+  std::string fname = GetFileName(num);
+
+  std::unique_ptr<WritableFileWriter> file_writer;
+  ASSERT_OK(WritableFileWriter::Create(fs, fname, file_options, &file_writer,
+                                       nullptr));
+  CuckooTableBuilder builder(
+      file_writer.get(), hash_ratio, 64, 1000, test::Uint64Comparator(), 5,
+      false, FLAGS_identity_as_first_hash, nullptr, 0 /* column_family_id */,
+      kDefaultColumnFamilyName);
+  ASSERT_OK(builder.status());
+  for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
+    // The value is just a part of the key.
+    builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], 4));
+    ASSERT_EQ(builder.NumEntries(), key_idx + 1);
+    ASSERT_OK(builder.status());
+  }
+  ASSERT_OK(builder.Finish());
+  ASSERT_EQ(num, builder.NumEntries());
+  ASSERT_OK(file_writer->Close());
+
+  uint64_t file_size;
+  ASSERT_OK(
+      fs->GetFileSize(fname, file_options.io_options, &file_size, nullptr));
+  std::unique_ptr<RandomAccessFileReader> file_reader;
+  ASSERT_OK(RandomAccessFileReader::Create(fs, fname, file_options,
+                                           &file_reader, nullptr));
+
+  const ImmutableOptions ioptions(options);
+  CuckooTableReader reader(ioptions, std::move(file_reader), file_size,
+                           test::Uint64Comparator(), nullptr);
+  ASSERT_OK(reader.status());
+  ReadOptions r_options;
+  PinnableSlice value;
+  // Assume only the fast path is triggered.
+  GetContext get_context(nullptr, nullptr, nullptr, nullptr,
+                         GetContext::kNotFound, Slice(), &value, nullptr,
+                         nullptr, nullptr, true, nullptr, nullptr);
+  for (uint64_t i = 0; i < num; ++i) {
+    value.Reset();
+    value.clear();
+    ASSERT_OK(reader.Get(r_options, Slice(keys[i]), &get_context, nullptr));
+    ASSERT_TRUE(Slice(value) == Slice(&keys[i][0], 4));
+  }
+}
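+
+// ReadKeys optionally batches lookups: each batch first calls Prepare() on
+// every key to issue prefetches, then calls Get() on the same keys, so the
+// memory fetches of one lookup overlap with the work of the next. A batch
+// size of 0 means plain sequential Get() calls with no prefetching.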
+void ReadKeys(uint64_t num, uint32_t batch_size) {
+  Options options;
+  options.allow_mmap_reads = true;
+  Env* env = options.env;
+  const auto& fs = options.env->GetFileSystem();
+  FileOptions file_options(options);
+  std::string fname = GetFileName(num);
+
+  uint64_t file_size;
+  ASSERT_OK(
+      fs->GetFileSize(fname, file_options.io_options, &file_size, nullptr));
+  std::unique_ptr<RandomAccessFileReader> file_reader;
+  ASSERT_OK(RandomAccessFileReader::Create(fs, fname, file_options,
+                                           &file_reader, nullptr));
+
+  const ImmutableOptions ioptions(options);
+  CuckooTableReader reader(ioptions, std::move(file_reader), file_size,
+                           test::Uint64Comparator(), nullptr);
+  ASSERT_OK(reader.status());
+  const UserCollectedProperties user_props =
+      reader.GetTableProperties()->user_collected_properties;
+  const uint32_t num_hash_fun = *reinterpret_cast<const uint32_t*>(
+      user_props.at(CuckooTablePropertyNames::kNumHashFunc).data());
+  const uint64_t table_size = *reinterpret_cast<const uint64_t*>(
+      user_props.at(CuckooTablePropertyNames::kHashTableSize).data());
+  fprintf(stderr,
+          "With %" PRIu64
+          " items, utilization is %.2f%%, number of"
+          " hash functions: %u.\n",
+          num, num * 100.0 / table_size, num_hash_fun);
+  ReadOptions r_options;
+
+  std::vector<uint64_t> keys;
+  keys.reserve(num);
+  for (uint64_t i = 0; i < num; ++i) {
+    keys.push_back(2 * i);
+  }
+  RandomShuffle(keys.begin(), keys.end());
+
+  PinnableSlice value;
+  // Assume only the fast path is triggered.
+  GetContext get_context(nullptr, nullptr, nullptr, nullptr,
+                         GetContext::kNotFound, Slice(), &value, nullptr,
+                         nullptr, nullptr, true, nullptr, nullptr);
+  uint64_t start_time = env->NowMicros();
+  if (batch_size > 0) {
+    for (uint64_t i = 0; i < num; i += batch_size) {
+      for (uint64_t j = i; j < i + batch_size && j < num; ++j) {
+        reader.Prepare(Slice(reinterpret_cast<char*>(&keys[j]), 16));
+      }
+      for (uint64_t j = i; j < i + batch_size && j < num; ++j) {
+        reader.Get(r_options, Slice(reinterpret_cast<char*>(&keys[j]), 16),
+                   &get_context, nullptr);
+      }
+    }
+  } else {
+    for (uint64_t i = 0; i < num; i++) {
+      reader.Get(r_options, Slice(reinterpret_cast<char*>(&keys[i]), 16),
+                 &get_context, nullptr);
+    }
+  }
+  float time_per_op = (env->NowMicros() - start_time) * 1.0f / num;
+  fprintf(stderr,
+          "Time taken per op is %.3fus (%.1f Mqps) with batch size of %u\n",
+          time_per_op, 1.0 / time_per_op, batch_size);
+}
+}  // namespace
+
+TEST_F(CuckooReaderTest, TestReadPerformance) {
+  if (!FLAGS_enable_perf) {
+    return;
+  }
+  double hash_ratio = 0.95;
+  // These numbers are chosen to give hash utilizations close to
+  // 90%, 75%, 60%, and 50%, respectively.
+  // They all create 128M buckets.
+  std::vector<uint64_t> nums = {120 * 1024 * 1024, 100 * 1024 * 1024,
+                                80 * 1024 * 1024, 70 * 1024 * 1024};
+#ifndef NDEBUG
+  fprintf(
+      stdout,
+      "WARNING: Not compiled with -DNDEBUG. Performance tests may be slow.\n");
+#endif
+  for (uint64_t num : nums) {
+    if (FLAGS_write ||
+        Env::Default()->FileExists(GetFileName(num)).IsNotFound()) {
+      std::vector<std::string> all_keys;
+      GetKeys(num, &all_keys);
+      WriteFile(all_keys, num, hash_ratio);
+    }
+    ReadKeys(num, 0);
+    ReadKeys(num, 10);
+    ReadKeys(num, 25);
+    ReadKeys(num, 50);
+    ReadKeys(num, 100);
+    fprintf(stderr, "\n");
+  }
+}
+}  // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  if (ROCKSDB_NAMESPACE::port::kLittleEndian) {
+    ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+    ::testing::InitGoogleTest(&argc, argv);
+    ParseCommandLineFlags(&argc, &argv, true);
+    return RUN_ALL_TESTS();
+  } else {
+    fprintf(stderr, "SKIPPED as Cuckoo table doesn't support Big Endian\n");
+    return 0;
+  }
+}
+
+#endif  // GFLAGS.
+
+#else
+#include <cstdio>
+
+int main(int /*argc*/, char** /*argv*/) {
+  fprintf(stderr, "SKIPPED as Cuckoo table is not supported in ROCKSDB_LITE\n");
+  return 0;
+}
+
+#endif  // ROCKSDB_LITE
-- 
cgit v1.2.3