summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc640
1 files changed, 640 insertions, 0 deletions
diff --git a/src/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc b/src/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc
new file mode 100644
index 000000000..be1c62117
--- /dev/null
+++ b/src/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc
@@ -0,0 +1,640 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#ifndef ROCKSDB_LITE
+
+#include "table/cuckoo/cuckoo_table_builder.h"
+
+#include <map>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "file/random_access_file_reader.h"
+#include "file/writable_file_writer.h"
+#include "rocksdb/db.h"
+#include "rocksdb/file_system.h"
+#include "table/meta_blocks.h"
+#include "test_util/testharness.h"
+#include "test_util/testutil.h"
+
+namespace ROCKSDB_NAMESPACE {
+extern const uint64_t kCuckooTableMagicNumber;
+
+namespace {
+std::unordered_map<std::string, std::vector<uint64_t>> hash_map;
+
+uint64_t GetSliceHash(const Slice& s, uint32_t index,
+ uint64_t /*max_num_buckets*/) {
+ return hash_map[s.ToString()][index];
+}
+} // namespace
+
+class CuckooBuilderTest : public testing::Test {
+ public:
+ CuckooBuilderTest() {
+ env_ = Env::Default();
+ Options options;
+ options.allow_mmap_reads = true;
+ file_options_ = FileOptions(options);
+ }
+
+ void CheckFileContents(const std::vector<std::string>& keys,
+ const std::vector<std::string>& values,
+ const std::vector<uint64_t>& expected_locations,
+ std::string expected_unused_bucket,
+ uint64_t expected_table_size,
+ uint32_t expected_num_hash_func,
+ bool expected_is_last_level,
+ uint32_t expected_cuckoo_block_size = 1) {
+ uint64_t num_deletions = 0;
+ for (const auto& key : keys) {
+ ParsedInternalKey parsed;
+ Status pik_status =
+ ParseInternalKey(key, &parsed, true /* log_err_key */);
+ if (pik_status.ok() && parsed.type == kTypeDeletion) {
+ num_deletions++;
+ }
+ }
+ // Read file
+ uint64_t read_file_size;
+ ASSERT_OK(env_->GetFileSize(fname, &read_file_size));
+ std::unique_ptr<RandomAccessFileReader> file_reader;
+ ASSERT_OK(RandomAccessFileReader::Create(
+ env_->GetFileSystem(), fname, file_options_, &file_reader, nullptr));
+
+ Options options;
+ options.allow_mmap_reads = true;
+ ImmutableOptions ioptions(options);
+
+ // Assert Table Properties.
+ std::unique_ptr<TableProperties> props;
+ ASSERT_OK(ReadTableProperties(file_reader.get(), read_file_size,
+ kCuckooTableMagicNumber, ioptions, &props));
+ // Check unused bucket.
+ std::string unused_key =
+ props->user_collected_properties[CuckooTablePropertyNames::kEmptyKey];
+ ASSERT_EQ(expected_unused_bucket.substr(0, props->fixed_key_len),
+ unused_key);
+
+ uint64_t value_len_found = *reinterpret_cast<const uint64_t*>(
+ props->user_collected_properties[CuckooTablePropertyNames::kValueLength]
+ .data());
+ ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found);
+ ASSERT_EQ(props->raw_value_size, values.size() * value_len_found);
+ const uint64_t table_size = *reinterpret_cast<const uint64_t*>(
+ props
+ ->user_collected_properties
+ [CuckooTablePropertyNames::kHashTableSize]
+ .data());
+ ASSERT_EQ(expected_table_size, table_size);
+ const uint32_t num_hash_func_found = *reinterpret_cast<const uint32_t*>(
+ props->user_collected_properties[CuckooTablePropertyNames::kNumHashFunc]
+ .data());
+ ASSERT_EQ(expected_num_hash_func, num_hash_func_found);
+ const uint32_t cuckoo_block_size = *reinterpret_cast<const uint32_t*>(
+ props
+ ->user_collected_properties
+ [CuckooTablePropertyNames::kCuckooBlockSize]
+ .data());
+ ASSERT_EQ(expected_cuckoo_block_size, cuckoo_block_size);
+ const bool is_last_level_found = *reinterpret_cast<const bool*>(
+ props->user_collected_properties[CuckooTablePropertyNames::kIsLastLevel]
+ .data());
+ ASSERT_EQ(expected_is_last_level, is_last_level_found);
+
+ ASSERT_EQ(props->num_entries, keys.size());
+ ASSERT_EQ(props->num_deletions, num_deletions);
+ ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size());
+ ASSERT_EQ(props->data_size,
+ expected_unused_bucket.size() *
+ (expected_table_size + expected_cuckoo_block_size - 1));
+ ASSERT_EQ(props->raw_key_size, keys.size() * props->fixed_key_len);
+ ASSERT_EQ(props->column_family_id, 0);
+ ASSERT_EQ(props->column_family_name, kDefaultColumnFamilyName);
+
+ // Check contents of the bucket.
+ std::vector<bool> keys_found(keys.size(), false);
+ size_t bucket_size = expected_unused_bucket.size();
+ for (uint32_t i = 0; i + 1 < table_size + cuckoo_block_size; ++i) {
+ Slice read_slice;
+ ASSERT_OK(file_reader->Read(IOOptions(), i * bucket_size, bucket_size,
+ &read_slice, nullptr, nullptr,
+ Env::IO_TOTAL /* rate_limiter_priority */));
+ size_t key_idx =
+ std::find(expected_locations.begin(), expected_locations.end(), i) -
+ expected_locations.begin();
+ if (key_idx == keys.size()) {
+ // i is not one of the expected locations. Empty bucket.
+ if (read_slice.data() == nullptr) {
+ ASSERT_EQ(0, expected_unused_bucket.size());
+ } else {
+ ASSERT_EQ(read_slice.compare(expected_unused_bucket), 0);
+ }
+ } else {
+ keys_found[key_idx] = true;
+ ASSERT_EQ(read_slice.compare(keys[key_idx] + values[key_idx]), 0);
+ }
+ }
+ for (auto key_found : keys_found) {
+ // Check that all keys wereReader found.
+ ASSERT_TRUE(key_found);
+ }
+ }
+
+ std::string GetInternalKey(Slice user_key, bool zero_seqno,
+ ValueType type = kTypeValue) {
+ IterKey ikey;
+ ikey.SetInternalKey(user_key, zero_seqno ? 0 : 1000, type);
+ return ikey.GetInternalKey().ToString();
+ }
+
+ uint64_t NextPowOf2(uint64_t num) {
+ uint64_t n = 2;
+ while (n <= num) {
+ n *= 2;
+ }
+ return n;
+ }
+
+ uint64_t GetExpectedTableSize(uint64_t num) {
+ return NextPowOf2(static_cast<uint64_t>(num / kHashTableRatio));
+ }
+
+ Env* env_;
+ FileOptions file_options_;
+ std::string fname;
+ const double kHashTableRatio = 0.9;
+};
+
+TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) {
+ std::unique_ptr<WritableFile> writable_file;
+ fname = test::PerThreadDBPath("EmptyFile");
+ std::unique_ptr<WritableFileWriter> file_writer;
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, 4, 100,
+ BytewiseComparator(), 1, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ ASSERT_EQ(0UL, builder.FileSize());
+ ASSERT_OK(builder.Finish());
+ ASSERT_OK(file_writer->Close());
+ CheckFileContents({}, {}, {}, "", 2, 2, false);
+}
+
+TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
+ for (auto type : {kTypeValue, kTypeDeletion}) {
+ uint32_t num_hash_fun = 4;
+ std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
+ std::vector<std::string> values;
+ if (type == kTypeValue) {
+ values = {"v01", "v02", "v03", "v04"};
+ } else {
+ values = {"", "", "", ""};
+ }
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {user_keys[0], {0, 1, 2, 3}},
+ {user_keys[1], {1, 2, 3, 4}},
+ {user_keys[2], {2, 3, 4, 5}},
+ {user_keys[3], {3, 4, 5, 6}}};
+ hash_map = std::move(hm);
+
+ std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
+ std::vector<std::string> keys;
+ for (auto& user_key : user_keys) {
+ keys.push_back(GetInternalKey(user_key, false, type));
+ }
+ uint64_t expected_table_size = GetExpectedTableSize(keys.size());
+
+ fname = test::PerThreadDBPath("NoCollisionFullKey");
+ std::unique_ptr<WritableFileWriter> file_writer;
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
+ 100, BytewiseComparator(), 1, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ for (uint32_t i = 0; i < user_keys.size(); i++) {
+ builder.Add(Slice(keys[i]), Slice(values[i]));
+ ASSERT_EQ(builder.NumEntries(), i + 1);
+ ASSERT_OK(builder.status());
+ }
+ size_t bucket_size = keys[0].size() + values[0].size();
+ ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
+ ASSERT_OK(builder.Finish());
+ ASSERT_OK(file_writer->Close());
+ ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
+
+ std::string expected_unused_bucket = GetInternalKey("key00", true);
+ expected_unused_bucket += std::string(values[0].size(), 'a');
+ CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
+ expected_table_size, 2, false);
+ }
+}
+
+TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
+ uint32_t num_hash_fun = 4;
+ std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
+ std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {user_keys[0], {0, 1, 2, 3}},
+ {user_keys[1], {0, 1, 2, 3}},
+ {user_keys[2], {0, 1, 2, 3}},
+ {user_keys[3], {0, 1, 2, 3}},
+ };
+ hash_map = std::move(hm);
+
+ std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
+ std::vector<std::string> keys;
+ for (auto& user_key : user_keys) {
+ keys.push_back(GetInternalKey(user_key, false));
+ }
+ uint64_t expected_table_size = GetExpectedTableSize(keys.size());
+
+ fname = test::PerThreadDBPath("WithCollisionFullKey");
+ std::unique_ptr<WritableFileWriter> file_writer;
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
+ 100, BytewiseComparator(), 1, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ for (uint32_t i = 0; i < user_keys.size(); i++) {
+ builder.Add(Slice(keys[i]), Slice(values[i]));
+ ASSERT_EQ(builder.NumEntries(), i + 1);
+ ASSERT_OK(builder.status());
+ }
+ size_t bucket_size = keys[0].size() + values[0].size();
+ ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
+ ASSERT_OK(builder.Finish());
+ ASSERT_OK(file_writer->Close());
+ ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
+
+ std::string expected_unused_bucket = GetInternalKey("key00", true);
+ expected_unused_bucket += std::string(values[0].size(), 'a');
+ CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
+ expected_table_size, 4, false);
+}
+
+TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
+ uint32_t num_hash_fun = 4;
+ std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
+ std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {user_keys[0], {0, 1, 2, 3}},
+ {user_keys[1], {0, 1, 2, 3}},
+ {user_keys[2], {0, 1, 2, 3}},
+ {user_keys[3], {0, 1, 2, 3}},
+ };
+ hash_map = std::move(hm);
+
+ std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
+ std::vector<std::string> keys;
+ for (auto& user_key : user_keys) {
+ keys.push_back(GetInternalKey(user_key, false));
+ }
+ uint64_t expected_table_size = GetExpectedTableSize(keys.size());
+
+ std::unique_ptr<WritableFileWriter> file_writer;
+ uint32_t cuckoo_block_size = 2;
+ fname = test::PerThreadDBPath("WithCollisionFullKey2");
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+ CuckooTableBuilder builder(
+ file_writer.get(), kHashTableRatio, num_hash_fun, 100,
+ BytewiseComparator(), cuckoo_block_size, false, false, GetSliceHash,
+ 0 /* column_family_id */, kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ for (uint32_t i = 0; i < user_keys.size(); i++) {
+ builder.Add(Slice(keys[i]), Slice(values[i]));
+ ASSERT_EQ(builder.NumEntries(), i + 1);
+ ASSERT_OK(builder.status());
+ }
+ size_t bucket_size = keys[0].size() + values[0].size();
+ ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
+ ASSERT_OK(builder.Finish());
+ ASSERT_OK(file_writer->Close());
+ ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
+
+ std::string expected_unused_bucket = GetInternalKey("key00", true);
+ expected_unused_bucket += std::string(values[0].size(), 'a');
+ CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
+ expected_table_size, 3, false, cuckoo_block_size);
+}
+
+TEST_F(CuckooBuilderTest, WithCollisionPathFullKey) {
+ // Have two hash functions. Insert elements with overlapping hashes.
+ // Finally insert an element with hash value somewhere in the middle
+ // so that it displaces all the elements after that.
+ uint32_t num_hash_fun = 2;
+ std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04",
+ "key05"};
+ std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}},
+ {user_keys[3], {3, 4}}, {user_keys[4], {0, 2}},
+ };
+ hash_map = std::move(hm);
+
+ std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};
+ std::vector<std::string> keys;
+ for (auto& user_key : user_keys) {
+ keys.push_back(GetInternalKey(user_key, false));
+ }
+ uint64_t expected_table_size = GetExpectedTableSize(keys.size());
+
+ std::unique_ptr<WritableFileWriter> file_writer;
+ fname = test::PerThreadDBPath("WithCollisionPathFullKey");
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
+ 100, BytewiseComparator(), 1, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ for (uint32_t i = 0; i < user_keys.size(); i++) {
+ builder.Add(Slice(keys[i]), Slice(values[i]));
+ ASSERT_EQ(builder.NumEntries(), i + 1);
+ ASSERT_OK(builder.status());
+ }
+ size_t bucket_size = keys[0].size() + values[0].size();
+ ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
+ ASSERT_OK(builder.Finish());
+ ASSERT_OK(file_writer->Close());
+ ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
+
+ std::string expected_unused_bucket = GetInternalKey("key00", true);
+ expected_unused_bucket += std::string(values[0].size(), 'a');
+ CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
+ expected_table_size, 2, false);
+}
+
+TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
+ uint32_t num_hash_fun = 2;
+ std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04",
+ "key05"};
+ std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {3, 4}},
+ {user_keys[3], {4, 5}}, {user_keys[4], {0, 3}},
+ };
+ hash_map = std::move(hm);
+
+ std::vector<uint64_t> expected_locations = {2, 1, 3, 4, 0};
+ std::vector<std::string> keys;
+ for (auto& user_key : user_keys) {
+ keys.push_back(GetInternalKey(user_key, false));
+ }
+ uint64_t expected_table_size = GetExpectedTableSize(keys.size());
+
+ std::unique_ptr<WritableFileWriter> file_writer;
+ fname = test::PerThreadDBPath("WithCollisionPathFullKeyAndCuckooBlock");
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
+ 100, BytewiseComparator(), 2, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ for (uint32_t i = 0; i < user_keys.size(); i++) {
+ builder.Add(Slice(keys[i]), Slice(values[i]));
+ ASSERT_EQ(builder.NumEntries(), i + 1);
+ ASSERT_OK(builder.status());
+ }
+ size_t bucket_size = keys[0].size() + values[0].size();
+ ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
+ ASSERT_OK(builder.Finish());
+ ASSERT_OK(file_writer->Close());
+ ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
+
+ std::string expected_unused_bucket = GetInternalKey("key00", true);
+ expected_unused_bucket += std::string(values[0].size(), 'a');
+ CheckFileContents(keys, values, expected_locations, expected_unused_bucket,
+ expected_table_size, 2, false, 2);
+}
+
+TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
+ uint32_t num_hash_fun = 4;
+ std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
+ std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {user_keys[0], {0, 1, 2, 3}},
+ {user_keys[1], {1, 2, 3, 4}},
+ {user_keys[2], {2, 3, 4, 5}},
+ {user_keys[3], {3, 4, 5, 6}}};
+ hash_map = std::move(hm);
+
+ std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
+ uint64_t expected_table_size = GetExpectedTableSize(user_keys.size());
+
+ std::unique_ptr<WritableFileWriter> file_writer;
+ fname = test::PerThreadDBPath("NoCollisionUserKey");
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
+ 100, BytewiseComparator(), 1, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ for (uint32_t i = 0; i < user_keys.size(); i++) {
+ builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
+ ASSERT_EQ(builder.NumEntries(), i + 1);
+ ASSERT_OK(builder.status());
+ }
+ size_t bucket_size = user_keys[0].size() + values[0].size();
+ ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
+ ASSERT_OK(builder.Finish());
+ ASSERT_OK(file_writer->Close());
+ ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
+
+ std::string expected_unused_bucket = "key00";
+ expected_unused_bucket += std::string(values[0].size(), 'a');
+ CheckFileContents(user_keys, values, expected_locations,
+ expected_unused_bucket, expected_table_size, 2, true);
+}
+
+TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
+ uint32_t num_hash_fun = 4;
+ std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
+ std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {user_keys[0], {0, 1, 2, 3}},
+ {user_keys[1], {0, 1, 2, 3}},
+ {user_keys[2], {0, 1, 2, 3}},
+ {user_keys[3], {0, 1, 2, 3}},
+ };
+ hash_map = std::move(hm);
+
+ std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
+ uint64_t expected_table_size = GetExpectedTableSize(user_keys.size());
+
+ std::unique_ptr<WritableFileWriter> file_writer;
+ fname = test::PerThreadDBPath("WithCollisionUserKey");
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
+ 100, BytewiseComparator(), 1, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ for (uint32_t i = 0; i < user_keys.size(); i++) {
+ builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
+ ASSERT_EQ(builder.NumEntries(), i + 1);
+ ASSERT_OK(builder.status());
+ }
+ size_t bucket_size = user_keys[0].size() + values[0].size();
+ ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
+ ASSERT_OK(builder.Finish());
+ ASSERT_OK(file_writer->Close());
+ ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
+
+ std::string expected_unused_bucket = "key00";
+ expected_unused_bucket += std::string(values[0].size(), 'a');
+ CheckFileContents(user_keys, values, expected_locations,
+ expected_unused_bucket, expected_table_size, 4, true);
+}
+
+TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) {
+ uint32_t num_hash_fun = 2;
+ std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04",
+ "key05"};
+ std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}},
+ {user_keys[3], {3, 4}}, {user_keys[4], {0, 2}},
+ };
+ hash_map = std::move(hm);
+
+ std::vector<uint64_t> expected_locations = {0, 1, 3, 4, 2};
+ uint64_t expected_table_size = GetExpectedTableSize(user_keys.size());
+
+ std::unique_ptr<WritableFileWriter> file_writer;
+ fname = test::PerThreadDBPath("WithCollisionPathUserKey");
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
+ 2, BytewiseComparator(), 1, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ for (uint32_t i = 0; i < user_keys.size(); i++) {
+ builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
+ ASSERT_EQ(builder.NumEntries(), i + 1);
+ ASSERT_OK(builder.status());
+ }
+ size_t bucket_size = user_keys[0].size() + values[0].size();
+ ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
+ ASSERT_OK(builder.Finish());
+ ASSERT_OK(file_writer->Close());
+ ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
+
+ std::string expected_unused_bucket = "key00";
+ expected_unused_bucket += std::string(values[0].size(), 'a');
+ CheckFileContents(user_keys, values, expected_locations,
+ expected_unused_bucket, expected_table_size, 2, true);
+}
+
+TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
+ // Have two hash functions. Insert elements with overlapping hashes.
+ // Finally try inserting an element with hash value somewhere in the middle
+ // and it should fail because the no. of elements to displace is too high.
+ uint32_t num_hash_fun = 2;
+ std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04",
+ "key05"};
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {user_keys[0], {0, 1}}, {user_keys[1], {1, 2}}, {user_keys[2], {2, 3}},
+ {user_keys[3], {3, 4}}, {user_keys[4], {0, 1}},
+ };
+ hash_map = std::move(hm);
+
+ std::unique_ptr<WritableFileWriter> file_writer;
+ fname = test::PerThreadDBPath("WithCollisionPathUserKey");
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
+ 2, BytewiseComparator(), 1, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+ for (uint32_t i = 0; i < user_keys.size(); i++) {
+ builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
+ ASSERT_EQ(builder.NumEntries(), i + 1);
+ ASSERT_OK(builder.status());
+ }
+ ASSERT_TRUE(builder.Finish().IsNotSupported());
+ ASSERT_OK(file_writer->Close());
+}
+
+TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) {
+ // Need to have a temporary variable here as VS compiler does not currently
+ // support operator= with initializer_list as a parameter
+ std::unordered_map<std::string, std::vector<uint64_t>> hm = {
+ {"repeatedkey", {0, 1, 2, 3}}};
+ hash_map = std::move(hm);
+ uint32_t num_hash_fun = 4;
+ std::string user_key = "repeatedkey";
+
+ std::unique_ptr<WritableFileWriter> file_writer;
+ fname = test::PerThreadDBPath("FailWhenSameKeyInserted");
+ ASSERT_OK(WritableFileWriter::Create(env_->GetFileSystem(), fname,
+ file_options_, &file_writer, nullptr));
+ CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
+ 100, BytewiseComparator(), 1, false, false,
+ GetSliceHash, 0 /* column_family_id */,
+ kDefaultColumnFamilyName);
+ ASSERT_OK(builder.status());
+
+ builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));
+ ASSERT_EQ(builder.NumEntries(), 1u);
+ ASSERT_OK(builder.status());
+ builder.Add(Slice(GetInternalKey(user_key, true)), Slice("value2"));
+ ASSERT_EQ(builder.NumEntries(), 2u);
+ ASSERT_OK(builder.status());
+
+ ASSERT_TRUE(builder.Finish().IsNotSupported());
+ ASSERT_OK(file_writer->Close());
+}
+} // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+ ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
+
+#else
+#include <stdio.h>
+
+int main(int /*argc*/, char** /*argv*/) {
+ fprintf(stderr, "SKIPPED as Cuckoo table is not supported in ROCKSDB_LITE\n");
+ return 0;
+}
+
+#endif // ROCKSDB_LITE