diff options
Diffstat (limited to '')
-rw-r--r-- | src/rocksdb/memtable/skiplistrep.cc | 370 |
1 file changed, 370 insertions, 0 deletions
diff --git a/src/rocksdb/memtable/skiplistrep.cc b/src/rocksdb/memtable/skiplistrep.cc new file mode 100644 index 000000000..40f13a2c1 --- /dev/null +++ b/src/rocksdb/memtable/skiplistrep.cc @@ -0,0 +1,370 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +#include <random> + +#include "db/memtable.h" +#include "memory/arena.h" +#include "memtable/inlineskiplist.h" +#include "rocksdb/memtablerep.h" +#include "rocksdb/utilities/options_type.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +namespace { +class SkipListRep : public MemTableRep { + InlineSkipList<const MemTableRep::KeyComparator&> skip_list_; + const MemTableRep::KeyComparator& cmp_; + const SliceTransform* transform_; + const size_t lookahead_; + + friend class LookaheadIterator; + + public: + explicit SkipListRep(const MemTableRep::KeyComparator& compare, + Allocator* allocator, const SliceTransform* transform, + const size_t lookahead) + : MemTableRep(allocator), + skip_list_(compare, allocator), + cmp_(compare), + transform_(transform), + lookahead_(lookahead) {} + + KeyHandle Allocate(const size_t len, char** buf) override { + *buf = skip_list_.AllocateKey(len); + return static_cast<KeyHandle>(*buf); + } + + // Insert key into the list. + // REQUIRES: nothing that compares equal to key is currently in the list. 
+ void Insert(KeyHandle handle) override { + skip_list_.Insert(static_cast<char*>(handle)); + } + + bool InsertKey(KeyHandle handle) override { + return skip_list_.Insert(static_cast<char*>(handle)); + } + + void InsertWithHint(KeyHandle handle, void** hint) override { + skip_list_.InsertWithHint(static_cast<char*>(handle), hint); + } + + bool InsertKeyWithHint(KeyHandle handle, void** hint) override { + return skip_list_.InsertWithHint(static_cast<char*>(handle), hint); + } + + void InsertWithHintConcurrently(KeyHandle handle, void** hint) override { + skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle), hint); + } + + bool InsertKeyWithHintConcurrently(KeyHandle handle, void** hint) override { + return skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle), + hint); + } + + void InsertConcurrently(KeyHandle handle) override { + skip_list_.InsertConcurrently(static_cast<char*>(handle)); + } + + bool InsertKeyConcurrently(KeyHandle handle) override { + return skip_list_.InsertConcurrently(static_cast<char*>(handle)); + } + + // Returns true iff an entry that compares equal to key is in the list. + bool Contains(const char* key) const override { + return skip_list_.Contains(key); + } + + size_t ApproximateMemoryUsage() override { + // All memory is allocated through allocator; nothing to report here + return 0; + } + + void Get(const LookupKey& k, void* callback_args, + bool (*callback_func)(void* arg, const char* entry)) override { + SkipListRep::Iterator iter(&skip_list_); + Slice dummy_slice; + for (iter.Seek(dummy_slice, k.memtable_key().data()); + iter.Valid() && callback_func(callback_args, iter.key()); + iter.Next()) { + } + } + + uint64_t ApproximateNumEntries(const Slice& start_ikey, + const Slice& end_ikey) override { + std::string tmp; + uint64_t start_count = + skip_list_.EstimateCount(EncodeKey(&tmp, start_ikey)); + uint64_t end_count = skip_list_.EstimateCount(EncodeKey(&tmp, end_ikey)); + return (end_count >= start_count) ? 
(end_count - start_count) : 0; + } + + void UniqueRandomSample(const uint64_t num_entries, + const uint64_t target_sample_size, + std::unordered_set<const char*>* entries) override { + entries->clear(); + // Avoid divide-by-0. + assert(target_sample_size > 0); + assert(num_entries > 0); + // NOTE: the size of entries is not enforced to be exactly + // target_sample_size at the end of this function, it might be slightly + // greater or smaller. + SkipListRep::Iterator iter(&skip_list_); + // There are two methods to create the subset of samples (size m) + // from the table containing N elements: + // 1-Iterate linearly through the N memtable entries. For each entry i, + // add it to the sample set with a probability + // (target_sample_size - entries.size() ) / (N-i). + // + // 2-Pick m random elements without repetition. + // We pick Option 2 when m<sqrt(N) and + // Option 1 when m > sqrt(N). + if (target_sample_size > + static_cast<uint64_t>(std::sqrt(1.0 * num_entries))) { + Random* rnd = Random::GetTLSInstance(); + iter.SeekToFirst(); + uint64_t counter = 0, num_samples_left = target_sample_size; + for (; iter.Valid() && (num_samples_left > 0); iter.Next(), counter++) { + // Add entry to sample set with probability + // num_samples_left/(num_entries - counter). + if (rnd->Next() % (num_entries - counter) < num_samples_left) { + entries->insert(iter.key()); + num_samples_left--; + } + } + } else { + // Option 2: pick m random elements with no duplicates. + // If Option 2 is picked, then target_sample_size<sqrt(N) + // Using a set spares the need to check for duplicates. + for (uint64_t i = 0; i < target_sample_size; i++) { + // We give it 5 attempts to find a non-duplicate + // With 5 attempts, the chances of returning `entries` set + // of size target_sample_size is: + // PROD_{i=1}^{target_sample_size-1} [1-(i/N)^5] + // which is monotonically increasing with N in the worse case + // of target_sample_size=sqrt(N), and is always >99.9% for N>4. 
+ // At worst, for the final pick , when m=sqrt(N) there is + // a probability of p= 1/sqrt(N) chances to find a duplicate. + for (uint64_t j = 0; j < 5; j++) { + iter.RandomSeek(); + // unordered_set::insert returns pair<iterator, bool>. + // The second element is true if an insert successfully happened. + // If element is already in the set, this bool will be false, and + // true otherwise. + if ((entries->insert(iter.key())).second) { + break; + } + } + } + } + } + + ~SkipListRep() override {} + + // Iteration over the contents of a skip list + class Iterator : public MemTableRep::Iterator { + InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_; + + public: + // Initialize an iterator over the specified list. + // The returned iterator is not valid. + explicit Iterator( + const InlineSkipList<const MemTableRep::KeyComparator&>* list) + : iter_(list) {} + + ~Iterator() override {} + + // Returns true iff the iterator is positioned at a valid node. + bool Valid() const override { return iter_.Valid(); } + + // Returns the key at the current position. + // REQUIRES: Valid() + const char* key() const override { return iter_.key(); } + + // Advances to the next position. + // REQUIRES: Valid() + void Next() override { iter_.Next(); } + + // Advances to the previous position. + // REQUIRES: Valid() + void Prev() override { iter_.Prev(); } + + // Advance to the first entry with a key >= target + void Seek(const Slice& user_key, const char* memtable_key) override { + if (memtable_key != nullptr) { + iter_.Seek(memtable_key); + } else { + iter_.Seek(EncodeKey(&tmp_, user_key)); + } + } + + // Retreat to the last entry with a key <= target + void SeekForPrev(const Slice& user_key, const char* memtable_key) override { + if (memtable_key != nullptr) { + iter_.SeekForPrev(memtable_key); + } else { + iter_.SeekForPrev(EncodeKey(&tmp_, user_key)); + } + } + + void RandomSeek() override { iter_.RandomSeek(); } + + // Position at the first entry in list. 
+ // Final state of iterator is Valid() iff list is not empty. + void SeekToFirst() override { iter_.SeekToFirst(); } + + // Position at the last entry in list. + // Final state of iterator is Valid() iff list is not empty. + void SeekToLast() override { iter_.SeekToLast(); } + + protected: + std::string tmp_; // For passing to EncodeKey + }; + + // Iterator over the contents of a skip list which also keeps track of the + // previously visited node. In Seek(), it examines a few nodes after it + // first, falling back to O(log n) search from the head of the list only if + // the target key hasn't been found. + class LookaheadIterator : public MemTableRep::Iterator { + public: + explicit LookaheadIterator(const SkipListRep& rep) + : rep_(rep), iter_(&rep_.skip_list_), prev_(iter_) {} + + ~LookaheadIterator() override {} + + bool Valid() const override { return iter_.Valid(); } + + const char* key() const override { + assert(Valid()); + return iter_.key(); + } + + void Next() override { + assert(Valid()); + + bool advance_prev = true; + if (prev_.Valid()) { + auto k1 = rep_.UserKey(prev_.key()); + auto k2 = rep_.UserKey(iter_.key()); + + if (k1.compare(k2) == 0) { + // same user key, don't move prev_ + advance_prev = false; + } else if (rep_.transform_) { + // only advance prev_ if it has the same prefix as iter_ + auto t1 = rep_.transform_->Transform(k1); + auto t2 = rep_.transform_->Transform(k2); + advance_prev = t1.compare(t2) == 0; + } + } + + if (advance_prev) { + prev_ = iter_; + } + iter_.Next(); + } + + void Prev() override { + assert(Valid()); + iter_.Prev(); + prev_ = iter_; + } + + void Seek(const Slice& internal_key, const char* memtable_key) override { + const char* encoded_key = (memtable_key != nullptr) + ? 
memtable_key + : EncodeKey(&tmp_, internal_key); + + if (prev_.Valid() && rep_.cmp_(encoded_key, prev_.key()) >= 0) { + // prev_.key() is smaller or equal to our target key; do a quick + // linear search (at most lookahead_ steps) starting from prev_ + iter_ = prev_; + + size_t cur = 0; + while (cur++ <= rep_.lookahead_ && iter_.Valid()) { + if (rep_.cmp_(encoded_key, iter_.key()) <= 0) { + return; + } + Next(); + } + } + + iter_.Seek(encoded_key); + prev_ = iter_; + } + + void SeekForPrev(const Slice& internal_key, + const char* memtable_key) override { + const char* encoded_key = (memtable_key != nullptr) + ? memtable_key + : EncodeKey(&tmp_, internal_key); + iter_.SeekForPrev(encoded_key); + prev_ = iter_; + } + + void SeekToFirst() override { + iter_.SeekToFirst(); + prev_ = iter_; + } + + void SeekToLast() override { + iter_.SeekToLast(); + prev_ = iter_; + } + + protected: + std::string tmp_; // For passing to EncodeKey + + private: + const SkipListRep& rep_; + InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_; + InlineSkipList<const MemTableRep::KeyComparator&>::Iterator prev_; + }; + + MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override { + if (lookahead_ > 0) { + void* mem = + arena ? arena->AllocateAligned(sizeof(SkipListRep::LookaheadIterator)) + : + operator new(sizeof(SkipListRep::LookaheadIterator)); + return new (mem) SkipListRep::LookaheadIterator(*this); + } else { + void* mem = arena ? 
arena->AllocateAligned(sizeof(SkipListRep::Iterator)) + : + operator new(sizeof(SkipListRep::Iterator)); + return new (mem) SkipListRep::Iterator(&skip_list_); + } + } +}; +} // namespace + +static std::unordered_map<std::string, OptionTypeInfo> skiplist_factory_info = { +#ifndef ROCKSDB_LITE + {"lookahead", + {0, OptionType::kSizeT, OptionVerificationType::kNormal, + OptionTypeFlags::kDontSerialize /*Since it is part of the ID*/}}, +#endif +}; + +SkipListFactory::SkipListFactory(size_t lookahead) : lookahead_(lookahead) { + RegisterOptions("SkipListFactoryOptions", &lookahead_, + &skiplist_factory_info); +} + +std::string SkipListFactory::GetId() const { + std::string id = Name(); + if (lookahead_ > 0) { + id.append(":").append(std::to_string(lookahead_)); + } + return id; +} + +MemTableRep* SkipListFactory::CreateMemTableRep( + const MemTableRep::KeyComparator& compare, Allocator* allocator, + const SliceTransform* transform, Logger* /*logger*/) { + return new SkipListRep(compare, allocator, transform, lookahead_); +} + +} // namespace ROCKSDB_NAMESPACE |