summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/memtable/skiplistrep.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/rocksdb/memtable/skiplistrep.cc')
-rw-r--r--src/rocksdb/memtable/skiplistrep.cc370
1 files changed, 370 insertions, 0 deletions
diff --git a/src/rocksdb/memtable/skiplistrep.cc b/src/rocksdb/memtable/skiplistrep.cc
new file mode 100644
index 000000000..40f13a2c1
--- /dev/null
+++ b/src/rocksdb/memtable/skiplistrep.cc
@@ -0,0 +1,370 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#include <random>
+
+#include "db/memtable.h"
+#include "memory/arena.h"
+#include "memtable/inlineskiplist.h"
+#include "rocksdb/memtablerep.h"
+#include "rocksdb/utilities/options_type.h"
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace {
+class SkipListRep : public MemTableRep {
+ InlineSkipList<const MemTableRep::KeyComparator&> skip_list_;
+ const MemTableRep::KeyComparator& cmp_;
+ const SliceTransform* transform_;
+ const size_t lookahead_;
+
+ friend class LookaheadIterator;
+
+ public:
+ explicit SkipListRep(const MemTableRep::KeyComparator& compare,
+ Allocator* allocator, const SliceTransform* transform,
+ const size_t lookahead)
+ : MemTableRep(allocator),
+ skip_list_(compare, allocator),
+ cmp_(compare),
+ transform_(transform),
+ lookahead_(lookahead) {}
+
+ KeyHandle Allocate(const size_t len, char** buf) override {
+ *buf = skip_list_.AllocateKey(len);
+ return static_cast<KeyHandle>(*buf);
+ }
+
+ // Insert key into the list.
+ // REQUIRES: nothing that compares equal to key is currently in the list.
+ void Insert(KeyHandle handle) override {
+ skip_list_.Insert(static_cast<char*>(handle));
+ }
+
+ bool InsertKey(KeyHandle handle) override {
+ return skip_list_.Insert(static_cast<char*>(handle));
+ }
+
+ void InsertWithHint(KeyHandle handle, void** hint) override {
+ skip_list_.InsertWithHint(static_cast<char*>(handle), hint);
+ }
+
+ bool InsertKeyWithHint(KeyHandle handle, void** hint) override {
+ return skip_list_.InsertWithHint(static_cast<char*>(handle), hint);
+ }
+
+ void InsertWithHintConcurrently(KeyHandle handle, void** hint) override {
+ skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle), hint);
+ }
+
+ bool InsertKeyWithHintConcurrently(KeyHandle handle, void** hint) override {
+ return skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle),
+ hint);
+ }
+
+ void InsertConcurrently(KeyHandle handle) override {
+ skip_list_.InsertConcurrently(static_cast<char*>(handle));
+ }
+
+ bool InsertKeyConcurrently(KeyHandle handle) override {
+ return skip_list_.InsertConcurrently(static_cast<char*>(handle));
+ }
+
+ // Returns true iff an entry that compares equal to key is in the list.
+ bool Contains(const char* key) const override {
+ return skip_list_.Contains(key);
+ }
+
+ size_t ApproximateMemoryUsage() override {
+ // All memory is allocated through allocator; nothing to report here
+ return 0;
+ }
+
+ void Get(const LookupKey& k, void* callback_args,
+ bool (*callback_func)(void* arg, const char* entry)) override {
+ SkipListRep::Iterator iter(&skip_list_);
+ Slice dummy_slice;
+ for (iter.Seek(dummy_slice, k.memtable_key().data());
+ iter.Valid() && callback_func(callback_args, iter.key());
+ iter.Next()) {
+ }
+ }
+
+ uint64_t ApproximateNumEntries(const Slice& start_ikey,
+ const Slice& end_ikey) override {
+ std::string tmp;
+ uint64_t start_count =
+ skip_list_.EstimateCount(EncodeKey(&tmp, start_ikey));
+ uint64_t end_count = skip_list_.EstimateCount(EncodeKey(&tmp, end_ikey));
+ return (end_count >= start_count) ? (end_count - start_count) : 0;
+ }
+
+ void UniqueRandomSample(const uint64_t num_entries,
+ const uint64_t target_sample_size,
+ std::unordered_set<const char*>* entries) override {
+ entries->clear();
+ // Avoid divide-by-0.
+ assert(target_sample_size > 0);
+ assert(num_entries > 0);
+ // NOTE: the size of entries is not enforced to be exactly
+ // target_sample_size at the end of this function, it might be slightly
+ // greater or smaller.
+ SkipListRep::Iterator iter(&skip_list_);
+ // There are two methods to create the subset of samples (size m)
+ // from the table containing N elements:
+ // 1-Iterate linearly through the N memtable entries. For each entry i,
+ // add it to the sample set with a probability
+ // (target_sample_size - entries.size() ) / (N-i).
+ //
+ // 2-Pick m random elements without repetition.
+ // We pick Option 2 when m<sqrt(N) and
+ // Option 1 when m > sqrt(N).
+ if (target_sample_size >
+ static_cast<uint64_t>(std::sqrt(1.0 * num_entries))) {
+ Random* rnd = Random::GetTLSInstance();
+ iter.SeekToFirst();
+ uint64_t counter = 0, num_samples_left = target_sample_size;
+ for (; iter.Valid() && (num_samples_left > 0); iter.Next(), counter++) {
+ // Add entry to sample set with probability
+ // num_samples_left/(num_entries - counter).
+ if (rnd->Next() % (num_entries - counter) < num_samples_left) {
+ entries->insert(iter.key());
+ num_samples_left--;
+ }
+ }
+ } else {
+ // Option 2: pick m random elements with no duplicates.
+ // If Option 2 is picked, then target_sample_size<sqrt(N)
+ // Using a set spares the need to check for duplicates.
+ for (uint64_t i = 0; i < target_sample_size; i++) {
+ // We give it 5 attempts to find a non-duplicate
+ // With 5 attempts, the chances of returning `entries` set
+ // of size target_sample_size is:
+ // PROD_{i=1}^{target_sample_size-1} [1-(i/N)^5]
+ // which is monotonically increasing with N in the worse case
+ // of target_sample_size=sqrt(N), and is always >99.9% for N>4.
+ // At worst, for the final pick , when m=sqrt(N) there is
+ // a probability of p= 1/sqrt(N) chances to find a duplicate.
+ for (uint64_t j = 0; j < 5; j++) {
+ iter.RandomSeek();
+ // unordered_set::insert returns pair<iterator, bool>.
+ // The second element is true if an insert successfully happened.
+ // If element is already in the set, this bool will be false, and
+ // true otherwise.
+ if ((entries->insert(iter.key())).second) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ ~SkipListRep() override {}
+
+ // Iteration over the contents of a skip list
+ class Iterator : public MemTableRep::Iterator {
+ InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_;
+
+ public:
+ // Initialize an iterator over the specified list.
+ // The returned iterator is not valid.
+ explicit Iterator(
+ const InlineSkipList<const MemTableRep::KeyComparator&>* list)
+ : iter_(list) {}
+
+ ~Iterator() override {}
+
+ // Returns true iff the iterator is positioned at a valid node.
+ bool Valid() const override { return iter_.Valid(); }
+
+ // Returns the key at the current position.
+ // REQUIRES: Valid()
+ const char* key() const override { return iter_.key(); }
+
+ // Advances to the next position.
+ // REQUIRES: Valid()
+ void Next() override { iter_.Next(); }
+
+ // Advances to the previous position.
+ // REQUIRES: Valid()
+ void Prev() override { iter_.Prev(); }
+
+ // Advance to the first entry with a key >= target
+ void Seek(const Slice& user_key, const char* memtable_key) override {
+ if (memtable_key != nullptr) {
+ iter_.Seek(memtable_key);
+ } else {
+ iter_.Seek(EncodeKey(&tmp_, user_key));
+ }
+ }
+
+ // Retreat to the last entry with a key <= target
+ void SeekForPrev(const Slice& user_key, const char* memtable_key) override {
+ if (memtable_key != nullptr) {
+ iter_.SeekForPrev(memtable_key);
+ } else {
+ iter_.SeekForPrev(EncodeKey(&tmp_, user_key));
+ }
+ }
+
+ void RandomSeek() override { iter_.RandomSeek(); }
+
+ // Position at the first entry in list.
+ // Final state of iterator is Valid() iff list is not empty.
+ void SeekToFirst() override { iter_.SeekToFirst(); }
+
+ // Position at the last entry in list.
+ // Final state of iterator is Valid() iff list is not empty.
+ void SeekToLast() override { iter_.SeekToLast(); }
+
+ protected:
+ std::string tmp_; // For passing to EncodeKey
+ };
+
+ // Iterator over the contents of a skip list which also keeps track of the
+ // previously visited node. In Seek(), it examines a few nodes after it
+ // first, falling back to O(log n) search from the head of the list only if
+ // the target key hasn't been found.
+ class LookaheadIterator : public MemTableRep::Iterator {
+ public:
+ explicit LookaheadIterator(const SkipListRep& rep)
+ : rep_(rep), iter_(&rep_.skip_list_), prev_(iter_) {}
+
+ ~LookaheadIterator() override {}
+
+ bool Valid() const override { return iter_.Valid(); }
+
+ const char* key() const override {
+ assert(Valid());
+ return iter_.key();
+ }
+
+ void Next() override {
+ assert(Valid());
+
+ bool advance_prev = true;
+ if (prev_.Valid()) {
+ auto k1 = rep_.UserKey(prev_.key());
+ auto k2 = rep_.UserKey(iter_.key());
+
+ if (k1.compare(k2) == 0) {
+ // same user key, don't move prev_
+ advance_prev = false;
+ } else if (rep_.transform_) {
+ // only advance prev_ if it has the same prefix as iter_
+ auto t1 = rep_.transform_->Transform(k1);
+ auto t2 = rep_.transform_->Transform(k2);
+ advance_prev = t1.compare(t2) == 0;
+ }
+ }
+
+ if (advance_prev) {
+ prev_ = iter_;
+ }
+ iter_.Next();
+ }
+
+ void Prev() override {
+ assert(Valid());
+ iter_.Prev();
+ prev_ = iter_;
+ }
+
+ void Seek(const Slice& internal_key, const char* memtable_key) override {
+ const char* encoded_key = (memtable_key != nullptr)
+ ? memtable_key
+ : EncodeKey(&tmp_, internal_key);
+
+ if (prev_.Valid() && rep_.cmp_(encoded_key, prev_.key()) >= 0) {
+ // prev_.key() is smaller or equal to our target key; do a quick
+ // linear search (at most lookahead_ steps) starting from prev_
+ iter_ = prev_;
+
+ size_t cur = 0;
+ while (cur++ <= rep_.lookahead_ && iter_.Valid()) {
+ if (rep_.cmp_(encoded_key, iter_.key()) <= 0) {
+ return;
+ }
+ Next();
+ }
+ }
+
+ iter_.Seek(encoded_key);
+ prev_ = iter_;
+ }
+
+ void SeekForPrev(const Slice& internal_key,
+ const char* memtable_key) override {
+ const char* encoded_key = (memtable_key != nullptr)
+ ? memtable_key
+ : EncodeKey(&tmp_, internal_key);
+ iter_.SeekForPrev(encoded_key);
+ prev_ = iter_;
+ }
+
+ void SeekToFirst() override {
+ iter_.SeekToFirst();
+ prev_ = iter_;
+ }
+
+ void SeekToLast() override {
+ iter_.SeekToLast();
+ prev_ = iter_;
+ }
+
+ protected:
+ std::string tmp_; // For passing to EncodeKey
+
+ private:
+ const SkipListRep& rep_;
+ InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_;
+ InlineSkipList<const MemTableRep::KeyComparator&>::Iterator prev_;
+ };
+
+ MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override {
+ if (lookahead_ > 0) {
+ void* mem =
+ arena ? arena->AllocateAligned(sizeof(SkipListRep::LookaheadIterator))
+ :
+ operator new(sizeof(SkipListRep::LookaheadIterator));
+ return new (mem) SkipListRep::LookaheadIterator(*this);
+ } else {
+ void* mem = arena ? arena->AllocateAligned(sizeof(SkipListRep::Iterator))
+ :
+ operator new(sizeof(SkipListRep::Iterator));
+ return new (mem) SkipListRep::Iterator(&skip_list_);
+ }
+ }
+};
+} // namespace
+
+static std::unordered_map<std::string, OptionTypeInfo> skiplist_factory_info = {
+#ifndef ROCKSDB_LITE
+ {"lookahead",
+ {0, OptionType::kSizeT, OptionVerificationType::kNormal,
+ OptionTypeFlags::kDontSerialize /*Since it is part of the ID*/}},
+#endif
+};
+
+SkipListFactory::SkipListFactory(size_t lookahead) : lookahead_(lookahead) {
+ RegisterOptions("SkipListFactoryOptions", &lookahead_,
+ &skiplist_factory_info);
+}
+
+std::string SkipListFactory::GetId() const {
+ std::string id = Name();
+ if (lookahead_ > 0) {
+ id.append(":").append(std::to_string(lookahead_));
+ }
+ return id;
+}
+
+MemTableRep* SkipListFactory::CreateMemTableRep(
+ const MemTableRep::KeyComparator& compare, Allocator* allocator,
+ const SliceTransform* transform, Logger* /*logger*/) {
+ return new SkipListRep(compare, allocator, transform, lookahead_);
+}
+
+} // namespace ROCKSDB_NAMESPACE