summaryrefslogtreecommitdiffstats
path: root/src/rocksdb/db/db_iterator_test.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/rocksdb/db/db_iterator_test.cc3265
1 files changed, 3265 insertions, 0 deletions
diff --git a/src/rocksdb/db/db_iterator_test.cc b/src/rocksdb/db/db_iterator_test.cc
new file mode 100644
index 000000000..aaf1408b4
--- /dev/null
+++ b/src/rocksdb/db/db_iterator_test.cc
@@ -0,0 +1,3265 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <functional>
+
+#include "db/arena_wrapped_db_iter.h"
+#include "db/db_iter.h"
+#include "db/db_test_util.h"
+#include "port/port.h"
+#include "port/stack_trace.h"
+#include "rocksdb/iostats_context.h"
+#include "rocksdb/perf_context.h"
+#include "table/block_based/flush_block_policy.h"
+#include "util/random.h"
+#include "utilities/merge_operators/string_append/stringappend2.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A dumb ReadCallback which saying every key is committed.
+class DummyReadCallback : public ReadCallback {
+ public:
+ DummyReadCallback() : ReadCallback(kMaxSequenceNumber) {}
+ bool IsVisibleFullCheck(SequenceNumber /*seq*/) override { return true; }
+ void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; }
+};
+
+// Test param:
+// bool: whether to pass read_callback to NewIterator().
+class DBIteratorTest : public DBTestBase,
+ public testing::WithParamInterface<bool> {
+ public:
+ DBIteratorTest() : DBTestBase("db_iterator_test", /*env_do_fsync=*/true) {}
+
+ Iterator* NewIterator(const ReadOptions& read_options,
+ ColumnFamilyHandle* column_family = nullptr) {
+ if (column_family == nullptr) {
+ column_family = db_->DefaultColumnFamily();
+ }
+ auto* cfd =
+ static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
+ SequenceNumber seq = read_options.snapshot != nullptr
+ ? read_options.snapshot->GetSequenceNumber()
+ : db_->GetLatestSequenceNumber();
+ bool use_read_callback = GetParam();
+ DummyReadCallback* read_callback = nullptr;
+ if (use_read_callback) {
+ read_callback = new DummyReadCallback();
+ read_callback->SetSnapshot(seq);
+ InstrumentedMutexLock lock(&mutex_);
+ read_callbacks_.push_back(
+ std::unique_ptr<DummyReadCallback>(read_callback));
+ }
+ return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback);
+ }
+
+ private:
+ InstrumentedMutex mutex_;
+ std::vector<std::unique_ptr<DummyReadCallback>> read_callbacks_;
+};
+
+TEST_P(DBIteratorTest, IteratorProperty) {
+ // The test needs to be changed if kPersistedTier is supported in iterator.
+ Options options = CurrentOptions();
+ CreateAndReopenWithCF({"pikachu"}, options);
+ ASSERT_OK(Put(1, "1", "2"));
+ ASSERT_OK(Delete(1, "2"));
+ ReadOptions ropt;
+ ropt.pin_data = false;
+ {
+ std::unique_ptr<Iterator> iter(NewIterator(ropt, handles_[1]));
+ iter->SeekToFirst();
+ std::string prop_value;
+ ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
+ ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+ ASSERT_EQ("0", prop_value);
+ ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
+ ASSERT_EQ("1", prop_value);
+ iter->Next();
+ ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+ ASSERT_EQ("Iterator is not valid.", prop_value);
+
+ // Get internal key at which the iteration stopped (tombstone in this case).
+ ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
+ ASSERT_EQ("2", prop_value);
+ }
+ Close();
+}
+
+TEST_P(DBIteratorTest, PersistedTierOnIterator) {
+ // The test needs to be changed if kPersistedTier is supported in iterator.
+ Options options = CurrentOptions();
+ CreateAndReopenWithCF({"pikachu"}, options);
+ ReadOptions ropt;
+ ropt.read_tier = kPersistedTier;
+
+ auto* iter = db_->NewIterator(ropt, handles_[1]);
+ ASSERT_TRUE(iter->status().IsNotSupported());
+ delete iter;
+
+ std::vector<Iterator*> iters;
+ ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
+ Close();
+}
+
+TEST_P(DBIteratorTest, NonBlockingIteration) {
+ do {
+ ReadOptions non_blocking_opts, regular_opts;
+ anon::OptionsOverride options_override;
+ options_override.full_block_cache = true;
+ Options options = CurrentOptions(options_override);
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ non_blocking_opts.read_tier = kBlockCacheTier;
+
+ CreateAndReopenWithCF({"pikachu"}, options);
+ // write one kv to the database.
+ ASSERT_OK(Put(1, "a", "b"));
+
+ // scan using non-blocking iterator. We should find it because
+ // it is in memtable.
+ Iterator* iter = NewIterator(non_blocking_opts, handles_[1]);
+ int count = 0;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ ASSERT_OK(iter->status());
+ count++;
+ }
+ ASSERT_EQ(count, 1);
+ delete iter;
+
+ // flush memtable to storage. Now, the key should not be in the
+ // memtable neither in the block cache.
+ ASSERT_OK(Flush(1));
+
+ // verify that a non-blocking iterator does not find any
+ // kvs. Neither does it do any IOs to storage.
+ uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+ uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+ iter = NewIterator(non_blocking_opts, handles_[1]);
+ count = 0;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ count++;
+ }
+ ASSERT_EQ(count, 0);
+ ASSERT_TRUE(iter->status().IsIncomplete());
+ ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+ ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+ delete iter;
+
+ // read in the specified block via a regular get
+ ASSERT_EQ(Get(1, "a"), "b");
+
+ // verify that we can find it via a non-blocking scan
+ numopen = TestGetTickerCount(options, NO_FILE_OPENS);
+ cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
+ iter = NewIterator(non_blocking_opts, handles_[1]);
+ count = 0;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ ASSERT_OK(iter->status());
+ count++;
+ }
+ ASSERT_EQ(count, 1);
+ ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
+ ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
+ delete iter;
+
+ // This test verifies block cache behaviors, which is not used by plain
+ // table format.
+ } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
+}
+
+TEST_P(DBIteratorTest, IterSeekBeforePrev) {
+ ASSERT_OK(Put("a", "b"));
+ ASSERT_OK(Put("c", "d"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put("0", "f"));
+ ASSERT_OK(Put("1", "h"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put("2", "j"));
+ auto iter = NewIterator(ReadOptions());
+ iter->Seek(Slice("c"));
+ iter->Prev();
+ iter->Seek(Slice("a"));
+ iter->Prev();
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, IterReseekNewUpperBound) {
+ Random rnd(301);
+ Options options = CurrentOptions();
+ BlockBasedTableOptions table_options;
+ table_options.block_size = 1024;
+ table_options.block_size_deviation = 50;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ options.compression = kNoCompression;
+ Reopen(options);
+
+ ASSERT_OK(Put("a", rnd.RandomString(400)));
+ ASSERT_OK(Put("aabb", rnd.RandomString(400)));
+ ASSERT_OK(Put("aaef", rnd.RandomString(400)));
+ ASSERT_OK(Put("b", rnd.RandomString(400)));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ReadOptions opts;
+ Slice ub = Slice("aa");
+ opts.iterate_upper_bound = &ub;
+ auto iter = NewIterator(opts);
+ iter->Seek(Slice("a"));
+ ub = Slice("b");
+ iter->Seek(Slice("aabc"));
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().ToString(), "aaef");
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, IterSeekForPrevBeforeNext) {
+ ASSERT_OK(Put("a", "b"));
+ ASSERT_OK(Put("c", "d"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put("0", "f"));
+ ASSERT_OK(Put("1", "h"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put("2", "j"));
+ auto iter = NewIterator(ReadOptions());
+ iter->SeekForPrev(Slice("0"));
+ iter->Next();
+ iter->SeekForPrev(Slice("1"));
+ iter->Next();
+ delete iter;
+}
+
+namespace {
+std::string MakeLongKey(size_t length, char c) {
+ return std::string(length, c);
+}
+} // anonymous namespace
+
+TEST_P(DBIteratorTest, IterLongKeys) {
+ ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
+ ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
+ ASSERT_OK(Put("a", "b"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
+ ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
+ ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
+ auto iter = NewIterator(ReadOptions());
+
+ // Create a key that needs to be skipped for Seq too new
+ iter->Seek(MakeLongKey(20, 0));
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
+
+ iter->SeekForPrev(MakeLongKey(127, 3));
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
+ delete iter;
+
+ iter = NewIterator(ReadOptions());
+ iter->Seek(MakeLongKey(50, 1));
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, IterNextWithNewerSeq) {
+ ASSERT_OK(Put("0", "0"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put("a", "b"));
+ ASSERT_OK(Put("c", "d"));
+ ASSERT_OK(Put("d", "e"));
+ auto iter = NewIterator(ReadOptions());
+
+ // Create a key that needs to be skipped for Seq too new
+ for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
+ i++) {
+ ASSERT_OK(Put("b", "f"));
+ }
+
+ iter->Seek(Slice("a"));
+ ASSERT_EQ(IterStatus(iter), "a->b");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "c->d");
+ iter->SeekForPrev(Slice("b"));
+ ASSERT_EQ(IterStatus(iter), "a->b");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "c->d");
+
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, IterPrevWithNewerSeq) {
+ ASSERT_OK(Put("0", "0"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put("a", "b"));
+ ASSERT_OK(Put("c", "d"));
+ ASSERT_OK(Put("d", "e"));
+ auto iter = NewIterator(ReadOptions());
+
+ // Create a key that needs to be skipped for Seq too new
+ for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
+ i++) {
+ ASSERT_OK(Put("b", "f"));
+ }
+
+ iter->Seek(Slice("d"));
+ ASSERT_EQ(IterStatus(iter), "d->e");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "c->d");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "a->b");
+ iter->Prev();
+ iter->SeekForPrev(Slice("d"));
+ ASSERT_EQ(IterStatus(iter), "d->e");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "c->d");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "a->b");
+ iter->Prev();
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, IterPrevWithNewerSeq2) {
+ ASSERT_OK(Put("0", "0"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put("a", "b"));
+ ASSERT_OK(Put("c", "d"));
+ ASSERT_OK(Put("e", "f"));
+ auto iter = NewIterator(ReadOptions());
+ auto iter2 = NewIterator(ReadOptions());
+ iter->Seek(Slice("c"));
+ iter2->SeekForPrev(Slice("d"));
+ ASSERT_EQ(IterStatus(iter), "c->d");
+ ASSERT_EQ(IterStatus(iter2), "c->d");
+
+ // Create a key that needs to be skipped for Seq too new
+ for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
+ i++) {
+ ASSERT_OK(Put("b", "f"));
+ }
+
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "a->b");
+ iter->Prev();
+ iter2->Prev();
+ ASSERT_EQ(IterStatus(iter2), "a->b");
+ iter2->Prev();
+ delete iter;
+ delete iter2;
+}
+
+TEST_P(DBIteratorTest, IterEmpty) {
+ do {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+ Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
+
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->SeekToLast();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->Seek("foo");
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->SeekForPrev("foo");
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ ASSERT_OK(iter->status());
+
+ delete iter;
+ } while (ChangeCompactOptions());
+}
+
+TEST_P(DBIteratorTest, IterSingle) {
+ do {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+ ASSERT_OK(Put(1, "a", "va"));
+ Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
+
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->SeekToLast();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekToLast();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->Seek("");
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekForPrev("");
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->Seek("a");
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekForPrev("a");
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->Seek("b");
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekForPrev("b");
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ delete iter;
+ } while (ChangeCompactOptions());
+}
+
+TEST_P(DBIteratorTest, IterMulti) {
+ do {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+ ASSERT_OK(Put(1, "a", "va"));
+ ASSERT_OK(Put(1, "b", "vb"));
+ ASSERT_OK(Put(1, "c", "vc"));
+ Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
+
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->SeekToLast();
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekToLast();
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->Seek("");
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Seek("a");
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Seek("ax");
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+ iter->SeekForPrev("d");
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->SeekForPrev("c");
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->SeekForPrev("bx");
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+
+ iter->Seek("b");
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+ iter->Seek("z");
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekForPrev("b");
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+ iter->SeekForPrev("");
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ // Switch from reverse to forward
+ iter->SeekToLast();
+ iter->Prev();
+ iter->Prev();
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+
+ // Switch from forward to reverse
+ iter->SeekToFirst();
+ iter->Next();
+ iter->Next();
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+
+ // Make sure iter stays at snapshot
+ ASSERT_OK(Put(1, "a", "va2"));
+ ASSERT_OK(Put(1, "a2", "va3"));
+ ASSERT_OK(Put(1, "b", "vb2"));
+ ASSERT_OK(Put(1, "c", "vc2"));
+ ASSERT_OK(Delete(1, "b"));
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekToLast();
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "b->vb");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ delete iter;
+ } while (ChangeCompactOptions());
+}
+
+// Check that we can skip over a run of user keys
+// by using reseek rather than sequential scan
+TEST_P(DBIteratorTest, IterReseek) {
+ anon::OptionsOverride options_override;
+ options_override.skip_policy = kSkipNoSnapshot;
+ Options options = CurrentOptions(options_override);
+ options.max_sequential_skip_in_iterations = 3;
+ options.create_if_missing = true;
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ DestroyAndReopen(options);
+ CreateAndReopenWithCF({"pikachu"}, options);
+
+ // insert three keys with same userkey and verify that
+ // reseek is not invoked. For each of these test cases,
+ // verify that we can find the next key "b".
+ ASSERT_OK(Put(1, "a", "zero"));
+ ASSERT_OK(Put(1, "a", "one"));
+ ASSERT_OK(Put(1, "a", "two"));
+ ASSERT_OK(Put(1, "b", "bone"));
+ Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
+ iter->SeekToFirst();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+ ASSERT_EQ(IterStatus(iter), "a->two");
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+ ASSERT_EQ(IterStatus(iter), "b->bone");
+ delete iter;
+
+ // insert a total of three keys with same userkey and verify
+ // that reseek is still not invoked.
+ ASSERT_OK(Put(1, "a", "three"));
+ iter = NewIterator(ReadOptions(), handles_[1]);
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->three");
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+ ASSERT_EQ(IterStatus(iter), "b->bone");
+ delete iter;
+
+ // insert a total of four keys with same userkey and verify
+ // that reseek is invoked.
+ ASSERT_OK(Put(1, "a", "four"));
+ iter = NewIterator(ReadOptions(), handles_[1]);
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->four");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
+ ASSERT_EQ(IterStatus(iter), "b->bone");
+ delete iter;
+
+ // Testing reverse iterator
+ // At this point, we have three versions of "a" and one version of "b".
+ // The reseek statistics is already at 1.
+ int num_reseeks = static_cast<int>(
+ TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
+
+ // Insert another version of b and assert that reseek is not invoked
+ ASSERT_OK(Put(1, "b", "btwo"));
+ iter = NewIterator(ReadOptions(), handles_[1]);
+ iter->SeekToLast();
+ ASSERT_EQ(IterStatus(iter), "b->btwo");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+ num_reseeks);
+ iter->Prev();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+ num_reseeks + 1);
+ ASSERT_EQ(IterStatus(iter), "a->four");
+ delete iter;
+
+ // insert two more versions of b. This makes a total of 4 versions
+ // of b and 4 versions of a.
+ ASSERT_OK(Put(1, "b", "bthree"));
+ ASSERT_OK(Put(1, "b", "bfour"));
+ iter = NewIterator(ReadOptions(), handles_[1]);
+ iter->SeekToLast();
+ ASSERT_EQ(IterStatus(iter), "b->bfour");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+ num_reseeks + 2);
+ iter->Prev();
+
+ // the previous Prev call should have invoked reseek
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
+ num_reseeks + 3);
+ ASSERT_EQ(IterStatus(iter), "a->four");
+ delete iter;
+}
+
+TEST_F(DBIteratorTest, ReseekUponDirectionChange) {
+ Options options = GetDefaultOptions();
+ options.create_if_missing = true;
+ options.prefix_extractor.reset(NewFixedPrefixTransform(1));
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ options.merge_operator.reset(
+ new StringAppendTESTOperator(/*delim_char=*/' '));
+ DestroyAndReopen(options);
+ ASSERT_OK(Put("foo", "value"));
+ ASSERT_OK(Put("bar", "value"));
+ {
+ std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
+ it->SeekToLast();
+ it->Prev();
+ it->Next();
+ }
+ ASSERT_EQ(1,
+ options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION));
+
+ const std::string merge_key("good");
+ ASSERT_OK(Put(merge_key, "orig"));
+ ASSERT_OK(Merge(merge_key, "suffix"));
+ {
+ std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
+ it->Seek(merge_key);
+ ASSERT_TRUE(it->Valid());
+ const uint64_t prev_reseek_count =
+ options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION);
+ it->Prev();
+ ASSERT_EQ(prev_reseek_count + 1, options.statistics->getTickerCount(
+ NUMBER_OF_RESEEKS_IN_ITERATION));
+ }
+}
+
+TEST_P(DBIteratorTest, IterSmallAndLargeMix) {
+ do {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+ ASSERT_OK(Put(1, "a", "va"));
+ ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
+ ASSERT_OK(Put(1, "c", "vc"));
+ ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
+ ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
+
+ Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
+
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->SeekToLast();
+ ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ delete iter;
+ } while (ChangeCompactOptions());
+}
+
+TEST_P(DBIteratorTest, IterMultiWithDelete) {
+ do {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+ ASSERT_OK(Put(1, "ka", "va"));
+ ASSERT_OK(Put(1, "kb", "vb"));
+ ASSERT_OK(Put(1, "kc", "vc"));
+ ASSERT_OK(Delete(1, "kb"));
+ ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
+
+ Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
+ iter->Seek("kc");
+ ASSERT_EQ(IterStatus(iter), "kc->vc");
+ if (!CurrentOptions().merge_operator) {
+ // TODO: merge operator does not support backward iteration yet
+ if (kPlainTableAllBytesPrefix != option_config_ &&
+ kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
+ kHashLinkList != option_config_ &&
+ kHashSkipList != option_config_) { // doesn't support SeekToLast
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "ka->va");
+ }
+ }
+ delete iter;
+ } while (ChangeOptions());
+}
+
+TEST_P(DBIteratorTest, IterPrevMaxSkip) {
+ do {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+ for (int i = 0; i < 2; i++) {
+ ASSERT_OK(Put(1, "key1", "v1"));
+ ASSERT_OK(Put(1, "key2", "v2"));
+ ASSERT_OK(Put(1, "key3", "v3"));
+ ASSERT_OK(Put(1, "key4", "v4"));
+ ASSERT_OK(Put(1, "key5", "v5"));
+ }
+
+ VerifyIterLast("key5->v5", 1);
+
+ ASSERT_OK(Delete(1, "key5"));
+ VerifyIterLast("key4->v4", 1);
+
+ ASSERT_OK(Delete(1, "key4"));
+ VerifyIterLast("key3->v3", 1);
+
+ ASSERT_OK(Delete(1, "key3"));
+ VerifyIterLast("key2->v2", 1);
+
+ ASSERT_OK(Delete(1, "key2"));
+ VerifyIterLast("key1->v1", 1);
+
+ ASSERT_OK(Delete(1, "key1"));
+ VerifyIterLast("(invalid)", 1);
+ } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
+}
+
+TEST_P(DBIteratorTest, IterWithSnapshot) {
+ anon::OptionsOverride options_override;
+ options_override.skip_policy = kSkipNoSnapshot;
+ do {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
+ ASSERT_OK(Put(1, "key1", "val1"));
+ ASSERT_OK(Put(1, "key2", "val2"));
+ ASSERT_OK(Put(1, "key3", "val3"));
+ ASSERT_OK(Put(1, "key4", "val4"));
+ ASSERT_OK(Put(1, "key5", "val5"));
+
+ const Snapshot* snapshot = db_->GetSnapshot();
+ ReadOptions options;
+ options.snapshot = snapshot;
+ Iterator* iter = NewIterator(options, handles_[1]);
+
+ ASSERT_OK(Put(1, "key0", "val0"));
+ // Put more values after the snapshot
+ ASSERT_OK(Put(1, "key100", "val100"));
+ ASSERT_OK(Put(1, "key101", "val101"));
+
+ iter->Seek("key5");
+ ASSERT_EQ(IterStatus(iter), "key5->val5");
+ if (!CurrentOptions().merge_operator) {
+ // TODO: merge operator does not support backward iteration yet
+ if (kPlainTableAllBytesPrefix != option_config_ &&
+ kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
+ kHashLinkList != option_config_ && kHashSkipList != option_config_) {
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "key4->val4");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "key3->val3");
+
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "key4->val4");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "key5->val5");
+ }
+ iter->Next();
+ ASSERT_TRUE(!iter->Valid());
+ }
+
+ if (!CurrentOptions().merge_operator) {
+ // TODO(gzh): merge operator does not support backward iteration yet
+ if (kPlainTableAllBytesPrefix != option_config_ &&
+ kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
+ kHashLinkList != option_config_ && kHashSkipList != option_config_) {
+ iter->SeekForPrev("key1");
+ ASSERT_EQ(IterStatus(iter), "key1->val1");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "key2->val2");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "key3->val3");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "key2->val2");
+ iter->Prev();
+ ASSERT_EQ(IterStatus(iter), "key1->val1");
+ iter->Prev();
+ ASSERT_TRUE(!iter->Valid());
+ }
+ }
+ db_->ReleaseSnapshot(snapshot);
+ delete iter;
+ } while (ChangeOptions());
+}
+
+TEST_P(DBIteratorTest, IteratorPinsRef) {
+ do {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+ ASSERT_OK(Put(1, "foo", "hello"));
+
+ // Get iterator that will yield the current contents of the DB.
+ Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
+
+ // Write to force compactions
+ ASSERT_OK(Put(1, "foo", "newvalue1"));
+ for (int i = 0; i < 100; i++) {
+ // 100K values
+ ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
+ }
+ ASSERT_OK(Put(1, "foo", "newvalue2"));
+
+ iter->SeekToFirst();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ("foo", iter->key().ToString());
+ ASSERT_EQ("hello", iter->value().ToString());
+ iter->Next();
+ ASSERT_TRUE(!iter->Valid());
+ delete iter;
+ } while (ChangeCompactOptions());
+}
+
+TEST_P(DBIteratorTest, IteratorDeleteAfterCfDelete) {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+
+ ASSERT_OK(Put(1, "foo", "delete-cf-then-delete-iter"));
+ ASSERT_OK(Put(1, "hello", "value2"));
+
+ ColumnFamilyHandle* cf = handles_[1];
+ ReadOptions ro;
+
+ auto* iter = db_->NewIterator(ro, cf);
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "foo->delete-cf-then-delete-iter");
+
+ // delete CF handle
+ EXPECT_OK(db_->DestroyColumnFamilyHandle(cf));
+ handles_.erase(std::begin(handles_) + 1);
+
+ // delete Iterator after CF handle is deleted
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "hello->value2");
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, IteratorDeleteAfterCfDrop) {
+ CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
+
+ ASSERT_OK(Put(1, "foo", "drop-cf-then-delete-iter"));
+
+ ReadOptions ro;
+ ColumnFamilyHandle* cf = handles_[1];
+
+ auto* iter = db_->NewIterator(ro, cf);
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "foo->drop-cf-then-delete-iter");
+
+ // drop and delete CF
+ EXPECT_OK(db_->DropColumnFamily(cf));
+ EXPECT_OK(db_->DestroyColumnFamilyHandle(cf));
+ handles_.erase(std::begin(handles_) + 1);
+
+ // delete Iterator after CF handle is dropped
+ delete iter;
+}
+
+// SetOptions not defined in ROCKSDB LITE
+#ifndef ROCKSDB_LITE
+TEST_P(DBIteratorTest, DBIteratorBoundTest) {
+ Options options = CurrentOptions();
+ options.env = env_;
+ options.create_if_missing = true;
+
+ options.prefix_extractor = nullptr;
+ DestroyAndReopen(options);
+ ASSERT_OK(Put("a", "0"));
+ ASSERT_OK(Put("foo", "bar"));
+ ASSERT_OK(Put("foo1", "bar1"));
+ ASSERT_OK(Put("g1", "0"));
+
+ // testing basic case with no iterate_upper_bound and no prefix_extractor
+ {
+ ReadOptions ro;
+ ro.iterate_upper_bound = nullptr;
+
+ std::unique_ptr<Iterator> iter(NewIterator(ro));
+
+ iter->Seek("foo");
+
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
+
+ iter->SeekForPrev("g1");
+
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
+
+ iter->Prev();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
+
+ iter->Prev();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
+ }
+
+ // testing iterate_upper_bound and forward iterator
+ // to make sure it stops at bound
+ {
+ ReadOptions ro;
+ // iterate_upper_bound points beyond the last expected entry
+ Slice prefix("foo2");
+ ro.iterate_upper_bound = &prefix;
+
+ std::unique_ptr<Iterator> iter(NewIterator(ro));
+
+ iter->Seek("foo");
+
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(("foo1")), 0);
+
+ iter->Next();
+ // should stop here...
+ ASSERT_TRUE(!iter->Valid());
+ }
+ // Testing SeekToLast with iterate_upper_bound set
+ {
+ ReadOptions ro;
+
+ Slice prefix("foo");
+ ro.iterate_upper_bound = &prefix;
+
+ std::unique_ptr<Iterator> iter(NewIterator(ro));
+
+ iter->SeekToLast();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("a")), 0);
+ }
+
+ // prefix is the first letter of the key
+ ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
+ ASSERT_OK(Put("a", "0"));
+ ASSERT_OK(Put("foo", "bar"));
+ ASSERT_OK(Put("foo1", "bar1"));
+ ASSERT_OK(Put("g1", "0"));
+
+ // testing with iterate_upper_bound and prefix_extractor
+ // Seek target and iterate_upper_bound are not is same prefix
+ // This should be an error
+ {
+ ReadOptions ro;
+ Slice upper_bound("g");
+ ro.iterate_upper_bound = &upper_bound;
+
+ std::unique_ptr<Iterator> iter(NewIterator(ro));
+
+ iter->Seek("foo");
+
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ("foo", iter->key().ToString());
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ("foo1", iter->key().ToString());
+
+ iter->Next();
+ ASSERT_TRUE(!iter->Valid());
+ }
+
+ // testing that iterate_upper_bound prevents iterating over deleted items
+ // if the bound has already reached
+ {
+ options.prefix_extractor = nullptr;
+ DestroyAndReopen(options);
+ ASSERT_OK(Put("a", "0"));
+ ASSERT_OK(Put("b", "0"));
+ ASSERT_OK(Put("b1", "0"));
+ ASSERT_OK(Put("c", "0"));
+ ASSERT_OK(Put("d", "0"));
+ ASSERT_OK(Put("e", "0"));
+ ASSERT_OK(Delete("c"));
+ ASSERT_OK(Delete("d"));
+
+ // base case with no bound
+ ReadOptions ro;
+ ro.iterate_upper_bound = nullptr;
+
+ std::unique_ptr<Iterator> iter(NewIterator(ro));
+
+ iter->Seek("b");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("b")), 0);
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(("b1")), 0);
+
+ get_perf_context()->Reset();
+ iter->Next();
+
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(
+ static_cast<int>(get_perf_context()->internal_delete_skipped_count), 2);
+
+ // now testing with iterate_bound
+ Slice prefix("c");
+ ro.iterate_upper_bound = &prefix;
+
+ iter.reset(NewIterator(ro));
+
+ get_perf_context()->Reset();
+
+ iter->Seek("b");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("b")), 0);
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(("b1")), 0);
+
+ iter->Next();
+ // the iteration should stop as soon as the bound key is reached
+ // even though the key is deleted
+ // hence internal_delete_skipped_count should be 0
+ ASSERT_TRUE(!iter->Valid());
+ ASSERT_EQ(
+ static_cast<int>(get_perf_context()->internal_delete_skipped_count), 0);
+ }
+}
+
+TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) {
+ Options options = CurrentOptions();
+ options.env = env_;
+ options.create_if_missing = true;
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ options.prefix_extractor = nullptr;
+ DestroyAndReopen(options);
+ ASSERT_OK(Put("a", "0"));
+ ASSERT_OK(Put("z", "0"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("foo1", "bar1"));
+ ASSERT_OK(Put("foo2", "bar2"));
+ ASSERT_OK(Put("foo3", "bar3"));
+ ASSERT_OK(Put("foo4", "bar4"));
+
+ {
+ std::string up_str = "foo5";
+ Slice up(up_str);
+ ReadOptions ro;
+ ro.iterate_upper_bound = &up;
+ std::unique_ptr<Iterator> iter(NewIterator(ro));
+
+ iter->Seek("foo1");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
+
+ uint64_t prev_block_cache_hit =
+ TestGetTickerCount(options, BLOCK_CACHE_HIT);
+ uint64_t prev_block_cache_miss =
+ TestGetTickerCount(options, BLOCK_CACHE_MISS);
+
+ ASSERT_GT(prev_block_cache_hit + prev_block_cache_miss, 0);
+
+ iter->Seek("foo4");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo4")), 0);
+ ASSERT_EQ(prev_block_cache_hit,
+ TestGetTickerCount(options, BLOCK_CACHE_HIT));
+ ASSERT_EQ(prev_block_cache_miss,
+ TestGetTickerCount(options, BLOCK_CACHE_MISS));
+
+ iter->Seek("foo2");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo3")), 0);
+ ASSERT_EQ(prev_block_cache_hit,
+ TestGetTickerCount(options, BLOCK_CACHE_HIT));
+ ASSERT_EQ(prev_block_cache_miss,
+ TestGetTickerCount(options, BLOCK_CACHE_MISS));
+ }
+}
+#endif
+
+TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
+ for (auto format_version : {2, 3, 4}) {
+ int upper_bound_hits = 0;
+ Options options = CurrentOptions();
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+ "BlockBasedTableIterator:out_of_bound",
+ [&upper_bound_hits](void*) { upper_bound_hits++; });
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+ options.env = env_;
+ options.create_if_missing = true;
+ options.prefix_extractor = nullptr;
+ BlockBasedTableOptions table_options;
+ table_options.format_version = format_version;
+ table_options.flush_block_policy_factory =
+ std::make_shared<FlushBlockEveryKeyPolicyFactory>();
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+ DestroyAndReopen(options);
+ ASSERT_OK(Put("foo1", "bar1"));
+ ASSERT_OK(Put("foo2", "bar2"));
+ ASSERT_OK(Put("foo4", "bar4"));
+ ASSERT_OK(Flush());
+
+ Slice ub("foo3");
+ ReadOptions ro;
+ ro.iterate_upper_bound = &ub;
+
+ std::unique_ptr<Iterator> iter(NewIterator(ro));
+
+ iter->Seek("foo");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
+ ASSERT_EQ(upper_bound_hits, 0);
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
+ ASSERT_EQ(upper_bound_hits, 0);
+
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+ ASSERT_EQ(upper_bound_hits, 1);
+ }
+}
+
+// Enable kBinarySearchWithFirstKey, do some iterator operations and check that
+// they don't do unnecessary block reads.
+TEST_P(DBIteratorTest, IndexWithFirstKey) {
+ for (int tailing = 0; tailing < 2; ++tailing) {
+ SCOPED_TRACE("tailing = " + std::to_string(tailing));
+ Options options = CurrentOptions();
+ options.env = env_;
+ options.create_if_missing = true;
+ options.prefix_extractor = nullptr;
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ Statistics* stats = options.statistics.get();
+ BlockBasedTableOptions table_options;
+ table_options.index_type =
+ BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
+ table_options.index_shortening =
+ BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
+ table_options.flush_block_policy_factory =
+ std::make_shared<FlushBlockEveryKeyPolicyFactory>();
+ table_options.block_cache =
+ NewLRUCache(8000); // fits all blocks and their cache metadata overhead
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+ DestroyAndReopen(options);
+ ASSERT_OK(Merge("a1", "x1"));
+ ASSERT_OK(Merge("b1", "y1"));
+ ASSERT_OK(Merge("c0", "z1"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Merge("a2", "x2"));
+ ASSERT_OK(Merge("b2", "y2"));
+ ASSERT_OK(Merge("c0", "z2"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Merge("a3", "x3"));
+ ASSERT_OK(Merge("b3", "y3"));
+ ASSERT_OK(Merge("c3", "z3"));
+ ASSERT_OK(Flush());
+
+ // Block cache is not important for this test.
+ // We use BLOCK_CACHE_DATA_* counters just because they're the most readily
+ // available way of counting block accesses.
+
+ ReadOptions ropt;
+ ropt.tailing = tailing;
+ std::unique_ptr<Iterator> iter(NewIterator(ropt));
+
+ ropt.read_tier = ReadTier::kBlockCacheTier;
+ std::unique_ptr<Iterator> nonblocking_iter(NewIterator(ropt));
+
+ iter->Seek("b10");
+ ASSERT_TRUE(iter->Valid());
+ EXPECT_EQ("b2", iter->key().ToString());
+ EXPECT_EQ("y2", iter->value().ToString());
+ EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
+
+ // The cache-only iterator should succeed too, using the blocks pulled into
+ // the cache by the previous iterator.
+ nonblocking_iter->Seek("b10");
+ ASSERT_TRUE(nonblocking_iter->Valid());
+ EXPECT_EQ("b2", nonblocking_iter->key().ToString());
+ EXPECT_EQ("y2", nonblocking_iter->value().ToString());
+ EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+
+ // ... but it shouldn't be able to step forward since the next block is
+ // not in cache yet.
+ nonblocking_iter->Next();
+ ASSERT_FALSE(nonblocking_iter->Valid());
+ ASSERT_TRUE(nonblocking_iter->status().IsIncomplete());
+
+ // ... nor should a seek to the next key succeed.
+ nonblocking_iter->Seek("b20");
+ ASSERT_FALSE(nonblocking_iter->Valid());
+ ASSERT_TRUE(nonblocking_iter->status().IsIncomplete());
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ EXPECT_EQ("b3", iter->key().ToString());
+ EXPECT_EQ("y3", iter->value().ToString());
+ EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
+ EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+
+ // After the blocking iterator loaded the next block, the nonblocking
+ // iterator's seek should succeed.
+ nonblocking_iter->Seek("b20");
+ ASSERT_TRUE(nonblocking_iter->Valid());
+ EXPECT_EQ("b3", nonblocking_iter->key().ToString());
+ EXPECT_EQ("y3", nonblocking_iter->value().ToString());
+ EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+
+ iter->Seek("c0");
+ ASSERT_TRUE(iter->Valid());
+ EXPECT_EQ("c0", iter->key().ToString());
+ EXPECT_EQ("z1,z2", iter->value().ToString());
+ EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+ EXPECT_EQ(6, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ EXPECT_EQ("c3", iter->key().ToString());
+ EXPECT_EQ("z3", iter->value().ToString());
+ EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+ EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
+
+ iter.reset();
+
+ // Enable iterate_upper_bound and check that iterator is not trying to read
+ // blocks that are fully above upper bound.
+ std::string ub = "b3";
+ Slice ub_slice(ub);
+ ropt.iterate_upper_bound = &ub_slice;
+ iter.reset(NewIterator(ropt));
+
+ iter->Seek("b2");
+ ASSERT_TRUE(iter->Valid());
+ EXPECT_EQ("b2", iter->key().ToString());
+ EXPECT_EQ("y2", iter->value().ToString());
+ EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+ EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
+
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+ EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+ EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
+ }
+}
+
+TEST_P(DBIteratorTest, IndexWithFirstKeyGet) {
+ Options options = CurrentOptions();
+ options.env = env_;
+ options.create_if_missing = true;
+ options.prefix_extractor = nullptr;
+ options.merge_operator = MergeOperators::CreateStringAppendOperator();
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ Statistics* stats = options.statistics.get();
+ BlockBasedTableOptions table_options;
+ table_options.index_type =
+ BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
+ table_options.index_shortening =
+ BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
+ table_options.flush_block_policy_factory =
+ std::make_shared<FlushBlockEveryKeyPolicyFactory>();
+ table_options.block_cache = NewLRUCache(1000); // fits all blocks
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+ DestroyAndReopen(options);
+ ASSERT_OK(Merge("a", "x1"));
+ ASSERT_OK(Merge("c", "y1"));
+ ASSERT_OK(Merge("e", "z1"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Merge("c", "y2"));
+ ASSERT_OK(Merge("e", "z2"));
+ ASSERT_OK(Flush());
+
+ // Get() between blocks shouldn't read any blocks.
+ ASSERT_EQ("NOT_FOUND", Get("b"));
+ EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
+ EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+
+ // Get() of an existing key shouldn't read any unnecessary blocks when there's
+ // only one key per block.
+
+ ASSERT_EQ("y1,y2", Get("c"));
+ EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
+ EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+
+ ASSERT_EQ("x1", Get("a"));
+ EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
+ EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
+
+ EXPECT_EQ(std::vector<std::string>({"NOT_FOUND", "z1,z2"}),
+ MultiGet({"b", "e"}));
+}
+
+// TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
+// return the biggest key which is smaller than the seek key.
+TEST_P(DBIteratorTest, PrevAfterAndNextAfterMerge) {
+ Options options;
+ options.create_if_missing = true;
+ options.merge_operator = MergeOperators::CreatePutOperator();
+ options.env = env_;
+ DestroyAndReopen(options);
+
+ // write three entries with different keys using Merge()
+ WriteOptions wopts;
+ ASSERT_OK(db_->Merge(wopts, "1", "data1"));
+ ASSERT_OK(db_->Merge(wopts, "2", "data2"));
+ ASSERT_OK(db_->Merge(wopts, "3", "data3"));
+
+ std::unique_ptr<Iterator> it(NewIterator(ReadOptions()));
+
+ it->Seek("2");
+ ASSERT_TRUE(it->Valid());
+ ASSERT_EQ("2", it->key().ToString());
+
+ it->Prev();
+ ASSERT_TRUE(it->Valid());
+ ASSERT_EQ("1", it->key().ToString());
+
+ it->SeekForPrev("1");
+ ASSERT_TRUE(it->Valid());
+ ASSERT_EQ("1", it->key().ToString());
+
+ it->Next();
+ ASSERT_TRUE(it->Valid());
+ ASSERT_EQ("2", it->key().ToString());
+}
+
+class DBIteratorTestForPinnedData : public DBIteratorTest {
+ public:
+ enum TestConfig {
+ NORMAL,
+ CLOSE_AND_OPEN,
+ COMPACT_BEFORE_READ,
+ FLUSH_EVERY_1000,
+ MAX
+ };
+ DBIteratorTestForPinnedData() : DBIteratorTest() {}
+ void PinnedDataIteratorRandomized(TestConfig run_config) {
+ // Generate Random data
+ Random rnd(301);
+
+ int puts = 100000;
+ int key_pool = static_cast<int>(puts * 0.7);
+ int key_size = 100;
+ int val_size = 1000;
+ int seeks_percentage = 20; // 20% of keys will be used to test seek()
+ int delete_percentage = 20; // 20% of keys will be deleted
+ int merge_percentage = 20; // 20% of keys will be added using Merge()
+
+ Options options = CurrentOptions();
+ BlockBasedTableOptions table_options;
+ table_options.use_delta_encoding = false;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ options.merge_operator = MergeOperators::CreatePutOperator();
+ DestroyAndReopen(options);
+
+ std::vector<std::string> generated_keys(key_pool);
+ for (int i = 0; i < key_pool; i++) {
+ generated_keys[i] = rnd.RandomString(key_size);
+ }
+
+ std::map<std::string, std::string> true_data;
+ std::vector<std::string> random_keys;
+ std::vector<std::string> deleted_keys;
+ for (int i = 0; i < puts; i++) {
+ auto& k = generated_keys[rnd.Next() % key_pool];
+ auto v = rnd.RandomString(val_size);
+
+ // Insert data to true_data map and to DB
+ true_data[k] = v;
+ if (rnd.PercentTrue(merge_percentage)) {
+ ASSERT_OK(db_->Merge(WriteOptions(), k, v));
+ } else {
+ ASSERT_OK(Put(k, v));
+ }
+
+ // Pick random keys to be used to test Seek()
+ if (rnd.PercentTrue(seeks_percentage)) {
+ random_keys.push_back(k);
+ }
+
+ // Delete some random keys
+ if (rnd.PercentTrue(delete_percentage)) {
+ deleted_keys.push_back(k);
+ true_data.erase(k);
+ ASSERT_OK(Delete(k));
+ }
+
+ if (run_config == TestConfig::FLUSH_EVERY_1000) {
+ if (i && i % 1000 == 0) {
+ ASSERT_OK(Flush());
+ }
+ }
+ }
+
+ if (run_config == TestConfig::CLOSE_AND_OPEN) {
+ Close();
+ Reopen(options);
+ } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+ }
+
+ ReadOptions ro;
+ ro.pin_data = true;
+ auto iter = NewIterator(ro);
+
+ {
+ // Test Seek to random keys
+ std::vector<Slice> keys_slices;
+ std::vector<std::string> true_keys;
+ for (auto& k : random_keys) {
+ iter->Seek(k);
+ if (!iter->Valid()) {
+ ASSERT_EQ(true_data.lower_bound(k), true_data.end());
+ continue;
+ }
+ std::string prop_value;
+ ASSERT_OK(
+ iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+ ASSERT_EQ("1", prop_value);
+ keys_slices.push_back(iter->key());
+ true_keys.push_back(true_data.lower_bound(k)->first);
+ }
+
+ for (size_t i = 0; i < keys_slices.size(); i++) {
+ ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
+ }
+ }
+
+ {
+ // Test SeekForPrev to random keys
+ std::vector<Slice> keys_slices;
+ std::vector<std::string> true_keys;
+ for (auto& k : random_keys) {
+ iter->SeekForPrev(k);
+ if (!iter->Valid()) {
+ ASSERT_EQ(true_data.upper_bound(k), true_data.begin());
+ continue;
+ }
+ std::string prop_value;
+ ASSERT_OK(
+ iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+ ASSERT_EQ("1", prop_value);
+ keys_slices.push_back(iter->key());
+ true_keys.push_back((--true_data.upper_bound(k))->first);
+ }
+
+ for (size_t i = 0; i < keys_slices.size(); i++) {
+ ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
+ }
+ }
+
+ {
+ // Test iterating all data forward
+ std::vector<Slice> all_keys;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ std::string prop_value;
+ ASSERT_OK(
+ iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+ ASSERT_EQ("1", prop_value);
+ all_keys.push_back(iter->key());
+ }
+ ASSERT_EQ(all_keys.size(), true_data.size());
+
+ // Verify that all keys slices are valid
+ auto data_iter = true_data.begin();
+ for (size_t i = 0; i < all_keys.size(); i++) {
+ ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
+ data_iter++;
+ }
+ }
+
+ {
+ // Test iterating all data backward
+ std::vector<Slice> all_keys;
+ for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
+ std::string prop_value;
+ ASSERT_OK(
+ iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+ ASSERT_EQ("1", prop_value);
+ all_keys.push_back(iter->key());
+ }
+ ASSERT_EQ(all_keys.size(), true_data.size());
+
+ // Verify that all keys slices are valid (backward)
+ auto data_iter = true_data.rbegin();
+ for (size_t i = 0; i < all_keys.size(); i++) {
+ ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
+ data_iter++;
+ }
+ }
+
+ delete iter;
+ }
+};
+
+#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
+TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedNormal) {
+ PinnedDataIteratorRandomized(TestConfig::NORMAL);
+}
+#endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
+
+TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCLoseAndOpen) {
+ PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN);
+}
+
+TEST_P(DBIteratorTestForPinnedData,
+ PinnedDataIteratorRandomizedCompactBeforeRead) {
+ PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ);
+}
+
+TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedFlush) {
+ PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000);
+}
+
+INSTANTIATE_TEST_CASE_P(DBIteratorTestForPinnedDataInstance,
+ DBIteratorTestForPinnedData,
+ testing::Values(true, false));
+
+#ifndef ROCKSDB_LITE
+TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) {
+ Options options = CurrentOptions();
+ BlockBasedTableOptions table_options;
+ table_options.use_delta_encoding = false;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ options.disable_auto_compactions = true;
+ options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb
+ DestroyAndReopen(options);
+
+ std::map<std::string, std::string> true_data;
+
+ // Generate 4 sst files in L2
+ Random rnd(301);
+ for (int i = 1; i <= 1000; i++) {
+ std::string k = Key(i * 3);
+ std::string v = rnd.RandomString(100);
+ ASSERT_OK(Put(k, v));
+ true_data[k] = v;
+ if (i % 250 == 0) {
+ ASSERT_OK(Flush());
+ }
+ }
+ ASSERT_EQ(FilesPerLevel(0), "4");
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+ ASSERT_EQ(FilesPerLevel(0), "0,4");
+
+ // Generate 4 sst files in L0
+ for (int i = 1; i <= 1000; i++) {
+ std::string k = Key(i * 2);
+ std::string v = rnd.RandomString(100);
+ ASSERT_OK(Put(k, v));
+ true_data[k] = v;
+ if (i % 250 == 0) {
+ ASSERT_OK(Flush());
+ }
+ }
+ ASSERT_EQ(FilesPerLevel(0), "4,4");
+
+ // Add some keys/values in memtables
+ for (int i = 1; i <= 1000; i++) {
+ std::string k = Key(i);
+ std::string v = rnd.RandomString(100);
+ ASSERT_OK(Put(k, v));
+ true_data[k] = v;
+ }
+ ASSERT_EQ(FilesPerLevel(0), "4,4");
+
+ ReadOptions ro;
+ ro.pin_data = true;
+ auto iter = NewIterator(ro);
+
+ std::vector<std::pair<Slice, std::string>> results;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ std::string prop_value;
+ ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+ ASSERT_EQ("1", prop_value);
+ results.emplace_back(iter->key(), iter->value().ToString());
+ }
+
+ ASSERT_EQ(results.size(), true_data.size());
+ auto data_iter = true_data.begin();
+ for (size_t i = 0; i < results.size(); i++, data_iter++) {
+ auto& kv = results[i];
+ ASSERT_EQ(kv.first, data_iter->first);
+ ASSERT_EQ(kv.second, data_iter->second);
+ }
+
+ delete iter;
+}
+#endif
+
+TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) {
+ Options options = CurrentOptions();
+ BlockBasedTableOptions table_options;
+ table_options.use_delta_encoding = false;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ options.merge_operator = MergeOperators::CreateUInt64AddOperator();
+ DestroyAndReopen(options);
+
+ std::string numbers[7];
+ for (int val = 0; val <= 6; val++) {
+ PutFixed64(numbers + val, val);
+ }
+
+ // +1 all keys in range [ 0 => 999]
+ for (int i = 0; i < 1000; i++) {
+ WriteOptions wo;
+ ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
+ }
+
+ // +2 all keys divisible by 2 in range [ 0 => 999]
+ for (int i = 0; i < 1000; i += 2) {
+ WriteOptions wo;
+ ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
+ }
+
+ // +3 all keys divisible by 5 in range [ 0 => 999]
+ for (int i = 0; i < 1000; i += 5) {
+ WriteOptions wo;
+ ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
+ }
+
+ ReadOptions ro;
+ ro.pin_data = true;
+ auto iter = NewIterator(ro);
+
+ std::vector<std::pair<Slice, std::string>> results;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ std::string prop_value;
+ ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+ ASSERT_EQ("1", prop_value);
+ results.emplace_back(iter->key(), iter->value().ToString());
+ }
+
+ ASSERT_EQ(results.size(), 1000);
+ for (size_t i = 0; i < results.size(); i++) {
+ auto& kv = results[i];
+ ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
+ int expected_val = 1;
+ if (i % 2 == 0) {
+ expected_val += 2;
+ }
+ if (i % 5 == 0) {
+ expected_val += 3;
+ }
+ ASSERT_EQ(kv.second, numbers[expected_val]);
+ }
+
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
+ Options options = CurrentOptions();
+ BlockBasedTableOptions table_options;
+ table_options.use_delta_encoding = false;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ options.write_buffer_size = 100000;
+ DestroyAndReopen(options);
+
+ Random rnd(301);
+
+ std::map<std::string, std::string> true_data;
+ for (int i = 0; i < 1000; i++) {
+ std::string k = rnd.RandomString(10);
+ std::string v = rnd.RandomString(1000);
+ ASSERT_OK(Put(k, v));
+ true_data[k] = v;
+ }
+
+ ReadOptions ro;
+ ro.pin_data = true;
+ auto iter = NewIterator(ro);
+
+ // Delete 50% of the keys and update the other 50%
+ for (auto& kv : true_data) {
+ if (rnd.OneIn(2)) {
+ ASSERT_OK(Delete(kv.first));
+ } else {
+ std::string new_val = rnd.RandomString(1000);
+ ASSERT_OK(Put(kv.first, new_val));
+ }
+ }
+
+ std::vector<std::pair<Slice, std::string>> results;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ std::string prop_value;
+ ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
+ ASSERT_EQ("1", prop_value);
+ results.emplace_back(iter->key(), iter->value().ToString());
+ }
+
+ auto data_iter = true_data.begin();
+ for (size_t i = 0; i < results.size(); i++, data_iter++) {
+ auto& kv = results[i];
+ ASSERT_EQ(kv.first, data_iter->first);
+ ASSERT_EQ(kv.second, data_iter->second);
+ }
+
+ delete iter;
+}
+
+class SliceTransformLimitedDomainGeneric : public SliceTransform {
+ const char* Name() const override {
+ return "SliceTransformLimitedDomainGeneric";
+ }
+
+ Slice Transform(const Slice& src) const override {
+ return Slice(src.data(), 1);
+ }
+
+ bool InDomain(const Slice& src) const override {
+ // prefix will be x????
+ return src.size() >= 1;
+ }
+
+ bool InRange(const Slice& dst) const override {
+ // prefix will be x????
+ return dst.size() == 1;
+ }
+};
+
+TEST_P(DBIteratorTest, IterSeekForPrevCrossingFiles) {
+ Options options = CurrentOptions();
+ options.prefix_extractor.reset(NewFixedPrefixTransform(1));
+ options.disable_auto_compactions = true;
+ // Enable prefix bloom for SST files
+ BlockBasedTableOptions table_options;
+ table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put("a1", "va1"));
+ ASSERT_OK(Put("a2", "va2"));
+ ASSERT_OK(Put("a3", "va3"));
+ ASSERT_OK(Flush());
+
+ ASSERT_OK(Put("b1", "vb1"));
+ ASSERT_OK(Put("b2", "vb2"));
+ ASSERT_OK(Put("b3", "vb3"));
+ ASSERT_OK(Flush());
+
+ ASSERT_OK(Put("b4", "vb4"));
+ ASSERT_OK(Put("d1", "vd1"));
+ ASSERT_OK(Put("d2", "vd2"));
+ ASSERT_OK(Put("d4", "vd4"));
+ ASSERT_OK(Flush());
+
+ MoveFilesToLevel(1);
+ {
+ ReadOptions ro;
+ Iterator* iter = NewIterator(ro);
+
+ iter->SeekForPrev("a4");
+ ASSERT_EQ(iter->key().ToString(), "a3");
+ ASSERT_EQ(iter->value().ToString(), "va3");
+
+ iter->SeekForPrev("c2");
+ ASSERT_EQ(iter->key().ToString(), "b3");
+ iter->SeekForPrev("d3");
+ ASSERT_EQ(iter->key().ToString(), "d2");
+ iter->SeekForPrev("b5");
+ ASSERT_EQ(iter->key().ToString(), "b4");
+ delete iter;
+ }
+
+ {
+ ReadOptions ro;
+ ro.prefix_same_as_start = true;
+ Iterator* iter = NewIterator(ro);
+ iter->SeekForPrev("c2");
+ ASSERT_TRUE(!iter->Valid());
+ ASSERT_OK(iter->status());
+ delete iter;
+ }
+}
+
+TEST_P(DBIteratorTest, IterSeekForPrevCrossingFilesCustomPrefixExtractor) {
+ Options options = CurrentOptions();
+ options.prefix_extractor =
+ std::make_shared<SliceTransformLimitedDomainGeneric>();
+ options.disable_auto_compactions = true;
+ // Enable prefix bloom for SST files
+ BlockBasedTableOptions table_options;
+ table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ DestroyAndReopen(options);
+
+ ASSERT_OK(Put("a1", "va1"));
+ ASSERT_OK(Put("a2", "va2"));
+ ASSERT_OK(Put("a3", "va3"));
+ ASSERT_OK(Flush());
+
+ ASSERT_OK(Put("b1", "vb1"));
+ ASSERT_OK(Put("b2", "vb2"));
+ ASSERT_OK(Put("b3", "vb3"));
+ ASSERT_OK(Flush());
+
+ ASSERT_OK(Put("b4", "vb4"));
+ ASSERT_OK(Put("d1", "vd1"));
+ ASSERT_OK(Put("d2", "vd2"));
+ ASSERT_OK(Put("d4", "vd4"));
+ ASSERT_OK(Flush());
+
+ MoveFilesToLevel(1);
+ {
+ ReadOptions ro;
+ Iterator* iter = NewIterator(ro);
+
+ iter->SeekForPrev("a4");
+ ASSERT_EQ(iter->key().ToString(), "a3");
+ ASSERT_EQ(iter->value().ToString(), "va3");
+
+ iter->SeekForPrev("c2");
+ ASSERT_EQ(iter->key().ToString(), "b3");
+ iter->SeekForPrev("d3");
+ ASSERT_EQ(iter->key().ToString(), "d2");
+ iter->SeekForPrev("b5");
+ ASSERT_EQ(iter->key().ToString(), "b4");
+ delete iter;
+ }
+
+ {
+ ReadOptions ro;
+ ro.prefix_same_as_start = true;
+ Iterator* iter = NewIterator(ro);
+ iter->SeekForPrev("c2");
+ ASSERT_TRUE(!iter->Valid());
+ ASSERT_OK(iter->status());
+ delete iter;
+ }
+}
+
+TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocks) {
+ Options options = CurrentOptions();
+ BlockBasedTableOptions table_options;
+ table_options.block_size = 1; // every block will contain one entry
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
+ options.disable_auto_compactions = true;
+ options.max_sequential_skip_in_iterations = 8;
+
+ DestroyAndReopen(options);
+
+ // Putting such deletes will force DBIter::Prev() to fallback to a Seek
+ for (int file_num = 0; file_num < 10; file_num++) {
+ ASSERT_OK(Delete("key4"));
+ ASSERT_OK(Flush());
+ }
+
+ // First File containing 5 blocks of puts
+ ASSERT_OK(Put("key1", "val1.0"));
+ ASSERT_OK(Put("key2", "val2.0"));
+ ASSERT_OK(Put("key3", "val3.0"));
+ ASSERT_OK(Put("key4", "val4.0"));
+ ASSERT_OK(Put("key5", "val5.0"));
+ ASSERT_OK(Flush());
+
+ // Second file containing 9 blocks of merge operands
+ ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1"));
+ ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2"));
+
+ ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1"));
+ ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2"));
+ ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3"));
+
+ ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1"));
+ ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2"));
+ ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3"));
+ ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4"));
+ ASSERT_OK(Flush());
+
+ {
+ ReadOptions ro;
+ ro.fill_cache = false;
+ Iterator* iter = NewIterator(ro);
+
+ iter->SeekToLast();
+ ASSERT_EQ(iter->key().ToString(), "key5");
+ ASSERT_EQ(iter->value().ToString(), "val5.0");
+
+ iter->Prev();
+ ASSERT_EQ(iter->key().ToString(), "key4");
+ ASSERT_EQ(iter->value().ToString(), "val4.0");
+
+ iter->Prev();
+ ASSERT_EQ(iter->key().ToString(), "key3");
+ ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
+
+ iter->Prev();
+ ASSERT_EQ(iter->key().ToString(), "key2");
+ ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
+
+ iter->Prev();
+ ASSERT_EQ(iter->key().ToString(), "key1");
+ ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2");
+
+ delete iter;
+ }
+}
+
+TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) {
+ Options options = CurrentOptions();
+ options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
+ options.disable_auto_compactions = true;
+ options.level0_slowdown_writes_trigger = (1 << 30);
+ options.level0_stop_writes_trigger = (1 << 30);
+ options.max_sequential_skip_in_iterations = 8;
+ DestroyAndReopen(options);
+
+ const int kNumKeys = 500;
+ // Small number of merge operands to make sure that DBIter::Prev() don't
+ // fall back to Seek()
+ const int kNumMergeOperands = 3;
+ // Use value size that will make sure that every block contain 1 key
+ const int kValSize =
+ static_cast<int>(BlockBasedTableOptions().block_size) * 4;
+ // Percentage of keys that wont get merge operations
+ const int kNoMergeOpPercentage = 20;
+ // Percentage of keys that will be deleted
+ const int kDeletePercentage = 10;
+
+ // For half of the key range we will write multiple deletes first to
+ // force DBIter::Prev() to fall back to Seek()
+ for (int file_num = 0; file_num < 10; file_num++) {
+ for (int i = 0; i < kNumKeys; i += 2) {
+ ASSERT_OK(Delete(Key(i)));
+ }
+ ASSERT_OK(Flush());
+ }
+
+ Random rnd(301);
+ std::map<std::string, std::string> true_data;
+ std::string gen_key;
+ std::string gen_val;
+
+ for (int i = 0; i < kNumKeys; i++) {
+ gen_key = Key(i);
+ gen_val = rnd.RandomString(kValSize);
+
+ ASSERT_OK(Put(gen_key, gen_val));
+ true_data[gen_key] = gen_val;
+ }
+ ASSERT_OK(Flush());
+
+ // Separate values and merge operands in different file so that we
+ // make sure that we don't merge them while flushing but actually
+ // merge them in the read path
+ for (int i = 0; i < kNumKeys; i++) {
+ if (rnd.PercentTrue(kNoMergeOpPercentage)) {
+ // Dont give merge operations for some keys
+ continue;
+ }
+
+ for (int j = 0; j < kNumMergeOperands; j++) {
+ gen_key = Key(i);
+ gen_val = rnd.RandomString(kValSize);
+
+ ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
+ true_data[gen_key] += "," + gen_val;
+ }
+ }
+ ASSERT_OK(Flush());
+
+ for (int i = 0; i < kNumKeys; i++) {
+ if (rnd.PercentTrue(kDeletePercentage)) {
+ gen_key = Key(i);
+
+ ASSERT_OK(Delete(gen_key));
+ true_data.erase(gen_key);
+ }
+ }
+ ASSERT_OK(Flush());
+
+ {
+ ReadOptions ro;
+ ro.fill_cache = false;
+ Iterator* iter = NewIterator(ro);
+ auto data_iter = true_data.rbegin();
+
+ for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
+ ASSERT_EQ(iter->key().ToString(), data_iter->first);
+ ASSERT_EQ(iter->value().ToString(), data_iter->second);
+ data_iter++;
+ }
+ ASSERT_EQ(data_iter, true_data.rend());
+
+ delete iter;
+ }
+
+ {
+ ReadOptions ro;
+ ro.fill_cache = false;
+ Iterator* iter = NewIterator(ro);
+ auto data_iter = true_data.rbegin();
+
+ int entries_right = 0;
+ std::string seek_key;
+ for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
+ // Verify key/value of current position
+ ASSERT_EQ(iter->key().ToString(), data_iter->first);
+ ASSERT_EQ(iter->value().ToString(), data_iter->second);
+
+ bool restore_position_with_seek = rnd.Uniform(2);
+ if (restore_position_with_seek) {
+ seek_key = iter->key().ToString();
+ }
+
+ // Do some Next() operations the restore the iterator to orignal position
+ int next_count =
+ entries_right > 0 ? rnd.Uniform(std::min(entries_right, 10)) : 0;
+ for (int i = 0; i < next_count; i++) {
+ iter->Next();
+ data_iter--;
+
+ ASSERT_EQ(iter->key().ToString(), data_iter->first);
+ ASSERT_EQ(iter->value().ToString(), data_iter->second);
+ }
+
+ if (restore_position_with_seek) {
+ // Restore orignal position using Seek()
+ iter->Seek(seek_key);
+ for (int i = 0; i < next_count; i++) {
+ data_iter++;
+ }
+
+ ASSERT_EQ(iter->key().ToString(), data_iter->first);
+ ASSERT_EQ(iter->value().ToString(), data_iter->second);
+ } else {
+ // Restore original position using Prev()
+ for (int i = 0; i < next_count; i++) {
+ iter->Prev();
+ data_iter++;
+
+ ASSERT_EQ(iter->key().ToString(), data_iter->first);
+ ASSERT_EQ(iter->value().ToString(), data_iter->second);
+ }
+ }
+
+ entries_right++;
+ data_iter++;
+ }
+ ASSERT_EQ(data_iter, true_data.rend());
+
+ delete iter;
+ }
+}
+
+TEST_P(DBIteratorTest, IteratorWithLocalStatistics) {
+ Options options = CurrentOptions();
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ DestroyAndReopen(options);
+
+ Random rnd(301);
+ for (int i = 0; i < 1000; i++) {
+ // Key 10 bytes / Value 10 bytes
+ ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10)));
+ }
+
+ std::atomic<uint64_t> total_next(0);
+ std::atomic<uint64_t> total_next_found(0);
+ std::atomic<uint64_t> total_prev(0);
+ std::atomic<uint64_t> total_prev_found(0);
+ std::atomic<uint64_t> total_bytes(0);
+
+ std::vector<port::Thread> threads;
+ std::function<void()> reader_func_next = [&]() {
+ SetPerfLevel(kEnableCount);
+ get_perf_context()->Reset();
+ Iterator* iter = NewIterator(ReadOptions());
+
+ iter->SeekToFirst();
+ // Seek will bump ITER_BYTES_READ
+ uint64_t bytes = 0;
+ bytes += iter->key().size();
+ bytes += iter->value().size();
+ while (true) {
+ iter->Next();
+ total_next++;
+
+ if (!iter->Valid()) {
+ break;
+ }
+ total_next_found++;
+ bytes += iter->key().size();
+ bytes += iter->value().size();
+ }
+
+ delete iter;
+ ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
+ SetPerfLevel(kDisable);
+ total_bytes += bytes;
+ };
+
+ std::function<void()> reader_func_prev = [&]() {
+ SetPerfLevel(kEnableCount);
+ Iterator* iter = NewIterator(ReadOptions());
+
+ iter->SeekToLast();
+ // Seek will bump ITER_BYTES_READ
+ uint64_t bytes = 0;
+ bytes += iter->key().size();
+ bytes += iter->value().size();
+ while (true) {
+ iter->Prev();
+ total_prev++;
+
+ if (!iter->Valid()) {
+ break;
+ }
+ total_prev_found++;
+ bytes += iter->key().size();
+ bytes += iter->value().size();
+ }
+
+ delete iter;
+ ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
+ SetPerfLevel(kDisable);
+ total_bytes += bytes;
+ };
+
+ for (int i = 0; i < 10; i++) {
+ threads.emplace_back(reader_func_next);
+ }
+ for (int i = 0; i < 15; i++) {
+ threads.emplace_back(reader_func_prev);
+ }
+
+ for (auto& t : threads) {
+ t.join();
+ }
+
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), (uint64_t)total_next);
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
+ (uint64_t)total_next_found);
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), (uint64_t)total_prev);
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
+ (uint64_t)total_prev_found);
+ ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ),
+ (uint64_t)total_bytes);
+}
+
+TEST_P(DBIteratorTest, ReadAhead) {
+ Options options;
+ env_->count_random_reads_ = true;
+ options.env = env_;
+ options.disable_auto_compactions = true;
+ options.write_buffer_size = 4 << 20;
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ BlockBasedTableOptions table_options;
+ table_options.block_size = 1024;
+ table_options.no_block_cache = true;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ Reopen(options);
+
+ std::string value(1024, 'a');
+ for (int i = 0; i < 100; i++) {
+ ASSERT_OK(Put(Key(i), value));
+ }
+ ASSERT_OK(Flush());
+ MoveFilesToLevel(2);
+
+ for (int i = 0; i < 100; i++) {
+ ASSERT_OK(Put(Key(i), value));
+ }
+ ASSERT_OK(Flush());
+ MoveFilesToLevel(1);
+
+ for (int i = 0; i < 100; i++) {
+ ASSERT_OK(Put(Key(i), value));
+ }
+ ASSERT_OK(Flush());
+#ifndef ROCKSDB_LITE
+ ASSERT_EQ("1,1,1", FilesPerLevel());
+#endif // !ROCKSDB_LITE
+
+ env_->random_read_bytes_counter_ = 0;
+ options.statistics->setTickerCount(NO_FILE_OPENS, 0);
+ ReadOptions read_options;
+ auto* iter = NewIterator(read_options);
+ iter->SeekToFirst();
+ int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
+ size_t bytes_read = env_->random_read_bytes_counter_;
+ delete iter;
+
+ int64_t num_file_closes = TestGetTickerCount(options, NO_FILE_CLOSES);
+ env_->random_read_bytes_counter_ = 0;
+ options.statistics->setTickerCount(NO_FILE_OPENS, 0);
+ read_options.readahead_size = 1024 * 10;
+ iter = NewIterator(read_options);
+ iter->SeekToFirst();
+ int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS);
+ size_t bytes_read_readahead = env_->random_read_bytes_counter_;
+ delete iter;
+ int64_t num_file_closes_readahead =
+ TestGetTickerCount(options, NO_FILE_CLOSES);
+ ASSERT_EQ(num_file_opens, num_file_opens_readahead);
+ ASSERT_EQ(num_file_closes, num_file_closes_readahead);
+ ASSERT_GT(bytes_read_readahead, bytes_read);
+ ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3);
+
+ // Verify correctness.
+ iter = NewIterator(read_options);
+ int count = 0;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ ASSERT_EQ(value, iter->value());
+ count++;
+ }
+ ASSERT_EQ(100, count);
+ for (int i = 0; i < 100; i++) {
+ iter->Seek(Key(i));
+ ASSERT_EQ(value, iter->value());
+ }
+ delete iter;
+}
+
+// Insert a key, create a snapshot iterator, overwrite key lots of times,
+// seek to a smaller key. Expect DBIter to fall back to a seek instead of
+// going through all the overwrites linearly.
+TEST_P(DBIteratorTest, DBIteratorSkipRecentDuplicatesTest) {
+ Options options = CurrentOptions();
+ options.env = env_;
+ options.create_if_missing = true;
+ options.max_sequential_skip_in_iterations = 3;
+ options.prefix_extractor = nullptr;
+ options.write_buffer_size = 1 << 27; // big enough to avoid flush
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ DestroyAndReopen(options);
+
+ // Insert.
+ ASSERT_OK(Put("b", "0"));
+
+ // Create iterator.
+ ReadOptions ro;
+ std::unique_ptr<Iterator> iter(NewIterator(ro));
+
+ // Insert a lot.
+ for (int i = 0; i < 100; ++i) {
+ ASSERT_OK(Put("b", std::to_string(i + 1).c_str()));
+ }
+
+#ifndef ROCKSDB_LITE
+ // Check that memtable wasn't flushed.
+ std::string val;
+ ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &val));
+ EXPECT_EQ("0", val);
+#endif
+
+ // Seek iterator to a smaller key.
+ get_perf_context()->Reset();
+ iter->Seek("a");
+ ASSERT_TRUE(iter->Valid());
+ EXPECT_EQ("b", iter->key().ToString());
+ EXPECT_EQ("0", iter->value().ToString());
+
+ // Check that the seek didn't do too much work.
+ // Checks are not tight, just make sure that everything is well below 100.
+ EXPECT_LT(get_perf_context()->internal_key_skipped_count, 4);
+ EXPECT_LT(get_perf_context()->internal_recent_skipped_count, 8);
+ EXPECT_LT(get_perf_context()->seek_on_memtable_count, 10);
+ EXPECT_LT(get_perf_context()->next_on_memtable_count, 10);
+ EXPECT_LT(get_perf_context()->prev_on_memtable_count, 10);
+
+ // Check that iterator did something like what we expect.
+ EXPECT_EQ(get_perf_context()->internal_delete_skipped_count, 0);
+ EXPECT_EQ(get_perf_context()->internal_merge_count, 0);
+ EXPECT_GE(get_perf_context()->internal_recent_skipped_count, 2);
+ EXPECT_GE(get_perf_context()->seek_on_memtable_count, 2);
+ EXPECT_EQ(1,
+ options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION));
+}
+
+TEST_P(DBIteratorTest, Refresh) {
+ ASSERT_OK(Put("x", "y"));
+
+ std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
+ ASSERT_OK(iter->status());
+ iter->Seek(Slice("a"));
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("x")), 0);
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+
+ ASSERT_OK(Put("c", "d"));
+
+ iter->Seek(Slice("a"));
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("x")), 0);
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+
+ ASSERT_OK(iter->status());
+ ASSERT_OK(iter->Refresh());
+
+ iter->Seek(Slice("a"));
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("c")), 0);
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("x")), 0);
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+
+ ASSERT_OK(Put("m", "n"));
+
+ iter->Seek(Slice("a"));
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("c")), 0);
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("x")), 0);
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+
+ ASSERT_OK(iter->status());
+ ASSERT_OK(iter->Refresh());
+
+ iter->Seek(Slice("a"));
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("c")), 0);
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("m")), 0);
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("x")), 0);
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+
+ iter.reset();
+}
+
+TEST_P(DBIteratorTest, RefreshWithSnapshot) {
+ ASSERT_OK(Put("x", "y"));
+ const Snapshot* snapshot = db_->GetSnapshot();
+ ReadOptions options;
+ options.snapshot = snapshot;
+ Iterator* iter = NewIterator(options);
+ ASSERT_OK(iter->status());
+
+ iter->Seek(Slice("a"));
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("x")), 0);
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+
+ ASSERT_OK(Put("c", "d"));
+
+ iter->Seek(Slice("a"));
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().compare(Slice("x")), 0);
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+
+ ASSERT_OK(iter->status());
+ Status s = iter->Refresh();
+ ASSERT_TRUE(s.IsNotSupported());
+ db_->ReleaseSnapshot(snapshot);
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, CreationFailure) {
+ SyncPoint::GetInstance()->SetCallBack(
+ "DBImpl::NewInternalIterator:StatusCallback", [](void* arg) {
+ *(reinterpret_cast<Status*>(arg)) = Status::Corruption("test status");
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+
+ Iterator* iter = NewIterator(ReadOptions());
+ ASSERT_FALSE(iter->Valid());
+ ASSERT_TRUE(iter->status().IsCorruption());
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) {
+ Options options = CurrentOptions();
+ options.max_sequential_skip_in_iterations = 3;
+ DestroyAndReopen(options);
+
+ // write a bunch of kvs to the database.
+ ASSERT_OK(Put("a", "1"));
+ ASSERT_OK(Put("y", "1"));
+ ASSERT_OK(Put("y1", "1"));
+ ASSERT_OK(Put("y2", "1"));
+ ASSERT_OK(Put("y3", "1"));
+ ASSERT_OK(Put("z", "1"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("a", "1"));
+ ASSERT_OK(Put("z", "1"));
+ ASSERT_OK(Put("bar", "1"));
+ ASSERT_OK(Put("foo", "1"));
+
+ std::string upper_bound = "x";
+ Slice ub_slice(upper_bound);
+ ReadOptions ro;
+ ro.iterate_upper_bound = &ub_slice;
+ ro.max_skippable_internal_keys = 1000;
+
+ Iterator* iter = NewIterator(ro);
+ iter->Seek("foo");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ("foo", iter->key().ToString());
+
+ iter->Prev();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("bar", iter->key().ToString());
+
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, TableFilter) {
+ ASSERT_OK(Put("a", "1"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put("b", "2"));
+ ASSERT_OK(Put("c", "3"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+ ASSERT_OK(Put("d", "4"));
+ ASSERT_OK(Put("e", "5"));
+ ASSERT_OK(Put("f", "6"));
+ EXPECT_OK(dbfull()->Flush(FlushOptions()));
+
+ // Ensure the table_filter callback is called once for each table.
+ {
+ std::set<uint64_t> unseen{1, 2, 3};
+ ReadOptions opts;
+ opts.table_filter = [&](const TableProperties& props) {
+ auto it = unseen.find(props.num_entries);
+ if (it == unseen.end()) {
+ ADD_FAILURE() << "saw table properties with an unexpected "
+ << props.num_entries << " entries";
+ } else {
+ unseen.erase(it);
+ }
+ return true;
+ };
+ auto iter = NewIterator(opts);
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->1");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "b->2");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "c->3");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "d->4");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "e->5");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "f->6");
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+ ASSERT_TRUE(unseen.empty());
+ delete iter;
+ }
+
+ // Ensure returning false in the table_filter hides the keys from that table
+ // during iteration.
+ {
+ ReadOptions opts;
+ opts.table_filter = [](const TableProperties& props) {
+ return props.num_entries != 2;
+ };
+ auto iter = NewIterator(opts);
+ iter->SeekToFirst();
+ ASSERT_EQ(IterStatus(iter), "a->1");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "d->4");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "e->5");
+ iter->Next();
+ ASSERT_EQ(IterStatus(iter), "f->6");
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+ delete iter;
+ }
+}
+
+TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) {
+ Options options = CurrentOptions();
+ options.max_sequential_skip_in_iterations = 3;
+ DestroyAndReopen(options);
+
+ // write a bunch of kvs to the database.
+ ASSERT_OK(Put("a", "1"));
+ ASSERT_OK(Put("y", "1"));
+ ASSERT_OK(Put("z", "1"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("a", "1"));
+ ASSERT_OK(Put("z", "1"));
+ ASSERT_OK(Put("bar", "1"));
+ ASSERT_OK(Put("foo", "1"));
+ ASSERT_OK(Put("foo", "2"));
+
+ ASSERT_OK(Put("foo", "3"));
+ ASSERT_OK(Put("foo", "4"));
+ ASSERT_OK(Put("foo", "5"));
+ const Snapshot* snapshot = db_->GetSnapshot();
+ ASSERT_OK(Put("foo", "6"));
+
+ std::string upper_bound = "x";
+ Slice ub_slice(upper_bound);
+ ReadOptions ro;
+ ro.snapshot = snapshot;
+ ro.iterate_upper_bound = &ub_slice;
+
+ Iterator* iter = NewIterator(ro);
+ iter->SeekForPrev("goo");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ("foo", iter->key().ToString());
+ iter->Prev();
+
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ("bar", iter->key().ToString());
+
+ delete iter;
+ db_->ReleaseSnapshot(snapshot);
+}
+
+TEST_P(DBIteratorTest, SkipStatistics) {
+ Options options = CurrentOptions();
+ options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
+ DestroyAndReopen(options);
+
+ int skip_count = 0;
+
+ // write a bunch of kvs to the database.
+ ASSERT_OK(Put("a", "1"));
+ ASSERT_OK(Put("b", "1"));
+ ASSERT_OK(Put("c", "1"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("d", "1"));
+ ASSERT_OK(Put("e", "1"));
+ ASSERT_OK(Put("f", "1"));
+ ASSERT_OK(Put("a", "2"));
+ ASSERT_OK(Put("b", "2"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Delete("d"));
+ ASSERT_OK(Delete("e"));
+ ASSERT_OK(Delete("f"));
+
+ Iterator* iter = NewIterator(ReadOptions());
+ int count = 0;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ ASSERT_OK(iter->status());
+ count++;
+ }
+ ASSERT_EQ(count, 3);
+ delete iter;
+ skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
+ ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
+
+ iter = NewIterator(ReadOptions());
+ count = 0;
+ for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
+ ASSERT_OK(iter->status());
+ count++;
+ }
+ ASSERT_EQ(count, 3);
+ delete iter;
+ skip_count += 8; // Same as above, but in reverse order
+ ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
+
+ ASSERT_OK(Put("aa", "1"));
+ ASSERT_OK(Put("ab", "1"));
+ ASSERT_OK(Put("ac", "1"));
+ ASSERT_OK(Put("ad", "1"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Delete("ab"));
+ ASSERT_OK(Delete("ac"));
+ ASSERT_OK(Delete("ad"));
+
+ ReadOptions ro;
+ Slice prefix("b");
+ ro.iterate_upper_bound = &prefix;
+
+ iter = NewIterator(ro);
+ count = 0;
+ for (iter->Seek("aa"); iter->Valid(); iter->Next()) {
+ ASSERT_OK(iter->status());
+ count++;
+ }
+ ASSERT_EQ(count, 1);
+ delete iter;
+ skip_count += 6; // 3 deletes + 3 original keys
+ ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
+
+ iter = NewIterator(ro);
+ count = 0;
+ for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
+ ASSERT_OK(iter->status());
+ count++;
+ }
+ ASSERT_EQ(count, 2);
+ delete iter;
+ // 3 deletes + 3 original keys + lower sequence of "a"
+ skip_count += 7;
+ ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
+}
+
+TEST_P(DBIteratorTest, SeekAfterHittingManyInternalKeys) {
+ Options options = CurrentOptions();
+ DestroyAndReopen(options);
+ ReadOptions ropts;
+ ropts.max_skippable_internal_keys = 2;
+
+ ASSERT_OK(Put("1", "val_1"));
+ // Add more tombstones than max_skippable_internal_keys so that Next() fails.
+ ASSERT_OK(Delete("2"));
+ ASSERT_OK(Delete("3"));
+ ASSERT_OK(Delete("4"));
+ ASSERT_OK(Delete("5"));
+ ASSERT_OK(Put("6", "val_6"));
+
+ std::unique_ptr<Iterator> iter(NewIterator(ropts));
+ iter->SeekToFirst();
+
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(iter->key().ToString(), "1");
+ ASSERT_EQ(iter->value().ToString(), "val_1");
+
+ // This should fail as incomplete due to too many non-visible internal keys on
+ // the way to the next valid user key.
+ iter->Next();
+ ASSERT_TRUE(!iter->Valid());
+ ASSERT_TRUE(iter->status().IsIncomplete());
+
+ // Get the internal key at which Next() failed.
+ std::string prop_value;
+ ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
+ ASSERT_EQ("4", prop_value);
+
+ // Create a new iterator to seek to the internal key.
+ std::unique_ptr<Iterator> iter2(NewIterator(ropts));
+ iter2->Seek(prop_value);
+ ASSERT_TRUE(iter2->Valid());
+ ASSERT_OK(iter2->status());
+
+ ASSERT_EQ(iter2->key().ToString(), "6");
+ ASSERT_EQ(iter2->value().ToString(), "val_6");
+}
+
+// Reproduces a former bug where iterator would skip some records when DBIter
+// re-seeks subiterator with Incomplete status.
+TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) {
+ Options options = CurrentOptions();
+ BlockBasedTableOptions table_options;
+ // Make sure the sst file has more than one block.
+ table_options.flush_block_policy_factory =
+ std::make_shared<FlushBlockEveryKeyPolicyFactory>();
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ DestroyAndReopen(options);
+
+ // Two records in sst file, each in its own block.
+ ASSERT_OK(Put("b", ""));
+ ASSERT_OK(Put("d", ""));
+ ASSERT_OK(Flush());
+
+ // Create a nonblocking iterator before writing to memtable.
+ ReadOptions ropt;
+ ropt.read_tier = kBlockCacheTier;
+ std::unique_ptr<Iterator> iter(NewIterator(ropt));
+
+ // Overwrite a key in memtable many times to hit
+ // max_sequential_skip_in_iterations (which is 8 by default).
+ for (int i = 0; i < 20; ++i) {
+ ASSERT_OK(Put("c", ""));
+ }
+
+ // Load the second block in sst file into the block cache.
+ {
+ std::unique_ptr<Iterator> iter2(NewIterator(ReadOptions()));
+ iter2->Seek("d");
+ }
+
+ // Finally seek the nonblocking iterator.
+ iter->Seek("a");
+ // With the bug, the status used to be OK, and the iterator used to point to
+ // "d".
+ EXPECT_TRUE(iter->status().IsIncomplete());
+}
+
+TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
+ ASSERT_OK(Put("a", ""));
+ ASSERT_OK(Put("b", ""));
+ ASSERT_OK(Flush());
+
+ ReadOptions ropt;
+ Slice ub = "b";
+ ropt.iterate_upper_bound = &ub;
+
+ std::unique_ptr<Iterator> it(dbfull()->NewIterator(ropt));
+ it->SeekForPrev("a");
+ ASSERT_TRUE(it->Valid());
+ ASSERT_OK(it->status());
+ ASSERT_EQ("a", it->key().ToString());
+ it->Next();
+ ASSERT_FALSE(it->Valid());
+ ASSERT_OK(it->status());
+ it->SeekForPrev("a");
+ ASSERT_OK(it->status());
+
+ ASSERT_TRUE(it->Valid());
+ ASSERT_EQ("a", it->key().ToString());
+}
+
+TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
+ Options options = CurrentOptions();
+ options.compression = CompressionType::kNoCompression;
+ BlockBasedTableOptions table_options;
+ table_options.block_size = 800;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ Reopen(options);
+
+ Random rnd(301);
+ std::string random_str = rnd.RandomString(180);
+
+ ASSERT_OK(Put("1", random_str));
+ ASSERT_OK(Put("2", random_str));
+ ASSERT_OK(Put("3", random_str));
+ ASSERT_OK(Put("4", random_str));
+ // A new block
+ ASSERT_OK(Put("5", random_str));
+ ASSERT_OK(Put("6", random_str));
+ ASSERT_OK(Put("7", random_str));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("8", random_str));
+ ASSERT_OK(Put("9", random_str));
+ ASSERT_OK(Flush());
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+ int num_find_file_in_level = 0;
+ int num_idx_blk_seek = 0;
+ SyncPoint::GetInstance()->SetCallBack(
+ "LevelIterator::Seek:BeforeFindFile",
+ [&](void* /*arg*/) { num_find_file_in_level++; });
+ SyncPoint::GetInstance()->SetCallBack(
+ "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; });
+ SyncPoint::GetInstance()->EnableProcessing();
+
+ {
+ std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
+ iter->Seek("1");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(1, num_find_file_in_level);
+ ASSERT_EQ(1, num_idx_blk_seek);
+
+ iter->Seek("2");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(1, num_find_file_in_level);
+ ASSERT_EQ(1, num_idx_blk_seek);
+
+ iter->Seek("3");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(1, num_find_file_in_level);
+ ASSERT_EQ(1, num_idx_blk_seek);
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(1, num_find_file_in_level);
+ ASSERT_EQ(1, num_idx_blk_seek);
+
+ iter->Seek("5");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(1, num_find_file_in_level);
+ ASSERT_EQ(2, num_idx_blk_seek);
+
+ iter->Seek("6");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(1, num_find_file_in_level);
+ ASSERT_EQ(2, num_idx_blk_seek);
+
+ iter->Seek("7");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(1, num_find_file_in_level);
+ ASSERT_EQ(3, num_idx_blk_seek);
+
+ iter->Seek("8");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(2, num_find_file_in_level);
+ // Still re-seek because "8" is the boundary key, which has
+ // the same user key as the seek key.
+ ASSERT_EQ(4, num_idx_blk_seek);
+
+ iter->Seek("5");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(3, num_find_file_in_level);
+ ASSERT_EQ(5, num_idx_blk_seek);
+
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(3, num_find_file_in_level);
+ ASSERT_EQ(5, num_idx_blk_seek);
+
+ // Seek backward never triggers the index block seek to be skipped
+ iter->Seek("5");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_EQ(3, num_find_file_in_level);
+ ASSERT_EQ(6, num_idx_blk_seek);
+ }
+
+ SyncPoint::GetInstance()->DisableProcessing();
+}
+
+// MyRocks may change iterate bounds before seek. Simply test to make sure such
+// usage doesn't break iterator.
+TEST_P(DBIteratorTest, IterateBoundChangedBeforeSeek) {
+ Options options = CurrentOptions();
+ options.compression = CompressionType::kNoCompression;
+ BlockBasedTableOptions table_options;
+ table_options.block_size = 100;
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+ std::string value(50, 'v');
+ Reopen(options);
+ ASSERT_OK(Put("aaa", value));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("bbb", "v"));
+ ASSERT_OK(Put("ccc", "v"));
+ ASSERT_OK(Put("ddd", "v"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("eee", "v"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+ std::string ub1 = "e";
+ std::string ub2 = "c";
+ Slice ub(ub1);
+ ReadOptions read_opts1;
+ read_opts1.iterate_upper_bound = &ub;
+ Iterator* iter = NewIterator(read_opts1);
+ // Seek and iterate accross block boundary.
+ iter->Seek("b");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("bbb", iter->key());
+ ub = Slice(ub2);
+ iter->Seek("b");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("bbb", iter->key());
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+ ASSERT_OK(iter->status());
+ delete iter;
+
+ std::string lb1 = "a";
+ std::string lb2 = "c";
+ Slice lb(lb1);
+ ReadOptions read_opts2;
+ read_opts2.iterate_lower_bound = &lb;
+ iter = NewIterator(read_opts2);
+ iter->SeekForPrev("d");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("ccc", iter->key());
+ lb = Slice(lb2);
+ iter->SeekForPrev("d");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("ccc", iter->key());
+ iter->Prev();
+ ASSERT_FALSE(iter->Valid());
+ ASSERT_OK(iter->status());
+ delete iter;
+}
+
+TEST_P(DBIteratorTest, IterateWithLowerBoundAcrossFileBoundary) {
+ ASSERT_OK(Put("aaa", "v"));
+ ASSERT_OK(Put("bbb", "v"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("ccc", "v"));
+ ASSERT_OK(Put("ddd", "v"));
+ ASSERT_OK(Flush());
+ // Move both files to bottom level.
+ ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+ Slice lower_bound("b");
+ ReadOptions read_opts;
+ read_opts.iterate_lower_bound = &lower_bound;
+ std::unique_ptr<Iterator> iter(NewIterator(read_opts));
+ iter->SeekForPrev("d");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("ccc", iter->key());
+ iter->Prev();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("bbb", iter->key());
+ iter->Prev();
+ ASSERT_FALSE(iter->Valid());
+ ASSERT_OK(iter->status());
+}
+
+TEST_P(DBIteratorTest, Blob) {
+ Options options = CurrentOptions();
+ options.enable_blob_files = true;
+ options.max_sequential_skip_in_iterations = 2;
+ options.statistics = CreateDBStatistics();
+
+ Reopen(options);
+
+ // Note: we have 4 KVs (3 of which are hidden) for key "b" and
+ // max_sequential_skip_in_iterations is set to 2. Thus, we need to do a reseek
+ // anytime we move from "b" to "c" or vice versa.
+ ASSERT_OK(Put("a", "va"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("b", "vb0"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("b", "vb1"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("b", "vb2"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("b", "vb3"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("c", "vc"));
+ ASSERT_OK(Flush());
+
+ std::unique_ptr<Iterator> iter_guard(NewIterator(ReadOptions()));
+ Iterator* const iter = iter_guard.get();
+
+ iter->SeekToFirst();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
+ ASSERT_EQ(IterStatus(iter), "b->vb3");
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekToFirst();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->SeekToLast();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Prev();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "b->vb3");
+ iter->Prev();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Prev();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekToLast();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ iter->Seek("");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Seek("a");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "a->va");
+ iter->Seek("ax");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "b->vb3");
+
+ iter->SeekForPrev("d");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->SeekForPrev("c");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
+ ASSERT_EQ(IterStatus(iter), "c->vc");
+ iter->SeekForPrev("bx");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3);
+ ASSERT_EQ(IterStatus(iter), "b->vb3");
+
+ iter->Seek("b");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3);
+ ASSERT_EQ(IterStatus(iter), "b->vb3");
+ iter->Seek("z");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3);
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+ iter->SeekForPrev("b");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 4);
+ ASSERT_EQ(IterStatus(iter), "b->vb3");
+ iter->SeekForPrev("");
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 4);
+ ASSERT_EQ(IterStatus(iter), "(invalid)");
+
+ // Switch from reverse to forward
+ iter->SeekToLast();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 4);
+ iter->Prev();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 5);
+ iter->Prev();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 5);
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 6);
+ ASSERT_EQ(IterStatus(iter), "b->vb3");
+
+ // Switch from forward to reverse
+ iter->SeekToFirst();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 6);
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 6);
+ iter->Next();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 7);
+ iter->Prev();
+ ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 8);
+ ASSERT_EQ(IterStatus(iter), "b->vb3");
+}
+
+INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
+ testing::Values(true, false));
+
+// Tests how DBIter work with ReadCallback
+class DBIteratorWithReadCallbackTest : public DBIteratorTest {};
+
+TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) {
+ class TestReadCallback : public ReadCallback {
+ public:
+ explicit TestReadCallback(SequenceNumber _max_visible_seq)
+ : ReadCallback(_max_visible_seq) {}
+
+ bool IsVisibleFullCheck(SequenceNumber seq) override {
+ return seq <= max_visible_seq_;
+ }
+ };
+
+ ASSERT_OK(Put("foo", "v1"));
+ ASSERT_OK(Put("foo", "v2"));
+ ASSERT_OK(Put("foo", "v3"));
+ ASSERT_OK(Put("a", "va"));
+ ASSERT_OK(Put("z", "vz"));
+ SequenceNumber seq1 = db_->GetLatestSequenceNumber();
+ TestReadCallback callback1(seq1);
+ ASSERT_OK(Put("foo", "v4"));
+ ASSERT_OK(Put("foo", "v5"));
+ ASSERT_OK(Put("bar", "v7"));
+
+ SequenceNumber seq2 = db_->GetLatestSequenceNumber();
+ auto* cfd =
+ static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
+ ->cfd();
+ // The iterator are suppose to see data before seq1.
+ Iterator* iter =
+ dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1);
+
+ // Seek
+ // The latest value of "foo" before seq1 is "v3"
+ iter->Seek("foo");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("foo", iter->key());
+ ASSERT_EQ("v3", iter->value());
+ // "bar" is not visible to the iterator. It will move on to the next key
+ // "foo".
+ iter->Seek("bar");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("foo", iter->key());
+ ASSERT_EQ("v3", iter->value());
+
+ // Next
+ // Seek to "a"
+ iter->Seek("a");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("va", iter->value());
+ // "bar" is not visible to the iterator. It will move on to the next key
+ // "foo".
+ iter->Next();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("foo", iter->key());
+ ASSERT_EQ("v3", iter->value());
+
+ // Prev
+ // Seek to "z"
+ iter->Seek("z");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("vz", iter->value());
+ // The previous key is "foo", which is visible to the iterator.
+ iter->Prev();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("foo", iter->key());
+ ASSERT_EQ("v3", iter->value());
+ // "bar" is not visible to the iterator. It will move on to the next key "a".
+ iter->Prev(); // skipping "bar"
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("a", iter->key());
+ ASSERT_EQ("va", iter->value());
+
+ // SeekForPrev
+ // The previous key is "foo", which is visible to the iterator.
+ iter->SeekForPrev("y");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("foo", iter->key());
+ ASSERT_EQ("v3", iter->value());
+ // "bar" is not visible to the iterator. It will move on to the next key "a".
+ iter->SeekForPrev("bar");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("a", iter->key());
+ ASSERT_EQ("va", iter->value());
+
+ delete iter;
+
+ // Prev beyond max_sequential_skip_in_iterations
+ uint64_t num_versions =
+ CurrentOptions().max_sequential_skip_in_iterations + 10;
+ for (uint64_t i = 0; i < num_versions; i++) {
+ ASSERT_OK(Put("bar", std::to_string(i)));
+ }
+ SequenceNumber seq3 = db_->GetLatestSequenceNumber();
+ TestReadCallback callback2(seq3);
+ ASSERT_OK(Put("bar", "v8"));
+ SequenceNumber seq4 = db_->GetLatestSequenceNumber();
+
+ // The iterator is suppose to see data before seq3.
+ iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2);
+ // Seek to "z", which is visible.
+ iter->Seek("z");
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("vz", iter->value());
+ // Previous key is "foo" and the last value "v5" is visible.
+ iter->Prev();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("foo", iter->key());
+ ASSERT_EQ("v5", iter->value());
+ // Since the number of values of "bar" is more than
+ // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
+ // seek in forward direction. Here we test the fallback seek is correct.
+ // The last visible value should be (num_versions - 1), as "v8" is not
+ // visible.
+ iter->Prev();
+ ASSERT_TRUE(iter->Valid());
+ ASSERT_OK(iter->status());
+ ASSERT_EQ("bar", iter->key());
+ ASSERT_EQ(std::to_string(num_versions - 1), iter->value());
+
+ delete iter;
+}
+
+TEST_F(DBIteratorTest, BackwardIterationOnInplaceUpdateMemtable) {
+ Options options = CurrentOptions();
+ options.create_if_missing = true;
+ options.inplace_update_support = false;
+ options.env = env_;
+ DestroyAndReopen(options);
+ constexpr int kNumKeys = 10;
+
+ // Write kNumKeys to WAL.
+ for (int i = 0; i < kNumKeys; ++i) {
+ ASSERT_OK(Put(Key(i), "val"));
+ }
+ ReadOptions read_opts;
+ read_opts.total_order_seek = true;
+ {
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
+ int count = 0;
+ for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
+ ++count;
+ }
+ ASSERT_EQ(kNumKeys, count);
+ }
+
+ // Reopen and rebuild the memtable from WAL.
+ options.create_if_missing = false;
+ options.avoid_flush_during_recovery = true;
+ options.inplace_update_support = true;
+ options.allow_concurrent_memtable_write = false;
+ Reopen(options);
+ {
+ std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
+ iter->SeekToLast();
+ // Backward iteration not supported due to inplace_update_support = true.
+ ASSERT_TRUE(iter->status().IsNotSupported());
+ ASSERT_FALSE(iter->Valid());
+ }
+}
+
+TEST_F(DBIteratorTest, IteratorRefreshReturnSV) {
+ Options options = CurrentOptions();
+ options.disable_auto_compactions = true;
+ DestroyAndReopen(options);
+ ASSERT_OK(
+ db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
+ std::unique_ptr<Iterator> iter{db_->NewIterator(ReadOptions())};
+ SyncPoint::GetInstance()->SetCallBack(
+ "ArenaWrappedDBIter::Refresh:SV", [&](void*) {
+ ASSERT_OK(db_->Put(WriteOptions(), "dummy", "new SV"));
+ // This makes the local SV obselete.
+ ASSERT_OK(Flush());
+ SyncPoint::GetInstance()->DisableProcessing();
+ });
+ SyncPoint::GetInstance()->EnableProcessing();
+ ASSERT_OK(iter->Refresh());
+ iter.reset();
+ // iter used to not cleanup SV, so the Close() below would hit an assertion
+ // error.
+ Close();
+}
+
+} // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+ ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}