diff options
Diffstat (limited to 'src/rocksdb/db/db_write_test.cc')
-rw-r--r-- | src/rocksdb/db/db_write_test.cc | 329 |
1 files changed, 329 insertions, 0 deletions
diff --git a/src/rocksdb/db/db_write_test.cc b/src/rocksdb/db/db_write_test.cc new file mode 100644 index 000000000..cc1aaac08 --- /dev/null +++ b/src/rocksdb/db/db_write_test.cc @@ -0,0 +1,329 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include <atomic> +#include <memory> +#include <thread> +#include <vector> +#include <fstream> +#include "db/db_test_util.h" +#include "db/write_batch_internal.h" +#include "db/write_thread.h" +#include "port/port.h" +#include "port/stack_trace.h" +#include "test_util/fault_injection_test_env.h" +#include "test_util/sync_point.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { + +// Test variations of WriteImpl. +class DBWriteTest : public DBTestBase, public testing::WithParamInterface<int> { + public: + DBWriteTest() : DBTestBase("/db_write_test") {} + + Options GetOptions() { return DBTestBase::GetOptions(GetParam()); } + + void Open() { DBTestBase::Reopen(GetOptions()); } +}; + +// It is invalid to do sync write while disabling WAL. +TEST_P(DBWriteTest, SyncAndDisableWAL) { + WriteOptions write_options; + write_options.sync = true; + write_options.disableWAL = true; + ASSERT_TRUE(dbfull()->Put(write_options, "foo", "bar").IsInvalidArgument()); + WriteBatch batch; + ASSERT_OK(batch.Put("foo", "bar")); + ASSERT_TRUE(dbfull()->Write(write_options, &batch).IsInvalidArgument()); +} + +TEST_P(DBWriteTest, WriteThreadHangOnWriteStall) { + Options options = GetOptions(); + options.level0_stop_writes_trigger = options.level0_slowdown_writes_trigger = 4; + std::vector<port::Thread> threads; + std::atomic<int> thread_num(0); + port::Mutex mutex; + port::CondVar cv(&mutex); + + Reopen(options); + + std::function<void()> write_slowdown_func = [&]() { + int a = thread_num.fetch_add(1); + std::string key = "foo" + std::to_string(a); + WriteOptions wo; + wo.no_slowdown = false; + dbfull()->Put(wo, key, "bar"); + }; + std::function<void()> write_no_slowdown_func = [&]() { + int a = thread_num.fetch_add(1); + std::string key = "foo" + std::to_string(a); + WriteOptions wo; + wo.no_slowdown = true; + dbfull()->Put(wo, key, "bar"); + }; + std::function<void(void *)> unblock_main_thread_func = [&](void *) { + mutex.Lock(); + cv.SignalAll(); + mutex.Unlock(); + }; + + // Create 3 L0 files and schedule 4th without waiting + Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"); + Flush(); + Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"); + Flush(); + Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"); + Flush(); + Put("foo" + std::to_string(thread_num.fetch_add(1)), "bar"); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "WriteThread::JoinBatchGroup:Start", unblock_main_thread_func); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBWriteTest::WriteThreadHangOnWriteStall:1", + "DBImpl::BackgroundCallFlush:start"}, + {"DBWriteTest::WriteThreadHangOnWriteStall:2", + "DBImpl::WriteImpl:BeforeLeaderEnters"}, + // Make compaction start wait for the write stall to be detected and + // implemented by a write group leader + {"DBWriteTest::WriteThreadHangOnWriteStall:3", + "BackgroundCallCompaction:0"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + // Schedule creation of 4th L0 file without waiting. This will seal the + // memtable and then wait for a sync point before writing the file. We need + // to do it this way because SwitchMemtable() needs to enter the + // write_thread + FlushOptions fopt; + fopt.wait = false; + dbfull()->Flush(fopt); + + // Create a mix of slowdown/no_slowdown write threads + mutex.Lock(); + // First leader + threads.emplace_back(write_slowdown_func); + cv.Wait(); + // Second leader. Will stall writes + threads.emplace_back(write_slowdown_func); + cv.Wait(); + threads.emplace_back(write_no_slowdown_func); + cv.Wait(); + threads.emplace_back(write_slowdown_func); + cv.Wait(); + threads.emplace_back(write_no_slowdown_func); + cv.Wait(); + threads.emplace_back(write_slowdown_func); + cv.Wait(); + mutex.Unlock(); + + TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:1"); + dbfull()->TEST_WaitForFlushMemTable(nullptr); + // This would have triggered a write stall. Unblock the write group leader + TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:2"); + // The leader is going to create missing newer links. When the leader finishes, + // the next leader is going to delay writes and fail writers with no_slowdown + + TEST_SYNC_POINT("DBWriteTest::WriteThreadHangOnWriteStall:3"); + for (auto& t : threads) { + t.join(); + } +} + +TEST_P(DBWriteTest, IOErrorOnWALWritePropagateToWriteThreadFollower) { + constexpr int kNumThreads = 5; + std::unique_ptr<FaultInjectionTestEnv> mock_env( + new FaultInjectionTestEnv(Env::Default())); + Options options = GetOptions(); + options.env = mock_env.get(); + Reopen(options); + std::atomic<int> ready_count{0}; + std::atomic<int> leader_count{0}; + std::vector<port::Thread> threads; + mock_env->SetFilesystemActive(false); + + // Wait until all threads linked to write threads, to make sure + // all threads join the same batch group. + SyncPoint::GetInstance()->SetCallBack( + "WriteThread::JoinBatchGroup:Wait", [&](void* arg) { + ready_count++; + auto* w = reinterpret_cast<WriteThread::Writer*>(arg); + if (w->state == WriteThread::STATE_GROUP_LEADER) { + leader_count++; + while (ready_count < kNumThreads) { + // busy waiting + } + } + }); + SyncPoint::GetInstance()->EnableProcessing(); + for (int i = 0; i < kNumThreads; i++) { + threads.push_back(port::Thread( + [&](int index) { + // All threads should fail. + auto res = Put("key" + ToString(index), "value"); + if (options.manual_wal_flush) { + ASSERT_TRUE(res.ok()); + // we should see fs error when we do the flush + + // TSAN reports a false alarm for lock-order-inversion but Open and + // FlushWAL are not run concurrently. Disabling this until TSAN is + // fixed. + // res = dbfull()->FlushWAL(false); + // ASSERT_FALSE(res.ok()); + } else { + ASSERT_FALSE(res.ok()); + } + }, + i)); + } + for (int i = 0; i < kNumThreads; i++) { + threads[i].join(); + } + ASSERT_EQ(1, leader_count); + // Close before mock_env destruct. + Close(); +} + +TEST_P(DBWriteTest, ManualWalFlushInEffect) { + Options options = GetOptions(); + Reopen(options); + // try the 1st WAL created during open + ASSERT_TRUE(Put("key" + ToString(0), "value").ok()); + ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); + ASSERT_TRUE(dbfull()->FlushWAL(false).ok()); + ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty()); + // try the 2nd wal created during SwitchWAL + dbfull()->TEST_SwitchWAL(); + ASSERT_TRUE(Put("key" + ToString(0), "value").ok()); + ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); + ASSERT_TRUE(dbfull()->FlushWAL(false).ok()); + ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty()); +} + +TEST_P(DBWriteTest, IOErrorOnWALWriteTriggersReadOnlyMode) { + std::unique_ptr<FaultInjectionTestEnv> mock_env( + new FaultInjectionTestEnv(Env::Default())); + Options options = GetOptions(); + options.env = mock_env.get(); + Reopen(options); + for (int i = 0; i < 2; i++) { + // Forcibly fail WAL write for the first Put only. Subsequent Puts should + // fail due to read-only mode + mock_env->SetFilesystemActive(i != 0); + auto res = Put("key" + ToString(i), "value"); + // TSAN reports a false alarm for lock-order-inversion but Open and + // FlushWAL are not run concurrently. Disabling this until TSAN is + // fixed. + /* + if (options.manual_wal_flush && i == 0) { + // even with manual_wal_flush the 2nd Put should return error because of + // the read-only mode + ASSERT_TRUE(res.ok()); + // we should see fs error when we do the flush + res = dbfull()->FlushWAL(false); + } + */ + if (!options.manual_wal_flush) { + ASSERT_FALSE(res.ok()); + } + } + // Close before mock_env destruct. + Close(); +} + +TEST_P(DBWriteTest, IOErrorOnSwitchMemtable) { + Random rnd(301); + std::unique_ptr<FaultInjectionTestEnv> mock_env( + new FaultInjectionTestEnv(Env::Default())); + Options options = GetOptions(); + options.env = mock_env.get(); + options.writable_file_max_buffer_size = 4 * 1024 * 1024; + options.write_buffer_size = 3 * 512 * 1024; + options.wal_bytes_per_sync = 256 * 1024; + options.manual_wal_flush = true; + Reopen(options); + mock_env->SetFilesystemActive(false, Status::IOError("Not active")); + Status s; + for (int i = 0; i < 4 * 512; ++i) { + s = Put(Key(i), RandomString(&rnd, 1024)); + if (!s.ok()) { + break; + } + } + ASSERT_EQ(s.severity(), Status::Severity::kFatalError); + + mock_env->SetFilesystemActive(true); + // Close before mock_env destruct. + Close(); +} + +// Test that db->LockWAL() flushes the WAL after locking. +TEST_P(DBWriteTest, LockWalInEffect) { + Options options = GetOptions(); + Reopen(options); + // try the 1st WAL created during open + ASSERT_OK(Put("key" + ToString(0), "value")); + ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); + ASSERT_OK(dbfull()->LockWAL()); + ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty(false)); + ASSERT_OK(dbfull()->UnlockWAL()); + // try the 2nd wal created during SwitchWAL + dbfull()->TEST_SwitchWAL(); + ASSERT_OK(Put("key" + ToString(0), "value")); + ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty()); + ASSERT_OK(dbfull()->LockWAL()); + ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty(false)); + ASSERT_OK(dbfull()->UnlockWAL()); +} + +TEST_P(DBWriteTest, ConcurrentlyDisabledWAL) { + Options options = GetOptions(); + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + options.statistics->set_stats_level(StatsLevel::kAll); + Reopen(options); + std::string wal_key_prefix = "WAL_KEY_"; + std::string no_wal_key_prefix = "K_"; + // 100 KB value each for NO-WAL operation + std::string no_wal_value(1024 * 100, 'X'); + // 1B value each for WAL operation + std::string wal_value = "0"; + std::thread threads[10]; + for (int t = 0; t < 10; t++) { + threads[t] = std::thread([t, wal_key_prefix, wal_value, no_wal_key_prefix, no_wal_value, this] { + for(int i = 0; i < 10; i++) { + ROCKSDB_NAMESPACE::WriteOptions write_option_disable; + write_option_disable.disableWAL = true; + ROCKSDB_NAMESPACE::WriteOptions write_option_default; + std::string no_wal_key = no_wal_key_prefix + std::to_string(t) + + "_" + std::to_string(i); + this->Put(no_wal_key, no_wal_value, write_option_disable); + std::string wal_key = + wal_key_prefix + std::to_string(i) + "_" + std::to_string(i); + this->Put(wal_key, wal_value, write_option_default); + dbfull()->SyncWAL(); + } + return 0; + }); + } + for (auto& t: threads) { + t.join(); + } + uint64_t bytes_num = options.statistics->getTickerCount( + ROCKSDB_NAMESPACE::Tickers::WAL_FILE_BYTES); + // written WAL size should less than 100KB (even included HEADER & FOOTER overhead) + ASSERT_LE(bytes_num, 1024 * 100); +} + +INSTANTIATE_TEST_CASE_P(DBWriteTestInstance, DBWriteTest, + testing::Values(DBTestBase::kDefault, + DBTestBase::kConcurrentWALWrites, + DBTestBase::kPipelinedWrite)); + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} |