From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/test/osd/test_scrub_sched.cc | 402 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 402 insertions(+) create mode 100644 src/test/osd/test_scrub_sched.cc (limited to 'src/test/osd/test_scrub_sched.cc') diff --git a/src/test/osd/test_scrub_sched.cc b/src/test/osd/test_scrub_sched.cc new file mode 100644 index 000000000..efd7ba324 --- /dev/null +++ b/src/test/osd/test_scrub_sched.cc @@ -0,0 +1,402 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/// \file testing the scrub scheduling algorithm + +#include + +#include +#include + +#include "common/async/context_pool.h" +#include "common/ceph_argparse.h" +#include "global/global_context.h" +#include "global/global_init.h" +#include "include/utime_fmt.h" +#include "mon/MonClient.h" +#include "msg/Messenger.h" +#include "os/ObjectStore.h" +#include "osd/PG.h" +#include "osd/osd_types.h" +#include "osd/osd_types_fmt.h" +#include "osd/scrubber/osd_scrub_sched.h" +#include "osd/scrubber_common.h" + +int main(int argc, char** argv) +{ + std::map defaults = { + // make sure we have 3 copies, or some tests won't work + {"osd_pool_default_size", "3"}, + // our map is flat, so just try and split across OSDs, not hosts or whatever + {"osd_crush_chooseleaf_type", "0"}, + }; + std::vector args(argv, argv + argc); + auto cct = global_init(&defaults, + args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +using schedule_result_t = Scrub::schedule_result_t; +using ScrubJobRef = ScrubQueue::ScrubJobRef; +using qu_state_t = ScrubQueue::qu_state_t; + +/// enabling access into ScrubQueue internals +class ScrubSchedTestWrapper : public ScrubQueue { + public: + ScrubSchedTestWrapper(Scrub::ScrubSchedListener& osds) + : ScrubQueue(g_ceph_context, osds) + {} + + void rm_unregistered_jobs() + { + ScrubQueue::rm_unregistered_jobs(to_scrub); + ScrubQueue::rm_unregistered_jobs(penalized); + } + + ScrubQContainer collect_ripe_jobs() + { + return ScrubQueue::collect_ripe_jobs(to_scrub, time_now()); + } + + /** + * unit-test support for faking the current time. When + * not activated specifically - the default is to use ceph_clock_now() + */ + void set_time_for_testing(long faked_now) + { + m_time_for_testing = utime_t{timeval{faked_now}}; + } + void clear_time_for_testing() { m_time_for_testing.reset(); } + mutable std::optional m_time_for_testing; + + utime_t time_now() const final + { + if (m_time_for_testing) { + m_time_for_testing->tv.tv_nsec += 1'000'000; + } + return m_time_for_testing.value_or(ceph_clock_now()); + } + + ~ScrubSchedTestWrapper() override = default; +}; + + +/** + * providing the small number of OSD services used when scheduling + * a scrub + */ +class FakeOsd : public Scrub::ScrubSchedListener { + public: + FakeOsd(int osd_num) : m_osd_num(osd_num) {} + + int get_nodeid() const final { return m_osd_num; } + + schedule_result_t initiate_a_scrub(spg_t pgid, + bool allow_requested_repair_only) final + { + std::ignore = allow_requested_repair_only; + auto res = m_next_response.find(pgid); + if (res == m_next_response.end()) { + return schedule_result_t::no_such_pg; + } + return m_next_response[pgid]; + } + + void set_initiation_response(spg_t pgid, schedule_result_t result) + { + m_next_response[pgid] = result; + } + + private: + int m_osd_num; + std::map m_next_response; +}; + + +/// the static blueprint for creating a scrub job in the scrub queue +struct sjob_config_t { + spg_t spg; + bool are_stats_valid; + + utime_t history_scrub_stamp; + std::optional pool_conf_min; + std::optional pool_conf_max; + bool is_must; + bool is_need_auto; + ScrubQueue::scrub_schedule_t initial_schedule; +}; + + +/** + * the runtime configuration for a scrub job. Created basde on the blueprint + * above (sjob_config_t) + */ +struct sjob_dynamic_data_t { + sjob_config_t initial_config; + pg_info_t mocked_pg_info; + pool_opts_t mocked_pool_opts; + requested_scrub_t request_flags; + ScrubQueue::ScrubJobRef job; +}; + +class TestScrubSched : public ::testing::Test { + public: + TestScrubSched() = default; + + protected: + int m_osd_num{1}; + FakeOsd m_osds{m_osd_num}; + std::unique_ptr m_sched{ + new ScrubSchedTestWrapper(m_osds)}; + + /// the pg-info is queried for stats validity and for the last-scrub-stamp + pg_info_t pg_info{}; + + /// the pool configuration holds some per-pool scrub timing settings + pool_opts_t pool_opts{}; + + /** + * the scrub-jobs created for the tests, along with their corresponding + * "pg info" and pool configuration. In real life - the scrub jobs + * are owned by the respective PGs. + */ + std::vector m_scrub_jobs; + + protected: + sjob_dynamic_data_t create_scrub_job(const sjob_config_t& sjob_data) + { + sjob_dynamic_data_t dyn_data; + dyn_data.initial_config = sjob_data; + + // populate the 'pool options' object with the scrub timing settings + if (sjob_data.pool_conf_min) { + dyn_data.mocked_pool_opts.set(pool_opts_t::SCRUB_MIN_INTERVAL, + sjob_data.pool_conf_min.value()); + } + if (sjob_data.pool_conf_max) { + dyn_data.mocked_pool_opts.set(pool_opts_t::SCRUB_MAX_INTERVAL, + sjob_data.pool_conf_max.value()); + } + + // create the 'pg info' object with the stats + dyn_data.mocked_pg_info = pg_info_t{sjob_data.spg}; + + dyn_data.mocked_pg_info.history.last_scrub_stamp = + sjob_data.history_scrub_stamp; + dyn_data.mocked_pg_info.stats.stats_invalid = !sjob_data.are_stats_valid; + + // fake hust the required 'requested-scrub' flags + std::cout << "request_flags: sjob_data.is_must " << sjob_data.is_must + << std::endl; + dyn_data.request_flags.must_scrub = sjob_data.is_must; + dyn_data.request_flags.need_auto = sjob_data.is_need_auto; + + // create the scrub job + dyn_data.job = ceph::make_ref(g_ceph_context, + sjob_data.spg, + m_osd_num); + m_scrub_jobs.push_back(dyn_data); + return dyn_data; + } + + void register_job_set(const std::vector& job_configs) + { + std::for_each(job_configs.begin(), + job_configs.end(), + [this](const sjob_config_t& sj) { + auto dynjob = create_scrub_job(sj); + m_sched->register_with_osd( + dynjob.job, + m_sched->determine_scrub_time(dynjob.request_flags, + dynjob.mocked_pg_info, + dynjob.mocked_pool_opts)); + }); + } + + /// count the scrub-jobs that are currently in a specific state + int count_scrub_jobs_in_state(qu_state_t state) + { + return std::count_if(m_scrub_jobs.begin(), + m_scrub_jobs.end(), + [state](const sjob_dynamic_data_t& sj) { + return sj.job->state == state; + }); + } + + void list_testers_jobs(std::string hdr) + { + std::cout << fmt::format("{}: {} jobs created for the test:", + hdr, + m_scrub_jobs.size()) + << std::endl; + for (const auto& job : m_scrub_jobs) { + std::cout << fmt::format("\t{}: job {}", hdr, *job.job) << std::endl; + } + } + + void print_all_states(std::string hdr) + { + std::cout << fmt::format( + "{}: Created:{}. Per state: not-reg:{} reg:{} unreg:{}", + hdr, + m_scrub_jobs.size(), + count_scrub_jobs_in_state(qu_state_t::not_registered), + count_scrub_jobs_in_state(qu_state_t::registered), + count_scrub_jobs_in_state(qu_state_t::unregistering)) + << std::endl; + } + + void debug_print_jobs(std::string hdr, + const ScrubQueue::ScrubQContainer& jobs) + { + std::cout << fmt::format("{}: time now {}", hdr, m_sched->time_now()) + << std::endl; + for (const auto& job : jobs) { + std::cout << fmt::format( + "\t{}: job {} ({}): scheduled {}", + hdr, + job->pgid, + job->scheduling_state(m_sched->time_now(), false), + job->get_sched_time()) + << std::endl; + } + } +}; + +// /////////////////////////////////////////////////////////////////////////// +// test data. Scrub-job creation requires a PG-id, and a set of 'scrub request' +// flags + +namespace { + +// the times used during the tests are offset to 1.1.2000, so that +// utime_t formatting will treat them as absolute (not as a relative time) +static const auto epoch_2000 = 946'684'800; + +std::vector sjob_configs = { + { + spg_t{pg_t{1, 1}}, + true, // PG has valid stats + utime_t{std::time_t(epoch_2000 + 1'000'000), 0}, // last-scrub-stamp + 100.0, // min scrub delay in pool config + std::nullopt, // max scrub delay in pool config + false, // must-scrub + false, // need-auto + ScrubQueue::scrub_schedule_t{} // initial schedule + }, + + {spg_t{pg_t{4, 1}}, + true, + utime_t{epoch_2000 + 1'000'000, 0}, + 100.0, + std::nullopt, + true, + false, + ScrubQueue::scrub_schedule_t{}}, + + {spg_t{pg_t{7, 1}}, + true, + utime_t{}, + 1.0, + std::nullopt, + false, + false, + ScrubQueue::scrub_schedule_t{}}, + + {spg_t{pg_t{5, 1}}, + true, + utime_t{epoch_2000 + 1'900'000, 0}, + 1.0, + std::nullopt, + false, + false, + ScrubQueue::scrub_schedule_t{}}}; + +} // anonymous namespace + +// //////////////////////////// tests //////////////////////////////////////// + +/// basic test: scheduling simple jobs, validating their calculated schedule +TEST_F(TestScrubSched, populate_queue) +{ + ASSERT_EQ(0, m_sched->list_registered_jobs().size()); + + auto dynjob_0 = create_scrub_job(sjob_configs[0]); + auto suggested = m_sched->determine_scrub_time(dynjob_0.request_flags, + dynjob_0.mocked_pg_info, + dynjob_0.mocked_pool_opts); + m_sched->register_with_osd(dynjob_0.job, suggested); + std::cout << fmt::format("scheduled at: {}", dynjob_0.job->get_sched_time()) + << std::endl; + + auto dynjob_1 = create_scrub_job(sjob_configs[1]); + suggested = m_sched->determine_scrub_time(dynjob_1.request_flags, + dynjob_1.mocked_pg_info, + dynjob_1.mocked_pool_opts); + m_sched->register_with_osd(dynjob_1.job, suggested); + std::cout << fmt::format("scheduled at: {}", dynjob_1.job->get_sched_time()) + << std::endl; + + EXPECT_EQ(dynjob_1.job->get_sched_time(), utime_t(1, 1)); + EXPECT_EQ(2, m_sched->list_registered_jobs().size()); +} + +/// validate the states of the scrub-jobs (as set in the jobs themselves) +TEST_F(TestScrubSched, states) +{ + m_sched->set_time_for_testing(epoch_2000); + register_job_set(sjob_configs); + list_testers_jobs("testing states"); + EXPECT_EQ(sjob_configs.size(), m_sched->list_registered_jobs().size()); + + // check the initial state of the jobs + print_all_states(""); + m_sched->rm_unregistered_jobs(); + EXPECT_EQ(0, count_scrub_jobs_in_state(qu_state_t::not_registered)); + + // now - remove a couple of them + m_sched->remove_from_osd_queue(m_scrub_jobs[2].job); + m_sched->remove_from_osd_queue(m_scrub_jobs[1].job); + m_sched->remove_from_osd_queue(m_scrub_jobs[2].job); // should have no effect + + print_all_states(""); + EXPECT_EQ(2, count_scrub_jobs_in_state(qu_state_t::registered)); + EXPECT_EQ(2, count_scrub_jobs_in_state(qu_state_t::unregistering)); + + m_sched->rm_unregistered_jobs(); + EXPECT_EQ(2, count_scrub_jobs_in_state(qu_state_t::not_registered)); + std::cout << fmt::format("inp size: {}. In list-registered: {}", + sjob_configs.size(), + m_sched->list_registered_jobs().size()) + << std::endl; + EXPECT_EQ(sjob_configs.size() - 2, m_sched->list_registered_jobs().size()); +} + +/// jobs that are ripe should be in the ready list, sorted by their scheduled +/// time +TEST_F(TestScrubSched, ready_list) +{ + m_sched->set_time_for_testing(epoch_2000 + 900'000); + register_job_set(sjob_configs); + list_testers_jobs("testing states"); + EXPECT_EQ(sjob_configs.size(), m_sched->list_registered_jobs().size()); + + m_sched->set_time_for_testing(epoch_2000 + 1'000'000); + auto all_reg_jobs = m_sched->list_registered_jobs(); + debug_print_jobs("registered", all_reg_jobs); + + auto ripe_jobs = m_sched->collect_ripe_jobs(); + EXPECT_EQ(2, ripe_jobs.size()); + debug_print_jobs("ready_list", ripe_jobs); + + m_sched->set_time_for_testing(epoch_2000 + 3'000'000); + // all jobs should be in the ready list + ripe_jobs = m_sched->collect_ripe_jobs(); + EXPECT_EQ(4, ripe_jobs.size()); + debug_print_jobs("ready_list", ripe_jobs); +} -- cgit v1.2.3