diff options
Diffstat (limited to '')
-rw-r--r-- | src/test/mon/CMakeLists.txt | 83 | ||||
-rw-r--r-- | src/test/mon/MonMap.cc | 245 | ||||
-rw-r--r-- | src/test/mon/PGMap.cc | 168 | ||||
-rwxr-xr-x | src/test/mon/bench_auth.py | 105 | ||||
-rw-r--r-- | src/test/mon/moncap.cc | 376 | ||||
-rw-r--r-- | src/test/mon/test-mon-msg.cc | 338 | ||||
-rw-r--r-- | src/test/mon/test_election.cc | 1003 | ||||
-rw-r--r-- | src/test/mon/test_log_rss_usage.cc | 101 | ||||
-rw-r--r-- | src/test/mon/test_mon_memory_target.cc | 79 | ||||
-rw-r--r-- | src/test/mon/test_mon_rss_usage.cc | 72 | ||||
-rw-r--r-- | src/test/mon/test_mon_types.cc | 140 | ||||
-rw-r--r-- | src/test/mon/test_mon_workloadgen.cc | 1104 |
12 files changed, 3814 insertions, 0 deletions
diff --git a/src/test/mon/CMakeLists.txt b/src/test/mon/CMakeLists.txt new file mode 100644 index 000000000..943ca99a3 --- /dev/null +++ b/src/test/mon/CMakeLists.txt @@ -0,0 +1,83 @@ +# ceph_test_mon_workloadgen +add_executable(ceph_test_mon_workloadgen + test_mon_workloadgen.cc + ) +target_link_libraries(ceph_test_mon_workloadgen + os + osdc + global + ${EXTRALIBS} + ${CMAKE_DL_LIBS} + ) +install(TARGETS ceph_test_mon_workloadgen + DESTINATION ${CMAKE_INSTALL_BINDIR}) + +# ceph_test_mon_msg +add_executable(ceph_test_mon_msg + test-mon-msg.cc + ) +target_link_libraries(ceph_test_mon_msg os osdc global ${UNITTEST_LIBS}) + +# unittest_mon_moncap +add_executable(unittest_mon_moncap + moncap.cc + ) +add_ceph_unittest(unittest_mon_moncap) +target_link_libraries(unittest_mon_moncap mon global) + +# unittest_mon_map +add_executable(unittest_mon_monmap + MonMap.cc + ) +add_ceph_unittest(unittest_mon_monmap) +target_link_libraries(unittest_mon_monmap mon global) + +# unittest_mon_pgmap +add_executable(unittest_mon_pgmap + PGMap.cc + $<TARGET_OBJECTS:unit-main> + ) +add_ceph_unittest(unittest_mon_pgmap) +target_link_libraries(unittest_mon_pgmap mon global) + +# unittest_mon_montypes +add_executable(unittest_mon_montypes + test_mon_types.cc + ) +add_ceph_unittest(unittest_mon_montypes) +target_link_libraries(unittest_mon_montypes mon global) + +# ceph_test_mon_memory_target +add_executable(ceph_test_mon_memory_target + test_mon_memory_target.cc) +target_link_libraries(ceph_test_mon_memory_target Boost::system Threads::Threads) +set_target_properties(ceph_test_mon_memory_target PROPERTIES + SKIP_RPATH TRUE + INSTALL_RPATH "") +install(TARGETS ceph_test_mon_memory_target + DESTINATION ${CMAKE_INSTALL_BINDIR}) + +# ceph_test_mon_log_rss_usage +add_executable(ceph_test_log_rss_usage + test_log_rss_usage.cc) +set_target_properties(ceph_test_log_rss_usage PROPERTIES + SKIP_RPATH TRUE + INSTALL_RPATH "") +install(TARGETS ceph_test_log_rss_usage + DESTINATION ${CMAKE_INSTALL_BINDIR}) + +# ceph_test_mon_rss_usage +add_executable(ceph_test_mon_rss_usage + test_mon_rss_usage.cc) +set_target_properties(ceph_test_mon_rss_usage PROPERTIES + SKIP_RPATH TRUE + INSTALL_RPATH "") +install(TARGETS ceph_test_mon_rss_usage + DESTINATION ${CMAKE_INSTALL_BINDIR}) + +#unittest_mon_election +add_executable(unittest_mon_election + test_election.cc + ) +add_ceph_unittest(unittest_mon_election) +target_link_libraries(unittest_mon_election mon global) diff --git a/src/test/mon/MonMap.cc b/src/test/mon/MonMap.cc new file mode 100644 index 000000000..c9bcb7fec --- /dev/null +++ b/src/test/mon/MonMap.cc @@ -0,0 +1,245 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 SUSE LINUX GmbH + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ +#include "mon/MonMap.h" +#include "common/ceph_context.h" +#include "common/dns_resolve.h" +#include "test/common/dns_messages.h" + +#include "common/debug.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include <boost/smart_ptr/intrusive_ptr.hpp> + +#include <sstream> + +#define TEST_DEBUG 20 + +#define dout_subsys ceph_subsys_mon + + +using ::testing::Return; +using ::testing::_; +using ::testing::SetArrayArgument; +using ::testing::DoAll; +using ::testing::StrEq; + + +class MonMapTest : public ::testing::Test { + protected: + virtual void SetUp() { + g_ceph_context->_conf->subsys.set_log_level(dout_subsys, TEST_DEBUG); + } + + virtual void TearDown() { + DNSResolver::get_instance(nullptr); + } +}; + +TEST_F(MonMapTest, DISABLED_build_initial_config_from_dns) { + + MockResolvHWrapper *resolvH = new MockResolvHWrapper(); + DNSResolver::get_instance(resolvH); + + int len = sizeof(ns_search_msg_ok_payload); + int lena = sizeof(ns_query_msg_mon_a_payload); + int lenb = sizeof(ns_query_msg_mon_b_payload); + int lenc = sizeof(ns_query_msg_mon_c_payload); + + using ::testing::InSequence; + { + InSequence s; + +#ifdef HAVE_RES_NQUERY + EXPECT_CALL(*resolvH, res_nsearch(_, StrEq("_cephmon._tcp"), C_IN, T_SRV, _, _)) + .WillOnce(DoAll(SetArrayArgument<4>(ns_search_msg_ok_payload, + ns_search_msg_ok_payload+len), Return(len))); + + EXPECT_CALL(*resolvH, res_nquery(_,StrEq("mon.a.ceph.com"), C_IN, T_A,_,_)) + .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_a_payload, + ns_query_msg_mon_a_payload+lena), Return(lena))); + + EXPECT_CALL(*resolvH, res_nquery(_, StrEq("mon.c.ceph.com"), C_IN, T_A,_,_)) + .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_c_payload, + ns_query_msg_mon_c_payload+lenc), Return(lenc))); + + EXPECT_CALL(*resolvH, res_nquery(_,StrEq("mon.b.ceph.com"), C_IN, T_A, _,_)) + .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_b_payload, + ns_query_msg_mon_b_payload+lenb), Return(lenb))); +#else + EXPECT_CALL(*resolvH, res_search(StrEq("_cephmon._tcp"), C_IN, T_SRV, _, _)) + .WillOnce(DoAll(SetArrayArgument<3>(ns_search_msg_ok_payload, + ns_search_msg_ok_payload+len), Return(len))); + + EXPECT_CALL(*resolvH, res_query(StrEq("mon.a.ceph.com"), C_IN, T_A,_,_)) + .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_a_payload, + ns_query_msg_mon_a_payload+lena), Return(lena))); + + EXPECT_CALL(*resolvH, res_query(StrEq("mon.c.ceph.com"), C_IN, T_A,_,_)) + .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_c_payload, + ns_query_msg_mon_c_payload+lenc), Return(lenc))); + + EXPECT_CALL(*resolvH, res_query(StrEq("mon.b.ceph.com"), C_IN, T_A, _,_)) + .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_b_payload, + ns_query_msg_mon_b_payload+lenb), Return(lenb))); +#endif + } + + + + boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON); + cct->_conf.set_val("mon_dns_srv_name", "cephmon"); + MonMap monmap; + int r = monmap.build_initial(cct.get(), false, std::cerr); + + ASSERT_EQ(r, 0); + ASSERT_EQ(monmap.mon_info.size(), (unsigned int)3); + auto it = monmap.mon_info.find("mon.a"); + ASSERT_NE(it, monmap.mon_info.end()); + std::ostringstream os; + os << it->second.public_addrs; + ASSERT_EQ(os.str(), "192.168.1.11:6789/0"); + os.str(""); + it = monmap.mon_info.find("mon.b"); + ASSERT_NE(it, monmap.mon_info.end()); + os << it->second.public_addrs; + ASSERT_EQ(os.str(), "192.168.1.12:6789/0"); + os.str(""); + it = monmap.mon_info.find("mon.c"); + ASSERT_NE(it, monmap.mon_info.end()); + os << it->second.public_addrs; + ASSERT_EQ(os.str(), "192.168.1.13:6789/0"); +} + +TEST_F(MonMapTest, DISABLED_build_initial_config_from_dns_fail) { + MockResolvHWrapper *resolvH = new MockResolvHWrapper(); + DNSResolver::get_instance(resolvH); + + +#ifdef HAVE_RES_NQUERY + EXPECT_CALL(*resolvH, res_nsearch(_, StrEq("_ceph-mon._tcp"), C_IN, T_SRV, _, _)) + .WillOnce(Return(0)); +#else + EXPECT_CALL(*resolvH, res_search(StrEq("_ceph-mon._tcp"), C_IN, T_SRV, _, _)) + .WillOnce(Return(0)); +#endif + + boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON); + // using default value of mon_dns_srv_name option + MonMap monmap; + int r = monmap.build_initial(cct.get(), false, std::cerr); + + ASSERT_EQ(r, -ENOENT); + ASSERT_EQ(monmap.mon_info.size(), (unsigned int)0); + +} + +TEST_F(MonMapTest, DISABLED_build_initial_config_from_dns_with_domain) { + + MockResolvHWrapper *resolvH = new MockResolvHWrapper(); + DNSResolver::get_instance(resolvH); + + int len = sizeof(ns_search_msg_ok_payload); + int lena = sizeof(ns_query_msg_mon_a_payload); + int lenb = sizeof(ns_query_msg_mon_b_payload); + int lenc = sizeof(ns_query_msg_mon_c_payload); + + using ::testing::InSequence; + { + InSequence s; + +#ifdef HAVE_RES_NQUERY + EXPECT_CALL(*resolvH, res_nsearch(_, StrEq("_cephmon._tcp.ceph.com"), C_IN, T_SRV, _, _)) + .WillOnce(DoAll(SetArrayArgument<4>(ns_search_msg_ok_payload, + ns_search_msg_ok_payload+len), Return(len))); + + EXPECT_CALL(*resolvH, res_nquery(_,StrEq("mon.a.ceph.com"), C_IN, T_A,_,_)) + .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_a_payload, + ns_query_msg_mon_a_payload+lena), Return(lena))); + + EXPECT_CALL(*resolvH, res_nquery(_, StrEq("mon.c.ceph.com"), C_IN, T_A,_,_)) + .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_c_payload, + ns_query_msg_mon_c_payload+lenc), Return(lenc))); + + EXPECT_CALL(*resolvH, res_nquery(_,StrEq("mon.b.ceph.com"), C_IN, T_A, _,_)) + .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_b_payload, + ns_query_msg_mon_b_payload+lenb), Return(lenb))); +#else + EXPECT_CALL(*resolvH, res_search(StrEq("_cephmon._tcp.ceph.com"), C_IN, T_SRV, _, _)) + .WillOnce(DoAll(SetArrayArgument<3>(ns_search_msg_ok_payload, + ns_search_msg_ok_payload+len), Return(len))); + + EXPECT_CALL(*resolvH, res_query(StrEq("mon.a.ceph.com"), C_IN, T_A,_,_)) + .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_a_payload, + ns_query_msg_mon_a_payload+lena), Return(lena))); + + EXPECT_CALL(*resolvH, res_query(StrEq("mon.c.ceph.com"), C_IN, T_A,_,_)) + .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_c_payload, + ns_query_msg_mon_c_payload+lenc), Return(lenc))); + + EXPECT_CALL(*resolvH, res_query(StrEq("mon.b.ceph.com"), C_IN, T_A, _,_)) + .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_b_payload, + ns_query_msg_mon_b_payload+lenb), Return(lenb))); +#endif + } + + + + boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON); + cct->_conf.set_val("mon_dns_srv_name", "cephmon_ceph.com"); + MonMap monmap; + int r = monmap.build_initial(cct.get(), false, std::cerr); + + ASSERT_EQ(r, 0); + ASSERT_EQ(monmap.mon_info.size(), (unsigned int)3); + auto it = monmap.mon_info.find("mon.a"); + ASSERT_NE(it, monmap.mon_info.end()); + std::ostringstream os; + os << it->second.public_addrs; + ASSERT_EQ(os.str(), "192.168.1.11:6789/0"); + os.str(""); + it = monmap.mon_info.find("mon.b"); + ASSERT_NE(it, monmap.mon_info.end()); + os << it->second.public_addrs; + ASSERT_EQ(os.str(), "192.168.1.12:6789/0"); + os.str(""); + it = monmap.mon_info.find("mon.c"); + ASSERT_NE(it, monmap.mon_info.end()); + os << it->second.public_addrs; + ASSERT_EQ(os.str(), "192.168.1.13:6789/0"); +} + +TEST(MonMapBuildInitial, build_initial_mon_host_from_dns) { + boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON); + cct->_conf.set_val("mon_host", "ceph.io"); + MonMap monmap; + int r = monmap.build_initial(cct.get(), false, std::cerr); + ASSERT_EQ(r, 0); + ASSERT_GE(monmap.mon_info.size(), 1u); + for (const auto& [name, info] : monmap.mon_info) { + std::cerr << info << std::endl; + } +} + +TEST(MonMapBuildInitial, build_initial_mon_host_from_dns_fail) { + boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON); + cct->_conf.set_val("mon_host", "ceph.noname"); + MonMap monmap; + int r = monmap.build_initial(cct.get(), false, std::cerr); +#if defined(__FreeBSD__) + ASSERT_EQ(r, -ENOENT); +#else + ASSERT_EQ(r, -EINVAL); +#endif +} diff --git a/src/test/mon/PGMap.cc b/src/test/mon/PGMap.cc new file mode 100644 index 000000000..756665132 --- /dev/null +++ b/src/test/mon/PGMap.cc @@ -0,0 +1,168 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Inktank <info@inktank.com> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2, as published by the Free Software + * Foundation. See file COPYING. + */ + +#include "mon/PGMap.h" +#include "gtest/gtest.h" + +#include "include/stringify.h" + + +namespace { + class CheckTextTable : public TextTable { + public: + explicit CheckTextTable(bool verbose) { + for (int i = 0; i < 5; i++) { + define_column("", TextTable::LEFT, TextTable::LEFT); + } + if (verbose) { + for (int i = 0; i < 9; i++) { + define_column("", TextTable::LEFT, TextTable::LEFT); + } + } + } + const string& get(unsigned r, unsigned c) const { + ceph_assert(r < row.size()); + ceph_assert(c < row[r].size()); + return row[r][c]; + } + }; + + // copied from PGMap.cc + string percentify(float a) { + stringstream ss; + if (a < 0.01) + ss << "0"; + else + ss << std::fixed << std::setprecision(2) << a; + return ss.str(); + } +} + +// dump_object_stat_sum() is called by "ceph df" command +// with table, without formatter, verbose = true, not empty, avail > 0 +TEST(pgmap, dump_object_stat_sum_0) +{ + bool verbose = true; + CheckTextTable tbl(verbose); + pool_stat_t pool_stat; + object_stat_sum_t& sum = pool_stat.stats.sum; + sum.num_bytes = 42 * 1024 * 1024; + sum.num_objects = 42; + sum.num_objects_degraded = 13; // there are 13 missings + not_yet_backfilled + sum.num_objects_dirty = 2; + sum.num_rd = 100; + sum.num_rd_kb = 123; + sum.num_wr = 101; + sum.num_wr_kb = 321; + pool_stat.num_store_stats = 3; + store_statfs_t &statfs = pool_stat.store_stats; + statfs.data_stored = 40 * 1024 * 1024; + statfs.allocated = 41 * 1024 * 1024 * 2; + statfs.data_compressed_allocated = 4334; + statfs.data_compressed_original = 1213; + + sum.calc_copies(3); // assuming we have 3 copies for each obj + // nominal amount of space available for new objects in this pool + uint64_t avail = 2016 * 1024 * 1024; + pg_pool_t pool; + pool.quota_max_objects = 2000; + pool.quota_max_bytes = 2000 * 1024 * 1024; + pool.size = 2; + pool.type = pg_pool_t::TYPE_REPLICATED; + pool.tier_of = 0; + PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail, + pool.get_size(), verbose, true, true, &pool); + float copies_rate = + (static_cast<float>(sum.num_object_copies - sum.num_objects_degraded) / + sum.num_object_copies) * pool.get_size(); + float used_percent = (float)statfs.allocated / + (statfs.allocated + avail) * 100; + uint64_t stored = statfs.data_stored / copies_rate; + + unsigned col = 0; + ASSERT_EQ(stringify(byte_u_t(stored)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(stored)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(si_u_t(sum.num_objects)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(statfs.allocated)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(statfs.allocated)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(percentify(used_percent), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(avail/copies_rate)), tbl.get(0, col++)); + ASSERT_EQ(stringify(si_u_t(pool.quota_max_objects)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(pool.quota_max_bytes)), tbl.get(0, col++)); + ASSERT_EQ(stringify(si_u_t(sum.num_objects_dirty)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(statfs.data_compressed_allocated)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(statfs.data_compressed_original)), tbl.get(0, col++)); +} + +// with table, without formatter, verbose = true, empty, avail > 0 +TEST(pgmap, dump_object_stat_sum_1) +{ + bool verbose = true; + CheckTextTable tbl(verbose); + pool_stat_t pool_stat; + object_stat_sum_t& sum = pool_stat.stats.sum; // zero by default + ASSERT_TRUE(sum.is_zero()); + // nominal amount of space available for new objects in this pool + uint64_t avail = 2016 * 1024 * 1024; + pg_pool_t pool; + pool.quota_max_objects = 2000; + pool.quota_max_bytes = 2000 * 1024 * 1024; + pool.size = 2; + pool.type = pg_pool_t::TYPE_REPLICATED; + pool.tier_of = 0; + PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail, + pool.get_size(), verbose, true, true, &pool); + unsigned col = 0; + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(si_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(percentify(0), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(avail/pool.size)), tbl.get(0, col++)); + ASSERT_EQ(stringify(si_u_t(pool.quota_max_objects)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(pool.quota_max_bytes)), tbl.get(0, col++)); + ASSERT_EQ(stringify(si_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); +} + +// with table, without formatter, verbose = false, empty, avail = 0 +TEST(pgmap, dump_object_stat_sum_2) +{ + bool verbose = false; + CheckTextTable tbl(verbose); + pool_stat_t pool_stat; + object_stat_sum_t& sum = pool_stat.stats.sum; // zero by default + ASSERT_TRUE(sum.is_zero()); + // nominal amount of space available for new objects in this pool + uint64_t avail = 0; + pg_pool_t pool; + pool.quota_max_objects = 2000; + pool.quota_max_bytes = 2000 * 1024 * 1024; + pool.size = 2; + pool.type = pg_pool_t::TYPE_REPLICATED; + + PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail, + pool.get_size(), verbose, true, true, &pool); + unsigned col = 0; + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(si_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++)); + ASSERT_EQ(percentify(0), tbl.get(0, col++)); + ASSERT_EQ(stringify(byte_u_t(avail/pool.size)), tbl.get(0, col++)); +} diff --git a/src/test/mon/bench_auth.py b/src/test/mon/bench_auth.py new file mode 100755 index 000000000..5242f6892 --- /dev/null +++ b/src/test/mon/bench_auth.py @@ -0,0 +1,105 @@ +#!/usr/bin/python3 + +import argparse +import copy +import json +import rados +import time +import multiprocessing + +caps_base = ["mon", "profile rbd", "osd", "profile rbd pool=rbd namespace=test"] + +def create_users(conn, num_namespaces, num_users): + cmd = {'prefix': 'auth get-or-create'} + + for i in range(num_namespaces): + caps_base[-1] += ", profile rbd pool=rbd namespace=namespace{}".format(i) + + cmd['caps'] = caps_base + for i in range(num_users): + cmd['entity'] = "client.{}".format(i) + conn.mon_command(json.dumps(cmd), b'') + +class Worker(multiprocessing.Process): + def __init__(self, conn, num, queue, duration): + super().__init__() + self.conn = conn + self.num = num + self.queue = queue + self.duration = duration + + def run(self): + client = "client.{}".format(self.num) + cmd = {'prefix': 'auth caps', 'entity': client} + start_time = time.time() + num_complete = 0 + with rados.Rados(conffile='') as conn: + while True: + now = time.time() + diff = now - start_time + if diff > self.duration: + self.queue.put((num_complete, diff)) + return + caps = copy.deepcopy(caps_base) + caps[-1] += ", profile rbd pool=rbd namespace=namespace{}".format(self.num * 10000 + num_complete) + cmd['caps'] = caps + cmd_start = time.time() + ret, buf, out = conn.mon_command(json.dumps(cmd), b'') + cmd_end = time.time() + if ret != 0: + self.queue.put((Exception("{0}: {1}".format(ret, out)), 0)) + return + num_complete += 1 + print("Process {} finished op {} - latency: {}".format(self.num, num_complete, cmd_end - cmd_start)) + +def main(): + parser = argparse.ArgumentParser(description=""" +Benchmark updates to ceph users' capabilities. Run one update at a time in each thread. +""") + parser.add_argument( + '-n', '--num-namespaces', + type=int, + default=300, + help='number of namespaces per user', + ) + parser.add_argument( + '-t', '--threads', + type=int, + default=10, + help='number of threads (and thus parallel operations) to use', + ) + parser.add_argument( + '-d', '--duration', + type=int, + default=30, + help='how long to run, in seconds', + ) + args = parser.parse_args() + num_namespaces = args.num_namespaces + num_threads = args.threads + duration = args.duration + workers = [] + results = [] + q = multiprocessing.Queue() + with rados.Rados(conffile=rados.Rados.DEFAULT_CONF_FILES) as conn: + create_users(conn, num_namespaces, num_threads) + for i in range(num_threads): + workers.append(Worker(conn, i, q, duration)) + workers[-1].start() + for i in range(num_threads): + num_complete, seconds = q.get() + if isinstance(num_complete, Exception): + raise num_complete + results.append((num_complete, seconds)) + total = 0 + total_rate = 0 + for num, sec in results: + print("Completed {} in {} ({} / s)".format(num, sec, num / sec)) + total += num + total_rate += num / sec + + print("Total: ", total) + print("Avg rate: ", total_rate / len(results)) + +if __name__ == '__main__': + main() diff --git a/src/test/mon/moncap.cc b/src/test/mon/moncap.cc new file mode 100644 index 000000000..1c151b1e3 --- /dev/null +++ b/src/test/mon/moncap.cc @@ -0,0 +1,376 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2012 Inktank + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <iostream> + +#include "include/stringify.h" +#include "mon/MonCap.h" + +#include "gtest/gtest.h" + +const char *parse_good[] = { + + // MonCapMatch + "allow *", + "allow r", + "allow rwx", + "allow r", + " allow rwx", + "allow rwx ", + " allow rwx ", + " allow\t rwx ", + "\tallow\nrwx\t", + "allow service=foo x", + "allow service=\"froo\" x", + "allow profile osd", + "allow profile osd-bootstrap", + "allow profile \"mds-bootstrap\", allow *", + "allow command \"a b c\"", + "allow command abc", + "allow command abc with arg=foo", + "allow command abc with arg=foo arg2=bar", + "allow command abc with arg=foo arg2=bar", + "allow command abc with arg=foo arg2 prefix bar arg3 prefix baz", + "allow command abc with arg=foo arg2 prefix \"bar bingo\" arg3 prefix baz", + "allow command abc with arg regex \"^[0-9a-z.]*$\"", + "allow command abc with arg regex \"\(invaluid regex\"", + "allow service foo x", + "allow service foo x; allow service bar x", + "allow service foo w ;allow service bar x", + "allow service foo w , allow service bar x", + "allow service foo r , allow service bar x", + "allow service foo_foo r, allow service bar r", + "allow service foo-foo r, allow service bar r", + "allow service \" foo \" w, allow service bar r", + "allow command abc with arg=foo arg2=bar, allow service foo r", + "allow command abc.def with arg=foo arg2=bar, allow service foo r", + "allow command \"foo bar\" with arg=\"baz\"", + "allow command \"foo bar\" with arg=\"baz.xx\"", + "profile osd", + "profile \"mds-bootstrap\", profile foo", + "allow * network 1.2.3.4/24", + "allow * network ::1/128", + "allow * network [aa:bb::1]/128", + "allow service=foo x network 1.2.3.4/16", + "allow command abc network 1.2.3.4/8", + "profile osd network 1.2.3.4/32", + "allow profile mon network 1.2.3.4/32", + 0 +}; + +TEST(MonCap, ParseGood) { + for (int i=0; parse_good[i]; ++i) { + string str = parse_good[i]; + MonCap cap; + std::cout << "Testing good input: '" << str << "'" << std::endl; + ASSERT_TRUE(cap.parse(str, &cout)); + std::cout << " -> " << cap << std::endl; + } +} + +// these should stringify to the input value +const char *parse_identity[] = { + "allow *", + "allow r", + "allow rwx", + "allow service foo x", + "allow profile osd", + "allow profile osd-bootstrap", + "allow profile mds-bootstrap, allow *", + "allow profile \"foo bar\", allow *", + "allow command abc", + "allow command \"a b c\"", + "allow command abc with arg=foo", + "allow command abc with arg=foo arg2=bar", + "allow command abc with arg=foo arg2=bar", + "allow command abc with arg=foo arg2 prefix bar arg3 prefix baz", + "allow command abc with arg=foo arg2 prefix \"bar bingo\" arg3 prefix baz", + "allow service foo x", + "allow service foo x, allow service bar x", + "allow service foo w, allow service bar x", + "allow service foo r, allow service bar x", + "allow service foo_foo r, allow service bar r", + "allow service foo-foo r, allow service bar r", + "allow service \" foo \" w, allow service bar r", + "allow command abc with arg=foo arg2=bar, allow service foo r", + 0 +}; + +TEST(MonCap, ParseIdentity) +{ + for (int i=0; parse_identity[i]; ++i) { + string str = parse_identity[i]; + MonCap cap; + std::cout << "Testing good input: '" << str << "'" << std::endl; + ASSERT_TRUE(cap.parse(str, &cout)); + string out = stringify(cap); + ASSERT_EQ(out, str); + } +} + +const char *parse_bad[] = { + "allow r foo", + "allow*", + "foo allow *", + "allow profile foo rwx", + "allow profile", + "allow profile foo bar rwx", + "allow service bar", + "allow command baz x", + "allow r w", + "ALLOW r", + "allow rwx,", + "allow rwx x", + "allow r pool foo r", + "allow wwx pool taco", + "allow wwx pool taco^funny&chars", + "allow rwx pool 'weird name''", + "allow rwx object_prefix \"beforepool\" pool weird", + "allow rwx auid 123 pool asdf", + "allow command foo a prefix b", + "allow command foo with a prefixb", + "allow command foo with a = prefix b", + "allow command foo with a prefix b c", + 0 +}; + +TEST(MonCap, ParseBad) { + for (int i=0; parse_bad[i]; ++i) { + string str = parse_bad[i]; + MonCap cap; + std::cout << "Testing bad input: '" << str << "'" << std::endl; + ASSERT_FALSE(cap.parse(str, &cout)); + } +} + +TEST(MonCap, AllowAll) { + MonCap cap; + ASSERT_FALSE(cap.is_allow_all()); + + ASSERT_TRUE(cap.parse("allow r", NULL)); + ASSERT_FALSE(cap.is_allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow w", NULL)); + ASSERT_FALSE(cap.is_allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow x", NULL)); + ASSERT_FALSE(cap.is_allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow rwx", NULL)); + ASSERT_FALSE(cap.is_allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow rw", NULL)); + ASSERT_FALSE(cap.is_allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow rx", NULL)); + ASSERT_FALSE(cap.is_allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow wx", NULL)); + ASSERT_FALSE(cap.is_allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow *", NULL)); + ASSERT_TRUE(cap.is_allow_all()); + ASSERT_TRUE(cap.is_capable(NULL, {}, "foo", "asdf", {}, true, true, true, + {})); + + MonCap cap2; + ASSERT_FALSE(cap2.is_allow_all()); + cap2.set_allow_all(); + ASSERT_TRUE(cap2.is_allow_all()); +} + +TEST(MonCap, Network) { + MonCap cap; + bool r = cap.parse("allow * network 192.168.0.0/16, allow * network 10.0.0.0/8", NULL); + ASSERT_TRUE(r); + + entity_addr_t a, b, c; + a.parse("10.1.2.3"); + b.parse("192.168.2.3"); + c.parse("192.167.2.3"); + + ASSERT_TRUE(cap.is_capable(NULL, {}, "foo", "asdf", {}, true, true, true, + a)); + ASSERT_TRUE(cap.is_capable(NULL, {}, "foo", "asdf", {}, true, true, true, + b)); + ASSERT_FALSE(cap.is_capable(NULL, {}, "foo", "asdf", {}, true, true, true, + c)); +} + +TEST(MonCap, ProfileOSD) { + MonCap cap; + bool r = cap.parse("allow profile osd", NULL); + ASSERT_TRUE(r); + + EntityName name; + name.from_str("osd.123"); + map<string,string> ca; + + ASSERT_TRUE(cap.is_capable(NULL, name, "osd", "", ca, true, false, false, + {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "osd", "", ca, true, true, false, {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "osd", "", ca, true, true, true, {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "osd", "", ca, true, true, true, {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "mon", "", ca, true, false, false, + {})); + + ASSERT_FALSE(cap.is_capable(NULL, name, "mds", "", ca, true, true, true, {})); + ASSERT_FALSE(cap.is_capable(NULL, name, "mon", "", ca, true, true, true, {})); + + ca.clear(); + ASSERT_FALSE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true, + true, {})); + ca["key"] = "daemon-private/osd.123"; + ASSERT_FALSE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true, + true, {})); + ca["key"] = "daemon-private/osd.12/asdf"; + ASSERT_FALSE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true, + true, {})); + ca["key"] = "daemon-private/osd.123/"; + ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true, + true, {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true, + true, {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true, + true, {})); + ca["key"] = "daemon-private/osd.123/foo"; + ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true, + true, {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key put", ca, true, true, + true, {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key set", ca, true, true, + true, {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key exists", ca, true, + true, true, {})); + ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key delete", ca, true, + true, true, {})); +} + +TEST(MonCap, CommandRegEx) { + MonCap cap; + ASSERT_FALSE(cap.is_allow_all()); + ASSERT_TRUE(cap.parse("allow command abc with arg regex \"^[0-9a-z.]*$\"", + NULL)); + + EntityName name; + name.from_str("osd.123"); + ASSERT_TRUE(cap.is_capable(nullptr, name, "", "abc", {{"arg", "12345abcde"}}, + true, true, true, {})); + ASSERT_FALSE(cap.is_capable(nullptr, name, "", "abc", {{"arg", "~!@#$"}}, + true, true, true, {})); + + ASSERT_TRUE(cap.parse("allow command abc with arg regex \"[*\"", NULL)); + ASSERT_FALSE(cap.is_capable(nullptr, name, "", "abc", {{"arg", ""}}, true, + true, true, {})); +} + +TEST(MonCap, ProfileBootstrapRBD) { + MonCap cap; + ASSERT_FALSE(cap.is_allow_all()); + ASSERT_TRUE(cap.parse("profile bootstrap-rbd", NULL)); + + EntityName name; + name.from_str("mon.a"); + ASSERT_TRUE(cap.is_capable(nullptr, name, "", + "auth get-or-create", { + {"entity", "client.rbd"}, + {"caps_mon", "profile rbd"}, + {"caps_osd", "profile rbd pool=foo, profile rbd-read-only"}, + }, true, true, true, + {})); + ASSERT_FALSE(cap.is_capable(nullptr, name, "", + "auth get-or-create", { + {"entity", "client.rbd"}, + {"caps_mon", "allow *"}, + {"caps_osd", "profile rbd"}, + }, true, true, true, + {})); + ASSERT_FALSE(cap.is_capable(nullptr, name, "", + "auth get-or-create", { + {"entity", "client.rbd"}, + {"caps_mon", "profile rbd"}, + {"caps_osd", "profile rbd pool=foo, allow *, profile rbd-read-only"}, + }, true, true, true, + {})); +} + +TEST(MonCap, ProfileBootstrapRBDMirror) { + MonCap cap; + ASSERT_FALSE(cap.is_allow_all()); + ASSERT_TRUE(cap.parse("profile bootstrap-rbd-mirror", NULL)); + + EntityName name; + name.from_str("mon.a"); + ASSERT_TRUE(cap.is_capable(nullptr, name, "", + "auth get-or-create", { + {"entity", "client.rbd"}, + {"caps_mon", "profile rbd-mirror"}, + {"caps_osd", "profile rbd pool=foo, profile rbd-read-only"}, + }, true, true, true, + {})); + ASSERT_FALSE(cap.is_capable(nullptr, name, "", + "auth get-or-create", { + {"entity", "client.rbd"}, + {"caps_mon", "profile rbd"}, + {"caps_osd", "profile rbd pool=foo, profile rbd-read-only"}, + }, true, true, true, + {})); + ASSERT_FALSE(cap.is_capable(nullptr, name, "", + "auth get-or-create", { + {"entity", "client.rbd"}, + {"caps_mon", "allow *"}, + {"caps_osd", "profile rbd"}, + }, true, true, true, + {})); + ASSERT_FALSE(cap.is_capable(nullptr, name, "", + "auth get-or-create", { + {"entity", "client.rbd"}, + {"caps_mon", "profile rbd-mirror"}, + {"caps_osd", "profile rbd pool=foo, allow *, profile rbd-read-only"}, + }, true, true, true, + {})); +} + +TEST(MonCap, ProfileRBD) { + MonCap cap; + ASSERT_FALSE(cap.is_allow_all()); + ASSERT_TRUE(cap.parse("profile rbd", NULL)); + + EntityName name; + name.from_str("mon.a"); + ASSERT_FALSE(cap.is_capable(nullptr, name, "config-key", + "config-key get", { + {"key", "rbd/mirror/peer/1/1234"}, + }, true, false, false, {})); +} + +TEST(MonCap, ProfileRBDMirror) { + MonCap cap; + ASSERT_FALSE(cap.is_allow_all()); + ASSERT_TRUE(cap.parse("profile rbd-mirror", NULL)); + + EntityName name; + name.from_str("mon.a"); + ASSERT_TRUE(cap.is_capable(nullptr, name, "config-key", + "config-key get", { + {"key", "rbd/mirror/peer/1/1234"}, + }, true, false, false, {})); +} diff --git a/src/test/mon/test-mon-msg.cc b/src/test/mon/test-mon-msg.cc new file mode 100644 index 000000000..fede9df8f --- /dev/null +++ b/src/test/mon/test-mon-msg.cc @@ -0,0 +1,338 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* +* Ceph - scalable distributed file system +* +* Copyright (C) 2014 Red Hat +* +* This is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License version 2.1, as published by the Free Software +* Foundation. See file COPYING. +*/ +#include <stdio.h> +#include <string.h> +#include <iostream> +#include <sstream> +#include <time.h> +#include <stdlib.h> +#include <map> + +#include "global/global_init.h" +#include "global/global_context.h" +#include "common/async/context_pool.h" +#include "common/ceph_argparse.h" +#include "common/version.h" +#include "common/dout.h" +#include "common/debug.h" +#include "common/ceph_mutex.h" +#include "common/Timer.h" +#include "common/errno.h" +#include "mon/MonClient.h" +#include "msg/Dispatcher.h" +#include "include/err.h" +#include <boost/scoped_ptr.hpp> + +#include "gtest/gtest.h" + +#include "common/config.h" +#include "include/ceph_assert.h" + +#include "messages/MMonProbe.h" +#include "messages/MRoute.h" +#include "messages/MGenericMessage.h" +#include "messages/MMonJoin.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_ +#undef dout_prefix +#define dout_prefix *_dout << "test-mon-msg " + +class MonClientHelper : public Dispatcher +{ +protected: + CephContext *cct; + ceph::async::io_context_pool poolctx; + Messenger *msg; + MonClient monc; + + ceph::mutex lock = ceph::make_mutex("mon-msg-test::lock"); + + set<int> wanted; + +public: + + explicit MonClientHelper(CephContext *cct_) + : Dispatcher(cct_), + cct(cct_), + poolctx(1), + msg(NULL), + monc(cct_, poolctx) + { } + + + int post_init() { + dout(1) << __func__ << dendl; + if (!msg) + return -EINVAL; + msg->add_dispatcher_tail(this); + return 0; + } + + int init_messenger() { + dout(1) << __func__ << dendl; + + std::string public_msgr_type = cct->_conf->ms_public_type.empty() ? cct->_conf.get_val<std::string>("ms_type") : cct->_conf->ms_public_type; + msg = Messenger::create(cct, public_msgr_type, entity_name_t::CLIENT(-1), + "test-mon-msg", 0); + ceph_assert(msg != NULL); + msg->set_default_policy(Messenger::Policy::lossy_client(0)); + dout(0) << __func__ << " starting messenger at " + << msg->get_myaddrs() << dendl; + msg->start(); + return 0; + } + + int init_monc() { + dout(1) << __func__ << dendl; + ceph_assert(msg != NULL); + int err = monc.build_initial_monmap(); + if (err < 0) { + derr << __func__ << " error building monmap: " + << cpp_strerror(err) << dendl; + return err; + } + + monc.set_messenger(msg); + msg->add_dispatcher_head(&monc); + + monc.set_want_keys(CEPH_ENTITY_TYPE_MON); + err = monc.init(); + if (err < 0) { + derr << __func__ << " monc init failed: " + << cpp_strerror(err) << dendl; + goto fail; + } + + err = monc.authenticate(); + if (err < 0) { + derr << __func__ << " monc auth failed: " + << cpp_strerror(err) << dendl; + goto fail_monc; + } + monc.wait_auth_rotating(30.0); + monc.renew_subs(); + dout(0) << __func__ << " finished" << dendl; + return 0; + +fail_monc: + derr << __func__ << " failing monc" << dendl; + monc.shutdown(); +fail: + return err; + } + + void shutdown_messenger() { + dout(0) << __func__ << dendl; + msg->shutdown(); + msg->wait(); + } + + void shutdown_monc() { + dout(0) << __func__ << dendl; + monc.shutdown(); + } + + void shutdown() { + dout(0) << __func__ << dendl; + shutdown_monc(); + shutdown_messenger(); + } + + MonMap *get_monmap() { + return &monc.monmap; + } + + int init() { + int err = init_messenger(); + if (err < 0) + goto fail; + err = init_monc(); + if (err < 0) + goto fail_msgr; + err = post_init(); + if (err < 0) + goto fail_monc; + return 0; +fail_monc: + shutdown_monc(); +fail_msgr: + shutdown_messenger(); +fail: + return err; + } + + virtual void handle_wanted(Message *m) { } + + bool handle_message(Message *m) { + dout(1) << __func__ << " " << *m << dendl; + if (!is_wanted(m)) { + dout(10) << __func__ << " not wanted" << dendl; + return false; + } + handle_wanted(m); + m->put(); + + return true; + } + + bool ms_dispatch(Message *m) override { + return handle_message(m); + } + void ms_handle_connect(Connection *con) override { } + void ms_handle_remote_reset(Connection *con) override { } + bool ms_handle_reset(Connection *con) override { return false; } + bool ms_handle_refused(Connection *con) override { return false; } + + bool is_wanted(Message *m) { + dout(20) << __func__ << " " << *m << " type " << m->get_type() << dendl; + return (wanted.find(m->get_type()) != wanted.end()); + } + + void add_wanted(int t) { + dout(20) << __func__ << " type " << t << dendl; + wanted.insert(t); + } + + void rm_wanted(int t) { + dout(20) << __func__ << " type " << t << dendl; + wanted.erase(t); + } + + void send_message(Message *m) { + dout(15) << __func__ << " " << *m << dendl; + monc.send_mon_message(m); + } + + void wait() { msg->wait(); } +}; + +class MonMsgTest : public MonClientHelper, + public ::testing::Test +{ +protected: + int reply_type = 0; + Message *reply_msg = nullptr; + ceph::mutex lock = ceph::make_mutex("lock"); + ceph::condition_variable cond; + + MonMsgTest() : + MonClientHelper(g_ceph_context) { } + +public: + void SetUp() override { + reply_type = -1; + if (reply_msg) { + reply_msg->put(); + reply_msg = nullptr; + } + ASSERT_EQ(init(), 0); + } + + void TearDown() override { + shutdown(); + if (reply_msg) { + reply_msg->put(); + reply_msg = nullptr; + } + } + + void handle_wanted(Message *m) override { + std::lock_guard l{lock}; + // caller will put() after they call us, so hold on to a ref + m->get(); + reply_msg = m; + cond.notify_all(); + } + + Message *send_wait_reply(Message *m, int t, double timeout=30.0) { + std::unique_lock l{lock}; + reply_type = t; + add_wanted(t); + send_message(m); + + std::cv_status status = std::cv_status::no_timeout; + if (timeout > 0) { + utime_t s = ceph_clock_now(); + status = cond.wait_for(l, ceph::make_timespan(timeout)); + utime_t e = ceph_clock_now(); + dout(20) << __func__ << " took " << (e-s) << " seconds" << dendl; + } else { + cond.wait(l); + } + rm_wanted(t); + l.unlock(); + if (status == std::cv_status::timeout) { + dout(20) << __func__ << " error: " << cpp_strerror(ETIMEDOUT) << dendl; + return (Message*)((long)-ETIMEDOUT); + } + + if (!reply_msg) + dout(20) << __func__ << " reply_msg is nullptr" << dendl; + else + dout(20) << __func__ << " reply_msg " << *reply_msg << dendl; + return reply_msg; + } +}; + +TEST_F(MonMsgTest, MMonProbeTest) +{ + Message *m = new MMonProbe(get_monmap()->fsid, + MMonProbe::OP_PROBE, "b", false, + ceph_release()); + Message *r = send_wait_reply(m, MSG_MON_PROBE); + ASSERT_NE(IS_ERR(r), 0); + ASSERT_EQ(PTR_ERR(r), -ETIMEDOUT); +} + +TEST_F(MonMsgTest, MRouteTest) +{ + Message *payload = new MGenericMessage(CEPH_MSG_SHUTDOWN); + MRoute *m = new MRoute; + m->msg = payload; + Message *r = send_wait_reply(m, CEPH_MSG_SHUTDOWN); + // we want an error + ASSERT_NE(IS_ERR(r), 0); + ASSERT_EQ(PTR_ERR(r), -ETIMEDOUT); +} + +/* MMonScrub and MMonSync have other safeguards in place that prevent + * us from actually receiving a reply even if the message is handled + * by the monitor due to lack of cap checking. + */ +TEST_F(MonMsgTest, MMonJoin) +{ + Message *m = new MMonJoin(get_monmap()->fsid, string("client"), + msg->get_myaddrs()); + send_wait_reply(m, MSG_MON_PAXOS, 10.0); + + int r = monc.get_monmap(); + ASSERT_EQ(r, 0); + ASSERT_FALSE(monc.monmap.contains("client")); +} + +int main(int argc, char *argv[]) +{ + vector<const char*> args; + argv_to_vec(argc, (const char **)argv, args); + + auto cct = global_init(nullptr, args, + CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + g_ceph_context->_conf.apply_changes(nullptr); + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} + diff --git a/src/test/mon/test_election.cc b/src/test/mon/test_election.cc new file mode 100644 index 000000000..a232d7355 --- /dev/null +++ b/src/test/mon/test_election.cc @@ -0,0 +1,1003 @@ +// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "gtest/gtest.h" +#include "mon/ElectionLogic.h" +#include "mon/ConnectionTracker.h" +#include "common/dout.h" + +#include "global/global_context.h" +#include "global/global_init.h" +#include "common/common_init.h" +#include "common/ceph_argparse.h" + +using namespace std; + +#define dout_subsys ceph_subsys_test +#undef dout_prefix +#define dout_prefix _prefix(_dout, prefix_name(), timestep_count()) +static ostream& _prefix(std::ostream *_dout, const char *prefix, int timesteps) { + return *_dout << prefix << timesteps << " "; +} + +const char* prefix_name() { return "test_election: "; } +int timestep_count() { return -1; } + +int main(int argc, char **argv) { + vector<const char*> args(argv, argv+argc); + bool user_set_debug = false; + for (auto& arg : args) { + if (strncmp("--debug_mon", arg, 11) == 0) user_set_debug = true; + } + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + if (!user_set_debug) g_ceph_context->_conf.set_val("debug mon", "0/20"); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + + +class Owner; +struct Election { + map<int, Owner*> electors; + map<int, set<int> > blocked_messages; + int count; + ElectionLogic::election_strategy election_strategy; + int ping_interval; + set<int> disallowed_leaders; + + vector< function<void()> > messages; + int pending_election_messages; + int timesteps_run = 0; + int last_quorum_change = 0; + int last_quorum_formed = -1; + set<int> last_quorum_reported; + int last_leader = -1; + + Election(int c, ElectionLogic::election_strategy es, int pingi=1, double tracker_halflife=5); + ~Election(); + // ElectionOwner interfaces + int get_paxos_size() { return count; } + const set<int>& get_disallowed_leaders() const { return disallowed_leaders; } + void propose_to(int from, int to, epoch_t e, bufferlist& cbl); + void defer_to(int from, int to, epoch_t e); + void claim_victory(int from, int to, epoch_t e, const set<int>& members); + void accept_victory(int from, int to, epoch_t e); + void report_quorum(const set<int>& quorum); + void queue_stable_message(int from, int to, function<void()> m); + void queue_timeout_message(int from, int to, function<void()> m); + void queue_stable_or_timeout(int from, int to, + function<void()> m, function<void()> t); + void queue_election_message(int from, int to, function<void()> m); + + // test runner interfaces + int run_timesteps(int max); + void start_one(int who); + void start_all(); + bool election_stable() const; + bool quorum_stable(int timesteps_stable) const; + bool all_agree_on_leader() const; + bool check_epoch_agreement() const; + void block_messages(int from, int to); + void block_bidirectional_messages(int a, int b); + void unblock_messages(int from, int to); + void unblock_bidirectional_messages(int a, int b); + void add_disallowed_leader(int disallowed) { disallowed_leaders.insert(disallowed); } + void remove_elector(int rank); + const char* prefix_name() const { return "Election: "; } + int timestep_count() const { return timesteps_run; } +}; +struct Owner : public ElectionOwner, RankProvider { + Election *parent; + int rank; + epoch_t persisted_epoch; + bool ever_joined; + ConnectionTracker peer_tracker; + ElectionLogic logic; + set<int> quorum; + int victory_accepters; + int timer_steps; // timesteps until we trigger timeout + bool timer_election; // the timeout is for normal election, or victory + bool rank_deleted = false; + string prefix_str; + Owner(int r, ElectionLogic::election_strategy es, double tracker_halflife, + Election *p) : parent(p), rank(r), persisted_epoch(0), + ever_joined(false), + peer_tracker(this, rank, tracker_halflife, 5, g_ceph_context), + logic(this, es, &peer_tracker, 0.0005, g_ceph_context), + victory_accepters(0), + timer_steps(-1), timer_election(true) { + std::stringstream str; + str << "Owner" << rank << " "; + prefix_str = str.str(); + } + + // in-memory store: just save to variable + void persist_epoch(epoch_t e) { persisted_epoch = e; } + // in-memory store: just return variable + epoch_t read_persisted_epoch() const { return persisted_epoch; } + // in-memory store: don't need to validate + void validate_store() { return; } + // don't need to do anything with our state right now + void notify_bump_epoch() {} + void notify_rank_removed(int removed_rank) { + ldout(g_ceph_context, 1) << "removed_rank: " << removed_rank << dendl; + ldout(g_ceph_context, 1) << "rank before: " << rank << dendl; + if (removed_rank < rank) { + --rank; + } + peer_tracker.notify_rank_removed(removed_rank, rank); + ldout(g_ceph_context, 1) << "rank after: " << rank << dendl; + } + void notify_deleted() { rank_deleted = true; rank = -1; cancel_timer(); } + // pass back to ElectionLogic; we don't need this redirect ourselves + void trigger_new_election() { ceph_assert (!rank_deleted); logic.start(); } + int get_my_rank() const { return rank; } + // we don't need to persist scores as we don't reset and lose memory state + void persist_connectivity_scores() {} + void propose_to_peers(epoch_t e, bufferlist& bl) { + ceph_assert (!rank_deleted); + for (int i = 0; i < parent->get_paxos_size(); ++i) { + if (i == rank) continue; + parent->propose_to(rank, i, e, bl); + } + } + void reset_election() { + ceph_assert (!rank_deleted); + _start(); + logic.start(); + } + bool ever_participated() const { return ever_joined; } + unsigned paxos_size() const { return parent->get_paxos_size(); } + const set<int>& get_disallowed_leaders() const { + return parent->get_disallowed_leaders(); + } + void cancel_timer() { + timer_steps = -1; + } + void reset_timer(int steps) { + cancel_timer(); + timer_steps = 3 + steps; // FIXME? magic number, current step + roundtrip + timer_election = true; + } + void start_victory_timer() { + cancel_timer(); + timer_election = false; + timer_steps = 3; // FIXME? current step + roundtrip + } + void _start() { + reset_timer(0); + quorum.clear(); + } + void _defer_to(int who) { + ceph_assert (!rank_deleted); + parent->defer_to(rank, who, logic.get_epoch()); + reset_timer(0); // wtf does changing this 0->1 cause breakage? + } + void message_victory(const std::set<int>& members) { + ceph_assert (!rank_deleted); + for (auto i : members) { + if (i == rank) continue; + parent->claim_victory(rank, i, logic.get_epoch(), members); + } + start_victory_timer(); + quorum = members; + victory_accepters = 1; + } + bool is_current_member(int r) const { return quorum.count(r) != 0; } + void receive_propose(int from, epoch_t e, ConnectionTracker *oct) { + if (rank_deleted) return; + logic.receive_propose(from, e, oct); + delete oct; + } + void receive_ack(int from, epoch_t e) { + if (rank_deleted) return; + if (e < logic.get_epoch()) + return; + logic.receive_ack(from, e); + } + void receive_victory_claim(int from, epoch_t e, const set<int>& members) { + if (rank_deleted) return; + if (e < logic.get_epoch()) + return; + if (logic.receive_victory_claim(from, e)) { + quorum = members; + cancel_timer(); + parent->accept_victory(rank, from, e); + } + } + void receive_victory_ack(int from, epoch_t e) { + if (rank_deleted) return; + if (e < logic.get_epoch()) + return; + ++victory_accepters; + if (victory_accepters == static_cast<int>(quorum.size())) { + cancel_timer(); + parent->report_quorum(quorum); + } + } + void receive_scores(bufferlist bl) { + ConnectionTracker oct(bl, g_ceph_context); + peer_tracker.receive_peer_report(oct); + ldout(g_ceph_context, 10) << "received scores " << oct << dendl; + } + void receive_ping(int from_rank, bufferlist bl) { + ldout(g_ceph_context, 6) << "receive ping from " << from_rank << dendl; + peer_tracker.report_live_connection(from_rank, parent->ping_interval); + receive_scores(bl); + } + void receive_ping_timeout(int from_rank) { + ldout(g_ceph_context, 6) << "timeout ping from " << from_rank << dendl; + peer_tracker.report_dead_connection(from_rank, parent->ping_interval); + } + void election_timeout() { + ldout(g_ceph_context, 2) << "election epoch " << logic.get_epoch() + << " timed out for " << rank + << ", electing me:" << logic.electing_me + << ", acked_me:" << logic.acked_me << dendl; + ceph_assert (!rank_deleted); + logic.end_election_period(); + } + void victory_timeout() { + ldout(g_ceph_context, 2) << "victory epoch " << logic.get_epoch() + << " timed out for " << rank + << ", electing me:" << logic.electing_me + << ", acked_me:" << logic.acked_me << dendl; + ceph_assert (!rank_deleted); + reset_election(); + } + void encode_scores(bufferlist& bl) { + encode(peer_tracker, bl); + } + void send_pings() { + ceph_assert (!rank_deleted); + if (!parent->ping_interval || + parent->timesteps_run % parent->ping_interval != 0) { + return; + } + + bufferlist bl; + encode_scores(bl); + for (int i = 0; i < parent->get_paxos_size(); ++i) { + if (i == rank) + continue; + Owner *o = parent->electors[i]; + parent->queue_stable_or_timeout(rank, i, + [o, r=rank, bl] { o->receive_ping(r, bl); }, + [o, r=rank] { o->receive_ping_timeout(r); } + ); + } + } + void notify_timestep() { + ceph_assert (!rank_deleted); + assert(timer_steps != 0); + if (timer_steps > 0) { + --timer_steps; + } + if (timer_steps == 0) { + if (timer_election) { + election_timeout(); + } else { + victory_timeout(); + } + } + send_pings(); + } + const char *prefix_name() const { + return prefix_str.c_str(); + } + int timestep_count() const { return parent->timesteps_run; } +}; + +Election::Election(int c, ElectionLogic::election_strategy es, int pingi, + double tracker_halflife) : count(c), election_strategy(es), ping_interval(pingi), + pending_election_messages(0), timesteps_run(0), last_quorum_change(0), last_quorum_formed(-1) +{ + for (int i = 0; i < count; ++i) { + electors[i] = new Owner(i, election_strategy, tracker_halflife, this); + } +} + +Election::~Election() +{ + { + for (auto i : electors) { + delete i.second; + } + } +} + +void Election::queue_stable_message(int from, int to, function<void()> m) +{ + if (!blocked_messages[from].count(to)) { + messages.push_back(m); + } +} + +void Election::queue_election_message(int from, int to, function<void()> m) +{ + if (last_quorum_reported.count(from)) { + last_quorum_change = timesteps_run; + last_quorum_reported.clear(); + last_leader = -1; + } + if (!blocked_messages[from].count(to)) { + bufferlist bl; + electors[from]->encode_scores(bl); + Owner *o = electors[to]; + messages.push_back([this,m,o,bl] { + --this->pending_election_messages; + o->receive_scores(bl); + m(); + }); + ++pending_election_messages; + } +} + +void Election::queue_timeout_message(int from, int to, function<void()> m) +{ + ceph_assert(blocked_messages[from].count(to)); + messages.push_back(m); +} + +void Election::queue_stable_or_timeout(int from, int to, + function<void()> m, function<void()> t) +{ + if (blocked_messages[from].count(to)) { + queue_timeout_message(from, to, t); + } else { + queue_stable_message(from, to, m); + } +} + +void Election::defer_to(int from, int to, epoch_t e) +{ + Owner *o = electors[to]; + queue_election_message(from, to, [o, from, e] { + o->receive_ack(from, e); + }); +} + +void Election::propose_to(int from, int to, epoch_t e, bufferlist& cbl) +{ + Owner *o = electors[to]; + ConnectionTracker *oct = NULL; + if (cbl.length()) { + oct = new ConnectionTracker(cbl, g_ceph_context); // we leak these on blocked cons, meh + } + queue_election_message(from, to, [o, from, e, oct] { + o->receive_propose(from, e, oct); + }); +} + +void Election::claim_victory(int from, int to, epoch_t e, const set<int>& members) +{ + Owner *o = electors[to]; + queue_election_message(from, to, [o, from, e, members] { + o->receive_victory_claim(from, e, members); + }); +} + +void Election::accept_victory(int from, int to, epoch_t e) +{ + Owner *o = electors[to]; + queue_election_message(from, to, [o, from, e] { + o->receive_victory_ack(from, e); + }); +} + +void Election::report_quorum(const set<int>& quorum) +{ + for (int i : quorum) { + electors[i]->ever_joined = true; + } + last_quorum_formed = last_quorum_change = timesteps_run; + last_quorum_reported = quorum; + last_leader = electors[*(quorum.begin())]->logic.get_election_winner(); +} + +int Election::run_timesteps(int max) +{ + vector< function<void()> > current_m; + int steps = 0; + for (; (!max || steps < max) && // we have timesteps left AND ONE OF + (pending_election_messages || // there are messages pending. + !election_stable()); // somebody's not happy and will act in future + ++steps) { + current_m.clear(); + current_m.swap(messages); + ++timesteps_run; + for (auto& m : current_m) { + m(); + } + for (auto o : electors) { + o.second->notify_timestep(); + } + } + + return steps; +} + +void Election::start_one(int who) +{ + assert(who < static_cast<int>(electors.size())); + electors[who]->logic.start(); +} + +void Election::start_all() { + for (auto e : electors) { + e.second->logic.start(); + } +} + +bool Election::election_stable() const +{ + // see if anybody has a timer running + for (auto i : electors) { + if (i.second->timer_steps != -1) { + ldout(g_ceph_context, 30) << "rank " << i.first << " has timer value " << i.second->timer_steps << dendl; + return false; + } + } + return (pending_election_messages == 0); +} + +bool Election::quorum_stable(int timesteps_stable) const +{ + ldout(g_ceph_context, 1) << "quorum_stable? last formed:" << last_quorum_formed + << ", last changed " << last_quorum_change + << ", last reported members " << last_quorum_reported << dendl; + if (last_quorum_reported.empty()) { + return false; + } + if (last_quorum_formed < last_quorum_change) { + return false; + } + for (auto i : last_quorum_reported) { + if (electors.find(i)->second->timer_steps != -1) { + return false; + } + } + if (timesteps_run - timesteps_stable > last_quorum_change) + return true; + return election_stable(); +} + +bool Election::all_agree_on_leader() const +{ + int leader = electors.find(0)->second->logic.get_election_winner(); + ldout(g_ceph_context, 10) << "all_agree_on_leader on " << leader << dendl; + for (auto& i: electors) { + if (leader != i.second->logic.get_election_winner()) { + ldout(g_ceph_context, 10) << "rank " << i.first << " has different leader " + << i.second->logic.get_election_winner() << dendl; + return false; + } + } + if (disallowed_leaders.count(leader)) { + ldout(g_ceph_context, 10) << "that leader is disallowed! member of " + << disallowed_leaders << dendl; + return false; + } + return true; +} + +bool Election::check_epoch_agreement() const +{ + epoch_t epoch = electors.find(0)->second->logic.get_epoch(); + for (auto& i : electors) { + if (epoch != i.second->logic.get_epoch()) { + return false; + } + } + return true; +} + +void Election::block_messages(int from, int to) +{ + blocked_messages[from].insert(to); +} +void Election::block_bidirectional_messages(int a, int b) +{ + block_messages(a, b); + block_messages(b, a); +} +void Election::unblock_messages(int from, int to) +{ + blocked_messages[from].erase(to); +} +void Election::unblock_bidirectional_messages(int a, int b) +{ + unblock_messages(a, b); + unblock_messages(b, a); +} + +void Election::remove_elector(int rank) +{ + for (auto ei = electors.begin(); ei != electors.end(); ) { + if (ei->first == rank) { + ei->second->notify_deleted(); + electors.erase(ei++); + continue; + } + ei->second->notify_rank_removed(rank); + if (ei->first > rank) { + electors[ei->first - 1] = ei->second; + electors.erase(ei++); + continue; + } + ++ei; + } + for (auto bi = blocked_messages.begin(); bi != blocked_messages.end(); ) { + if (bi->first == rank) { + blocked_messages.erase(bi++); + continue; + } + bi->second.erase(rank); + for (auto i = bi->second.upper_bound(rank); + i != bi->second.end();) { + bi->second.insert(*i - 1); + bi->second.erase(*(i++)); + } + ++bi; + } + --count; +} + +void single_startup_election_completes(ElectionLogic::election_strategy strategy) +{ + for (int starter = 0; starter < 5; ++starter) { + Election election(5, strategy); + election.start_one(starter); + // This test is not actually legit since you should start + // all the ElectionLogics, but it seems to work + int steps = election.run_timesteps(0); + ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl; + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + } +} + +void everybody_starts_completes(ElectionLogic::election_strategy strategy) +{ + Election election(5, strategy); + election.start_all(); + int steps = election.run_timesteps(0); + ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl; + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); +} + +void blocked_connection_continues_election(ElectionLogic::election_strategy strategy) +{ + Election election(5, strategy); + election.block_bidirectional_messages(0, 1); + election.start_all(); + int steps = election.run_timesteps(100); + ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl; + // This is a failure mode! + ASSERT_FALSE(election.election_stable()); + ASSERT_FALSE(election.quorum_stable(6)); // double the timer_steps we use + election.unblock_bidirectional_messages(0, 1); + steps = election.run_timesteps(100); + ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl; + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); +} + +void blocked_connection_converges_election(ElectionLogic::election_strategy strategy) +{ + Election election(5, strategy); + election.block_bidirectional_messages(0, 1); + election.start_all(); + int steps = election.run_timesteps(100); + ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl; + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + election.unblock_bidirectional_messages(0, 1); + steps = election.run_timesteps(100); + ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl; + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); +} + +void disallowed_doesnt_win(ElectionLogic::election_strategy strategy) +{ + int MON_COUNT = 5; + for (int i = 0; i < MON_COUNT - 1; ++i) { + Election election(MON_COUNT, strategy); + for (int j = 0; j <= i; ++j) { + election.add_disallowed_leader(j); + } + election.start_all(); + int steps = election.run_timesteps(0); + ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl; + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + int leader = election.electors[0]->logic.get_election_winner(); + for (int j = 0; j <= i; ++j) { + ASSERT_NE(j, leader); + } + } + for (int i = MON_COUNT - 1; i > 0; --i) { + Election election(MON_COUNT, strategy); + for (int j = i; j <= MON_COUNT - 1; ++j) { + election.add_disallowed_leader(j); + } + election.start_all(); + int steps = election.run_timesteps(0); + ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl; + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + int leader = election.electors[0]->logic.get_election_winner(); + for (int j = i; j < MON_COUNT; ++j) { + ASSERT_NE(j, leader); + } + } +} + +void converges_after_flapping(ElectionLogic::election_strategy strategy) +{ + Election election(5, strategy); + auto block_cons = [&] { + auto& e = election; + // leave 4 connected to both sides so it will trigger but not trivially win + e.block_bidirectional_messages(0, 2); + e.block_bidirectional_messages(0, 3); + e.block_bidirectional_messages(1, 2); + e.block_bidirectional_messages(1, 3); + }; + auto unblock_cons = [&] { + auto& e = election; + e.unblock_bidirectional_messages(0, 2); + e.unblock_bidirectional_messages(0, 3); + e.unblock_bidirectional_messages(1, 2); + e.unblock_bidirectional_messages(1, 3); + }; + block_cons(); + election.start_all(); + for (int i = 0; i < 5; ++i) { + election.run_timesteps(5); + unblock_cons(); + election.run_timesteps(5); + block_cons(); + } + unblock_cons(); + election.run_timesteps(100); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); +} + +void converges_while_flapping(ElectionLogic::election_strategy strategy) +{ + Election election(5, strategy); + auto block_cons = [&] { + auto& e = election; + // leave 4 connected to both sides so it will trigger but not trivially win + e.block_bidirectional_messages(0, 2); + e.block_bidirectional_messages(0, 3); + e.block_bidirectional_messages(1, 2); + e.block_bidirectional_messages(1, 3); + }; + auto unblock_cons = [&] { + auto& e = election; + e.unblock_bidirectional_messages(0, 2); + e.unblock_bidirectional_messages(0, 3); + e.unblock_bidirectional_messages(1, 2); + e.unblock_bidirectional_messages(1, 3); + }; + block_cons(); + election.start_all(); + for (int i = 0; i < 5; ++i) { + election.run_timesteps(10); + ASSERT_TRUE(election.quorum_stable(6)); + unblock_cons(); + election.run_timesteps(5); + block_cons(); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + } + unblock_cons(); + election.run_timesteps(100); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); +} + +void netsplit_with_disallowed_tiebreaker_converges(ElectionLogic::election_strategy strategy) +{ + Election election(5, strategy); + election.add_disallowed_leader(4); + auto netsplit = [&] { + auto& e = election; + e.block_bidirectional_messages(0, 2); + e.block_bidirectional_messages(0, 3); + e.block_bidirectional_messages(1, 2); + e.block_bidirectional_messages(1, 3); + }; + auto unsplit = [&] { + auto& e = election; + e.unblock_bidirectional_messages(0, 2); + e.unblock_bidirectional_messages(0, 3); + e.unblock_bidirectional_messages(1, 2); + e.unblock_bidirectional_messages(1, 3); + }; + // hmm, we don't have timeouts to call elections automatically yet + auto call_elections = [&] { + for (auto i : election.electors) { + i.second->trigger_new_election(); + } + }; + // turn everybody on, run happy for a while + election.start_all(); + election.run_timesteps(0); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + int starting_leader = election.last_leader; + // do some netsplits, but leave disallowed tiebreaker alive + for (int i = 0; i < 5; ++i) { + netsplit(); + call_elections(); + election.run_timesteps(15); // tests fail when I run 10 because 0 and 1 time out on same timestamp for some reason, why? + // this ASSERT_EQ only holds while we bias for ranks + ASSERT_EQ(starting_leader, election.last_leader); + ASSERT_TRUE(election.quorum_stable(6)); + ASSERT_FALSE(election.election_stable()); + unsplit(); + call_elections(); + election.run_timesteps(10); + ASSERT_EQ(starting_leader, election.last_leader); + ASSERT_TRUE(election.quorum_stable(6)); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + } + + // now disconnect the tiebreaker and make sure nobody can win + int presplit_quorum_time = election.last_quorum_formed; + netsplit(); + election.block_bidirectional_messages(4, 0); + election.block_bidirectional_messages(4, 1); + election.block_bidirectional_messages(4, 2); + election.block_bidirectional_messages(4, 3); + call_elections(); + election.run_timesteps(100); + ASSERT_EQ(election.last_quorum_formed, presplit_quorum_time); + + // now let in the previously-losing side + election.unblock_bidirectional_messages(4, 2); + election.unblock_bidirectional_messages(4, 3); + call_elections(); + election.run_timesteps(100); + ASSERT_TRUE(election.quorum_stable(50)); + ASSERT_FALSE(election.election_stable()); + + // now reconnect everybody + unsplit(); + election.unblock_bidirectional_messages(4, 0); + election.unblock_bidirectional_messages(4, 1); + call_elections(); + election.run_timesteps(100); + ASSERT_TRUE(election.quorum_stable(50)); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); +} + +void handles_singly_connected_peon(ElectionLogic::election_strategy strategy) +{ + Election election(5, strategy); + election.block_bidirectional_messages(0, 1); + election.block_bidirectional_messages(0, 2); + election.block_bidirectional_messages(0, 3); + election.block_bidirectional_messages(0, 4); + + election.start_all(); + election.run_timesteps(20); + ASSERT_TRUE(election.quorum_stable(5)); + ASSERT_FALSE(election.election_stable()); + + election.unblock_bidirectional_messages(0, 1); + election.run_timesteps(100); + ASSERT_TRUE(election.quorum_stable(50)); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + + election.block_bidirectional_messages(0, 1); + election.unblock_bidirectional_messages(0, 4); + for (auto i : election.electors) { + i.second->trigger_new_election(); + } + election.run_timesteps(15); + ASSERT_TRUE(election.quorum_stable(50)); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); +} + +ConnectionReport *get_connection_reports(ConnectionTracker& ct) { + return &ct.my_reports; +} +map<int,ConnectionReport> *get_peer_reports(ConnectionTracker& ct) { + return &ct.peer_reports; +} +void handles_outdated_scoring(ElectionLogic::election_strategy strategy) +{ + Election election(3, strategy, 5); // ping every 5 timesteps so they start elections before settling scores! + + // start everybody up and run for a bit + election.start_all(); + election.run_timesteps(20); + ASSERT_TRUE(election.quorum_stable(5)); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + + // now mess up the scores to disagree + ConnectionTracker& ct0 = election.electors[0]->peer_tracker; + ConnectionReport& cr0 = *get_connection_reports(ct0); + cr0.history[1] = 0.5; + cr0.history[2] = 0.5; + ct0.increase_version(); + ConnectionTracker& ct1 = election.electors[1]->peer_tracker; + ConnectionReport& cr1 = *get_connection_reports(ct1); + cr1.history[0] = 0.5; + cr1.history[2] = 0.5; + ct1.increase_version(); + ConnectionTracker& ct2 = election.electors[2]->peer_tracker; + ConnectionReport& cr2 = *get_connection_reports(ct2); + cr2.history[0] = 0.5; + map<int,ConnectionReport>&cp2 = *get_peer_reports(ct2); + cp2[0].history[2] = 0; + cp2[1].history[2] = 0; + ct2.increase_version(); + election.ping_interval = 0; // disable pinging to update the scores + ldout(g_ceph_context, 5) << "mangled the scores to be different" << dendl; + + election.start_all(); + election.run_timesteps(50); + ASSERT_TRUE(election.quorum_stable(30)); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); +} + +void handles_disagreeing_connectivity(ElectionLogic::election_strategy strategy) +{ + Election election(5, strategy, 5); // ping every 5 timesteps so they start elections before settling scores! + + // start everybody up and run for a bit + election.start_all(); + election.run_timesteps(20); + ASSERT_TRUE(election.quorum_stable(5)); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + + // block all the connections + for (int i = 0; i < 5; ++i) { + for (int j = i+1; j < 5; ++j) { + election.block_bidirectional_messages(i, j); + } + } + + // now start them electing, which will obviously fail + election.start_all(); + election.run_timesteps(50); // let them all demote scores of their peers + ASSERT_FALSE(election.quorum_stable(10)); + ASSERT_FALSE(election.election_stable()); + + // now reconnect them, at which point they should start running an election before exchanging scores + for (int i = 0; i < 5; ++i) { + for (int j = i+1; j < 5; ++j) { + election.unblock_bidirectional_messages(i, j); + } + } + election.run_timesteps(100); + + // these will pass if the nodes managed to converge on scores, but I expect failure + ASSERT_TRUE(election.quorum_stable(5)); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); +} + +void handles_removing_ranks(ElectionLogic::election_strategy strategy) +{ + ceph_assert(strategy == ElectionLogic::CONNECTIVITY); + for (int deletee = 0; deletee < 5; ++deletee) { + Election election(5, strategy); + election.start_all(); + int steps = election.run_timesteps(0); + ldout(g_ceph_context, 10) << "ran in " << steps << " timesteps" << dendl; + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + election.remove_elector(deletee); + ldout(g_ceph_context, 1) << "removed rank " << deletee << " from set" << dendl; + election.start_all(); + steps = election.run_timesteps(0); + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + } + { + Election election(7, strategy); + for (int i = 0; i < (7 - 3); ++i) { + election.start_all(); + election.remove_elector(0); + int steps = election.run_timesteps(0); + ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl; + ASSERT_TRUE(election.election_stable()); + ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use + ASSERT_TRUE(election.all_agree_on_leader()); + ASSERT_TRUE(election.check_epoch_agreement()); + } + } +} + +// TODO: write a test with more complicated connectivity graphs and make sure +// they are stable with multiple disconnected ranks pinging peons + +// TODO: Write a test that disallowing and disconnecting 0 is otherwise stable? + +// TODO: figure out how to test for bumping election epochs with changing scores, +// a la what happened in run +// http://pulpito.ceph.com/gregf-2019-11-26_10:50:50-rados:monthrash-wip-elector-distro-basic-mira/ + +#define test_classic(utest) TEST(classic, utest) { utest(ElectionLogic::CLASSIC); } + +#define test_disallowed(utest) TEST(disallowed, utest) { utest(ElectionLogic::DISALLOW); } + +#define test_connectivity(utest) TEST(connectivity, utest) { utest(ElectionLogic::CONNECTIVITY); } + + +// TODO: test for expected failures; gtest probably supports that? +test_classic(single_startup_election_completes) +test_classic(everybody_starts_completes) +test_classic(blocked_connection_continues_election) +test_classic(converges_after_flapping) + +test_disallowed(single_startup_election_completes) +test_disallowed(everybody_starts_completes) +test_disallowed(blocked_connection_continues_election) +test_disallowed(disallowed_doesnt_win) +test_disallowed(converges_after_flapping) + +/* skip single_startup_election_completes because we crash + on init conditions. That's fine since as noted above it's not + quite following the rules anyway. */ +test_connectivity(everybody_starts_completes) +test_connectivity(blocked_connection_converges_election) +test_connectivity(disallowed_doesnt_win) +test_connectivity(converges_after_flapping) +test_connectivity(converges_while_flapping) +test_connectivity(netsplit_with_disallowed_tiebreaker_converges) +test_connectivity(handles_singly_connected_peon) +test_connectivity(handles_disagreeing_connectivity) +test_connectivity(handles_outdated_scoring) +test_connectivity(handles_removing_ranks) diff --git a/src/test/mon/test_log_rss_usage.cc b/src/test/mon/test_log_rss_usage.cc new file mode 100644 index 000000000..f6e85f414 --- /dev/null +++ b/src/test/mon/test_log_rss_usage.cc @@ -0,0 +1,101 @@ +#include <sys/types.h> +#include <dirent.h> +#include <errno.h> +#include <vector> +#include <string> +#include <iostream> +#include <fstream> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> + +using namespace std; + +int getPidByName(string procName) +{ + int pid = -1; + + // Open the /proc directory + DIR *dp = opendir("/proc"); + if (dp != NULL) + { + // Enumerate all entries in '/proc' until process is found + struct dirent *dirp; + while (pid < 0 && (dirp = readdir(dp))) + { + // Skip non-numeric entries + int id = atoi(dirp->d_name); + if (id > 0) + { + // Read contents of virtual /proc/{pid}/cmdline file + string cmdPath = string("/proc/") + dirp->d_name + "/cmdline"; + ifstream cmdFile(cmdPath.c_str()); + string cmdLine; + getline(cmdFile, cmdLine); + if (!cmdLine.empty()) + { + // Keep first cmdline item which contains the program path + size_t pos = cmdLine.find('\0'); + if (pos != string::npos) { + cmdLine = cmdLine.substr(0, pos); + } + // Get program name only, removing the path + pos = cmdLine.rfind('/'); + if (pos != string::npos) { + cmdLine = cmdLine.substr(pos + 1); + } + // Compare against requested process name + if (procName == cmdLine) { + pid = id; + } + } + } + } + } + + closedir(dp); + + return pid; +} + +uint64_t getRssUsage(string pid) +{ + int totalSize = 0; + int resSize = 0; + + string statmPath = string("/proc/") + pid + "/statm"; + ifstream buffer(statmPath); + buffer >> totalSize >> resSize; + buffer.close(); + + long page_size = sysconf(_SC_PAGE_SIZE); + uint64_t rss = resSize * page_size; + + return rss; +} + +int main(int argc, char* argv[]) +{ + if (argc != 2) { + cout << "Syntax: " + << "ceph_test_log_rss_usage <process name>" + << endl; + exit(EINVAL); + } + uint64_t rss = 0; + int pid = getPidByName(argv[1]); + string rssUsage; + + // Use the pid to get RSS memory usage + // and print it to stdout + if (pid != -1) { + rss = getRssUsage(to_string(pid)); + } else { + cout << "Process " << argv[1] << " NOT FOUND!\n" << endl; + exit(ESRCH); + } + + rssUsage = to_string(rss) + ":" + to_string(pid) + ":"; + cout << rssUsage.c_str() << endl; + return 0; +} diff --git a/src/test/mon/test_mon_memory_target.cc b/src/test/mon/test_mon_memory_target.cc new file mode 100644 index 000000000..e8f975b47 --- /dev/null +++ b/src/test/mon/test_mon_memory_target.cc @@ -0,0 +1,79 @@ +#include <algorithm> +#include <cmath> +#include <iostream> +#include <string> +#include <numeric> +#include <regex> +#include <system_error> + +#include <boost/process.hpp> +#include <boost/tokenizer.hpp> + +namespace bp = boost::process; +using namespace std; + +int main(int argc, char** argv) +{ + cout << "Mon Memory Target Test" << endl; + + if (argc != 2) { + cout << "Syntax: " + << "ceph_test_mon_memory_target <mon-memory-target-bytes>" + << endl; + exit(EINVAL); + } + + string target_directory("/var/log/ceph/"); + unsigned long maxallowed = stoul(argv[1], nullptr, 10); + regex reg(R"(cache_size:(\d*)\s)"); + + string grep_command("grep _set_new_cache_sizes " + target_directory + + "ceph-mon.a.log"); + bp::ipstream is; + error_code ec; + bp::child grep(grep_command, bp::std_out > is, ec); + if (ec) { + cout << "Error grepping logs! Exiting" << endl; + cout << "Error: " << ec.value() << " " << ec.message() << endl; + exit(ec.value()); + } + + string line; + vector<unsigned long> results; + while (grep.running() && getline(is, line) && !line.empty()) { + smatch match; + if (regex_search(line, match, reg)) { + results.push_back(stoul(match[1].str())); + } + } + + if (results.empty()) { + cout << "Error: No grep results found!" << endl; + exit(ENOENT); + } + + auto maxe = *(max_element(results.begin(), results.end())); + cout << "Results for mon_memory_target:" << endl; + cout << "Max: " << maxe << endl; + cout << "Min: " << *(min_element(results.begin(), results.end())) << endl; + auto sum = accumulate(results.begin(), results.end(), + static_cast<unsigned long long>(0)); + auto mean = sum / results.size(); + cout << "Mean average: " << mean << endl; + vector<unsigned long> diff(results.size()); + transform(results.begin(), results.end(), diff.begin(), + [mean](unsigned long x) { return x - mean; }); + auto sump = inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); + auto stdev = sqrt(sump / results.size()); + cout << "Standard deviation: " << stdev << endl; + + if (maxe > maxallowed) { + cout << "Error: Mon memory consumption exceeds maximum allowed!" << endl; + exit(ENOMEM); + } + + grep.wait(); + + cout << "Completed successfully" << endl; + return 0; +} diff --git a/src/test/mon/test_mon_rss_usage.cc b/src/test/mon/test_mon_rss_usage.cc new file mode 100644 index 000000000..76b5856f6 --- /dev/null +++ b/src/test/mon/test_mon_rss_usage.cc @@ -0,0 +1,72 @@ +#include <algorithm> +#include <iostream> +#include <fstream> +#include <string> +#include <numeric> +#include <regex> +#include <cmath> +#include <system_error> + +using namespace std; + +int main(int argc, char **argv) +{ + cout << "Mon RSS Usage Test" << endl; + + if (argc != 2) { + cout << "Syntax: " + << "ceph_test_mon_rss_usage <mon-memory-target-bytes>" + << endl; + exit(EINVAL); + } + + unsigned long maxallowed = stoul(argv[1], nullptr, 10); + // Set max allowed RSS usage to be 125% of mon-memory-target + maxallowed *= 1.25; + + string target_directory("/var/log/ceph/"); + string filePath = target_directory + "ceph-mon-rss-usage.log"; + ifstream buffer(filePath.c_str()); + string line; + vector<unsigned long> results; + while(getline(buffer, line) && !line.empty()) { + string rssUsage; + size_t pos = line.find(':'); + if (pos != string::npos) { + rssUsage = line.substr(0, pos); + } + if (!rssUsage.empty()) { + results.push_back(stoul(rssUsage)); + } + } + + buffer.close(); + if (results.empty()) { + cout << "Error: No grep results found!" << endl; + exit(ENOENT); + } + + auto maxe = *(max_element(results.begin(), results.end())); + cout << "Stats for mon RSS Memory Usage:" << endl; + cout << "Parsed " << results.size() << " entries." << endl; + cout << "Max: " << maxe << endl; + cout << "Min: " << *(min_element(results.begin(), results.end())) << endl; + auto sum = accumulate(results.begin(), results.end(), + static_cast<unsigned long long>(0)); + auto mean = sum / results.size(); + cout << "Mean average: " << mean << endl; + vector<unsigned long> diff(results.size()); + transform(results.begin(), results.end(), diff.begin(), + [mean](unsigned long x) { return x - mean; }); + auto sump = inner_product(diff.begin(), diff.end(), diff.begin(), 0.0); + auto stdev = sqrt(sump / results.size()); + cout << fixed << "Standard deviation: " << stdev << endl; + + if (maxe > maxallowed) { + cout << "Error: Mon RSS memory usage exceeds maximum allowed!" << endl; + exit(ENOMEM); + } + + cout << "Completed successfully" << endl; + return 0; +} diff --git a/src/test/mon/test_mon_types.cc b/src/test/mon/test_mon_types.cc new file mode 100644 index 000000000..e9997f14f --- /dev/null +++ b/src/test/mon/test_mon_types.cc @@ -0,0 +1,140 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2012 Inktank + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ +#include <iostream> +#include "mon/mon_types.h" + +#include "gtest/gtest.h" + +TEST(mon_features, supported_v_persistent) { + + mon_feature_t supported = ceph::features::mon::get_supported(); + mon_feature_t persistent = ceph::features::mon::get_persistent(); + + ASSERT_EQ(supported.intersection(persistent), persistent); + ASSERT_TRUE(supported.contains_all(persistent)); + + mon_feature_t diff = supported.diff(persistent); + ASSERT_TRUE((persistent | diff) == supported); + ASSERT_TRUE((supported & persistent) == persistent); +} + +TEST(mon_features, binary_ops) { + + mon_feature_t FEATURE_NONE(0ULL); + mon_feature_t FEATURE_A((1ULL << 1)); + mon_feature_t FEATURE_B((1ULL << 2)); + mon_feature_t FEATURE_C((1ULL << 3)); + mon_feature_t FEATURE_D((1ULL << 4)); + + mon_feature_t FEATURE_ALL( + FEATURE_A | FEATURE_B | + FEATURE_C | FEATURE_D + ); + + mon_feature_t foo(FEATURE_A|FEATURE_B); + mon_feature_t bar(FEATURE_C|FEATURE_D); + + ASSERT_EQ(FEATURE_A|FEATURE_B, foo); + ASSERT_EQ(FEATURE_C|FEATURE_D, bar); + + ASSERT_NE(FEATURE_C, foo); + ASSERT_NE(FEATURE_B, bar); + ASSERT_NE(FEATURE_NONE, foo); + ASSERT_NE(FEATURE_NONE, bar); + + ASSERT_FALSE(foo.empty()); + ASSERT_FALSE(bar.empty()); + ASSERT_TRUE(FEATURE_NONE.empty()); + + ASSERT_EQ(FEATURE_ALL, (foo ^ bar)); + ASSERT_EQ(FEATURE_NONE, (foo & bar)); + + mon_feature_t baz = foo; + ASSERT_EQ(baz, foo); + + baz |= bar; + ASSERT_EQ(FEATURE_ALL, baz); + baz ^= foo; + ASSERT_EQ(baz, bar); + + baz |= FEATURE_A; + ASSERT_EQ(FEATURE_C, baz & FEATURE_C); + ASSERT_EQ((FEATURE_A|FEATURE_D), baz & (FEATURE_A|FEATURE_D)); + ASSERT_EQ(FEATURE_B|FEATURE_C|FEATURE_D, (baz ^ foo)); +} + +TEST(mon_features, set_funcs) { + + mon_feature_t FEATURE_A((1ULL << 1)); + mon_feature_t FEATURE_B((1ULL << 2)); + mon_feature_t FEATURE_C((1ULL << 3)); + mon_feature_t FEATURE_D((1ULL << 4)); + + mon_feature_t FEATURE_ALL( + FEATURE_A | FEATURE_B | + FEATURE_C | FEATURE_D + ); + + mon_feature_t foo(FEATURE_A|FEATURE_B); + mon_feature_t bar(FEATURE_C|FEATURE_D); + + ASSERT_TRUE(FEATURE_ALL.contains_all(foo)); + ASSERT_TRUE(FEATURE_ALL.contains_all(bar)); + ASSERT_TRUE(FEATURE_ALL.contains_all(foo|bar)); + + ASSERT_EQ(foo.diff(bar), foo); + ASSERT_EQ(bar.diff(foo), bar); + ASSERT_EQ(FEATURE_ALL.diff(foo), bar); + ASSERT_EQ(FEATURE_ALL.diff(bar), foo); + + ASSERT_TRUE(foo.contains_any(FEATURE_A|bar)); + ASSERT_TRUE(bar.contains_any(FEATURE_ALL)); + ASSERT_TRUE(FEATURE_ALL.contains_any(foo)); + + mon_feature_t FEATURE_X((1ULL << 10)); + + ASSERT_FALSE(FEATURE_ALL.contains_any(FEATURE_X)); + ASSERT_FALSE(FEATURE_ALL.contains_all(FEATURE_X)); + ASSERT_EQ(FEATURE_ALL.diff(FEATURE_X), FEATURE_ALL); + + ASSERT_EQ(foo.intersection(FEATURE_ALL), foo); + ASSERT_EQ(bar.intersection(FEATURE_ALL), bar); +} + +TEST(mon_features, set_unset) { + + mon_feature_t FEATURE_A((1ULL << 1)); + mon_feature_t FEATURE_B((1ULL << 2)); + mon_feature_t FEATURE_C((1ULL << 3)); + + mon_feature_t foo; + ASSERT_EQ(ceph::features::mon::FEATURE_NONE, foo); + + foo.set_feature(FEATURE_A); + ASSERT_EQ(FEATURE_A, foo); + ASSERT_TRUE(foo.contains_all(FEATURE_A)); + + foo.set_feature(FEATURE_B|FEATURE_C); + ASSERT_EQ((FEATURE_A|FEATURE_B|FEATURE_C), foo); + ASSERT_TRUE(foo.contains_all((FEATURE_A|FEATURE_B|FEATURE_C))); + + foo.unset_feature(FEATURE_A); + ASSERT_EQ((FEATURE_B|FEATURE_C), foo); + ASSERT_FALSE(foo.contains_any(FEATURE_A)); + ASSERT_TRUE(foo.contains_all((FEATURE_B|FEATURE_C))); + + foo.unset_feature(FEATURE_B|FEATURE_C); + ASSERT_EQ(ceph::features::mon::FEATURE_NONE, foo); + ASSERT_FALSE(foo.contains_any(FEATURE_A|FEATURE_B|FEATURE_C)); +} diff --git a/src/test/mon/test_mon_workloadgen.cc b/src/test/mon/test_mon_workloadgen.cc new file mode 100644 index 000000000..147ea8bcd --- /dev/null +++ b/src/test/mon/test_mon_workloadgen.cc @@ -0,0 +1,1104 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ +#include "acconfig.h" + +#ifdef HAVE_SYS_MOUNT_H +#include <sys/mount.h> +#endif + +#ifdef HAVE_SYS_PARAM_H +#include <sys/param.h> +#endif + +#ifdef HAVE_SYS_VFS_H +#include <sys/vfs.h> +#endif + +#include <iostream> +#include <string> +#include <map> + +#include <boost/scoped_ptr.hpp> +#include <boost/random/mersenne_twister.hpp> +#include <boost/random/uniform_int.hpp> + + +#include "osd/osd_types.h" +#include "osdc/Objecter.h" +#include "mon/MonClient.h" +#include "msg/Dispatcher.h" +#include "msg/Messenger.h" +#include "common/async/context_pool.h" +#include "common/Timer.h" +#include "common/ceph_argparse.h" +#include "global/global_init.h" +#include "global/signal_handler.h" +#include "common/config.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/ceph_mutex.h" +#include "common/strtol.h" +#include "common/LogEntry.h" +#include "auth/KeyRing.h" +#include "auth/AuthAuthorizeHandler.h" +#include "include/uuid.h" +#include "include/ceph_assert.h" + +#include "messages/MOSDBoot.h" +#include "messages/MOSDAlive.h" +#include "messages/MOSDPGCreate.h" +#include "messages/MOSDPGRemove.h" +#include "messages/MOSDMap.h" +#include "messages/MPGStats.h" +#include "messages/MLog.h" +#include "messages/MOSDPGTemp.h" + +using namespace std; + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_ +#undef dout_prefix +#define dout_prefix _prefix(_dout, get_name()) +static ostream& _prefix(std::ostream *_dout, const string &n) { + return *_dout << " stub(" << n << ") "; +} + + +typedef boost::mt11213b rngen_t; +typedef boost::scoped_ptr<Messenger> MessengerRef; +typedef boost::scoped_ptr<Objecter> ObjecterRef; + +class TestStub : public Dispatcher +{ + protected: + MessengerRef messenger; + ceph::async::io_context_pool poolctx; + MonClient monc; + + ceph::mutex lock; + ceph::condition_variable cond; + SafeTimer timer; + + bool do_shutdown; + double tick_seconds; + + struct C_Tick : public Context { + TestStub *s; + explicit C_Tick(TestStub *stub) : s(stub) {} + void finish(int r) override { + generic_dout(20) << "C_Tick::" << __func__ << dendl; + if (r == -ECANCELED) { + generic_dout(20) << "C_Tick::" << __func__ + << " shutdown" << dendl; + return; + } + s->tick(); + } + }; + + bool ms_dispatch(Message *m) override = 0; + void ms_handle_connect(Connection *con) override = 0; + void ms_handle_remote_reset(Connection *con) override = 0; + virtual int _shutdown() = 0; + // courtesy method to be implemented by the stubs at their + // own discretion + virtual void _tick() { } + // different stubs may have different needs; if a stub needs + // to tick, then it must call this function. + void start_ticking(double t=1.0) { + tick_seconds = t; + if (t <= 0) { + stop_ticking(); + return; + } + dout(20) << __func__ << " adding tick timer" << dendl; + timer.add_event_after(tick_seconds, new C_Tick(this)); + } + // If we have a function to start ticking that the stubs can + // use at their own discretion, then we should also have a + // function to disable said ticking to be used the same way. + // Just in case. + // For simplicity's sake, we don't cancel the tick right off + // the bat; instead, we wait for the next tick to kick in and + // disable itself. + void stop_ticking() { + dout(20) << __func__ << " disable tick" << dendl; + tick_seconds = 0; + } + + public: + void tick() { + std::cout << __func__ << std::endl; + if (do_shutdown || (tick_seconds <= 0)) { + std::cout << __func__ << " " + << (do_shutdown ? "shutdown" : "stop ticking") + << std::endl; + return; + } + _tick(); + timer.add_event_after(tick_seconds, new C_Tick(this)); + } + + virtual const string get_name() = 0; + virtual int init() = 0; + + virtual int shutdown() { + std::lock_guard l{lock}; + do_shutdown = true; + int r = _shutdown(); + if (r < 0) { + dout(10) << __func__ << " error shutting down: " + << cpp_strerror(-r) << dendl; + return r; + } + monc.shutdown(); + timer.shutdown(); + messenger->shutdown(); + poolctx.finish(); + return 0; + } + + virtual void print(ostream &out) { + out << "stub(" << get_name() << ")"; + } + + void wait() { + if (messenger != NULL) + messenger->wait(); + } + + TestStub(CephContext *cct, string who) + : Dispatcher(cct), + monc(cct, poolctx), + lock(ceph::make_mutex(who.append("::lock"))), + timer(cct, lock), + do_shutdown(false), + tick_seconds(0.0) { } +}; + +class ClientStub : public TestStub +{ + ObjecterRef objecter; + rngen_t gen; + + protected: + bool ms_dispatch(Message *m) override { + std::lock_guard l{lock}; + dout(1) << "client::" << __func__ << " " << *m << dendl; + switch (m->get_type()) { + case CEPH_MSG_OSD_MAP: + objecter->handle_osd_map((MOSDMap*)m); + cond.notify_all(); + break; + } + return true; + } + + void ms_handle_connect(Connection *con) override { + dout(1) << "client::" << __func__ << " " << con << dendl; + std::lock_guard l{lock}; + objecter->ms_handle_connect(con); + } + + void ms_handle_remote_reset(Connection *con) override { + dout(1) << "client::" << __func__ << " " << con << dendl; + std::lock_guard l{lock}; + objecter->ms_handle_remote_reset(con); + } + + bool ms_handle_reset(Connection *con) override { + dout(1) << "client::" << __func__ << dendl; + std::lock_guard l{lock}; + objecter->ms_handle_reset(con); + return false; + } + + bool ms_handle_refused(Connection *con) override { + return false; + } + + const string get_name() override { + return "client"; + } + + int _shutdown() override { + if (objecter) { + objecter->shutdown(); + } + return 0; + } + + public: + explicit ClientStub(CephContext *cct) + : TestStub(cct, "client"), + gen((int) time(NULL)) + { } + + int init() override { + int err; + poolctx.start(1); + err = monc.build_initial_monmap(); + if (err < 0) { + derr << "ClientStub::" << __func__ << " ERROR: build initial monmap: " + << cpp_strerror(err) << dendl; + return err; + } + + messenger.reset(Messenger::create_client_messenger(cct, "stubclient")); + ceph_assert(messenger.get() != NULL); + + messenger->set_default_policy( + Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX)); + dout(10) << "ClientStub::" << __func__ << " starting messenger at " + << messenger->get_myaddrs() << dendl; + + objecter.reset(new Objecter(cct, messenger.get(), &monc, poolctx)); + ceph_assert(objecter.get() != NULL); + objecter->set_balanced_budget(); + + monc.set_messenger(messenger.get()); + objecter->init(); + messenger->add_dispatcher_head(this); + messenger->start(); + monc.set_want_keys(CEPH_ENTITY_TYPE_MON|CEPH_ENTITY_TYPE_OSD); + + err = monc.init(); + if (err < 0) { + derr << "ClientStub::" << __func__ << " monc init error: " + << cpp_strerror(-err) << dendl; + return err; + } + + err = monc.authenticate(); + if (err < 0) { + derr << "ClientStub::" << __func__ << " monc authenticate error: " + << cpp_strerror(-err) << dendl; + monc.shutdown(); + return err; + } + monc.wait_auth_rotating(30.0); + + objecter->set_client_incarnation(0); + objecter->start(); + + lock.lock(); + timer.init(); + monc.renew_subs(); + + lock.unlock(); + + objecter->wait_for_osd_map(); + + dout(10) << "ClientStub::" << __func__ << " done" << dendl; + return 0; + } +}; + +class OSDStub : public TestStub +{ + int whoami; + OSDSuperblock sb; + OSDMap osdmap; + osd_stat_t osd_stat; + + map<pg_t,pg_stat_t> pgs; + set<pg_t> pgs_changes; + + rngen_t gen; + boost::uniform_int<> mon_osd_rng; + + utime_t last_boot_attempt; + static const double STUB_BOOT_INTERVAL; + + + public: + + enum { + STUB_MON_OSD_ALIVE = 1, + STUB_MON_OSD_PGTEMP = 2, + STUB_MON_OSD_FAILURE = 3, + STUB_MON_OSD_PGSTATS = 4, + STUB_MON_LOG = 5, + + STUB_MON_OSD_FIRST = STUB_MON_OSD_ALIVE, + STUB_MON_OSD_LAST = STUB_MON_LOG, + }; + + struct C_CreatePGs : public Context { + OSDStub *s; + explicit C_CreatePGs(OSDStub *stub) : s(stub) {} + void finish(int r) override { + if (r == -ECANCELED) { + generic_dout(20) << "C_CreatePGs::" << __func__ + << " shutdown" << dendl; + return; + } + generic_dout(20) << "C_CreatePGs::" << __func__ << dendl; + s->auto_create_pgs(); + } + }; + + + OSDStub(int _whoami, CephContext *cct) + : TestStub(cct, "osd"), + whoami(_whoami), + gen(whoami), + mon_osd_rng(STUB_MON_OSD_FIRST, STUB_MON_OSD_LAST) + { + dout(20) << __func__ << " auth supported: " + << cct->_conf->auth_supported << dendl; + stringstream ss; + ss << "client-osd" << whoami; + std::string public_msgr_type = cct->_conf->ms_public_type.empty() ? cct->_conf.get_val<std::string>("ms_type") : cct->_conf->ms_public_type; + messenger.reset(Messenger::create(cct, public_msgr_type, entity_name_t::OSD(whoami), + ss.str().c_str(), getpid())); + + Throttle throttler(g_ceph_context, "osd_client_bytes", + g_conf()->osd_client_message_size_cap); + + messenger->set_default_policy( + Messenger::Policy::stateless_server(0)); + messenger->set_policy_throttlers(entity_name_t::TYPE_CLIENT, + &throttler, NULL); + messenger->set_policy(entity_name_t::TYPE_MON, + Messenger::Policy::lossy_client( + CEPH_FEATURE_UID | + CEPH_FEATURE_PGID64 | + CEPH_FEATURE_OSDENC)); + messenger->set_policy(entity_name_t::TYPE_OSD, + Messenger::Policy::stateless_server(0)); + + dout(10) << __func__ << " public addr " << g_conf()->public_addr << dendl; + int err = messenger->bind(g_conf()->public_addr); + if (err < 0) + exit(1); + + if (monc.build_initial_monmap() < 0) + exit(1); + + messenger->start(); + monc.set_messenger(messenger.get()); + } + + int init() override { + dout(10) << __func__ << dendl; + std::lock_guard l{lock}; + + dout(1) << __func__ << " fsid " << monc.monmap.fsid + << " osd_fsid " << g_conf()->osd_uuid << dendl; + dout(1) << __func__ << " name " << g_conf()->name << dendl; + + timer.init(); + messenger->add_dispatcher_head(this); + monc.set_want_keys(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD); + + int err = monc.init(); + if (err < 0) { + derr << __func__ << " monc init error: " + << cpp_strerror(-err) << dendl; + return err; + } + + err = monc.authenticate(); + if (err < 0) { + derr << __func__ << " monc authenticate error: " + << cpp_strerror(-err) << dendl; + monc.shutdown(); + return err; + } + ceph_assert(!monc.get_fsid().is_zero()); + + monc.wait_auth_rotating(30.0); + + + dout(10) << __func__ << " creating osd superblock" << dendl; + sb.cluster_fsid = monc.monmap.fsid; + sb.osd_fsid.generate_random(); + sb.whoami = whoami; + sb.compat_features = CompatSet(); + dout(20) << __func__ << " " << sb << dendl; + dout(20) << __func__ << " osdmap " << osdmap << dendl; + + update_osd_stat(); + + start_ticking(); + // give a chance to the mons to inform us of what PGs we should create + timer.add_event_after(30.0, new C_CreatePGs(this)); + + return 0; + } + + int _shutdown() override { + + return 0; + } + + void boot() { + dout(1) << __func__ << " boot?" << dendl; + + utime_t now = ceph_clock_now(); + if ((last_boot_attempt > 0.0) + && ((now - last_boot_attempt)) <= STUB_BOOT_INTERVAL) { + dout(1) << __func__ << " backoff and try again later." << dendl; + return; + } + + dout(1) << __func__ << " boot!" << dendl; + MOSDBoot *mboot = new MOSDBoot; + mboot->sb = sb; + last_boot_attempt = now; + monc.send_mon_message(mboot); + } + + void add_pg(pg_t pgid, epoch_t epoch, pg_t parent) { + + utime_t now = ceph_clock_now(); + + pg_stat_t s; + s.created = epoch; + s.last_epoch_clean = epoch; + s.parent = parent; + s.state |= PG_STATE_CLEAN | PG_STATE_ACTIVE; + s.last_fresh = now; + s.last_change = now; + s.last_clean = now; + s.last_active = now; + s.last_unstale = now; + + pgs[pgid] = s; + pgs_changes.insert(pgid); + } + + void auto_create_pgs() { + bool has_pgs = !pgs.empty(); + dout(10) << __func__ + << ": " << (has_pgs ? "has pgs; ignore" : "create pgs") << dendl; + if (has_pgs) + return; + + if (!osdmap.get_epoch()) { + dout(1) << __func__ + << " still don't have osdmap; reschedule pg creation" << dendl; + timer.add_event_after(10.0, new C_CreatePGs(this)); + return; + } + + auto& osdmap_pools = osdmap.get_pools(); + for (auto pit = osdmap_pools.begin(); pit != osdmap_pools.end(); ++pit) { + const int64_t pool_id = pit->first; + const pg_pool_t &pool = pit->second; + int ruleno = pool.get_crush_rule(); + + if (!osdmap.crush->rule_exists(ruleno)) { + dout(20) << __func__ + << " no crush rule for pool id " << pool_id + << " rule no " << ruleno << dendl; + continue; + } + + epoch_t pool_epoch = pool.get_last_change(); + dout(20) << __func__ + << " pool num pgs " << pool.get_pg_num() + << " epoch " << pool_epoch << dendl; + + for (ps_t ps = 0; ps < pool.get_pg_num(); ++ps) { + pg_t pgid(ps, pool_id); + pg_t parent; + dout(20) << __func__ + << " pgid " << pgid << " parent " << parent << dendl; + add_pg(pgid, pool_epoch, parent); + } + } + } + + void update_osd_stat() { + struct statfs stbuf; + int ret = statfs(".", &stbuf); + if (ret < 0) { + ret = -errno; + dout(0) << __func__ + << " cannot statfs ." << cpp_strerror(ret) << dendl; + return; + } + + osd_stat.statfs.total = stbuf.f_blocks * stbuf.f_bsize; + osd_stat.statfs.available = stbuf.f_bavail * stbuf.f_bsize; + osd_stat.statfs.internally_reserved = 0; + } + + void send_pg_stats() { + dout(10) << __func__ + << " pgs " << pgs.size() << " osdmap " << osdmap << dendl; + MPGStats *mstats = new MPGStats(monc.get_fsid(), osdmap.get_epoch()); + + mstats->set_tid(1); + mstats->osd_stat = osd_stat; + + set<pg_t>::iterator it; + for (it = pgs_changes.begin(); it != pgs_changes.end(); ++it) { + pg_t pgid = (*it); + if (pgs.count(pgid) == 0) { + derr << __func__ + << " pgid " << pgid << " not on our map" << dendl; + ceph_abort_msg("pgid not on our map"); + } + pg_stat_t &s = pgs[pgid]; + mstats->pg_stat[pgid] = s; + + JSONFormatter f(true); + s.dump(&f); + dout(20) << __func__ + << " pg " << pgid << " stats:\n"; + f.flush(*_dout); + *_dout << dendl; + + } + dout(10) << __func__ << " send " << *mstats << dendl; + monc.send_mon_message(mstats); + } + + void modify_pg(pg_t pgid) { + dout(10) << __func__ << " pg " << pgid << dendl; + ceph_assert(pgs.count(pgid) > 0); + + pg_stat_t &s = pgs[pgid]; + utime_t now = ceph_clock_now(); + + if (now - s.last_change < 10.0) { + dout(10) << __func__ + << " pg " << pgid << " changed in the last 10s" << dendl; + return; + } + + s.state ^= PG_STATE_CLEAN; + if (s.state & PG_STATE_CLEAN) + s.last_clean = now; + s.last_change = now; + s.reported_seq++; + + pgs_changes.insert(pgid); + } + + void modify_pgs() { + dout(10) << __func__ << dendl; + + if (pgs.empty()) { + dout(1) << __func__ + << " no pgs available! don't attempt to modify." << dendl; + return; + } + + boost::uniform_int<> pg_rng(0, pgs.size()-1); + set<int> pgs_pos; + + int num_pgs = pg_rng(gen); + while ((int)pgs_pos.size() < num_pgs) + pgs_pos.insert(pg_rng(gen)); + + map<pg_t,pg_stat_t>::iterator it = pgs.begin(); + set<int>::iterator pos_it = pgs_pos.begin(); + + int pgs_at = 0; + while (pos_it != pgs_pos.end()) { + int at = *pos_it; + dout(20) << __func__ << " pg at pos " << at << dendl; + while ((pgs_at != at) && (it != pgs.end())) { + ++it; + ++pgs_at; + } + ceph_assert(it != pgs.end()); + dout(20) << __func__ + << " pg at pos " << at << ": " << it->first << dendl; + modify_pg(it->first); + ++pos_it; + } + } + + void op_alive() { + dout(10) << __func__ << dendl; + if (!osdmap.exists(whoami)) { + dout(0) << __func__ << " I'm not in the osdmap!!\n"; + JSONFormatter f(true); + osdmap.dump(&f); + f.flush(*_dout); + *_dout << dendl; + } + if (osdmap.get_epoch() == 0) { + dout(1) << __func__ << " wait for osdmap" << dendl; + return; + } + epoch_t up_thru = osdmap.get_up_thru(whoami); + dout(10) << __func__ << "up_thru: " << osdmap.get_up_thru(whoami) << dendl; + + monc.send_mon_message(new MOSDAlive(osdmap.get_epoch(), up_thru)); + } + + void op_pgtemp() { + if (osdmap.get_epoch() == 0) { + dout(1) << __func__ << " wait for osdmap" << dendl; + return; + } + dout(10) << __func__ << dendl; + MOSDPGTemp *m = new MOSDPGTemp(osdmap.get_epoch()); + monc.send_mon_message(m); + } + + void op_failure() { + dout(10) << __func__ << dendl; + } + + void op_pgstats() { + dout(10) << __func__ << dendl; + + modify_pgs(); + if (!pgs_changes.empty()) + send_pg_stats(); + monc.sub_want("osd_pg_creates", 0, CEPH_SUBSCRIBE_ONETIME); + monc.renew_subs(); + + dout(20) << __func__ << " pg pools:\n"; + + JSONFormatter f(true); + f.open_array_section("pools"); + auto& osdmap_pools = osdmap.get_pools(); + for (auto pit = osdmap_pools.begin(); pit != osdmap_pools.end(); ++pit) { + const int64_t pool_id = pit->first; + const pg_pool_t &pool = pit->second; + f.open_object_section("pool"); + f.dump_int("pool_id", pool_id); + f.open_object_section("pool_dump"); + pool.dump(&f); + f.close_section(); + f.close_section(); + } + f.close_section(); + f.flush(*_dout); + *_dout << dendl; + } + + void op_log() { + dout(10) << __func__ << dendl; + + MLog *m = new MLog(monc.get_fsid()); + + boost::uniform_int<> log_rng(1, 10); + size_t num_entries = log_rng(gen); + dout(10) << __func__ + << " send " << num_entries << " log messages" << dendl; + + utime_t now = ceph_clock_now(); + int seq = 0; + for (; num_entries > 0; --num_entries) { + LogEntry e; + e.rank = messenger->get_myname(); + e.addrs = messenger->get_myaddrs(); + e.stamp = now; + e.seq = seq++; + e.prio = CLOG_DEBUG; + e.msg = "OSDStub::op_log"; + m->entries.push_back(e); + } + + monc.send_mon_message(m); + } + + void _tick() override { + if (!osdmap.exists(whoami)) { + std::cout << __func__ << " not in the cluster; boot!" << std::endl; + boot(); + return; + } + + update_osd_stat(); + + boost::uniform_int<> op_rng(STUB_MON_OSD_FIRST, STUB_MON_OSD_LAST); + int op = op_rng(gen); + switch (op) { + case STUB_MON_OSD_ALIVE: + op_alive(); + break; + case STUB_MON_OSD_PGTEMP: + op_pgtemp(); + break; + case STUB_MON_OSD_FAILURE: + op_failure(); + break; + case STUB_MON_OSD_PGSTATS: + op_pgstats(); + break; + case STUB_MON_LOG: + op_log(); + break; + } + } + + void handle_pg_create(MOSDPGCreate *m) { + ceph_assert(m != NULL); + if (m->epoch < osdmap.get_epoch()) { + std::cout << __func__ << " epoch " << m->epoch << " < " + << osdmap.get_epoch() << "; dropping" << std::endl; + m->put(); + return; + } + + for (map<pg_t,pg_create_t>::iterator it = m->mkpg.begin(); + it != m->mkpg.end(); ++it) { + pg_create_t &c = it->second; + std::cout << __func__ << " pg " << it->first + << " created " << c.created + << " parent " << c.parent << std::endl; + if (pgs.count(it->first)) { + std::cout << __func__ << " pg " << it->first + << " exists; skipping" << std::endl; + continue; + } + + pg_t pgid = it->first; + add_pg(pgid, c.created, c.parent); + } + send_pg_stats(); + } + + void handle_osd_map(MOSDMap *m) { + dout(1) << __func__ << dendl; + if (m->fsid != monc.get_fsid()) { + dout(0) << __func__ + << " message fsid " << m->fsid << " != " << monc.get_fsid() + << dendl; + dout(0) << __func__ << " " << m + << " from " << m->get_source_inst() + << dendl; + dout(0) << monc.get_monmap() << dendl; + } + ceph_assert(m->fsid == monc.get_fsid()); + + epoch_t first = m->get_first(); + epoch_t last = m->get_last(); + dout(5) << __func__ + << " epochs [" << first << "," << last << "]" + << " current " << osdmap.get_epoch() << dendl; + + if (last <= osdmap.get_epoch()) { + dout(5) << __func__ << " no new maps here; dropping" << dendl; + m->put(); + return; + } + + if (first > osdmap.get_epoch() + 1) { + dout(5) << __func__ + << osdmap.get_epoch() + 1 << ".." << (first-1) << dendl; + if ((m->oldest_map < first && osdmap.get_epoch() == 0) || + m->oldest_map <= osdmap.get_epoch()) { + monc.sub_want("osdmap", osdmap.get_epoch()+1, + CEPH_SUBSCRIBE_ONETIME); + monc.renew_subs(); + m->put(); + return; + } + } + + epoch_t start_full = std::max(osdmap.get_epoch() + 1, first); + + if (m->maps.size() > 0) { + map<epoch_t,bufferlist>::reverse_iterator rit; + rit = m->maps.rbegin(); + if (start_full <= rit->first) { + start_full = rit->first; + dout(5) << __func__ + << " full epoch " << start_full << dendl; + bufferlist &bl = rit->second; + auto p = bl.cbegin(); + osdmap.decode(p); + } + } + + for (epoch_t e = start_full; e <= last; e++) { + map<epoch_t,bufferlist>::iterator it; + it = m->incremental_maps.find(e); + if (it == m->incremental_maps.end()) + continue; + + dout(20) << __func__ + << " incremental epoch " << e + << " on full epoch " << start_full << dendl; + OSDMap::Incremental inc; + bufferlist &bl = it->second; + auto p = bl.cbegin(); + inc.decode(p); + + int err = osdmap.apply_incremental(inc); + if (err < 0) { + derr << "osd." << whoami << "::" << __func__ + << "** ERROR: applying incremental: " + << cpp_strerror(err) << dendl; + ceph_abort_msg("error applying incremental"); + } + } + dout(30) << __func__ << "\nosdmap:\n"; + JSONFormatter f(true); + osdmap.dump(&f); + f.flush(*_dout); + *_dout << dendl; + + if (osdmap.is_up(whoami) && + osdmap.get_addrs(whoami) == messenger->get_myaddrs()) { + dout(1) << __func__ + << " got into the osdmap and we're up!" << dendl; + } + + if (m->newest_map && m->newest_map > last) { + dout(1) << __func__ + << " they have more maps; requesting them!" << dendl; + monc.sub_want("osdmap", osdmap.get_epoch()+1, CEPH_SUBSCRIBE_ONETIME); + monc.renew_subs(); + } + + dout(10) << __func__ << " done" << dendl; + m->put(); + } + + bool ms_dispatch(Message *m) override { + dout(1) << __func__ << " " << *m << dendl; + + switch (m->get_type()) { + case MSG_OSD_PG_CREATE: + handle_pg_create((MOSDPGCreate*)m); + break; + case CEPH_MSG_OSD_MAP: + handle_osd_map((MOSDMap*)m); + break; + default: + m->put(); + break; + } + return true; + } + + void ms_handle_connect(Connection *con) override { + dout(1) << __func__ << " " << con << dendl; + if (con->get_peer_type() == CEPH_ENTITY_TYPE_MON) { + dout(10) << __func__ << " on mon" << dendl; + } + } + + void ms_handle_remote_reset(Connection *con) override {} + + bool ms_handle_reset(Connection *con) override { + dout(1) << __func__ << dendl; + return con->get_priv().get(); + } + + bool ms_handle_refused(Connection *con) override { + return false; + } + + const string get_name() override { + stringstream ss; + ss << "osd." << whoami; + return ss.str(); + } +}; + +double const OSDStub::STUB_BOOT_INTERVAL = 10.0; + +#undef dout_prefix +#define dout_prefix *_dout << "main " + +const char *our_name = NULL; +vector<TestStub*> stubs; +ceph::mutex shutdown_lock = ceph::make_mutex("main::shutdown_lock"); +ceph::condition_variable shutdown_cond; +Context *shutdown_cb = NULL; +SafeTimer *shutdown_timer = NULL; + +struct C_Shutdown : public Context +{ + void finish(int r) override { + generic_dout(10) << "main::shutdown time has ran out" << dendl; + shutdown_cond.notify_all(); + } +}; + +void handle_test_signal(int signum) +{ + if ((signum != SIGINT) && (signum != SIGTERM)) + return; + + std::cerr << "*** Got signal " << sig_str(signum) << " ***" << std::endl; + std::lock_guard l{shutdown_lock}; + if (shutdown_timer) { + shutdown_timer->cancel_all_events(); + shutdown_cond.notify_all(); + } +} + +void usage() { + ceph_assert(our_name != NULL); + + std::cout << "usage: " << our_name + << " <--stub-id ID> [--stub-id ID...]" + << std::endl; + std::cout << "\n\ +Global Options:\n\ + -c FILE Read configuration from FILE\n\ + --keyring FILE Read keyring from FILE\n\ + --help This message\n\ +\n\ +Test-specific Options:\n\ + --stub-id ID1..ID2 Interval of OSD ids for multiple stubs to mimic.\n\ + --stub-id ID OSD id a stub will mimic to be\n\ + (same as --stub-id ID..ID)\n\ +" << std::endl; +} + +int get_id_interval(int &first, int &last, string &str) +{ + size_t found = str.find(".."); + string first_str, last_str; + if (found == string::npos) { + first_str = last_str = str; + } else { + first_str = str.substr(0, found); + last_str = str.substr(found+2); + } + + string err; + first = strict_strtol(first_str.c_str(), 10, &err); + if ((first == 0) && (!err.empty())) { + std::cerr << err << std::endl; + return -1; + } + + last = strict_strtol(last_str.c_str(), 10, &err); + if ((last == 0) && (!err.empty())) { + std::cerr << err << std::endl; + return -1; + } + return 0; +} + +int main(int argc, const char *argv[]) +{ + vector<const char*> args; + our_name = argv[0]; + argv_to_vec(argc, argv, args); + + auto cct = global_init(NULL, args, + CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + + common_init_finish(g_ceph_context); + g_ceph_context->_conf.apply_changes(nullptr); + + set<int> stub_ids; + double duration = 300.0; + + for (std::vector<const char*>::iterator i = args.begin(); i != args.end();) { + string val; + + if (ceph_argparse_double_dash(args, i)) { + break; + } else if (ceph_argparse_witharg(args, i, &val, + "--stub-id", (char*) NULL)) { + int first = -1, last = -1; + if (get_id_interval(first, last, val) < 0) { + std::cerr << "** error parsing stub id '" << val << "'" << std::endl; + exit(1); + } + + for (; first <= last; ++first) + stub_ids.insert(first); + } else if (ceph_argparse_witharg(args, i, &val, + "--duration", (char*) NULL)) { + string err; + duration = (double) strict_strtol(val.c_str(), 10, &err); + if ((duration == 0) && (!err.empty())) { + std::cerr << "** error parsing '--duration " << val << "': '" + << err << std::endl; + exit(1); + } + } else if (ceph_argparse_flag(args, i, "--help", (char*) NULL)) { + usage(); + exit(0); + } else { + std::cerr << "unknown argument '" << *i << "'" << std::endl; + return 1; + } + } + + if (stub_ids.empty()) { + std::cerr << "** error: must specify at least one '--stub-id <ID>'" + << std::endl; + usage(); + return 1; + } + + for (set<int>::iterator i = stub_ids.begin(); i != stub_ids.end(); ++i) { + int whoami = *i; + + std::cout << __func__ << " starting stub." << whoami << std::endl; + OSDStub *stub = new OSDStub(whoami, g_ceph_context); + int err = stub->init(); + if (err < 0) { + std::cerr << "** osd stub error: " << cpp_strerror(-err) << std::endl; + return 1; + } + stubs.push_back(stub); + } + + std::cout << __func__ << " starting client stub" << std::endl; + ClientStub *cstub = new ClientStub(g_ceph_context); + int err = cstub->init(); + if (err < 0) { + std::cerr << "** client stub error: " << cpp_strerror(-err) << std::endl; + return 1; + } + stubs.push_back(cstub); + + init_async_signal_handler(); + register_async_signal_handler_oneshot(SIGINT, handle_test_signal); + register_async_signal_handler_oneshot(SIGTERM, handle_test_signal); + + { + unique_lock locker{shutdown_lock}; + shutdown_timer = new SafeTimer(g_ceph_context, shutdown_lock); + shutdown_timer->init(); + if (duration != 0) { + std::cout << __func__ + << " run test for " << duration << " seconds" << std::endl; + shutdown_timer->add_event_after((double) duration, new C_Shutdown); + } + shutdown_cond.wait(locker); + shutdown_timer->shutdown(); + delete shutdown_timer; + shutdown_timer = NULL; + } + unregister_async_signal_handler(SIGINT, handle_test_signal); + unregister_async_signal_handler(SIGTERM, handle_test_signal); + + std::cout << __func__ << " waiting for stubs to finish" << std::endl; + vector<TestStub*>::iterator it; + int i; + for (i = 0, it = stubs.begin(); it != stubs.end(); ++it, ++i) { + if (*it != NULL) { + (*it)->shutdown(); + (*it)->wait(); + std::cout << __func__ << " finished " << (*it)->get_name() << std::endl; + delete (*it); + (*it) = NULL; + } + } + + return 0; +} |