summaryrefslogtreecommitdiffstats
path: root/src/test/mon
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/test/mon/CMakeLists.txt83
-rw-r--r--src/test/mon/MonMap.cc245
-rw-r--r--src/test/mon/PGMap.cc168
-rwxr-xr-xsrc/test/mon/bench_auth.py105
-rw-r--r--src/test/mon/moncap.cc376
-rw-r--r--src/test/mon/test-mon-msg.cc338
-rw-r--r--src/test/mon/test_election.cc1003
-rw-r--r--src/test/mon/test_log_rss_usage.cc101
-rw-r--r--src/test/mon/test_mon_memory_target.cc79
-rw-r--r--src/test/mon/test_mon_rss_usage.cc72
-rw-r--r--src/test/mon/test_mon_types.cc140
-rw-r--r--src/test/mon/test_mon_workloadgen.cc1104
12 files changed, 3814 insertions, 0 deletions
diff --git a/src/test/mon/CMakeLists.txt b/src/test/mon/CMakeLists.txt
new file mode 100644
index 000000000..943ca99a3
--- /dev/null
+++ b/src/test/mon/CMakeLists.txt
@@ -0,0 +1,83 @@
+# ceph_test_mon_workloadgen
+add_executable(ceph_test_mon_workloadgen
+ test_mon_workloadgen.cc
+ )
+target_link_libraries(ceph_test_mon_workloadgen
+ os
+ osdc
+ global
+ ${EXTRALIBS}
+ ${CMAKE_DL_LIBS}
+ )
+install(TARGETS ceph_test_mon_workloadgen
+ DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+# ceph_test_mon_msg
+add_executable(ceph_test_mon_msg
+ test-mon-msg.cc
+ )
+target_link_libraries(ceph_test_mon_msg os osdc global ${UNITTEST_LIBS})
+
+# unittest_mon_moncap
+add_executable(unittest_mon_moncap
+ moncap.cc
+ )
+add_ceph_unittest(unittest_mon_moncap)
+target_link_libraries(unittest_mon_moncap mon global)
+
+# unittest_mon_map
+add_executable(unittest_mon_monmap
+ MonMap.cc
+ )
+add_ceph_unittest(unittest_mon_monmap)
+target_link_libraries(unittest_mon_monmap mon global)
+
+# unittest_mon_pgmap
+add_executable(unittest_mon_pgmap
+ PGMap.cc
+ $<TARGET_OBJECTS:unit-main>
+ )
+add_ceph_unittest(unittest_mon_pgmap)
+target_link_libraries(unittest_mon_pgmap mon global)
+
+# unittest_mon_montypes
+add_executable(unittest_mon_montypes
+ test_mon_types.cc
+ )
+add_ceph_unittest(unittest_mon_montypes)
+target_link_libraries(unittest_mon_montypes mon global)
+
+# ceph_test_mon_memory_target
+add_executable(ceph_test_mon_memory_target
+ test_mon_memory_target.cc)
+target_link_libraries(ceph_test_mon_memory_target Boost::system Threads::Threads)
+set_target_properties(ceph_test_mon_memory_target PROPERTIES
+ SKIP_RPATH TRUE
+ INSTALL_RPATH "")
+install(TARGETS ceph_test_mon_memory_target
+ DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+# ceph_test_mon_log_rss_usage
+add_executable(ceph_test_log_rss_usage
+ test_log_rss_usage.cc)
+set_target_properties(ceph_test_log_rss_usage PROPERTIES
+ SKIP_RPATH TRUE
+ INSTALL_RPATH "")
+install(TARGETS ceph_test_log_rss_usage
+ DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+# ceph_test_mon_rss_usage
+add_executable(ceph_test_mon_rss_usage
+ test_mon_rss_usage.cc)
+set_target_properties(ceph_test_mon_rss_usage PROPERTIES
+ SKIP_RPATH TRUE
+ INSTALL_RPATH "")
+install(TARGETS ceph_test_mon_rss_usage
+ DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+#unittest_mon_election
+add_executable(unittest_mon_election
+ test_election.cc
+ )
+add_ceph_unittest(unittest_mon_election)
+target_link_libraries(unittest_mon_election mon global)
diff --git a/src/test/mon/MonMap.cc b/src/test/mon/MonMap.cc
new file mode 100644
index 000000000..c9bcb7fec
--- /dev/null
+++ b/src/test/mon/MonMap.cc
@@ -0,0 +1,245 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2016 SUSE LINUX GmbH
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+#include "mon/MonMap.h"
+#include "common/ceph_context.h"
+#include "common/dns_resolve.h"
+#include "test/common/dns_messages.h"
+
+#include "common/debug.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <boost/smart_ptr/intrusive_ptr.hpp>
+
+#include <sstream>
+
+#define TEST_DEBUG 20
+
+#define dout_subsys ceph_subsys_mon
+
+
+using ::testing::Return;
+using ::testing::_;
+using ::testing::SetArrayArgument;
+using ::testing::DoAll;
+using ::testing::StrEq;
+
+
+class MonMapTest : public ::testing::Test {
+ protected:
+ virtual void SetUp() {
+ g_ceph_context->_conf->subsys.set_log_level(dout_subsys, TEST_DEBUG);
+ }
+
+ virtual void TearDown() {
+ DNSResolver::get_instance(nullptr);
+ }
+};
+
+TEST_F(MonMapTest, DISABLED_build_initial_config_from_dns) {
+
+ MockResolvHWrapper *resolvH = new MockResolvHWrapper();
+ DNSResolver::get_instance(resolvH);
+
+ int len = sizeof(ns_search_msg_ok_payload);
+ int lena = sizeof(ns_query_msg_mon_a_payload);
+ int lenb = sizeof(ns_query_msg_mon_b_payload);
+ int lenc = sizeof(ns_query_msg_mon_c_payload);
+
+ using ::testing::InSequence;
+ {
+ InSequence s;
+
+#ifdef HAVE_RES_NQUERY
+ EXPECT_CALL(*resolvH, res_nsearch(_, StrEq("_cephmon._tcp"), C_IN, T_SRV, _, _))
+ .WillOnce(DoAll(SetArrayArgument<4>(ns_search_msg_ok_payload,
+ ns_search_msg_ok_payload+len), Return(len)));
+
+ EXPECT_CALL(*resolvH, res_nquery(_,StrEq("mon.a.ceph.com"), C_IN, T_A,_,_))
+ .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_a_payload,
+ ns_query_msg_mon_a_payload+lena), Return(lena)));
+
+ EXPECT_CALL(*resolvH, res_nquery(_, StrEq("mon.c.ceph.com"), C_IN, T_A,_,_))
+ .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_c_payload,
+ ns_query_msg_mon_c_payload+lenc), Return(lenc)));
+
+ EXPECT_CALL(*resolvH, res_nquery(_,StrEq("mon.b.ceph.com"), C_IN, T_A, _,_))
+ .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_b_payload,
+ ns_query_msg_mon_b_payload+lenb), Return(lenb)));
+#else
+ EXPECT_CALL(*resolvH, res_search(StrEq("_cephmon._tcp"), C_IN, T_SRV, _, _))
+ .WillOnce(DoAll(SetArrayArgument<3>(ns_search_msg_ok_payload,
+ ns_search_msg_ok_payload+len), Return(len)));
+
+ EXPECT_CALL(*resolvH, res_query(StrEq("mon.a.ceph.com"), C_IN, T_A,_,_))
+ .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_a_payload,
+ ns_query_msg_mon_a_payload+lena), Return(lena)));
+
+ EXPECT_CALL(*resolvH, res_query(StrEq("mon.c.ceph.com"), C_IN, T_A,_,_))
+ .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_c_payload,
+ ns_query_msg_mon_c_payload+lenc), Return(lenc)));
+
+ EXPECT_CALL(*resolvH, res_query(StrEq("mon.b.ceph.com"), C_IN, T_A, _,_))
+ .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_b_payload,
+ ns_query_msg_mon_b_payload+lenb), Return(lenb)));
+#endif
+ }
+
+
+
+ boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON);
+ cct->_conf.set_val("mon_dns_srv_name", "cephmon");
+ MonMap monmap;
+ int r = monmap.build_initial(cct.get(), false, std::cerr);
+
+ ASSERT_EQ(r, 0);
+ ASSERT_EQ(monmap.mon_info.size(), (unsigned int)3);
+ auto it = monmap.mon_info.find("mon.a");
+ ASSERT_NE(it, monmap.mon_info.end());
+ std::ostringstream os;
+ os << it->second.public_addrs;
+ ASSERT_EQ(os.str(), "192.168.1.11:6789/0");
+ os.str("");
+ it = monmap.mon_info.find("mon.b");
+ ASSERT_NE(it, monmap.mon_info.end());
+ os << it->second.public_addrs;
+ ASSERT_EQ(os.str(), "192.168.1.12:6789/0");
+ os.str("");
+ it = monmap.mon_info.find("mon.c");
+ ASSERT_NE(it, monmap.mon_info.end());
+ os << it->second.public_addrs;
+ ASSERT_EQ(os.str(), "192.168.1.13:6789/0");
+}
+
+TEST_F(MonMapTest, DISABLED_build_initial_config_from_dns_fail) {
+ MockResolvHWrapper *resolvH = new MockResolvHWrapper();
+ DNSResolver::get_instance(resolvH);
+
+
+#ifdef HAVE_RES_NQUERY
+ EXPECT_CALL(*resolvH, res_nsearch(_, StrEq("_ceph-mon._tcp"), C_IN, T_SRV, _, _))
+ .WillOnce(Return(0));
+#else
+ EXPECT_CALL(*resolvH, res_search(StrEq("_ceph-mon._tcp"), C_IN, T_SRV, _, _))
+ .WillOnce(Return(0));
+#endif
+
+ boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON);
+ // using default value of mon_dns_srv_name option
+ MonMap monmap;
+ int r = monmap.build_initial(cct.get(), false, std::cerr);
+
+ ASSERT_EQ(r, -ENOENT);
+ ASSERT_EQ(monmap.mon_info.size(), (unsigned int)0);
+
+}
+
+TEST_F(MonMapTest, DISABLED_build_initial_config_from_dns_with_domain) {
+
+ MockResolvHWrapper *resolvH = new MockResolvHWrapper();
+ DNSResolver::get_instance(resolvH);
+
+ int len = sizeof(ns_search_msg_ok_payload);
+ int lena = sizeof(ns_query_msg_mon_a_payload);
+ int lenb = sizeof(ns_query_msg_mon_b_payload);
+ int lenc = sizeof(ns_query_msg_mon_c_payload);
+
+ using ::testing::InSequence;
+ {
+ InSequence s;
+
+#ifdef HAVE_RES_NQUERY
+ EXPECT_CALL(*resolvH, res_nsearch(_, StrEq("_cephmon._tcp.ceph.com"), C_IN, T_SRV, _, _))
+ .WillOnce(DoAll(SetArrayArgument<4>(ns_search_msg_ok_payload,
+ ns_search_msg_ok_payload+len), Return(len)));
+
+ EXPECT_CALL(*resolvH, res_nquery(_,StrEq("mon.a.ceph.com"), C_IN, T_A,_,_))
+ .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_a_payload,
+ ns_query_msg_mon_a_payload+lena), Return(lena)));
+
+ EXPECT_CALL(*resolvH, res_nquery(_, StrEq("mon.c.ceph.com"), C_IN, T_A,_,_))
+ .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_c_payload,
+ ns_query_msg_mon_c_payload+lenc), Return(lenc)));
+
+ EXPECT_CALL(*resolvH, res_nquery(_,StrEq("mon.b.ceph.com"), C_IN, T_A, _,_))
+ .WillOnce(DoAll(SetArrayArgument<4>(ns_query_msg_mon_b_payload,
+ ns_query_msg_mon_b_payload+lenb), Return(lenb)));
+#else
+ EXPECT_CALL(*resolvH, res_search(StrEq("_cephmon._tcp.ceph.com"), C_IN, T_SRV, _, _))
+ .WillOnce(DoAll(SetArrayArgument<3>(ns_search_msg_ok_payload,
+ ns_search_msg_ok_payload+len), Return(len)));
+
+ EXPECT_CALL(*resolvH, res_query(StrEq("mon.a.ceph.com"), C_IN, T_A,_,_))
+ .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_a_payload,
+ ns_query_msg_mon_a_payload+lena), Return(lena)));
+
+ EXPECT_CALL(*resolvH, res_query(StrEq("mon.c.ceph.com"), C_IN, T_A,_,_))
+ .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_c_payload,
+ ns_query_msg_mon_c_payload+lenc), Return(lenc)));
+
+ EXPECT_CALL(*resolvH, res_query(StrEq("mon.b.ceph.com"), C_IN, T_A, _,_))
+ .WillOnce(DoAll(SetArrayArgument<3>(ns_query_msg_mon_b_payload,
+ ns_query_msg_mon_b_payload+lenb), Return(lenb)));
+#endif
+ }
+
+
+
+ boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON);
+ cct->_conf.set_val("mon_dns_srv_name", "cephmon_ceph.com");
+ MonMap monmap;
+ int r = monmap.build_initial(cct.get(), false, std::cerr);
+
+ ASSERT_EQ(r, 0);
+ ASSERT_EQ(monmap.mon_info.size(), (unsigned int)3);
+ auto it = monmap.mon_info.find("mon.a");
+ ASSERT_NE(it, monmap.mon_info.end());
+ std::ostringstream os;
+ os << it->second.public_addrs;
+ ASSERT_EQ(os.str(), "192.168.1.11:6789/0");
+ os.str("");
+ it = monmap.mon_info.find("mon.b");
+ ASSERT_NE(it, monmap.mon_info.end());
+ os << it->second.public_addrs;
+ ASSERT_EQ(os.str(), "192.168.1.12:6789/0");
+ os.str("");
+ it = monmap.mon_info.find("mon.c");
+ ASSERT_NE(it, monmap.mon_info.end());
+ os << it->second.public_addrs;
+ ASSERT_EQ(os.str(), "192.168.1.13:6789/0");
+}
+
+TEST(MonMapBuildInitial, build_initial_mon_host_from_dns) {
+ boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON);
+ cct->_conf.set_val("mon_host", "ceph.io");
+ MonMap monmap;
+ int r = monmap.build_initial(cct.get(), false, std::cerr);
+ ASSERT_EQ(r, 0);
+ ASSERT_GE(monmap.mon_info.size(), 1u);
+ for (const auto& [name, info] : monmap.mon_info) {
+ std::cerr << info << std::endl;
+ }
+}
+
+TEST(MonMapBuildInitial, build_initial_mon_host_from_dns_fail) {
+ boost::intrusive_ptr<CephContext> cct = new CephContext(CEPH_ENTITY_TYPE_MON);
+ cct->_conf.set_val("mon_host", "ceph.noname");
+ MonMap monmap;
+ int r = monmap.build_initial(cct.get(), false, std::cerr);
+#if defined(__FreeBSD__)
+ ASSERT_EQ(r, -ENOENT);
+#else
+ ASSERT_EQ(r, -EINVAL);
+#endif
+}
diff --git a/src/test/mon/PGMap.cc b/src/test/mon/PGMap.cc
new file mode 100644
index 000000000..756665132
--- /dev/null
+++ b/src/test/mon/PGMap.cc
@@ -0,0 +1,168 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Inktank <info@inktank.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2, as published by the Free Software
+ * Foundation. See file COPYING.
+ */
+
+#include "mon/PGMap.h"
+#include "gtest/gtest.h"
+
+#include "include/stringify.h"
+
+
+namespace {
+ class CheckTextTable : public TextTable {
+ public:
+ explicit CheckTextTable(bool verbose) {
+ for (int i = 0; i < 5; i++) {
+ define_column("", TextTable::LEFT, TextTable::LEFT);
+ }
+ if (verbose) {
+ for (int i = 0; i < 9; i++) {
+ define_column("", TextTable::LEFT, TextTable::LEFT);
+ }
+ }
+ }
+ const string& get(unsigned r, unsigned c) const {
+ ceph_assert(r < row.size());
+ ceph_assert(c < row[r].size());
+ return row[r][c];
+ }
+ };
+
+ // copied from PGMap.cc
+ string percentify(float a) {
+ stringstream ss;
+ if (a < 0.01)
+ ss << "0";
+ else
+ ss << std::fixed << std::setprecision(2) << a;
+ return ss.str();
+ }
+}
+
+// dump_object_stat_sum() is called by "ceph df" command
+// with table, without formatter, verbose = true, not empty, avail > 0
+TEST(pgmap, dump_object_stat_sum_0)
+{
+ bool verbose = true;
+ CheckTextTable tbl(verbose);
+ pool_stat_t pool_stat;
+ object_stat_sum_t& sum = pool_stat.stats.sum;
+ sum.num_bytes = 42 * 1024 * 1024;
+ sum.num_objects = 42;
+ sum.num_objects_degraded = 13; // there are 13 missings + not_yet_backfilled
+ sum.num_objects_dirty = 2;
+ sum.num_rd = 100;
+ sum.num_rd_kb = 123;
+ sum.num_wr = 101;
+ sum.num_wr_kb = 321;
+ pool_stat.num_store_stats = 3;
+ store_statfs_t &statfs = pool_stat.store_stats;
+ statfs.data_stored = 40 * 1024 * 1024;
+ statfs.allocated = 41 * 1024 * 1024 * 2;
+ statfs.data_compressed_allocated = 4334;
+ statfs.data_compressed_original = 1213;
+
+ sum.calc_copies(3); // assuming we have 3 copies for each obj
+ // nominal amount of space available for new objects in this pool
+ uint64_t avail = 2016 * 1024 * 1024;
+ pg_pool_t pool;
+ pool.quota_max_objects = 2000;
+ pool.quota_max_bytes = 2000 * 1024 * 1024;
+ pool.size = 2;
+ pool.type = pg_pool_t::TYPE_REPLICATED;
+ pool.tier_of = 0;
+ PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail,
+ pool.get_size(), verbose, true, true, &pool);
+ float copies_rate =
+ (static_cast<float>(sum.num_object_copies - sum.num_objects_degraded) /
+ sum.num_object_copies) * pool.get_size();
+ float used_percent = (float)statfs.allocated /
+ (statfs.allocated + avail) * 100;
+ uint64_t stored = statfs.data_stored / copies_rate;
+
+ unsigned col = 0;
+ ASSERT_EQ(stringify(byte_u_t(stored)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(stored)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(si_u_t(sum.num_objects)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(statfs.allocated)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(statfs.allocated)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(percentify(used_percent), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(avail/copies_rate)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(si_u_t(pool.quota_max_objects)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(pool.quota_max_bytes)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(si_u_t(sum.num_objects_dirty)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(statfs.data_compressed_allocated)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(statfs.data_compressed_original)), tbl.get(0, col++));
+}
+
+// with table, without formatter, verbose = true, empty, avail > 0
+TEST(pgmap, dump_object_stat_sum_1)
+{
+ bool verbose = true;
+ CheckTextTable tbl(verbose);
+ pool_stat_t pool_stat;
+ object_stat_sum_t& sum = pool_stat.stats.sum; // zero by default
+ ASSERT_TRUE(sum.is_zero());
+ // nominal amount of space available for new objects in this pool
+ uint64_t avail = 2016 * 1024 * 1024;
+ pg_pool_t pool;
+ pool.quota_max_objects = 2000;
+ pool.quota_max_bytes = 2000 * 1024 * 1024;
+ pool.size = 2;
+ pool.type = pg_pool_t::TYPE_REPLICATED;
+ pool.tier_of = 0;
+ PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail,
+ pool.get_size(), verbose, true, true, &pool);
+ unsigned col = 0;
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(si_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(percentify(0), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(avail/pool.size)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(si_u_t(pool.quota_max_objects)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(pool.quota_max_bytes)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(si_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+}
+
+// with table, without formatter, verbose = false, empty, avail = 0
+TEST(pgmap, dump_object_stat_sum_2)
+{
+ bool verbose = false;
+ CheckTextTable tbl(verbose);
+ pool_stat_t pool_stat;
+ object_stat_sum_t& sum = pool_stat.stats.sum; // zero by default
+ ASSERT_TRUE(sum.is_zero());
+ // nominal amount of space available for new objects in this pool
+ uint64_t avail = 0;
+ pg_pool_t pool;
+ pool.quota_max_objects = 2000;
+ pool.quota_max_bytes = 2000 * 1024 * 1024;
+ pool.size = 2;
+ pool.type = pg_pool_t::TYPE_REPLICATED;
+
+ PGMap::dump_object_stat_sum(tbl, nullptr, pool_stat, avail,
+ pool.get_size(), verbose, true, true, &pool);
+ unsigned col = 0;
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(si_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(percentify(0), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(avail/pool.size)), tbl.get(0, col++));
+}
diff --git a/src/test/mon/bench_auth.py b/src/test/mon/bench_auth.py
new file mode 100755
index 000000000..5242f6892
--- /dev/null
+++ b/src/test/mon/bench_auth.py
@@ -0,0 +1,105 @@
+#!/usr/bin/python3
+
+import argparse
+import copy
+import json
+import rados
+import time
+import multiprocessing
+
+caps_base = ["mon", "profile rbd", "osd", "profile rbd pool=rbd namespace=test"]
+
+def create_users(conn, num_namespaces, num_users):
+ cmd = {'prefix': 'auth get-or-create'}
+
+ for i in range(num_namespaces):
+ caps_base[-1] += ", profile rbd pool=rbd namespace=namespace{}".format(i)
+
+ cmd['caps'] = caps_base
+ for i in range(num_users):
+ cmd['entity'] = "client.{}".format(i)
+ conn.mon_command(json.dumps(cmd), b'')
+
+class Worker(multiprocessing.Process):
+ def __init__(self, conn, num, queue, duration):
+ super().__init__()
+ self.conn = conn
+ self.num = num
+ self.queue = queue
+ self.duration = duration
+
+ def run(self):
+ client = "client.{}".format(self.num)
+ cmd = {'prefix': 'auth caps', 'entity': client}
+ start_time = time.time()
+ num_complete = 0
+ with rados.Rados(conffile='') as conn:
+ while True:
+ now = time.time()
+ diff = now - start_time
+ if diff > self.duration:
+ self.queue.put((num_complete, diff))
+ return
+ caps = copy.deepcopy(caps_base)
+ caps[-1] += ", profile rbd pool=rbd namespace=namespace{}".format(self.num * 10000 + num_complete)
+ cmd['caps'] = caps
+ cmd_start = time.time()
+ ret, buf, out = conn.mon_command(json.dumps(cmd), b'')
+ cmd_end = time.time()
+ if ret != 0:
+ self.queue.put((Exception("{0}: {1}".format(ret, out)), 0))
+ return
+ num_complete += 1
+ print("Process {} finished op {} - latency: {}".format(self.num, num_complete, cmd_end - cmd_start))
+
+def main():
+ parser = argparse.ArgumentParser(description="""
+Benchmark updates to ceph users' capabilities. Run one update at a time in each thread.
+""")
+ parser.add_argument(
+ '-n', '--num-namespaces',
+ type=int,
+ default=300,
+ help='number of namespaces per user',
+ )
+ parser.add_argument(
+ '-t', '--threads',
+ type=int,
+ default=10,
+ help='number of threads (and thus parallel operations) to use',
+ )
+ parser.add_argument(
+ '-d', '--duration',
+ type=int,
+ default=30,
+ help='how long to run, in seconds',
+ )
+ args = parser.parse_args()
+ num_namespaces = args.num_namespaces
+ num_threads = args.threads
+ duration = args.duration
+ workers = []
+ results = []
+ q = multiprocessing.Queue()
+ with rados.Rados(conffile=rados.Rados.DEFAULT_CONF_FILES) as conn:
+ create_users(conn, num_namespaces, num_threads)
+ for i in range(num_threads):
+ workers.append(Worker(conn, i, q, duration))
+ workers[-1].start()
+ for i in range(num_threads):
+ num_complete, seconds = q.get()
+ if isinstance(num_complete, Exception):
+ raise num_complete
+ results.append((num_complete, seconds))
+ total = 0
+ total_rate = 0
+ for num, sec in results:
+ print("Completed {} in {} ({} / s)".format(num, sec, num / sec))
+ total += num
+ total_rate += num / sec
+
+ print("Total: ", total)
+ print("Avg rate: ", total_rate / len(results))
+
+if __name__ == '__main__':
+ main()
diff --git a/src/test/mon/moncap.cc b/src/test/mon/moncap.cc
new file mode 100644
index 000000000..1c151b1e3
--- /dev/null
+++ b/src/test/mon/moncap.cc
@@ -0,0 +1,376 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2012 Inktank
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <iostream>
+
+#include "include/stringify.h"
+#include "mon/MonCap.h"
+
+#include "gtest/gtest.h"
+
+const char *parse_good[] = {
+
+ // MonCapMatch
+ "allow *",
+ "allow r",
+ "allow rwx",
+ "allow r",
+ " allow rwx",
+ "allow rwx ",
+ " allow rwx ",
+ " allow\t rwx ",
+ "\tallow\nrwx\t",
+ "allow service=foo x",
+ "allow service=\"froo\" x",
+ "allow profile osd",
+ "allow profile osd-bootstrap",
+ "allow profile \"mds-bootstrap\", allow *",
+ "allow command \"a b c\"",
+ "allow command abc",
+ "allow command abc with arg=foo",
+ "allow command abc with arg=foo arg2=bar",
+ "allow command abc with arg=foo arg2=bar",
+ "allow command abc with arg=foo arg2 prefix bar arg3 prefix baz",
+ "allow command abc with arg=foo arg2 prefix \"bar bingo\" arg3 prefix baz",
+ "allow command abc with arg regex \"^[0-9a-z.]*$\"",
+ "allow command abc with arg regex \"\(invaluid regex\"",
+ "allow service foo x",
+ "allow service foo x; allow service bar x",
+ "allow service foo w ;allow service bar x",
+ "allow service foo w , allow service bar x",
+ "allow service foo r , allow service bar x",
+ "allow service foo_foo r, allow service bar r",
+ "allow service foo-foo r, allow service bar r",
+ "allow service \" foo \" w, allow service bar r",
+ "allow command abc with arg=foo arg2=bar, allow service foo r",
+ "allow command abc.def with arg=foo arg2=bar, allow service foo r",
+ "allow command \"foo bar\" with arg=\"baz\"",
+ "allow command \"foo bar\" with arg=\"baz.xx\"",
+ "profile osd",
+ "profile \"mds-bootstrap\", profile foo",
+ "allow * network 1.2.3.4/24",
+ "allow * network ::1/128",
+ "allow * network [aa:bb::1]/128",
+ "allow service=foo x network 1.2.3.4/16",
+ "allow command abc network 1.2.3.4/8",
+ "profile osd network 1.2.3.4/32",
+ "allow profile mon network 1.2.3.4/32",
+ 0
+};
+
+TEST(MonCap, ParseGood) {
+ for (int i=0; parse_good[i]; ++i) {
+ string str = parse_good[i];
+ MonCap cap;
+ std::cout << "Testing good input: '" << str << "'" << std::endl;
+ ASSERT_TRUE(cap.parse(str, &cout));
+ std::cout << " -> " << cap << std::endl;
+ }
+}
+
+// these should stringify to the input value
+const char *parse_identity[] = {
+ "allow *",
+ "allow r",
+ "allow rwx",
+ "allow service foo x",
+ "allow profile osd",
+ "allow profile osd-bootstrap",
+ "allow profile mds-bootstrap, allow *",
+ "allow profile \"foo bar\", allow *",
+ "allow command abc",
+ "allow command \"a b c\"",
+ "allow command abc with arg=foo",
+ "allow command abc with arg=foo arg2=bar",
+ "allow command abc with arg=foo arg2=bar",
+ "allow command abc with arg=foo arg2 prefix bar arg3 prefix baz",
+ "allow command abc with arg=foo arg2 prefix \"bar bingo\" arg3 prefix baz",
+ "allow service foo x",
+ "allow service foo x, allow service bar x",
+ "allow service foo w, allow service bar x",
+ "allow service foo r, allow service bar x",
+ "allow service foo_foo r, allow service bar r",
+ "allow service foo-foo r, allow service bar r",
+ "allow service \" foo \" w, allow service bar r",
+ "allow command abc with arg=foo arg2=bar, allow service foo r",
+ 0
+};
+
+TEST(MonCap, ParseIdentity)
+{
+ for (int i=0; parse_identity[i]; ++i) {
+ string str = parse_identity[i];
+ MonCap cap;
+ std::cout << "Testing good input: '" << str << "'" << std::endl;
+ ASSERT_TRUE(cap.parse(str, &cout));
+ string out = stringify(cap);
+ ASSERT_EQ(out, str);
+ }
+}
+
+const char *parse_bad[] = {
+ "allow r foo",
+ "allow*",
+ "foo allow *",
+ "allow profile foo rwx",
+ "allow profile",
+ "allow profile foo bar rwx",
+ "allow service bar",
+ "allow command baz x",
+ "allow r w",
+ "ALLOW r",
+ "allow rwx,",
+ "allow rwx x",
+ "allow r pool foo r",
+ "allow wwx pool taco",
+ "allow wwx pool taco^funny&chars",
+ "allow rwx pool 'weird name''",
+ "allow rwx object_prefix \"beforepool\" pool weird",
+ "allow rwx auid 123 pool asdf",
+ "allow command foo a prefix b",
+ "allow command foo with a prefixb",
+ "allow command foo with a = prefix b",
+ "allow command foo with a prefix b c",
+ 0
+};
+
+TEST(MonCap, ParseBad) {
+ for (int i=0; parse_bad[i]; ++i) {
+ string str = parse_bad[i];
+ MonCap cap;
+ std::cout << "Testing bad input: '" << str << "'" << std::endl;
+ ASSERT_FALSE(cap.parse(str, &cout));
+ }
+}
+
+TEST(MonCap, AllowAll) {
+ MonCap cap;
+ ASSERT_FALSE(cap.is_allow_all());
+
+ ASSERT_TRUE(cap.parse("allow r", NULL));
+ ASSERT_FALSE(cap.is_allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow w", NULL));
+ ASSERT_FALSE(cap.is_allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow x", NULL));
+ ASSERT_FALSE(cap.is_allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow rwx", NULL));
+ ASSERT_FALSE(cap.is_allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow rw", NULL));
+ ASSERT_FALSE(cap.is_allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow rx", NULL));
+ ASSERT_FALSE(cap.is_allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow wx", NULL));
+ ASSERT_FALSE(cap.is_allow_all());
+ cap.grants.clear();
+
+ ASSERT_TRUE(cap.parse("allow *", NULL));
+ ASSERT_TRUE(cap.is_allow_all());
+ ASSERT_TRUE(cap.is_capable(NULL, {}, "foo", "asdf", {}, true, true, true,
+ {}));
+
+ MonCap cap2;
+ ASSERT_FALSE(cap2.is_allow_all());
+ cap2.set_allow_all();
+ ASSERT_TRUE(cap2.is_allow_all());
+}
+
+TEST(MonCap, Network) {
+ MonCap cap;
+ bool r = cap.parse("allow * network 192.168.0.0/16, allow * network 10.0.0.0/8", NULL);
+ ASSERT_TRUE(r);
+
+ entity_addr_t a, b, c;
+ a.parse("10.1.2.3");
+ b.parse("192.168.2.3");
+ c.parse("192.167.2.3");
+
+ ASSERT_TRUE(cap.is_capable(NULL, {}, "foo", "asdf", {}, true, true, true,
+ a));
+ ASSERT_TRUE(cap.is_capable(NULL, {}, "foo", "asdf", {}, true, true, true,
+ b));
+ ASSERT_FALSE(cap.is_capable(NULL, {}, "foo", "asdf", {}, true, true, true,
+ c));
+}
+
+TEST(MonCap, ProfileOSD) {
+ MonCap cap;
+ bool r = cap.parse("allow profile osd", NULL);
+ ASSERT_TRUE(r);
+
+ EntityName name;
+ name.from_str("osd.123");
+ map<string,string> ca;
+
+ ASSERT_TRUE(cap.is_capable(NULL, name, "osd", "", ca, true, false, false,
+ {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "osd", "", ca, true, true, false, {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "osd", "", ca, true, true, true, {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "osd", "", ca, true, true, true, {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "mon", "", ca, true, false, false,
+ {}));
+
+ ASSERT_FALSE(cap.is_capable(NULL, name, "mds", "", ca, true, true, true, {}));
+ ASSERT_FALSE(cap.is_capable(NULL, name, "mon", "", ca, true, true, true, {}));
+
+ ca.clear();
+ ASSERT_FALSE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true,
+ true, {}));
+ ca["key"] = "daemon-private/osd.123";
+ ASSERT_FALSE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true,
+ true, {}));
+ ca["key"] = "daemon-private/osd.12/asdf";
+ ASSERT_FALSE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true,
+ true, {}));
+ ca["key"] = "daemon-private/osd.123/";
+ ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true,
+ true, {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true,
+ true, {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true,
+ true, {}));
+ ca["key"] = "daemon-private/osd.123/foo";
+ ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key get", ca, true, true,
+ true, {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key put", ca, true, true,
+ true, {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key set", ca, true, true,
+ true, {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key exists", ca, true,
+ true, true, {}));
+ ASSERT_TRUE(cap.is_capable(NULL, name, "", "config-key delete", ca, true,
+ true, true, {}));
+}
+
+TEST(MonCap, CommandRegEx) {
+ MonCap cap;
+ ASSERT_FALSE(cap.is_allow_all());
+ ASSERT_TRUE(cap.parse("allow command abc with arg regex \"^[0-9a-z.]*$\"",
+ NULL));
+
+ EntityName name;
+ name.from_str("osd.123");
+ ASSERT_TRUE(cap.is_capable(nullptr, name, "", "abc", {{"arg", "12345abcde"}},
+ true, true, true, {}));
+ ASSERT_FALSE(cap.is_capable(nullptr, name, "", "abc", {{"arg", "~!@#$"}},
+ true, true, true, {}));
+
+ ASSERT_TRUE(cap.parse("allow command abc with arg regex \"[*\"", NULL));
+ ASSERT_FALSE(cap.is_capable(nullptr, name, "", "abc", {{"arg", ""}}, true,
+ true, true, {}));
+}
+
+TEST(MonCap, ProfileBootstrapRBD) {
+ MonCap cap;
+ ASSERT_FALSE(cap.is_allow_all());
+ ASSERT_TRUE(cap.parse("profile bootstrap-rbd", NULL));
+
+ EntityName name;
+ name.from_str("mon.a");
+ ASSERT_TRUE(cap.is_capable(nullptr, name, "",
+ "auth get-or-create", {
+ {"entity", "client.rbd"},
+ {"caps_mon", "profile rbd"},
+ {"caps_osd", "profile rbd pool=foo, profile rbd-read-only"},
+ }, true, true, true,
+ {}));
+ ASSERT_FALSE(cap.is_capable(nullptr, name, "",
+ "auth get-or-create", {
+ {"entity", "client.rbd"},
+ {"caps_mon", "allow *"},
+ {"caps_osd", "profile rbd"},
+ }, true, true, true,
+ {}));
+ ASSERT_FALSE(cap.is_capable(nullptr, name, "",
+ "auth get-or-create", {
+ {"entity", "client.rbd"},
+ {"caps_mon", "profile rbd"},
+ {"caps_osd", "profile rbd pool=foo, allow *, profile rbd-read-only"},
+ }, true, true, true,
+ {}));
+}
+
+TEST(MonCap, ProfileBootstrapRBDMirror) {
+ MonCap cap;
+ ASSERT_FALSE(cap.is_allow_all());
+ ASSERT_TRUE(cap.parse("profile bootstrap-rbd-mirror", NULL));
+
+ EntityName name;
+ name.from_str("mon.a");
+ ASSERT_TRUE(cap.is_capable(nullptr, name, "",
+ "auth get-or-create", {
+ {"entity", "client.rbd"},
+ {"caps_mon", "profile rbd-mirror"},
+ {"caps_osd", "profile rbd pool=foo, profile rbd-read-only"},
+ }, true, true, true,
+ {}));
+ ASSERT_FALSE(cap.is_capable(nullptr, name, "",
+ "auth get-or-create", {
+ {"entity", "client.rbd"},
+ {"caps_mon", "profile rbd"},
+ {"caps_osd", "profile rbd pool=foo, profile rbd-read-only"},
+ }, true, true, true,
+ {}));
+ ASSERT_FALSE(cap.is_capable(nullptr, name, "",
+ "auth get-or-create", {
+ {"entity", "client.rbd"},
+ {"caps_mon", "allow *"},
+ {"caps_osd", "profile rbd"},
+ }, true, true, true,
+ {}));
+ ASSERT_FALSE(cap.is_capable(nullptr, name, "",
+ "auth get-or-create", {
+ {"entity", "client.rbd"},
+ {"caps_mon", "profile rbd-mirror"},
+ {"caps_osd", "profile rbd pool=foo, allow *, profile rbd-read-only"},
+ }, true, true, true,
+ {}));
+}
+
+TEST(MonCap, ProfileRBD) {
+ MonCap cap;
+ ASSERT_FALSE(cap.is_allow_all());
+ ASSERT_TRUE(cap.parse("profile rbd", NULL));
+
+ EntityName name;
+ name.from_str("mon.a");
+ ASSERT_FALSE(cap.is_capable(nullptr, name, "config-key",
+ "config-key get", {
+ {"key", "rbd/mirror/peer/1/1234"},
+ }, true, false, false, {}));
+}
+
+TEST(MonCap, ProfileRBDMirror) {
+ MonCap cap;
+ ASSERT_FALSE(cap.is_allow_all());
+ ASSERT_TRUE(cap.parse("profile rbd-mirror", NULL));
+
+ EntityName name;
+ name.from_str("mon.a");
+ ASSERT_TRUE(cap.is_capable(nullptr, name, "config-key",
+ "config-key get", {
+ {"key", "rbd/mirror/peer/1/1234"},
+ }, true, false, false, {}));
+}
diff --git a/src/test/mon/test-mon-msg.cc b/src/test/mon/test-mon-msg.cc
new file mode 100644
index 000000000..fede9df8f
--- /dev/null
+++ b/src/test/mon/test-mon-msg.cc
@@ -0,0 +1,338 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+* Ceph - scalable distributed file system
+*
+* Copyright (C) 2014 Red Hat
+*
+* This is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public
+* License version 2.1, as published by the Free Software
+* Foundation. See file COPYING.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <iostream>
+#include <sstream>
+#include <time.h>
+#include <stdlib.h>
+#include <map>
+
+#include "global/global_init.h"
+#include "global/global_context.h"
+#include "common/async/context_pool.h"
+#include "common/ceph_argparse.h"
+#include "common/version.h"
+#include "common/dout.h"
+#include "common/debug.h"
+#include "common/ceph_mutex.h"
+#include "common/Timer.h"
+#include "common/errno.h"
+#include "mon/MonClient.h"
+#include "msg/Dispatcher.h"
+#include "include/err.h"
+#include <boost/scoped_ptr.hpp>
+
+#include "gtest/gtest.h"
+
+#include "common/config.h"
+#include "include/ceph_assert.h"
+
+#include "messages/MMonProbe.h"
+#include "messages/MRoute.h"
+#include "messages/MGenericMessage.h"
+#include "messages/MMonJoin.h"
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_
+#undef dout_prefix
+#define dout_prefix *_dout << "test-mon-msg "
+
+class MonClientHelper : public Dispatcher
+{
+protected:
+ CephContext *cct;
+ ceph::async::io_context_pool poolctx;
+ Messenger *msg;
+ MonClient monc;
+
+ ceph::mutex lock = ceph::make_mutex("mon-msg-test::lock");
+
+ set<int> wanted;
+
+public:
+
+ explicit MonClientHelper(CephContext *cct_)
+ : Dispatcher(cct_),
+ cct(cct_),
+ poolctx(1),
+ msg(NULL),
+ monc(cct_, poolctx)
+ { }
+
+
+ int post_init() {
+ dout(1) << __func__ << dendl;
+ if (!msg)
+ return -EINVAL;
+ msg->add_dispatcher_tail(this);
+ return 0;
+ }
+
+ int init_messenger() {
+ dout(1) << __func__ << dendl;
+
+ std::string public_msgr_type = cct->_conf->ms_public_type.empty() ? cct->_conf.get_val<std::string>("ms_type") : cct->_conf->ms_public_type;
+ msg = Messenger::create(cct, public_msgr_type, entity_name_t::CLIENT(-1),
+ "test-mon-msg", 0);
+ ceph_assert(msg != NULL);
+ msg->set_default_policy(Messenger::Policy::lossy_client(0));
+ dout(0) << __func__ << " starting messenger at "
+ << msg->get_myaddrs() << dendl;
+ msg->start();
+ return 0;
+ }
+
+ int init_monc() {
+ dout(1) << __func__ << dendl;
+ ceph_assert(msg != NULL);
+ int err = monc.build_initial_monmap();
+ if (err < 0) {
+ derr << __func__ << " error building monmap: "
+ << cpp_strerror(err) << dendl;
+ return err;
+ }
+
+ monc.set_messenger(msg);
+ msg->add_dispatcher_head(&monc);
+
+ monc.set_want_keys(CEPH_ENTITY_TYPE_MON);
+ err = monc.init();
+ if (err < 0) {
+ derr << __func__ << " monc init failed: "
+ << cpp_strerror(err) << dendl;
+ goto fail;
+ }
+
+ err = monc.authenticate();
+ if (err < 0) {
+ derr << __func__ << " monc auth failed: "
+ << cpp_strerror(err) << dendl;
+ goto fail_monc;
+ }
+ monc.wait_auth_rotating(30.0);
+ monc.renew_subs();
+ dout(0) << __func__ << " finished" << dendl;
+ return 0;
+
+fail_monc:
+ derr << __func__ << " failing monc" << dendl;
+ monc.shutdown();
+fail:
+ return err;
+ }
+
+ void shutdown_messenger() {
+ dout(0) << __func__ << dendl;
+ msg->shutdown();
+ msg->wait();
+ }
+
+ void shutdown_monc() {
+ dout(0) << __func__ << dendl;
+ monc.shutdown();
+ }
+
+ void shutdown() {
+ dout(0) << __func__ << dendl;
+ shutdown_monc();
+ shutdown_messenger();
+ }
+
+ MonMap *get_monmap() {
+ return &monc.monmap;
+ }
+
+ int init() {
+ int err = init_messenger();
+ if (err < 0)
+ goto fail;
+ err = init_monc();
+ if (err < 0)
+ goto fail_msgr;
+ err = post_init();
+ if (err < 0)
+ goto fail_monc;
+ return 0;
+fail_monc:
+ shutdown_monc();
+fail_msgr:
+ shutdown_messenger();
+fail:
+ return err;
+ }
+
+ virtual void handle_wanted(Message *m) { }
+
+ bool handle_message(Message *m) {
+ dout(1) << __func__ << " " << *m << dendl;
+ if (!is_wanted(m)) {
+ dout(10) << __func__ << " not wanted" << dendl;
+ return false;
+ }
+ handle_wanted(m);
+ m->put();
+
+ return true;
+ }
+
+ bool ms_dispatch(Message *m) override {
+ return handle_message(m);
+ }
+ void ms_handle_connect(Connection *con) override { }
+ void ms_handle_remote_reset(Connection *con) override { }
+ bool ms_handle_reset(Connection *con) override { return false; }
+ bool ms_handle_refused(Connection *con) override { return false; }
+
+ bool is_wanted(Message *m) {
+ dout(20) << __func__ << " " << *m << " type " << m->get_type() << dendl;
+ return (wanted.find(m->get_type()) != wanted.end());
+ }
+
+ void add_wanted(int t) {
+ dout(20) << __func__ << " type " << t << dendl;
+ wanted.insert(t);
+ }
+
+ void rm_wanted(int t) {
+ dout(20) << __func__ << " type " << t << dendl;
+ wanted.erase(t);
+ }
+
+ void send_message(Message *m) {
+ dout(15) << __func__ << " " << *m << dendl;
+ monc.send_mon_message(m);
+ }
+
+ void wait() { msg->wait(); }
+};
+
+class MonMsgTest : public MonClientHelper,
+ public ::testing::Test
+{
+protected:
+ int reply_type = 0;
+ Message *reply_msg = nullptr;
+ ceph::mutex lock = ceph::make_mutex("lock");
+ ceph::condition_variable cond;
+
+ MonMsgTest() :
+ MonClientHelper(g_ceph_context) { }
+
+public:
+ void SetUp() override {
+ reply_type = -1;
+ if (reply_msg) {
+ reply_msg->put();
+ reply_msg = nullptr;
+ }
+ ASSERT_EQ(init(), 0);
+ }
+
+ void TearDown() override {
+ shutdown();
+ if (reply_msg) {
+ reply_msg->put();
+ reply_msg = nullptr;
+ }
+ }
+
+ void handle_wanted(Message *m) override {
+ std::lock_guard l{lock};
+ // caller will put() after they call us, so hold on to a ref
+ m->get();
+ reply_msg = m;
+ cond.notify_all();
+ }
+
+ Message *send_wait_reply(Message *m, int t, double timeout=30.0) {
+ std::unique_lock l{lock};
+ reply_type = t;
+ add_wanted(t);
+ send_message(m);
+
+ std::cv_status status = std::cv_status::no_timeout;
+ if (timeout > 0) {
+ utime_t s = ceph_clock_now();
+ status = cond.wait_for(l, ceph::make_timespan(timeout));
+ utime_t e = ceph_clock_now();
+ dout(20) << __func__ << " took " << (e-s) << " seconds" << dendl;
+ } else {
+ cond.wait(l);
+ }
+ rm_wanted(t);
+ l.unlock();
+ if (status == std::cv_status::timeout) {
+ dout(20) << __func__ << " error: " << cpp_strerror(ETIMEDOUT) << dendl;
+ return (Message*)((long)-ETIMEDOUT);
+ }
+
+ if (!reply_msg)
+ dout(20) << __func__ << " reply_msg is nullptr" << dendl;
+ else
+ dout(20) << __func__ << " reply_msg " << *reply_msg << dendl;
+ return reply_msg;
+ }
+};
+
+TEST_F(MonMsgTest, MMonProbeTest)
+{
+ Message *m = new MMonProbe(get_monmap()->fsid,
+ MMonProbe::OP_PROBE, "b", false,
+ ceph_release());
+ Message *r = send_wait_reply(m, MSG_MON_PROBE);
+ ASSERT_NE(IS_ERR(r), 0);
+ ASSERT_EQ(PTR_ERR(r), -ETIMEDOUT);
+}
+
+TEST_F(MonMsgTest, MRouteTest)
+{
+ Message *payload = new MGenericMessage(CEPH_MSG_SHUTDOWN);
+ MRoute *m = new MRoute;
+ m->msg = payload;
+ Message *r = send_wait_reply(m, CEPH_MSG_SHUTDOWN);
+ // we want an error
+ ASSERT_NE(IS_ERR(r), 0);
+ ASSERT_EQ(PTR_ERR(r), -ETIMEDOUT);
+}
+
+/* MMonScrub and MMonSync have other safeguards in place that prevent
+ * us from actually receiving a reply even if the message is handled
+ * by the monitor due to lack of cap checking.
+ */
+TEST_F(MonMsgTest, MMonJoin)
+{
+ Message *m = new MMonJoin(get_monmap()->fsid, string("client"),
+ msg->get_myaddrs());
+ send_wait_reply(m, MSG_MON_PAXOS, 10.0);
+
+ int r = monc.get_monmap();
+ ASSERT_EQ(r, 0);
+ ASSERT_FALSE(monc.monmap.contains("client"));
+}
+
+int main(int argc, char *argv[])
+{
+ vector<const char*> args;
+ argv_to_vec(argc, (const char **)argv, args);
+
+ auto cct = global_init(nullptr, args,
+ CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+ common_init_finish(g_ceph_context);
+ g_ceph_context->_conf.apply_changes(nullptr);
+ ::testing::InitGoogleTest(&argc, argv);
+
+ return RUN_ALL_TESTS();
+}
+
diff --git a/src/test/mon/test_election.cc b/src/test/mon/test_election.cc
new file mode 100644
index 000000000..a232d7355
--- /dev/null
+++ b/src/test/mon/test_election.cc
@@ -0,0 +1,1003 @@
+// -*- mode:C; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "gtest/gtest.h"
+#include "mon/ElectionLogic.h"
+#include "mon/ConnectionTracker.h"
+#include "common/dout.h"
+
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "common/common_init.h"
+#include "common/ceph_argparse.h"
+
+using namespace std;
+
+#define dout_subsys ceph_subsys_test
+#undef dout_prefix
+#define dout_prefix _prefix(_dout, prefix_name(), timestep_count())
+static ostream& _prefix(std::ostream *_dout, const char *prefix, int timesteps) {
+ return *_dout << prefix << timesteps << " ";
+}
+
+const char* prefix_name() { return "test_election: "; }
+int timestep_count() { return -1; }
+
+int main(int argc, char **argv) {
+ vector<const char*> args(argv, argv+argc);
+ bool user_set_debug = false;
+ for (auto& arg : args) {
+ if (strncmp("--debug_mon", arg, 11) == 0) user_set_debug = true;
+ }
+ auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+ common_init_finish(g_ceph_context);
+ if (!user_set_debug) g_ceph_context->_conf.set_val("debug mon", "0/20");
+
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
+
+
+class Owner;
+struct Election {
+ map<int, Owner*> electors;
+ map<int, set<int> > blocked_messages;
+ int count;
+ ElectionLogic::election_strategy election_strategy;
+ int ping_interval;
+ set<int> disallowed_leaders;
+
+ vector< function<void()> > messages;
+ int pending_election_messages;
+ int timesteps_run = 0;
+ int last_quorum_change = 0;
+ int last_quorum_formed = -1;
+ set<int> last_quorum_reported;
+ int last_leader = -1;
+
+ Election(int c, ElectionLogic::election_strategy es, int pingi=1, double tracker_halflife=5);
+ ~Election();
+ // ElectionOwner interfaces
+ int get_paxos_size() { return count; }
+ const set<int>& get_disallowed_leaders() const { return disallowed_leaders; }
+ void propose_to(int from, int to, epoch_t e, bufferlist& cbl);
+ void defer_to(int from, int to, epoch_t e);
+ void claim_victory(int from, int to, epoch_t e, const set<int>& members);
+ void accept_victory(int from, int to, epoch_t e);
+ void report_quorum(const set<int>& quorum);
+ void queue_stable_message(int from, int to, function<void()> m);
+ void queue_timeout_message(int from, int to, function<void()> m);
+ void queue_stable_or_timeout(int from, int to,
+ function<void()> m, function<void()> t);
+ void queue_election_message(int from, int to, function<void()> m);
+
+ // test runner interfaces
+ int run_timesteps(int max);
+ void start_one(int who);
+ void start_all();
+ bool election_stable() const;
+ bool quorum_stable(int timesteps_stable) const;
+ bool all_agree_on_leader() const;
+ bool check_epoch_agreement() const;
+ void block_messages(int from, int to);
+ void block_bidirectional_messages(int a, int b);
+ void unblock_messages(int from, int to);
+ void unblock_bidirectional_messages(int a, int b);
+ void add_disallowed_leader(int disallowed) { disallowed_leaders.insert(disallowed); }
+ void remove_elector(int rank);
+ const char* prefix_name() const { return "Election: "; }
+ int timestep_count() const { return timesteps_run; }
+};
+struct Owner : public ElectionOwner, RankProvider {
+ Election *parent;
+ int rank;
+ epoch_t persisted_epoch;
+ bool ever_joined;
+ ConnectionTracker peer_tracker;
+ ElectionLogic logic;
+ set<int> quorum;
+ int victory_accepters;
+ int timer_steps; // timesteps until we trigger timeout
+ bool timer_election; // the timeout is for normal election, or victory
+ bool rank_deleted = false;
+ string prefix_str;
+ Owner(int r, ElectionLogic::election_strategy es, double tracker_halflife,
+ Election *p) : parent(p), rank(r), persisted_epoch(0),
+ ever_joined(false),
+ peer_tracker(this, rank, tracker_halflife, 5, g_ceph_context),
+ logic(this, es, &peer_tracker, 0.0005, g_ceph_context),
+ victory_accepters(0),
+ timer_steps(-1), timer_election(true) {
+ std::stringstream str;
+ str << "Owner" << rank << " ";
+ prefix_str = str.str();
+ }
+
+ // in-memory store: just save to variable
+ void persist_epoch(epoch_t e) { persisted_epoch = e; }
+ // in-memory store: just return variable
+ epoch_t read_persisted_epoch() const { return persisted_epoch; }
+ // in-memory store: don't need to validate
+ void validate_store() { return; }
+ // don't need to do anything with our state right now
+ void notify_bump_epoch() {}
+ void notify_rank_removed(int removed_rank) {
+ ldout(g_ceph_context, 1) << "removed_rank: " << removed_rank << dendl;
+ ldout(g_ceph_context, 1) << "rank before: " << rank << dendl;
+ if (removed_rank < rank) {
+ --rank;
+ }
+ peer_tracker.notify_rank_removed(removed_rank, rank);
+ ldout(g_ceph_context, 1) << "rank after: " << rank << dendl;
+ }
+ void notify_deleted() { rank_deleted = true; rank = -1; cancel_timer(); }
+ // pass back to ElectionLogic; we don't need this redirect ourselves
+ void trigger_new_election() { ceph_assert (!rank_deleted); logic.start(); }
+ int get_my_rank() const { return rank; }
+ // we don't need to persist scores as we don't reset and lose memory state
+ void persist_connectivity_scores() {}
+ void propose_to_peers(epoch_t e, bufferlist& bl) {
+ ceph_assert (!rank_deleted);
+ for (int i = 0; i < parent->get_paxos_size(); ++i) {
+ if (i == rank) continue;
+ parent->propose_to(rank, i, e, bl);
+ }
+ }
+ void reset_election() {
+ ceph_assert (!rank_deleted);
+ _start();
+ logic.start();
+ }
+ bool ever_participated() const { return ever_joined; }
+ unsigned paxos_size() const { return parent->get_paxos_size(); }
+ const set<int>& get_disallowed_leaders() const {
+ return parent->get_disallowed_leaders();
+ }
+ void cancel_timer() {
+ timer_steps = -1;
+ }
+ void reset_timer(int steps) {
+ cancel_timer();
+ timer_steps = 3 + steps; // FIXME? magic number, current step + roundtrip
+ timer_election = true;
+ }
+ void start_victory_timer() {
+ cancel_timer();
+ timer_election = false;
+ timer_steps = 3; // FIXME? current step + roundtrip
+ }
+ void _start() {
+ reset_timer(0);
+ quorum.clear();
+ }
+ void _defer_to(int who) {
+ ceph_assert (!rank_deleted);
+ parent->defer_to(rank, who, logic.get_epoch());
+ reset_timer(0); // wtf does changing this 0->1 cause breakage?
+ }
+ void message_victory(const std::set<int>& members) {
+ ceph_assert (!rank_deleted);
+ for (auto i : members) {
+ if (i == rank) continue;
+ parent->claim_victory(rank, i, logic.get_epoch(), members);
+ }
+ start_victory_timer();
+ quorum = members;
+ victory_accepters = 1;
+ }
+ bool is_current_member(int r) const { return quorum.count(r) != 0; }
+ void receive_propose(int from, epoch_t e, ConnectionTracker *oct) {
+ if (rank_deleted) return;
+ logic.receive_propose(from, e, oct);
+ delete oct;
+ }
+ void receive_ack(int from, epoch_t e) {
+ if (rank_deleted) return;
+ if (e < logic.get_epoch())
+ return;
+ logic.receive_ack(from, e);
+ }
+ void receive_victory_claim(int from, epoch_t e, const set<int>& members) {
+ if (rank_deleted) return;
+ if (e < logic.get_epoch())
+ return;
+ if (logic.receive_victory_claim(from, e)) {
+ quorum = members;
+ cancel_timer();
+ parent->accept_victory(rank, from, e);
+ }
+ }
+ void receive_victory_ack(int from, epoch_t e) {
+ if (rank_deleted) return;
+ if (e < logic.get_epoch())
+ return;
+ ++victory_accepters;
+ if (victory_accepters == static_cast<int>(quorum.size())) {
+ cancel_timer();
+ parent->report_quorum(quorum);
+ }
+ }
+ void receive_scores(bufferlist bl) {
+ ConnectionTracker oct(bl, g_ceph_context);
+ peer_tracker.receive_peer_report(oct);
+ ldout(g_ceph_context, 10) << "received scores " << oct << dendl;
+ }
+ void receive_ping(int from_rank, bufferlist bl) {
+ ldout(g_ceph_context, 6) << "receive ping from " << from_rank << dendl;
+ peer_tracker.report_live_connection(from_rank, parent->ping_interval);
+ receive_scores(bl);
+ }
+ void receive_ping_timeout(int from_rank) {
+ ldout(g_ceph_context, 6) << "timeout ping from " << from_rank << dendl;
+ peer_tracker.report_dead_connection(from_rank, parent->ping_interval);
+ }
+ void election_timeout() {
+ ldout(g_ceph_context, 2) << "election epoch " << logic.get_epoch()
+ << " timed out for " << rank
+ << ", electing me:" << logic.electing_me
+ << ", acked_me:" << logic.acked_me << dendl;
+ ceph_assert (!rank_deleted);
+ logic.end_election_period();
+ }
+ void victory_timeout() {
+ ldout(g_ceph_context, 2) << "victory epoch " << logic.get_epoch()
+ << " timed out for " << rank
+ << ", electing me:" << logic.electing_me
+ << ", acked_me:" << logic.acked_me << dendl;
+ ceph_assert (!rank_deleted);
+ reset_election();
+ }
+ void encode_scores(bufferlist& bl) {
+ encode(peer_tracker, bl);
+ }
+ void send_pings() {
+ ceph_assert (!rank_deleted);
+ if (!parent->ping_interval ||
+ parent->timesteps_run % parent->ping_interval != 0) {
+ return;
+ }
+
+ bufferlist bl;
+ encode_scores(bl);
+ for (int i = 0; i < parent->get_paxos_size(); ++i) {
+ if (i == rank)
+ continue;
+ Owner *o = parent->electors[i];
+ parent->queue_stable_or_timeout(rank, i,
+ [o, r=rank, bl] { o->receive_ping(r, bl); },
+ [o, r=rank] { o->receive_ping_timeout(r); }
+ );
+ }
+ }
+ void notify_timestep() {
+ ceph_assert (!rank_deleted);
+ assert(timer_steps != 0);
+ if (timer_steps > 0) {
+ --timer_steps;
+ }
+ if (timer_steps == 0) {
+ if (timer_election) {
+ election_timeout();
+ } else {
+ victory_timeout();
+ }
+ }
+ send_pings();
+ }
+ const char *prefix_name() const {
+ return prefix_str.c_str();
+ }
+ int timestep_count() const { return parent->timesteps_run; }
+};
+
+Election::Election(int c, ElectionLogic::election_strategy es, int pingi,
+ double tracker_halflife) : count(c), election_strategy(es), ping_interval(pingi),
+ pending_election_messages(0), timesteps_run(0), last_quorum_change(0), last_quorum_formed(-1)
+{
+ for (int i = 0; i < count; ++i) {
+ electors[i] = new Owner(i, election_strategy, tracker_halflife, this);
+ }
+}
+
+Election::~Election()
+{
+ {
+ for (auto i : electors) {
+ delete i.second;
+ }
+ }
+}
+
+void Election::queue_stable_message(int from, int to, function<void()> m)
+{
+ if (!blocked_messages[from].count(to)) {
+ messages.push_back(m);
+ }
+}
+
+void Election::queue_election_message(int from, int to, function<void()> m)
+{
+ if (last_quorum_reported.count(from)) {
+ last_quorum_change = timesteps_run;
+ last_quorum_reported.clear();
+ last_leader = -1;
+ }
+ if (!blocked_messages[from].count(to)) {
+ bufferlist bl;
+ electors[from]->encode_scores(bl);
+ Owner *o = electors[to];
+ messages.push_back([this,m,o,bl] {
+ --this->pending_election_messages;
+ o->receive_scores(bl);
+ m();
+ });
+ ++pending_election_messages;
+ }
+}
+
+void Election::queue_timeout_message(int from, int to, function<void()> m)
+{
+ ceph_assert(blocked_messages[from].count(to));
+ messages.push_back(m);
+}
+
+void Election::queue_stable_or_timeout(int from, int to,
+ function<void()> m, function<void()> t)
+{
+ if (blocked_messages[from].count(to)) {
+ queue_timeout_message(from, to, t);
+ } else {
+ queue_stable_message(from, to, m);
+ }
+}
+
+void Election::defer_to(int from, int to, epoch_t e)
+{
+ Owner *o = electors[to];
+ queue_election_message(from, to, [o, from, e] {
+ o->receive_ack(from, e);
+ });
+}
+
+void Election::propose_to(int from, int to, epoch_t e, bufferlist& cbl)
+{
+ Owner *o = electors[to];
+ ConnectionTracker *oct = NULL;
+ if (cbl.length()) {
+ oct = new ConnectionTracker(cbl, g_ceph_context); // we leak these on blocked cons, meh
+ }
+ queue_election_message(from, to, [o, from, e, oct] {
+ o->receive_propose(from, e, oct);
+ });
+}
+
+void Election::claim_victory(int from, int to, epoch_t e, const set<int>& members)
+{
+ Owner *o = electors[to];
+ queue_election_message(from, to, [o, from, e, members] {
+ o->receive_victory_claim(from, e, members);
+ });
+}
+
+void Election::accept_victory(int from, int to, epoch_t e)
+{
+ Owner *o = electors[to];
+ queue_election_message(from, to, [o, from, e] {
+ o->receive_victory_ack(from, e);
+ });
+}
+
+void Election::report_quorum(const set<int>& quorum)
+{
+ for (int i : quorum) {
+ electors[i]->ever_joined = true;
+ }
+ last_quorum_formed = last_quorum_change = timesteps_run;
+ last_quorum_reported = quorum;
+ last_leader = electors[*(quorum.begin())]->logic.get_election_winner();
+}
+
+int Election::run_timesteps(int max)
+{
+ vector< function<void()> > current_m;
+ int steps = 0;
+ for (; (!max || steps < max) && // we have timesteps left AND ONE OF
+ (pending_election_messages || // there are messages pending.
+ !election_stable()); // somebody's not happy and will act in future
+ ++steps) {
+ current_m.clear();
+ current_m.swap(messages);
+ ++timesteps_run;
+ for (auto& m : current_m) {
+ m();
+ }
+ for (auto o : electors) {
+ o.second->notify_timestep();
+ }
+ }
+
+ return steps;
+}
+
+void Election::start_one(int who)
+{
+ assert(who < static_cast<int>(electors.size()));
+ electors[who]->logic.start();
+}
+
+void Election::start_all() {
+ for (auto e : electors) {
+ e.second->logic.start();
+ }
+}
+
+bool Election::election_stable() const
+{
+ // see if anybody has a timer running
+ for (auto i : electors) {
+ if (i.second->timer_steps != -1) {
+ ldout(g_ceph_context, 30) << "rank " << i.first << " has timer value " << i.second->timer_steps << dendl;
+ return false;
+ }
+ }
+ return (pending_election_messages == 0);
+}
+
+bool Election::quorum_stable(int timesteps_stable) const
+{
+ ldout(g_ceph_context, 1) << "quorum_stable? last formed:" << last_quorum_formed
+ << ", last changed " << last_quorum_change
+ << ", last reported members " << last_quorum_reported << dendl;
+ if (last_quorum_reported.empty()) {
+ return false;
+ }
+ if (last_quorum_formed < last_quorum_change) {
+ return false;
+ }
+ for (auto i : last_quorum_reported) {
+ if (electors.find(i)->second->timer_steps != -1) {
+ return false;
+ }
+ }
+ if (timesteps_run - timesteps_stable > last_quorum_change)
+ return true;
+ return election_stable();
+}
+
+bool Election::all_agree_on_leader() const
+{
+ int leader = electors.find(0)->second->logic.get_election_winner();
+ ldout(g_ceph_context, 10) << "all_agree_on_leader on " << leader << dendl;
+ for (auto& i: electors) {
+ if (leader != i.second->logic.get_election_winner()) {
+ ldout(g_ceph_context, 10) << "rank " << i.first << " has different leader "
+ << i.second->logic.get_election_winner() << dendl;
+ return false;
+ }
+ }
+ if (disallowed_leaders.count(leader)) {
+ ldout(g_ceph_context, 10) << "that leader is disallowed! member of "
+ << disallowed_leaders << dendl;
+ return false;
+ }
+ return true;
+}
+
+bool Election::check_epoch_agreement() const
+{
+ epoch_t epoch = electors.find(0)->second->logic.get_epoch();
+ for (auto& i : electors) {
+ if (epoch != i.second->logic.get_epoch()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void Election::block_messages(int from, int to)
+{
+ blocked_messages[from].insert(to);
+}
+void Election::block_bidirectional_messages(int a, int b)
+{
+ block_messages(a, b);
+ block_messages(b, a);
+}
+void Election::unblock_messages(int from, int to)
+{
+ blocked_messages[from].erase(to);
+}
+void Election::unblock_bidirectional_messages(int a, int b)
+{
+ unblock_messages(a, b);
+ unblock_messages(b, a);
+}
+
+void Election::remove_elector(int rank)
+{
+ for (auto ei = electors.begin(); ei != electors.end(); ) {
+ if (ei->first == rank) {
+ ei->second->notify_deleted();
+ electors.erase(ei++);
+ continue;
+ }
+ ei->second->notify_rank_removed(rank);
+ if (ei->first > rank) {
+ electors[ei->first - 1] = ei->second;
+ electors.erase(ei++);
+ continue;
+ }
+ ++ei;
+ }
+ for (auto bi = blocked_messages.begin(); bi != blocked_messages.end(); ) {
+ if (bi->first == rank) {
+ blocked_messages.erase(bi++);
+ continue;
+ }
+ bi->second.erase(rank);
+ for (auto i = bi->second.upper_bound(rank);
+ i != bi->second.end();) {
+ bi->second.insert(*i - 1);
+ bi->second.erase(*(i++));
+ }
+ ++bi;
+ }
+ --count;
+}
+
+void single_startup_election_completes(ElectionLogic::election_strategy strategy)
+{
+ for (int starter = 0; starter < 5; ++starter) {
+ Election election(5, strategy);
+ election.start_one(starter);
+ // This test is not actually legit since you should start
+ // all the ElectionLogics, but it seems to work
+ int steps = election.run_timesteps(0);
+ ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl;
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ }
+}
+
+void everybody_starts_completes(ElectionLogic::election_strategy strategy)
+{
+ Election election(5, strategy);
+ election.start_all();
+ int steps = election.run_timesteps(0);
+ ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl;
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+}
+
+void blocked_connection_continues_election(ElectionLogic::election_strategy strategy)
+{
+ Election election(5, strategy);
+ election.block_bidirectional_messages(0, 1);
+ election.start_all();
+ int steps = election.run_timesteps(100);
+ ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl;
+ // This is a failure mode!
+ ASSERT_FALSE(election.election_stable());
+ ASSERT_FALSE(election.quorum_stable(6)); // double the timer_steps we use
+ election.unblock_bidirectional_messages(0, 1);
+ steps = election.run_timesteps(100);
+ ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl;
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+}
+
+void blocked_connection_converges_election(ElectionLogic::election_strategy strategy)
+{
+ Election election(5, strategy);
+ election.block_bidirectional_messages(0, 1);
+ election.start_all();
+ int steps = election.run_timesteps(100);
+ ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl;
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ election.unblock_bidirectional_messages(0, 1);
+ steps = election.run_timesteps(100);
+ ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl;
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+}
+
+void disallowed_doesnt_win(ElectionLogic::election_strategy strategy)
+{
+ int MON_COUNT = 5;
+ for (int i = 0; i < MON_COUNT - 1; ++i) {
+ Election election(MON_COUNT, strategy);
+ for (int j = 0; j <= i; ++j) {
+ election.add_disallowed_leader(j);
+ }
+ election.start_all();
+ int steps = election.run_timesteps(0);
+ ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl;
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ int leader = election.electors[0]->logic.get_election_winner();
+ for (int j = 0; j <= i; ++j) {
+ ASSERT_NE(j, leader);
+ }
+ }
+ for (int i = MON_COUNT - 1; i > 0; --i) {
+ Election election(MON_COUNT, strategy);
+ for (int j = i; j <= MON_COUNT - 1; ++j) {
+ election.add_disallowed_leader(j);
+ }
+ election.start_all();
+ int steps = election.run_timesteps(0);
+ ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl;
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ int leader = election.electors[0]->logic.get_election_winner();
+ for (int j = i; j < MON_COUNT; ++j) {
+ ASSERT_NE(j, leader);
+ }
+ }
+}
+
+void converges_after_flapping(ElectionLogic::election_strategy strategy)
+{
+ Election election(5, strategy);
+ auto block_cons = [&] {
+ auto& e = election;
+ // leave 4 connected to both sides so it will trigger but not trivially win
+ e.block_bidirectional_messages(0, 2);
+ e.block_bidirectional_messages(0, 3);
+ e.block_bidirectional_messages(1, 2);
+ e.block_bidirectional_messages(1, 3);
+ };
+ auto unblock_cons = [&] {
+ auto& e = election;
+ e.unblock_bidirectional_messages(0, 2);
+ e.unblock_bidirectional_messages(0, 3);
+ e.unblock_bidirectional_messages(1, 2);
+ e.unblock_bidirectional_messages(1, 3);
+ };
+ block_cons();
+ election.start_all();
+ for (int i = 0; i < 5; ++i) {
+ election.run_timesteps(5);
+ unblock_cons();
+ election.run_timesteps(5);
+ block_cons();
+ }
+ unblock_cons();
+ election.run_timesteps(100);
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+}
+
+void converges_while_flapping(ElectionLogic::election_strategy strategy)
+{
+ Election election(5, strategy);
+ auto block_cons = [&] {
+ auto& e = election;
+ // leave 4 connected to both sides so it will trigger but not trivially win
+ e.block_bidirectional_messages(0, 2);
+ e.block_bidirectional_messages(0, 3);
+ e.block_bidirectional_messages(1, 2);
+ e.block_bidirectional_messages(1, 3);
+ };
+ auto unblock_cons = [&] {
+ auto& e = election;
+ e.unblock_bidirectional_messages(0, 2);
+ e.unblock_bidirectional_messages(0, 3);
+ e.unblock_bidirectional_messages(1, 2);
+ e.unblock_bidirectional_messages(1, 3);
+ };
+ block_cons();
+ election.start_all();
+ for (int i = 0; i < 5; ++i) {
+ election.run_timesteps(10);
+ ASSERT_TRUE(election.quorum_stable(6));
+ unblock_cons();
+ election.run_timesteps(5);
+ block_cons();
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ }
+ unblock_cons();
+ election.run_timesteps(100);
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6));
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+}
+
+void netsplit_with_disallowed_tiebreaker_converges(ElectionLogic::election_strategy strategy)
+{
+ Election election(5, strategy);
+ election.add_disallowed_leader(4);
+ auto netsplit = [&] {
+ auto& e = election;
+ e.block_bidirectional_messages(0, 2);
+ e.block_bidirectional_messages(0, 3);
+ e.block_bidirectional_messages(1, 2);
+ e.block_bidirectional_messages(1, 3);
+ };
+ auto unsplit = [&] {
+ auto& e = election;
+ e.unblock_bidirectional_messages(0, 2);
+ e.unblock_bidirectional_messages(0, 3);
+ e.unblock_bidirectional_messages(1, 2);
+ e.unblock_bidirectional_messages(1, 3);
+ };
+ // hmm, we don't have timeouts to call elections automatically yet
+ auto call_elections = [&] {
+ for (auto i : election.electors) {
+ i.second->trigger_new_election();
+ }
+ };
+ // turn everybody on, run happy for a while
+ election.start_all();
+ election.run_timesteps(0);
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6));
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ int starting_leader = election.last_leader;
+ // do some netsplits, but leave disallowed tiebreaker alive
+ for (int i = 0; i < 5; ++i) {
+ netsplit();
+ call_elections();
+ election.run_timesteps(15); // tests fail when I run 10 because 0 and 1 time out on same timestamp for some reason, why?
+ // this ASSERT_EQ only holds while we bias for ranks
+ ASSERT_EQ(starting_leader, election.last_leader);
+ ASSERT_TRUE(election.quorum_stable(6));
+ ASSERT_FALSE(election.election_stable());
+ unsplit();
+ call_elections();
+ election.run_timesteps(10);
+ ASSERT_EQ(starting_leader, election.last_leader);
+ ASSERT_TRUE(election.quorum_stable(6));
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ }
+
+ // now disconnect the tiebreaker and make sure nobody can win
+ int presplit_quorum_time = election.last_quorum_formed;
+ netsplit();
+ election.block_bidirectional_messages(4, 0);
+ election.block_bidirectional_messages(4, 1);
+ election.block_bidirectional_messages(4, 2);
+ election.block_bidirectional_messages(4, 3);
+ call_elections();
+ election.run_timesteps(100);
+ ASSERT_EQ(election.last_quorum_formed, presplit_quorum_time);
+
+ // now let in the previously-losing side
+ election.unblock_bidirectional_messages(4, 2);
+ election.unblock_bidirectional_messages(4, 3);
+ call_elections();
+ election.run_timesteps(100);
+ ASSERT_TRUE(election.quorum_stable(50));
+ ASSERT_FALSE(election.election_stable());
+
+ // now reconnect everybody
+ unsplit();
+ election.unblock_bidirectional_messages(4, 0);
+ election.unblock_bidirectional_messages(4, 1);
+ call_elections();
+ election.run_timesteps(100);
+ ASSERT_TRUE(election.quorum_stable(50));
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+}
+
+void handles_singly_connected_peon(ElectionLogic::election_strategy strategy)
+{
+ Election election(5, strategy);
+ election.block_bidirectional_messages(0, 1);
+ election.block_bidirectional_messages(0, 2);
+ election.block_bidirectional_messages(0, 3);
+ election.block_bidirectional_messages(0, 4);
+
+ election.start_all();
+ election.run_timesteps(20);
+ ASSERT_TRUE(election.quorum_stable(5));
+ ASSERT_FALSE(election.election_stable());
+
+ election.unblock_bidirectional_messages(0, 1);
+ election.run_timesteps(100);
+ ASSERT_TRUE(election.quorum_stable(50));
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+
+ election.block_bidirectional_messages(0, 1);
+ election.unblock_bidirectional_messages(0, 4);
+ for (auto i : election.electors) {
+ i.second->trigger_new_election();
+ }
+ election.run_timesteps(15);
+ ASSERT_TRUE(election.quorum_stable(50));
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+}
+
+ConnectionReport *get_connection_reports(ConnectionTracker& ct) {
+ return &ct.my_reports;
+}
+map<int,ConnectionReport> *get_peer_reports(ConnectionTracker& ct) {
+ return &ct.peer_reports;
+}
+void handles_outdated_scoring(ElectionLogic::election_strategy strategy)
+{
+ Election election(3, strategy, 5); // ping every 5 timesteps so they start elections before settling scores!
+
+ // start everybody up and run for a bit
+ election.start_all();
+ election.run_timesteps(20);
+ ASSERT_TRUE(election.quorum_stable(5));
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+
+ // now mess up the scores to disagree
+ ConnectionTracker& ct0 = election.electors[0]->peer_tracker;
+ ConnectionReport& cr0 = *get_connection_reports(ct0);
+ cr0.history[1] = 0.5;
+ cr0.history[2] = 0.5;
+ ct0.increase_version();
+ ConnectionTracker& ct1 = election.electors[1]->peer_tracker;
+ ConnectionReport& cr1 = *get_connection_reports(ct1);
+ cr1.history[0] = 0.5;
+ cr1.history[2] = 0.5;
+ ct1.increase_version();
+ ConnectionTracker& ct2 = election.electors[2]->peer_tracker;
+ ConnectionReport& cr2 = *get_connection_reports(ct2);
+ cr2.history[0] = 0.5;
+ map<int,ConnectionReport>&cp2 = *get_peer_reports(ct2);
+ cp2[0].history[2] = 0;
+ cp2[1].history[2] = 0;
+ ct2.increase_version();
+ election.ping_interval = 0; // disable pinging to update the scores
+ ldout(g_ceph_context, 5) << "mangled the scores to be different" << dendl;
+
+ election.start_all();
+ election.run_timesteps(50);
+ ASSERT_TRUE(election.quorum_stable(30));
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+}
+
+void handles_disagreeing_connectivity(ElectionLogic::election_strategy strategy)
+{
+ Election election(5, strategy, 5); // ping every 5 timesteps so they start elections before settling scores!
+
+ // start everybody up and run for a bit
+ election.start_all();
+ election.run_timesteps(20);
+ ASSERT_TRUE(election.quorum_stable(5));
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+
+ // block all the connections
+ for (int i = 0; i < 5; ++i) {
+ for (int j = i+1; j < 5; ++j) {
+ election.block_bidirectional_messages(i, j);
+ }
+ }
+
+ // now start them electing, which will obviously fail
+ election.start_all();
+ election.run_timesteps(50); // let them all demote scores of their peers
+ ASSERT_FALSE(election.quorum_stable(10));
+ ASSERT_FALSE(election.election_stable());
+
+ // now reconnect them, at which point they should start running an election before exchanging scores
+ for (int i = 0; i < 5; ++i) {
+ for (int j = i+1; j < 5; ++j) {
+ election.unblock_bidirectional_messages(i, j);
+ }
+ }
+ election.run_timesteps(100);
+
+ // these will pass if the nodes managed to converge on scores, but I expect failure
+ ASSERT_TRUE(election.quorum_stable(5));
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+}
+
+void handles_removing_ranks(ElectionLogic::election_strategy strategy)
+{
+ ceph_assert(strategy == ElectionLogic::CONNECTIVITY);
+ for (int deletee = 0; deletee < 5; ++deletee) {
+ Election election(5, strategy);
+ election.start_all();
+ int steps = election.run_timesteps(0);
+ ldout(g_ceph_context, 10) << "ran in " << steps << " timesteps" << dendl;
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ election.remove_elector(deletee);
+ ldout(g_ceph_context, 1) << "removed rank " << deletee << " from set" << dendl;
+ election.start_all();
+ steps = election.run_timesteps(0);
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ }
+ {
+ Election election(7, strategy);
+ for (int i = 0; i < (7 - 3); ++i) {
+ election.start_all();
+ election.remove_elector(0);
+ int steps = election.run_timesteps(0);
+ ldout(g_ceph_context, 1) << "ran in " << steps << " timesteps" << dendl;
+ ASSERT_TRUE(election.election_stable());
+ ASSERT_TRUE(election.quorum_stable(6)); // double the timer_steps we use
+ ASSERT_TRUE(election.all_agree_on_leader());
+ ASSERT_TRUE(election.check_epoch_agreement());
+ }
+ }
+}
+
+// TODO: write a test with more complicated connectivity graphs and make sure
+// they are stable with multiple disconnected ranks pinging peons
+
+// TODO: Write a test that disallowing and disconnecting 0 is otherwise stable?
+
+// TODO: figure out how to test for bumping election epochs with changing scores,
+// a la what happened in run
+// http://pulpito.ceph.com/gregf-2019-11-26_10:50:50-rados:monthrash-wip-elector-distro-basic-mira/
+
+#define test_classic(utest) TEST(classic, utest) { utest(ElectionLogic::CLASSIC); }
+
+#define test_disallowed(utest) TEST(disallowed, utest) { utest(ElectionLogic::DISALLOW); }
+
+#define test_connectivity(utest) TEST(connectivity, utest) { utest(ElectionLogic::CONNECTIVITY); }
+
+
+// TODO: test for expected failures; gtest probably supports that?
+test_classic(single_startup_election_completes)
+test_classic(everybody_starts_completes)
+test_classic(blocked_connection_continues_election)
+test_classic(converges_after_flapping)
+
+test_disallowed(single_startup_election_completes)
+test_disallowed(everybody_starts_completes)
+test_disallowed(blocked_connection_continues_election)
+test_disallowed(disallowed_doesnt_win)
+test_disallowed(converges_after_flapping)
+
+/* skip single_startup_election_completes because we crash
+ on init conditions. That's fine since as noted above it's not
+ quite following the rules anyway. */
+test_connectivity(everybody_starts_completes)
+test_connectivity(blocked_connection_converges_election)
+test_connectivity(disallowed_doesnt_win)
+test_connectivity(converges_after_flapping)
+test_connectivity(converges_while_flapping)
+test_connectivity(netsplit_with_disallowed_tiebreaker_converges)
+test_connectivity(handles_singly_connected_peon)
+test_connectivity(handles_disagreeing_connectivity)
+test_connectivity(handles_outdated_scoring)
+test_connectivity(handles_removing_ranks)
diff --git a/src/test/mon/test_log_rss_usage.cc b/src/test/mon/test_log_rss_usage.cc
new file mode 100644
index 000000000..f6e85f414
--- /dev/null
+++ b/src/test/mon/test_log_rss_usage.cc
@@ -0,0 +1,101 @@
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
+#include <vector>
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+using namespace std;
+
+int getPidByName(string procName)
+{
+ int pid = -1;
+
+ // Open the /proc directory
+ DIR *dp = opendir("/proc");
+ if (dp != NULL)
+ {
+ // Enumerate all entries in '/proc' until process is found
+ struct dirent *dirp;
+ while (pid < 0 && (dirp = readdir(dp)))
+ {
+ // Skip non-numeric entries
+ int id = atoi(dirp->d_name);
+ if (id > 0)
+ {
+ // Read contents of virtual /proc/{pid}/cmdline file
+ string cmdPath = string("/proc/") + dirp->d_name + "/cmdline";
+ ifstream cmdFile(cmdPath.c_str());
+ string cmdLine;
+ getline(cmdFile, cmdLine);
+ if (!cmdLine.empty())
+ {
+ // Keep first cmdline item which contains the program path
+ size_t pos = cmdLine.find('\0');
+ if (pos != string::npos) {
+ cmdLine = cmdLine.substr(0, pos);
+ }
+ // Get program name only, removing the path
+ pos = cmdLine.rfind('/');
+ if (pos != string::npos) {
+ cmdLine = cmdLine.substr(pos + 1);
+ }
+ // Compare against requested process name
+ if (procName == cmdLine) {
+ pid = id;
+ }
+ }
+ }
+ }
+ }
+
+ closedir(dp);
+
+ return pid;
+}
+
+uint64_t getRssUsage(string pid)
+{
+ int totalSize = 0;
+ int resSize = 0;
+
+ string statmPath = string("/proc/") + pid + "/statm";
+ ifstream buffer(statmPath);
+ buffer >> totalSize >> resSize;
+ buffer.close();
+
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ uint64_t rss = resSize * page_size;
+
+ return rss;
+}
+
+int main(int argc, char* argv[])
+{
+ if (argc != 2) {
+ cout << "Syntax: "
+ << "ceph_test_log_rss_usage <process name>"
+ << endl;
+ exit(EINVAL);
+ }
+ uint64_t rss = 0;
+ int pid = getPidByName(argv[1]);
+ string rssUsage;
+
+ // Use the pid to get RSS memory usage
+ // and print it to stdout
+ if (pid != -1) {
+ rss = getRssUsage(to_string(pid));
+ } else {
+ cout << "Process " << argv[1] << " NOT FOUND!\n" << endl;
+ exit(ESRCH);
+ }
+
+ rssUsage = to_string(rss) + ":" + to_string(pid) + ":";
+ cout << rssUsage.c_str() << endl;
+ return 0;
+}
diff --git a/src/test/mon/test_mon_memory_target.cc b/src/test/mon/test_mon_memory_target.cc
new file mode 100644
index 000000000..e8f975b47
--- /dev/null
+++ b/src/test/mon/test_mon_memory_target.cc
@@ -0,0 +1,79 @@
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <string>
+#include <numeric>
+#include <regex>
+#include <system_error>
+
+#include <boost/process.hpp>
+#include <boost/tokenizer.hpp>
+
+namespace bp = boost::process;
+using namespace std;
+
+int main(int argc, char** argv)
+{
+ cout << "Mon Memory Target Test" << endl;
+
+ if (argc != 2) {
+ cout << "Syntax: "
+ << "ceph_test_mon_memory_target <mon-memory-target-bytes>"
+ << endl;
+ exit(EINVAL);
+ }
+
+ string target_directory("/var/log/ceph/");
+ unsigned long maxallowed = stoul(argv[1], nullptr, 10);
+ regex reg(R"(cache_size:(\d*)\s)");
+
+ string grep_command("grep _set_new_cache_sizes " + target_directory
+ + "ceph-mon.a.log");
+ bp::ipstream is;
+ error_code ec;
+ bp::child grep(grep_command, bp::std_out > is, ec);
+ if (ec) {
+ cout << "Error grepping logs! Exiting" << endl;
+ cout << "Error: " << ec.value() << " " << ec.message() << endl;
+ exit(ec.value());
+ }
+
+ string line;
+ vector<unsigned long> results;
+ while (grep.running() && getline(is, line) && !line.empty()) {
+ smatch match;
+ if (regex_search(line, match, reg)) {
+ results.push_back(stoul(match[1].str()));
+ }
+ }
+
+ if (results.empty()) {
+ cout << "Error: No grep results found!" << endl;
+ exit(ENOENT);
+ }
+
+ auto maxe = *(max_element(results.begin(), results.end()));
+ cout << "Results for mon_memory_target:" << endl;
+ cout << "Max: " << maxe << endl;
+ cout << "Min: " << *(min_element(results.begin(), results.end())) << endl;
+ auto sum = accumulate(results.begin(), results.end(),
+ static_cast<unsigned long long>(0));
+ auto mean = sum / results.size();
+ cout << "Mean average: " << mean << endl;
+ vector<unsigned long> diff(results.size());
+ transform(results.begin(), results.end(), diff.begin(),
+ [mean](unsigned long x) { return x - mean; });
+ auto sump = inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
+ auto stdev = sqrt(sump / results.size());
+ cout << "Standard deviation: " << stdev << endl;
+
+ if (maxe > maxallowed) {
+ cout << "Error: Mon memory consumption exceeds maximum allowed!" << endl;
+ exit(ENOMEM);
+ }
+
+ grep.wait();
+
+ cout << "Completed successfully" << endl;
+ return 0;
+}
diff --git a/src/test/mon/test_mon_rss_usage.cc b/src/test/mon/test_mon_rss_usage.cc
new file mode 100644
index 000000000..76b5856f6
--- /dev/null
+++ b/src/test/mon/test_mon_rss_usage.cc
@@ -0,0 +1,72 @@
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <numeric>
+#include <regex>
+#include <cmath>
+#include <system_error>
+
+using namespace std;
+
+int main(int argc, char **argv)
+{
+ cout << "Mon RSS Usage Test" << endl;
+
+ if (argc != 2) {
+ cout << "Syntax: "
+ << "ceph_test_mon_rss_usage <mon-memory-target-bytes>"
+ << endl;
+ exit(EINVAL);
+ }
+
+ unsigned long maxallowed = stoul(argv[1], nullptr, 10);
+ // Set max allowed RSS usage to be 125% of mon-memory-target
+ maxallowed *= 1.25;
+
+ string target_directory("/var/log/ceph/");
+ string filePath = target_directory + "ceph-mon-rss-usage.log";
+ ifstream buffer(filePath.c_str());
+ string line;
+ vector<unsigned long> results;
+ while(getline(buffer, line) && !line.empty()) {
+ string rssUsage;
+ size_t pos = line.find(':');
+ if (pos != string::npos) {
+ rssUsage = line.substr(0, pos);
+ }
+ if (!rssUsage.empty()) {
+ results.push_back(stoul(rssUsage));
+ }
+ }
+
+ buffer.close();
+ if (results.empty()) {
+ cout << "Error: No grep results found!" << endl;
+ exit(ENOENT);
+ }
+
+ auto maxe = *(max_element(results.begin(), results.end()));
+ cout << "Stats for mon RSS Memory Usage:" << endl;
+ cout << "Parsed " << results.size() << " entries." << endl;
+ cout << "Max: " << maxe << endl;
+ cout << "Min: " << *(min_element(results.begin(), results.end())) << endl;
+ auto sum = accumulate(results.begin(), results.end(),
+ static_cast<unsigned long long>(0));
+ auto mean = sum / results.size();
+ cout << "Mean average: " << mean << endl;
+ vector<unsigned long> diff(results.size());
+ transform(results.begin(), results.end(), diff.begin(),
+ [mean](unsigned long x) { return x - mean; });
+ auto sump = inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
+ auto stdev = sqrt(sump / results.size());
+ cout << fixed << "Standard deviation: " << stdev << endl;
+
+ if (maxe > maxallowed) {
+ cout << "Error: Mon RSS memory usage exceeds maximum allowed!" << endl;
+ exit(ENOMEM);
+ }
+
+ cout << "Completed successfully" << endl;
+ return 0;
+}
diff --git a/src/test/mon/test_mon_types.cc b/src/test/mon/test_mon_types.cc
new file mode 100644
index 000000000..e9997f14f
--- /dev/null
+++ b/src/test/mon/test_mon_types.cc
@@ -0,0 +1,140 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2012 Inktank
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+#include <iostream>
+#include "mon/mon_types.h"
+
+#include "gtest/gtest.h"
+
+TEST(mon_features, supported_v_persistent) {
+
+ mon_feature_t supported = ceph::features::mon::get_supported();
+ mon_feature_t persistent = ceph::features::mon::get_persistent();
+
+ ASSERT_EQ(supported.intersection(persistent), persistent);
+ ASSERT_TRUE(supported.contains_all(persistent));
+
+ mon_feature_t diff = supported.diff(persistent);
+ ASSERT_TRUE((persistent | diff) == supported);
+ ASSERT_TRUE((supported & persistent) == persistent);
+}
+
+TEST(mon_features, binary_ops) {
+
+ mon_feature_t FEATURE_NONE(0ULL);
+ mon_feature_t FEATURE_A((1ULL << 1));
+ mon_feature_t FEATURE_B((1ULL << 2));
+ mon_feature_t FEATURE_C((1ULL << 3));
+ mon_feature_t FEATURE_D((1ULL << 4));
+
+ mon_feature_t FEATURE_ALL(
+ FEATURE_A | FEATURE_B |
+ FEATURE_C | FEATURE_D
+ );
+
+ mon_feature_t foo(FEATURE_A|FEATURE_B);
+ mon_feature_t bar(FEATURE_C|FEATURE_D);
+
+ ASSERT_EQ(FEATURE_A|FEATURE_B, foo);
+ ASSERT_EQ(FEATURE_C|FEATURE_D, bar);
+
+ ASSERT_NE(FEATURE_C, foo);
+ ASSERT_NE(FEATURE_B, bar);
+ ASSERT_NE(FEATURE_NONE, foo);
+ ASSERT_NE(FEATURE_NONE, bar);
+
+ ASSERT_FALSE(foo.empty());
+ ASSERT_FALSE(bar.empty());
+ ASSERT_TRUE(FEATURE_NONE.empty());
+
+ ASSERT_EQ(FEATURE_ALL, (foo ^ bar));
+ ASSERT_EQ(FEATURE_NONE, (foo & bar));
+
+ mon_feature_t baz = foo;
+ ASSERT_EQ(baz, foo);
+
+ baz |= bar;
+ ASSERT_EQ(FEATURE_ALL, baz);
+ baz ^= foo;
+ ASSERT_EQ(baz, bar);
+
+ baz |= FEATURE_A;
+ ASSERT_EQ(FEATURE_C, baz & FEATURE_C);
+ ASSERT_EQ((FEATURE_A|FEATURE_D), baz & (FEATURE_A|FEATURE_D));
+ ASSERT_EQ(FEATURE_B|FEATURE_C|FEATURE_D, (baz ^ foo));
+}
+
+TEST(mon_features, set_funcs) {
+
+ mon_feature_t FEATURE_A((1ULL << 1));
+ mon_feature_t FEATURE_B((1ULL << 2));
+ mon_feature_t FEATURE_C((1ULL << 3));
+ mon_feature_t FEATURE_D((1ULL << 4));
+
+ mon_feature_t FEATURE_ALL(
+ FEATURE_A | FEATURE_B |
+ FEATURE_C | FEATURE_D
+ );
+
+ mon_feature_t foo(FEATURE_A|FEATURE_B);
+ mon_feature_t bar(FEATURE_C|FEATURE_D);
+
+ ASSERT_TRUE(FEATURE_ALL.contains_all(foo));
+ ASSERT_TRUE(FEATURE_ALL.contains_all(bar));
+ ASSERT_TRUE(FEATURE_ALL.contains_all(foo|bar));
+
+ ASSERT_EQ(foo.diff(bar), foo);
+ ASSERT_EQ(bar.diff(foo), bar);
+ ASSERT_EQ(FEATURE_ALL.diff(foo), bar);
+ ASSERT_EQ(FEATURE_ALL.diff(bar), foo);
+
+ ASSERT_TRUE(foo.contains_any(FEATURE_A|bar));
+ ASSERT_TRUE(bar.contains_any(FEATURE_ALL));
+ ASSERT_TRUE(FEATURE_ALL.contains_any(foo));
+
+ mon_feature_t FEATURE_X((1ULL << 10));
+
+ ASSERT_FALSE(FEATURE_ALL.contains_any(FEATURE_X));
+ ASSERT_FALSE(FEATURE_ALL.contains_all(FEATURE_X));
+ ASSERT_EQ(FEATURE_ALL.diff(FEATURE_X), FEATURE_ALL);
+
+ ASSERT_EQ(foo.intersection(FEATURE_ALL), foo);
+ ASSERT_EQ(bar.intersection(FEATURE_ALL), bar);
+}
+
+TEST(mon_features, set_unset) {
+
+ mon_feature_t FEATURE_A((1ULL << 1));
+ mon_feature_t FEATURE_B((1ULL << 2));
+ mon_feature_t FEATURE_C((1ULL << 3));
+
+ mon_feature_t foo;
+ ASSERT_EQ(ceph::features::mon::FEATURE_NONE, foo);
+
+ foo.set_feature(FEATURE_A);
+ ASSERT_EQ(FEATURE_A, foo);
+ ASSERT_TRUE(foo.contains_all(FEATURE_A));
+
+ foo.set_feature(FEATURE_B|FEATURE_C);
+ ASSERT_EQ((FEATURE_A|FEATURE_B|FEATURE_C), foo);
+ ASSERT_TRUE(foo.contains_all((FEATURE_A|FEATURE_B|FEATURE_C)));
+
+ foo.unset_feature(FEATURE_A);
+ ASSERT_EQ((FEATURE_B|FEATURE_C), foo);
+ ASSERT_FALSE(foo.contains_any(FEATURE_A));
+ ASSERT_TRUE(foo.contains_all((FEATURE_B|FEATURE_C)));
+
+ foo.unset_feature(FEATURE_B|FEATURE_C);
+ ASSERT_EQ(ceph::features::mon::FEATURE_NONE, foo);
+ ASSERT_FALSE(foo.contains_any(FEATURE_A|FEATURE_B|FEATURE_C));
+}
diff --git a/src/test/mon/test_mon_workloadgen.cc b/src/test/mon/test_mon_workloadgen.cc
new file mode 100644
index 000000000..147ea8bcd
--- /dev/null
+++ b/src/test/mon/test_mon_workloadgen.cc
@@ -0,0 +1,1104 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+#include "acconfig.h"
+
+#ifdef HAVE_SYS_MOUNT_H
+#include <sys/mount.h>
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef HAVE_SYS_VFS_H
+#include <sys/vfs.h>
+#endif
+
+#include <iostream>
+#include <string>
+#include <map>
+
+#include <boost/scoped_ptr.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_int.hpp>
+
+
+#include "osd/osd_types.h"
+#include "osdc/Objecter.h"
+#include "mon/MonClient.h"
+#include "msg/Dispatcher.h"
+#include "msg/Messenger.h"
+#include "common/async/context_pool.h"
+#include "common/Timer.h"
+#include "common/ceph_argparse.h"
+#include "global/global_init.h"
+#include "global/signal_handler.h"
+#include "common/config.h"
+#include "common/debug.h"
+#include "common/errno.h"
+#include "common/ceph_mutex.h"
+#include "common/strtol.h"
+#include "common/LogEntry.h"
+#include "auth/KeyRing.h"
+#include "auth/AuthAuthorizeHandler.h"
+#include "include/uuid.h"
+#include "include/ceph_assert.h"
+
+#include "messages/MOSDBoot.h"
+#include "messages/MOSDAlive.h"
+#include "messages/MOSDPGCreate.h"
+#include "messages/MOSDPGRemove.h"
+#include "messages/MOSDMap.h"
+#include "messages/MPGStats.h"
+#include "messages/MLog.h"
+#include "messages/MOSDPGTemp.h"
+
+using namespace std;
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_
+#undef dout_prefix
+#define dout_prefix _prefix(_dout, get_name())
+static ostream& _prefix(std::ostream *_dout, const string &n) {
+ return *_dout << " stub(" << n << ") ";
+}
+
+
+typedef boost::mt11213b rngen_t;
+typedef boost::scoped_ptr<Messenger> MessengerRef;
+typedef boost::scoped_ptr<Objecter> ObjecterRef;
+
+class TestStub : public Dispatcher
+{
+ protected:
+ MessengerRef messenger;
+ ceph::async::io_context_pool poolctx;
+ MonClient monc;
+
+ ceph::mutex lock;
+ ceph::condition_variable cond;
+ SafeTimer timer;
+
+ bool do_shutdown;
+ double tick_seconds;
+
+ struct C_Tick : public Context {
+ TestStub *s;
+ explicit C_Tick(TestStub *stub) : s(stub) {}
+ void finish(int r) override {
+ generic_dout(20) << "C_Tick::" << __func__ << dendl;
+ if (r == -ECANCELED) {
+ generic_dout(20) << "C_Tick::" << __func__
+ << " shutdown" << dendl;
+ return;
+ }
+ s->tick();
+ }
+ };
+
+ bool ms_dispatch(Message *m) override = 0;
+ void ms_handle_connect(Connection *con) override = 0;
+ void ms_handle_remote_reset(Connection *con) override = 0;
+ virtual int _shutdown() = 0;
+ // courtesy method to be implemented by the stubs at their
+ // own discretion
+ virtual void _tick() { }
+ // different stubs may have different needs; if a stub needs
+ // to tick, then it must call this function.
+ void start_ticking(double t=1.0) {
+ tick_seconds = t;
+ if (t <= 0) {
+ stop_ticking();
+ return;
+ }
+ dout(20) << __func__ << " adding tick timer" << dendl;
+ timer.add_event_after(tick_seconds, new C_Tick(this));
+ }
+ // If we have a function to start ticking that the stubs can
+ // use at their own discretion, then we should also have a
+ // function to disable said ticking to be used the same way.
+ // Just in case.
+ // For simplicity's sake, we don't cancel the tick right off
+ // the bat; instead, we wait for the next tick to kick in and
+ // disable itself.
+ void stop_ticking() {
+ dout(20) << __func__ << " disable tick" << dendl;
+ tick_seconds = 0;
+ }
+
+ public:
+ void tick() {
+ std::cout << __func__ << std::endl;
+ if (do_shutdown || (tick_seconds <= 0)) {
+ std::cout << __func__ << " "
+ << (do_shutdown ? "shutdown" : "stop ticking")
+ << std::endl;
+ return;
+ }
+ _tick();
+ timer.add_event_after(tick_seconds, new C_Tick(this));
+ }
+
+ virtual const string get_name() = 0;
+ virtual int init() = 0;
+
+ virtual int shutdown() {
+ std::lock_guard l{lock};
+ do_shutdown = true;
+ int r = _shutdown();
+ if (r < 0) {
+ dout(10) << __func__ << " error shutting down: "
+ << cpp_strerror(-r) << dendl;
+ return r;
+ }
+ monc.shutdown();
+ timer.shutdown();
+ messenger->shutdown();
+ poolctx.finish();
+ return 0;
+ }
+
+ virtual void print(ostream &out) {
+ out << "stub(" << get_name() << ")";
+ }
+
+ void wait() {
+ if (messenger != NULL)
+ messenger->wait();
+ }
+
+ TestStub(CephContext *cct, string who)
+ : Dispatcher(cct),
+ monc(cct, poolctx),
+ lock(ceph::make_mutex(who.append("::lock"))),
+ timer(cct, lock),
+ do_shutdown(false),
+ tick_seconds(0.0) { }
+};
+
+class ClientStub : public TestStub
+{
+ ObjecterRef objecter;
+ rngen_t gen;
+
+ protected:
+ bool ms_dispatch(Message *m) override {
+ std::lock_guard l{lock};
+ dout(1) << "client::" << __func__ << " " << *m << dendl;
+ switch (m->get_type()) {
+ case CEPH_MSG_OSD_MAP:
+ objecter->handle_osd_map((MOSDMap*)m);
+ cond.notify_all();
+ break;
+ }
+ return true;
+ }
+
+ void ms_handle_connect(Connection *con) override {
+ dout(1) << "client::" << __func__ << " " << con << dendl;
+ std::lock_guard l{lock};
+ objecter->ms_handle_connect(con);
+ }
+
+ void ms_handle_remote_reset(Connection *con) override {
+ dout(1) << "client::" << __func__ << " " << con << dendl;
+ std::lock_guard l{lock};
+ objecter->ms_handle_remote_reset(con);
+ }
+
+ bool ms_handle_reset(Connection *con) override {
+ dout(1) << "client::" << __func__ << dendl;
+ std::lock_guard l{lock};
+ objecter->ms_handle_reset(con);
+ return false;
+ }
+
+ bool ms_handle_refused(Connection *con) override {
+ return false;
+ }
+
+ const string get_name() override {
+ return "client";
+ }
+
+ int _shutdown() override {
+ if (objecter) {
+ objecter->shutdown();
+ }
+ return 0;
+ }
+
+ public:
+ explicit ClientStub(CephContext *cct)
+ : TestStub(cct, "client"),
+ gen((int) time(NULL))
+ { }
+
+ int init() override {
+ int err;
+ poolctx.start(1);
+ err = monc.build_initial_monmap();
+ if (err < 0) {
+ derr << "ClientStub::" << __func__ << " ERROR: build initial monmap: "
+ << cpp_strerror(err) << dendl;
+ return err;
+ }
+
+ messenger.reset(Messenger::create_client_messenger(cct, "stubclient"));
+ ceph_assert(messenger.get() != NULL);
+
+ messenger->set_default_policy(
+ Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX));
+ dout(10) << "ClientStub::" << __func__ << " starting messenger at "
+ << messenger->get_myaddrs() << dendl;
+
+ objecter.reset(new Objecter(cct, messenger.get(), &monc, poolctx));
+ ceph_assert(objecter.get() != NULL);
+ objecter->set_balanced_budget();
+
+ monc.set_messenger(messenger.get());
+ objecter->init();
+ messenger->add_dispatcher_head(this);
+ messenger->start();
+ monc.set_want_keys(CEPH_ENTITY_TYPE_MON|CEPH_ENTITY_TYPE_OSD);
+
+ err = monc.init();
+ if (err < 0) {
+ derr << "ClientStub::" << __func__ << " monc init error: "
+ << cpp_strerror(-err) << dendl;
+ return err;
+ }
+
+ err = monc.authenticate();
+ if (err < 0) {
+ derr << "ClientStub::" << __func__ << " monc authenticate error: "
+ << cpp_strerror(-err) << dendl;
+ monc.shutdown();
+ return err;
+ }
+ monc.wait_auth_rotating(30.0);
+
+ objecter->set_client_incarnation(0);
+ objecter->start();
+
+ lock.lock();
+ timer.init();
+ monc.renew_subs();
+
+ lock.unlock();
+
+ objecter->wait_for_osd_map();
+
+ dout(10) << "ClientStub::" << __func__ << " done" << dendl;
+ return 0;
+ }
+};
+
+class OSDStub : public TestStub
+{
+ int whoami;
+ OSDSuperblock sb;
+ OSDMap osdmap;
+ osd_stat_t osd_stat;
+
+ map<pg_t,pg_stat_t> pgs;
+ set<pg_t> pgs_changes;
+
+ rngen_t gen;
+ boost::uniform_int<> mon_osd_rng;
+
+ utime_t last_boot_attempt;
+ static const double STUB_BOOT_INTERVAL;
+
+
+ public:
+
+ enum {
+ STUB_MON_OSD_ALIVE = 1,
+ STUB_MON_OSD_PGTEMP = 2,
+ STUB_MON_OSD_FAILURE = 3,
+ STUB_MON_OSD_PGSTATS = 4,
+ STUB_MON_LOG = 5,
+
+ STUB_MON_OSD_FIRST = STUB_MON_OSD_ALIVE,
+ STUB_MON_OSD_LAST = STUB_MON_LOG,
+ };
+
+ struct C_CreatePGs : public Context {
+ OSDStub *s;
+ explicit C_CreatePGs(OSDStub *stub) : s(stub) {}
+ void finish(int r) override {
+ if (r == -ECANCELED) {
+ generic_dout(20) << "C_CreatePGs::" << __func__
+ << " shutdown" << dendl;
+ return;
+ }
+ generic_dout(20) << "C_CreatePGs::" << __func__ << dendl;
+ s->auto_create_pgs();
+ }
+ };
+
+
+ OSDStub(int _whoami, CephContext *cct)
+ : TestStub(cct, "osd"),
+ whoami(_whoami),
+ gen(whoami),
+ mon_osd_rng(STUB_MON_OSD_FIRST, STUB_MON_OSD_LAST)
+ {
+ dout(20) << __func__ << " auth supported: "
+ << cct->_conf->auth_supported << dendl;
+ stringstream ss;
+ ss << "client-osd" << whoami;
+ std::string public_msgr_type = cct->_conf->ms_public_type.empty() ? cct->_conf.get_val<std::string>("ms_type") : cct->_conf->ms_public_type;
+ messenger.reset(Messenger::create(cct, public_msgr_type, entity_name_t::OSD(whoami),
+ ss.str().c_str(), getpid()));
+
+ Throttle throttler(g_ceph_context, "osd_client_bytes",
+ g_conf()->osd_client_message_size_cap);
+
+ messenger->set_default_policy(
+ Messenger::Policy::stateless_server(0));
+ messenger->set_policy_throttlers(entity_name_t::TYPE_CLIENT,
+ &throttler, NULL);
+ messenger->set_policy(entity_name_t::TYPE_MON,
+ Messenger::Policy::lossy_client(
+ CEPH_FEATURE_UID |
+ CEPH_FEATURE_PGID64 |
+ CEPH_FEATURE_OSDENC));
+ messenger->set_policy(entity_name_t::TYPE_OSD,
+ Messenger::Policy::stateless_server(0));
+
+ dout(10) << __func__ << " public addr " << g_conf()->public_addr << dendl;
+ int err = messenger->bind(g_conf()->public_addr);
+ if (err < 0)
+ exit(1);
+
+ if (monc.build_initial_monmap() < 0)
+ exit(1);
+
+ messenger->start();
+ monc.set_messenger(messenger.get());
+ }
+
+ int init() override {
+ dout(10) << __func__ << dendl;
+ std::lock_guard l{lock};
+
+ dout(1) << __func__ << " fsid " << monc.monmap.fsid
+ << " osd_fsid " << g_conf()->osd_uuid << dendl;
+ dout(1) << __func__ << " name " << g_conf()->name << dendl;
+
+ timer.init();
+ messenger->add_dispatcher_head(this);
+ monc.set_want_keys(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD);
+
+ int err = monc.init();
+ if (err < 0) {
+ derr << __func__ << " monc init error: "
+ << cpp_strerror(-err) << dendl;
+ return err;
+ }
+
+ err = monc.authenticate();
+ if (err < 0) {
+ derr << __func__ << " monc authenticate error: "
+ << cpp_strerror(-err) << dendl;
+ monc.shutdown();
+ return err;
+ }
+ ceph_assert(!monc.get_fsid().is_zero());
+
+ monc.wait_auth_rotating(30.0);
+
+
+ dout(10) << __func__ << " creating osd superblock" << dendl;
+ sb.cluster_fsid = monc.monmap.fsid;
+ sb.osd_fsid.generate_random();
+ sb.whoami = whoami;
+ sb.compat_features = CompatSet();
+ dout(20) << __func__ << " " << sb << dendl;
+ dout(20) << __func__ << " osdmap " << osdmap << dendl;
+
+ update_osd_stat();
+
+ start_ticking();
+ // give a chance to the mons to inform us of what PGs we should create
+ timer.add_event_after(30.0, new C_CreatePGs(this));
+
+ return 0;
+ }
+
+ int _shutdown() override {
+
+ return 0;
+ }
+
+ void boot() {
+ dout(1) << __func__ << " boot?" << dendl;
+
+ utime_t now = ceph_clock_now();
+ if ((last_boot_attempt > 0.0)
+ && ((now - last_boot_attempt)) <= STUB_BOOT_INTERVAL) {
+ dout(1) << __func__ << " backoff and try again later." << dendl;
+ return;
+ }
+
+ dout(1) << __func__ << " boot!" << dendl;
+ MOSDBoot *mboot = new MOSDBoot;
+ mboot->sb = sb;
+ last_boot_attempt = now;
+ monc.send_mon_message(mboot);
+ }
+
+ void add_pg(pg_t pgid, epoch_t epoch, pg_t parent) {
+
+ utime_t now = ceph_clock_now();
+
+ pg_stat_t s;
+ s.created = epoch;
+ s.last_epoch_clean = epoch;
+ s.parent = parent;
+ s.state |= PG_STATE_CLEAN | PG_STATE_ACTIVE;
+ s.last_fresh = now;
+ s.last_change = now;
+ s.last_clean = now;
+ s.last_active = now;
+ s.last_unstale = now;
+
+ pgs[pgid] = s;
+ pgs_changes.insert(pgid);
+ }
+
+ void auto_create_pgs() {
+ bool has_pgs = !pgs.empty();
+ dout(10) << __func__
+ << ": " << (has_pgs ? "has pgs; ignore" : "create pgs") << dendl;
+ if (has_pgs)
+ return;
+
+ if (!osdmap.get_epoch()) {
+ dout(1) << __func__
+ << " still don't have osdmap; reschedule pg creation" << dendl;
+ timer.add_event_after(10.0, new C_CreatePGs(this));
+ return;
+ }
+
+ auto& osdmap_pools = osdmap.get_pools();
+ for (auto pit = osdmap_pools.begin(); pit != osdmap_pools.end(); ++pit) {
+ const int64_t pool_id = pit->first;
+ const pg_pool_t &pool = pit->second;
+ int ruleno = pool.get_crush_rule();
+
+ if (!osdmap.crush->rule_exists(ruleno)) {
+ dout(20) << __func__
+ << " no crush rule for pool id " << pool_id
+ << " rule no " << ruleno << dendl;
+ continue;
+ }
+
+ epoch_t pool_epoch = pool.get_last_change();
+ dout(20) << __func__
+ << " pool num pgs " << pool.get_pg_num()
+ << " epoch " << pool_epoch << dendl;
+
+ for (ps_t ps = 0; ps < pool.get_pg_num(); ++ps) {
+ pg_t pgid(ps, pool_id);
+ pg_t parent;
+ dout(20) << __func__
+ << " pgid " << pgid << " parent " << parent << dendl;
+ add_pg(pgid, pool_epoch, parent);
+ }
+ }
+ }
+
+ void update_osd_stat() {
+ struct statfs stbuf;
+ int ret = statfs(".", &stbuf);
+ if (ret < 0) {
+ ret = -errno;
+ dout(0) << __func__
+ << " cannot statfs ." << cpp_strerror(ret) << dendl;
+ return;
+ }
+
+ osd_stat.statfs.total = stbuf.f_blocks * stbuf.f_bsize;
+ osd_stat.statfs.available = stbuf.f_bavail * stbuf.f_bsize;
+ osd_stat.statfs.internally_reserved = 0;
+ }
+
+ void send_pg_stats() {
+ dout(10) << __func__
+ << " pgs " << pgs.size() << " osdmap " << osdmap << dendl;
+ MPGStats *mstats = new MPGStats(monc.get_fsid(), osdmap.get_epoch());
+
+ mstats->set_tid(1);
+ mstats->osd_stat = osd_stat;
+
+ set<pg_t>::iterator it;
+ for (it = pgs_changes.begin(); it != pgs_changes.end(); ++it) {
+ pg_t pgid = (*it);
+ if (pgs.count(pgid) == 0) {
+ derr << __func__
+ << " pgid " << pgid << " not on our map" << dendl;
+ ceph_abort_msg("pgid not on our map");
+ }
+ pg_stat_t &s = pgs[pgid];
+ mstats->pg_stat[pgid] = s;
+
+ JSONFormatter f(true);
+ s.dump(&f);
+ dout(20) << __func__
+ << " pg " << pgid << " stats:\n";
+ f.flush(*_dout);
+ *_dout << dendl;
+
+ }
+ dout(10) << __func__ << " send " << *mstats << dendl;
+ monc.send_mon_message(mstats);
+ }
+
+ void modify_pg(pg_t pgid) {
+ dout(10) << __func__ << " pg " << pgid << dendl;
+ ceph_assert(pgs.count(pgid) > 0);
+
+ pg_stat_t &s = pgs[pgid];
+ utime_t now = ceph_clock_now();
+
+ if (now - s.last_change < 10.0) {
+ dout(10) << __func__
+ << " pg " << pgid << " changed in the last 10s" << dendl;
+ return;
+ }
+
+ s.state ^= PG_STATE_CLEAN;
+ if (s.state & PG_STATE_CLEAN)
+ s.last_clean = now;
+ s.last_change = now;
+ s.reported_seq++;
+
+ pgs_changes.insert(pgid);
+ }
+
+ void modify_pgs() {
+ dout(10) << __func__ << dendl;
+
+ if (pgs.empty()) {
+ dout(1) << __func__
+ << " no pgs available! don't attempt to modify." << dendl;
+ return;
+ }
+
+ boost::uniform_int<> pg_rng(0, pgs.size()-1);
+ set<int> pgs_pos;
+
+ int num_pgs = pg_rng(gen);
+ while ((int)pgs_pos.size() < num_pgs)
+ pgs_pos.insert(pg_rng(gen));
+
+ map<pg_t,pg_stat_t>::iterator it = pgs.begin();
+ set<int>::iterator pos_it = pgs_pos.begin();
+
+ int pgs_at = 0;
+ while (pos_it != pgs_pos.end()) {
+ int at = *pos_it;
+ dout(20) << __func__ << " pg at pos " << at << dendl;
+ while ((pgs_at != at) && (it != pgs.end())) {
+ ++it;
+ ++pgs_at;
+ }
+ ceph_assert(it != pgs.end());
+ dout(20) << __func__
+ << " pg at pos " << at << ": " << it->first << dendl;
+ modify_pg(it->first);
+ ++pos_it;
+ }
+ }
+
+ void op_alive() {
+ dout(10) << __func__ << dendl;
+ if (!osdmap.exists(whoami)) {
+ dout(0) << __func__ << " I'm not in the osdmap!!\n";
+ JSONFormatter f(true);
+ osdmap.dump(&f);
+ f.flush(*_dout);
+ *_dout << dendl;
+ }
+ if (osdmap.get_epoch() == 0) {
+ dout(1) << __func__ << " wait for osdmap" << dendl;
+ return;
+ }
+ epoch_t up_thru = osdmap.get_up_thru(whoami);
+ dout(10) << __func__ << "up_thru: " << osdmap.get_up_thru(whoami) << dendl;
+
+ monc.send_mon_message(new MOSDAlive(osdmap.get_epoch(), up_thru));
+ }
+
+ void op_pgtemp() {
+ if (osdmap.get_epoch() == 0) {
+ dout(1) << __func__ << " wait for osdmap" << dendl;
+ return;
+ }
+ dout(10) << __func__ << dendl;
+ MOSDPGTemp *m = new MOSDPGTemp(osdmap.get_epoch());
+ monc.send_mon_message(m);
+ }
+
+ void op_failure() {
+ dout(10) << __func__ << dendl;
+ }
+
+ void op_pgstats() {
+ dout(10) << __func__ << dendl;
+
+ modify_pgs();
+ if (!pgs_changes.empty())
+ send_pg_stats();
+ monc.sub_want("osd_pg_creates", 0, CEPH_SUBSCRIBE_ONETIME);
+ monc.renew_subs();
+
+ dout(20) << __func__ << " pg pools:\n";
+
+ JSONFormatter f(true);
+ f.open_array_section("pools");
+ auto& osdmap_pools = osdmap.get_pools();
+ for (auto pit = osdmap_pools.begin(); pit != osdmap_pools.end(); ++pit) {
+ const int64_t pool_id = pit->first;
+ const pg_pool_t &pool = pit->second;
+ f.open_object_section("pool");
+ f.dump_int("pool_id", pool_id);
+ f.open_object_section("pool_dump");
+ pool.dump(&f);
+ f.close_section();
+ f.close_section();
+ }
+ f.close_section();
+ f.flush(*_dout);
+ *_dout << dendl;
+ }
+
+ void op_log() {
+ dout(10) << __func__ << dendl;
+
+ MLog *m = new MLog(monc.get_fsid());
+
+ boost::uniform_int<> log_rng(1, 10);
+ size_t num_entries = log_rng(gen);
+ dout(10) << __func__
+ << " send " << num_entries << " log messages" << dendl;
+
+ utime_t now = ceph_clock_now();
+ int seq = 0;
+ for (; num_entries > 0; --num_entries) {
+ LogEntry e;
+ e.rank = messenger->get_myname();
+ e.addrs = messenger->get_myaddrs();
+ e.stamp = now;
+ e.seq = seq++;
+ e.prio = CLOG_DEBUG;
+ e.msg = "OSDStub::op_log";
+ m->entries.push_back(e);
+ }
+
+ monc.send_mon_message(m);
+ }
+
+ void _tick() override {
+ if (!osdmap.exists(whoami)) {
+ std::cout << __func__ << " not in the cluster; boot!" << std::endl;
+ boot();
+ return;
+ }
+
+ update_osd_stat();
+
+ boost::uniform_int<> op_rng(STUB_MON_OSD_FIRST, STUB_MON_OSD_LAST);
+ int op = op_rng(gen);
+ switch (op) {
+ case STUB_MON_OSD_ALIVE:
+ op_alive();
+ break;
+ case STUB_MON_OSD_PGTEMP:
+ op_pgtemp();
+ break;
+ case STUB_MON_OSD_FAILURE:
+ op_failure();
+ break;
+ case STUB_MON_OSD_PGSTATS:
+ op_pgstats();
+ break;
+ case STUB_MON_LOG:
+ op_log();
+ break;
+ }
+ }
+
+ void handle_pg_create(MOSDPGCreate *m) {
+ ceph_assert(m != NULL);
+ if (m->epoch < osdmap.get_epoch()) {
+ std::cout << __func__ << " epoch " << m->epoch << " < "
+ << osdmap.get_epoch() << "; dropping" << std::endl;
+ m->put();
+ return;
+ }
+
+ for (map<pg_t,pg_create_t>::iterator it = m->mkpg.begin();
+ it != m->mkpg.end(); ++it) {
+ pg_create_t &c = it->second;
+ std::cout << __func__ << " pg " << it->first
+ << " created " << c.created
+ << " parent " << c.parent << std::endl;
+ if (pgs.count(it->first)) {
+ std::cout << __func__ << " pg " << it->first
+ << " exists; skipping" << std::endl;
+ continue;
+ }
+
+ pg_t pgid = it->first;
+ add_pg(pgid, c.created, c.parent);
+ }
+ send_pg_stats();
+ }
+
+ void handle_osd_map(MOSDMap *m) {
+ dout(1) << __func__ << dendl;
+ if (m->fsid != monc.get_fsid()) {
+ dout(0) << __func__
+ << " message fsid " << m->fsid << " != " << monc.get_fsid()
+ << dendl;
+ dout(0) << __func__ << " " << m
+ << " from " << m->get_source_inst()
+ << dendl;
+ dout(0) << monc.get_monmap() << dendl;
+ }
+ ceph_assert(m->fsid == monc.get_fsid());
+
+ epoch_t first = m->get_first();
+ epoch_t last = m->get_last();
+ dout(5) << __func__
+ << " epochs [" << first << "," << last << "]"
+ << " current " << osdmap.get_epoch() << dendl;
+
+ if (last <= osdmap.get_epoch()) {
+ dout(5) << __func__ << " no new maps here; dropping" << dendl;
+ m->put();
+ return;
+ }
+
+ if (first > osdmap.get_epoch() + 1) {
+ dout(5) << __func__
+ << osdmap.get_epoch() + 1 << ".." << (first-1) << dendl;
+ if ((m->oldest_map < first && osdmap.get_epoch() == 0) ||
+ m->oldest_map <= osdmap.get_epoch()) {
+ monc.sub_want("osdmap", osdmap.get_epoch()+1,
+ CEPH_SUBSCRIBE_ONETIME);
+ monc.renew_subs();
+ m->put();
+ return;
+ }
+ }
+
+ epoch_t start_full = std::max(osdmap.get_epoch() + 1, first);
+
+ if (m->maps.size() > 0) {
+ map<epoch_t,bufferlist>::reverse_iterator rit;
+ rit = m->maps.rbegin();
+ if (start_full <= rit->first) {
+ start_full = rit->first;
+ dout(5) << __func__
+ << " full epoch " << start_full << dendl;
+ bufferlist &bl = rit->second;
+ auto p = bl.cbegin();
+ osdmap.decode(p);
+ }
+ }
+
+ for (epoch_t e = start_full; e <= last; e++) {
+ map<epoch_t,bufferlist>::iterator it;
+ it = m->incremental_maps.find(e);
+ if (it == m->incremental_maps.end())
+ continue;
+
+ dout(20) << __func__
+ << " incremental epoch " << e
+ << " on full epoch " << start_full << dendl;
+ OSDMap::Incremental inc;
+ bufferlist &bl = it->second;
+ auto p = bl.cbegin();
+ inc.decode(p);
+
+ int err = osdmap.apply_incremental(inc);
+ if (err < 0) {
+ derr << "osd." << whoami << "::" << __func__
+ << "** ERROR: applying incremental: "
+ << cpp_strerror(err) << dendl;
+ ceph_abort_msg("error applying incremental");
+ }
+ }
+ dout(30) << __func__ << "\nosdmap:\n";
+ JSONFormatter f(true);
+ osdmap.dump(&f);
+ f.flush(*_dout);
+ *_dout << dendl;
+
+ if (osdmap.is_up(whoami) &&
+ osdmap.get_addrs(whoami) == messenger->get_myaddrs()) {
+ dout(1) << __func__
+ << " got into the osdmap and we're up!" << dendl;
+ }
+
+ if (m->newest_map && m->newest_map > last) {
+ dout(1) << __func__
+ << " they have more maps; requesting them!" << dendl;
+ monc.sub_want("osdmap", osdmap.get_epoch()+1, CEPH_SUBSCRIBE_ONETIME);
+ monc.renew_subs();
+ }
+
+ dout(10) << __func__ << " done" << dendl;
+ m->put();
+ }
+
+ bool ms_dispatch(Message *m) override {
+ dout(1) << __func__ << " " << *m << dendl;
+
+ switch (m->get_type()) {
+ case MSG_OSD_PG_CREATE:
+ handle_pg_create((MOSDPGCreate*)m);
+ break;
+ case CEPH_MSG_OSD_MAP:
+ handle_osd_map((MOSDMap*)m);
+ break;
+ default:
+ m->put();
+ break;
+ }
+ return true;
+ }
+
+ void ms_handle_connect(Connection *con) override {
+ dout(1) << __func__ << " " << con << dendl;
+ if (con->get_peer_type() == CEPH_ENTITY_TYPE_MON) {
+ dout(10) << __func__ << " on mon" << dendl;
+ }
+ }
+
+ void ms_handle_remote_reset(Connection *con) override {}
+
+ bool ms_handle_reset(Connection *con) override {
+ dout(1) << __func__ << dendl;
+ return con->get_priv().get();
+ }
+
+ bool ms_handle_refused(Connection *con) override {
+ return false;
+ }
+
+ const string get_name() override {
+ stringstream ss;
+ ss << "osd." << whoami;
+ return ss.str();
+ }
+};
+
+double const OSDStub::STUB_BOOT_INTERVAL = 10.0;
+
+#undef dout_prefix
+#define dout_prefix *_dout << "main "
+
+const char *our_name = NULL;
+vector<TestStub*> stubs;
+ceph::mutex shutdown_lock = ceph::make_mutex("main::shutdown_lock");
+ceph::condition_variable shutdown_cond;
+Context *shutdown_cb = NULL;
+SafeTimer *shutdown_timer = NULL;
+
+struct C_Shutdown : public Context
+{
+ void finish(int r) override {
+ generic_dout(10) << "main::shutdown time has ran out" << dendl;
+ shutdown_cond.notify_all();
+ }
+};
+
+void handle_test_signal(int signum)
+{
+ if ((signum != SIGINT) && (signum != SIGTERM))
+ return;
+
+ std::cerr << "*** Got signal " << sig_str(signum) << " ***" << std::endl;
+ std::lock_guard l{shutdown_lock};
+ if (shutdown_timer) {
+ shutdown_timer->cancel_all_events();
+ shutdown_cond.notify_all();
+ }
+}
+
+void usage() {
+ ceph_assert(our_name != NULL);
+
+ std::cout << "usage: " << our_name
+ << " <--stub-id ID> [--stub-id ID...]"
+ << std::endl;
+ std::cout << "\n\
+Global Options:\n\
+ -c FILE Read configuration from FILE\n\
+ --keyring FILE Read keyring from FILE\n\
+ --help This message\n\
+\n\
+Test-specific Options:\n\
+ --stub-id ID1..ID2 Interval of OSD ids for multiple stubs to mimic.\n\
+ --stub-id ID OSD id a stub will mimic to be\n\
+ (same as --stub-id ID..ID)\n\
+" << std::endl;
+}
+
+int get_id_interval(int &first, int &last, string &str)
+{
+ size_t found = str.find("..");
+ string first_str, last_str;
+ if (found == string::npos) {
+ first_str = last_str = str;
+ } else {
+ first_str = str.substr(0, found);
+ last_str = str.substr(found+2);
+ }
+
+ string err;
+ first = strict_strtol(first_str.c_str(), 10, &err);
+ if ((first == 0) && (!err.empty())) {
+ std::cerr << err << std::endl;
+ return -1;
+ }
+
+ last = strict_strtol(last_str.c_str(), 10, &err);
+ if ((last == 0) && (!err.empty())) {
+ std::cerr << err << std::endl;
+ return -1;
+ }
+ return 0;
+}
+
+int main(int argc, const char *argv[])
+{
+ vector<const char*> args;
+ our_name = argv[0];
+ argv_to_vec(argc, argv, args);
+
+ auto cct = global_init(NULL, args,
+ CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
+
+ common_init_finish(g_ceph_context);
+ g_ceph_context->_conf.apply_changes(nullptr);
+
+ set<int> stub_ids;
+ double duration = 300.0;
+
+ for (std::vector<const char*>::iterator i = args.begin(); i != args.end();) {
+ string val;
+
+ if (ceph_argparse_double_dash(args, i)) {
+ break;
+ } else if (ceph_argparse_witharg(args, i, &val,
+ "--stub-id", (char*) NULL)) {
+ int first = -1, last = -1;
+ if (get_id_interval(first, last, val) < 0) {
+ std::cerr << "** error parsing stub id '" << val << "'" << std::endl;
+ exit(1);
+ }
+
+ for (; first <= last; ++first)
+ stub_ids.insert(first);
+ } else if (ceph_argparse_witharg(args, i, &val,
+ "--duration", (char*) NULL)) {
+ string err;
+ duration = (double) strict_strtol(val.c_str(), 10, &err);
+ if ((duration == 0) && (!err.empty())) {
+ std::cerr << "** error parsing '--duration " << val << "': '"
+ << err << std::endl;
+ exit(1);
+ }
+ } else if (ceph_argparse_flag(args, i, "--help", (char*) NULL)) {
+ usage();
+ exit(0);
+ } else {
+ std::cerr << "unknown argument '" << *i << "'" << std::endl;
+ return 1;
+ }
+ }
+
+ if (stub_ids.empty()) {
+ std::cerr << "** error: must specify at least one '--stub-id <ID>'"
+ << std::endl;
+ usage();
+ return 1;
+ }
+
+ for (set<int>::iterator i = stub_ids.begin(); i != stub_ids.end(); ++i) {
+ int whoami = *i;
+
+ std::cout << __func__ << " starting stub." << whoami << std::endl;
+ OSDStub *stub = new OSDStub(whoami, g_ceph_context);
+ int err = stub->init();
+ if (err < 0) {
+ std::cerr << "** osd stub error: " << cpp_strerror(-err) << std::endl;
+ return 1;
+ }
+ stubs.push_back(stub);
+ }
+
+ std::cout << __func__ << " starting client stub" << std::endl;
+ ClientStub *cstub = new ClientStub(g_ceph_context);
+ int err = cstub->init();
+ if (err < 0) {
+ std::cerr << "** client stub error: " << cpp_strerror(-err) << std::endl;
+ return 1;
+ }
+ stubs.push_back(cstub);
+
+ init_async_signal_handler();
+ register_async_signal_handler_oneshot(SIGINT, handle_test_signal);
+ register_async_signal_handler_oneshot(SIGTERM, handle_test_signal);
+
+ {
+ unique_lock locker{shutdown_lock};
+ shutdown_timer = new SafeTimer(g_ceph_context, shutdown_lock);
+ shutdown_timer->init();
+ if (duration != 0) {
+ std::cout << __func__
+ << " run test for " << duration << " seconds" << std::endl;
+ shutdown_timer->add_event_after((double) duration, new C_Shutdown);
+ }
+ shutdown_cond.wait(locker);
+ shutdown_timer->shutdown();
+ delete shutdown_timer;
+ shutdown_timer = NULL;
+ }
+ unregister_async_signal_handler(SIGINT, handle_test_signal);
+ unregister_async_signal_handler(SIGTERM, handle_test_signal);
+
+ std::cout << __func__ << " waiting for stubs to finish" << std::endl;
+ vector<TestStub*>::iterator it;
+ int i;
+ for (i = 0, it = stubs.begin(); it != stubs.end(); ++it, ++i) {
+ if (*it != NULL) {
+ (*it)->shutdown();
+ (*it)->wait();
+ std::cout << __func__ << " finished " << (*it)->get_name() << std::endl;
+ delete (*it);
+ (*it) = NULL;
+ }
+ }
+
+ return 0;
+}