diff options
Diffstat (limited to '')
29 files changed, 16714 insertions, 0 deletions
diff --git a/src/test/os/CMakeLists.txt b/src/test/os/CMakeLists.txt new file mode 100644 index 000000000..35eb8f117 --- /dev/null +++ b/src/test/os/CMakeLists.txt @@ -0,0 +1,7 @@ +# unittest_lfnindex +add_executable(unittest_lfnindex + TestLFNIndex.cc + ) +add_ceph_unittest(unittest_lfnindex) +target_link_libraries(unittest_lfnindex os global) + diff --git a/src/test/os/TestLFNIndex.cc b/src/test/os/TestLFNIndex.cc new file mode 100644 index 000000000..d74de739d --- /dev/null +++ b/src/test/os/TestLFNIndex.cc @@ -0,0 +1,493 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library Public License for more details. 
+ * + */ + +#include <stdio.h> +#include <signal.h> +#include "os/filestore/LFNIndex.h" +#include "os/filestore/chain_xattr.h" +#include "common/ceph_argparse.h" +#include "global/global_init.h" +#include <gtest/gtest.h> + +class TestWrapLFNIndex : public LFNIndex { +public: + TestWrapLFNIndex(CephContext* cct, + coll_t collection, + const char *base_path, + uint32_t index_version) + : LFNIndex(cct, collection, base_path, index_version) {} + + uint32_t collection_version() override { + return index_version; + } + + int cleanup() override { return 0; } + + int _split( + uint32_t match, + uint32_t bits, + CollectionIndex* dest + ) override { return 0; } + int _merge( + uint32_t bits, + CollectionIndex* dest + ) override { return 0; } + + void test_generate_and_parse(const ghobject_t &hoid, const std::string &mangled_expected) { + const std::string mangled_name = lfn_generate_object_name(hoid); + EXPECT_EQ(mangled_expected, mangled_name); + ghobject_t hoid_parsed; + EXPECT_EQ(0, lfn_parse_object_name(mangled_name, &hoid_parsed)); + EXPECT_EQ(hoid, hoid_parsed); + } + +protected: + int _init() override { return 0; } + + int _created( + const vector<string> &path, + const ghobject_t &hoid, + const string &mangled_name + ) override { return 0; } + + int _remove( + const vector<string> &path, + const ghobject_t &hoid, + const string &mangled_name + ) override { return 0; } + + int _lookup( + const ghobject_t &hoid, + vector<string> *path, + string *mangled_name, + int *exists + ) override { return 0; } + + int _collection_list_partial( + const ghobject_t &start, + const ghobject_t &end, + int max_count, + vector<ghobject_t> *ls, + ghobject_t *next + ) override { return 0; } + int _pre_hash_collection( + uint32_t pg_num, + uint64_t expected_num_objs + ) override { return 0; } + +}; + +class TestHASH_INDEX_TAG : public TestWrapLFNIndex, public ::testing::Test { +public: + TestHASH_INDEX_TAG() + : TestWrapLFNIndex(g_ceph_context, coll_t(), "PATH_1", + 
CollectionIndex::HASH_INDEX_TAG) { + } +}; + +TEST_F(TestHASH_INDEX_TAG, generate_and_parse_name) { + const vector<string> path; + const std::string key; + uint64_t hash = 0xABABABAB; + uint64_t pool = -1; + + test_generate_and_parse(ghobject_t(hobject_t(object_t(".A/B_\\C.D"), key, CEPH_NOSNAP, hash, pool, "")), + "\\.A\\sB_\\\\C.D_head_ABABABAB"); + test_generate_and_parse(ghobject_t(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, "")), + "\\dA_head_ABABABAB"); +} + +class TestHASH_INDEX_TAG_2 : public TestWrapLFNIndex, public ::testing::Test { +public: + TestHASH_INDEX_TAG_2() + : TestWrapLFNIndex(g_ceph_context, + coll_t(), "PATH_1", CollectionIndex::HASH_INDEX_TAG_2) { + } +}; + +TEST_F(TestHASH_INDEX_TAG_2, generate_and_parse_name) { + const vector<string> path; + const std::string key("KEY"); + uint64_t hash = 0xABABABAB; + uint64_t pool = -1; + + { + std::string name(".XA/B_\\C.D"); + name[1] = '\0'; + ghobject_t hoid(hobject_t(object_t(name), key, CEPH_NOSNAP, hash, pool, "")); + + test_generate_and_parse(hoid, "\\.\\nA\\sB\\u\\\\C.D_KEY_head_ABABABAB"); + } + test_generate_and_parse(ghobject_t(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, "")), + "\\dA_KEY_head_ABABABAB"); +} + +class TestHOBJECT_WITH_POOL : public TestWrapLFNIndex, public ::testing::Test { +public: + TestHOBJECT_WITH_POOL() + : TestWrapLFNIndex(g_ceph_context, coll_t(), + "PATH_1", CollectionIndex::HOBJECT_WITH_POOL) { + } +}; + +TEST_F(TestHOBJECT_WITH_POOL, generate_and_parse_name) { + const vector<string> path; + const std::string key("KEY"); + uint64_t hash = 0xABABABAB; + uint64_t pool = 0xCDCDCDCD; + int64_t gen = 0xefefefefef; + shard_id_t shard_id(0xb); + + { + std::string name(".XA/B_\\C.D"); + name[1] = '\0'; + ghobject_t hoid(hobject_t(object_t(name), key, CEPH_NOSNAP, hash, pool, "")); + hoid.hobj.nspace = "NSPACE"; + + test_generate_and_parse(hoid, "\\.\\nA\\sB\\u\\\\C.D_KEY_head_ABABABAB_NSPACE_cdcdcdcd"); + } + { + ghobject_t 
hoid(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, "")); + hoid.hobj.nspace = "NSPACE"; + + test_generate_and_parse(hoid, "\\dA_KEY_head_ABABABAB_NSPACE_cdcdcdcd"); + } + { + std::string name(".XA/B_\\C.D"); + name[1] = '\0'; + ghobject_t hoid(hobject_t(object_t(name), key, CEPH_NOSNAP, hash, pool, ""), gen, shard_id); + hoid.hobj.nspace = "NSPACE"; + + test_generate_and_parse(hoid, "\\.\\nA\\sB\\u\\\\C.D_KEY_head_ABABABAB_NSPACE_cdcdcdcd_efefefefef_b"); + } + { + ghobject_t hoid(hobject_t(object_t("DIR_A"), key, CEPH_NOSNAP, hash, pool, ""), gen, shard_id); + hoid.hobj.nspace = "NSPACE"; + + test_generate_and_parse(hoid, "\\dA_KEY_head_ABABABAB_NSPACE_cdcdcdcd_efefefefef_b"); + } +} + +class TestLFNIndex : public TestWrapLFNIndex, public ::testing::Test { +public: + TestLFNIndex() + : TestWrapLFNIndex(g_ceph_context, coll_t(), "PATH_1", + CollectionIndex::HOBJECT_WITH_POOL) { + } + + void SetUp() override { + ::chmod("PATH_1", 0700); + ASSERT_EQ(0, ::system("rm -fr PATH_1")); + ASSERT_EQ(0, ::mkdir("PATH_1", 0700)); + } + + void TearDown() override { + ASSERT_EQ(0, ::system("rm -fr PATH_1")); + } +}; + +TEST_F(TestLFNIndex, remove_object) { + const vector<string> path; + + // + // small object name removal + // + { + std::string mangled_name; + int exists = 666; + ghobject_t hoid(hobject_t(sobject_t("ABC", CEPH_NOSNAP))); + + EXPECT_EQ(0, ::chmod("PATH_1", 0000)); + if (getuid() != 0) { + EXPECT_EQ(-EACCES, remove_object(path, hoid)); + } + EXPECT_EQ(0, ::chmod("PATH_1", 0700)); + EXPECT_EQ(-ENOENT, remove_object(path, hoid)); + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); + const std::string pathname("PATH_1/" + mangled_name); + EXPECT_EQ(0, ::close(::creat(pathname.c_str(), 0600))); + EXPECT_EQ(0, remove_object(path, hoid)); + EXPECT_EQ(-1, ::access(pathname.c_str(), 0)); + EXPECT_EQ(ENOENT, errno); + } + // + // long object name removal of a single file + // + { + std::string mangled_name; + int exists; + const std::string 
object_name(1024, 'A'); + ghobject_t hoid(hobject_t(sobject_t(object_name, CEPH_NOSNAP))); + + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); + EXPECT_EQ(0, exists); + EXPECT_NE(std::string::npos, mangled_name.find("0_long")); + std::string pathname("PATH_1/" + mangled_name); + EXPECT_EQ(0, ::close(::creat(pathname.c_str(), 0600))); + EXPECT_EQ(0, created(hoid, pathname.c_str())); + + EXPECT_EQ(0, remove_object(path, hoid)); + EXPECT_EQ(-1, ::access(pathname.c_str(), 0)); + EXPECT_EQ(ENOENT, errno); + } + + // + // long object name removal of the last file + // + { + std::string mangled_name; + int exists; + const std::string object_name(1024, 'A'); + ghobject_t hoid(hobject_t(sobject_t(object_name, CEPH_NOSNAP))); + + // + // PATH_1/AAA..._0_long => does not match long object name + // + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); + EXPECT_EQ(0, exists); + EXPECT_NE(std::string::npos, mangled_name.find("0_long")); + std::string pathname("PATH_1/" + mangled_name); + EXPECT_EQ(0, ::close(::creat(pathname.c_str(), 0600))); + EXPECT_EQ(0, created(hoid, pathname.c_str())); + string LFN_ATTR = "user.cephos.lfn"; + if (index_version != HASH_INDEX_TAG) { + char buf[100]; + snprintf(buf, sizeof(buf), "%d", index_version); + LFN_ATTR += string(buf); + } + const std::string object_name_1 = object_name + "SUFFIX"; + EXPECT_EQ(object_name_1.size(), (unsigned)chain_setxattr(pathname.c_str(), LFN_ATTR.c_str(), object_name_1.c_str(), object_name_1.size())); + + // + // PATH_1/AAA..._1_long => matches long object name + // + std::string mangled_name_1; + exists = 666; + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name_1, &exists)); + EXPECT_NE(std::string::npos, mangled_name_1.find("1_long")); + EXPECT_EQ(0, exists); + std::string pathname_1("PATH_1/" + mangled_name_1); + auto retvalue = ::creat(pathname_1.c_str(), 0600); + ceph_assert(retvalue > 2); + EXPECT_EQ(0, ::close(retvalue)); + EXPECT_EQ(0, created(hoid, 
pathname_1.c_str())); + + // + // remove_object skips PATH_1/AAA..._0_long and removes PATH_1/AAA..._1_long + // + EXPECT_EQ(0, remove_object(path, hoid)); + EXPECT_EQ(0, ::access(pathname.c_str(), 0)); + EXPECT_EQ(-1, ::access(pathname_1.c_str(), 0)); + EXPECT_EQ(ENOENT, errno); + EXPECT_EQ(0, ::unlink(pathname.c_str())); + } + + // + // long object name removal of a file in the middle of the list + // + { + std::string mangled_name; + int exists; + const std::string object_name(1024, 'A'); + ghobject_t hoid(hobject_t(sobject_t(object_name, CEPH_NOSNAP))); + + // + // PATH_1/AAA..._0_long => matches long object name + // + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); + EXPECT_EQ(0, exists); + EXPECT_NE(std::string::npos, mangled_name.find("0_long")); + std::string pathname("PATH_1/" + mangled_name); + EXPECT_EQ(0, ::close(::creat(pathname.c_str(), 0600))); + EXPECT_EQ(0, created(hoid, pathname.c_str())); + // + // PATH_1/AAA..._1_long => matches long object name + // + std::string mangled_name_1 = mangled_name; + mangled_name_1.replace(mangled_name_1.find("0_long"), 6, "1_long"); + const std::string pathname_1("PATH_1/" + mangled_name_1); + const std::string cmd("cp -a " + pathname + " " + pathname_1); + EXPECT_EQ(0, ::system(cmd.c_str())); + const string ATTR = "user.MARK"; + EXPECT_EQ((unsigned)1, (unsigned)chain_setxattr(pathname_1.c_str(), ATTR.c_str(), "Y", 1)); + + // + // remove_object replaces the file to be removed with the last from the + // collision list. 
In this case it replaces + // PATH_1/AAA..._0_long + // with + // PATH_1/AAA..._1_long + // + EXPECT_EQ(0, remove_object(path, hoid)); + EXPECT_EQ(0, ::access(pathname.c_str(), 0)); + char buffer[1] = { 0, }; + EXPECT_EQ((unsigned)1, (unsigned)chain_getxattr(pathname.c_str(), ATTR.c_str(), buffer, 1)); + EXPECT_EQ('Y', buffer[0]); + EXPECT_EQ(-1, ::access(pathname_1.c_str(), 0)); + EXPECT_EQ(ENOENT, errno); + } +} + +TEST_F(TestLFNIndex, get_mangled_name) { + const vector<string> path; + + // + // small object name + // + { + std::string mangled_name; + int exists = 666; + ghobject_t hoid(hobject_t(sobject_t("ABC", CEPH_NOSNAP))); + + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); + EXPECT_NE(std::string::npos, mangled_name.find("ABC__head")); + EXPECT_EQ(std::string::npos, mangled_name.find("0_long")); + EXPECT_EQ(0, exists); + const std::string pathname("PATH_1/" + mangled_name); + EXPECT_EQ(0, ::close(::creat(pathname.c_str(), 0600))); + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); + EXPECT_NE(std::string::npos, mangled_name.find("ABC__head")); + EXPECT_EQ(1, exists); + EXPECT_EQ(0, ::unlink(pathname.c_str())); + } + // + // long object name + // + { + std::string mangled_name; + int exists; + const std::string object_name(1024, 'A'); + ghobject_t hoid(hobject_t(sobject_t(object_name, CEPH_NOSNAP))); + + // + // long version of the mangled name and no matching + // file exists + // + mangled_name.clear(); + exists = 666; + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); + EXPECT_NE(std::string::npos, mangled_name.find("0_long")); + EXPECT_EQ(0, exists); + + const std::string pathname("PATH_1/" + mangled_name); + + // + // if a file by the same name exists but does not have the + // expected extended attribute, it is silently removed + // + mangled_name.clear(); + exists = 666; + EXPECT_EQ(0, ::close(::creat(pathname.c_str(), 0600))); + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, 
&exists)); + EXPECT_NE(std::string::npos, mangled_name.find("0_long")); + EXPECT_EQ(0, exists); + EXPECT_EQ(-1, ::access(pathname.c_str(), 0)); + EXPECT_EQ(ENOENT, errno); + + // + // if a file by the same name exists but does not have the + // expected extended attribute, and cannot be removed, + // return on error + // + mangled_name.clear(); + exists = 666; + EXPECT_EQ(0, ::close(::creat(pathname.c_str(), 0600))); + EXPECT_EQ(0, ::chmod("PATH_1", 0500)); + if (getuid() != 0) { + EXPECT_EQ(-EACCES, get_mangled_name(path, hoid, &mangled_name, &exists)); + } + EXPECT_EQ("", mangled_name); + EXPECT_EQ(666, exists); + EXPECT_EQ(0, ::chmod("PATH_1", 0700)); + EXPECT_EQ(0, ::unlink(pathname.c_str())); + + // + // long version of the mangled name and a file + // exists by that name and contains the long object name + // + mangled_name.clear(); + exists = 666; + auto retvalue = ::creat(pathname.c_str(), 0600); + ceph_assert(retvalue > 2); + EXPECT_EQ(0, ::close(retvalue)); + EXPECT_EQ(0, created(hoid, pathname.c_str())); + EXPECT_EQ(0, get_mangled_name(path, hoid, &mangled_name, &exists)); + EXPECT_NE(std::string::npos, mangled_name.find("0_long")); + EXPECT_EQ(1, exists); + EXPECT_EQ(0, ::access(pathname.c_str(), 0)); + + // + // long version of the mangled name and a file exists by that name + // and contains a long object name with the same prefix but they + // are not identical and it so happens that their SHA1 is + // identical : a collision number is used to differentiate them + // + string LFN_ATTR = "user.cephos.lfn"; + if (index_version != HASH_INDEX_TAG) { + char buf[100]; + snprintf(buf, sizeof(buf), "%d", index_version); + LFN_ATTR += string(buf); + } + const std::string object_name_same_prefix = object_name + "SUFFIX"; + EXPECT_EQ(object_name_same_prefix.size(), (unsigned)chain_setxattr(pathname.c_str(), LFN_ATTR.c_str(), object_name_same_prefix.c_str(), object_name_same_prefix.size())); + std::string mangled_name_same_prefix; + exists = 666; + EXPECT_EQ(0, 
get_mangled_name(path, hoid, &mangled_name_same_prefix, &exists)); + EXPECT_NE(std::string::npos, mangled_name_same_prefix.find("1_long")); + EXPECT_EQ(0, exists); + + EXPECT_EQ(0, ::unlink(pathname.c_str())); + } +} + +int main(int argc, char **argv) { + int fd = ::creat("detect", 0600); + if (fd < 0){ + cerr << "failed to create file detect" << std::endl; + return EXIT_FAILURE; + } + int ret = chain_fsetxattr(fd, "user.test", "A", 1); + ::close(fd); + ::unlink("detect"); + if (ret < 0) { + cerr << "SKIP LFNIndex because unable to test for xattr" << std::endl; + } else { + vector<const char*> args; + argv_to_vec(argc, (const char **)argv, args); + + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); + } +} + +/* + * Local Variables: + * compile-command: "cd ../.. ; + * make unittest_lfnindex && + * valgrind --tool=memcheck ./unittest_lfnindex \ + * # --gtest_filter=TestLFNIndex.* --log-to-stderr=true --debug-filestore=20" + * End: + */ diff --git a/src/test/osd/CMakeLists.txt b/src/test/osd/CMakeLists.txt new file mode 100644 index 000000000..fc343d969 --- /dev/null +++ b/src/test/osd/CMakeLists.txt @@ -0,0 +1,123 @@ +# test_rados +add_executable(ceph_test_rados + TestRados.cc + TestOpStat.cc + Object.cc + RadosModel.cc + ) +target_link_libraries(ceph_test_rados + librados + global + ${BLKID_LIBRARIES} + ${CMAKE_DL_LIBS} + ${EXTRALIBS} + ${CMAKE_DL_LIBS} + ) +install(TARGETS + ceph_test_rados + DESTINATION ${CMAKE_INSTALL_BINDIR}) + +# test_stale_read +add_executable(ceph_test_osd_stale_read + ceph_test_osd_stale_read.cc + ) +target_link_libraries(ceph_test_osd_stale_read + librados + global + ${CMAKE_DL_LIBS} + ${EXTRALIBS} + ${CMAKE_DL_LIBS} + ${UNITTEST_LIBS} + ) +install(TARGETS + ceph_test_osd_stale_read + DESTINATION ${CMAKE_INSTALL_BINDIR}) + +# scripts 
+add_ceph_test(safe-to-destroy.sh ${CMAKE_CURRENT_SOURCE_DIR}/safe-to-destroy.sh) + +# unittest_osdmap +add_executable(unittest_osdmap + TestOSDMap.cc + ) +add_ceph_unittest(unittest_osdmap) +target_link_libraries(unittest_osdmap global ${BLKID_LIBRARIES}) + +# unittest_osd_types +add_executable(unittest_osd_types + types.cc + ) +add_ceph_unittest(unittest_osd_types) +target_link_libraries(unittest_osd_types global) + +# unittest_ecbackend +add_executable(unittest_ecbackend + TestECBackend.cc + ) +add_ceph_unittest(unittest_ecbackend) +target_link_libraries(unittest_ecbackend osd global) + +# unittest_osdscrub +add_executable(unittest_osdscrub + TestOSDScrub.cc + $<TARGET_OBJECTS:unit-main> + ) +add_ceph_unittest(unittest_osdscrub) +target_link_libraries(unittest_osdscrub osd os global ${CMAKE_DL_LIBS} mon ${BLKID_LIBRARIES}) + +# unittest_pglog +add_executable(unittest_pglog + TestPGLog.cc + $<TARGET_OBJECTS:unit-main> + $<TARGET_OBJECTS:store_test_fixture> + ) +add_ceph_unittest(unittest_pglog) +target_link_libraries(unittest_pglog osd os global ${CMAKE_DL_LIBS} ${BLKID_LIBRARIES}) + +# unittest_hitset +add_executable(unittest_hitset + hitset.cc + ) +add_ceph_unittest(unittest_hitset) +target_link_libraries(unittest_hitset osd global ${BLKID_LIBRARIES}) + +# unittest_osd_osdcap +add_executable(unittest_osd_osdcap + osdcap.cc +) +if(HAS_VTA) + set_source_files_properties(osdcap.cc PROPERTIES + COMPILE_FLAGS -fno-var-tracking-assignments) +endif() +add_ceph_unittest(unittest_osd_osdcap) +target_link_libraries(unittest_osd_osdcap osd global ${BLKID_LIBRARIES}) + +# unittest ExtentCache +add_executable(unittest_extent_cache + test_extent_cache.cc +) +add_ceph_unittest(unittest_extent_cache) +target_link_libraries(unittest_extent_cache osd global ${BLKID_LIBRARIES}) + +# unittest PGTransaction +add_executable(unittest_pg_transaction + test_pg_transaction.cc +) +add_ceph_unittest(unittest_pg_transaction) +target_link_libraries(unittest_pg_transaction osd global 
${BLKID_LIBRARIES}) + +# unittest ECTransaction +add_executable(unittest_ec_transaction + test_ec_transaction.cc +) +add_ceph_unittest(unittest_ec_transaction) +target_link_libraries(unittest_ec_transaction osd global ${BLKID_LIBRARIES}) + +# unittest_mclock_scheduler +add_executable(unittest_mclock_scheduler + TestMClockScheduler.cc +) +add_ceph_unittest(unittest_mclock_scheduler) +target_link_libraries(unittest_mclock_scheduler + global osd dmclock os +) diff --git a/src/test/osd/Object.cc b/src/test/osd/Object.cc new file mode 100644 index 000000000..9d914abd7 --- /dev/null +++ b/src/test/osd/Object.cc @@ -0,0 +1,200 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include "include/interval_set.h" +#include "include/buffer.h" +#include <list> +#include <map> +#include <set> +#include <iostream> + +#include "Object.h" + +void ContDesc::encode(bufferlist &bl) const +{ + ENCODE_START(1, 1, bl); + encode(objnum, bl); + encode(cursnap, bl); + encode(seqnum, bl); + encode(prefix, bl); + encode(oid, bl); + ENCODE_FINISH(bl); +} + +void ContDesc::decode(bufferlist::const_iterator &bl) +{ + DECODE_START(1, bl); + decode(objnum, bl); + decode(cursnap, bl); + decode(seqnum, bl); + decode(prefix, bl); + decode(oid, bl); + DECODE_FINISH(bl); +} + +std::ostream &operator<<(std::ostream &out, const ContDesc &rhs) +{ + return out << "(ObjNum " << rhs.objnum + << " snap " << rhs.cursnap + << " seq_num " << rhs.seqnum + << ")"; +} + +void AppendGenerator::get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) { + RandWrap rand(cont.seqnum); + uint64_t pos = off; + uint64_t limit = off + get_append_size(cont); + while (pos < limit) { + uint64_t segment_length = round_up( + rand() % (max_append_size - min_append_size), + alignment) + min_append_size; + ceph_assert(segment_length >= min_append_size); + if (segment_length + pos > limit) { + segment_length = limit - pos; + } + if (alignment) + ceph_assert(segment_length % alignment == 
0); + out.insert(std::pair<uint64_t, uint64_t>(pos, segment_length)); + pos += segment_length; + } +} + +void VarLenGenerator::get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) { + RandWrap rand(cont.seqnum); + uint64_t pos = 0; + uint64_t limit = get_length(cont); + bool include = false; + while (pos < limit) { + uint64_t segment_length = (rand() % (max_stride_size - min_stride_size)) + min_stride_size; + ceph_assert(segment_length < max_stride_size); + ceph_assert(segment_length >= min_stride_size); + if (segment_length + pos > limit) { + segment_length = limit - pos; + } + if (include) { + out.insert(std::pair<uint64_t, uint64_t>(pos, segment_length)); + include = false; + } else { + include = true; + } + pos += segment_length; + } +} + +void ObjectDesc::iterator::adjust_stack() { + while (!stack.empty() && pos >= stack.top().second.next) { + ceph_assert(pos == stack.top().second.next); + size = stack.top().second.size; + current = stack.top().first; + stack.pop(); + } + + if (stack.empty()) { + cur_valid_till = std::numeric_limits<uint64_t>::max(); + } else { + cur_valid_till = stack.top().second.next; + } + + while (current != layers.end() && !current->covers(pos)) { + uint64_t next = current->next(pos); + if (next < cur_valid_till) { + stack.emplace(current, StackState{next, size}); + cur_valid_till = next; + } + + ++current; + } + + if (current == layers.end()) { + size = 0; + } else { + current->iter.seek(pos); + size = std::min(size, current->get_size()); + cur_valid_till = std::min( + current->valid_till(pos), + cur_valid_till); + } +} + +const ContDesc &ObjectDesc::most_recent() { + return layers.begin()->second; +} + +void ObjectDesc::update(ContentsGenerator *gen, const ContDesc &next) { + layers.push_front(std::pair<std::shared_ptr<ContentsGenerator>, ContDesc>(std::shared_ptr<ContentsGenerator>(gen), next)); + return; +} + +bool ObjectDesc::check(bufferlist &to_check) { + iterator objiter = begin(); + uint64_t error_at = 0; + 
if (!objiter.check_bl_advance(to_check, &error_at)) { + std::cout << "incorrect buffer at pos " << error_at << std::endl; + return false; + } + + uint64_t size = layers.begin()->first->get_length(layers.begin()->second); + if (to_check.length() < size) { + std::cout << "only read " << to_check.length() + << " out of size " << size << std::endl; + return false; + } + return true; +} + +bool ObjectDesc::check_sparse(const std::map<uint64_t, uint64_t>& extents, + bufferlist &to_check) +{ + uint64_t off = 0; + uint64_t pos = 0; + auto objiter = begin(); + for (auto &&extiter : extents) { + // verify hole + { + bufferlist bl; + bl.append_zero(extiter.first - pos); + uint64_t error_at = 0; + if (!objiter.check_bl_advance(bl, &error_at)) { + std::cout << "sparse read omitted non-zero data at " + << error_at << std::endl; + return false; + } + } + + ceph_assert(off <= to_check.length()); + pos = extiter.first; + objiter.seek(pos); + + { + bufferlist bl; + bl.substr_of( + to_check, + off, + std::min(to_check.length() - off, extiter.second)); + uint64_t error_at = 0; + if (!objiter.check_bl_advance(bl, &error_at)) { + std::cout << "incorrect buffer at pos " << error_at << std::endl; + return false; + } + off += extiter.second; + pos += extiter.second; + } + + if (pos < extiter.first + extiter.second) { + std::cout << "reached end of iterator first" << std::endl; + return false; + } + } + + // final hole + bufferlist bl; + uint64_t size = layers.begin()->first->get_length(layers.begin()->second); + bl.append_zero(size - pos); + uint64_t error_at; + if (!objiter.check_bl_advance(bl, &error_at)) { + std::cout << "sparse read omitted non-zero data at " + << error_at << std::endl; + return false; + } + return true; +} diff --git a/src/test/osd/Object.h b/src/test/osd/Object.h new file mode 100644 index 000000000..a78a59823 --- /dev/null +++ b/src/test/osd/Object.h @@ -0,0 +1,539 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include 
"include/interval_set.h" +#include "include/buffer.h" +#include "include/encoding.h" +#include <list> +#include <map> +#include <set> +#include <stack> +#include <random> + +#ifndef OBJECT_H +#define OBJECT_H + +/// describes an object +class ContDesc { +public: + int objnum; + int cursnap; + unsigned seqnum; + std::string prefix; + std::string oid; + + ContDesc() : + objnum(0), cursnap(0), + seqnum(0), prefix("") {} + + ContDesc(int objnum, + int cursnap, + unsigned seqnum, + const std::string &prefix) : + objnum(objnum), cursnap(cursnap), + seqnum(seqnum), prefix(prefix) {} + + bool operator==(const ContDesc &rhs) { + return (rhs.objnum == objnum && + rhs.cursnap == cursnap && + rhs.seqnum == seqnum && + rhs.prefix == prefix && + rhs.oid == oid); + } + + bool operator<(const ContDesc &rhs) const { + return seqnum < rhs.seqnum; + } + + bool operator!=(const ContDesc &rhs) { + return !((*this) == rhs); + } + void encode(bufferlist &bl) const; + void decode(bufferlist::const_iterator &bp); +}; +WRITE_CLASS_ENCODER(ContDesc) + +std::ostream &operator<<(std::ostream &out, const ContDesc &rhs); + +class ChunkDesc { +public: + uint32_t offset; + uint32_t length; + std::string oid; +}; + +class ContentsGenerator { +public: + + class iterator_impl { + public: + virtual char operator*() = 0; + virtual iterator_impl &operator++() = 0; + virtual void seek(uint64_t pos) = 0; + virtual bool end() = 0; + virtual ContDesc get_cont() const = 0; + virtual uint64_t get_pos() const = 0; + virtual bufferlist gen_bl_advance(uint64_t s) { + bufferptr ret = buffer::create(s); + for (uint64_t i = 0; i < s; ++i, ++(*this)) { + ret[i] = **this; + } + bufferlist _ret; + _ret.push_back(ret); + return _ret; + } + /// walk through given @c bl + /// + /// @param[out] off the offset of the first byte which does not match + /// @returns true if @c bl matches with the content, false otherwise + virtual bool check_bl_advance(bufferlist &bl, uint64_t *off = nullptr) { + uint64_t _off = 0; + for 
(bufferlist::iterator i = bl.begin(); + !i.end(); + ++i, ++_off, ++(*this)) { + if (*i != **this) { + if (off) + *off = _off; + return false; + } + } + return true; + } + virtual ~iterator_impl() {}; + }; + + class iterator { + public: + ContentsGenerator *parent; + iterator_impl *impl; + char operator *() { return **impl; } + iterator &operator++() { ++(*impl); return *this; }; + void seek(uint64_t pos) { impl->seek(pos); } + bool end() { return impl->end(); } + ~iterator() { parent->put_iterator_impl(impl); } + iterator(const iterator &rhs) : parent(rhs.parent) { + impl = parent->dup_iterator_impl(rhs.impl); + } + iterator &operator=(const iterator &rhs) { + iterator new_iter(rhs); + swap(new_iter); + return *this; + } + void swap(iterator &other) { + ContentsGenerator *otherparent = other.parent; + other.parent = parent; + parent = otherparent; + + iterator_impl *otherimpl = other.impl; + other.impl = impl; + impl = otherimpl; + } + bufferlist gen_bl_advance(uint64_t s) { + return impl->gen_bl_advance(s); + } + bool check_bl_advance(bufferlist &bl, uint64_t *off = nullptr) { + return impl->check_bl_advance(bl, off); + } + iterator(ContentsGenerator *parent, iterator_impl *impl) : + parent(parent), impl(impl) {} + }; + + virtual uint64_t get_length(const ContDesc &in) = 0; + + virtual void get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) = 0; + void get_ranges(const ContDesc &cont, interval_set<uint64_t> &out) { + std::map<uint64_t, uint64_t> ranges; + get_ranges_map(cont, ranges); + for (std::map<uint64_t, uint64_t>::iterator i = ranges.begin(); + i != ranges.end(); + ++i) { + out.insert(i->first, i->second); + } + } + + + virtual iterator_impl *get_iterator_impl(const ContDesc &in) = 0; + + virtual iterator_impl *dup_iterator_impl(const iterator_impl *in) = 0; + + virtual void put_iterator_impl(iterator_impl *in) = 0; + + virtual ~ContentsGenerator() {}; + + iterator get_iterator(const ContDesc &in) { + return iterator(this, 
get_iterator_impl(in)); + } +}; + +class RandGenerator : public ContentsGenerator { +public: + typedef std::minstd_rand0 RandWrap; + + class iterator_impl : public ContentsGenerator::iterator_impl { + public: + uint64_t pos; + ContDesc cont; + RandWrap rand; + RandGenerator *cont_gen; + char current; + iterator_impl(const ContDesc &cont, RandGenerator *cont_gen) : + pos(0), cont(cont), rand(cont.seqnum), cont_gen(cont_gen) { + current = rand(); + } + + ContDesc get_cont() const override { return cont; } + uint64_t get_pos() const override { return pos; } + + iterator_impl &operator++() override { + pos++; + current = rand(); + return *this; + } + + char operator*() override { + return current; + } + + void seek(uint64_t _pos) override { + if (_pos < pos) { + iterator_impl begin = iterator_impl(cont, cont_gen); + begin.seek(_pos); + *this = begin; + } + while (pos < _pos) { + ++(*this); + } + } + + bool end() override { + return pos >= cont_gen->get_length(cont); + } + }; + + ContentsGenerator::iterator_impl *get_iterator_impl(const ContDesc &in) override { + RandGenerator::iterator_impl *i = new iterator_impl(in, this); + return i; + } + + void put_iterator_impl(ContentsGenerator::iterator_impl *in) override { + delete in; + } + + ContentsGenerator::iterator_impl *dup_iterator_impl( + const ContentsGenerator::iterator_impl *in) override { + ContentsGenerator::iterator_impl *retval = get_iterator_impl(in->get_cont()); + retval->seek(in->get_pos()); + return retval; + } +}; + +class VarLenGenerator : public RandGenerator { + uint64_t max_length; + uint64_t min_stride_size; + uint64_t max_stride_size; +public: + VarLenGenerator( + uint64_t length, uint64_t min_stride_size, uint64_t max_stride_size) : + max_length(length), + min_stride_size(min_stride_size), + max_stride_size(max_stride_size) {} + void get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) override; + uint64_t get_length(const ContDesc &in) override { + RandWrap rand(in.seqnum); + 
if (max_length == 0) + return 0; + return (rand() % (max_length/2)) + ((max_length - 1)/2) + 1; + } +}; + +class AttrGenerator : public RandGenerator { + uint64_t max_len; + uint64_t big_max_len; +public: + AttrGenerator(uint64_t max_len, uint64_t big_max_len) + : max_len(max_len), big_max_len(big_max_len) {} + void get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) override { + out.insert(std::pair<uint64_t, uint64_t>(0, get_length(cont))); + } + uint64_t get_length(const ContDesc &in) override { + RandWrap rand(in.seqnum); + // make some attrs big + if (in.seqnum & 3) + return (rand() % max_len); + else + return (rand() % big_max_len); + } + bufferlist gen_bl(const ContDesc &in) { + bufferlist bl; + for (iterator i = get_iterator(in); !i.end(); ++i) { + bl.append(*i); + } + ceph_assert(bl.length() < big_max_len); + return bl; + } +}; + +class AppendGenerator : public RandGenerator { + uint64_t off; + uint64_t alignment; + uint64_t min_append_size; + uint64_t max_append_size; + uint64_t max_append_total; + + uint64_t round_up(uint64_t in, uint64_t by) { + if (by) + in += (by - (in % by)); + return in; + } + +public: + AppendGenerator( + uint64_t off, + uint64_t alignment, + uint64_t min_append_size, + uint64_t _max_append_size, + uint64_t max_append_multiple) : + off(off), alignment(alignment), + min_append_size(round_up(min_append_size, alignment)), + max_append_size(round_up(_max_append_size, alignment)) { + if (_max_append_size == min_append_size) + max_append_size += alignment; + max_append_total = max_append_multiple * max_append_size; + } + uint64_t get_append_size(const ContDesc &in) { + RandWrap rand(in.seqnum); + return round_up(rand() % max_append_total, alignment); + } + uint64_t get_length(const ContDesc &in) override { + return off + get_append_size(in); + } + void get_ranges_map( + const ContDesc &cont, std::map<uint64_t, uint64_t> &out) override; +}; + +class ObjectDesc { +public: + ObjectDesc() + : exists(false), 
dirty(false), + version(0) {} + ObjectDesc(const ContDesc &init, ContentsGenerator *cont_gen) + : exists(false), dirty(false), + version(0) { + layers.push_front(std::pair<std::shared_ptr<ContentsGenerator>, ContDesc>(std::shared_ptr<ContentsGenerator>(cont_gen), init)); + } + + class iterator { + public: + uint64_t pos; + uint64_t size; + uint64_t cur_valid_till; + + class ContState { + interval_set<uint64_t> ranges; + const uint64_t size; + + public: + ContDesc cont; + std::shared_ptr<ContentsGenerator> gen; + ContentsGenerator::iterator iter; + + ContState( + const ContDesc &_cont, + std::shared_ptr<ContentsGenerator> _gen, + ContentsGenerator::iterator _iter) + : size(_gen->get_length(_cont)), cont(_cont), gen(_gen), iter(_iter) { + gen->get_ranges(cont, ranges); + } + + const interval_set<uint64_t> &get_ranges() { + return ranges; + } + + uint64_t get_size() { + return gen->get_length(cont); + } + + bool covers(uint64_t pos) { + return ranges.contains(pos) || (!ranges.starts_after(pos) && pos >= size); + } + + uint64_t next(uint64_t pos) { + ceph_assert(!covers(pos)); + return ranges.starts_after(pos) ? ranges.start_after(pos) : size; + } + + uint64_t valid_till(uint64_t pos) { + ceph_assert(covers(pos)); + return ranges.contains(pos) ? 
+ ranges.end_after(pos) : + std::numeric_limits<uint64_t>::max(); + } + }; + // from latest to earliest + using layers_t = std::vector<ContState>; + layers_t layers; + + struct StackState { + const uint64_t next; + const uint64_t size; + }; + std::stack<std::pair<layers_t::iterator, StackState> > stack; + layers_t::iterator current; + + explicit iterator(ObjectDesc &obj) : + pos(0), + size(obj.layers.begin()->first->get_length(obj.layers.begin()->second)), + cur_valid_till(0) { + for (auto &&i : obj.layers) { + layers.push_back({i.second, i.first, i.first->get_iterator(i.second)}); + } + current = layers.begin(); + + adjust_stack(); + } + + void adjust_stack(); + iterator &operator++() { + ceph_assert(cur_valid_till >= pos); + ++pos; + if (pos >= cur_valid_till) { + adjust_stack(); + } + return *this; + } + + char operator*() { + if (current == layers.end()) { + return '\0'; + } else { + return pos >= size ? '\0' : *(current->iter); + } + } + + bool end() { + return pos >= size; + } + + // advance @c pos to given position + void seek(uint64_t _pos) { + if (_pos < pos) { + ceph_abort(); + } + while (pos < _pos) { + ceph_assert(cur_valid_till >= pos); + uint64_t next = std::min(_pos - pos, cur_valid_till - pos); + pos += next; + + if (pos >= cur_valid_till) { + ceph_assert(pos == cur_valid_till); + adjust_stack(); + } + } + ceph_assert(pos == _pos); + } + + // grab the bytes in the range of [pos, pos+s), and advance @c pos + // + // @returns the bytes in the specified range + bufferlist gen_bl_advance(uint64_t s) { + bufferlist ret; + while (s > 0) { + ceph_assert(cur_valid_till >= pos); + uint64_t next = std::min(s, cur_valid_till - pos); + if (current != layers.end() && pos < size) { + ret.append(current->iter.gen_bl_advance(next)); + } else { + ret.append_zero(next); + } + + pos += next; + ceph_assert(next <= s); + s -= next; + + if (pos >= cur_valid_till) { + ceph_assert(cur_valid_till == pos); + adjust_stack(); + } + } + return ret; + } + + // compare the range 
of [pos, pos+bl.length()) with given @c bl, and + // advance @pos if all bytes in the range match + // + // @param error_at the offset of the first byte which does not match + // @returns true if all bytes match, false otherwise + bool check_bl_advance(bufferlist &bl, uint64_t *error_at = nullptr) { + uint64_t off = 0; + while (off < bl.length()) { + ceph_assert(cur_valid_till >= pos); + uint64_t next = std::min(bl.length() - off, cur_valid_till - pos); + + bufferlist to_check; + to_check.substr_of(bl, off, next); + if (current != layers.end() && pos < size) { + if (!current->iter.check_bl_advance(to_check, error_at)) { + if (error_at) + *error_at += off; + return false; + } + } else { + uint64_t at = pos; + for (auto i = to_check.begin(); !i.end(); ++i, ++at) { + if (*i) { + if (error_at) + *error_at = at; + return false; + } + } + } + + pos += next; + off += next; + ceph_assert(off <= bl.length()); + + if (pos >= cur_valid_till) { + ceph_assert(cur_valid_till == pos); + adjust_stack(); + } + } + ceph_assert(off == bl.length()); + return true; + } + }; + + iterator begin() { + return iterator(*this); + } + + bool deleted() { + return !exists; + } + + bool has_contents() { + return layers.size(); + } + + // takes ownership of gen + void update(ContentsGenerator *gen, const ContDesc &next); + bool check(bufferlist &to_check); + bool check_sparse(const std::map<uint64_t, uint64_t>& extends, + bufferlist &to_check); + const ContDesc &most_recent(); + ContentsGenerator *most_recent_gen() { + return layers.begin()->first.get(); + } + std::map<std::string, ContDesc> attrs; // Both omap and xattrs + bufferlist header; + bool exists; + bool dirty; + + uint64_t version; + std::string redirect_target; + std::map<uint64_t, ChunkDesc> chunk_info; +private: + std::list<std::pair<std::shared_ptr<ContentsGenerator>, ContDesc> > layers; +}; + +#endif diff --git a/src/test/osd/RadosModel.cc b/src/test/osd/RadosModel.cc new file mode 100644 index 000000000..501bf3b13 --- /dev/null 
/**
 * Pick a uniformly-indexed random element of @p cont using rand().
 *
 * Works for any range accepted by std::begin/std::end/std::size, including
 * const containers (a const_iterator is returned) and built-in arrays (a
 * pointer is returned). For a non-const container the return type is the
 * same `T::iterator` as before.
 *
 * @param cont range to choose from
 * @return iterator to the chosen element, or std::end(cont) when the range
 *         is empty
 */
template <typename T>
auto rand_choose(T &cont) -> decltype(std::begin(cont)) {
  if (std::empty(cont)) {
    return std::end(cont);
  }
  return std::next(std::begin(cont), rand() % std::size(cont));
}
{ + TEST_OP_READ, + TEST_OP_WRITE, + TEST_OP_WRITE_EXCL, + TEST_OP_WRITESAME, + TEST_OP_DELETE, + TEST_OP_SNAP_CREATE, + TEST_OP_SNAP_REMOVE, + TEST_OP_ROLLBACK, + TEST_OP_SETATTR, + TEST_OP_RMATTR, + TEST_OP_WATCH, + TEST_OP_COPY_FROM, + TEST_OP_HIT_SET_LIST, + TEST_OP_UNDIRTY, + TEST_OP_IS_DIRTY, + TEST_OP_CACHE_FLUSH, + TEST_OP_CACHE_TRY_FLUSH, + TEST_OP_CACHE_EVICT, + TEST_OP_APPEND, + TEST_OP_APPEND_EXCL, + TEST_OP_SET_REDIRECT, + TEST_OP_UNSET_REDIRECT, + TEST_OP_CHUNK_READ, + TEST_OP_TIER_PROMOTE, + TEST_OP_TIER_FLUSH +}; + +class TestWatchContext : public librados::WatchCtx2 { + TestWatchContext(const TestWatchContext&); +public: + ceph::condition_variable cond; + uint64_t handle = 0; + bool waiting = false; + ceph::mutex lock = ceph::make_mutex("watch lock"); + TestWatchContext() = default; + void handle_notify(uint64_t notify_id, uint64_t cookie, + uint64_t notifier_id, + bufferlist &bl) override { + std::lock_guard l{lock}; + waiting = false; + cond.notify_all(); + } + void handle_error(uint64_t cookie, int err) override { + std::lock_guard l{lock}; + cout << "watch handle_error " << err << std::endl; + } + void start() { + std::lock_guard l{lock}; + waiting = true; + } + void wait() { + std::unique_lock l{lock}; + cond.wait(l, [this] { return !waiting; }); + } + uint64_t &get_handle() { + return handle; + } +}; + +class TestOp { +public: + const int num; + RadosTestContext *context; + TestOpStat *stat; + bool done = false; + TestOp(int n, RadosTestContext *context, + TestOpStat *stat = 0) + : num(n), + context(context), + stat(stat) + {} + + virtual ~TestOp() {}; + + /** + * This struct holds data to be passed by a callback + * to a TestOp::finish method. + */ + struct CallbackInfo { + uint64_t id; + explicit CallbackInfo(uint64_t id) : id(id) {} + virtual ~CallbackInfo() {}; + }; + + virtual void _begin() = 0; + + /** + * Called when the operation completes. + * This should be overridden by asynchronous operations. 
+ * + * @param info information stored by a callback, or NULL - + * useful for multi-operation TestOps + */ + virtual void _finish(CallbackInfo *info) + { + return; + } + virtual string getType() = 0; + virtual bool finished() + { + return true; + } + + void begin(); + void finish(CallbackInfo *info); + virtual bool must_quiesce_other_ops() { return false; } +}; + +class TestOpGenerator { +public: + virtual ~TestOpGenerator() {}; + virtual TestOp *next(RadosTestContext &context) = 0; +}; + +class RadosTestContext { +public: + ceph::mutex state_lock = ceph::make_mutex("Context Lock"); + ceph::condition_variable wait_cond; + // snap => {oid => desc} + map<int, map<string,ObjectDesc> > pool_obj_cont; + set<string> oid_in_use; + set<string> oid_not_in_use; + set<string> oid_flushing; + set<string> oid_not_flushing; + set<string> oid_redirect_not_in_use; + set<string> oid_redirect_in_use; + SharedPtrRegistry<int, int> snaps_in_use; + int current_snap; + string pool_name; + librados::IoCtx io_ctx; + librados::Rados rados; + int next_oid; + string prefix; + int errors; + int max_in_flight; + int seq_num; + map<int,uint64_t> snaps; + uint64_t seq; + const char *rados_id; + bool initialized; + map<string, TestWatchContext*> watches; + const uint64_t max_size; + const uint64_t min_stride_size; + const uint64_t max_stride_size; + AttrGenerator attr_gen; + const bool no_omap; + const bool no_sparse; + bool pool_snaps; + bool write_fadvise_dontneed; + string low_tier_pool_name; + librados::IoCtx low_tier_io_ctx; + int snapname_num; + map<string,string > redirect_objs; + bool enable_dedup; + + RadosTestContext(const string &pool_name, + int max_in_flight, + uint64_t max_size, + uint64_t min_stride_size, + uint64_t max_stride_size, + bool no_omap, + bool no_sparse, + bool pool_snaps, + bool write_fadvise_dontneed, + const string &low_tier_pool_name, + bool enable_dedup, + const char *id = 0) : + pool_obj_cont(), + current_snap(0), + pool_name(pool_name), + next_oid(0), + 
errors(0), + max_in_flight(max_in_flight), + seq_num(0), seq(0), + rados_id(id), initialized(false), + max_size(max_size), + min_stride_size(min_stride_size), max_stride_size(max_stride_size), + attr_gen(2000, 20000), + no_omap(no_omap), + no_sparse(no_sparse), + pool_snaps(pool_snaps), + write_fadvise_dontneed(write_fadvise_dontneed), + low_tier_pool_name(low_tier_pool_name), + snapname_num(0), + enable_dedup(enable_dedup) + { + } + + int init() + { + int r = rados.init(rados_id); + if (r < 0) + return r; + r = rados.conf_read_file(NULL); + if (r < 0) + return r; + r = rados.conf_parse_env(NULL); + if (r < 0) + return r; + r = rados.connect(); + if (r < 0) + return r; + r = rados.ioctx_create(pool_name.c_str(), io_ctx); + if (r < 0) { + rados.shutdown(); + return r; + } + if (!low_tier_pool_name.empty()) { + r = rados.ioctx_create(low_tier_pool_name.c_str(), low_tier_io_ctx); + if (r < 0) { + rados.shutdown(); + return r; + } + } + bufferlist inbl; + r = rados.mon_command( + "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool_name + + "\", \"var\": \"write_fadvise_dontneed\", \"val\": \"" + (write_fadvise_dontneed ? 
"true" : "false") + "\"}", + inbl, NULL, NULL); + if (r < 0) { + rados.shutdown(); + return r; + } + if (enable_dedup) { + r = rados.mon_command( + "{\"prefix\": \"osd pool set\", \"pool\": \"" + pool_name + + "\", \"var\": \"fingerprint_algorithm\", \"val\": \"" + "sha256" + "\"}", + inbl, NULL, NULL); + if (r < 0) { + rados.shutdown(); + return r; + } + } + + char hostname_cstr[100]; + gethostname(hostname_cstr, 100); + stringstream hostpid; + hostpid << hostname_cstr << getpid() << "-"; + prefix = hostpid.str(); + ceph_assert(!initialized); + initialized = true; + return 0; + } + + void shutdown() + { + if (initialized) { + rados.shutdown(); + } + } + + void loop(TestOpGenerator *gen) + { + ceph_assert(initialized); + list<TestOp*> inflight; + std::unique_lock state_locker{state_lock}; + + TestOp *next = gen->next(*this); + TestOp *waiting = NULL; + + while (next || !inflight.empty()) { + if (next && next->must_quiesce_other_ops() && !inflight.empty()) { + waiting = next; + next = NULL; // Force to wait for inflight to drain + } + if (next) { + inflight.push_back(next); + } + state_lock.unlock(); + if (next) { + (*inflight.rbegin())->begin(); + } + state_lock.lock(); + while (1) { + for (list<TestOp*>::iterator i = inflight.begin(); + i != inflight.end();) { + if ((*i)->finished()) { + cout << (*i)->num << ": done (" << (inflight.size()-1) << " left)" << std::endl; + delete *i; + inflight.erase(i++); + } else { + ++i; + } + } + + if (inflight.size() >= (unsigned) max_in_flight || (!next && !inflight.empty())) { + cout << " waiting on " << inflight.size() << std::endl; + wait_cond.wait(state_locker); + } else { + break; + } + } + if (waiting) { + next = waiting; + waiting = NULL; + } else { + next = gen->next(*this); + } + } + } + + void kick() + { + wait_cond.notify_all(); + } + + TestWatchContext *get_watch_context(const string &oid) { + return watches.count(oid) ? 
watches[oid] : 0; + } + + TestWatchContext *watch(const string &oid) { + ceph_assert(!watches.count(oid)); + return (watches[oid] = new TestWatchContext); + } + + void unwatch(const string &oid) { + ceph_assert(watches.count(oid)); + delete watches[oid]; + watches.erase(oid); + } + + ObjectDesc get_most_recent(const string &oid) { + ObjectDesc new_obj; + for (map<int, map<string,ObjectDesc> >::reverse_iterator i = + pool_obj_cont.rbegin(); + i != pool_obj_cont.rend(); + ++i) { + map<string,ObjectDesc>::iterator j = i->second.find(oid); + if (j != i->second.end()) { + new_obj = j->second; + break; + } + } + return new_obj; + } + + void rm_object_attrs(const string &oid, const set<string> &attrs) + { + ObjectDesc new_obj = get_most_recent(oid); + for (set<string>::const_iterator i = attrs.begin(); + i != attrs.end(); + ++i) { + new_obj.attrs.erase(*i); + } + new_obj.dirty = true; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void remove_object_header(const string &oid) + { + ObjectDesc new_obj = get_most_recent(oid); + new_obj.header = bufferlist(); + new_obj.dirty = true; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + + void update_object_header(const string &oid, const bufferlist &bl) + { + ObjectDesc new_obj = get_most_recent(oid); + new_obj.header = bl; + new_obj.exists = true; + new_obj.dirty = true; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void update_object_attrs(const string &oid, const map<string, ContDesc> &attrs) + { + ObjectDesc new_obj = get_most_recent(oid); + for (map<string, ContDesc>::const_iterator i = attrs.begin(); + i != attrs.end(); + ++i) { + new_obj.attrs[i->first] = i->second; + } + new_obj.exists = true; + new_obj.dirty = true; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void update_object(ContentsGenerator *cont_gen, + const string &oid, const ContDesc &contents) + { + ObjectDesc new_obj = get_most_recent(oid); + new_obj.exists = true; + 
new_obj.dirty = true; + new_obj.update(cont_gen, + contents); + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void update_object_full(const string &oid, const ObjectDesc &contents) + { + pool_obj_cont[current_snap].insert_or_assign(oid, contents); + pool_obj_cont[current_snap][oid].dirty = true; + } + + void update_object_undirty(const string &oid) + { + ObjectDesc new_obj = get_most_recent(oid); + new_obj.dirty = false; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + void update_object_version(const string &oid, uint64_t version, + int snap = -1) + { + for (map<int, map<string,ObjectDesc> >::reverse_iterator i = + pool_obj_cont.rbegin(); + i != pool_obj_cont.rend(); + ++i) { + if (snap != -1 && snap < i->first) + continue; + map<string,ObjectDesc>::iterator j = i->second.find(oid); + if (j != i->second.end()) { + if (version) + j->second.version = version; + cout << __func__ << " oid " << oid + << " v " << version << " " << j->second.most_recent() + << " " << (j->second.dirty ? "dirty" : "clean") + << " " << (j->second.exists ? 
"exists" : "dne") + << std::endl; + break; + } + } + } + + void remove_object(const string &oid) + { + ceph_assert(!get_watch_context(oid)); + ObjectDesc new_obj; + pool_obj_cont[current_snap].insert_or_assign(oid, new_obj); + } + + bool find_object(const string &oid, ObjectDesc *contents, int snap = -1) const + { + for (map<int, map<string,ObjectDesc> >::const_reverse_iterator i = + pool_obj_cont.rbegin(); + i != pool_obj_cont.rend(); + ++i) { + if (snap != -1 && snap < i->first) continue; + if (i->second.count(oid) != 0) { + *contents = i->second.find(oid)->second; + return true; + } + } + return false; + } + + void update_object_redirect_target(const string &oid, const string &target) + { + redirect_objs[oid] = target; + } + + void update_object_chunk_target(const string &oid, uint64_t offset, const ChunkDesc &info) + { + for (map<int, map<string,ObjectDesc> >::const_reverse_iterator i = + pool_obj_cont.rbegin(); + i != pool_obj_cont.rend(); + ++i) { + if (i->second.count(oid) != 0) { + ObjectDesc obj_desc = i->second.find(oid)->second; + obj_desc.chunk_info[offset] = info; + update_object_full(oid, obj_desc); + return ; + } + } + return; + } + + bool object_existed_at(const string &oid, int snap = -1) const + { + ObjectDesc contents; + bool found = find_object(oid, &contents, snap); + return found && contents.exists; + } + + void remove_snap(int snap) + { + map<int, map<string,ObjectDesc> >::iterator next_iter = pool_obj_cont.find(snap); + ceph_assert(next_iter != pool_obj_cont.end()); + map<int, map<string,ObjectDesc> >::iterator current_iter = next_iter++; + ceph_assert(current_iter != pool_obj_cont.end()); + map<string,ObjectDesc> ¤t = current_iter->second; + map<string,ObjectDesc> &next = next_iter->second; + for (map<string,ObjectDesc>::iterator i = current.begin(); + i != current.end(); + ++i) { + if (next.count(i->first) == 0) { + next.insert(pair<string,ObjectDesc>(i->first, i->second)); + } + } + pool_obj_cont.erase(current_iter); + snaps.erase(snap); 
+ } + + void add_snap(uint64_t snap) + { + snaps[current_snap] = snap; + current_snap++; + pool_obj_cont[current_snap]; + seq = snap; + } + + void roll_back(const string &oid, int snap) + { + ceph_assert(!get_watch_context(oid)); + ObjectDesc contents; + find_object(oid, &contents, snap); + contents.dirty = true; + pool_obj_cont.rbegin()->second.insert_or_assign(oid, contents); + } +}; + +void read_callback(librados::completion_t comp, void *arg); +void write_callback(librados::completion_t comp, void *arg); + +/// remove random xattrs from given object, and optionally remove omap +/// entries if @c no_omap is not specified in context +class RemoveAttrsOp : public TestOp { +public: + string oid; + librados::ObjectWriteOperation op; + librados::AioCompletion *comp; + RemoveAttrsOp(int n, RadosTestContext *context, + const string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), oid(oid), comp(NULL) + {} + + void _begin() override + { + ContDesc cont; + set<string> to_remove; + { + std::lock_guard l{context->state_lock}; + ObjectDesc obj; + if (!context->find_object(oid, &obj)) { + context->kick(); + done = true; + return; + } + cont = ContDesc(context->seq_num, context->current_snap, + context->seq_num, ""); + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + if (rand() % 30) { + ContentsGenerator::iterator iter = context->attr_gen.get_iterator(cont); + for (map<string, ContDesc>::iterator i = obj.attrs.begin(); + i != obj.attrs.end(); + ++i, ++iter) { + if (!(*iter % 3)) { + to_remove.insert(i->first); + op.rmxattr(i->first.c_str()); + } + } + if (to_remove.empty()) { + context->kick(); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + done = true; + return; + } + if (!context->no_omap) { + op.omap_rm_keys(to_remove); + } + } else { + if (!context->no_omap) { + op.omap_clear(); + } + for (map<string, ContDesc>::iterator i = obj.attrs.begin(); + i != obj.attrs.end(); + ++i) { + op.rmxattr(i->first.c_str()); + 
to_remove.insert(i->first); + } + context->remove_object_header(oid); + } + context->rm_object_attrs(oid, to_remove); + } + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + done = true; + context->update_object_version(oid, comp->get_version64()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "RemoveAttrsOp"; + } +}; + +/// add random xattrs to given object, and optionally add omap +/// entries if @c no_omap is not specified in context +class SetAttrsOp : public TestOp { +public: + string oid; + librados::ObjectWriteOperation op; + librados::AioCompletion *comp; + SetAttrsOp(int n, + RadosTestContext *context, + const string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), + oid(oid), comp(NULL) + {} + + void _begin() override + { + ContDesc cont; + { + std::lock_guard l{context->state_lock}; + cont = ContDesc(context->seq_num, context->current_snap, + context->seq_num, ""); + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + } + + map<string, bufferlist> omap_contents; + map<string, ContDesc> omap; + bufferlist header; + ContentsGenerator::iterator keygen = context->attr_gen.get_iterator(cont); + op.create(false); + while (!*keygen) ++keygen; + while (*keygen) { + if (*keygen != '_') + header.append(*keygen); + ++keygen; + } + for (int i = 0; i < 20; ++i) { + string key; + while (!*keygen) ++keygen; + while (*keygen && key.size() < 40) { + key.push_back((*keygen % 20) + 'a'); + ++keygen; + } + ContDesc val(cont); + val.seqnum += (unsigned)(*keygen); + val.prefix = ("oid: " + 
oid); + omap[key] = val; + bufferlist val_buffer = context->attr_gen.gen_bl(val); + omap_contents[key] = val_buffer; + op.setxattr(key.c_str(), val_buffer); + } + if (!context->no_omap) { + op.omap_set_header(header); + op.omap_set(omap_contents); + } + + { + std::lock_guard l{context->state_lock}; + context->update_object_header(oid, header); + context->update_object_attrs(oid, omap); + } + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + int r; + if ((r = comp->get_return_value())) { + cerr << "err " << r << std::endl; + ceph_abort(); + } + done = true; + context->update_object_version(oid, comp->get_version64()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "SetAttrsOp"; + } +}; + +class WriteOp : public TestOp { +public: + const string oid; + ContDesc cont; + set<librados::AioCompletion *> waiting; + librados::AioCompletion *rcompletion = nullptr; + // numbers of async ops submitted + uint64_t waiting_on = 0; + uint64_t last_acked_tid = 0; + + librados::ObjectReadOperation read_op; + librados::ObjectWriteOperation write_op; + bufferlist rbuffer; + + const bool do_append; + const bool do_excl; + + WriteOp(int n, + RadosTestContext *context, + const string &oid, + bool do_append, + bool do_excl, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), + do_append(do_append), + do_excl(do_excl) + {} + + void _begin() override + { + assert(!done); + stringstream acc; + std::lock_guard state_locker{context->state_lock}; + acc << context->prefix << "OID: " << oid << " snap " << context->current_snap 
<< std::endl; + string prefix = acc.str(); + + cont = ContDesc(context->seq_num, context->current_snap, context->seq_num, prefix); + + ContentsGenerator *cont_gen; + if (do_append) { + ObjectDesc old_value; + bool found = context->find_object(oid, &old_value); + uint64_t prev_length = found && old_value.has_contents() ? + old_value.most_recent_gen()->get_length(old_value.most_recent()) : + 0; + bool requires_alignment; + int r = context->io_ctx.pool_requires_alignment2(&requires_alignment); + ceph_assert(r == 0); + uint64_t alignment = 0; + if (requires_alignment) { + r = context->io_ctx.pool_required_alignment2(&alignment); + ceph_assert(r == 0); + ceph_assert(alignment != 0); + } + cont_gen = new AppendGenerator( + prev_length, + alignment, + context->min_stride_size, + context->max_stride_size, + 3); + } else { + cont_gen = new VarLenGenerator( + context->max_size, context->min_stride_size, context->max_stride_size); + } + context->update_object(cont_gen, oid, cont); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + map<uint64_t, uint64_t> ranges; + + cont_gen->get_ranges_map(cont, ranges); + std::cout << num << ": seq_num " << context->seq_num << " ranges " << ranges << std::endl; + context->seq_num++; + + waiting_on = ranges.size(); + ContentsGenerator::iterator gen_pos = cont_gen->get_iterator(cont); + // assure that tid is greater than last_acked_tid + uint64_t tid = last_acked_tid + 1; + for (auto [offset, len] : ranges) { + gen_pos.seek(offset); + bufferlist to_write = gen_pos.gen_bl_advance(len); + ceph_assert(to_write.length() == len); + ceph_assert(to_write.length() > 0); + std::cout << num << ": writing " << context->prefix+oid + << " from " << offset + << " to " << len + offset << " tid " << tid << std::endl; + auto cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(tid++)); + librados::AioCompletion *completion = + context->rados.aio_create_completion((void*) cb_arg, &write_callback); + 
waiting.insert(completion); + librados::ObjectWriteOperation op; + if (do_append) { + op.append(to_write); + } else { + op.write(offset, to_write); + } + if (do_excl && cb_arg->second->id == last_acked_tid + 1) + op.assert_exists(); + context->io_ctx.aio_operate( + context->prefix+oid, completion, + &op); + } + + bufferlist contbl; + encode(cont, contbl); + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>( + this, + new TestOp::CallbackInfo(tid++)); + librados::AioCompletion *completion = context->rados.aio_create_completion( + (void*) cb_arg, &write_callback); + waiting.insert(completion); + waiting_on++; + write_op.setxattr("_header", contbl); + if (!do_append) { + write_op.truncate(cont_gen->get_length(cont)); + } + context->io_ctx.aio_operate( + context->prefix+oid, completion, &write_op); + + cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>( + this, + new TestOp::CallbackInfo(tid++)); + rcompletion = context->rados.aio_create_completion( + (void*) cb_arg, &write_callback); + waiting_on++; + read_op.read(0, 1, &rbuffer, 0); + context->io_ctx.aio_operate( + context->prefix+oid, rcompletion, + &read_op, + librados::OPERATION_ORDER_READS_WRITES, // order wrt previous write/update + 0); + } + + void _finish(CallbackInfo *info) override + { + ceph_assert(info); + std::lock_guard state_locker{context->state_lock}; + uint64_t tid = info->id; + + cout << num << ": finishing write tid " << tid << " to " << context->prefix + oid << std::endl; + + if (tid <= last_acked_tid) { + cerr << "Error: finished tid " << tid + << " when last_acked_tid was " << last_acked_tid << std::endl; + ceph_abort(); + } + last_acked_tid = tid; + + ceph_assert(!done); + waiting_on--; + if (waiting_on == 0) { + uint64_t version = 0; + for (set<librados::AioCompletion *>::iterator i = waiting.begin(); + i != waiting.end(); + ) { + ceph_assert((*i)->is_complete()); + if (int err = (*i)->get_return_value()) { + cerr << "Error: oid " << oid << " write 
returned error code " + << err << std::endl; + } + if ((*i)->get_version64() > version) + version = (*i)->get_version64(); + (*i)->release(); + waiting.erase(i++); + } + + context->update_object_version(oid, version); + if (rcompletion->get_version64() != version) { + cerr << "Error: racing read on " << oid << " returned version " + << rcompletion->get_version64() << " rather than version " + << version << std::endl; + ceph_abort_msg("racing read got wrong version"); + } + + { + ObjectDesc old_value; + ceph_assert(context->find_object(oid, &old_value, -1)); + if (old_value.deleted()) + std::cout << num << ": left oid " << oid << " deleted" << std::endl; + else + std::cout << num << ": left oid " << oid << " " + << old_value.most_recent() << std::endl; + } + + rcompletion->release(); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + done = true; + } + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "WriteOp"; + } +}; + +class WriteSameOp : public TestOp { +public: + string oid; + ContDesc cont; + set<librados::AioCompletion *> waiting; + librados::AioCompletion *rcompletion; + uint64_t waiting_on; + uint64_t last_acked_tid; + + librados::ObjectReadOperation read_op; + librados::ObjectWriteOperation write_op; + bufferlist rbuffer; + + WriteSameOp(int n, + RadosTestContext *context, + const string &oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), rcompletion(NULL), waiting_on(0), + last_acked_tid(0) + {} + + void _begin() override + { + std::lock_guard state_locker{context->state_lock}; + done = 0; + stringstream acc; + acc << context->prefix << "OID: " << oid << " snap " << context->current_snap << std::endl; + string prefix = acc.str(); + + cont = ContDesc(context->seq_num, context->current_snap, context->seq_num, prefix); + + ContentsGenerator *cont_gen; + cont_gen = new VarLenGenerator( + context->max_size, context->min_stride_size, 
context->max_stride_size); + context->update_object(cont_gen, oid, cont); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + map<uint64_t, uint64_t> ranges; + + cont_gen->get_ranges_map(cont, ranges); + std::cout << num << ": seq_num " << context->seq_num << " ranges " << ranges << std::endl; + context->seq_num++; + + waiting_on = ranges.size(); + ContentsGenerator::iterator gen_pos = cont_gen->get_iterator(cont); + // assure that tid is greater than last_acked_tid + uint64_t tid = last_acked_tid + 1; + for (auto [offset, len] : ranges) { + gen_pos.seek(offset); + bufferlist to_write = gen_pos.gen_bl_advance(len); + ceph_assert(to_write.length() == len); + ceph_assert(to_write.length() > 0); + std::cout << num << ": writing " << context->prefix+oid + << " from " << offset + << " to " << offset + len << " tid " << tid << std::endl; + auto cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(tid++)); + librados::AioCompletion *completion = + context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + waiting.insert(completion); + librados::ObjectWriteOperation op; + /* no writesame multiplication factor for now */ + op.writesame(offset, to_write.length(), to_write); + + context->io_ctx.aio_operate( + context->prefix+oid, completion, + &op); + } + + bufferlist contbl; + encode(cont, contbl); + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>( + this, + new TestOp::CallbackInfo(tid++)); + librados::AioCompletion *completion = context->rados.aio_create_completion( + (void*) cb_arg, &write_callback); + waiting.insert(completion); + waiting_on++; + write_op.setxattr("_header", contbl); + write_op.truncate(cont_gen->get_length(cont)); + context->io_ctx.aio_operate( + context->prefix+oid, completion, &write_op); + + cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>( + this, + new TestOp::CallbackInfo(tid++)); + rcompletion = 
context->rados.aio_create_completion( + (void*) cb_arg, &write_callback); + waiting_on++; + read_op.read(0, 1, &rbuffer, 0); + context->io_ctx.aio_operate( + context->prefix+oid, rcompletion, + &read_op, + librados::OPERATION_ORDER_READS_WRITES, // order wrt previous write/update + 0); + } + + void _finish(CallbackInfo *info) override + { + ceph_assert(info); + std::lock_guard state_locker{context->state_lock}; + uint64_t tid = info->id; + + cout << num << ": finishing writesame tid " << tid << " to " << context->prefix + oid << std::endl; + + if (tid <= last_acked_tid) { + cerr << "Error: finished tid " << tid + << " when last_acked_tid was " << last_acked_tid << std::endl; + ceph_abort(); + } + last_acked_tid = tid; + + ceph_assert(!done); + waiting_on--; + if (waiting_on == 0) { + uint64_t version = 0; + for (set<librados::AioCompletion *>::iterator i = waiting.begin(); + i != waiting.end(); + ) { + ceph_assert((*i)->is_complete()); + if (int err = (*i)->get_return_value()) { + cerr << "Error: oid " << oid << " writesame returned error code " + << err << std::endl; + } + if ((*i)->get_version64() > version) + version = (*i)->get_version64(); + (*i)->release(); + waiting.erase(i++); + } + + context->update_object_version(oid, version); + ceph_assert(rcompletion->is_complete()); + ceph_assert(rcompletion->get_return_value() == 1); + if (rcompletion->get_version64() != version) { + cerr << "Error: racing read on " << oid << " returned version " + << rcompletion->get_version64() << " rather than version " + << version << std::endl; + ceph_abort_msg("racing read got wrong version"); + } + rcompletion->release(); + + { + ObjectDesc old_value; + ceph_assert(context->find_object(oid, &old_value, -1)); + if (old_value.deleted()) + std::cout << num << ": left oid " << oid << " deleted" << std::endl; + else + std::cout << num << ": left oid " << oid << " " + << old_value.most_recent() << std::endl; + } + + context->oid_in_use.erase(oid); + 
context->oid_not_in_use.insert(oid); + context->kick(); + done = true; + } + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "WriteSameOp"; + } +}; + +class DeleteOp : public TestOp { +public: + string oid; + + DeleteOp(int n, + RadosTestContext *context, + const string &oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), oid(oid) + {} + + void _begin() override + { + std::unique_lock state_locker{context->state_lock}; + if (context->get_watch_context(oid)) { + context->kick(); + return; + } + + ObjectDesc contents; + context->find_object(oid, &contents); + bool present = !contents.deleted(); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->seq_num++; + + context->remove_object(oid); + + interval_set<uint64_t> ranges; + state_locker.unlock(); + + int r = 0; + if (rand() % 2) { + librados::ObjectWriteOperation op; + op.assert_exists(); + op.remove(); + r = context->io_ctx.operate(context->prefix+oid, &op); + } else { + r = context->io_ctx.remove(context->prefix+oid); + } + if (r && !(r == -ENOENT && !present)) { + cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl; + ceph_abort(); + } + + state_locker.lock(); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + + string getType() override + { + return "DeleteOp"; + } +}; + +class ReadOp : public TestOp { +public: + vector<librados::AioCompletion *> completions; + librados::ObjectReadOperation op; + string oid; + ObjectDesc old_value; + int snap; + bool balance_reads; + bool localize_reads; + + std::shared_ptr<int> in_use; + + vector<bufferlist> results; + vector<int> retvals; + vector<std::map<uint64_t, uint64_t>> extent_results; + vector<bool> is_sparse_read; + uint64_t waiting_on; + + vector<bufferlist> checksums; + vector<int> checksum_retvals; + + map<string, bufferlist> attrs; + int attrretval; + + set<string> omap_requested_keys; 
+ map<string, bufferlist> omap_returned_values; + set<string> omap_keys; + map<string, bufferlist> omap; + bufferlist header; + + map<string, bufferlist> xattrs; + ReadOp(int n, + RadosTestContext *context, + const string &oid, + bool balance_reads, + bool localize_reads, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + completions(3), + oid(oid), + snap(0), + balance_reads(balance_reads), + localize_reads(localize_reads), + results(3), + retvals(3), + extent_results(3), + is_sparse_read(3, false), + waiting_on(0), + checksums(3), + checksum_retvals(3), + attrretval(0) + {} + + void _do_read(librados::ObjectReadOperation& read_op, int index) { + uint64_t len = 0; + if (old_value.has_contents()) + len = old_value.most_recent_gen()->get_length(old_value.most_recent()); + if (context->no_sparse || rand() % 2) { + is_sparse_read[index] = false; + read_op.read(0, + len, + &results[index], + &retvals[index]); + bufferlist init_value_bl; + encode(static_cast<uint32_t>(-1), init_value_bl); + read_op.checksum(LIBRADOS_CHECKSUM_TYPE_CRC32C, init_value_bl, 0, len, + 0, &checksums[index], &checksum_retvals[index]); + } else { + is_sparse_read[index] = true; + read_op.sparse_read(0, + len, + &extent_results[index], + &results[index], + &retvals[index]); + } + } + + void _begin() override + { + std::unique_lock state_locker{context->state_lock}; + if (!(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + std::cout << num << ": read oid " << oid << " snap " << snap << std::endl; + done = 0; + for (uint32_t i = 0; i < 3; i++) { + completions[i] = context->rados.aio_create_completion((void *) this, &read_callback); + } + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + ceph_assert(context->find_object(oid, &old_value, snap)); + if (old_value.deleted()) + std::cout << num << ": expect deleted" << std::endl; + else + 
std::cout << num << ": expect " << old_value.most_recent() << std::endl; + + TestWatchContext *ctx = context->get_watch_context(oid); + state_locker.unlock(); + if (ctx) { + ceph_assert(old_value.exists); + TestAlarm alarm; + std::cerr << num << ": about to start" << std::endl; + ctx->start(); + std::cerr << num << ": started" << std::endl; + bufferlist bl; + context->io_ctx.set_notify_timeout(600); + int r = context->io_ctx.notify2(context->prefix+oid, bl, 0, NULL); + if (r < 0) { + std::cerr << "r is " << r << std::endl; + ceph_abort(); + } + std::cerr << num << ": notified, waiting" << std::endl; + ctx->wait(); + } + state_locker.lock(); + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + _do_read(op, 0); + for (map<string, ContDesc>::iterator i = old_value.attrs.begin(); + i != old_value.attrs.end(); + ++i) { + if (rand() % 2) { + string key = i->first; + if (rand() % 2) + key.push_back((rand() % 26) + 'a'); + omap_requested_keys.insert(key); + } + } + if (!context->no_omap) { + op.omap_get_vals_by_keys(omap_requested_keys, &omap_returned_values, 0); + // NOTE: we're ignore pmore here, which assumes the OSD limit is high + // enough for us. + op.omap_get_keys2("", -1, &omap_keys, nullptr, nullptr); + op.omap_get_vals2("", -1, &omap, nullptr, nullptr); + op.omap_get_header(&header, 0); + } + op.getxattrs(&xattrs, 0); + + unsigned flags = 0; + if (balance_reads) + flags |= librados::OPERATION_BALANCE_READS; + if (localize_reads) + flags |= librados::OPERATION_LOCALIZE_READS; + + ceph_assert(!context->io_ctx.aio_operate(context->prefix+oid, completions[0], &op, + flags, NULL)); + waiting_on++; + + // send 2 pipelined reads on the same object/snap. 
This can help testing + // OSD's read behavior in some scenarios + for (uint32_t i = 1; i < 3; ++i) { + librados::ObjectReadOperation pipeline_op; + _do_read(pipeline_op, i); + ceph_assert(!context->io_ctx.aio_operate(context->prefix+oid, completions[i], &pipeline_op, 0)); + waiting_on++; + } + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::unique_lock state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(waiting_on > 0); + if (--waiting_on) { + return; + } + + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + int retval = completions[0]->get_return_value(); + for (vector<librados::AioCompletion *>::iterator it = completions.begin(); + it != completions.end(); ++it) { + ceph_assert((*it)->is_complete()); + uint64_t version = (*it)->get_version64(); + int err = (*it)->get_return_value(); + if (err != retval) { + cerr << num << ": Error: oid " << oid << " read returned different error codes: " + << retval << " and " << err << std::endl; + ceph_abort(); + } + if (err) { + if (!(err == -ENOENT && old_value.deleted())) { + cerr << num << ": Error: oid " << oid << " read returned error code " + << err << std::endl; + ceph_abort(); + } + } else if (version != old_value.version) { + cerr << num << ": oid " << oid << " version is " << version + << " and expected " << old_value.version << std::endl; + ceph_assert(version == old_value.version); + } + } + if (!retval) { + map<string, bufferlist>::iterator iter = xattrs.find("_header"); + bufferlist headerbl; + if (iter == xattrs.end()) { + if (old_value.has_contents()) { + cerr << num << ": Error: did not find header attr, has_contents: " + << old_value.has_contents() + << std::endl; + ceph_assert(!old_value.has_contents()); + } + } else { + headerbl = iter->second; + xattrs.erase(iter); + } + if (old_value.deleted()) { + std::cout << num << ": expect deleted" << std::endl; + ceph_abort_msg("expected deleted"); + } 
else { + std::cout << num << ": expect " << old_value.most_recent() << std::endl; + } + if (old_value.has_contents()) { + ContDesc to_check; + auto p = headerbl.cbegin(); + decode(to_check, p); + if (to_check != old_value.most_recent()) { + cerr << num << ": oid " << oid << " found incorrect object contents " << to_check + << ", expected " << old_value.most_recent() << std::endl; + context->errors++; + } + for (unsigned i = 0; i < results.size(); i++) { + if (is_sparse_read[i]) { + if (!old_value.check_sparse(extent_results[i], results[i])) { + cerr << num << ": oid " << oid << " contents " << to_check << " corrupt" << std::endl; + context->errors++; + } + } else { + if (!old_value.check(results[i])) { + cerr << num << ": oid " << oid << " contents " << to_check << " corrupt" << std::endl; + context->errors++; + } + + uint32_t checksum = 0; + if (checksum_retvals[i] == 0) { + try { + auto bl_it = checksums[i].cbegin(); + uint32_t csum_count; + decode(csum_count, bl_it); + decode(checksum, bl_it); + } catch (const buffer::error &err) { + checksum_retvals[i] = -EBADMSG; + } + } + if (checksum_retvals[i] != 0 || checksum != results[i].crc32c(-1)) { + cerr << num << ": oid " << oid << " checksum " << checksums[i] + << " incorrect, expecting " << results[i].crc32c(-1) + << std::endl; + context->errors++; + } + } + } + if (context->errors) ceph_abort(); + } + + // Attributes + if (!context->no_omap) { + if (!(old_value.header == header)) { + cerr << num << ": oid " << oid << " header does not match, old size: " + << old_value.header.length() << " new size " << header.length() + << std::endl; + ceph_assert(old_value.header == header); + } + if (omap.size() != old_value.attrs.size()) { + cerr << num << ": oid " << oid << " omap.size() is " << omap.size() + << " and old is " << old_value.attrs.size() << std::endl; + ceph_assert(omap.size() == old_value.attrs.size()); + } + if (omap_keys.size() != old_value.attrs.size()) { + cerr << num << ": oid " << oid << " omap.size() is 
" << omap_keys.size() + << " and old is " << old_value.attrs.size() << std::endl; + ceph_assert(omap_keys.size() == old_value.attrs.size()); + } + } + if (xattrs.size() != old_value.attrs.size()) { + cerr << num << ": oid " << oid << " xattrs.size() is " << xattrs.size() + << " and old is " << old_value.attrs.size() << std::endl; + ceph_assert(xattrs.size() == old_value.attrs.size()); + } + for (map<string, ContDesc>::iterator iter = old_value.attrs.begin(); + iter != old_value.attrs.end(); + ++iter) { + bufferlist bl = context->attr_gen.gen_bl( + iter->second); + if (!context->no_omap) { + map<string, bufferlist>::iterator omap_iter = omap.find(iter->first); + ceph_assert(omap_iter != omap.end()); + ceph_assert(bl.length() == omap_iter->second.length()); + bufferlist::iterator k = bl.begin(); + for(bufferlist::iterator l = omap_iter->second.begin(); + !k.end() && !l.end(); + ++k, ++l) { + ceph_assert(*l == *k); + } + } + map<string, bufferlist>::iterator xattr_iter = xattrs.find(iter->first); + ceph_assert(xattr_iter != xattrs.end()); + ceph_assert(bl.length() == xattr_iter->second.length()); + bufferlist::iterator k = bl.begin(); + for (bufferlist::iterator j = xattr_iter->second.begin(); + !k.end() && !j.end(); + ++j, ++k) { + ceph_assert(*j == *k); + } + } + if (!context->no_omap) { + for (set<string>::iterator i = omap_requested_keys.begin(); + i != omap_requested_keys.end(); + ++i) { + if (!omap_returned_values.count(*i)) + ceph_assert(!old_value.attrs.count(*i)); + if (!old_value.attrs.count(*i)) + ceph_assert(!omap_returned_values.count(*i)); + } + for (map<string, bufferlist>::iterator i = omap_returned_values.begin(); + i != omap_returned_values.end(); + ++i) { + ceph_assert(omap_requested_keys.count(i->first)); + ceph_assert(omap.count(i->first)); + ceph_assert(old_value.attrs.count(i->first)); + ceph_assert(i->second == omap[i->first]); + } + } + } + for (vector<librados::AioCompletion *>::iterator it = completions.begin(); + it != completions.end(); 
++it) { + (*it)->release(); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "ReadOp"; + } +}; + +class SnapCreateOp : public TestOp { +public: + SnapCreateOp(int n, + RadosTestContext *context, + TestOpStat *stat = 0) + : TestOp(n, context, stat) + {} + + void _begin() override + { + uint64_t snap; + string snapname; + + if (context->pool_snaps) { + stringstream ss; + + ss << context->prefix << "snap" << ++context->snapname_num; + snapname = ss.str(); + + int ret = context->io_ctx.snap_create(snapname.c_str()); + if (ret) { + cerr << "snap_create returned " << ret << std::endl; + ceph_abort(); + } + ceph_assert(!context->io_ctx.snap_lookup(snapname.c_str(), &snap)); + + } else { + ceph_assert(!context->io_ctx.selfmanaged_snap_create(&snap)); + } + + std::unique_lock state_locker{context->state_lock}; + context->add_snap(snap); + + if (!context->pool_snaps) { + vector<uint64_t> snapset(context->snaps.size()); + + int j = 0; + for (map<int,uint64_t>::reverse_iterator i = context->snaps.rbegin(); + i != context->snaps.rend(); + ++i, ++j) { + snapset[j] = i->second; + } + + state_locker.unlock(); + + int r = context->io_ctx.selfmanaged_snap_set_write_ctx(context->seq, snapset); + if (r) { + cerr << "r is " << r << " snapset is " << snapset << " seq is " << context->seq << std::endl; + ceph_abort(); + } + } + } + + string getType() override + { + return "SnapCreateOp"; + } + bool must_quiesce_other_ops() override { return context->pool_snaps; } +}; + +class SnapRemoveOp : public TestOp { +public: + int to_remove; + SnapRemoveOp(int n, RadosTestContext *context, + int snap, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + to_remove(snap) + {} + + void _begin() override + { + std::unique_lock state_locker{context->state_lock}; + uint64_t snap = context->snaps[to_remove]; + context->remove_snap(to_remove); + + if (context->pool_snaps) { + string snapname; + + 
ceph_assert(!context->io_ctx.snap_get_name(snap, &snapname)); + ceph_assert(!context->io_ctx.snap_remove(snapname.c_str())); + } else { + ceph_assert(!context->io_ctx.selfmanaged_snap_remove(snap)); + + vector<uint64_t> snapset(context->snaps.size()); + int j = 0; + for (map<int,uint64_t>::reverse_iterator i = context->snaps.rbegin(); + i != context->snaps.rend(); + ++i, ++j) { + snapset[j] = i->second; + } + + int r = context->io_ctx.selfmanaged_snap_set_write_ctx(context->seq, snapset); + if (r) { + cerr << "r is " << r << " snapset is " << snapset << " seq is " << context->seq << std::endl; + ceph_abort(); + } + } + } + + string getType() override + { + return "SnapRemoveOp"; + } +}; + +class WatchOp : public TestOp { + string oid; +public: + WatchOp(int n, + RadosTestContext *context, + const string &_oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(_oid) + {} + + void _begin() override + { + std::unique_lock state_locker{context->state_lock}; + ObjectDesc contents; + context->find_object(oid, &contents); + if (contents.deleted()) { + context->kick(); + return; + } + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + TestWatchContext *ctx = context->get_watch_context(oid); + state_locker.unlock(); + int r; + if (!ctx) { + { + std::lock_guard l{context->state_lock}; + ctx = context->watch(oid); + } + + r = context->io_ctx.watch2(context->prefix+oid, + &ctx->get_handle(), + ctx); + } else { + r = context->io_ctx.unwatch2(ctx->get_handle()); + { + std::lock_guard l{context->state_lock}; + context->unwatch(oid); + } + } + + if (r) { + cerr << "r is " << r << std::endl; + ceph_abort(); + } + + { + std::lock_guard l{context->state_lock}; + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + } + } + + string getType() override + { + return "WatchOp"; + } +}; + +class RollbackOp : public TestOp { +public: + string oid; + int roll_back_to; + librados::ObjectWriteOperation zero_write_op1; + 
librados::ObjectWriteOperation zero_write_op2; + librados::ObjectWriteOperation op; + vector<librados::AioCompletion *> comps; + std::shared_ptr<int> in_use; + int last_finished; + int outstanding; + + RollbackOp(int n, + RadosTestContext *context, + const string &_oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(_oid), roll_back_to(-1), + comps(3, NULL), + last_finished(-1), outstanding(3) + {} + + void _begin() override + { + context->state_lock.lock(); + if (context->get_watch_context(oid)) { + context->kick(); + context->state_lock.unlock(); + return; + } + + if (context->snaps.empty()) { + context->kick(); + context->state_lock.unlock(); + done = true; + return; + } + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + roll_back_to = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create( + roll_back_to, + roll_back_to); + + + cout << "rollback oid " << oid << " to " << roll_back_to << std::endl; + + bool existed_before = context->object_existed_at(oid); + bool existed_after = context->object_existed_at(oid, roll_back_to); + + context->roll_back(oid, roll_back_to); + uint64_t snap = context->snaps[roll_back_to]; + + outstanding -= (!existed_before) + (!existed_after); + + context->state_lock.unlock(); + + bufferlist bl, bl2; + zero_write_op1.append(bl); + zero_write_op2.append(bl2); + + if (context->pool_snaps) { + op.snap_rollback(snap); + } else { + op.selfmanaged_snap_rollback(snap); + } + + if (existed_before) { + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comps[0] = + context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate( + context->prefix+oid, comps[0], &zero_write_op1); + } + { + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(1)); + comps[1] = + 
context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate( + context->prefix+oid, comps[1], &op); + } + if (existed_after) { + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(2)); + comps[2] = + context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate( + context->prefix+oid, comps[2], &zero_write_op2); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + uint64_t tid = info->id; + cout << num << ": finishing rollback tid " << tid + << " to " << context->prefix + oid << std::endl; + ceph_assert((int)(info->id) > last_finished); + last_finished = info->id; + + int r; + if ((r = comps[last_finished]->get_return_value()) != 0) { + cerr << "err " << r << std::endl; + ceph_abort(); + } + if (--outstanding == 0) { + done = true; + context->update_object_version(oid, comps[tid]->get_version64()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + in_use = std::shared_ptr<int>(); + context->kick(); + } + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "RollBackOp"; + } +}; + +class CopyFromOp : public TestOp { +public: + string oid, oid_src; + ObjectDesc src_value; + librados::ObjectWriteOperation op; + librados::ObjectReadOperation rd_op; + librados::AioCompletion *comp; + librados::AioCompletion *comp_racing_read = nullptr; + std::shared_ptr<int> in_use; + int snap; + int done; + uint64_t version; + int r; + CopyFromOp(int n, + RadosTestContext *context, + const string &oid, + const string &oid_src, + TestOpStat *stat) + : TestOp(n, context, stat), + oid(oid), oid_src(oid_src), + comp(NULL), snap(-1), done(0), + version(0), r(0) + {} + + void _begin() override + { + ContDesc cont; + { + std::lock_guard l{context->state_lock}; + cont = ContDesc(context->seq_num, context->current_snap, + 
context->seq_num, ""); + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->oid_in_use.insert(oid_src); + context->oid_not_in_use.erase(oid_src); + + // choose source snap + if (0 && !(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + context->find_object(oid_src, &src_value, snap); + if (!src_value.deleted()) + context->update_object_full(oid, src_value); + } + + string src = context->prefix+oid_src; + op.copy_from(src.c_str(), context->io_ctx, src_value.version, 0); + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op); + + // queue up a racing read, too. + pair<TestOp*, TestOp::CallbackInfo*> *read_cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(1)); + comp_racing_read = context->rados.aio_create_completion((void*) read_cb_arg, &write_callback); + rd_op.stat(NULL, NULL, NULL); + context->io_ctx.aio_operate(context->prefix+oid, comp_racing_read, &rd_op, + librados::OPERATION_ORDER_READS_WRITES, // order wrt previous write/update + NULL); + + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + + // note that the read can (and atm will) come back before the + // write reply, but will reflect the update and the versions will + // match. 
+ + if (info->id == 0) { + // copy_from + ceph_assert(comp->is_complete()); + cout << num << ": finishing copy_from to " << context->prefix + oid << std::endl; + if ((r = comp->get_return_value())) { + if (r == -ENOENT && src_value.deleted()) { + cout << num << ": got expected ENOENT (src dne)" << std::endl; + } else { + cerr << "Error: oid " << oid << " copy_from " << oid_src << " returned error code " + << r << std::endl; + ceph_abort(); + } + } else { + ceph_assert(!version || comp->get_version64() == version); + version = comp->get_version64(); + context->update_object_version(oid, comp->get_version64()); + } + } else if (info->id == 1) { + // racing read + ceph_assert(comp_racing_read->is_complete()); + cout << num << ": finishing copy_from racing read to " << context->prefix + oid << std::endl; + if ((r = comp_racing_read->get_return_value())) { + if (!(r == -ENOENT && src_value.deleted())) { + cerr << "Error: oid " << oid << " copy_from " << oid_src << " returned error code " + << r << std::endl; + } + } else { + ceph_assert(comp_racing_read->get_return_value() == 0); + ceph_assert(!version || comp_racing_read->get_version64() == version); + version = comp_racing_read->get_version64(); + } + } + if (++done == 2) { + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->oid_in_use.erase(oid_src); + context->oid_not_in_use.insert(oid_src); + context->kick(); + } + } + + bool finished() override + { + return done == 2; + } + + string getType() override + { + return "CopyFromOp"; + } +}; + +class ChunkReadOp : public TestOp { +public: + vector<librados::AioCompletion *> completions; + librados::ObjectReadOperation op; + string oid; + ObjectDesc old_value; + ObjectDesc tgt_value; + int snap; + bool balance_reads; + bool localize_reads; + + std::shared_ptr<int> in_use; + + vector<bufferlist> results; + vector<int> retvals; + vector<bool> is_sparse_read; + uint64_t waiting_on; + + vector<bufferlist> checksums; + vector<int> 
checksum_retvals; + uint32_t offset = 0; + uint32_t length = 0; + string tgt_oid; + string tgt_pool_name; + uint32_t tgt_offset = 0; + + ChunkReadOp(int n, + RadosTestContext *context, + const string &oid, + const string &tgt_pool_name, + bool balance_reads, + bool localize_reads, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + completions(2), + oid(oid), + snap(0), + balance_reads(balance_reads), + localize_reads(localize_reads), + results(2), + retvals(2), + waiting_on(0), + checksums(2), + checksum_retvals(2), + tgt_pool_name(tgt_pool_name) + {} + + void _do_read(librados::ObjectReadOperation& read_op, uint32_t offset, uint32_t length, int index) { + read_op.read(offset, + length, + &results[index], + &retvals[index]); + if (index != 0) { + bufferlist init_value_bl; + encode(static_cast<uint32_t>(-1), init_value_bl); + read_op.checksum(LIBRADOS_CHECKSUM_TYPE_CRC32C, init_value_bl, offset, length, + 0, &checksums[index], &checksum_retvals[index]); + } + + } + + void _begin() override + { + context->state_lock.lock(); + std::cout << num << ": chunk read oid " << oid << " snap " << snap << std::endl; + done = 0; + for (uint32_t i = 0; i < 2; i++) { + completions[i] = context->rados.aio_create_completion((void *) this, &read_callback); + } + + context->find_object(oid, &old_value); + + if (old_value.chunk_info.size() == 0) { + std::cout << ": no chunks" << std::endl; + context->kick(); + context->state_lock.unlock(); + done = true; + return; + } + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + if (old_value.deleted()) { + std::cout << num << ": expect deleted" << std::endl; + } else { + std::cout << num << ": expect " << old_value.most_recent() << std::endl; + } + + int rand_index = rand() % old_value.chunk_info.size(); + auto iter = old_value.chunk_info.begin(); + for (int i = 0; i < rand_index; i++) { + iter++; + } + offset = iter->first; + offset += (rand() % iter->second.length)/2; + uint32_t t_length = rand() % 
iter->second.length; + while (t_length + offset > iter->first + iter->second.length) { + t_length = rand() % iter->second.length; + } + length = t_length; + tgt_offset = iter->second.offset + offset - iter->first; + tgt_oid = iter->second.oid; + + std::cout << num << ": ori offset " << iter->first << " req offset " << offset + << " ori length " << iter->second.length << " req length " << length + << " ori tgt_offset " << iter->second.offset << " req tgt_offset " << tgt_offset + << " tgt_oid " << tgt_oid << std::endl; + + TestWatchContext *ctx = context->get_watch_context(oid); + context->state_lock.unlock(); + if (ctx) { + ceph_assert(old_value.exists); + TestAlarm alarm; + std::cerr << num << ": about to start" << std::endl; + ctx->start(); + std::cerr << num << ": started" << std::endl; + bufferlist bl; + context->io_ctx.set_notify_timeout(600); + int r = context->io_ctx.notify2(context->prefix+oid, bl, 0, NULL); + if (r < 0) { + std::cerr << "r is " << r << std::endl; + ceph_abort(); + } + std::cerr << num << ": notified, waiting" << std::endl; + ctx->wait(); + } + std::lock_guard state_locker{context->state_lock}; + + _do_read(op, offset, length, 0); + + unsigned flags = 0; + if (balance_reads) + flags |= librados::OPERATION_BALANCE_READS; + if (localize_reads) + flags |= librados::OPERATION_LOCALIZE_READS; + + ceph_assert(!context->io_ctx.aio_operate(context->prefix+oid, completions[0], &op, + flags, NULL)); + waiting_on++; + + _do_read(op, tgt_offset, length, 1); + ceph_assert(!context->io_ctx.aio_operate(context->prefix+tgt_oid, completions[1], &op, + flags, NULL)); + + waiting_on++; + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + ceph_assert(!done); + ceph_assert(waiting_on > 0); + if (--waiting_on) { + return; + } + + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + int retval = completions[0]->get_return_value(); + std::cout << ": finish!! 
ret: " << retval << std::endl; + context->find_object(tgt_oid, &tgt_value); + + for (int i = 0; i < 2; i++) { + ceph_assert(completions[i]->is_complete()); + int err = completions[i]->get_return_value(); + if (err != retval) { + cerr << num << ": Error: oid " << oid << " read returned different error codes: " + << retval << " and " << err << std::endl; + ceph_abort(); + } + if (err) { + if (!(err == -ENOENT && old_value.deleted())) { + cerr << num << ": Error: oid " << oid << " read returned error code " + << err << std::endl; + ceph_abort(); + } + } + } + + if (!retval) { + if (old_value.deleted()) { + std::cout << num << ": expect deleted" << std::endl; + ceph_abort_msg("expected deleted"); + } else { + std::cout << num << ": expect " << old_value.most_recent() << std::endl; + } + if (tgt_value.has_contents()) { + uint32_t checksum[2] = {0}; + if (checksum_retvals[1] == 0) { + try { + auto bl_it = checksums[1].cbegin(); + uint32_t csum_count; + decode(csum_count, bl_it); + decode(checksum[1], bl_it); + } catch (const buffer::error &err) { + checksum_retvals[1] = -EBADMSG; + } + } + + if (checksum_retvals[1] != 0) { + cerr << num << ": oid " << oid << " checksum retvals " << checksums[0] + << " error " << std::endl; + context->errors++; + } + + checksum[0] = results[0].crc32c(-1); + + if (checksum[0] != checksum[1]) { + cerr << num << ": oid " << oid << " checksum src " << checksum[0] + << " chunksum tgt " << checksum[1] << " incorrect, expecting " + << results[0].crc32c(-1) + << std::endl; + context->errors++; + } + if (context->errors) ceph_abort(); + } + } + for (vector<librados::AioCompletion *>::iterator it = completions.begin(); + it != completions.end(); ++it) { + (*it)->release(); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "ChunkReadOp"; + } +}; + +class CopyOp : public TestOp { +public: + string oid, oid_src, tgt_pool_name; + librados::ObjectWriteOperation op; + 
librados::ObjectReadOperation rd_op; + librados::AioCompletion *comp; + ObjectDesc src_value, tgt_value; + int done; + int r; + CopyOp(int n, + RadosTestContext *context, + const string &oid_src, + const string &oid, + const string &tgt_pool_name, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), oid_src(oid_src), tgt_pool_name(tgt_pool_name), + comp(NULL), done(0), r(0) + {} + + void _begin() override + { + std::lock_guard l{context->state_lock}; + context->oid_in_use.insert(oid_src); + context->oid_not_in_use.erase(oid_src); + + string src = context->prefix+oid_src; + context->find_object(oid_src, &src_value); + op.copy_from(src.c_str(), context->io_ctx, src_value.version, 0); + + cout << "copy op oid " << oid_src << " to " << oid << " tgt_pool_name " << tgt_pool_name << std::endl; + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, &write_callback); + if (tgt_pool_name == context->low_tier_pool_name) { + context->low_tier_io_ctx.aio_operate(context->prefix+oid, comp, &op); + } else { + context->io_ctx.aio_operate(context->prefix+oid, comp, &op); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + + if (info->id == 0) { + ceph_assert(comp->is_complete()); + cout << num << ": finishing copy op to oid " << oid << std::endl; + if ((r = comp->get_return_value())) { + cerr << "Error: oid " << oid << " write returned error code " + << r << std::endl; + ceph_abort(); + } + } + + if (++done == 1) { + context->oid_in_use.erase(oid_src); + context->oid_not_in_use.insert(oid_src); + context->kick(); + } + } + + bool finished() override + { + return done == 1; + } + + string getType() override + { + return "CopyOp"; + } +}; + +class SetChunkOp : public TestOp { +public: + string oid, oid_tgt, tgt_pool_name; + ObjectDesc src_value, tgt_value; + 
librados::ObjectReadOperation op; + librados::ObjectReadOperation rd_op; + librados::AioCompletion *comp; + std::shared_ptr<int> in_use; + int done; + int r; + uint64_t offset; + uint32_t length; + uint64_t tgt_offset; + SetChunkOp(int n, + RadosTestContext *context, + const string &oid, + uint64_t offset, + uint32_t length, + const string &oid_tgt, + const string &tgt_pool_name, + uint64_t tgt_offset, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), oid_tgt(oid_tgt), tgt_pool_name(tgt_pool_name), + comp(NULL), done(0), + r(0), offset(offset), length(length), + tgt_offset(tgt_offset) + {} + + void _begin() override + { + std::lock_guard l{context->state_lock}; + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + if (tgt_pool_name.empty()) ceph_abort(); + + context->find_object(oid, &src_value); + context->find_object(oid_tgt, &tgt_value); + + if (src_value.version != 0 && !src_value.deleted()) + op.assert_version(src_value.version); + op.set_chunk(offset, length, context->low_tier_io_ctx, + context->prefix+oid_tgt, tgt_offset, CEPH_OSD_OP_FLAG_WITH_REFERENCE); + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op, + librados::OPERATION_ORDER_READS_WRITES, NULL); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + + if (info->id == 0) { + ceph_assert(comp->is_complete()); + cout << num << ": finishing set_chunk to oid " << oid << std::endl; + if ((r = comp->get_return_value())) { + if (r == -ENOENT && src_value.deleted()) { + cout << num << ": got expected ENOENT (src dne)" << std::endl; + } else if (r == -EOPNOTSUPP) { + bool is_overlapped = false; + for (auto &p : src_value.chunk_info) { + if ((p.first <= offset && p.first + p.second.length > offset) || + 
(p.first > offset && p.first <= offset + length)) { + cout << " range is overlapped offset: " << offset << " length: " << length + << " chunk_info offset: " << p.second.offset << " length " + << p.second.length << std::endl; + is_overlapped = true; + context->update_object_version(oid, comp->get_version64()); + } + } + if (!is_overlapped) { + cerr << "Error: oid " << oid << " set_chunk " << oid_tgt << " returned error code " + << r << " offset: " << offset << " length: " << length << std::endl; + ceph_abort(); + } + } else { + cerr << "Error: oid " << oid << " set_chunk " << oid_tgt << " returned error code " + << r << std::endl; + ceph_abort(); + } + } else { + ChunkDesc info; + info.offset = tgt_offset; + info.length = length; + info.oid = oid_tgt; + context->update_object_chunk_target(oid, offset, info); + context->update_object_version(oid, comp->get_version64()); + } + } + + if (++done == 1) { + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + } + + bool finished() override + { + return done == 1; + } + + string getType() override + { + return "SetChunkOp"; + } +}; + +class SetRedirectOp : public TestOp { +public: + string oid, oid_tgt, tgt_pool_name; + ObjectDesc src_value, tgt_value; + librados::ObjectWriteOperation op; + librados::ObjectReadOperation rd_op; + librados::AioCompletion *comp; + std::shared_ptr<int> in_use; + int done; + int r; + SetRedirectOp(int n, + RadosTestContext *context, + const string &oid, + const string &oid_tgt, + const string &tgt_pool_name, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + oid(oid), oid_tgt(oid_tgt), tgt_pool_name(tgt_pool_name), + comp(NULL), done(0), + r(0) + {} + + void _begin() override + { + std::lock_guard l{context->state_lock}; + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->oid_redirect_in_use.insert(oid_tgt); + context->oid_redirect_not_in_use.erase(oid_tgt); + + if (tgt_pool_name.empty()) ceph_abort(); + + 
context->find_object(oid, &src_value); + if(!context->redirect_objs[oid].empty()) { + /* copy_from oid --> oid_tgt */ + comp = context->rados.aio_create_completion(); + string src = context->prefix+oid; + op.copy_from(src.c_str(), context->io_ctx, src_value.version, 0); + context->low_tier_io_ctx.aio_operate(context->prefix+oid_tgt, comp, &op, + librados::OPERATION_ORDER_READS_WRITES); + comp->wait_for_complete(); + if ((r = comp->get_return_value())) { + cerr << "Error: oid " << oid << " copy_from " << oid_tgt << " returned error code " + << r << std::endl; + ceph_abort(); + } + comp->release(); + + /* unset redirect target */ + comp = context->rados.aio_create_completion(); + bool present = !src_value.deleted(); + op.unset_manifest(); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op, + librados::OPERATION_ORDER_READS_WRITES | + librados::OPERATION_IGNORE_REDIRECT); + comp->wait_for_complete(); + if ((r = comp->get_return_value())) { + if (!(r == -ENOENT && !present) && r != -EOPNOTSUPP) { + cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl; + ceph_abort(); + } + } + comp->release(); + + context->oid_redirect_not_in_use.insert(context->redirect_objs[oid]); + context->oid_redirect_in_use.erase(context->redirect_objs[oid]); + } + + comp = context->rados.aio_create_completion(); + rd_op.stat(NULL, NULL, NULL); + context->io_ctx.aio_operate(context->prefix+oid, comp, &rd_op, + librados::OPERATION_ORDER_READS_WRITES | + librados::OPERATION_IGNORE_REDIRECT, + NULL); + comp->wait_for_complete(); + if ((r = comp->get_return_value()) && !src_value.deleted()) { + cerr << "Error: oid " << oid << " stat returned error code " + << r << std::endl; + ceph_abort(); + } + context->update_object_version(oid, comp->get_version64()); + comp->release(); + + comp = context->rados.aio_create_completion(); + rd_op.stat(NULL, NULL, NULL); + context->low_tier_io_ctx.aio_operate(context->prefix+oid_tgt, comp, &rd_op, + 
librados::OPERATION_ORDER_READS_WRITES | + librados::OPERATION_IGNORE_REDIRECT, + NULL); + comp->wait_for_complete(); + if ((r = comp->get_return_value())) { + cerr << "Error: oid " << oid_tgt << " stat returned error code " + << r << std::endl; + ceph_abort(); + } + uint64_t tgt_version = comp->get_version64(); + comp->release(); + + + context->find_object(oid, &src_value); + + if (src_value.version != 0 && !src_value.deleted()) + op.assert_version(src_value.version); + op.set_redirect(context->prefix+oid_tgt, context->low_tier_io_ctx, tgt_version); + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp = context->rados.aio_create_completion((void*) cb_arg, &write_callback); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op, + librados::OPERATION_ORDER_READS_WRITES); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + + if (info->id == 0) { + ceph_assert(comp->is_complete()); + cout << num << ": finishing set_redirect to oid " << oid << std::endl; + if ((r = comp->get_return_value())) { + if (r == -ENOENT && src_value.deleted()) { + cout << num << ": got expected ENOENT (src dne)" << std::endl; + } else { + cerr << "Error: oid " << oid << " set_redirect " << oid_tgt << " returned error code " + << r << std::endl; + ceph_abort(); + } + } else { + context->update_object_redirect_target(oid, oid_tgt); + context->update_object_version(oid, comp->get_version64()); + } + } + + if (++done == 1) { + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + } + } + + bool finished() override + { + return done == 1; + } + + string getType() override + { + return "SetRedirectOp"; + } +}; + +class UnsetRedirectOp : public TestOp { +public: + string oid; + librados::ObjectWriteOperation op; + librados::AioCompletion *comp = nullptr; + + UnsetRedirectOp(int n, + RadosTestContext *context, + const string 
&oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), oid(oid) + {} + + // Synchronously removes the object (ignoring any redirect) and updates the model bookkeeping. + // Does nothing except kick the context when the oid has a watch context. + void _begin() override + { + std::unique_lock state_locker{context->state_lock}; + if (context->get_watch_context(oid)) { + context->kick(); + return; + } + + ObjectDesc contents; + context->find_object(oid, &contents); + bool present = !contents.deleted(); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->seq_num++; + + context->remove_object(oid); + + // state_lock is dropped while we block on the remove below. + state_locker.unlock(); + + comp = context->rados.aio_create_completion(); + op.remove(); + context->io_ctx.aio_operate(context->prefix+oid, comp, &op, + librados::OPERATION_ORDER_READS_WRITES | + librados::OPERATION_IGNORE_REDIRECT); + comp->wait_for_complete(); + int r = comp->get_return_value(); + // The completion was waited on synchronously and is not used again in this class; release it so it is not leaked. + comp->release(); + comp = nullptr; + if (r && !(r == -ENOENT && !present)) { + cerr << "r is " << r << " while deleting " << oid << " and present is " << present << std::endl; + ceph_abort(); + } + state_locker.lock(); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + if(!context->redirect_objs[oid].empty()) { + context->oid_redirect_not_in_use.insert(context->redirect_objs[oid]); + context->oid_redirect_in_use.erase(context->redirect_objs[oid]); + context->update_object_redirect_target(oid, string()); + } + context->kick(); + } + + string getType() override + { + return "UnsetRedirectOp"; + } +}; + +class TierPromoteOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectWriteOperation op; + string oid; + std::shared_ptr<int> in_use; + + TierPromoteOp(int n, + RadosTestContext *context, + const string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), + completion(NULL), + oid(oid) + {} + + void _begin() override + { + context->state_lock.lock(); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = 
context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + context->state_lock.unlock(); + + op.tier_promote(); + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op); + ceph_assert(!r); + } + + // Completion callback: aborts unless the promote returned 0, then records the new object version and moves the oid back to the not-in-use set. + void _finish(CallbackInfo *info) override + { + std::lock_guard l{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + + ObjectDesc oid_value; + context->find_object(oid, &oid_value); + int r = completion->get_return_value(); + cout << num << ": got " << cpp_strerror(r) << std::endl; + if (r == 0) { + // success + } else { + ceph_abort_msg("shouldn't happen"); + } + context->update_object_version(oid, completion->get_version64()); + context->find_object(oid, &oid_value); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "TierPromoteOp"; + } +}; + +class TierFlushOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectReadOperation op; + string oid; + std::shared_ptr<int> in_use; + + TierFlushOp(int n, + RadosTestContext *context, + const string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), + completion(NULL), + oid(oid) + {} + + // Marks the oid in use under state_lock, then issues an asynchronous tier_flush with OPERATION_IGNORE_CACHE. + void _begin() override + { + context->state_lock.lock(); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + context->state_lock.unlock(); + + op.tier_flush(); + unsigned flags = librados::OPERATION_IGNORE_CACHE; + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, flags, NULL); + ceph_assert(!r); + } + + void _finish(CallbackInfo *info) override + { + context->state_lock.lock(); + ceph_assert(!done); + 
ceph_assert(completion->is_complete()); + + int r = completion->get_return_value(); + cout << num << ": got " << cpp_strerror(r) << std::endl; + if (r == 0) { + // success + } else { + ceph_abort_msg("shouldn't happen"); + } + context->update_object_version(oid, completion->get_version64()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->kick(); + done = true; + context->state_lock.unlock(); + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "TierFlushOp"; + } +}; + +class HitSetListOp : public TestOp { + librados::AioCompletion *comp1, *comp2; + uint32_t hash; + std::list< std::pair<time_t, time_t> > ls; + bufferlist bl; + +public: + HitSetListOp(int n, + RadosTestContext *context, + uint32_t hash, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + comp1(NULL), comp2(NULL), + hash(hash) + {} + + // Issues the asynchronous hit_set_list request for this hash; the listing is written into ls. + void _begin() override + { + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp1 = context->rados.aio_create_completion((void*) cb_arg, + &write_callback); + int r = context->io_ctx.hit_set_list(hash, comp1, &ls); + ceph_assert(r == 0); + } + + // Runs twice: while comp2 is unset it picks a random listed hit set and requests it; once comp2 is set it handles that result. + void _finish(CallbackInfo *info) override { + std::lock_guard l{context->state_lock}; + if (!comp2) { + if (ls.empty()) { + cerr << num << ": no hitsets" << std::endl; + done = true; + } else { + cerr << num << ": hitsets are " << ls << std::endl; + int r = rand() % ls.size(); + std::list<pair<time_t,time_t> >::iterator p = ls.begin(); + while (r--) + ++p; + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + comp2 = context->rados.aio_create_completion((void*) cb_arg, &write_callback); + r = context->io_ctx.hit_set_get(hash, comp2, p->second, &bl); + ceph_assert(r == 0); + } + } else { + int r = comp2->get_return_value(); + if (r == 0) { + HitSet hitset; + auto p = bl.cbegin(); + 
decode(hitset, p); + cout << num << ": got hitset of type " << hitset.get_type_name() + << " size " << bl.length() + << std::endl; + } else { + // FIXME: we could verify that we did in fact race with a trim... + ceph_assert(r == -ENOENT); + } + done = true; + } + + context->kick(); + } + + bool finished() override { + return done; + } + + string getType() override { + return "HitSetListOp"; + } +}; + +class UndirtyOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectWriteOperation op; + string oid; + + UndirtyOp(int n, + RadosTestContext *context, + const string &oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + completion(NULL), + oid(oid) + {} + + void _begin() override + { + context->state_lock.lock(); + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->update_object_undirty(oid); + context->state_lock.unlock(); + + op.undirty(); + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, 0); + ceph_assert(!r); + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + context->update_object_version(oid, completion->get_version64()); + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "UndirtyOp"; + } +}; + +class IsDirtyOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectReadOperation op; + string oid; + bool dirty; + ObjectDesc old_value; + int snap = 0; + std::shared_ptr<int> in_use; + + IsDirtyOp(int n, + RadosTestContext *context, + const string 
&oid, + TestOpStat *stat = 0) + : TestOp(n, context, stat), + completion(NULL), + oid(oid), + dirty(false) + {} + + void _begin() override + { + context->state_lock.lock(); + + if (!(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + std::cout << num << ": is_dirty oid " << oid << " snap " << snap + << std::endl; + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + + context->oid_in_use.insert(oid); + context->oid_not_in_use.erase(oid); + context->state_lock.unlock(); + + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + + op.is_dirty(&dirty, NULL); + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, 0); + ceph_assert(!r); + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + context->oid_in_use.erase(oid); + context->oid_not_in_use.insert(oid); + + ceph_assert(context->find_object(oid, &old_value, snap)); + + int r = completion->get_return_value(); + if (r == 0) { + cout << num << ": " << (dirty ? 
"dirty" : "clean") << std::endl; + ceph_assert(!old_value.deleted()); + ceph_assert(dirty == old_value.dirty); + } else { + cout << num << ": got " << r << std::endl; + ceph_assert(r == -ENOENT); + ceph_assert(old_value.deleted()); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "IsDirtyOp"; + } +}; + + + +class CacheFlushOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectReadOperation op; + string oid; + bool blocking; + int snap; + bool can_fail; + std::shared_ptr<int> in_use; + + CacheFlushOp(int n, + RadosTestContext *context, + const string &oid, + TestOpStat *stat, + bool b) + : TestOp(n, context, stat), + completion(NULL), + oid(oid), + blocking(b), + snap(0), + can_fail(false) + {} + + void _begin() override + { + context->state_lock.lock(); + + if (!(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + // not being particularly specific here about knowing which + // flushes are on the oldest clean snap and which ones are not. + can_fail = !blocking || !context->snaps.empty(); + // FIXME: we could fail if we've ever removed a snap due to + // the async snap trimming. + can_fail = true; + cout << num << ": " << (blocking ? 
"cache_flush" : "cache_try_flush") + << " oid " << oid << " snap " << snap << std::endl; + + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + context->oid_flushing.insert(oid); + context->oid_not_flushing.erase(oid); + context->state_lock.unlock(); + + unsigned flags = librados::OPERATION_IGNORE_CACHE; + if (blocking) { + op.cache_flush(); + } else { + op.cache_try_flush(); + flags = librados::OPERATION_SKIPRWLOCKS; + } + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, flags, NULL); + ceph_assert(!r); + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + context->oid_flushing.erase(oid); + context->oid_not_flushing.insert(oid); + int r = completion->get_return_value(); + cout << num << ": got " << cpp_strerror(r) << std::endl; + if (r == 0) { + context->update_object_version(oid, 0, snap); + } else if (r == -EBUSY) { + ceph_assert(can_fail); + } else if (r == -EINVAL) { + // caching not enabled? + } else if (r == -ENOENT) { + // may have raced with a remove? 
+ } else { + ceph_abort_msg("shouldn't happen"); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "CacheFlushOp"; + } +}; + +class CacheEvictOp : public TestOp { +public: + librados::AioCompletion *completion; + librados::ObjectReadOperation op; + string oid; + std::shared_ptr<int> in_use; + + CacheEvictOp(int n, + RadosTestContext *context, + const string &oid, + TestOpStat *stat) + : TestOp(n, context, stat), + completion(NULL), + oid(oid) + {} + + void _begin() override + { + context->state_lock.lock(); + + int snap; + if (!(rand() % 4) && !context->snaps.empty()) { + snap = rand_choose(context->snaps)->first; + in_use = context->snaps_in_use.lookup_or_create(snap, snap); + } else { + snap = -1; + } + cout << num << ": cache_evict oid " << oid << " snap " << snap << std::endl; + + if (snap >= 0) { + context->io_ctx.snap_set_read(context->snaps[snap]); + } + + pair<TestOp*, TestOp::CallbackInfo*> *cb_arg = + new pair<TestOp*, TestOp::CallbackInfo*>(this, + new TestOp::CallbackInfo(0)); + completion = context->rados.aio_create_completion((void *) cb_arg, + &write_callback); + context->state_lock.unlock(); + + op.cache_evict(); + int r = context->io_ctx.aio_operate(context->prefix+oid, completion, + &op, librados::OPERATION_IGNORE_CACHE, + NULL); + ceph_assert(!r); + + if (snap >= 0) { + context->io_ctx.snap_set_read(0); + } + } + + void _finish(CallbackInfo *info) override + { + std::lock_guard state_locker{context->state_lock}; + ceph_assert(!done); + ceph_assert(completion->is_complete()); + + int r = completion->get_return_value(); + cout << num << ": got " << cpp_strerror(r) << std::endl; + if (r == 0) { + // yay! + } else if (r == -EBUSY) { + // raced with something that dirtied the object + } else if (r == -EINVAL) { + // caching not enabled? + } else if (r == -ENOENT) { + // may have raced with a remove? 
+ } else { + ceph_abort_msg("shouldn't happen"); + } + context->kick(); + done = true; + } + + bool finished() override + { + return done; + } + + string getType() override + { + return "CacheEvictOp"; + } +}; + + +#endif diff --git a/src/test/osd/TestECBackend.cc b/src/test/osd/TestECBackend.cc new file mode 100644 index 000000000..affff3694 --- /dev/null +++ b/src/test/osd/TestECBackend.cc @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank Storage, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <iostream> +#include <sstream> +#include <errno.h> +#include <signal.h> +#include "osd/ECBackend.h" +#include "gtest/gtest.h" + +TEST(ECUtil, stripe_info_t) +{ + const uint64_t swidth = 4096; + const uint64_t ssize = 4; + + ECUtil::stripe_info_t s(ssize, swidth); + ASSERT_EQ(s.get_stripe_width(), swidth); + + ASSERT_EQ(s.logical_to_next_chunk_offset(0), 0u); + ASSERT_EQ(s.logical_to_next_chunk_offset(1), s.get_chunk_size()); + ASSERT_EQ(s.logical_to_next_chunk_offset(swidth - 1), + s.get_chunk_size()); + + ASSERT_EQ(s.logical_to_prev_chunk_offset(0), 0u); + ASSERT_EQ(s.logical_to_prev_chunk_offset(swidth), s.get_chunk_size()); + ASSERT_EQ(s.logical_to_prev_chunk_offset((swidth * 2) - 1), + s.get_chunk_size()); + + ASSERT_EQ(s.logical_to_next_stripe_offset(0), 0u); + ASSERT_EQ(s.logical_to_next_stripe_offset(swidth - 1), + s.get_stripe_width()); + + ASSERT_EQ(s.logical_to_prev_stripe_offset(0), 0u); + ASSERT_EQ(s.logical_to_prev_stripe_offset(swidth), s.get_stripe_width()); + ASSERT_EQ(s.logical_to_prev_stripe_offset((swidth * 2) - 1), + s.get_stripe_width()); + + 
ASSERT_EQ(s.aligned_logical_offset_to_chunk_offset(2*swidth), + 2*s.get_chunk_size()); + ASSERT_EQ(s.aligned_chunk_offset_to_logical_offset(2*s.get_chunk_size()), + 2*s.get_stripe_width()); + + ASSERT_EQ(s.aligned_offset_len_to_chunk(make_pair(swidth, 10*swidth)), + make_pair(s.get_chunk_size(), 10*s.get_chunk_size())); + + ASSERT_EQ(s.offset_len_to_stripe_bounds(make_pair(swidth-10, (uint64_t)20)), + make_pair((uint64_t)0, 2*swidth)); +} + diff --git a/src/test/osd/TestMClockScheduler.cc b/src/test/osd/TestMClockScheduler.cc new file mode 100644 index 000000000..0feb427ec --- /dev/null +++ b/src/test/osd/TestMClockScheduler.cc @@ -0,0 +1,171 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- + +#include <chrono> + +#include "gtest/gtest.h" + +#include "global/global_context.h" +#include "global/global_init.h" +#include "common/common_init.h" + +#include "osd/scheduler/mClockScheduler.h" +#include "osd/scheduler/OpSchedulerItem.h" + +using namespace ceph::osd::scheduler; + +int main(int argc, char **argv) { + std::vector<const char*> args(argv, argv+argc); + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + + +class mClockSchedulerTest : public testing::Test { +public: + uint32_t num_shards; + bool is_rotational; + mClockScheduler q; + + uint64_t client1; + uint64_t client2; + uint64_t client3; + + mClockSchedulerTest() : + num_shards(1), + is_rotational(false), + q(g_ceph_context, num_shards, is_rotational), + client1(1001), + client2(9999), + client3(100000001) + {} + + struct MockDmclockItem : public PGOpQueueable { + op_scheduler_class scheduler_class; + + MockDmclockItem(op_scheduler_class _scheduler_class) : + PGOpQueueable(spg_t()), + scheduler_class(_scheduler_class) {} + + MockDmclockItem() + : 
MockDmclockItem(op_scheduler_class::background_best_effort) {} + + op_type_t get_op_type() const final { + return op_type_t::client_op; // not used + } + + ostream &print(ostream &rhs) const final { return rhs; } + + std::optional<OpRequestRef> maybe_get_op() const final { + return std::nullopt; + } + + op_scheduler_class get_scheduler_class() const final { + return scheduler_class; + } + + void run(OSD *osd, OSDShard *sdata, PGRef& pg, ThreadPool::TPHandle &handle) final {} + }; +}; + +template <typename... Args> +OpSchedulerItem create_item( + epoch_t e, uint64_t owner, Args&&... args) +{ + return OpSchedulerItem( + std::make_unique<mClockSchedulerTest::MockDmclockItem>( + std::forward<Args>(args)...), + 12, 12, + utime_t(), owner, e); +} + +OpSchedulerItem get_item(WorkItem item) +{ + return std::move(std::get<OpSchedulerItem>(item)); +} + +TEST_F(mClockSchedulerTest, TestEmpty) { + ASSERT_TRUE(q.empty()); + + for (unsigned i = 100; i < 105; i+=2) { + q.enqueue(create_item(i, client1, op_scheduler_class::client)); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + ASSERT_FALSE(q.empty()); + + std::list<OpSchedulerItem> reqs; + + reqs.push_back(get_item(q.dequeue())); + reqs.push_back(get_item(q.dequeue())); + + ASSERT_EQ(2u, reqs.size()); + ASSERT_FALSE(q.empty()); + + for (auto &&i : reqs) { + q.enqueue_front(std::move(i)); + } + reqs.clear(); + + ASSERT_FALSE(q.empty()); + + for (int i = 0; i < 3; ++i) { + ASSERT_FALSE(q.empty()); + q.dequeue(); + } + + ASSERT_TRUE(q.empty()); +} + +TEST_F(mClockSchedulerTest, TestSingleClientOrderedEnqueueDequeue) { + ASSERT_TRUE(q.empty()); + + for (unsigned i = 100; i < 105; ++i) { + q.enqueue(create_item(i, client1, op_scheduler_class::client)); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + + auto r = get_item(q.dequeue()); + ASSERT_EQ(100u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(101u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(102u, 
r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(103u, r.get_map_epoch()); + + r = get_item(q.dequeue()); + ASSERT_EQ(104u, r.get_map_epoch()); +} + +TEST_F(mClockSchedulerTest, TestMultiClientOrderedEnqueueDequeue) { + const unsigned NUM = 1000; + for (unsigned i = 0; i < NUM; ++i) { + for (auto &&c: {client1, client2, client3}) { + q.enqueue(create_item(i, c)); + std::this_thread::sleep_for(std::chrono::microseconds(1)); + } + } + + std::map<uint64_t, epoch_t> next; + for (auto &&c: {client1, client2, client3}) { + next[c] = 0; + } + for (unsigned i = 0; i < NUM * 3; ++i) { + ASSERT_FALSE(q.empty()); + auto r = get_item(q.dequeue()); + auto owner = r.get_owner(); + auto niter = next.find(owner); + ASSERT_FALSE(niter == next.end()); + ASSERT_EQ(niter->second, r.get_map_epoch()); + niter->second++; + } + ASSERT_TRUE(q.empty()); +} diff --git a/src/test/osd/TestOSDMap.cc b/src/test/osd/TestOSDMap.cc new file mode 100644 index 000000000..dabb1a1c0 --- /dev/null +++ b/src/test/osd/TestOSDMap.cc @@ -0,0 +1,2306 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include "gtest/gtest.h" +#include "osd/OSDMap.h" +#include "osd/OSDMapMapping.h" +#include "mon/OSDMonitor.h" +#include "mon/PGMap.h" + +#include "global/global_context.h" +#include "global/global_init.h" +#include "common/common_init.h" +#include "common/ceph_argparse.h" +#include "common/ceph_json.h" + +#include <iostream> + +using namespace std; + +int main(int argc, char **argv) { + map<string,string> defaults = { + // make sure we have 3 copies, or some tests won't work + { "osd_pool_default_size", "3" }, + // our map is flat, so just try and split across OSDs, not hosts or whatever + { "osd_crush_chooseleaf_type", "0" }, + }; + std::vector<const char*> args(argv, argv+argc); + auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + 
::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +class OSDMapTest : public testing::Test, + public ::testing::WithParamInterface<std::pair<int, int>> { + int num_osds = 6; +public: + OSDMap osdmap; + OSDMapMapping mapping; + const uint64_t my_ec_pool = 1; + const uint64_t my_rep_pool = 2; + + // Blacklist testing lists + // I pulled the first two ranges and their start/end points from + // https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation + static const string range_addrs[]; + static const string ip_addrs[]; + static const string unblocked_ip_addrs[]; + + OSDMapTest() {} + + void set_up_map(int new_num_osds = 6, bool no_default_pools = false) { + num_osds = new_num_osds; + uuid_d fsid; + osdmap.build_simple(g_ceph_context, 0, fsid, num_osds); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.fsid = osdmap.get_fsid(); + entity_addrvec_t sample_addrs; + sample_addrs.v.push_back(entity_addr_t()); + uuid_d sample_uuid; + for (int i = 0; i < num_osds; ++i) { + sample_uuid.generate_random(); + sample_addrs.v[0].nonce = i; + pending_inc.new_state[i] = CEPH_OSD_EXISTS | CEPH_OSD_NEW; + pending_inc.new_up_client[i] = sample_addrs; + pending_inc.new_up_cluster[i] = sample_addrs; + pending_inc.new_hb_back_up[i] = sample_addrs; + pending_inc.new_hb_front_up[i] = sample_addrs; + pending_inc.new_weight[i] = CEPH_OSD_IN; + pending_inc.new_uuid[i] = sample_uuid; + } + osdmap.apply_incremental(pending_inc); + if (no_default_pools) // do not create any default pool(s) + return; + + // Create an EC ruleset and a pool using it + int r = osdmap.crush->add_simple_rule( + "erasure", "default", "osd", "", + "indep", pg_pool_t::TYPE_ERASURE, + &cerr); + + OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); + new_pool_inc.new_pool_max = osdmap.get_pool_max(); + new_pool_inc.fsid = osdmap.get_fsid(); + pg_pool_t empty; + // make an ec pool + uint64_t pool_id = ++new_pool_inc.new_pool_max; + ceph_assert(pool_id == 
my_ec_pool); + pg_pool_t *p = new_pool_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->set_pg_num(64); + p->set_pgp_num(64); + p->type = pg_pool_t::TYPE_ERASURE; + p->crush_rule = r; + new_pool_inc.new_pool_names[pool_id] = "ec"; + // and a replicated pool + pool_id = ++new_pool_inc.new_pool_max; + ceph_assert(pool_id == my_rep_pool); + p = new_pool_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->set_pg_num(64); + p->set_pgp_num(64); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = 0; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_id] = "reppool"; + osdmap.apply_incremental(new_pool_inc); + } + unsigned int get_num_osds() { return num_osds; } + void get_crush(const OSDMap& tmap, CrushWrapper& newcrush) { + bufferlist bl; + tmap.crush->encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT); + auto p = bl.cbegin(); + newcrush.decode(p); + } + int crush_move(OSDMap& tmap, const string &name, const vector<string> &argvec) { + map<string,string> loc; + CrushWrapper::parse_loc_map(argvec, &loc); + CrushWrapper newcrush; + get_crush(tmap, newcrush); + if (!newcrush.name_exists(name)) { + return -ENOENT; + } + int id = newcrush.get_item_id(name); + int err; + if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) { + if (id >= 0) { + err = newcrush.create_or_move_item(g_ceph_context, id, 0, name, loc); + } else { + err = newcrush.move_bucket(g_ceph_context, id, loc); + } + if (err >= 0) { + OSDMap::Incremental pending_inc(tmap.get_epoch() + 1); + pending_inc.crush.clear(); + newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + tmap.apply_incremental(pending_inc); + err = 0; + } + } else { + // already there + err = 0; + } + return err; + } + int crush_rule_create_replicated(const string &name, + const string &root, + const string &type) { + if (osdmap.crush->rule_exists(name)) { + return osdmap.crush->get_rule_id(name); + } + CrushWrapper newcrush; + get_crush(osdmap, newcrush); + string 
device_class; + stringstream ss; + int ruleno = newcrush.add_simple_rule( + name, root, type, device_class, + "firstn", pg_pool_t::TYPE_REPLICATED, &ss); + if (ruleno >= 0) { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + newcrush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + } + return ruleno; + } + void test_mappings(int pool, + int num, + vector<int> *any, + vector<int> *first, + vector<int> *primary) { + mapping.update(osdmap); + for (int i=0; i<num; ++i) { + vector<int> up, acting; + int up_primary, acting_primary; + pg_t pgid(i, pool); + osdmap.pg_to_up_acting_osds(pgid, + &up, &up_primary, &acting, &acting_primary); + for (unsigned j=0; j<acting.size(); ++j) + (*any)[acting[j]]++; + if (!acting.empty()) + (*first)[acting[0]]++; + if (acting_primary >= 0) + (*primary)[acting_primary]++; + + // compare to precalc mapping + vector<int> up2, acting2; + int up_primary2, acting_primary2; + pgid = osdmap.raw_pg_to_pg(pgid); + mapping.get(pgid, &up2, &up_primary2, &acting2, &acting_primary2); + ASSERT_EQ(up, up2); + ASSERT_EQ(up_primary, up_primary2); + ASSERT_EQ(acting, acting2); + ASSERT_EQ(acting_primary, acting_primary2); + } + cout << "any: " << *any << std::endl;; + cout << "first: " << *first << std::endl;; + cout << "primary: " << *primary << std::endl;; + } + void clean_pg_upmaps(CephContext *cct, + const OSDMap& om, + OSDMap::Incremental& pending_inc) { + int cpu_num = 8; + int pgs_per_chunk = 256; + ThreadPool tp(cct, "BUG_40104::clean_upmap_tp", "clean_upmap_tp", cpu_num); + tp.start(); + ParallelPGMapper mapper(cct, &tp); + vector<pg_t> pgs_to_check; + om.get_upmap_pgs(&pgs_to_check); + OSDMonitor::CleanUpmapJob job(cct, om, pending_inc); + mapper.queue(&job, pgs_per_chunk, pgs_to_check); + job.wait(); + tp.stop(); + } +}; + +TEST_F(OSDMapTest, Create) { + set_up_map(); + ASSERT_EQ(get_num_osds(), (unsigned)osdmap.get_max_osd()); + 
ASSERT_EQ(get_num_osds(), osdmap.get_num_in_osds()); +} + +TEST_F(OSDMapTest, Features) { + // with EC pool + set_up_map(); + uint64_t features = osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2); + ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL); + ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY); + + // clients have a slightly different view + features = osdmap.get_features(CEPH_ENTITY_TYPE_CLIENT, NULL); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_V2); + ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL); + ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY); + + // remove the EC pool, but leave the rule. add primary affinity.
+ { + OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); + new_pool_inc.old_pools.insert(osdmap.lookup_pg_pool_name("ec")); + new_pool_inc.new_primary_affinity[0] = 0x8000; + osdmap.apply_incremental(new_pool_inc); + } + + features = osdmap.get_features(CEPH_ENTITY_TYPE_MON, NULL); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES2); + ASSERT_TRUE(features & CEPH_FEATURE_CRUSH_TUNABLES3); // shared bit with primary affinity + ASSERT_FALSE(features & CEPH_FEATURE_CRUSH_V2); + ASSERT_TRUE(features & CEPH_FEATURE_OSDHASHPSPOOL); + ASSERT_TRUE(features & CEPH_FEATURE_OSD_PRIMARY_AFFINITY); + + // FIXME: test tiering feature bits +} + +TEST_F(OSDMapTest, MapPG) { + set_up_map(); + + std::cerr << " osdmap.pool_max==" << osdmap.get_pool_max() << std::endl; + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + vector<int> old_up_osds, old_acting_osds; + osdmap.pg_to_up_acting_osds(pgid, old_up_osds, old_acting_osds); + ASSERT_EQ(old_up_osds, up_osds); + ASSERT_EQ(old_acting_osds, acting_osds); + + ASSERT_EQ(osdmap.get_pg_pool(my_rep_pool)->get_size(), up_osds.size()); +} + +TEST_F(OSDMapTest, MapFunctionsMatch) { + // TODO: make sure pg_to_up_acting_osds and pg_to_acting_osds match + set_up_map(); + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + vector<int> up_osds_two, acting_osds_two; + + osdmap.pg_to_up_acting_osds(pgid, up_osds_two, acting_osds_two); + + ASSERT_EQ(up_osds, up_osds_two); + ASSERT_EQ(acting_osds, acting_osds_two); + + int acting_primary_two; + osdmap.pg_to_acting_osds(pgid, &acting_osds_two, &acting_primary_two); + 
EXPECT_EQ(acting_osds, acting_osds_two); + EXPECT_EQ(acting_primary, acting_primary_two); + osdmap.pg_to_acting_osds(pgid, acting_osds_two); + EXPECT_EQ(acting_osds, acting_osds_two); +} + +/** This test must be removed or modified appropriately when we allow + * other ways to specify a primary. */ +TEST_F(OSDMapTest, PrimaryIsFirst) { + set_up_map(); + + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + EXPECT_EQ(up_osds[0], up_primary); + EXPECT_EQ(acting_osds[0], acting_primary); +} + +TEST_F(OSDMapTest, PGTempRespected) { + set_up_map(); + + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + // copy and swap first and last element in acting_osds + vector<int> new_acting_osds(acting_osds); + int first = new_acting_osds[0]; + new_acting_osds[0] = *new_acting_osds.rbegin(); + *new_acting_osds.rbegin() = first; + + // apply pg_temp to osdmap + OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); + pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>( + new_acting_osds.begin(), new_acting_osds.end()); + osdmap.apply_incremental(pgtemp_map); + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + EXPECT_EQ(new_acting_osds, acting_osds); +} + +TEST_F(OSDMapTest, PrimaryTempRespected) { + set_up_map(); + + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds; + vector<int> acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + // make second OSD primary via incremental + OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 
1); + pgtemp_map.new_primary_temp[pgid] = acting_osds[1]; + osdmap.apply_incremental(pgtemp_map); + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + EXPECT_EQ(acting_primary, acting_osds[1]); +} + +TEST_F(OSDMapTest, CleanTemps) { + set_up_map(); + + OSDMap::Incremental pgtemp_map(osdmap.get_epoch() + 1); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 2); + pg_t pga = osdmap.raw_pg_to_pg(pg_t(0, my_rep_pool)); + { + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + osdmap.pg_to_up_acting_osds(pga, &up_osds, &up_primary, + &acting_osds, &acting_primary); + pgtemp_map.new_pg_temp[pga] = mempool::osdmap::vector<int>( + up_osds.begin(), up_osds.end()); + pgtemp_map.new_primary_temp[pga] = up_primary; + } + pg_t pgb = osdmap.raw_pg_to_pg(pg_t(1, my_rep_pool)); + { + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + osdmap.pg_to_up_acting_osds(pgb, &up_osds, &up_primary, + &acting_osds, &acting_primary); + pending_inc.new_pg_temp[pgb] = mempool::osdmap::vector<int>( + up_osds.begin(), up_osds.end()); + pending_inc.new_primary_temp[pgb] = up_primary; + } + + osdmap.apply_incremental(pgtemp_map); + + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc); + + EXPECT_TRUE(pending_inc.new_pg_temp.count(pga) && + pending_inc.new_pg_temp[pga].size() == 0); + EXPECT_EQ(-1, pending_inc.new_primary_temp[pga]); + + EXPECT_TRUE(!pending_inc.new_pg_temp.count(pgb) && + !pending_inc.new_primary_temp.count(pgb)); +} + +TEST_F(OSDMapTest, KeepsNecessaryTemps) { + set_up_map(); + + pg_t rawpg(0, my_rep_pool); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up_osds, acting_osds; + int up_primary, acting_primary; + + osdmap.pg_to_up_acting_osds(pgid, &up_osds, &up_primary, + &acting_osds, &acting_primary); + + // find unused OSD and stick it in there + OSDMap::Incremental 
pgtemp_map(osdmap.get_epoch() + 1); + // find an unused osd and put it in place of the first one + int i = 0; + for(; i != (int)get_num_osds(); ++i) { + bool in_use = false; + for (vector<int>::iterator osd_it = up_osds.begin(); + osd_it != up_osds.end(); + ++osd_it) { + if (i == *osd_it) { + in_use = true; + break; + } + } + if (!in_use) { + up_osds[1] = i; + break; + } + } + if (i == (int)get_num_osds()) + FAIL() << "did not find unused OSD for temp mapping"; + + pgtemp_map.new_pg_temp[pgid] = mempool::osdmap::vector<int>( + up_osds.begin(), up_osds.end()); + pgtemp_map.new_primary_temp[pgid] = up_osds[1]; + osdmap.apply_incremental(pgtemp_map); + + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc); + EXPECT_FALSE(pending_inc.new_pg_temp.count(pgid)); + EXPECT_FALSE(pending_inc.new_primary_temp.count(pgid)); +} + +TEST_F(OSDMapTest, PrimaryAffinity) { + set_up_map(); + + int n = get_num_osds(); + for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin(); + p != osdmap.get_pools().end(); + ++p) { + int pool = p->first; + int expect_primary = 10000 / n; + cout << "pool " << pool << " size " << (int)p->second.size + << " expect_primary " << expect_primary << std::endl; + { + vector<int> any(n, 0); + vector<int> first(n, 0); + vector<int> primary(n, 0); + test_mappings(pool, 10000, &any, &first, &primary); + for (int i=0; i<n; ++i) { + ASSERT_LT(0, any[i]); + ASSERT_LT(0, first[i]); + ASSERT_LT(0, primary[i]); + } + } + + osdmap.set_primary_affinity(0, 0); + osdmap.set_primary_affinity(1, 0); + { + vector<int> any(n, 0); + vector<int> first(n, 0); + vector<int> primary(n, 0); + test_mappings(pool, 10000, &any, &first, &primary); + for (int i=0; i<n; ++i) { + ASSERT_LT(0, any[i]); + if (i >= 2) { + ASSERT_LT(0, first[i]); + ASSERT_LT(0, primary[i]); + } else { + if 
(p->second.is_replicated()) { + ASSERT_EQ(0, first[i]); + } + ASSERT_EQ(0, primary[i]); + } + } + } + + osdmap.set_primary_affinity(0, 0x8000); + osdmap.set_primary_affinity(1, 0); + { + vector<int> any(n, 0); + vector<int> first(n, 0); + vector<int> primary(n, 0); + test_mappings(pool, 10000, &any, &first, &primary); + int expect = (10000 / (n-2)) / 2; // half weight + cout << "expect " << expect << std::endl; + for (int i=0; i<n; ++i) { + ASSERT_LT(0, any[i]); + if (i >= 2) { + ASSERT_LT(0, first[i]); + ASSERT_LT(0, primary[i]); + } else if (i == 1) { + if (p->second.is_replicated()) { + ASSERT_EQ(0, first[i]); + } + ASSERT_EQ(0, primary[i]); + } else { + ASSERT_LT(expect *2/3, primary[0]); + ASSERT_GT(expect *4/3, primary[0]); + } + } + } + + osdmap.set_primary_affinity(0, 0x10000); + osdmap.set_primary_affinity(1, 0x10000); + } +} + +TEST_F(OSDMapTest, get_osd_crush_node_flags) { + set_up_map(); + + for (unsigned i=0; i<get_num_osds(); ++i) { + ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(i)); + } + + OSDMap::Incremental inc(osdmap.get_epoch() + 1); + inc.new_crush_node_flags[-1] = 123u; + osdmap.apply_incremental(inc); + for (unsigned i=0; i<get_num_osds(); ++i) { + ASSERT_EQ(123u, osdmap.get_osd_crush_node_flags(i)); + } + ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000)); + + OSDMap::Incremental inc3(osdmap.get_epoch() + 1); + inc3.new_crush_node_flags[-1] = 456u; + osdmap.apply_incremental(inc3); + for (unsigned i=0; i<get_num_osds(); ++i) { + ASSERT_EQ(456u, osdmap.get_osd_crush_node_flags(i)); + } + ASSERT_EQ(0u, osdmap.get_osd_crush_node_flags(1000)); + + OSDMap::Incremental inc2(osdmap.get_epoch() + 1); + inc2.new_crush_node_flags[-1] = 0; + osdmap.apply_incremental(inc2); + for (unsigned i=0; i<get_num_osds(); ++i) { + ASSERT_EQ(0u, osdmap.get_crush_node_flags(i)); + } +} + +TEST_F(OSDMapTest, parse_osd_id_list) { + set_up_map(); + set<int> out; + set<int> all; + osdmap.get_all_osds(all); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0"}, &out, 
&cout)); + ASSERT_EQ(1u, out.size()); + ASSERT_EQ(0, *out.begin()); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"1"}, &out, &cout)); + ASSERT_EQ(1u, out.size()); + ASSERT_EQ(1, *out.begin()); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","osd.1"}, &out, &cout)); + ASSERT_EQ(2u, out.size()); + ASSERT_EQ(0, *out.begin()); + ASSERT_EQ(1, *out.rbegin()); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"osd.0","1"}, &out, &cout)); + ASSERT_EQ(2u, out.size()); + ASSERT_EQ(0, *out.begin()); + ASSERT_EQ(1, *out.rbegin()); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"*"}, &out, &cout)); + ASSERT_EQ(all.size(), out.size()); + ASSERT_EQ(all, out); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"all"}, &out, &cout)); + ASSERT_EQ(all, out); + + ASSERT_EQ(0, osdmap.parse_osd_id_list({"any"}, &out, &cout)); + ASSERT_EQ(all, out); + + ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"foo"}, &out, &cout)); + ASSERT_EQ(-EINVAL, osdmap.parse_osd_id_list({"-12"}, &out, &cout)); +} + +TEST_F(OSDMapTest, CleanPGUpmaps) { + set_up_map(); + + // build a crush rule of type host + const int expected_host_num = 3; + int osd_per_host = get_num_osds() / expected_host_num; + ASSERT_GE(2, osd_per_host); + int index = 0; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (i && i % osd_per_host == 0) { + ++index; + } + stringstream osd_name; + stringstream host_name; + vector<string> move_to; + osd_name << "osd." 
<< i; + host_name << "host-" << index; + move_to.push_back("root=default"); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + int r = crush_move(osdmap, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + const string upmap_rule = "upmap"; + int upmap_rule_no = crush_rule_create_replicated( + upmap_rule, "default", "host"); + ASSERT_LT(0, upmap_rule_no); + + // create a replicated pool which references the above rule + OSDMap::Incremental new_pool_inc(osdmap.get_epoch() + 1); + new_pool_inc.new_pool_max = osdmap.get_pool_max(); + new_pool_inc.fsid = osdmap.get_fsid(); + pg_pool_t empty; + uint64_t upmap_pool_id = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(upmap_pool_id, &empty); + p->size = 2; + p->set_pg_num(64); + p->set_pgp_num(64); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = upmap_rule_no; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[upmap_pool_id] = "upmap_pool"; + osdmap.apply_incremental(new_pool_inc); + + pg_t rawpg(0, upmap_pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_LT(1U, up.size()); + { + // validate we won't have two OSDs from a same host + int parent_0 = osdmap.crush->get_parent_of_type(up[0], + osdmap.crush->get_type_id("host")); + int parent_1 = osdmap.crush->get_parent_of_type(up[1], + osdmap.crush->get_type_id("host")); + ASSERT_TRUE(parent_0 != parent_1); + } + + { + // cancel stale upmaps + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + int from = -1; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(up.begin(), up.end(), i) == up.end()) { + from = i; + break; + } + } + ASSERT_TRUE(from >= 0); + int to = -1; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(up.begin(), up.end(), i) == up.end() && i != from) { + to = i; + break; + } + } + ASSERT_TRUE(to >= 0); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + 
new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + OSDMap nextmap; + nextmap.deepish_copy_from(osdmap); + nextmap.apply_incremental(pending_inc); + ASSERT_TRUE(nextmap.have_pg_upmaps(pgid)); + OSDMap::Incremental new_pending_inc(nextmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, nextmap, new_pending_inc); + nextmap.apply_incremental(new_pending_inc); + ASSERT_TRUE(!nextmap.have_pg_upmaps(pgid)); + } + + { + // https://tracker.ceph.com/issues/37493 + pg_t ec_pg(0, my_ec_pool); + pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg); + OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map.. + int from = -1; + int to = -1; + { + // insert a valid pg_upmap_item + vector<int> ec_up; + int ec_up_primary; + osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary); + ASSERT_TRUE(!ec_up.empty()); + from = *(ec_up.begin()); + ASSERT_TRUE(from >= 0); + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) { + to = i; + break; + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[ec_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); + } + { + // mark one of the target OSDs of the above pg_upmap_item as down + OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); + pending_inc.new_state[to] = CEPH_OSD_UP; + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(!tmpmap.is_up(to)); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); 
+ } + { + // confirm *clean_pg_upmaps* won't do anything bad + OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc); + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); + } + } + + { + // http://tracker.ceph.com/issues/37501 + pg_t ec_pg(0, my_ec_pool); + pg_t ec_pgid = osdmap.raw_pg_to_pg(ec_pg); + OSDMap tmpmap; // use a tmpmap here, so we do not dirty origin map.. + int from = -1; + int to = -1; + { + // insert a valid pg_upmap_item + vector<int> ec_up; + int ec_up_primary; + osdmap.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary); + ASSERT_TRUE(!ec_up.empty()); + from = *(ec_up.begin()); + ASSERT_TRUE(from >= 0); + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) { + to = i; + break; + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[ec_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); + } + { + // mark one of the target OSDs of the above pg_upmap_item as out + OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); + pending_inc.new_weight[to] = CEPH_OSD_OUT; + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(tmpmap.is_out(to)); + ASSERT_TRUE(tmpmap.have_pg_upmaps(ec_pgid)); + } + { + // *clean_pg_upmaps* should be able to remove the above *bad* mapping + OSDMap::Incremental pending_inc(tmpmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmpmap, pending_inc); + tmpmap.apply_incremental(pending_inc); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(ec_pgid)); + } + } + + { + // http://tracker.ceph.com/issues/37968 + + // 
build a temporary crush topology of 2 hosts, 3 osds per host + OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. + tmp.deepish_copy_from(osdmap); + const int expected_host_num = 2; + int osd_per_host = get_num_osds() / expected_host_num; + ASSERT_GE(osd_per_host, 3); + int index = 0; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (i && i % osd_per_host == 0) { + ++index; + } + stringstream osd_name; + stringstream host_name; + vector<string> move_to; + osd_name << "osd." << i; + host_name << "host-" << index; + move_to.push_back("root=default"); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + auto r = crush_move(tmp, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + + // build crush rule + CrushWrapper crush; + get_crush(tmp, crush); + string rule_name = "rule_37968"; + int rule_type = pg_pool_t::TYPE_ERASURE; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno)) + break; + } + string root_name = "default"; + int root = crush.get_item_id(root_name); + int min_size = 3; + int max_size = 4; + int steps = 6; + crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 1 /* host*/); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_INDEP, 2, 0 /* osd */); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, 
CEPH_FEATURES_SUPPORTED_DEFAULT); + tmp.apply_incremental(pending_inc); + } + + // create an erasure-coded pool referencing the above rule + int64_t pool_37968; + { + OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); + new_pool_inc.new_pool_max = tmp.get_pool_max(); + new_pool_inc.fsid = tmp.get_fsid(); + pg_pool_t empty; + pool_37968 = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(pool_37968, &empty); + p->size = 4; + p->set_pg_num(8); + p->set_pgp_num(8); + p->type = pg_pool_t::TYPE_ERASURE; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_37968] = "pool_37968"; + tmp.apply_incremental(new_pool_inc); + } + + pg_t ec_pg(0, pool_37968); + pg_t ec_pgid = tmp.raw_pg_to_pg(ec_pg); + int from = -1; + int to = -1; + { + // insert a valid pg_upmap_item + vector<int> ec_up; + int ec_up_primary; + tmp.pg_to_raw_up(ec_pgid, &ec_up, &ec_up_primary); + ASSERT_TRUE(ec_up.size() == 4); + from = *(ec_up.begin()); + ASSERT_TRUE(from >= 0); + auto parent = tmp.crush->get_parent_of_type(from, 1 /* host */, rno); + ASSERT_TRUE(parent < 0); + // pick an osd of the same parent with *from* + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(ec_up.begin(), ec_up.end(), i) == ec_up.end()) { + auto p = tmp.crush->get_parent_of_type(i, 1 /* host */, rno); + if (p == parent) { + to = i; + break; + } + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap_items[ec_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid)); + } + { + // *clean_pg_upmaps* should not remove the above upmap_item + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); +
clean_pg_upmaps(g_ceph_context, tmp, pending_inc); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(ec_pgid)); + } + } + + { + // TEST pg_upmap + { + // STEP-1: enumerate all children of up[0]'s parent, + // replace up[1] with one of them (other than up[0]) + int parent = osdmap.crush->get_parent_of_type(up[0], + osdmap.crush->get_type_id("host")); + set<int> candidates; + osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), &candidates); + ASSERT_LT(1U, candidates.size()); + int replaced_by = -1; + for (auto c: candidates) { + if (c != up[0]) { + replaced_by = c; + break; + } + } + { + // Check we can handle a negative pg_upmap value + vector<int32_t> new_pg_upmap; + new_pg_upmap.push_back(up[0]); + new_pg_upmap.push_back(-823648512); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( + new_pg_upmap.begin(), new_pg_upmap.end()); + osdmap.apply_incremental(pending_inc); + vector<int> new_up; + int new_up_primary; + // crucial call - _apply_upmap should ignore the negative value + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + } + ASSERT_NE(-1, replaced_by); + // generate a new pg_upmap item and apply + vector<int32_t> new_pg_upmap; + new_pg_upmap.push_back(up[0]); + new_pg_upmap.push_back(replaced_by); // up[1] -> replaced_by + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( + new_pg_upmap.begin(), new_pg_upmap.end()); + osdmap.apply_incremental(pending_inc); + { + // validate pg_upmap is there + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(new_up.size(), up.size()); + ASSERT_EQ(new_up[0], new_pg_upmap[0]); + ASSERT_EQ(new_up[1], new_pg_upmap[1]); + // and we shall have two OSDs from a same host now.. 
+ int parent_0 = osdmap.crush->get_parent_of_type(new_up[0], + osdmap.crush->get_type_id("host")); + int parent_1 = osdmap.crush->get_parent_of_type(new_up[1], + osdmap.crush->get_type_id("host")); + ASSERT_EQ(parent_0, parent_1); + } + } + { + // STEP-2: apply cure + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); + osdmap.apply_incremental(pending_inc); + { + // validate pg_upmap is gone (reverted) + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(new_up, up); + ASSERT_EQ(new_up_primary, up_primary); + } + } + } + + { + // TEST pg_upmap_items + // enumerate all used hosts first + set<int> parents; + for (auto u: up) { + int parent = osdmap.crush->get_parent_of_type(u, + osdmap.crush->get_type_id("host")); + ASSERT_GT(0, parent); + parents.insert(parent); + } + int candidate_parent = 0; + set<int> candidate_children; + vector<int> up_after_out; + { + // STEP-1: try mark out up[1] and all other OSDs from the same host + int parent = osdmap.crush->get_parent_of_type(up[1], + osdmap.crush->get_type_id("host")); + set<int> children; + osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), + &children); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + for (auto c: children) { + pending_inc.new_weight[c] = CEPH_OSD_OUT; + } + OSDMap tmpmap; + tmpmap.deepish_copy_from(osdmap); + tmpmap.apply_incremental(pending_inc); + vector<int> new_up; + int new_up_primary; + tmpmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + // verify that we'll have OSDs from a different host.. + int will_choose = -1; + for (auto o: new_up) { + int parent = tmpmap.crush->get_parent_of_type(o, + osdmap.crush->get_type_id("host")); + if (!parents.count(parent)) { + will_choose = o; + candidate_parent = parent; // record + break; + } + } + ASSERT_LT(-1, will_choose); // it is an OSD! 
+ ASSERT_NE(candidate_parent, 0); + osdmap.crush->get_leaves(osdmap.crush->get_item_name(candidate_parent), + &candidate_children); + ASSERT_TRUE(candidate_children.count(will_choose)); + candidate_children.erase(will_choose); + ASSERT_FALSE(candidate_children.empty()); + up_after_out = new_up; // needed for verification.. + } + { + // Make sure we can handle a negative pg_upmap_item + int victim = up[0]; + int replaced_by = -823648512; + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(victim, replaced_by)); + // apply + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + osdmap.apply_incremental(pending_inc); + vector<int> new_up; + int new_up_primary; + // crucial call - _apply_upmap should ignore the negative value + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + } + { + // STEP-2: generating a new pg_upmap_items entry by + // replacing up[0] with one coming from candidate_children + int victim = up[0]; + int replaced_by = *candidate_children.begin(); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(victim, replaced_by)); + // apply + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + osdmap.apply_incremental(pending_inc); + { + // validate pg_upmap_items is there + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(new_up.size(), up.size()); + ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), replaced_by) != + new_up.end()); + // and up[1] too + ASSERT_TRUE(std::find(new_up.begin(), new_up.end(), up[1]) != + new_up.end()); + } + } + { + // STEP-3: mark out up[1] and all other OSDs from the same host + 
int parent = osdmap.crush->get_parent_of_type(up[1], + osdmap.crush->get_type_id("host")); + set<int> children; + osdmap.crush->get_leaves(osdmap.crush->get_item_name(parent), + &children); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + for (auto c: children) { + pending_inc.new_weight[c] = CEPH_OSD_OUT; + } + osdmap.apply_incremental(pending_inc); + { + // validate we have two OSDs from the same host now.. + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(up.size(), new_up.size()); + int parent_0 = osdmap.crush->get_parent_of_type(new_up[0], + osdmap.crush->get_type_id("host")); + int parent_1 = osdmap.crush->get_parent_of_type(new_up[1], + osdmap.crush->get_type_id("host")); + ASSERT_EQ(parent_0, parent_1); + } + } + { + // STEP-4: apply cure + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); + osdmap.apply_incremental(pending_inc); + { + // validate pg_upmap_items is gone (reverted) + vector<int> new_up; + int new_up_primary; + osdmap.pg_to_raw_up(pgid, &new_up, &new_up_primary); + ASSERT_EQ(new_up, up_after_out); + } + } + } +} + +TEST_F(OSDMapTest, BUG_38897) { + // http://tracker.ceph.com/issues/38897 + // build a fresh map with 12 OSDs, without any default pools + set_up_map(12, true); + const string pool_1("pool1"); + const string pool_2("pool2"); + int64_t pool_1_id = -1; + + { + // build customized crush rule for "pool1" + string host_name = "host_for_pool_1"; + // build a customized host to capture osd.1~4 (the loop below stops before osd.5) + for (int i = 1; i < 5; i++) { + stringstream osd_name; + vector<string> move_to; + osd_name << "osd." 
<< i; + move_to.push_back("root=default"); + string host_loc = "host=" + host_name; + move_to.push_back(host_loc); + auto r = crush_move(osdmap, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + CrushWrapper crush; + get_crush(osdmap, crush); + auto host_id = crush.get_item_id(host_name); + ASSERT_TRUE(host_id < 0); + string rule_name = "rule_for_pool1"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno)) + break; + } + int min_size = 3; + int max_size = 3; + int steps = 7; + crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + // always choose osd.0 + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + // then pick any other random osds + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + } + + // create "pool1" + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pool_max = osdmap.get_pool_max(); + auto pool_id = ++pending_inc.new_pool_max; + pool_1_id = pool_id; + pg_pool_t empty; + auto p = pending_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->min_size = 1; + p->set_pg_num(3); + p->set_pgp_num(3); + p->type = 
pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + pending_inc.new_pool_names[pool_id] = pool_1; + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); + ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_1); + { + for (unsigned i = 0; i < 3; i++) { + // 1.x -> [1] + pg_t rawpg(i, pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 3); + ASSERT_TRUE(up[0] == 0); + + // insert a new pg_upmap + vector<int32_t> new_up; + // and remap 1.x to osd.1 only + // this way osd.0 is deemed to be *underfull* + // and osd.1 is deemed to be *overfull* + new_up.push_back(1); + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( + new_up.begin(), new_up.end()); + osdmap.apply_incremental(pending_inc); + } + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 1); + ASSERT_TRUE(up[0] == 1); + } + } + } + + { + // build customized crush rule for "pool2" + string host_name = "host_for_pool_2"; + // build a customized host to capture osd.6~11 + for (int i = 6; i < (int)get_num_osds(); i++) { + stringstream osd_name; + vector<string> move_to; + osd_name << "osd." 
<< i; + move_to.push_back("root=default"); + string host_loc = "host=" + host_name; + move_to.push_back(host_loc); + auto r = crush_move(osdmap, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + CrushWrapper crush; + get_crush(osdmap, crush); + auto host_id = crush.get_item_id(host_name); + ASSERT_TRUE(host_id < 0); + string rule_name = "rule_for_pool2"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno)) + break; + } + int min_size = 3; + int max_size = 3; + int steps = 7; + crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + // always choose osd.0 + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + // then pick any other random osds + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, host_id, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + } + + // create "pool2" + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pool_max = osdmap.get_pool_max(); + auto pool_id = ++pending_inc.new_pool_max; + pg_pool_t empty; + auto p = pending_inc.get_new_pool(pool_id, &empty); + p->size = 3; + // include a single PG + p->set_pg_num(1); + p->set_pgp_num(1); + p->type = 
pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + pending_inc.new_pool_names[pool_id] = pool_2; + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); + ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_2); + pg_t rawpg(0, pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + EXPECT_TRUE(!osdmap.have_pg_upmaps(pgid)); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 3); + ASSERT_TRUE(up[0] == 0); + + { + // build a pg_upmap_item that will + // remap pg out from *underfull* osd.0 + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(0, 10)); // osd.0 -> osd.10 + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_upmaps(pgid)); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 3); + ASSERT_TRUE(up[0] == 10); + } + } + + // ready to go + { + set<int64_t> only_pools; + ASSERT_TRUE(pool_1_id >= 0); + only_pools.insert(pool_1_id); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + // require perfect distribution! 
(max deviation 0) + osdmap.calc_pg_upmaps(g_ceph_context, + 0, // so we can force optimizing + 100, + only_pools, + &pending_inc); + osdmap.apply_incremental(pending_inc); + } +} + +TEST_F(OSDMapTest, BUG_40104) { + // http://tracker.ceph.com/issues/40104 + int big_osd_num = 5000; + int big_pg_num = 10000; + set_up_map(big_osd_num, true); + int pool_id; + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pool_max = osdmap.get_pool_max(); + pool_id = ++pending_inc.new_pool_max; + pg_pool_t empty; + auto p = pending_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->min_size = 1; + p->set_pg_num(big_pg_num); + p->set_pgp_num(big_pg_num); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = 0; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + pending_inc.new_pool_names[pool_id] = "big_pool"; + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); + ASSERT_TRUE(osdmap.get_pool_name(pool_id) == "big_pool"); + } + { + // generate pg_upmap_items for each pg + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + for (int i = 0; i < big_pg_num; i++) { + pg_t rawpg(i, pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + vector<int> up; + int up_primary; + osdmap.pg_to_raw_up(pgid, &up, &up_primary); + ASSERT_TRUE(up.size() == 3); + int victim = up[0]; + int replaced_by = random() % big_osd_num; + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + // note that it might or might not be valid, we don't care + new_pg_upmap_items.push_back(make_pair(victim, replaced_by)); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + } + osdmap.apply_incremental(pending_inc); + } + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + auto start = mono_clock::now(); + clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); + auto latency = mono_clock::now() - start; + std::cout << "clean_pg_upmaps (~" << big_pg_num + 
<< " pg_upmap_items) latency:" << timespan_str(latency) + << std::endl; + } +} + +TEST_F(OSDMapTest, BUG_42052) { + // https://tracker.ceph.com/issues/42052 + set_up_map(6, true); + const string pool_name("pool"); + // build customized crush rule for "pool" + CrushWrapper crush; + get_crush(osdmap, crush); + string rule_name = "rule"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno)) + break; + } + int min_size = 3; + int max_size = 3; + int steps = 8; + crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + // always choose osd.0, osd.1, osd.2 + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 1); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, 0, 2); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + } + + // create "pool" + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pool_max = osdmap.get_pool_max(); + auto pool_id = ++pending_inc.new_pool_max; + pg_pool_t empty; + auto p = pending_inc.get_new_pool(pool_id, &empty); + p->size = 3; + p->min_size = 1; + p->set_pg_num(1); + p->set_pgp_num(1); + p->type = pg_pool_t::TYPE_REPLICATED; + 
p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + pending_inc.new_pool_names[pool_id] = pool_name; + osdmap.apply_incremental(pending_inc); + ASSERT_TRUE(osdmap.have_pg_pool(pool_id)); + ASSERT_TRUE(osdmap.get_pool_name(pool_id) == pool_name); + pg_t rawpg(0, pool_id); + pg_t pgid = osdmap.raw_pg_to_pg(rawpg); + { + // pg_upmap 1.0 [2,3,5] + vector<int32_t> new_up{2,3,5}; + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap[pgid] = mempool::osdmap::vector<int32_t>( + new_up.begin(), new_up.end()); + osdmap.apply_incremental(pending_inc); + } + { + // pg_upmap_items 1.0 [0,3,4,5] + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(0, 3)); + new_pg_upmap_items.push_back(make_pair(4, 5)); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.new_pg_upmap_items[pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + osdmap.apply_incremental(pending_inc); + } + { + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, osdmap, pending_inc); + osdmap.apply_incremental(pending_inc); + ASSERT_FALSE(osdmap.have_pg_upmaps(pgid)); + } +} + +TEST_F(OSDMapTest, BUG_42485) { + set_up_map(60); + { + // build a temporary crush topology of 2datacenters, 3racks per dc, + // 1host per rack, 10osds per host + OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. 
+ tmp.deepish_copy_from(osdmap); + const int expected_host_num = 6; + int osd_per_host = (int)get_num_osds() / expected_host_num; + ASSERT_GE(osd_per_host, 10); + int host_per_dc = 3; + int index = 0; + int dc_index = 0; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (i && i % osd_per_host == 0) { + ++index; + } + if (i && i % (host_per_dc * osd_per_host) == 0) { + ++dc_index; + } + stringstream osd_name; + stringstream host_name; + stringstream rack_name; + stringstream dc_name; + vector<string> move_to; + osd_name << "osd." << i; + host_name << "host-" << index; + rack_name << "rack-" << index; + dc_name << "dc-" << dc_index; + move_to.push_back("root=default"); + string dc_loc = "datacenter=" + dc_name.str(); + move_to.push_back(dc_loc); + string rack_loc = "rack=" + rack_name.str(); + move_to.push_back(rack_loc); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + auto r = crush_move(tmp, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + + // build crush rule + CrushWrapper crush; + get_crush(tmp, crush); + string rule_name = "rule_xeus_993_1"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno)) + break; + } + string root_name = "default"; + string dc_1 = "dc-0"; + int dc1 = crush.get_item_id(dc_1); + string dc_2 = "dc-1"; + int dc2 = crush.get_item_id(dc_2); + int min_size = 1; + int max_size = 20; + int steps = 8; + crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc1, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + 
crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, dc2, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, 2, 3 /* rack */); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + tmp.apply_incremental(pending_inc); + } + // create a replicated pool referencing the above rule + int64_t pool_xeus_993; + { + OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); + new_pool_inc.new_pool_max = tmp.get_pool_max(); + new_pool_inc.fsid = tmp.get_fsid(); + pg_pool_t empty; + pool_xeus_993 = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(pool_xeus_993, &empty); + p->size = 4; + p->set_pg_num(4096); + p->set_pgp_num(4096); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_xeus_993] = "pool_xeus_993"; + tmp.apply_incremental(new_pool_inc); + } + + pg_t rep_pg(0, pool_xeus_993); + pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg); + { + int from = -1; + int to = -1; + vector<int> rep_up; + int rep_up_primary; + tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); + /* log the PG id next to its raw up set */ + std::cout << "pgid " << rep_pgid << " up " << rep_up << std::endl; + ASSERT_TRUE(rep_up.size() == 4); + from = *(rep_up.begin()); + ASSERT_TRUE(from >= 0); + auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno); + if (dc_parent == dc1) + dc_parent = dc2; + else + dc_parent = dc1; + auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); + ASSERT_TRUE(dc_parent < 0); + ASSERT_TRUE(rack_parent < 0); + set<int> rack_parents; + for (auto &i: rep_up) { + if (i == from) continue; + auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); 
+ rack_parents.insert(rack_parent); + } + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { + auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno); + auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); + if (dc_p == dc_parent && + rack_parents.find(rack_p) == rack_parents.end()) { + to = i; + break; + } + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + std::cout << "from " << from << " to " << to << std::endl; + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap_items[rep_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + } + pg_t rep_pg2(2, pool_xeus_993); + pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2); + { + pg_t rep_pgid = rep_pgid2; + vector<int> from_osds{-1, -1}; + vector<int> rep_up; + int rep_up_primary; + tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); + ASSERT_TRUE(rep_up.size() == 4); + from_osds[0] = *(rep_up.begin()); + from_osds[1] = *(rep_up.rbegin()); + std::cout << "pgid " << rep_pgid2 << " up " << rep_up << std::endl; + ASSERT_TRUE(*(from_osds.begin()) >= 0); + ASSERT_TRUE(*(from_osds.rbegin()) >= 0); + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + for (auto &from: from_osds) { + int to = -1; + auto dc_parent = tmp.crush->get_parent_of_type(from, 8 /* dc */, rno); + if (dc_parent == dc1) + dc_parent = dc2; + else + dc_parent = dc1; + auto rack_parent = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); + ASSERT_TRUE(dc_parent < 0); + ASSERT_TRUE(rack_parent < 0); + set<int> rack_parents; + for (auto &i: rep_up) { + if (i == from) continue; + auto rack_parent = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); + rack_parents.insert(rack_parent); + } + 
for (auto &i: new_pg_upmap_items) { + auto rack_from = tmp.crush->get_parent_of_type(i.first, 3, rno); + auto rack_to = tmp.crush->get_parent_of_type(i.second, 3, rno); + rack_parents.insert(rack_from); + rack_parents.insert(rack_to); + } + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { + auto dc_p = tmp.crush->get_parent_of_type(i, 8 /* dc */, rno); + auto rack_p = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); + if (dc_p == dc_parent && + rack_parents.find(rack_p) == rack_parents.end()) { + to = i; + break; + } + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + std::cout << "from " << from << " to " << to << std::endl; + new_pg_upmap_items.push_back(make_pair(from, to)); + } + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap_items[rep_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + } + { + // *maybe_remove_pg_upmaps* should remove the above upmap_item + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmp, pending_inc); + tmp.apply_incremental(pending_inc); + ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid)); + ASSERT_FALSE(tmp.have_pg_upmaps(rep_pgid2)); + } + } +} + +TEST(PGTempMap, basic) +{ + PGTempMap m; + pg_t a(1,1); + for (auto i=3; i<1000; ++i) { + pg_t x(i, 1); + m.set(x, {static_cast<int>(i)}); + } + pg_t b(2,1); + m.set(a, {1, 2}); + ASSERT_NE(m.find(a), m.end()); + ASSERT_EQ(m.find(a), m.begin()); + ASSERT_EQ(m.find(b), m.end()); + ASSERT_EQ(998u, m.size()); +} + +TEST_F(OSDMapTest, BUG_43124) { + set_up_map(200); + { + // https://tracker.ceph.com/issues/43124 + + // build a temporary crush topology of 5racks, + // 4 hosts per rack, 10osds per host + OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. 
+ tmp.deepish_copy_from(osdmap); + const int expected_host_num = 20; + int osd_per_host = (int)get_num_osds() / expected_host_num; + ASSERT_GE(osd_per_host, 10); + int host_per_rack = 4; + int index = 0; + int rack_index = 0; + for (int i = 0; i < (int)get_num_osds(); i++) { + if (i && i % osd_per_host == 0) { + ++index; + } + if (i && i % (host_per_rack * osd_per_host) == 0) { + ++rack_index; + } + stringstream osd_name; + stringstream host_name; + stringstream rack_name; + vector<string> move_to; + osd_name << "osd." << i; + host_name << "host-" << index; + rack_name << "rack-" << rack_index; + move_to.push_back("root=default"); + string rack_loc = "rack=" + rack_name.str(); + move_to.push_back(rack_loc); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + auto r = crush_move(tmp, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + + // build crush rule + CrushWrapper crush; + get_crush(tmp, crush); + string rule_name = "rule_angel_1944"; + int rule_type = pg_pool_t::TYPE_ERASURE; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno) && !crush.ruleset_exists(rno)) + break; + } + int min_size = 1; + int max_size = 20; + int steps = 6; + string root_name = "default"; + int root = crush.get_item_id(root_name); + crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSE_FIRSTN, 4, 3 /* rack */); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_INDEP, 3, 1 /* host */); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + 
crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + tmp.apply_incremental(pending_inc); + } + { + stringstream oss; + crush.dump_tree(&oss, NULL); + std::cout << oss.str() << std::endl; + Formatter *f = Formatter::create("json-pretty"); + f->open_object_section("crush_rules"); + crush.dump_rules(f); + f->close_section(); + f->flush(cout); + delete f; + } + // create an erasure-coded pool referencing the above rule + int64_t pool_angel_1944; + { + OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); + new_pool_inc.new_pool_max = tmp.get_pool_max(); + new_pool_inc.fsid = tmp.get_fsid(); + pg_pool_t empty; + pool_angel_1944 = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(pool_angel_1944, &empty); + p->size = 12; + p->set_pg_num(4096); + p->set_pgp_num(4096); + p->type = pg_pool_t::TYPE_ERASURE; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_angel_1944] = "pool_angel_1944"; + tmp.apply_incremental(new_pool_inc); + } + + pg_t rep_pg(0, pool_angel_1944); + pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg); + { + // insert a pg_upmap_item + int from = -1; + int to = -1; + vector<int> rep_up; + int rep_up_primary; + tmp.pg_to_raw_up(rep_pgid, &rep_up, &rep_up_primary); + std::cout << "pgid " << rep_pgid << " up " << rep_up << std::endl; + ASSERT_TRUE(rep_up.size() == 12); + from = *(rep_up.begin()); + ASSERT_TRUE(from >= 0); + auto from_rack = tmp.crush->get_parent_of_type(from, 3 /* rack */, rno); + set<int> failure_domains; + for (auto &osd : rep_up) { + failure_domains.insert(tmp.crush->get_parent_of_type(osd, 1 /* host */, rno)); + } + for (int i = 0; i < (int)get_num_osds(); i++) { + if (std::find(rep_up.begin(), rep_up.end(), i) == rep_up.end()) { + auto to_rack = tmp.crush->get_parent_of_type(i, 3 /* rack */, rno); + auto to_host = 
tmp.crush->get_parent_of_type(i, 1 /* host */, rno); + if (to_rack != from_rack && failure_domains.count(to_host) == 0) { + to = i; + break; + } + } + } + ASSERT_TRUE(to >= 0); + ASSERT_TRUE(from != to); + std::cout << "from " << from << " to " << to << std::endl; + vector<pair<int32_t,int32_t>> new_pg_upmap_items; + new_pg_upmap_items.push_back(make_pair(from, to)); + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap_items[rep_pgid] = + mempool::osdmap::vector<pair<int32_t,int32_t>>( + new_pg_upmap_items.begin(), new_pg_upmap_items.end()); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + } + { + // *maybe_remove_pg_upmaps* should not remove the above upmap_item + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmp, pending_inc); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + } + } +} + +TEST_F(OSDMapTest, BUG_48884) +{ + + set_up_map(12); + + unsigned int host_index = 1; + for (unsigned int x=0; x < get_num_osds();) { + // Create three hosts with four osds each + for (unsigned int y=0; y < 4; y++) { + stringstream osd_name; + stringstream host_name; + vector<string> move_to; + osd_name << "osd." 
<< x; + host_name << "host-" << host_index; + move_to.push_back("root=default"); + move_to.push_back("rack=localrack"); + string host_loc = "host=" + host_name.str(); + move_to.push_back(host_loc); + int r = crush_move(osdmap, osd_name.str(), move_to); + ASSERT_EQ(0, r); + x++; + } + host_index++; + } + + CrushWrapper crush; + get_crush(osdmap, crush); + auto host_id = crush.get_item_id("localhost"); + crush.remove_item(g_ceph_context, host_id, false); + OSDMap::Incremental pending_inc(osdmap.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, CEPH_FEATURES_SUPPORTED_DEFAULT); + osdmap.apply_incremental(pending_inc); + + PGMap pgmap; + osd_stat_t stats, stats_null; + stats.statfs.total = 500000; + stats.statfs.available = 50000; + stats.statfs.omap_allocated = 50000; + stats.statfs.internal_metadata = 50000; + stats_null.statfs.total = 0; + stats_null.statfs.available = 0; + stats_null.statfs.omap_allocated = 0; + stats_null.statfs.internal_metadata = 0; + for (unsigned int x=0; x < get_num_osds(); x++) { + if (x > 3 && x < 8) { + pgmap.osd_stat.insert({x,stats_null}); + } else { + pgmap.osd_stat.insert({x,stats}); + } + } + + stringstream ss; + boost::scoped_ptr<Formatter> f(Formatter::create("json-pretty")); + print_osd_utilization(osdmap, pgmap, ss, f.get(), true, "root"); + JSONParser parser; + parser.parse(ss.str().c_str(), static_cast<int>(ss.str().size())); + auto iter = parser.find_first(); + for (const auto& bucket : (*iter)->get_array_elements()) { + JSONParser parser2; + parser2.parse(bucket.c_str(), static_cast<int>(bucket.size())); + auto* obj = parser2.find_obj("name"); + if (obj->get_data_val().str.compare("localrack") == 0) { + obj = parser2.find_obj("kb"); + ASSERT_EQ(obj->get_data_val().str, "3904"); + obj = parser2.find_obj("kb_used"); + ASSERT_EQ(obj->get_data_val().str, "3512"); + obj = parser2.find_obj("kb_used_omap"); + ASSERT_EQ(obj->get_data_val().str, "384"); + obj = parser2.find_obj("kb_used_meta"); + 
ASSERT_EQ(obj->get_data_val().str, "384"); + obj = parser2.find_obj("kb_avail"); + ASSERT_EQ(obj->get_data_val().str, "384"); + } + } +} + +TEST_P(OSDMapTest, BUG_51842) { + set_up_map(3, true); + OSDMap tmp; // use a tmpmap here, so we do not dirty origin map.. + tmp.deepish_copy_from(osdmap); + for (int i = 0; i < (int)get_num_osds(); i++) { + stringstream osd_name; + stringstream host_name; + vector<string> move_to; + osd_name << "osd." << i; + host_name << "host=host-" << i; + move_to.push_back("root=infra-1706"); + move_to.push_back(host_name.str()); + auto r = crush_move(tmp, osd_name.str(), move_to); + ASSERT_EQ(0, r); + } + + // build crush rule + CrushWrapper crush; + get_crush(tmp, crush); + string rule_name = "infra-1706"; + int rule_type = pg_pool_t::TYPE_REPLICATED; + ASSERT_TRUE(!crush.rule_exists(rule_name)); + int rno; + for (rno = 0; rno < crush.get_max_rules(); rno++) { + if (!crush.rule_exists(rno)) + break; + } + string root_bucket = "infra-1706"; + int root = crush.get_item_id(root_bucket); + int min_size = 1; + int max_size = 20; + int steps = 5; + crush_rule *rule = crush_make_rule(steps, rno, rule_type, min_size, max_size); + int step = 0; + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 5, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_SET_CHOOSE_TRIES, 100, 0); + crush_rule_set_step(rule, step++, CRUSH_RULE_TAKE, root, 0); + // note: it's ok to set like 'step chooseleaf_firstn 0 host' + std::pair<int, int> param = GetParam(); + int rep_num = std::get<0>(param); + int domain = std::get<1>(param); + crush_rule_set_step(rule, step++, CRUSH_RULE_CHOOSELEAF_FIRSTN, rep_num, domain); + crush_rule_set_step(rule, step++, CRUSH_RULE_EMIT, 0, 0); + ASSERT_TRUE(step == steps); + auto r = crush_add_rule(crush.get_crush_map(), rule, rno); + ASSERT_TRUE(r >= 0); + crush.set_rule_name(rno, rule_name); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.crush.clear(); + crush.encode(pending_inc.crush, 
CEPH_FEATURES_SUPPORTED_DEFAULT); + tmp.apply_incremental(pending_inc); + } + { + stringstream oss; + crush.dump_tree(&oss, NULL); + std::cout << oss.str() << std::endl; + Formatter *f = Formatter::create("json-pretty"); + f->open_object_section("crush_rules"); + crush.dump_rules(f); + f->close_section(); + f->flush(cout); + delete f; + } + // create a replicated pool referencing the above rule + int64_t pool_infra_1706; + { + OSDMap::Incremental new_pool_inc(tmp.get_epoch() + 1); + new_pool_inc.new_pool_max = tmp.get_pool_max(); + new_pool_inc.fsid = tmp.get_fsid(); + pg_pool_t empty; + pool_infra_1706 = ++new_pool_inc.new_pool_max; + pg_pool_t *p = new_pool_inc.get_new_pool(pool_infra_1706, &empty); + p->size = 3; + p->min_size = 1; + p->set_pg_num(256); + p->set_pgp_num(256); + p->type = pg_pool_t::TYPE_REPLICATED; + p->crush_rule = rno; + p->set_flag(pg_pool_t::FLAG_HASHPSPOOL); + new_pool_inc.new_pool_names[pool_infra_1706] = "pool_infra_1706"; + tmp.apply_incremental(new_pool_inc); + } + + // add upmaps + pg_t rep_pg(3, pool_infra_1706); + pg_t rep_pgid = tmp.raw_pg_to_pg(rep_pg); + pg_t rep_pg2(4, pool_infra_1706); + pg_t rep_pgid2 = tmp.raw_pg_to_pg(rep_pg2); + pg_t rep_pg3(6, pool_infra_1706); + pg_t rep_pgid3 = tmp.raw_pg_to_pg(rep_pg3); + { + OSDMap::Incremental pending_inc(tmp.get_epoch() + 1); + pending_inc.new_pg_upmap[rep_pgid] = mempool::osdmap::vector<int32_t>({1,0,2}); + pending_inc.new_pg_upmap[rep_pgid2] = mempool::osdmap::vector<int32_t>({1,2,0}); + pending_inc.new_pg_upmap[rep_pgid3] = mempool::osdmap::vector<int32_t>({1,2,0}); + tmp.apply_incremental(pending_inc); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid)); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid2)); + ASSERT_TRUE(tmp.have_pg_upmaps(rep_pgid3)); + } + + { + // now, set pool size to 1 + OSDMap tmpmap; + tmpmap.deepish_copy_from(tmp); + OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1); + pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706); + p.size = 1; + p.last_change = 
new_pool_inc.epoch; + new_pool_inc.new_pools[pool_infra_1706] = p; + tmpmap.apply_incremental(new_pool_inc); + + OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc); + tmpmap.apply_incremental(new_pending_inc); + // check pg upmaps + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid)); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2)); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3)); + } + { + // now, set pool size to 4 + OSDMap tmpmap; + tmpmap.deepish_copy_from(tmp); + OSDMap::Incremental new_pool_inc(tmpmap.get_epoch() + 1); + pg_pool_t p = *tmpmap.get_pg_pool(pool_infra_1706); + p.size = 4; + p.last_change = new_pool_inc.epoch; + new_pool_inc.new_pools[pool_infra_1706] = p; + tmpmap.apply_incremental(new_pool_inc); + + OSDMap::Incremental new_pending_inc(tmpmap.get_epoch() + 1); + clean_pg_upmaps(g_ceph_context, tmpmap, new_pending_inc); + tmpmap.apply_incremental(new_pending_inc); + // check pg upmaps + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid)); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid2)); + ASSERT_TRUE(!tmpmap.have_pg_upmaps(rep_pgid3)); + } +} + +const string OSDMapTest::range_addrs[] = {"198.51.100.0/22", "10.2.5.102/32", "2001:db8::/48", + "3001:db8::/72", "4001:db8::/30", "5001:db8::/64", "6001:db8::/128", "7001:db8::/127"}; +const string OSDMapTest::ip_addrs[] = {"198.51.100.14", "198.51.100.0", "198.51.103.255", + "10.2.5.102", + "2001:db8:0:0:0:0:0:0", "2001:db8:0:0:0:0001:ffff:ffff", + "2001:db8:0:ffff:ffff:ffff:ffff:ffff", + "3001:db8:0:0:0:0:0:0", "3001:db8:0:0:0:0001:ffff:ffff", + "3001:db8:0:0:00ff:ffff:ffff:ffff", + "4001:db8::", "4001:db8:0:0:0:0001:ffff:ffff", + "4001:dbb:ffff:ffff:ffff:ffff:ffff:ffff", + "5001:db8:0:0:0:0:0:0", "5001:db8:0:0:0:0:ffff:ffff", + "5001:db8:0:0:ffff:ffff:ffff:ffff", + "6001:db8:0:0:0:0:0:0", + "7001:db8:0:0:0:0:0:0", "7001:db8:0:0:0:0:0:0001" +}; +const string OSDMapTest::unblocked_ip_addrs[] = { "0.0.0.0", "1.1.1.1", "192.168.1.1", + 
"198.51.99.255", "198.51.104.0", + "10.2.5.101", "10.2.5.103", + "2001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "2001:db8:0001::", + "3001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "3001:db8:0:0:0100::", + "4001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "4001:dbc::", + "5001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "5001:db8:0:0001:0:0:0:0", + "6001:db8:0:0:0:0:0:0001", + "7001:db7:ffff:ffff:ffff:ffff:ffff:ffff", "7001:db8:0:0:0:0:0:0002" +}; + +TEST_F(OSDMapTest, blocklisting_ips) { + set_up_map(6); //whatever + + OSDMap::Incremental new_blocklist_inc(osdmap.get_epoch() + 1); + for (const auto& a : ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + new_blocklist_inc.new_blocklist[addr] = ceph_clock_now(); + } + osdmap.apply_incremental(new_blocklist_inc); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + ASSERT_TRUE(osdmap.is_blocklisted(addr, g_ceph_context)); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context)); + } + + OSDMap::Incremental rm_blocklist_inc(osdmap.get_epoch() + 1); + for (const auto& a : ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + rm_blocklist_inc.old_blocklist.push_back(addr); + } + osdmap.apply_incremental(rm_blocklist_inc); + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context)); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (blocklisted) { + cout << "erroneously blocklisted " << addr << std::endl; + } + EXPECT_FALSE(blocklisted); + } +} + +TEST_F(OSDMapTest, blocklisting_ranges) { + 
set_up_map(6); //whatever + OSDMap::Incremental range_blocklist_inc(osdmap.get_epoch() + 1); + for (const auto& a : range_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.type = entity_addr_t::TYPE_CIDR; + range_blocklist_inc.new_range_blocklist[addr] = ceph_clock_now(); + } + osdmap.apply_incremental(range_blocklist_inc); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (blocklisted) { + cout << "erroneously blocklisted " << addr << std::endl; + } + EXPECT_FALSE(blocklisted); + } + + OSDMap::Incremental rm_range_blocklist(osdmap.get_epoch() + 1); + for (const auto& a : range_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.type = entity_addr_t::TYPE_CIDR; + rm_range_blocklist.old_range_blocklist.push_back(addr); + } + osdmap.apply_incremental(rm_range_blocklist); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + ASSERT_FALSE(osdmap.is_blocklisted(addr, g_ceph_context)); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (blocklisted) { + cout << "erroneously blocklisted " << addr << std::endl; + } + EXPECT_FALSE(blocklisted); + } +} + +TEST_F(OSDMapTest, blocklisting_everything) { + set_up_map(6); //whatever + OSDMap::Incremental range_blocklist_inc(osdmap.get_epoch() + 1); + entity_addr_t baddr; + baddr.parse("2001:db8::/0"); + baddr.type = entity_addr_t::TYPE_CIDR; + 
range_blocklist_inc.new_range_blocklist[baddr] = ceph_clock_now(); + osdmap.apply_incremental(range_blocklist_inc); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + if (addr.is_ipv4()) continue; + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + if (addr.is_ipv4()) continue; + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } + + OSDMap::Incremental swap_blocklist_inc(osdmap.get_epoch()+1); + swap_blocklist_inc.old_range_blocklist.push_back(baddr); + + entity_addr_t caddr; + caddr.parse("1.1.1.1/0"); + caddr.type = entity_addr_t::TYPE_CIDR; + swap_blocklist_inc.new_range_blocklist[caddr] = ceph_clock_now(); + osdmap.apply_incremental(swap_blocklist_inc); + + for (const auto& a: ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + if (!addr.is_ipv4()) continue; + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } + for (const auto& a: unblocked_ip_addrs) { + entity_addr_t addr; + addr.parse(a); + addr.set_type(entity_addr_t::TYPE_LEGACY); + if (!addr.is_ipv4()) continue; + bool blocklisted = osdmap.is_blocklisted(addr, g_ceph_context); + if (!blocklisted) { + cout << "erroneously not blocklisted " << addr << std::endl; + } + ASSERT_TRUE(blocklisted); + } +} + +INSTANTIATE_TEST_CASE_P( + OSDMap, + OSDMapTest, + ::testing::Values( + std::make_pair<int, int>(0, 1), // chooseleaf firstn 0 host + std::make_pair<int, int>(3, 1), // 
chooseleaf firstn 3 host + std::make_pair<int, int>(0, 0), // chooseleaf firstn 0 osd + std::make_pair<int, int>(3, 0) // chooseleaf firstn 3 osd + ) +); diff --git a/src/test/osd/TestOSDScrub.cc b/src/test/osd/TestOSDScrub.cc new file mode 100644 index 000000000..45d79a183 --- /dev/null +++ b/src/test/osd/TestOSDScrub.cc @@ -0,0 +1,200 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library Public License for more details. 
+ * + */ + +#include <stdio.h> +#include <signal.h> +#include <gtest/gtest.h> +#include "common/async/context_pool.h" +#include "osd/OSD.h" +#include "os/ObjectStore.h" +#include "mon/MonClient.h" +#include "common/ceph_argparse.h" +#include "msg/Messenger.h" + +class TestOSDScrub: public OSD { + +public: + TestOSDScrub(CephContext *cct_, + ObjectStore *store_, + int id, + Messenger *internal, + Messenger *external, + Messenger *hb_front_client, + Messenger *hb_back_client, + Messenger *hb_front_server, + Messenger *hb_back_server, + Messenger *osdc_messenger, + MonClient *mc, const std::string &dev, const std::string &jdev, + ceph::async::io_context_pool& ictx) : + OSD(cct_, store_, id, internal, external, hb_front_client, hb_back_client, hb_front_server, hb_back_server, osdc_messenger, mc, dev, jdev, ictx) + { + } + + bool scrub_time_permit(utime_t now) { + return OSD::scrub_time_permit(now); + } +}; + +TEST(TestOSDScrub, scrub_time_permit) { + ceph::async::io_context_pool icp(1); + ObjectStore *store = ObjectStore::create(g_ceph_context, + g_conf()->osd_objectstore, + g_conf()->osd_data, + g_conf()->osd_journal); + std::string cluster_msgr_type = g_conf()->ms_cluster_type.empty() ? 
g_conf().get_val<std::string>("ms_type") : g_conf()->ms_cluster_type; + Messenger *ms = Messenger::create(g_ceph_context, cluster_msgr_type, + entity_name_t::OSD(0), "make_checker", + getpid()); + ms->set_cluster_protocol(CEPH_OSD_PROTOCOL); + ms->set_default_policy(Messenger::Policy::stateless_server(0)); + ms->bind(g_conf()->public_addr); + MonClient mc(g_ceph_context, icp); + mc.build_initial_monmap(); + TestOSDScrub* osd = new TestOSDScrub(g_ceph_context, store, 0, ms, ms, ms, ms, ms, ms, ms, &mc, "", "", icp); + + // These are now invalid + int err = g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "24"); + ASSERT_TRUE(err < 0); + //GTEST_LOG_(INFO) << " osd_scrub_begin_hour = " << g_ceph_context->_conf.get_val<int64_t>("osd_scrub_begin_hour"); + + err = g_ceph_context->_conf.set_val("osd_scrub_end_hour", "24"); + ASSERT_TRUE(err < 0); + //GTEST_LOG_(INFO) << " osd_scrub_end_hour = " << g_ceph_context->_conf.get_val<int64_t>("osd_scrub_end_hour"); + + err = g_ceph_context->_conf.set_val("osd_scrub_begin_week_day", "7"); + ASSERT_TRUE(err < 0); + //GTEST_LOG_(INFO) << " osd_scrub_begin_week_day = " << g_ceph_context->_conf.get_val<int64_t>("osd_scrub_begin_week_day"); + + err = g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "7"); + ASSERT_TRUE(err < 0); + //GTEST_LOG_(INFO) << " osd_scrub_end_week_day = " << g_ceph_context->_conf.get_val<int64_t>("osd_scrub_end_week_day"); + + // Test all day + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "0"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "0"); + g_ceph_context->_conf.apply_changes(nullptr); + tm tm; + tm.tm_isdst = -1; + strptime("2015-01-16 12:05:13", "%Y-%m-%d %H:%M:%S", &tm); + utime_t now = utime_t(mktime(&tm), 0); + bool ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "20"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 
01:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "20"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 20:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "20"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 08:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "01"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 20:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "01"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 00:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); + + g_ceph_context->_conf.set_val("osd_scrub_begin_hour", "01"); + g_ceph_context->_conf.set_val("osd_scrub_end_hour", "07"); + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + // Sun = 0, Mon = 1, Tue = 2, Wed = 3, Thu = 4m, Fri = 5, Sat = 6 + // Jan 16, 2015 is a Friday (5) + // every day + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "0"); // inclusive + 
g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "0"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + // test Sun - Thu + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "0"); // inclusive + g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "5"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); + + // test Fri - Sat + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "5"); // inclusive + g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "0"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-16 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + // Jan 14, 2015 is a Wednesday (3) + // test Tue - Fri + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "2"); // inclusive + g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "6"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-14 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_TRUE(ret); + + // Test Sat - Sun + g_ceph_context->_conf.set_val("osd_scrub_begin_week day", "6"); // inclusive + g_ceph_context->_conf.set_val("osd_scrub_end_week_day", "1"); // not inclusive + g_ceph_context->_conf.apply_changes(nullptr); + strptime("2015-01-14 04:05:13", "%Y-%m-%d %H:%M:%S", &tm); + now = utime_t(mktime(&tm), 0); + ret = osd->scrub_time_permit(now); + ASSERT_FALSE(ret); +} + +// Local Variables: +// compile-command: "cd ../.. 
; make unittest_osdscrub ; ./unittest_osdscrub --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* " +// End: diff --git a/src/test/osd/TestOpStat.cc b/src/test/osd/TestOpStat.cc new file mode 100644 index 000000000..48b87b885 --- /dev/null +++ b/src/test/osd/TestOpStat.cc @@ -0,0 +1,58 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include "include/interval_set.h" +#include "include/buffer.h" +#include <list> +#include <map> +#include <set> +#include "RadosModel.h" +#include "TestOpStat.h" + +void TestOpStat::begin(TestOp *in) { + std::lock_guard l{stat_lock}; + stats[in->getType()].begin(in); +} + +void TestOpStat::end(TestOp *in) { + std::lock_guard l{stat_lock}; + stats[in->getType()].end(in); +} + +void TestOpStat::TypeStatus::export_latencies(map<double,uint64_t> &in) const +{ + map<double,uint64_t>::iterator i = in.begin(); + multiset<uint64_t>::iterator j = latencies.begin(); + int count = 0; + while (j != latencies.end() && i != in.end()) { + count++; + if ((((double)count)/((double)latencies.size())) * 100 >= i->first) { + i->second = *j; + ++i; + } + ++j; + } +} + +std::ostream & operator<<(std::ostream &out, const TestOpStat &rhs) +{ + std::lock_guard l{rhs.stat_lock}; + for (auto i = rhs.stats.begin(); + i != rhs.stats.end(); + ++i) { + map<double,uint64_t> latency; + latency[10] = 0; + latency[50] = 0; + latency[90] = 0; + latency[99] = 0; + i->second.export_latencies(latency); + + out << i->first << " latency: " << std::endl; + for (map<double,uint64_t>::iterator j = latency.begin(); + j != latency.end(); + ++j) { + if (j->second == 0) break; + out << "\t" << j->first << "th percentile: " + << j->second / 1000 << "ms" << std::endl; + } + } + return out; +} diff --git a/src/test/osd/TestOpStat.h b/src/test/osd/TestOpStat.h new file mode 100644 index 000000000..a279287fd --- /dev/null +++ b/src/test/osd/TestOpStat.h @@ -0,0 +1,53 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +#include 
"common/ceph_mutex.h" +#include "common/Cond.h" +#include "include/rados/librados.hpp" + +#ifndef TESTOPSTAT_H +#define TESTOPSTAT_H + +class TestOp; + +class TestOpStat { +public: + mutable ceph::mutex stat_lock = ceph::make_mutex("TestOpStat lock"); + + TestOpStat() = default; + + static uint64_t gettime() + { + timeval t; + gettimeofday(&t,0); + return (1000000*t.tv_sec) + t.tv_usec; + } + + class TypeStatus { + public: + map<TestOp*,uint64_t> inflight; + multiset<uint64_t> latencies; + void begin(TestOp *in) + { + ceph_assert(!inflight.count(in)); + inflight[in] = gettime(); + } + + void end(TestOp *in) + { + ceph_assert(inflight.count(in)); + uint64_t curtime = gettime(); + latencies.insert(curtime - inflight[in]); + inflight.erase(in); + } + + void export_latencies(map<double,uint64_t> &in) const; + }; + map<string,TypeStatus> stats; + + void begin(TestOp *in); + void end(TestOp *in); + friend std::ostream & operator<<(std::ostream &, const TestOpStat &); +}; + +std::ostream & operator<<(std::ostream &out, const TestOpStat &rhs); + +#endif diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc new file mode 100644 index 000000000..ffc14b738 --- /dev/null +++ b/src/test/osd/TestPGLog.cc @@ -0,0 +1,3248 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Library Public License for more details. + * + */ + +#include <stdio.h> +#include <signal.h> +#include "gtest/gtest.h" +#include "osd/PGLog.h" +#include "osd/OSDMap.h" +#include "include/coredumpctl.h" +#include "../objectstore/store_test_fixture.h" + + +struct PGLogTestBase { + static hobject_t mk_obj(unsigned id) { + hobject_t hoid; + stringstream ss; + ss << "obj_" << id; + hoid.oid = ss.str(); + hoid.set_hash(id); + hoid.pool = 1; + return hoid; + } + static eversion_t mk_evt(unsigned ep, unsigned v) { + return eversion_t(ep, v); + } + static pg_log_entry_t mk_ple_mod( + const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.op = pg_log_entry_t::MODIFY; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_dt( + const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.op = pg_log_entry_t::DELETE; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_ldt( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.op = pg_log_entry_t::LOST_DELETE; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + return e; + } + static pg_log_entry_t mk_ple_mod_rb( + const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) { + pg_log_entry_t e; + e.op = pg_log_entry_t::MODIFY; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_dt_rb( + const hobject_t &hoid, eversion_t v, eversion_t pv, osd_reqid_t reqid) { + pg_log_entry_t e; + e.op = pg_log_entry_t::DELETE; + e.soid = hoid; + e.version = v; + e.prior_version = pv; + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_err( + const hobject_t &hoid, eversion_t v, osd_reqid_t reqid) { + 
pg_log_entry_t e; + e.op = pg_log_entry_t::ERROR; + e.soid = hoid; + e.version = v; + e.prior_version = eversion_t(0, 0); + e.reqid = reqid; + return e; + } + static pg_log_entry_t mk_ple_mod( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + return mk_ple_mod(hoid, v, pv, osd_reqid_t()); + } + static pg_log_entry_t mk_ple_dt( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + return mk_ple_dt(hoid, v, pv, osd_reqid_t()); + } + static pg_log_entry_t mk_ple_mod_rb( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + return mk_ple_mod_rb(hoid, v, pv, osd_reqid_t()); + } + static pg_log_entry_t mk_ple_dt_rb( + const hobject_t &hoid, eversion_t v, eversion_t pv) { + return mk_ple_dt_rb(hoid, v, pv, osd_reqid_t()); + } + static pg_log_entry_t mk_ple_err( + const hobject_t &hoid, eversion_t v) { + return mk_ple_err(hoid, v, osd_reqid_t()); + } +}; // PGLogTestBase + + +class PGLogTest : virtual public ::testing::Test, protected PGLog, public PGLogTestBase { +public: + PGLogTest() : PGLog(g_ceph_context) {} + void SetUp() override { + missing.may_include_deletes = true; + } + +#include "common/ceph_context.h" +#include "common/config.h" + + void TearDown() override { + clear(); + } + + + struct TestCase { + list<pg_log_entry_t> base; + list<pg_log_entry_t> auth; + list<pg_log_entry_t> div; + + pg_missing_t init; + pg_missing_t final; + + set<hobject_t> toremove; + list<pg_log_entry_t> torollback; + bool deletes_during_peering; + + private: + IndexedLog fullauth; + IndexedLog fulldiv; + pg_info_t authinfo; + pg_info_t divinfo; + public: + TestCase() : deletes_during_peering(false) {} + void setup() { + init.may_include_deletes = !deletes_during_peering; + final.may_include_deletes = !deletes_during_peering; + fullauth.log.insert(fullauth.log.end(), base.begin(), base.end()); + fullauth.log.insert(fullauth.log.end(), auth.begin(), auth.end()); + fulldiv.log.insert(fulldiv.log.end(), base.begin(), base.end()); + fulldiv.log.insert(fulldiv.log.end(), 
div.begin(), div.end()); + + fullauth.head = authinfo.last_update = fullauth.log.rbegin()->version; + authinfo.last_complete = fullauth.log.rbegin()->version; + authinfo.log_tail = fullauth.log.begin()->version; + authinfo.log_tail.version--; + fullauth.tail = authinfo.log_tail; + authinfo.last_backfill = hobject_t::get_max(); + + fulldiv.head = divinfo.last_update = fulldiv.log.rbegin()->version; + divinfo.last_complete = eversion_t(); + divinfo.log_tail = fulldiv.log.begin()->version; + divinfo.log_tail.version--; + fulldiv.tail = divinfo.log_tail; + divinfo.last_backfill = hobject_t::get_max(); + + if (init.get_items().empty()) { + divinfo.last_complete = divinfo.last_update; + } else { + eversion_t fmissing = init.get_items().at(init.get_rmissing().begin()->second).need; + for (list<pg_log_entry_t>::const_iterator i = fulldiv.log.begin(); + i != fulldiv.log.end(); + ++i) { + if (i->version < fmissing) + divinfo.last_complete = i->version; + else + break; + } + } + + fullauth.index(); + fulldiv.index(); + } + void set_div_bounds(eversion_t head, eversion_t tail) { + fulldiv.tail = divinfo.log_tail = tail; + fulldiv.head = divinfo.last_update = head; + } + void set_auth_bounds(eversion_t head, eversion_t tail) { + fullauth.tail = authinfo.log_tail = tail; + fullauth.head = authinfo.last_update = head; + } + const IndexedLog &get_fullauth() const { return fullauth; } + const IndexedLog &get_fulldiv() const { return fulldiv; } + const pg_info_t &get_authinfo() const { return authinfo; } + const pg_info_t &get_divinfo() const { return divinfo; } + }; // struct TestCase + + struct LogHandler : public PGLog::LogEntryHandler { + set<hobject_t> removed; + list<pg_log_entry_t> rolledback; + + void rollback( + const pg_log_entry_t &entry) override { + rolledback.push_back(entry); + } + void rollforward( + const pg_log_entry_t &entry) override {} + void remove( + const hobject_t &hoid) override { + removed.insert(hoid); + } + void try_stash(const hobject_t &, version_t) 
override { + // lost/unfound cases are not tested yet + } + void trim( + const pg_log_entry_t &entry) override {} + }; + + template <typename missing_t> + void verify_missing( + const TestCase &tcase, + const missing_t &missing) { + ASSERT_EQ(tcase.final.get_items().size(), missing.get_items().size()); + for (auto i = missing.get_items().begin(); + i != missing.get_items().end(); + ++i) { + EXPECT_TRUE(tcase.final.get_items().count(i->first)); + EXPECT_EQ(tcase.final.get_items().find(i->first)->second.need, i->second.need); + EXPECT_EQ(tcase.final.get_items().find(i->first)->second.have, i->second.have); + } + bool correct = missing.debug_verify_from_init(tcase.init, &(std::cout)); + ASSERT_TRUE(correct); + } + + void verify_sideeffects( + const TestCase &tcase, + const LogHandler &handler) { + ASSERT_EQ(tcase.toremove.size(), handler.removed.size()); + ASSERT_EQ(tcase.torollback.size(), handler.rolledback.size()); + + { + list<pg_log_entry_t>::const_iterator titer = tcase.torollback.begin(); + list<pg_log_entry_t>::const_iterator hiter = handler.rolledback.begin(); + for (; titer != tcase.torollback.end(); ++titer, ++hiter) { + EXPECT_EQ(titer->version, hiter->version); + } + } + + { + set<hobject_t>::const_iterator titer = tcase.toremove.begin(); + set<hobject_t>::const_iterator hiter = handler.removed.begin(); + for (; titer != tcase.toremove.end(); ++titer, ++hiter) { + EXPECT_EQ(*titer, *hiter); + } + } + } + + void test_merge_log(const TestCase &tcase) { + clear(); + log = tcase.get_fulldiv(); + pg_info_t info = tcase.get_divinfo(); + + missing = tcase.init; + missing.flush(); + + IndexedLog olog; + olog = tcase.get_fullauth(); + pg_info_t oinfo = tcase.get_authinfo(); + + LogHandler h; + bool dirty_info = false; + bool dirty_big_info = false; + merge_log( + oinfo, std::move(olog), pg_shard_t(1, shard_id_t(0)), info, + &h, dirty_info, dirty_big_info); + + ASSERT_EQ(info.last_update, oinfo.last_update); + verify_missing(tcase, missing); + 
verify_sideeffects(tcase, h); + } + + void test_proc_replica_log(const TestCase &tcase) { + clear(); + log = tcase.get_fullauth(); + pg_info_t info = tcase.get_authinfo(); + + pg_missing_t omissing = tcase.init; + + IndexedLog olog; + olog = tcase.get_fulldiv(); + pg_info_t oinfo = tcase.get_divinfo(); + + proc_replica_log( + oinfo, olog, omissing, pg_shard_t(1, shard_id_t(0))); + + ceph_assert(oinfo.last_update >= log.tail); + + if (!tcase.base.empty()) { + ASSERT_EQ(tcase.base.rbegin()->version, oinfo.last_update); + } + + for (list<pg_log_entry_t>::const_iterator i = tcase.auth.begin(); + i != tcase.auth.end(); + ++i) { + if (i->version > oinfo.last_update) { + if (i->is_delete() && tcase.deletes_during_peering) { + omissing.rm(i->soid, i->version); + } else { + omissing.add_next_event(*i); + } + } + } + verify_missing(tcase, omissing); + } // test_proc_replica_log + + void run_test_case(const TestCase &tcase) { + test_merge_log(tcase); + test_proc_replica_log(tcase); + } +}; // class PGLogTest + +struct TestHandler : public PGLog::LogEntryHandler { + list<hobject_t> &removed; + explicit TestHandler(list<hobject_t> &removed) : removed(removed) {} + + void rollback( + const pg_log_entry_t &entry) override {} + void rollforward( + const pg_log_entry_t &entry) override {} + void remove( + const hobject_t &hoid) override { + removed.push_back(hoid); + } + void cant_rollback(const pg_log_entry_t &entry) {} + void try_stash(const hobject_t &, version_t) override { + // lost/unfound cases are not tested yet + } + void trim( + const pg_log_entry_t &entry) override {} +}; + +TEST_F(PGLogTest, rewind_divergent_log) { + /* +----------------+ + | log | + +--------+-------+ + | |object | + |version | hash | + | | | + tail > (1,1) | x5 | + | | | + | | | + | (1,4) | x9 < newhead + | MODIFY | | + | | | + head > (1,5) | x9 | + | DELETE | | + | | | + +--------+-------+ + + */ + { + clear(); + + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool 
dirty_big_info = false; + + hobject_t divergent_object; + eversion_t divergent_version; + eversion_t newhead; + + hobject_t divergent; + divergent.set_hash(0x9); + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = newhead = eversion_t(1, 4); + e.soid = divergent; + e.op = pg_log_entry_t::MODIFY; + log.log.push_back(e); + e.version = divergent_version = eversion_t(1, 5); + e.prior_version = eversion_t(1, 4); + e.soid = divergent; + divergent_object = e.soid; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + info.last_complete = log.head; + } + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(3U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_EQ(log.head, info.last_complete); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + rewind_divergent_log(newhead, info, &h, + dirty_info, dirty_big_info); + + EXPECT_TRUE(log.objects.count(divergent)); + EXPECT_TRUE(missing.is_missing(divergent_object)); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(2U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(newhead, info.last_update); + EXPECT_EQ(newhead, info.last_complete); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + /* +----------------+ + | log | + +--------+-------+ + | |object | + |version | hash | + | | | + tail > (1,1) | NULL | + | | | + | (1,4) | NULL < newhead + | | | + head > (1,5) | x9 | + | | | + +--------+-------+ + + */ + { + clear(); + + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + hobject_t divergent_object; + eversion_t divergent_version; + eversion_t prior_version; + eversion_t newhead; + { + 
pg_log_entry_t e; + e.mark_unrollbackable(); + + info.log_tail = log.tail = eversion_t(1, 1); + newhead = eversion_t(1, 3); + e.version = divergent_version = eversion_t(1, 5); + e.soid.set_hash(0x9); + divergent_object = e.soid; + e.op = pg_log_entry_t::DELETE; + e.prior_version = prior_version = eversion_t(0, 2); + log.log.push_back(e); + log.head = e.version; + } + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + rewind_divergent_log(newhead, info, &h, + dirty_info, dirty_big_info); + + EXPECT_TRUE(missing.is_missing(divergent_object)); + EXPECT_EQ(0U, log.objects.count(divergent_object)); + EXPECT_TRUE(log.empty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + // Test for 13965 + { + clear(); + + list<hobject_t> remove_snap; + pg_info_t info; + info.log_tail = log.tail = eversion_t(1, 5); + info.last_update = eversion_t(1, 6); + bool dirty_info = false; + bool dirty_big_info = false; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.version = eversion_t(1, 5); + e.soid.set_hash(0x9); + add(e); + } + { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.version = eversion_t(1, 6); + e.soid.set_hash(0x10); + add(e); + } + TestHandler h(remove_snap); + roll_forward_to(eversion_t(1, 6), &h); + rewind_divergent_log(eversion_t(1, 5), info, &h, + dirty_info, dirty_big_info); + pg_log_t log; + reset_backfill_claim_log(log, &h); + } +} + +TEST_F(PGLogTest, merge_old_entry) { + // entries > last_backfill are silently ignored + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.last_backfill = hobject_t(); + info.last_backfill.set_hash(100); + oe.soid.set_hash(2); + ASSERT_GT(oe.soid, info.last_backfill); + 
+ EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + } + + // the new entry (from the logs) has a version that is higher than + // the old entry (from the log entry given in argument) : do + // nothing and return false + { + clear(); + + ObjectStore::Transaction t; + pg_info_t info; + list<hobject_t> remove_snap; + + pg_log_entry_t ne; + ne.mark_unrollbackable(); + ne.version = eversion_t(2,1); + log.add(ne); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + EXPECT_EQ(ne.version, log.log.front().version); + + // the newer entry ( from the logs ) can be DELETE + { + log.log.front().op = pg_log_entry_t::DELETE; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + oe.version = eversion_t(1,1); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + } + + // if the newer entry is not DELETE, the object must be in missing + { + pg_log_entry_t &ne = log.log.front(); + ne.op = pg_log_entry_t::MODIFY; + missing.add_next_event(ne); + pg_log_entry_t oe; + oe.mark_unrollbackable(); + oe.version = eversion_t(1,1); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + missing.rm(ne.soid, ne.version); + } + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + EXPECT_EQ(ne.version, log.log.front().version); + + } + + // the new entry (from the logs) has a version that is lower than + // the old entry (from the log entry given in argument) and + // old and new are delete : do 
nothing and return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + pg_log_entry_t ne; + ne.mark_unrollbackable(); + ne.version = eversion_t(1,1); + ne.op = pg_log_entry_t::DELETE; + log.add(ne); + + oe.version = eversion_t(2,1); + oe.op = pg_log_entry_t::DELETE; + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + } + + // the new entry (from the logs) has a version that is lower than + // the old entry (from the log entry given in argument) and + // old is update and new is DELETE : + // if the object is in missing, it is removed + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + pg_log_entry_t ne; + ne.mark_unrollbackable(); + ne.version = eversion_t(1,1); + ne.op = pg_log_entry_t::DELETE; + log.add(ne); + + oe.version = eversion_t(2,1); + oe.op = pg_log_entry_t::MODIFY; + missing.add_next_event(oe); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(missing.is_missing(oe.soid)); + EXPECT_EQ(1U, log.log.size()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.size() > 0); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.log.size()); + } + + // there is no new entry (from the logs) and + // the old entry (from the log entry given in argument) is not a CLONE and + // the old entry prior_version is greater than the tail of 
the log : + // do nothing and return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.log_tail = eversion_t(1,1); + oe.op = pg_log_entry_t::MODIFY; + oe.prior_version = eversion_t(2,1); + missing_add(oe.soid, oe.prior_version, eversion_t()); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(log.empty()); + } + + // there is no new entry (from the logs) and + // the old entry (from the log entry given in argument) is not a CLONE and + // the old entry (from the log entry given in argument) is not a DELETE and + // the old entry prior_version is lower than the tail of the log : + // add the old object to the remove_snap list and + // add the old object to divergent priors and + // add or update the prior_version of the object to missing and + // return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.log_tail = eversion_t(2,1); + oe.soid.set_hash(1); + oe.op = pg_log_entry_t::MODIFY; + oe.prior_version = eversion_t(1,1); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + EXPECT_TRUE(is_dirty()); + EXPECT_EQ(oe.soid, remove_snap.front()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(missing.is_missing(oe.soid)); + EXPECT_TRUE(log.empty()); + } + + // there is no new entry (from the logs) and + // the old entry (from the log entry given in argument) is not a CLONE and + // the old entry (from the log 
entry given in argument) is a DELETE and + // the old entry prior_version is lower than the tail of the log : + // add the old object to divergent priors and + // add or update the prior_version of the object to missing and + // return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.log_tail = eversion_t(2,1); + oe.soid.set_hash(1); + oe.op = pg_log_entry_t::DELETE; + oe.prior_version = eversion_t(1,1); + + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(missing.is_missing(oe.soid)); + EXPECT_TRUE(log.empty()); + } + + + // there is no new entry (from the logs) and + // the old entry (from the log entry given in argument) is not a CLONE and + // the old entry (from the log entry given in argument) is not a DELETE and + // the old entry prior_version is eversion_t() : + // add the old object to the remove_snap list and + // remove the prior_version of the object from missing, if any and + // return false + { + clear(); + + ObjectStore::Transaction t; + pg_log_entry_t oe; + oe.mark_unrollbackable(); + pg_info_t info; + list<hobject_t> remove_snap; + + info.log_tail = eversion_t(10,1); + oe.soid.set_hash(1); + oe.op = pg_log_entry_t::MODIFY; + oe.prior_version = eversion_t(); + + missing.add(oe.soid, eversion_t(1,1), eversion_t(), false); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(t.empty()); + EXPECT_TRUE(missing.is_missing(oe.soid)); + EXPECT_TRUE(log.empty()); + + TestHandler h(remove_snap); + merge_old_entry(t, oe, info, &h); + + missing.flush(); + EXPECT_FALSE(is_dirty()); + EXPECT_EQ(oe.soid, remove_snap.front()); + 
EXPECT_TRUE(t.empty()); + EXPECT_FALSE(missing.have_missing()); + EXPECT_TRUE(log.empty()); + } + +} + +TEST_F(PGLogTest, merge_log) { + // head and tail match, last_backfill is set: + // noop + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + hobject_t last_backfill(object_t("oname"), string("key"), 1, 234, 1, ""); + info.last_backfill = last_backfill; + eversion_t stat_version(10, 1); + info.stats.version = stat_version; + log.tail = olog.tail = eversion_t(1, 1); + log.head = olog.head = eversion_t(2, 1); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(0U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(last_backfill, info.last_backfill); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(0U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + } + + // head and tail match, last_backfill is not set: info.stats is + // copied from oinfo.stats but info.stats.reported_* is guaranteed to + // never be replaced by a lower version + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + eversion_t stat_version(10, 1); + oinfo.stats.version = stat_version; + info.stats.reported_seq = 1; + info.stats.reported_epoch = 10; + oinfo.stats.reported_seq = 1; + oinfo.stats.reported_epoch = 1; + log.tail = olog.tail = 
eversion_t(1, 1); + log.head = olog.head = eversion_t(2, 1); + missing.may_include_deletes = false; + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(0U, log.log.size()); + EXPECT_EQ(eversion_t(), info.stats.version); + EXPECT_EQ(1ull, info.stats.reported_seq); + EXPECT_EQ(10u, info.stats.reported_epoch); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(info.last_backfill.is_max()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(0U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_EQ(1ull, info.stats.reported_seq); + EXPECT_EQ(10u, info.stats.reported_epoch); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + } + + /* Before + +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + | | x5 | (1,1) < tail + | | | | + | | | | + tail > (1,4) | x7 | | + | | | | + | | | | + head > (1,5) | x9 | (1,5) < head + | | | | + | | | | + +--------+-------+---------+ + + After + +----------------- + | log | + +--------+-------+ + | |object | + |version | hash | + | | | + tail > (1,1) | x5 | + | | | + | | | + | (1,4) | x7 | + | | | + | | | + head > (1,5) | x9 | + | | | + | | | + +--------+-------+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + missing.may_include_deletes = false; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 4); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 5); + 
e.soid.set_hash(0x9); + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(1, 5); + e.soid.set_hash(0x9); + olog.log.push_back(e); + olog.head = e.version; + } + + hobject_t last_backfill(object_t("oname"), string("key"), 1, 234, 1, ""); + info.last_backfill = last_backfill; + eversion_t stat_version(10, 1); + info.stats.version = stat_version; + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(2U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(last_backfill, info.last_backfill); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(3U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x5 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) < lower_bound + | | | | + | | | | + head > (1,3) | x9 | | + | DELETE | | | + | | | | + | | x9 | (2,3) | + | | | MODIFY | + | | | | + | | x7 | (2,4) < head + | | | DELETE | + +--------+-------+---------+ + + The log entry (1,3) deletes the object x9 but the olog entry (2,3) modifies + it and is authoritative : the log entry (1,3) is divergent. 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + hobject_t divergent_object; + missing.may_include_deletes = true; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 2); + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = eversion_t(1,3); + e.soid.set_hash(0x9); + divergent_object = e.soid; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(1, 2); + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = eversion_t(2, 3); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::MODIFY; + olog.log.push_back(e); + e.version = eversion_t(2, 4); + e.soid.set_hash(0x7); + e.op = pg_log_entry_t::DELETE; + olog.log.push_back(e); + olog.head = e.version; + } + + snapid_t purged_snap(1); + { + oinfo.last_update = olog.head; + oinfo.purged_snaps.insert(purged_snap); + } + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(3U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + /* When the divergent entry is a DELETE and the authoritative + entry is a MODIFY, the object will be added to missing : it is + a verifiable side effect proving the entry was identified + to be divergent. 
+ */ + EXPECT_TRUE(missing.is_missing(divergent_object)); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(4U, log.log.size()); + /* DELETE entries from olog are appended to the head of the + log, and the divergent version of the object is removed (added + to remove_snap) + */ + EXPECT_EQ(0x9U, remove_snap.front().get_hash()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_TRUE(info.purged_snaps.contains(purged_snap)); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x5 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) < lower_bound + | | | | + | | | | + head > (1,3) | x9 | | + | DELETE | | | + | | | | + | | x9 | (2,3) | + | | | MODIFY | + | | | | + | | x7 | (2,4) < head + | | | DELETE | + +--------+-------+---------+ + + The log entry (1,3) deletes the object x9 but the olog entry (2,3) modifies + it and is authoritative : the log entry (1,3) is divergent. 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + hobject_t divergent_object; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 2); + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = eversion_t(1,3); + e.soid.set_hash(0x9); + divergent_object = e.soid; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(1, 2); + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = eversion_t(2, 3); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::MODIFY; + olog.log.push_back(e); + e.version = eversion_t(2, 4); + e.soid.set_hash(0x7); + e.op = pg_log_entry_t::DELETE; + olog.log.push_back(e); + olog.head = e.version; + } + + snapid_t purged_snap(1); + { + oinfo.last_update = olog.head; + oinfo.purged_snaps.insert(purged_snap); + } + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(3U, log.log.size()); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + missing.may_include_deletes = false; + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + /* When the divergent entry is a DELETE and the authoritative + entry is a MODIFY, the object will be added to missing : it is + a verifiable side effect proving the entry was identified + to be divergent. 
+ */ + EXPECT_TRUE(missing.is_missing(divergent_object)); + EXPECT_EQ(1U, log.objects.count(divergent_object)); + EXPECT_EQ(4U, log.log.size()); + /* DELETE entries from olog are appended to the head of the + log, and the divergent version of the object is removed (added + to remove_snap). When peering handles deletes, it is the earlier + version that is in the removed list. + */ + EXPECT_EQ(0x7U, remove_snap.front().get_hash()); + EXPECT_EQ(log.head, info.last_update); + EXPECT_TRUE(info.purged_snaps.contains(purged_snap)); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x5 | (1,1) < tail + | | | | + | | | | + | (1,4) | x7 | (1,4) < head + | | | | + | | | | + head > (1,5) | x9 | | + | | | | + | | | | + +--------+-------+---------+ + + The head of the log entry (1,5) is divergent because it is greater than the + head of olog. 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_shard_t fromosd; + pg_info_t info; + list<hobject_t> remove_snap; + bool dirty_info = false; + bool dirty_big_info = false; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 4); + e.soid.set_hash(0x7); + log.log.push_back(e); + e.version = eversion_t(1, 5); + e.soid.set_hash(0x9); + log.log.push_back(e); + log.head = e.version; + log.index(); + + info.last_update = log.head; + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x5); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(1, 4); + e.soid.set_hash(0x7); + olog.log.push_back(e); + olog.head = e.version; + } + + hobject_t last_backfill(object_t("oname"), string("key"), 1, 234, 1, ""); + info.last_backfill = last_backfill; + eversion_t stat_version(10, 1); + info.stats.version = stat_version; + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(3U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_TRUE(remove_snap.empty()); + EXPECT_EQ(last_backfill, info.last_backfill); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_FALSE(is_dirty()); + EXPECT_FALSE(dirty_info); + EXPECT_FALSE(dirty_big_info); + + TestHandler h(remove_snap); + missing.may_include_deletes = false; + merge_log(oinfo, std::move(olog), fromosd, info, &h, + dirty_info, dirty_big_info); + + EXPECT_FALSE(missing.have_missing()); + EXPECT_EQ(2U, log.log.size()); + EXPECT_EQ(stat_version, info.stats.version); + EXPECT_EQ(0x9U, remove_snap.front().get_hash()); + EXPECT_TRUE(info.purged_snaps.empty()); + EXPECT_TRUE(is_dirty()); + EXPECT_TRUE(dirty_info); + EXPECT_TRUE(dirty_big_info); + } + +} + +TEST_F(PGLogTest, proc_replica_log) { + // empty log : no side effect + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + eversion_t last_update(1, 1); + 
log.head = olog.head = oinfo.last_update = last_update; + eversion_t last_complete(1, 1); + oinfo.last_complete = last_complete; + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(last_update, oinfo.last_update); + EXPECT_EQ(last_complete, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(last_update, oinfo.last_update); + EXPECT_EQ(last_update, oinfo.last_complete); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + | | x3 | (1,1) < tail + | | | | + | | | | + tail > (1,2) | x5 | | + | | | | + | | | | + head > (1,3) | x9 | | + | DELETE | | | + | | | | + | | x9 | (2,3) < head + | | | DELETE | + | | | | + +--------+-------+---------+ + + The log entry (1,3) deletes the object x9 and the olog entry + (2,3) also deletes it : do nothing. The olog tail is ignored + because it is before the log tail. 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 2); + e.soid.set_hash(0x5); + log.tail = e.version; + log.log.push_back(e); + e.version = eversion_t(1, 3); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x3); + olog.tail = e.version; + olog.log.push_back(e); + e.version = eversion_t(2, 3); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::DELETE; + olog.log.push_back(e); + olog.head = e.version; + + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_FALSE(omissing.have_missing()); + } + + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + hobject_t divergent_object; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + { + e.soid = divergent_object; + e.soid.set_hash(0x1); + e.version = eversion_t(1, 1); + log.tail = e.version; + log.log.push_back(e); + + e.soid = divergent_object; + e.prior_version = eversion_t(1, 1); + e.version = eversion_t(1, 2); + log.tail = e.version; + log.log.push_back(e); + + e.soid.set_hash(0x3); + e.version = eversion_t(1, 4); + log.log.push_back(e); + + e.soid.set_hash(0x7); + e.version = eversion_t(1, 5); + log.log.push_back(e); + + e.soid.set_hash(0x8); + e.version = eversion_t(1, 6); + log.log.push_back(e); + + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::DELETE; + e.version = eversion_t(2, 7); + log.log.push_back(e); + + e.soid.set_hash(0xa); + e.version = eversion_t(2, 8); + log.head = e.version; + log.log.push_back(e); + } + log.index(); + + { + e.soid = divergent_object; + 
e.soid.set_hash(0x1); + e.version = eversion_t(1, 1); + olog.tail = e.version; + olog.log.push_back(e); + + e.soid = divergent_object; + e.prior_version = eversion_t(1, 1); + e.version = eversion_t(1, 2); + olog.log.push_back(e); + + e.prior_version = eversion_t(0, 0); + e.soid.set_hash(0x3); + e.version = eversion_t(1, 4); + olog.log.push_back(e); + + e.soid.set_hash(0x7); + e.version = eversion_t(1, 5); + olog.log.push_back(e); + + e.soid.set_hash(0x8); + e.version = eversion_t(1, 6); + olog.log.push_back(e); + + e.soid.set_hash(0x9); // should not be added to missing, create + e.op = pg_log_entry_t::MODIFY; + e.version = eversion_t(1, 7); + olog.log.push_back(e); + + e.soid = divergent_object; // should be added to missing at 1,2 + e.op = pg_log_entry_t::MODIFY; + e.version = eversion_t(1, 8); + e.prior_version = eversion_t(1, 2); + olog.log.push_back(e); + olog.head = e.version; + } + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(eversion_t(1, 2), omissing.get_items().at(divergent_object).need); + EXPECT_EQ(eversion_t(1, 6), oinfo.last_update); + EXPECT_EQ(eversion_t(1, 1), oinfo.last_complete); + } + + /* +--------------------------+ + | olog log | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x9 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) | + | | | | + | | | | + head > (1,3) | x9 | | + | DELETE | | | + | | | | + | | x9 | (2,3) < head + | | | DELETE | + | | | | + +--------+-------+---------+ + + The olog entry (1,3) deletes the object x9 and the log entry + (2,3) also deletes it : the two entries differ, so x9 is added + to omissing (need 1,1) and oinfo.last_update goes back to (1,2). 
+ + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + eversion_t last_update(1, 2); + hobject_t divergent_object; + divergent_object.set_hash(0x9); + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid = divergent_object; + log.tail = e.version; + log.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = eversion_t(2, 3); + e.prior_version = eversion_t(1, 1); + e.soid = divergent_object; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + e.version = eversion_t(1, 1); + e.soid = divergent_object; + olog.tail = e.version; + olog.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = eversion_t(1, 3); + e.prior_version = eversion_t(1, 1); + e.soid = divergent_object; + e.op = pg_log_entry_t::DELETE; + olog.log.push_back(e); + olog.head = e.version; + + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_FALSE(omissing.have_missing()); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(omissing.get_items().at(divergent_object).have, eversion_t(0, 0)); + EXPECT_EQ(omissing.get_items().at(divergent_object).need, eversion_t(1, 1)); + EXPECT_EQ(last_update, oinfo.last_update); + } + + /* +--------------------------+ + | olog log | + +--------+-------+---------+ + | |object | | + |version | hash | version | + | | | | + tail > (1,1) | x9 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) | + | | | | + | | | | + head > (1,3) | x9 | | + | MODIFY | | | + | | | | + | | x9 | (2,3) < head + | | | DELETE | + | | | | + +--------+-------+---------+ + + The olog entry (1,3) 
modifies the object x9 but the log entry + (2,3) deletes it : the need version for x9 in omissing is + adjusted from (1,3) back to (1,1). + + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + eversion_t last_update(1, 2); + hobject_t divergent_object; + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid = divergent_object; + log.tail = e.version; + log.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = eversion_t(2, 3); + e.prior_version = eversion_t(1, 1); + e.soid = divergent_object; + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + e.version = eversion_t(1, 1); + e.soid = divergent_object; + olog.tail = e.version; + olog.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = eversion_t(1, 3); + e.prior_version = eversion_t(1, 1); + e.soid = divergent_object; + divergent_object = e.soid; + omissing.add(divergent_object, e.version, eversion_t(), false); + e.op = pg_log_entry_t::MODIFY; + olog.log.push_back(e); + olog.head = e.version; + + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(eversion_t(1, 3), omissing.get_items().at(divergent_object).need); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + proc_replica_log(oinfo, olog, omissing, from); + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(omissing.get_items().at(divergent_object).have, eversion_t(0, 0)); + EXPECT_EQ(omissing.get_items().at(divergent_object).need, eversion_t(1, 1)); + EXPECT_EQ(last_update, oinfo.last_update); + } + + /* +--------------------------+ + | log olog | + +--------+-------+---------+ + | |object | | + |version | 
hash | version | + | | | | + tail > (1,1) | x9 | (1,1) < tail + | | | | + | | | | + | (1,2) | x3 | (1,2) | + | | | | + | | | | + | | x9 | (1,3) < head + | | | MODIFY | + | | | | + head > (2,3) | x9 | | + | DELETE | | | + | | | | + +--------+-------+---------+ + + The log entry (2,3) deletes the object x9 but the olog entry + (1,3) modifies it : proc_replica_log should adjust missing to + 1,1 for that object until add_next_event in PG::activate processes + the delete. + */ + { + clear(); + + pg_log_t olog; + pg_info_t oinfo; + pg_missing_t omissing; + pg_shard_t from; + + eversion_t last_update(1, 2); + hobject_t divergent_object; + eversion_t new_version(2, 3); + eversion_t divergent_version(1, 3); + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + + e.version = eversion_t(1, 1); + e.soid.set_hash(0x9); + log.tail = e.version; + log.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + log.log.push_back(e); + e.version = new_version; + e.prior_version = eversion_t(1, 1); + e.soid.set_hash(0x9); + e.op = pg_log_entry_t::DELETE; + log.log.push_back(e); + log.head = e.version; + log.index(); + + e.op = pg_log_entry_t::MODIFY; + e.version = eversion_t(1, 1); + e.soid.set_hash(0x9); + olog.tail = e.version; + olog.log.push_back(e); + e.version = last_update; + e.soid.set_hash(0x3); + olog.log.push_back(e); + e.version = divergent_version; + e.prior_version = eversion_t(1, 1); + e.soid.set_hash(0x9); + divergent_object = e.soid; + omissing.add(divergent_object, e.version, eversion_t(), false); + e.op = pg_log_entry_t::MODIFY; + olog.log.push_back(e); + olog.head = e.version; + + oinfo.last_update = olog.head; + oinfo.last_complete = olog.head; + } + + EXPECT_TRUE(omissing.have_missing()); + EXPECT_TRUE(omissing.is_missing(divergent_object)); + EXPECT_EQ(divergent_version, omissing.get_items().at(divergent_object).need); + EXPECT_EQ(olog.head, oinfo.last_update); + EXPECT_EQ(olog.head, oinfo.last_complete); + + missing.may_include_deletes = false; + 
proc_replica_log(oinfo, olog, omissing, from);

    EXPECT_TRUE(omissing.have_missing());
    EXPECT_TRUE(omissing.get_items().begin()->second.need == eversion_t(1, 1));
    EXPECT_EQ(last_update, oinfo.last_update);
    EXPECT_EQ(eversion_t(0, 0), oinfo.last_complete);
  }

}

// The merge_log_N tests below all follow one pattern: fill the fields of a
// TestCase (base / div / auth log entries, init / final missing sets,
// toremove / torollback expectations), call t.setup() and hand the case to
// run_test_case().  TestCase, the mk_* helpers and run_test_case are defined
// outside this excerpt; the per-test notes only restate what each test sets.

// merge_log_1: one mk_ple_mod base entry for object 1 at (10,100) and one
// mk_ple_mod divergent entry at (10,101) on top of it.  Expectations set
// below: object 1 appears in t.final with versions (10,100)/(0,0) and is
// listed in t.toremove.
TEST_F(PGLogTest, merge_log_1) {
  TestCase t;
  t.base.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

  t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100)));

  t.final.add(mk_obj(1), mk_evt(10, 100), mk_evt(0, 0), false);

  t.toremove.insert(mk_obj(1));

  t.setup();
  run_test_case(t);
}

// merge_log_2: base and both divergent entries are built with mk_ple_mod_rb
// (presumably "rollbackable" -- confirm against the helper).  The expected
// rollback list is the divergent entries in reverse order; nothing is added
// to t.final or t.toremove.
TEST_F(PGLogTest, merge_log_2) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

  t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100)));
  t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 102), mk_evt(10, 101)));

  // newest divergent entry first
  t.torollback.insert(
    t.torollback.begin(), t.div.rbegin(), t.div.rend());

  t.setup();
  run_test_case(t);
}

// merge_log_3: like merge_log_2, but the first divergent entry is built with
// mk_ple_mod instead of mk_ple_mod_rb.  Expectations set below: object 1 is
// in t.final with (10,100)/(0,0) and in t.toremove; t.torollback stays empty.
TEST_F(PGLogTest, merge_log_3) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

  t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100)));
  t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 102), mk_evt(10, 101)));

  t.final.add(mk_obj(1), mk_evt(10, 100), mk_evt(0, 0), false);

  t.toremove.insert(mk_obj(1));

  t.setup();
  run_test_case(t);
}

// merge_log_4: same logs as merge_log_2, but object 1 starts out in t.init
// with (10,102)/(0,0).  Expected t.final entry: (10,100)/(0,0).
TEST_F(PGLogTest, merge_log_4) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

  t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100)));
  t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 102), mk_evt(10, 101)));

  t.init.add(mk_obj(1), mk_evt(10, 102), mk_evt(0, 0), false);
  t.final.add(mk_obj(1), mk_evt(10, 100), mk_evt(0, 0), false);

  t.setup();
  run_test_case(t);
}

// merge_log_5: divergent entries as in merge_log_3 plus an authoritative
// entry at (11,101); see the expectations at the end of the test.
TEST_F(PGLogTest, merge_log_5) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1),
mk_evt(10, 100), mk_evt(8, 80)));

  t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100)));
  t.div.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 102), mk_evt(10, 101)));

  t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100)));

  // expected: object 1 in t.final with (11,101)/(0,0) and in t.toremove
  t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(0, 0), false);

  t.toremove.insert(mk_obj(1));

  t.setup();
  run_test_case(t);
}

// merge_log_6: no divergent entries; one authoritative mk_ple_mod entry at
// (11,101) on top of the base entry (10,100).  Expected t.final entry for
// object 1: (11,101)/(10,100).
TEST_F(PGLogTest, merge_log_6) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

  t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100)));

  t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100), false);

  t.setup();
  run_test_case(t);
}

// merge_log_7: same logs as merge_log_6, but object 1 starts in t.init with
// (10,100)/(8,80).  Expected t.final entry: (11,101)/(8,80), i.e. only the
// first version changes.
TEST_F(PGLogTest, merge_log_7) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

  t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100)));

  t.init.add(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80), false);
  t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(8, 80), false);

  t.setup();
  run_test_case(t);
}

// merge_log_8: like merge_log_7, but the authoritative entry is built with
// mk_ple_dt (presumably a delete -- confirm against the helper) and the
// expected t.final entry passes `true` as the last add() argument.
TEST_F(PGLogTest, merge_log_8) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

  t.auth.push_back(mk_ple_dt(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100)));

  t.init.add(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80), false);
  t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(8, 80), true);

  t.setup();
  run_test_case(t);
}

// merge_log_9: same logs and initial missing set as merge_log_8, but with
// t.deletes_during_peering set.  Here nothing is added to t.final; object 1
// is expected in t.toremove instead.
TEST_F(PGLogTest, merge_log_9) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

  t.auth.push_back(mk_ple_dt(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100)));

  t.init.add(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80), false);
  t.toremove.insert(mk_obj(1));
  t.deletes_during_peering = true;

  t.setup();
  run_test_case(t);
}

// merge_log_10: variant of merge_log_8 whose authoritative entry is built
// with mk_ple_ldt (set up right after the base entry).
TEST_F(PGLogTest, merge_log_10) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

t.auth.push_back(mk_ple_ldt(mk_obj(1), mk_evt(11, 101), mk_evt(10, 100)));

  // same initial and expected missing entries as merge_log_8
  t.init.add(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80), false);
  t.final.add(mk_obj(1), mk_evt(11, 101), mk_evt(8, 80), true);

  t.setup();
  run_test_case(t);
}

// merge_log_prior_version_have: one divergent mk_ple_mod entry (10,101) whose
// prior version (10,100) equals the second version stored in the initial
// missing entry.  Nothing is added to t.final, t.toremove or t.torollback.
TEST_F(PGLogTest, merge_log_prior_version_have) {
  TestCase t;
  t.base.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 80)));

  t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100)));

  t.init.add(mk_obj(1), mk_evt(10, 101), mk_evt(10, 100), false);

  t.setup();
  run_test_case(t);
}

// merge_log_split_missing_entries_at_head: no base entries.  The bounds of
// the divergent and authoritative logs are overridden after setup() via
// set_div_bounds()/set_auth_bounds() (argument meaning is defined with
// TestCase, outside this excerpt).  Expected t.final entry for object 1:
// (15,150)/(8,70).
TEST_F(PGLogTest, merge_log_split_missing_entries_at_head) {
  TestCase t;
  t.auth.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
  t.auth.push_back(mk_ple_mod_rb(mk_obj(1), mk_evt(15, 150), mk_evt(10, 100)));

  t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(8, 70), mk_evt(8, 65)));

  t.setup();
  t.set_div_bounds(mk_evt(9, 79), mk_evt(8, 69));
  t.set_auth_bounds(mk_evt(15, 160), mk_evt(9, 77));
  t.final.add(mk_obj(1), mk_evt(15, 150), mk_evt(8, 70), false);
  run_test_case(t);
}

// olog_tail_gt_log_tail_split: three authoritative entries and no divergent
// entries; bounds are overridden after setup().  Expected t.final entry for
// object 1: (15,155)/(15,150).
TEST_F(PGLogTest, olog_tail_gt_log_tail_split) {
  TestCase t;
  t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
  t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 150), mk_evt(10, 100)));
  t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 155), mk_evt(15, 150)));

  t.setup();
  t.set_div_bounds(mk_evt(15, 153), mk_evt(15, 151));
  t.set_auth_bounds(mk_evt(15, 156), mk_evt(10, 99));
  t.final.add(mk_obj(1), mk_evt(15, 155), mk_evt(15, 150), false);
  run_test_case(t);
}

// olog_tail_gt_log_tail_split2: as above, but the newest authoritative entry
// is (16,155) and there is one divergent entry at (15,153).
TEST_F(PGLogTest, olog_tail_gt_log_tail_split2) {
  TestCase t;
  t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
  t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 150), mk_evt(10, 100)));
  t.auth.push_back(mk_ple_mod(mk_obj(1), mk_evt(16, 155), mk_evt(15, 150)));
  t.div.push_back(mk_ple_mod(mk_obj(1), mk_evt(15, 153), mk_evt(15, 150)));

  t.setup();
t.set_div_bounds(mk_evt(15, 153), mk_evt(15, 151)); + t.set_auth_bounds(mk_evt(16, 156), mk_evt(10, 99)); + t.final.add(mk_obj(1), mk_evt(16, 155), mk_evt(0, 0), false); + t.toremove.insert(mk_obj(1)); + run_test_case(t); +} + +TEST_F(PGLogTest, filter_log_1) { + { + clear(); + + int osd_id = 1; + epoch_t epoch = 40; + int64_t pool_id = 1; + int bits = 2; + int max_osd = 4; + int pg_num = max_osd << bits; + int num_objects = 1000; + int num_internal = 10; + + // Set up splitting map + OSDMap *osdmap = new OSDMap; + uuid_d test_uuid; + test_uuid.generate_random(); + osdmap->build_simple_with_pool(g_ceph_context, epoch, test_uuid, max_osd, bits, bits); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + + const string hit_set_namespace("internal"); + + { + pg_log_entry_t e; + e.mark_unrollbackable(); + e.op = pg_log_entry_t::MODIFY; + e.soid.pool = pool_id; + + uuid_d uuid_name; + int i; + for (i = 1; i <= num_objects; ++i) { + e.version = eversion_t(epoch, i); + // Use this to generate random file names + uuid_name.generate_random(); + ostringstream name; + name << uuid_name; + e.soid.oid.name = name.str(); + // First has no namespace + if (i != 1) { + // num_internal have the internal namspace + if (i <= num_internal + 1) { + e.soid.nspace = hit_set_namespace; + } else { // rest have different namespaces + ostringstream ns; + ns << "ns" << i; + e.soid.nspace = ns.str(); + } + } + log.log.push_back(e); + if (i == 1) + log.tail = e.version; + } + log.head = e.version; + log.index(); + } + + spg_t pgid(pg_t(2, pool_id), shard_id_t::NO_SHARD); + + // See if we created the right number of entries + int total = log.log.size(); + ASSERT_EQ(total, num_objects); + + // Some should be removed + { + pg_log_t filtered, reject; + pg_log_t::filter_log( + pgid, *osdmap, hit_set_namespace, log, filtered, reject); + log = IndexedLog(filtered); + } + EXPECT_LE(log.log.size(), (size_t)total); + + // If we filter a second time, there should be the same total + total = log.log.size(); + { 
+ pg_log_t filtered, reject; + pg_log_t::filter_log( + pgid, *osdmap, hit_set_namespace, log, filtered, reject); + log = IndexedLog(filtered); + } + EXPECT_EQ(log.log.size(), (size_t)total); + + // Increase pg_num as if there would be a split + int new_pg_num = pg_num * 16; + OSDMap::Incremental inc(epoch + 1); + inc.fsid = test_uuid; + const pg_pool_t *pool = osdmap->get_pg_pool(pool_id); + pg_pool_t newpool; + newpool = *pool; + newpool.set_pg_num(new_pg_num); + newpool.set_pgp_num(new_pg_num); + inc.new_pools[pool_id] = newpool; + int ret = osdmap->apply_incremental(inc); + ASSERT_EQ(ret, 0); + + // We should have fewer entries after a filter + { + pg_log_t filtered, reject; + pg_log_t::filter_log( + pgid, *osdmap, hit_set_namespace, log, filtered, reject); + log = IndexedLog(filtered); + } + EXPECT_LE(log.log.size(), (size_t)total); + + // Make sure all internal entries are retained + int count = 0; + for (list<pg_log_entry_t>::iterator i = log.log.begin(); + i != log.log.end(); ++i) { + if (i->soid.nspace == hit_set_namespace) count++; + } + EXPECT_EQ(count, num_internal); + } +} + +TEST_F(PGLogTest, get_request) { + clear(); + + // make sure writes, deletes, and errors are found + vector<pg_log_entry_t> entries; + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(6,2), eversion_t(3,4), + 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 1), + utime_t(0,1), -ENOENT)); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::MODIFY, oid, eversion_t(6,3), eversion_t(3,4), + 2, osd_reqid_t(entity_name_t::CLIENT(777), 8, 2), + utime_t(1,2), 0)); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::DELETE, oid, eversion_t(7,4), eversion_t(7,4), + 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 3), + utime_t(10,2), 0)); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(7,5), eversion_t(7,4), + 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 4), + utime_t(20,1), 
-ENOENT));

  // add every entry to the log ...
  for (auto &entry : entries) {
    log.add(entry);
  }

  // ... then look each one up again by its request id and compare the
  // values returned through the out-parameters with the original entry
  for (auto &entry : entries) {
    eversion_t replay_version;
    version_t user_version;
    int return_code = 0;
    vector<pg_log_op_return_item_t> op_returns;
    bool got = log.get_request(
      entry.reqid, &replay_version, &user_version, &return_code, &op_returns);
    EXPECT_TRUE(got);
    EXPECT_EQ(entry.return_code, return_code);
    EXPECT_EQ(entry.version, replay_version);
    EXPECT_EQ(entry.user_version, user_version);
  }
}

// ErrorNotIndexedByObject: ERROR entries must not become the per-object entry
// in log.objects.  The test adds ERROR, MODIFY, DELETE and ERROR entries for
// the same object in turn and checks logged_object() / log.objects[oid]
// after each step.
TEST_F(PGLogTest, ErrorNotIndexedByObject) {
  clear();

  // with only an ERROR entry in the log the object must not count as logged
  hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
  log.add(
    pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(6,2), eversion_t(3,4),
                   1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 1),
                   utime_t(0,1), -ENOENT));

  EXPECT_FALSE(log.logged_object(oid));

  // a MODIFY is indexed: log.objects[oid] must describe it
  pg_log_entry_t modify(pg_log_entry_t::MODIFY, oid, eversion_t(6,3),
                        eversion_t(3,4), 2,
                        osd_reqid_t(entity_name_t::CLIENT(777), 8, 2),
                        utime_t(1,2), 0);
  log.add(modify);

  EXPECT_TRUE(log.logged_object(oid));
  pg_log_entry_t *entry = log.objects[oid];
  EXPECT_EQ(modify.op, entry->op);
  EXPECT_EQ(modify.version, entry->version);
  EXPECT_EQ(modify.prior_version, entry->prior_version);
  EXPECT_EQ(modify.user_version, entry->user_version);
  EXPECT_EQ(modify.reqid, entry->reqid);

  // a later DELETE replaces the MODIFY as the indexed entry
  pg_log_entry_t del(pg_log_entry_t::DELETE, oid, eversion_t(7,4),
                     eversion_t(7,4), 3,
                     osd_reqid_t(entity_name_t::CLIENT(777), 8, 3),
                     utime_t(10,2), 0);
  log.add(del);

  EXPECT_TRUE(log.logged_object(oid));
  entry = log.objects[oid];
  EXPECT_EQ(del.op, entry->op);
  EXPECT_EQ(del.version, entry->version);
  EXPECT_EQ(del.prior_version, entry->prior_version);
  EXPECT_EQ(del.user_version, entry->user_version);
  EXPECT_EQ(del.reqid, entry->reqid);

  // a trailing ERROR entry is added last; the checks that follow expect the
  // DELETE to remain the indexed entry
  log.add(
    pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(7,5), eversion_t(7,4),
                   3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 4),
utime_t(20,1), -ENOENT)); + + EXPECT_TRUE(log.logged_object(oid)); + entry = log.objects[oid]; + EXPECT_EQ(del.op, entry->op); + EXPECT_EQ(del.version, entry->version); + EXPECT_EQ(del.prior_version, entry->prior_version); + EXPECT_EQ(del.user_version, entry->user_version); + EXPECT_EQ(del.reqid, entry->reqid); +} + +TEST_F(PGLogTest, split_into_preserves_may_include_deletes) { + clear(); + + { + may_include_deletes_in_missing_dirty = false; + missing.may_include_deletes = true; + PGLog child_log(cct); + pg_t child_pg; + split_into(child_pg, 6, &child_log); + ASSERT_TRUE(child_log.get_missing().may_include_deletes); + ASSERT_TRUE(child_log.get_may_include_deletes_in_missing_dirty()); + } + + { + may_include_deletes_in_missing_dirty = false; + missing.may_include_deletes = false; + PGLog child_log(cct); + pg_t child_pg; + split_into(child_pg, 6, &child_log); + ASSERT_FALSE(child_log.get_missing().may_include_deletes); + ASSERT_FALSE(child_log.get_may_include_deletes_in_missing_dirty()); + } +} + +class PGLogTestRebuildMissing : public PGLogTest, public StoreTestFixture { +public: + PGLogTestRebuildMissing() : PGLogTest(), StoreTestFixture("memstore") {} + void SetUp() override { + StoreTestFixture::SetUp(); + ObjectStore::Transaction t; + test_coll = coll_t(spg_t(pg_t(1, 1))); + ch = store->create_new_collection(test_coll); + t.create_collection(test_coll, 0); + store->queue_transaction(ch, std::move(t)); + existing_oid = mk_obj(0); + nonexistent_oid = mk_obj(1); + ghobject_t existing_ghobj(existing_oid); + object_info_t existing_info; + existing_info.version = eversion_t(6, 2); + bufferlist enc_oi; + encode(existing_info, enc_oi, 0); + ObjectStore::Transaction t2; + t2.touch(test_coll, ghobject_t(existing_oid)); + t2.setattr(test_coll, ghobject_t(existing_oid), OI_ATTR, enc_oi); + ASSERT_EQ(0, store->queue_transaction(ch, std::move(t2))); + info.last_backfill = hobject_t::get_max(); + info.last_complete = eversion_t(); + } + + void TearDown() override { + clear(); 
+ missing.may_include_deletes = false; + StoreTestFixture::TearDown(); + } + + pg_info_t info; + coll_t test_coll; + hobject_t existing_oid, nonexistent_oid; + + void run_rebuild_missing_test(const map<hobject_t, pg_missing_item> &expected_missing_items) { + rebuild_missing_set_with_deletes(store.get(), ch, info); + ASSERT_EQ(expected_missing_items, missing.get_items()); + } +}; + +TEST_F(PGLogTestRebuildMissing, EmptyLog) { + missing.add(existing_oid, mk_evt(6, 2), mk_evt(6, 3), false); + missing.add(nonexistent_oid, mk_evt(7, 4), mk_evt(0, 0), false); + map<hobject_t, pg_missing_item> orig_missing = missing.get_items(); + run_rebuild_missing_test(orig_missing); +} + +TEST_F(PGLogTestRebuildMissing, SameVersionMod) { + missing.add(existing_oid, mk_evt(6, 2), mk_evt(6, 1), false); + log.add(mk_ple_mod(existing_oid, mk_evt(6, 2), mk_evt(6, 1))); + map<hobject_t, pg_missing_item> empty_missing; + run_rebuild_missing_test(empty_missing); +} + +TEST_F(PGLogTestRebuildMissing, DelExisting) { + missing.add(existing_oid, mk_evt(6, 3), mk_evt(6, 2), false); + log.add(mk_ple_dt(existing_oid, mk_evt(7, 5), mk_evt(7, 4))); + map<hobject_t, pg_missing_item> expected; + expected[existing_oid] = pg_missing_item(mk_evt(7, 5), mk_evt(6, 2), true); + run_rebuild_missing_test(expected); +} + +TEST_F(PGLogTestRebuildMissing, DelNonexistent) { + log.add(mk_ple_dt(nonexistent_oid, mk_evt(7, 5), mk_evt(7, 4))); + map<hobject_t, pg_missing_item> expected; + expected[nonexistent_oid] = pg_missing_item(mk_evt(7, 5), mk_evt(0, 0), true); + run_rebuild_missing_test(expected); +} + +TEST_F(PGLogTestRebuildMissing, MissingNotInLog) { + missing.add(mk_obj(10), mk_evt(8, 12), mk_evt(8, 10), false); + log.add(mk_ple_dt(nonexistent_oid, mk_evt(7, 5), mk_evt(7, 4))); + map<hobject_t, pg_missing_item> expected; + expected[nonexistent_oid] = pg_missing_item(mk_evt(7, 5), mk_evt(0, 0), true); + expected[mk_obj(10)] = pg_missing_item(mk_evt(8, 12), mk_evt(8, 10), false); + 
run_rebuild_missing_test(expected); +} + + +class PGLogMergeDupsTest : protected PGLog, public StoreTestFixture { + +public: + + PGLogMergeDupsTest() : PGLog(g_ceph_context), StoreTestFixture("memstore") { } + + void SetUp() override { + StoreTestFixture::SetUp(); + ObjectStore::Transaction t; + test_coll = coll_t(spg_t(pg_t(1, 1))); + auto ch = store->create_new_collection(test_coll); + t.create_collection(test_coll, 0); + store->queue_transaction(ch, std::move(t)); + } + + void TearDown() override { + test_disk_roundtrip(); + clear(); + StoreTestFixture::TearDown(); + } + + static pg_log_dup_t create_dup_entry(uint a, uint b) { + // make each dup_entry unique by using different client id's + static uint client_id = 777; + return pg_log_dup_t(eversion_t(a, b), + a, + osd_reqid_t(entity_name_t::CLIENT(client_id++), 8, 1), + 0); + } + + static std::vector<pg_log_dup_t> example_dups_1() { + std::vector<pg_log_dup_t> result = { + create_dup_entry(10, 11), + create_dup_entry(10, 12), + create_dup_entry(11, 1), + create_dup_entry(12, 3), + create_dup_entry(13, 99) + }; + return result; + } + + static std::vector<pg_log_dup_t> example_dups_2() { + std::vector<pg_log_dup_t> result = { + create_dup_entry(12, 3), + create_dup_entry(13, 99), + create_dup_entry(15, 11), + create_dup_entry(16, 14), + create_dup_entry(16, 32) + }; + return result; + } + + void add_dups(uint a, uint b) { + log.dups.push_back(create_dup_entry(a, b)); + write_from_dups = std::min(write_from_dups, log.dups.back().version); + } + + void add_dups(const std::vector<pg_log_dup_t>& l) { + for (auto& i : l) { + log.dups.push_back(i); + write_from_dups = std::min(write_from_dups, log.dups.back().version); + } + } + + static void add_dups(IndexedLog& log, const std::vector<pg_log_dup_t>& dups) { + for (auto& i : dups) { + log.dups.push_back(i); + } + } + + void check_order() { + eversion_t prev(0, 0); + + for (auto& i : log.dups) { + EXPECT_LT(prev, i.version) << "verify versions monotonically increase"; + 
prev = i.version; + } + } + + void check_index() { + EXPECT_EQ(log.dups.size(), log.dup_index.size()); + for (auto& i : log.dups) { + EXPECT_EQ(1u, log.dup_index.count(i.reqid)); + } + } + + void test_disk_roundtrip() { + ObjectStore::Transaction t; + hobject_t hoid; + hoid.pool = 1; + hoid.oid = "log"; + ghobject_t log_oid(hoid); + map<string, bufferlist> km; + write_log_and_missing(t, &km, test_coll, log_oid, false); + if (!km.empty()) { + t.omap_setkeys(test_coll, log_oid, km); + } + auto ch = store->open_collection(test_coll); + ASSERT_EQ(0, store->queue_transaction(ch, std::move(t))); + + auto orig_dups = log.dups; + clear(); + ostringstream err; + read_log_and_missing(store.get(), ch, log_oid, + pg_info_t(), err, false); + ASSERT_EQ(orig_dups.size(), log.dups.size()); + ASSERT_EQ(orig_dups, log.dups); + auto dups_it = log.dups.begin(); + for (auto orig_dup : orig_dups) { + ASSERT_EQ(orig_dup, *dups_it); + ++dups_it; + } + } + + coll_t test_coll; +}; + +TEST_F(PGLogMergeDupsTest, OtherEmpty) { + log.tail = eversion_t(14, 5); + + IndexedLog olog; + + add_dups(example_dups_1()); + index(); + + bool changed = merge_log_dups(olog); + + EXPECT_FALSE(changed); + EXPECT_EQ(5u, log.dups.size()); + + if (5 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + EXPECT_EQ(13u, log.dups.back().version.epoch); + EXPECT_EQ(99u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + +TEST_F(PGLogMergeDupsTest, AmEmpty) { + log.tail = eversion_t(14, 5); + index(); + + IndexedLog olog; + + add_dups(olog, example_dups_1()); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(5u, log.dups.size()); + + if (5 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(13u, log.dups.back().version.epoch); + EXPECT_EQ(99u, log.dups.back().version.version); + } + + check_order(); + 
check_index(); +} + +TEST_F(PGLogMergeDupsTest, AmEmptyOverlap) { + log.tail = eversion_t(12, 3); + index(); + + IndexedLog olog; + + add_dups(olog, example_dups_1()); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(4u, log.dups.size()); + + if (4 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(12u, log.dups.back().version.epoch); + EXPECT_EQ(3u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + +TEST_F(PGLogMergeDupsTest, Same) { + log.tail = eversion_t(14, 1); + + IndexedLog olog; + + add_dups(example_dups_1()); + index(); + add_dups(olog, example_dups_1()); + + bool changed = merge_log_dups(olog); + + EXPECT_FALSE(changed); + EXPECT_EQ(5u, log.dups.size()); + + if (5 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(13u, log.dups.back().version.epoch); + EXPECT_EQ(99u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + + +TEST_F(PGLogMergeDupsTest, Later) { + log.tail = eversion_t(16, 14); + + IndexedLog olog; + + add_dups(example_dups_1()); + index(); + add_dups(olog, example_dups_2()); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(7u, log.dups.size()); + + if (7 == log.dups.size()) { + EXPECT_EQ(10u, log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(16u, log.dups.back().version.epoch); + EXPECT_EQ(14u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + + +TEST_F(PGLogMergeDupsTest, Earlier) { + log.tail = eversion_t(17, 2); + + IndexedLog olog; + + add_dups(example_dups_2()); + index(); + add_dups(olog, example_dups_1()); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(8u, log.dups.size()); + + if (6 == log.dups.size()) { + EXPECT_EQ(10u, 
log.dups.front().version.epoch); + EXPECT_EQ(11u, log.dups.front().version.version); + + EXPECT_EQ(16u, log.dups.back().version.epoch); + EXPECT_EQ(32u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + + +TEST_F(PGLogMergeDupsTest, Superset) { + log.tail = eversion_t(17, 2); + + IndexedLog olog; + + add_dups(example_dups_1()); + index(); + + olog.dups.push_back(create_dup_entry(9, 5)); + olog.dups.push_back(create_dup_entry(15, 11)); + + bool changed = merge_log_dups(olog); + + EXPECT_TRUE(changed); + EXPECT_EQ(7u, log.dups.size()); + + if (7 == log.dups.size()) { + EXPECT_EQ(9u, log.dups.front().version.epoch); + EXPECT_EQ(5u, log.dups.front().version.version); + + EXPECT_EQ(15u, log.dups.back().version.epoch); + EXPECT_EQ(11u, log.dups.back().version.version); + } + + check_order(); + check_index(); +} + + +struct PGLogTrimTest : + public ::testing::Test, + public PGLogTestBase, + public PGLog::IndexedLog +{ + CephContext *cct = g_ceph_context; + + using ::testing::Test::SetUp; + void SetUp(unsigned dup_track) { + constexpr size_t size = 10; + + char dup_track_s[size]; + + snprintf(dup_track_s, size, "%u", dup_track); + + cct->_conf.set_val_or_die("osd_pg_log_dups_tracked", dup_track_s); + } +}; // struct PGLogTrimTest + + +TEST_F(PGLogTrimTest, TestMakingCephContext) +{ + SetUp(5); + + EXPECT_EQ(5u, cct->_conf->osd_pg_log_dups_tracked); +} + + +TEST_F(PGLogTrimTest, TestPartialTrim) +{ + SetUp(20); + PGLog::IndexedLog log; + log.head = mk_evt(24, 0); + log.skip_can_rollback_to_to_head(); + log.head = mk_evt(9, 0); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); + + 
std::set<eversion_t> trimmed;
  std::set<std::string> trimmed_dups;
  eversion_t write_from_dups = eversion_t::max();

  // first trim, up to (19,157)
  log.trim(cct, mk_evt(19, 157), &trimmed, &trimmed_dups, &write_from_dups);

  EXPECT_EQ(eversion_t(15, 150), write_from_dups);
  EXPECT_EQ(3u, log.log.size());
  EXPECT_EQ(3u, trimmed.size());
  EXPECT_EQ(2u, log.dups.size());
  EXPECT_EQ(0u, trimmed_dups.size());

  // lower osd_pg_log_dups_tracked from 20 to 15 before the second trim
  SetUp(15);

  std::set<eversion_t> trimmed2;
  std::set<std::string> trimmed_dups2;
  eversion_t write_from_dups2 = eversion_t::max();

  // second trim, up to (20,164): one more log entry goes, one more dup stays
  log.trim(cct, mk_evt(20, 164), &trimmed2, &trimmed_dups2, &write_from_dups2);

  EXPECT_EQ(eversion_t(19, 160), write_from_dups2);
  EXPECT_EQ(2u, log.log.size());
  EXPECT_EQ(1u, trimmed2.size());
  EXPECT_EQ(3u, log.dups.size());
  EXPECT_EQ(0u, trimmed_dups2.size());
}


// TestTrimNoTrimmed: trim() is called with nullptr for both the `trimmed`
// and `trimmed_dups` output sets; only write_from_dups and the resulting
// log/dups sizes are checked.
TEST_F(PGLogTrimTest, TestTrimNoTrimmed) {
  SetUp(20);
  PGLog::IndexedLog log;
  log.head = mk_evt(20, 0);
  log.skip_can_rollback_to_to_head();
  log.head = mk_evt(9, 0);

  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
  log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
  log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152)));
  log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
  log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));

  eversion_t write_from_dups = eversion_t::max();

  log.trim(cct, mk_evt(19, 157), nullptr, nullptr, &write_from_dups);

  EXPECT_EQ(eversion_t(15, 150), write_from_dups);
  EXPECT_EQ(3u, log.log.size());
  EXPECT_EQ(2u, log.dups.size());
}


// TestTrimNoDups: same log and trim point as TestTrimNoTrimmed, but with
// osd_pg_log_dups_tracked set to 10; no dups are expected to be kept.
TEST_F(PGLogTrimTest, TestTrimNoDups)
{
  SetUp(10);
  PGLog::IndexedLog log;
  log.head = mk_evt(20, 0);
  log.skip_can_rollback_to_to_head();
  log.head = mk_evt(9, 0);

  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
  log.add(mk_ple_mod_rb(mk_obj(3),
mk_evt(15, 155), mk_evt(15, 150)));
  log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152)));
  log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
  log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));

  std::set<eversion_t> trimmed;
  std::set<std::string> trimmed_dups;
  eversion_t write_from_dups = eversion_t::max();

  log.trim(cct, mk_evt(19, 157), &trimmed, &trimmed_dups, &write_from_dups);

  // three entries are trimmed from the log, none is kept as a dup, and
  // write_from_dups keeps its initial max() value
  EXPECT_EQ(eversion_t::max(), write_from_dups);
  EXPECT_EQ(3u, log.log.size());
  EXPECT_EQ(3u, trimmed.size());
  EXPECT_EQ(0u, log.dups.size());
  EXPECT_EQ(0u, trimmed_dups.size());
}

// TestNoTrim: the trim point (9,99) lies before the first log entry
// (10,100), so trim() must leave all six entries in place and report nothing.
TEST_F(PGLogTrimTest, TestNoTrim)
{
  SetUp(20);
  PGLog::IndexedLog log;
  log.head = mk_evt(24, 0);
  log.skip_can_rollback_to_to_head();
  log.head = mk_evt(9, 0);

  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
  log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150)));
  log.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152)));
  log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160)));
  log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166)));

  std::set<eversion_t> trimmed;
  std::set<std::string> trimmed_dups;
  eversion_t write_from_dups = eversion_t::max();

  log.trim(cct, mk_evt(9, 99), &trimmed, &trimmed_dups, &write_from_dups);

  EXPECT_EQ(eversion_t::max(), write_from_dups);
  EXPECT_EQ(6u, log.log.size());
  EXPECT_EQ(0u, trimmed.size());
  EXPECT_EQ(0u, log.dups.size());
  EXPECT_EQ(0u, trimmed_dups.size());
}

// TestTrimAll: the trim point (22,180) lies after the last log entry
// (21,167), so the whole log is trimmed.
TEST_F(PGLogTrimTest, TestTrimAll)
{
  SetUp(20);
  PGLog::IndexedLog log;
  EXPECT_EQ(0u, log.dup_index.size()); // Sanity check
  log.head = mk_evt(24, 0);
  log.skip_can_rollback_to_to_head();
  log.head = mk_evt(9, 0);

  log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70)));
  log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100)));
log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(19, 160), mk_evt(25, 152))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166))); + + std::set<eversion_t> trimmed; + std::set<std::string> trimmed_dups; + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(22, 180), &trimmed, &trimmed_dups, &write_from_dups); + + EXPECT_EQ(eversion_t(15, 150), write_from_dups); + EXPECT_EQ(0u, log.log.size()); + EXPECT_EQ(6u, trimmed.size()); + EXPECT_EQ(5u, log.dups.size()); + EXPECT_EQ(0u, trimmed_dups.size()); + EXPECT_EQ(0u, log.dup_index.size()); // dup_index entry should be trimmed +} + + +TEST_F(PGLogTrimTest, TestGetRequest) { + SetUp(20); + PGLog::IndexedLog log; + log.head = mk_evt(20, 0); + log.skip_can_rollback_to_to_head(); + log.head = mk_evt(9, 0); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(8, 70), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 150), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 155), mk_evt(15, 150), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 160), mk_evt(25, 152), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 165), mk_evt(26, 160), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 167), mk_evt(31, 166), + osd_reqid_t(client, 8, 6))); + + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(19, 157), nullptr, nullptr, &write_from_dups); + + EXPECT_EQ(eversion_t(15, 150), write_from_dups); + EXPECT_EQ(3u, log.log.size()); + EXPECT_EQ(2u, log.dups.size()); + + eversion_t version; + version_t user_version; + int return_code; + vector<pg_log_op_return_item_t> op_returns; + + osd_reqid_t log_reqid = osd_reqid_t(client, 8, 5); + osd_reqid_t 
dup_reqid = osd_reqid_t(client, 8, 3); + osd_reqid_t bad_reqid = osd_reqid_t(client, 8, 1); + + bool result; + + result = log.get_request(log_reqid, &version, &user_version, &return_code, + &op_returns); + EXPECT_EQ(true, result); + EXPECT_EQ(mk_evt(21, 165), version); + + result = log.get_request(dup_reqid, &version, &user_version, &return_code, + &op_returns); + EXPECT_EQ(true, result); + EXPECT_EQ(mk_evt(15, 155), version); + + result = log.get_request(bad_reqid, &version, &user_version, &return_code, + &op_returns); + EXPECT_FALSE(result); +} + +TEST_F(PGLogTest, _merge_object_divergent_entries) { + { + // Test for issue 20843 + clear(); + hobject_t hoid(object_t(/*name*/"notify.7"), + /*key*/string(""), + /*snap*/7, + /*hash*/77, + /*pool*/5, + /*nspace*/string("")); + mempool::osd_pglog::list<pg_log_entry_t> orig_entries; + orig_entries.push_back(mk_ple_mod(hoid, eversion_t(8336, 957), eversion_t(8336, 952))); + orig_entries.push_back(mk_ple_err(hoid, eversion_t(8336, 958))); + orig_entries.push_back(mk_ple_err(hoid, eversion_t(8336, 959))); + orig_entries.push_back(mk_ple_mod(hoid, eversion_t(8336, 960), eversion_t(8336, 957))); + log.add(mk_ple_mod(hoid, eversion_t(8973, 1075), eversion_t(8971, 1070))); + missing.add(hoid, + /*need*/eversion_t(8971, 1070), + /*have*/eversion_t(8336, 952), + false); + pg_info_t oinfo; + LogHandler rollbacker; + _merge_object_divergent_entries(log, hoid, + orig_entries, oinfo, + log.get_can_rollback_to(), + missing, &rollbacker, + this); + // No core dump + } + { + // skip leading error entries + clear(); + hobject_t hoid(object_t(/*name*/"notify.7"), + /*key*/string(""), + /*snap*/7, + /*hash*/77, + /*pool*/5, + /*nspace*/string("")); + mempool::osd_pglog::list<pg_log_entry_t> orig_entries; + orig_entries.push_back(mk_ple_err(hoid, eversion_t(8336, 956))); + orig_entries.push_back(mk_ple_mod(hoid, eversion_t(8336, 957), eversion_t(8336, 952))); + log.add(mk_ple_mod(hoid, eversion_t(8973, 1075), eversion_t(8971, 1070))); + 
missing.add(hoid, + /*need*/eversion_t(8971, 1070), + /*have*/eversion_t(8336, 952), + false); + pg_info_t oinfo; + LogHandler rollbacker; + _merge_object_divergent_entries(log, hoid, + orig_entries, oinfo, + log.get_can_rollback_to(), + missing, &rollbacker, + this); + // No core dump + } +} + +TEST(eversion_t, get_key_name) { + eversion_t a(1234, 5678); + std::string a_key_name = a.get_key_name(); + EXPECT_EQ("0000001234.00000000000000005678", a_key_name); +} + +TEST(pg_log_dup_t, get_key_name) { + pg_log_dup_t a(eversion_t(1234, 5678), + 13, + osd_reqid_t(entity_name_t::CLIENT(777), 8, 999), + 15); + std::string a_key_name = a.get_key_name(); + EXPECT_EQ("dup_0000001234.00000000000000005678", a_key_name); +} + + +// This tests trim() to make copies of +// 2 log entries (107, 106) and 3 additional for a total +// of 5 dups. Nothing from the original dups is copied. +TEST_F(PGLogTrimTest, TestTrimDups) { + SetUp(5); + PGLog::IndexedLog log; + log.head = mk_evt(21, 107); + log.skip_can_rollback_to_to_head(); + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), 
mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(21, 105), nullptr, nullptr, &write_from_dups); + + EXPECT_EQ(eversion_t(20, 103), write_from_dups) << log; + EXPECT_EQ(2u, log.log.size()) << log; + EXPECT_EQ(4u, log.dups.size()) << log; +} + +// This tests trim() to make copies of +// 4 log entries (107, 106, 105, 104) and 5 additional for a total +// of 9 dups. Only 1 of 2 existing dups are copied. +TEST_F(PGLogTrimTest, TestTrimDups2) { + SetUp(9); + PGLog::IndexedLog log; + log.head = mk_evt(21, 107); + log.skip_can_rollback_to_to_head(); + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 98), mk_evt(8, 97), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + eversion_t write_from_dups = eversion_t::max(); + + log.trim(cct, mk_evt(20, 103), nullptr, nullptr, &write_from_dups); + + EXPECT_EQ(eversion_t(10, 100), write_from_dups) << log; + EXPECT_EQ(4u, log.log.size()) << log; + EXPECT_EQ(6u, 
log.dups.size()) << log; +} + +// This tests copy_up_to() to make copies of +// 2 log entries (107, 106) and 3 additional for a total +// of 5 dups. Nothing from the original dups is copied. +TEST_F(PGLogTrimTest, TestCopyUpTo) { + SetUp(5); + PGLog::IndexedLog log, copy; + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + copy.copy_up_to(cct, log, 2); + + EXPECT_EQ(2u, copy.log.size()) << copy; + EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy; + EXPECT_EQ(copy.tail, mk_evt(21, 105)) << copy; + // Tracking 5 means 3 additional as dups + EXPECT_EQ(3u, copy.dups.size()) << copy; +} + +// This tests copy_up_to() to make copies of +// 4 log entries (107, 106, 105, 104) and 5 additional for a total +// of 5 dups. Only 1 of 2 existing dups are copied. 
+TEST_F(PGLogTrimTest, TestCopyUpTo2) { + SetUp(9); + PGLog::IndexedLog log, copy; + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 98), mk_evt(8, 97), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + copy.copy_up_to(cct, log, 4); + + EXPECT_EQ(4u, copy.log.size()) << copy; + EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy; + EXPECT_EQ(copy.tail, mk_evt(20, 103)) << copy; + // Tracking 5 means 3 additional as dups + EXPECT_EQ(5u, copy.dups.size()) << copy; +} + +// This tests copy_after() by specifying a version that copies +// 2 log entries (107, 106) and 3 additional for a total +// of 5 dups. Nothing of the original dups is copied. 
+TEST_F(PGLogTrimTest, TestCopyAfter) { + SetUp(5); + PGLog::IndexedLog log, copy; + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + copy.copy_after(cct, log, mk_evt(21, 105)); + + EXPECT_EQ(2u, copy.log.size()) << copy; + EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy; + EXPECT_EQ(copy.tail, mk_evt(21, 105)) << copy; + // Tracking 5 means 3 additional as dups + EXPECT_EQ(3u, copy.dups.size()) << copy; +} + +// This copies everything dups and log because of the large max dups +// and value passed to copy_after(). 
+TEST_F(PGLogTrimTest, TestCopyAfter2) { + SetUp(3000); + PGLog::IndexedLog log, copy; + log.tail = mk_evt(9, 99); + log.head = mk_evt(9, 99); + + entity_name_t client = entity_name_t::CLIENT(777); + + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 93), mk_evt(8, 92), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 94), mk_evt(8, 93), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 95), mk_evt(8, 94), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 96), mk_evt(8, 95), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 97), mk_evt(8, 96), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(8, 98), mk_evt(8, 97), osd_reqid_t(client, 8, 1)))); + log.dups.push_back(pg_log_dup_t(mk_ple_mod(mk_obj(1), + mk_evt(9, 99), mk_evt(8, 98), osd_reqid_t(client, 8, 1)))); + + log.add(mk_ple_mod(mk_obj(1), mk_evt(10, 100), mk_evt(9, 99), + osd_reqid_t(client, 8, 1))); + log.add(mk_ple_dt(mk_obj(2), mk_evt(15, 101), mk_evt(10, 100), + osd_reqid_t(client, 8, 2))); + log.add(mk_ple_mod_rb(mk_obj(3), mk_evt(15, 102), mk_evt(15, 101), + osd_reqid_t(client, 8, 3))); + log.add(mk_ple_mod(mk_obj(1), mk_evt(20, 103), mk_evt(15, 102), + osd_reqid_t(client, 8, 4))); + log.add(mk_ple_mod(mk_obj(4), mk_evt(21, 104), mk_evt(20, 103), + osd_reqid_t(client, 8, 5))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 105), mk_evt(21, 104), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 106), mk_evt(21, 105), + osd_reqid_t(client, 8, 6))); + log.add(mk_ple_dt_rb(mk_obj(5), mk_evt(21, 107), mk_evt(21, 106), + osd_reqid_t(client, 8, 6))); + + copy.copy_after(cct, log, mk_evt(9, 99)); + + EXPECT_EQ(8u, copy.log.size()) << copy; + EXPECT_EQ(copy.head, mk_evt(21, 107)) << copy; + EXPECT_EQ(copy.tail, mk_evt(9, 99)) << copy; + 
// Tracking 3000 is larger than all entries, so all dups copied + EXPECT_EQ(7u, copy.dups.size()) << copy; +} + +// Local Variables: +// compile-command: "cd ../.. ; make unittest_pglog ; ./unittest_pglog --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* " +// End: diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc new file mode 100644 index 000000000..3be5699ca --- /dev/null +++ b/src/test/osd/TestRados.cc @@ -0,0 +1,689 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#include "common/Cond.h" +#include "common/errno.h" +#include "common/version.h" + +#include <iostream> +#include <sstream> +#include <map> +#include <numeric> +#include <string> +#include <vector> +#include <stdlib.h> +#include <unistd.h> + +#include "test/osd/RadosModel.h" + + +using namespace std; + +class WeightedTestGenerator : public TestOpGenerator +{ +public: + + WeightedTestGenerator(int ops, + int objects, + map<TestOpType, unsigned int> op_weights, + TestOpStat *stats, + int max_seconds, + bool ec_pool, + bool balance_reads, + bool localize_reads, + bool set_redirect, + bool set_chunk, + bool enable_dedup) : + m_nextop(NULL), m_op(0), m_ops(ops), m_seconds(max_seconds), + m_objects(objects), m_stats(stats), + m_total_weight(0), + m_ec_pool(ec_pool), + m_balance_reads(balance_reads), + m_localize_reads(localize_reads), + m_set_redirect(set_redirect), + m_set_chunk(set_chunk), + m_enable_dedup(enable_dedup) + { + m_start = time(0); + for (map<TestOpType, unsigned int>::const_iterator it = op_weights.begin(); + it != op_weights.end(); + ++it) { + m_total_weight += it->second; + m_weight_sums.insert(pair<TestOpType, unsigned int>(it->first, + m_total_weight)); + } + if (m_set_redirect || m_set_chunk) { + if (m_set_redirect) { + m_ops = ops+m_objects+m_objects; + } else { + /* create 10 chunks per an object*/ + m_ops = ops+m_objects+m_objects*10; + } + } + } + + TestOp *next(RadosTestContext &context) override + { + 
// Drives the start-up sequence used when m_set_redirect or m_set_chunk is
// enabled. The stage is chosen purely from the current value of m_op:
//
//   m_op <= m_objects              write the initial objects
//   m_op <= copy_manifest_end      copy objects to the low tier pool
//   m_op <= make_manifest_end      issue set_redirect / set_chunk ops
//   m_op == make_manifest_end + 1  wait for the above, then register the
//                                  redirect targets in the context
//
// Returns true when this call handled the step; `op` then holds the operation
// to run, or NULL when the step has to be retried. In the retry case m_op is
// decremented so that the caller's `++m_op` in next() lands on the same step
// again. Returns false when no start-up step applies (including right after
// the final registration), so the caller goes on to regular op generation.
bool init_extensible_tier(RadosTestContext &context, TestOp *& op) {
  /*
   * set-redirect or set-chunk test (manifest test)
   * 0. make default objects (using create op)
   * 1. set-redirect or set-chunk
   * 2. initialize target objects (using write op)
   * 3. wait for set-* completion
   */
  // Last m_op value belonging to the copy stage.
  int copy_manifest_end = 0;
  if (m_set_chunk) {
    copy_manifest_end = m_objects*2;
  } else {
    copy_manifest_end = m_objects*3;
  }
  // Last m_op value belonging to the set_redirect / set_chunk stage.
  int make_manifest_end = copy_manifest_end;
  if (m_set_chunk) {
    /* make 10 chunks per an object*/
    make_manifest_end = make_manifest_end + m_objects * 10;
  } else {
    /* redirect */
    make_manifest_end = make_manifest_end + m_objects;
  }

  if (m_op <= m_objects) {
    // Stage 1: create the object named after m_op.
    stringstream oid;
    oid << m_op;
    /*if (m_op % 2) {
      oid << " " << string(300, 'o');
    }*/
    cout << m_op << ": write initial oid " << oid.str() << std::endl;
    context.oid_not_flushing.insert(oid.str());
    if (m_ec_pool) {
      op = new WriteOp(m_op, &context, oid.str(), true, true);
    } else {
      op = new WriteOp(m_op, &context, oid.str(), false, true);
    }
    return true;
  } else if (m_op <= copy_manifest_end) {
    // Stage 2: copy a source object (chosen by m_op modulo m_objects) to a
    // target object in the low tier pool.
    stringstream oid, oid2;
    //int _oid = m_op-m_objects;
    int _oid = m_op % m_objects + 1;
    oid << _oid;
    /*if ((_oid) % 2) {
      oid << " " << string(300, 'o');
    }*/

    if (context.oid_in_use.count(oid.str())) {
      /* previous write is not finished */
      op = NULL;
      m_op--;  // retry this step on the next call
      cout << m_op << " wait for completion of write op! " << std::endl;
      return true;
    }

    // Target index; wrapped back once it exceeds the length of this stage.
    int _oid2 = m_op - m_objects + 1;
    if (_oid2 > copy_manifest_end - m_objects) {
      _oid2 -= (copy_manifest_end - m_objects);
    }
    // Target names are "<index> <low tier pool>", with 300 'm' characters
    // appended for odd indices.
    oid2 << _oid2 << " " << context.low_tier_pool_name;
    if ((_oid2) % 2) {
      oid2 << " " << string(300, 'm');
    }
    cout << m_op << ": " << "copy oid " << oid.str() << " target oid "
         << oid2.str() << std::endl;
    op = new CopyOp(m_op, &context, oid.str(), oid2.str(), context.low_tier_pool_name);
    return true;
  } else if (m_op <= make_manifest_end) {
    // Stage 3: turn the objects into manifest objects.
    if (m_set_redirect) {
      stringstream oid, oid2;
      int _oid = m_op-copy_manifest_end;
      oid << _oid;
      /*if ((_oid) % 2) {
        oid << " " << string(300, 'o');
      }*/
      // Same target naming scheme as the copy stage.
      oid2 << _oid << " " << context.low_tier_pool_name;
      if ((_oid) % 2) {
        oid2 << " " << string(300, 'm');
      }
      if (context.oid_in_use.count(oid.str())) {
        /* previous copy is not finished */
        op = NULL;
        m_op--;  // retry this step on the next call
        cout << m_op << " retry set_redirect !" << std::endl;
        return true;
      }
      cout << m_op << ": " << "set_redirect oid " << oid.str() << " target oid "
           << oid2.str() << std::endl;
      op = new SetRedirectOp(m_op, &context, oid.str(), oid2.str(), context.pool_name);
      return true;
    } else if (m_set_chunk) {
      stringstream oid;
      int _oid = m_op % m_objects +1;
      oid << _oid;
      /*if ((_oid) % 2) {
        oid << " " << string(300, 'o');
      }*/
      if (context.oid_in_use.count(oid.str())) {
        /* previous set-chunk is not finished */
        op = NULL;
        m_op--;  // retry this step on the next call
        cout << m_op << " retry set_chunk !" << std::endl;
        return true;
      }
      stringstream oid2;
      oid2 << _oid << " " << context.low_tier_pool_name;
      if ((_oid) % 2) {
        oid2 << " " << string(300, 'm');
      }

      /* make a chunk (random offset, random length -->
       * target object's random offset)
       */
      // NOTE(review): contents2 is declared but never used in this method.
      ObjectDesc contents, contents2;
      context.find_object(oid.str(), &contents);
      uint32_t max_len = contents.most_recent_gen()->get_length(contents.most_recent());
      // Offset and length are both rounded down to a multiple of 512.
      uint32_t rand_offset = rand() % max_len;
      uint32_t rand_length = rand() % max_len;
      rand_offset = rand_offset - (rand_offset % 512);
      rand_length = rand_length - (rand_length % 512);

      // Re-roll until the chunk is non-empty and fits inside the object.
      // NOTE(review): if max_len <= 512 every rounded length is 0, so this
      // loop cannot terminate, and max_len == 0 divides by zero above.
      // Presumably objects are always larger than that here -- confirm.
      while (rand_offset + rand_length > max_len || rand_length == 0) {
        rand_offset = rand() % max_len;
        rand_length = rand() % max_len;
        rand_offset = rand_offset - (rand_offset % 512);
        rand_length = rand_length - (rand_length % 512);
      }
      // The chunk is placed at the same offset in the target object.
      uint32_t rand_tgt_offset = rand_offset;
      cout << m_op << ": " << "set_chunk oid " << oid.str() << " offset: " << rand_offset
           << " length: " << rand_length << " target oid " << oid2.str()
           << " tgt_offset: " << rand_tgt_offset << std::endl;
      op = new SetChunkOp(m_op, &context, oid.str(), rand_offset, rand_length, oid2.str(),
                          context.low_tier_pool_name, rand_tgt_offset, m_stats);
      return true;
    }
  } else if (m_op == make_manifest_end + 1) {
    // Stage 4: proceed only once every object is idle again and no redirect
    // target has been registered yet.
    int set_size = context.oid_not_in_use.size();
    int set_manifest_size = context.oid_redirect_not_in_use.size();
    cout << m_op << " oid_not_in_use " << set_size << " oid_redirect_not_in_use " <<  set_manifest_size <<  std::endl;
    /* wait for redirect or set_chunk initialization */
    if (set_size != m_objects || set_manifest_size != 0) {
      op = NULL;
      m_op--;  // retry this step on the next call
      cout << m_op << " wait for manifest initialization " << std::endl;
      return true;
    }
    // Register target names for indices m_objects+1 .. m_objects*2, built
    // with the same naming scheme as above.
    for (int t_op = m_objects+1; t_op <= m_objects*2; t_op++) {
      stringstream oid;
      oid << t_op << " " << context.low_tier_pool_name;
      if (t_op % 2) {
        oid << " " << string(300, 'm');
      }
      cout << " redirect_not_in_use: " << oid.str() << std::endl;
      context.oid_redirect_not_in_use.insert(oid.str());
    }
  }

  // Nothing (more) to do here; the caller generates a regular op.
  return false;
}
+ cout << m_op << ": " << "snap_remove snap " << snap << std::endl; + return new SnapRemoveOp(m_op, &context, snap, m_stats); + } + + case TEST_OP_ROLLBACK: + { + string oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "rollback oid " << oid << " current snap is " + << context.current_snap << std::endl; + return new RollbackOp(m_op, &context, oid); + } + + case TEST_OP_SETATTR: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "setattr oid " << oid + << " current snap is " << context.current_snap << std::endl; + return new SetAttrsOp(m_op, &context, oid, m_stats); + + case TEST_OP_RMATTR: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "rmattr oid " << oid + << " current snap is " << context.current_snap << std::endl; + return new RemoveAttrsOp(m_op, &context, oid, m_stats); + + case TEST_OP_WATCH: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "watch oid " << oid + << " current snap is " << context.current_snap << std::endl; + return new WatchOp(m_op, &context, oid, m_stats); + + case TEST_OP_COPY_FROM: + oid = *(rand_choose(context.oid_not_in_use)); + do { + oid2 = *(rand_choose(context.oid_not_in_use)); + } while (oid == oid2); + cout << m_op << ": " << "copy_from oid " << oid << " from oid " << oid2 + << " current snap is " << context.current_snap << std::endl; + return new CopyFromOp(m_op, &context, oid, oid2, m_stats); + + case TEST_OP_HIT_SET_LIST: + { + uint32_t hash = rjhash32(rand()); + cout << m_op << ": " << "hit_set_list " << hash << std::endl; + return new HitSetListOp(m_op, &context, hash, m_stats); + } + + case TEST_OP_UNDIRTY: + { + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "undirty oid " << oid << std::endl; + return new UndirtyOp(m_op, &context, oid, m_stats); + } + + case TEST_OP_IS_DIRTY: + { + oid = *(rand_choose(context.oid_not_flushing)); + return new IsDirtyOp(m_op, &context, oid, m_stats); + } + + case 
TEST_OP_CACHE_FLUSH: + { + oid = *(rand_choose(context.oid_not_in_use)); + return new CacheFlushOp(m_op, &context, oid, m_stats, true); + } + + case TEST_OP_CACHE_TRY_FLUSH: + { + oid = *(rand_choose(context.oid_not_in_use)); + return new CacheFlushOp(m_op, &context, oid, m_stats, false); + } + + case TEST_OP_CACHE_EVICT: + { + oid = *(rand_choose(context.oid_not_in_use)); + return new CacheEvictOp(m_op, &context, oid, m_stats); + } + + case TEST_OP_APPEND: + oid = *(rand_choose(context.oid_not_in_use)); + cout << "append oid " << oid << " current snap is " + << context.current_snap << std::endl; + return new WriteOp(m_op, &context, oid, true, false, m_stats); + + case TEST_OP_APPEND_EXCL: + oid = *(rand_choose(context.oid_not_in_use)); + cout << "append oid (excl) " << oid << " current snap is " + << context.current_snap << std::endl; + return new WriteOp(m_op, &context, oid, true, true, m_stats); + + case TEST_OP_CHUNK_READ: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "chunk read oid " << oid << " target oid " << oid2 << std::endl; + return new ChunkReadOp(m_op, &context, oid, context.pool_name, false, m_stats); + + case TEST_OP_TIER_PROMOTE: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "tier_promote oid " << oid << std::endl; + return new TierPromoteOp(m_op, &context, oid, m_stats); + + case TEST_OP_TIER_FLUSH: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "tier_flush oid " << oid << std::endl; + return new TierFlushOp(m_op, &context, oid, m_stats); + + case TEST_OP_SET_REDIRECT: + oid = *(rand_choose(context.oid_not_in_use)); + oid2 = *(rand_choose(context.oid_redirect_not_in_use)); + cout << m_op << ": " << "set_redirect oid " << oid << " target oid " << oid2 << std::endl; + return new SetRedirectOp(m_op, &context, oid, oid2, context.pool_name, m_stats); + + case TEST_OP_UNSET_REDIRECT: + oid = *(rand_choose(context.oid_not_in_use)); + cout << m_op << ": " << "unset_redirect 
oid " << oid << std::endl; + return new UnsetRedirectOp(m_op, &context, oid, m_stats); + + default: + cerr << m_op << ": Invalid op type " << type << std::endl; + ceph_abort(); + return nullptr; + } + } + + TestOp *m_nextop; + int m_op; + int m_ops; + int m_seconds; + int m_objects; + time_t m_start; + TestOpStat *m_stats; + map<TestOpType, unsigned int> m_weight_sums; + unsigned int m_total_weight; + bool m_ec_pool; + bool m_balance_reads; + bool m_localize_reads; + bool m_set_redirect; + bool m_set_chunk; + bool m_enable_dedup; +}; + +int main(int argc, char **argv) +{ + int ops = 1000; + int objects = 50; + int max_in_flight = 16; + int64_t size = 4000000; // 4 MB + int64_t min_stride_size = -1, max_stride_size = -1; + int max_seconds = 0; + bool pool_snaps = false; + bool write_fadvise_dontneed = false; + + struct { + TestOpType op; + const char *name; + bool ec_pool_valid; + } op_types[] = { + { TEST_OP_READ, "read", true }, + { TEST_OP_WRITE, "write", false }, + { TEST_OP_WRITE_EXCL, "write_excl", false }, + { TEST_OP_WRITESAME, "writesame", false }, + { TEST_OP_DELETE, "delete", true }, + { TEST_OP_SNAP_CREATE, "snap_create", true }, + { TEST_OP_SNAP_REMOVE, "snap_remove", true }, + { TEST_OP_ROLLBACK, "rollback", true }, + { TEST_OP_SETATTR, "setattr", true }, + { TEST_OP_RMATTR, "rmattr", true }, + { TEST_OP_WATCH, "watch", true }, + { TEST_OP_COPY_FROM, "copy_from", true }, + { TEST_OP_HIT_SET_LIST, "hit_set_list", true }, + { TEST_OP_IS_DIRTY, "is_dirty", true }, + { TEST_OP_UNDIRTY, "undirty", true }, + { TEST_OP_CACHE_FLUSH, "cache_flush", true }, + { TEST_OP_CACHE_TRY_FLUSH, "cache_try_flush", true }, + { TEST_OP_CACHE_EVICT, "cache_evict", true }, + { TEST_OP_APPEND, "append", true }, + { TEST_OP_APPEND_EXCL, "append_excl", true }, + { TEST_OP_SET_REDIRECT, "set_redirect", true }, + { TEST_OP_UNSET_REDIRECT, "unset_redirect", true }, + { TEST_OP_CHUNK_READ, "chunk_read", true }, + { TEST_OP_TIER_PROMOTE, "tier_promote", true }, + { 
TEST_OP_TIER_FLUSH, "tier_flush", true }, + { TEST_OP_READ /* grr */, NULL }, + }; + + map<TestOpType, unsigned int> op_weights; + string pool_name = "rbd"; + string low_tier_pool_name = ""; + bool ec_pool = false; + bool no_omap = false; + bool no_sparse = false; + bool balance_reads = false; + bool localize_reads = false; + bool set_redirect = false; + bool set_chunk = false; + bool enable_dedup = false; + + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "--max-ops") == 0) + ops = atoi(argv[++i]); + else if (strcmp(argv[i], "--pool") == 0) + pool_name = argv[++i]; + else if (strcmp(argv[i], "--max-seconds") == 0) + max_seconds = atoi(argv[++i]); + else if (strcmp(argv[i], "--objects") == 0) + objects = atoi(argv[++i]); + else if (strcmp(argv[i], "--max-in-flight") == 0) + max_in_flight = atoi(argv[++i]); + else if (strcmp(argv[i], "--size") == 0) + size = atoi(argv[++i]); + else if (strcmp(argv[i], "--min-stride-size") == 0) + min_stride_size = atoi(argv[++i]); + else if (strcmp(argv[i], "--max-stride-size") == 0) + max_stride_size = atoi(argv[++i]); + else if (strcmp(argv[i], "--no-omap") == 0) + no_omap = true; + else if (strcmp(argv[i], "--no-sparse") == 0) + no_sparse = true; + else if (strcmp(argv[i], "--balance-reads") == 0) + balance_reads = true; + else if (strcmp(argv[i], "--localize-reads") == 0) + localize_reads = true; + else if (strcmp(argv[i], "--pool-snaps") == 0) + pool_snaps = true; + else if (strcmp(argv[i], "--write-fadvise-dontneed") == 0) + write_fadvise_dontneed = true; + else if (strcmp(argv[i], "--ec-pool") == 0) { + if (!op_weights.empty()) { + cerr << "--ec-pool must be specified prior to any ops" << std::endl; + exit(1); + } + ec_pool = true; + no_omap = true; + no_sparse = true; + } else if (strcmp(argv[i], "--op") == 0) { + i++; + if (i == argc) { + cerr << "Missing op after --op" << std::endl; + return 1; + } + int j; + for (j = 0; op_types[j].name; ++j) { + if (strcmp(op_types[j].name, argv[i]) == 0) { + break; + } + } + if 
(!op_types[j].name) { + cerr << "unknown op " << argv[i] << std::endl; + exit(1); + } + i++; + if (i == argc) { + cerr << "Weight unspecified." << std::endl; + return 1; + } + int weight = atoi(argv[i]); + if (weight < 0) { + cerr << "Weights must be nonnegative." << std::endl; + return 1; + } else if (weight > 0) { + if (ec_pool && !op_types[j].ec_pool_valid) { + cerr << "Error: cannot use op type " << op_types[j].name + << " with --ec-pool" << std::endl; + exit(1); + } + cout << "adding op weight " << op_types[j].name << " -> " << weight << std::endl; + op_weights.insert(pair<TestOpType, unsigned int>(op_types[j].op, weight)); + } + } else if (strcmp(argv[i], "--set_redirect") == 0) { + set_redirect = true; + } else if (strcmp(argv[i], "--set_chunk") == 0) { + set_chunk = true; + } else if (strcmp(argv[i], "--low_tier_pool") == 0) { + /* + * disallow redirect or chunk object into the same pool + * to prevent the race. see https://github.com/ceph/ceph/pull/20096 + */ + low_tier_pool_name = argv[++i]; + } else if (strcmp(argv[i], "--enable_dedup") == 0) { + enable_dedup = true; + } else { + cerr << "unknown arg " << argv[i] << std::endl; + exit(1); + } + } + + if (set_redirect || set_chunk) { + if (low_tier_pool_name == "") { + cerr << "low_tier_pool is needed" << std::endl; + exit(1); + } + } + + if (op_weights.empty()) { + cerr << "No operations specified" << std::endl; + exit(1); + } + + if (min_stride_size < 0) + min_stride_size = size / 10; + if (max_stride_size < 0) + max_stride_size = size / 5; + + cout << pretty_version_to_str() << std::endl; + cout << "Configuration:" << std::endl + << "\tNumber of operations: " << ops << std::endl + << "\tNumber of objects: " << objects << std::endl + << "\tMax in flight operations: " << max_in_flight << std::endl + << "\tObject size (in bytes): " << size << std::endl + << "\tWrite stride min: " << min_stride_size << std::endl + << "\tWrite stride max: " << max_stride_size << std::endl; + + if (min_stride_size >= 
max_stride_size) { + cerr << "Error: max_stride_size must be more than min_stride_size" + << std::endl; + return 1; + } + + if (min_stride_size > size || max_stride_size > size) { + cerr << "Error: min_stride_size and max_stride_size must be " + << "smaller than object size" << std::endl; + return 1; + } + + if (max_in_flight * 2 > objects) { + cerr << "Error: max_in_flight must be <= than the number of objects / 2" + << std::endl; + return 1; + } + + char *id = getenv("CEPH_CLIENT_ID"); + RadosTestContext context( + pool_name, + max_in_flight, + size, + min_stride_size, + max_stride_size, + no_omap, + no_sparse, + pool_snaps, + write_fadvise_dontneed, + low_tier_pool_name, + enable_dedup, + id); + + TestOpStat stats; + WeightedTestGenerator gen = WeightedTestGenerator( + ops, objects, + op_weights, &stats, max_seconds, + ec_pool, balance_reads, localize_reads, + set_redirect, set_chunk, enable_dedup); + int r = context.init(); + if (r < 0) { + cerr << "Error initializing rados test context: " + << cpp_strerror(r) << std::endl; + exit(1); + } + context.loop(&gen); + + context.shutdown(); + cerr << context.errors << " errors." 
<< std::endl; + cerr << stats << std::endl; + return 0; +} diff --git a/src/test/osd/ceph_test_osd_stale_read.cc b/src/test/osd/ceph_test_osd_stale_read.cc new file mode 100644 index 000000000..41b5aa6d3 --- /dev/null +++ b/src/test/osd/ceph_test_osd_stale_read.cc @@ -0,0 +1,180 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#include "gtest/gtest.h" + +#include "mds/mdstypes.h" +#include "include/buffer.h" +#include "include/rbd_types.h" +#include "include/rados/librados.h" +#include "include/rados/librados.hpp" +#include "include/stringify.h" +#include "include/types.h" +#include "global/global_context.h" +#include "global/global_init.h" +#include "common/ceph_argparse.h" +#include "common/common_init.h" +#include "common/Cond.h" +#include "json_spirit/json_spirit.h" + +#include <errno.h> +#include <map> +#include <sstream> +#include <string> + +using namespace librados; +using std::map; +using std::ostringstream; +using std::string; + +int get_primary_osd(Rados& rados, const string& pool_name, + const string& oid, int *pprimary) +{ + bufferlist inbl; + string cmd = string("{\"prefix\": \"osd map\",\"pool\":\"") + + pool_name + + string("\",\"object\": \"") + + oid + + string("\",\"format\": \"json\"}"); + bufferlist outbl; + if (int r = rados.mon_command(cmd, inbl, &outbl, nullptr); + r < 0) { + return r; + } + string outstr(outbl.c_str(), outbl.length()); + json_spirit::Value v; + if (!json_spirit::read(outstr, v)) { + cerr <<" unable to parse json " << outstr << std::endl; + return -1; + } + + json_spirit::Object& o = v.get_obj(); + for (json_spirit::Object::size_type i=0; i<o.size(); i++) { + json_spirit::Pair& p = o[i]; + if (p.name_ == "acting_primary") { + cout << "primary = " << p.value_.get_int() << std::endl; + *pprimary = p.value_.get_int(); + return 0; + } + } + cerr << "didn't find primary in " << outstr << std::endl; + return -1; +} + +int fence_osd(Rados& rados, int osd) +{ + bufferlist inbl, 
outbl; + string cmd("{\"prefix\": \"injectargs\",\"injected_args\":[" + "\"--ms-blackhole-osd\", " + "\"--ms-blackhole-mon\"]}"); + return rados.osd_command(osd, cmd, inbl, &outbl, NULL); +} + +int mark_down_osd(Rados& rados, int osd) +{ + bufferlist inbl, outbl; + string cmd("{\"prefix\": \"osd down\",\"ids\":[\"" + + stringify(osd) + "\"]}"); + return rados.mon_command(cmd, inbl, &outbl, NULL); +} + +TEST(OSD, StaleRead) { + // create two rados instances, one pool + Rados rados1, rados2; + IoCtx ioctx1, ioctx2; + int r; + + r = rados1.init_with_context(g_ceph_context); + ASSERT_EQ(0, r); + r = rados1.connect(); + ASSERT_EQ(0, r); + + srand(time(0)); + string pool_name = "read-hole-test-" + stringify(rand()); + r = rados1.pool_create(pool_name.c_str()); + ASSERT_EQ(0, r); + + r = rados1.ioctx_create(pool_name.c_str(), ioctx1); + ASSERT_EQ(0, r); + + r = rados2.init_with_context(g_ceph_context); + ASSERT_EQ(0, r); + r = rados2.connect(); + ASSERT_EQ(0, r); + r = rados2.ioctx_create(pool_name.c_str(), ioctx2); + ASSERT_EQ(0, r); + + string oid = "foo"; + bufferlist one; + one.append("one"); + { + cout << "client1: writing 'one'" << std::endl; + r = ioctx1.write_full(oid, one); + ASSERT_EQ(0, r); + } + + // make sure 2 can read it + { + cout << "client2: reading 'one'" << std::endl; + bufferlist bl; + r = ioctx2.read(oid, bl, 3, 0); + ASSERT_EQ(3, r); + ASSERT_EQ('o', bl[0]); + ASSERT_EQ('n', bl[1]); + ASSERT_EQ('e', bl[2]); + } + + // find the primary + int primary; + r = get_primary_osd(rados1, pool_name, oid, &primary); + ASSERT_EQ(0, r); + + // fence it + cout << "client1: fencing primary" << std::endl; + fence_osd(rados1, primary); + mark_down_osd(rados1, primary); + rados1.wait_for_latest_osdmap(); + + // should still be able to read the old value on 2 + { + cout << "client2: reading 'one' again from old primary" << std::endl; + bufferlist bl; + r = ioctx2.read(oid, bl, 3, 0); + ASSERT_EQ(3, r); + ASSERT_EQ('o', bl[0]); + ASSERT_EQ('n', bl[1]); + ASSERT_EQ('e', 
bl[2]); + } + + // update object on 1 + bufferlist two; + two.append("two"); + { + cout << "client1: writing 'two' to new acting set" << std::endl; + r = ioctx1.write_full(oid, two); + ASSERT_EQ(0, r); + } + + // make sure we can't still read the old value on 2 + { + cout << "client2: reading again from old primary" << std::endl; + bufferlist bl; + r = ioctx2.read(oid, bl, 3, 0); + ASSERT_EQ(3, r); + ASSERT_EQ('t', bl[0]); + ASSERT_EQ('w', bl[1]); + ASSERT_EQ('o', bl[2]); + } + + rados1.shutdown(); + rados2.shutdown(); +} + +int main(int argc, char **argv) { + vector<const char*> args; + argv_to_vec(argc, (const char **)argv, args); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); + common_init_finish(g_ceph_context); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/test/osd/hitset.cc b/src/test/osd/hitset.cc new file mode 100644 index 000000000..6234bdaba --- /dev/null +++ b/src/test/osd/hitset.cc @@ -0,0 +1,197 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * Copyright 2013 Inktank + */ + +#include "gtest/gtest.h" +#include "osd/HitSet.h" +#include <iostream> + +class HitSetTestStrap { +public: + HitSet *hitset; + + explicit HitSetTestStrap(HitSet *h) : hitset(h) {} + + void fill(unsigned count) { + char buf[50]; + for (unsigned i = 0; i < count; ++i) { + sprintf(buf, "hitsettest_%u", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + hitset->insert(obj); + } + EXPECT_EQ(count, hitset->insert_count()); + } + void verify_fill(unsigned count) { + char buf[50]; + for (unsigned i = 0; i < count; ++i) { + sprintf(buf, "hitsettest_%u", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + EXPECT_TRUE(hitset->contains(obj)); + } + } + +}; + +class BloomHitSetTest : public testing::Test, public HitSetTestStrap { +public: + + BloomHitSetTest() : HitSetTestStrap(new HitSet(new BloomHitSet)) {} + + void rebuild(double fp, uint64_t target, uint64_t seed) { + BloomHitSet::Params *bparams = new BloomHitSet::Params(fp, target, seed); + HitSet::Params param(bparams); + HitSet new_set(param); + *hitset = new_set; + } + + BloomHitSet *get_hitset() { return static_cast<BloomHitSet*>(hitset->impl.get()); } +}; + +TEST_F(BloomHitSetTest, Params) { + BloomHitSet::Params params(0.01, 100, 5); + EXPECT_EQ(.01, params.get_fpp()); + EXPECT_EQ((unsigned)100, params.target_size); + EXPECT_EQ((unsigned)5, params.seed); + params.set_fpp(0.1); + EXPECT_EQ(0.1, params.get_fpp()); + + bufferlist bl; + params.encode(bl); + BloomHitSet::Params p2; + auto iter = bl.cbegin(); + p2.decode(iter); + EXPECT_EQ(0.1, p2.get_fpp()); + EXPECT_EQ((unsigned)100, p2.target_size); + EXPECT_EQ((unsigned)5, p2.seed); +} + +TEST_F(BloomHitSetTest, Construct) { + ASSERT_EQ(hitset->impl->get_type(), HitSet::TYPE_BLOOM); + // success! 
+} + +TEST_F(BloomHitSetTest, Rebuild) { + rebuild(0.1, 100, 1); + ASSERT_EQ(hitset->impl->get_type(), HitSet::TYPE_BLOOM); +} + +TEST_F(BloomHitSetTest, InsertsMatch) { + rebuild(0.1, 100, 1); + fill(50); + /* + * the approx unique count is atrocious on bloom filters. Empirical + * evidence suggests the current test will produce a value of 62 + * regardless of hitset size + */ + EXPECT_TRUE(hitset->approx_unique_insert_count() >= 50 && + hitset->approx_unique_insert_count() <= 62); + verify_fill(50); + EXPECT_FALSE(hitset->is_full()); +} + +TEST_F(BloomHitSetTest, FillsUp) { + rebuild(0.1, 20, 1); + fill(20); + verify_fill(20); + EXPECT_TRUE(hitset->is_full()); +} + +TEST_F(BloomHitSetTest, RejectsNoMatch) { + rebuild(0.001, 100, 1); + fill(100); + verify_fill(100); + EXPECT_TRUE(hitset->is_full()); + + char buf[50]; + int matches = 0; + for (int i = 100; i < 200; ++i) { + sprintf(buf, "hitsettest_%d", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + if (hitset->contains(obj)) + ++matches; + } + // we set a 1 in 1000 false positive; allow one in our 100 + EXPECT_LT(matches, 2); +} + +class ExplicitHashHitSetTest : public testing::Test, public HitSetTestStrap { +public: + + ExplicitHashHitSetTest() : HitSetTestStrap(new HitSet(new ExplicitHashHitSet)) {} + + ExplicitHashHitSet *get_hitset() { return static_cast<ExplicitHashHitSet*>(hitset->impl.get()); } +}; + +TEST_F(ExplicitHashHitSetTest, Construct) { + ASSERT_EQ(hitset->impl->get_type(), HitSet::TYPE_EXPLICIT_HASH); + // success! 
+} + +TEST_F(ExplicitHashHitSetTest, InsertsMatch) { + fill(50); + verify_fill(50); + EXPECT_EQ((unsigned)50, hitset->approx_unique_insert_count()); + EXPECT_FALSE(hitset->is_full()); +} + +TEST_F(ExplicitHashHitSetTest, RejectsNoMatch) { + fill(100); + verify_fill(100); + EXPECT_FALSE(hitset->is_full()); + + char buf[50]; + int matches = 0; + for (int i = 100; i < 200; ++i) { + sprintf(buf, "hitsettest_%d", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + if (hitset->contains(obj)) { + ++matches; + } + } + EXPECT_EQ(matches, 0); +} + +class ExplicitObjectHitSetTest : public testing::Test, public HitSetTestStrap { +public: + + ExplicitObjectHitSetTest() : HitSetTestStrap(new HitSet(new ExplicitObjectHitSet)) {} + + ExplicitObjectHitSet *get_hitset() { return static_cast<ExplicitObjectHitSet*>(hitset->impl.get()); } +}; + +TEST_F(ExplicitObjectHitSetTest, Construct) { + ASSERT_EQ(hitset->impl->get_type(), HitSet::TYPE_EXPLICIT_OBJECT); + // success! +} + +TEST_F(ExplicitObjectHitSetTest, InsertsMatch) { + fill(50); + verify_fill(50); + EXPECT_EQ((unsigned)50, hitset->approx_unique_insert_count()); + EXPECT_FALSE(hitset->is_full()); +} + +TEST_F(ExplicitObjectHitSetTest, RejectsNoMatch) { + fill(100); + verify_fill(100); + EXPECT_FALSE(hitset->is_full()); + + char buf[50]; + int matches = 0; + for (int i = 100; i < 200; ++i) { + sprintf(buf, "hitsettest_%d", i); + hobject_t obj(object_t(buf), "", 0, i, 0, ""); + if (hitset->contains(obj)) { + ++matches; + } + } + EXPECT_EQ(matches, 0); +} diff --git a/src/test/osd/osdcap.cc b/src/test/osd/osdcap.cc new file mode 100644 index 000000000..f1b80faae --- /dev/null +++ b/src/test/osd/osdcap.cc @@ -0,0 +1,1353 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2012 Inktank + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + 
* License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <iostream> + +#include "include/stringify.h" +#include "osd/OSDCap.h" + +#include "gtest/gtest.h" + +const char *parse_good[] = { + "allow *", + "allow r", + "allow rwx", + "allow r pool foo ", + "allow r pool=foo", + "allow wx pool taco", + "allow pool foo r", + "allow pool taco wx", + "allow wx pool taco object_prefix obj", + "allow wx pool taco object_prefix obj_with_underscores_and_no_quotes", + "allow pool taco object_prefix obj wx", + "allow pool taco object_prefix obj_with_underscores_and_no_quotes wx", + "allow rwx pool 'weird name'", + "allow rwx pool \"weird name with ''s\"", + "allow rwx pool foo, allow r pool bar", + "allow rwx pool foo ; allow r pool bar", + "allow rwx pool foo ;allow r pool bar", + "allow rwx pool foo; allow r pool bar", + "allow pool foo rwx, allow pool bar r", + "allow pool foo.froo.foo rwx, allow pool bar r", + "allow pool foo rwx ; allow pool bar r", + "allow pool foo rwx ;allow pool bar r", + "allow pool foo rwx; allow pool bar r", + "allow pool data rw, allow pool rbd rwx, allow pool images class rbd foo", + "allow class-read", + "allow class-write", + "allow class-read class-write", + "allow r class-read pool foo", + "allow rw class-read class-write pool foo", + "allow r class-read pool foo", + "allow pool bar rwx; allow pool baz r class-read", + "allow class foo", + "allow class clsname \"clsthingidon'tunderstand\"", + " allow rwx pool foo; allow r pool bar ", + " allow rwx pool foo; allow r pool bar ", + " allow pool foo rwx; allow pool bar r ", + " allow pool foo rwx; allow pool bar r ", + " allow wx pool taco", + "\tallow\nwx\tpool \n taco\t", + "allow class-read object_prefix rbd_children, allow pool libvirt-pool-test rwx", + "allow class-read object_prefix rbd-children, allow pool libvirt_pool_test rwx", + "allow pool foo namespace nfoo rwx, allow pool bar namespace=nbar r", + "allow pool foo namespace=nfoo rwx 
; allow pool bar namespace=nbar r", + "allow pool foo namespace nfoo rwx ;allow pool bar namespace nbar r", + "allow pool foo namespace=nfoo rwx; allow pool bar namespace nbar object_prefix rbd r", + "allow rwx namespace=nfoo tag cephfs data=cephfs_a", + "allow rwx namespace foo tag cephfs data =cephfs_a", + "allow pool foo namespace=nfoo* rwx", + "allow pool foo namespace=\"\" rwx; allow pool bar namespace='' object_prefix rbd r", + "allow pool foo namespace \"\" rwx; allow pool bar namespace '' object_prefix rbd r", + "profile abc, profile abc pool=bar, profile abc pool=bar namespace=foo", + "allow rwx tag application key=value", + "allow rwx tag application key = value", + "allow rwx tag application key =value", + "allow rwx tag application key= value", + "allow rwx tag application key = value", + "allow all tag application all=all", + "allow rwx network 127.0.0.1/8", + "allow rwx network ::1/128", + "allow rwx network [ff::1]/128", + "profile foo network 127.0.0.1/8", + "allow rwx namespace foo tag cephfs data =cephfs_a network 127.0.0.1/8", + "allow pool foo rwx network 1.2.3.4/24", + 0 +}; + +TEST(OSDCap, ParseGood) { + for (int i=0; parse_good[i]; i++) { + string str = parse_good[i]; + OSDCap cap; + std::cout << "Testing good input: '" << str << "'" << std::endl; + ASSERT_TRUE(cap.parse(str, &cout)); + } +} + +const char *parse_bad[] = { + "allow r poolfoo", + "allow r w", + "ALLOW r", + "allow rwx,", + "allow rwx x", + "allow r pool foo r", + "allow wwx pool taco", + "allow wwx pool taco^funny&chars", + "allow rwx pool 'weird name''", + "allow rwx object_prefix \"beforepool\" pool weird", + "allow rwx auid 123 pool asdf", + "allow xrwx pool foo,, allow r pool bar", + ";allow rwx pool foo rwx ; allow r pool bar", + "allow rwx pool foo ;allow r pool bar gibberish", + "allow rwx auid 123 pool asdf namespace=foo", + "allow rwx auid 123 namespace", + "allow rwx namespace", + "allow namespace", + "allow namespace=foo", + "allow namespace=f*oo", + "allow rwx auid 
123 namespace asdf", + "allow wwx pool ''", + "allow rwx tag application key value", + "allow rwx auid 123", + "allow auid 123 rwx", + "allow r pool foo object_prefix blah ; allow w auid 5", + 0 +}; + +TEST(OSDCap, ParseBad) { + for (int i=0; parse_bad[i]; i++) { + string str = parse_bad[i]; + OSDCap cap; + std::cout << "Testing bad input: '" << str << "'" << std::endl; + ASSERT_FALSE(cap.parse(str, &cout)); + } +} + +TEST(OSDCap, AllowAll) { + OSDCap cap; + entity_addr_t addr; + ASSERT_FALSE(cap.allow_all()); + + ASSERT_TRUE(cap.parse("allow r", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow w", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow x", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow rwx", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow rw", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow rx", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow wx", NULL)); + ASSERT_FALSE(cap.allow_all()); + cap.grants.clear(); + + ASSERT_TRUE(cap.parse("allow *", NULL)); + ASSERT_TRUE(cap.allow_all()); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", 
true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, true}}, addr)); + // 'allow *' overrides allow list + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "anamespace", {}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "anamespace", {{"application", {{"key", "value"}}}}, "asdf", true, true, {{"cls", "", true, true, false}}, addr)); +} + +TEST(OSDCap, AllowPool) { + OSDCap cap; + entity_addr_t addr; + bool r = cap.parse("allow rwx pool foo", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + 
// true->false for classes not on allow list + ASSERT_FALSE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr)); + + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, AllowPools) { + entity_addr_t addr; + OSDCap cap; + bool r = cap.parse("allow rwx pool foo, allow r pool bar", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + // true-false for classes not on allow list + ASSERT_FALSE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "", true, 
true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {}, "", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "", true, true, {{"cls", "", true, true, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("baz", "", {}, "", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "ns", {}, "", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "ns", {{"application", {{"key", "value"}}}}, "", true, false, {}, addr)); +} + +TEST(OSDCap, AllowPools2) { + entity_addr_t addr; + OSDCap cap; + bool r = cap.parse("allow r, allow rwx pool foo", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + // true-false for classes not on allow list + ASSERT_FALSE(cap.is_capable("foo", "", {}, "", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "", true, true, {{"cls", "", true, true, true}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "", true, false, {}, addr)); +} + 
+TEST(OSDCap, ObjectPrefix) { + entity_addr_t addr; + OSDCap cap; + bool r = cap.parse("allow rwx object_prefix foo", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, true}}, addr)); + // true-false for classes not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "_foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, " foo ", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "fo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, ObjectPoolAndPrefix) { + entity_addr_t addr; + OSDCap cap; + bool r = cap.parse("allow rwx pool bar object_prefix foo", NULL); + ASSERT_TRUE(r); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, true}}, addr)); + // true-false for classes not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "food", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo_bar", true, true, {{"cls", "", true, true, false}}, 
addr)); + + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "food", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "fo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, Namespace) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rw namespace=nfoo")); + + ASSERT_TRUE(cap.is_capable("bar", "nfoo", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "nfoobar", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, NamespaceGlob) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rw namespace=nfoo*")); + + ASSERT_TRUE(cap.is_capable("bar", "nfoo", {}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "nfoobar", {}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "nfo", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow r", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow w", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {}, 
addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow x", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + // true->false when class not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicRW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rw", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + 
ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, BasicRX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, true, true}}, addr)); + // true->false for class not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicWX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow wx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + // true->false for class not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, 
addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, BasicRWX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false for class not on allow list + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); +} + +TEST(OSDCap, BasicRWClassRClassW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rw class-read class-write", 
NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); +} + +TEST(OSDCap, ClassR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class-read", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, 
true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, ClassW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class-write", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, ClassRW) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class-read class-write", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", 
"", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); +} + +TEST(OSDCap, BasicRClassR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow r class-read", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "any", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "any", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "any", {}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", false, false, 
{{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "any", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, PoolClassR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow pool bar r class-read, allow pool foo rwx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + 
ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "ns", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", 
"", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, 
true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {{"application", {{"key", "value"}}}}, "foo", 
true, true, {{"cls", "", true, false, true}}, addr)); +} + +TEST(OSDCap, PoolClassRNS) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow pool bar namespace='' r class-read, allow pool foo namespace=ns rwx", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "ns", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "other", {}, "foo", false, 
true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "other", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "other", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "other", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "other", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", 
"ns", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("baz", "", 
{{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); +} + +TEST(OSDCap, NSClassR) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow namespace '' rw class-read class-write, allow namespace test r", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", 
true, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {{"cls", "", true, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "test", {}, "foo", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + 
ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "test", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "test", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "test", {}, "foo", true, false, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", true, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", true, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "test", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "bad", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "bad", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "bad", {}, "foo", false, false, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "bad", {}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "bad", {{"application", {{"key", "value"}}}}, "foo", false, false, {{"cls", "", false, true, true}}, addr)); +} + +TEST(OSDCap, PoolTagBasic) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx tag application key=value", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, 
"foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, false, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {}, addr)); + + 
ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, false, true}}, addr)); + // true->false when class not allow listed + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", false, true, {{"cls", "", false, true, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", false, true, {{"cls", "", false, false, true}}, addr)); +} + +TEST(OSDCap, PoolTagWildK) +{ + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx tag application *=value", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, addr)); + 
ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, PoolTagWildV) +{ + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx tag application key=*", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, PoolTagWildKV) +{ + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow 
rwx tag application *=*", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"app2", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {{"application", {{"foo", "bar"}, {"key2", "value"}}}, {"app2", {{"foo", "bar"}}}}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, NSPool) +{ + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx namespace ns tag application key=value", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns2", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value2"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, NSPoolGlob) +{ + 
entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx namespace ns* tag application key=value", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "ns", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + + ASSERT_TRUE(cap.is_capable("foo", "ns2", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {{"application", {{"key", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key", "value2"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "ns", {{"application", {{"key2", "value"}}}}, "foo", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "foo", true, true, {}, addr)); +} + +TEST(OSDCap, OutputParsed) +{ + entity_addr_t addr; + struct CapsTest { + const char *input; + const char *output; + }; + CapsTest test_values[] = { + {"allow *", + "osdcap[grant(*)]"}, + {"allow r", + "osdcap[grant(r)]"}, + {"allow rx", + "osdcap[grant(rx)]"}, + {"allow rwx", + "osdcap[grant(rwx)]"}, + {"allow rw class-read class-write", + "osdcap[grant(rwx)]"}, + {"allow rw class-read", + "osdcap[grant(rw class-read)]"}, + {"allow rw class-write", + "osdcap[grant(rw class-write)]"}, + {"allow rwx pool images", + "osdcap[grant(pool images rwx)]"}, + {"allow r pool images", + "osdcap[grant(pool images r)]"}, + {"allow pool images rwx", + "osdcap[grant(pool images rwx)]"}, + {"allow pool images r", + "osdcap[grant(pool images r)]"}, + {"allow pool images w", + "osdcap[grant(pool images w)]"}, + {"allow pool images x", + "osdcap[grant(pool images x)]"}, + {"allow r pool images namespace ''", + "osdcap[grant(pool images namespace \"\" r)]"}, + {"allow r pool images namespace foo", + "osdcap[grant(pool images namespace foo r)]"}, + {"allow r pool images namespace \"\"", + "osdcap[grant(pool images namespace \"\" r)]"}, + {"allow r 
namespace foo", + "osdcap[grant(namespace foo r)]"}, + {"allow pool images r; allow pool rbd rwx", + "osdcap[grant(pool images r),grant(pool rbd rwx)]"}, + {"allow pool images r, allow pool rbd rwx", + "osdcap[grant(pool images r),grant(pool rbd rwx)]"}, + {"allow class-read object_prefix rbd_children, allow pool libvirt-pool-test rwx", + "osdcap[grant(object_prefix rbd_children class-read),grant(pool libvirt-pool-test rwx)]"}, + {"allow rwx tag application key=value", + "osdcap[grant(app application key key val value rwx)]"}, + {"allow rwx namespace ns* tag application key=value", + "osdcap[grant(namespace ns* app application key key val value rwx)]"}, + {"allow all", + "osdcap[grant(*)]"}, + {"allow rwx tag application all=all", + "osdcap[grant(app application key * val * rwx)]"}, + {"allow rwx network 1.2.3.4/24", + "osdcap[grant(rwx network 1.2.3.4/24)]"}, + }; + + size_t num_tests = sizeof(test_values) / sizeof(*test_values); + for (size_t i = 0; i < num_tests; ++i) { + OSDCap cap; + std::cout << "Testing input '" << test_values[i].input << "'" << std::endl; + ASSERT_TRUE(cap.parse(test_values[i].input)); + ASSERT_EQ(test_values[i].output, stringify(cap)); + } +} + +TEST(OSDCap, AllowClass) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class foo", NULL)); + + // can call any method on class foo regardless of allow list status + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, 
true, false}}, addr)); + + // does not permit invoking class bar + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, false}}, addr)); +} + /* AllowClassMethod: a grant naming one method ('allow class foo xyz') accepts calls to method xyz of class foo for every flag combination listed, and rejects class bar both with an empty method name and with method xyz. */ +TEST(OSDCap, AllowClassMethod) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class foo xyz", NULL)); + + // can call the xyz method on class foo regardless of allow list status + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "xyz", true, true, false}}, addr)); + + // does not permit invoking class bar + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", 
false, false, {{"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "xyz", true, true, false}}, addr)); +} + /* AllowClass2: two class grants in one capability string ('allow class foo, allow class bar') accept calls on class foo and on class bar for every flag combination listed below. */ +TEST(OSDCap, AllowClass2) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class foo, allow class bar", NULL)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, 
false, {{"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, false}}, addr)); +} + /* AllowClassRWX: with 'allow rwx, allow class foo' class foo is accepted for every flag combination listed, while class bar is accepted only when the last field of its entry (the allow-list flag, per the comments below) is true. */ +TEST(OSDCap, AllowClassRWX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx, allow class foo", NULL)); + + // can call any method on class foo regardless of allow list status + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}}, addr)); + + // does not permit invoking class bar + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, false}}, addr)); + + // allows class bar if it is allow listed + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"bar", "", true, true, true}}, addr)); +} + /* AllowClassMulti: each request carries two class calls (foo and bar). With only 'allow class foo' the request is accepted solely when the bar entry is {false, false, true}; the second capability, cap2 ('allow class foo, allow class bar'), accepts every combination. */ +TEST(OSDCap, AllowClassMulti) { + 
entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow class foo", NULL)); + + // can call any method on foo, but not bar, so the entire op is rejected + // bar with allow list is rejected because it still needs rwx/class-read,write + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", 
true, true, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, 
{"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, 
true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, 
false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, false}}, addr)); + + // these are OK because 'bar' is on the allow list BUT the calls don't read or write + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, true}}, addr)); + + // can call any method on foo or bar regardless of allow list status + OSDCap cap2; + ASSERT_TRUE(cap2.parse("allow class foo, allow class bar", NULL)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, 
"foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", 
"", true, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, 
false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", 
false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, 
false, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, true}}, addr)); + ASSERT_TRUE(cap2.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, false}}, addr)); +} + /* AllowClassMultiRWX: with 'allow rwx, allow class foo' a request that calls both foo and bar is accepted exactly when the last field of the bar entry (the allow-list flag, per the comments below) is true, whatever the flags on the foo entry. */ +TEST(OSDCap, AllowClassMultiRWX) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("allow rwx, allow class foo", NULL)); + + // can call anything on foo, but only allow listed methods on bar + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, true}}, addr)); + + // fails because bar not allow listed + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", 
"", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, 
{{"foo", "", true, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", true, false, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, 
true, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, true, false}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", 
"", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, true}, {"bar", "", false, false, false}}, addr)); + + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, true}}, addr)); + + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", true, false, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, true, false}}, addr)); + ASSERT_FALSE(cap.is_capable("bar", "", {}, "foo", false, false, {{"foo", "", false, false, false}, {"bar", "", false, false, false}}, addr)); +} + +TEST(OSDCap, AllowProfile) { + entity_addr_t addr; + OSDCap cap; + ASSERT_TRUE(cap.parse("profile read-only, 
profile read-write pool abc", NULL)); + ASSERT_FALSE(cap.allow_all()); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "asdf", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "asdf", false, true, {}, addr)); + + // RBD + cap.grants.clear(); + ASSERT_TRUE(cap.parse("profile rbd pool abc", NULL)); + ASSERT_FALSE(cap.allow_all()); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "asdf", true, true, {}, addr)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "rbd_children", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "rbd_children", false, false, + {{"rbd", "", true, false, true}}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "asdf", true, true, + {{"rbd", "", true, true, true}}, addr)); + + cap.grants.clear(); + ASSERT_TRUE(cap.parse("profile rbd-read-only pool abc", NULL)); + ASSERT_FALSE(cap.allow_all()); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "rbd_children", true, false, {}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "asdf", true, false, + {{"rbd", "", true, false, true}}, addr)); + ASSERT_FALSE(cap.is_capable("abc", "", {}, "asdf", true, true, {}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "rbd_header.ABC", false, false, + {{"rbd", "child_attach", true, true, true}}, addr)); + ASSERT_TRUE(cap.is_capable("abc", "", {}, "rbd_header.ABC", false, false, + {{"rbd", "child_detach", true, true, true}}, addr)); + ASSERT_FALSE(cap.is_capable("abc", "", {}, "rbd_header.ABC", false, false, + {{"rbd", "other function", true, true, true}}, addr)); +} + +TEST(OSDCap, network) { + entity_addr_t a, b, c; + a.parse("10.1.2.3"); + b.parse("192.168.2.3"); + c.parse("192.167.2.3"); + + OSDCap cap; + ASSERT_TRUE(cap.parse("allow * network 192.168.0.0/16, allow * network 10.0.0.0/8", NULL)); + + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, a)); + ASSERT_TRUE(cap.is_capable("foo", "", {}, "asdf", 
true, true, {{"cls", "", true, true, true}}, b)); + ASSERT_FALSE(cap.is_capable("foo", "", {}, "asdf", true, true, {{"cls", "", true, true, true}}, c)); +} diff --git a/src/test/osd/safe-to-destroy.sh b/src/test/osd/safe-to-destroy.sh new file mode 100755 index 000000000..08afc8e8d --- /dev/null +++ b/src/test/osd/safe-to-destroy.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +source $CEPH_ROOT/qa/standalone/ceph-helpers.sh + +set -e + +function run() { + local dir=$1 + shift + + export CEPH_MON="127.0.0.1:$(get_unused_port)" + export CEPH_ARGS + CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " + CEPH_ARGS+="--mon-host=$CEPH_MON " + set -e + + local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} + for func in $funcs ; do + setup $dir || return 1 + $func $dir || return 1 + teardown $dir || return 1 + done +} + +function TEST_safe_to_destroy() { + local dir=$1 + + run_mon $dir a + run_mgr $dir x + run_osd $dir 0 + run_osd $dir 1 + run_osd $dir 2 + run_osd $dir 3 + + flush_pg_stats + + ceph osd safe-to-destroy 0 + ceph osd safe-to-destroy 1 + ceph osd safe-to-destroy 2 + ceph osd safe-to-destroy 3 + + ceph osd pool create foo 128 + sleep 2 + flush_pg_stats + wait_for_clean + + expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 0 + expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 1 + expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 2 + expect_failure $dir 'pgs currently' ceph osd safe-to-destroy 3 + + ceph osd out 0 + sleep 2 + flush_pg_stats + wait_for_clean + + ceph osd safe-to-destroy 0 + + # even osds without osd_stat are ok if all pgs are active+clean + id=`ceph osd create` + ceph osd safe-to-destroy $id +} + +function TEST_ok_to_stop() { + local dir=$1 + + run_mon $dir a + run_mgr $dir x + run_osd $dir 0 + run_osd $dir 1 + run_osd $dir 2 + run_osd $dir 3 + + ceph osd pool create foo 128 + ceph osd pool set foo size 3 + ceph osd pool set foo min_size 2 + sleep 1 + flush_pg_stats + wait_for_clean + + ceph osd 
ok-to-stop 0 + ceph osd ok-to-stop 1 + ceph osd ok-to-stop 2 + ceph osd ok-to-stop 3 + expect_failure $dir bad_become_inactive ceph osd ok-to-stop 0 1 + + ceph osd pool set foo min_size 1 + sleep 1 + flush_pg_stats + wait_for_clean + ceph osd ok-to-stop 0 1 + ceph osd ok-to-stop 1 2 + ceph osd ok-to-stop 2 3 + ceph osd ok-to-stop 3 4 + expect_failure $dir bad_become_inactive ceph osd ok-to-stop 0 1 2 + expect_failure $dir bad_become_inactive ceph osd ok-to-stop 0 1 2 3 +} + +main safe-to-destroy "$@" diff --git a/src/test/osd/test_ec_transaction.cc b/src/test/osd/test_ec_transaction.cc new file mode 100644 index 000000000..98669667a --- /dev/null +++ b/src/test/osd/test_ec_transaction.cc @@ -0,0 +1,124 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 Red Hat + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include <gtest/gtest.h> +#include "osd/PGTransaction.h" +#include "osd/ECTransaction.h" + +#include "test/unit.cc" + +struct mydpp : public DoutPrefixProvider { + std::ostream& gen_prefix(std::ostream& out) const override { return out << "foo"; } + CephContext *get_cct() const override { return g_ceph_context; } + unsigned get_subsys() const override { return ceph_subsys_osd; } +} dpp; + +#define dout_context g_ceph_context + +TEST(ectransaction, two_writes_separated) +{ + hobject_t h; + PGTransactionUPtr t(new PGTransaction); + bufferlist a, b; + t->create(h); + a.append_zero(565760); + t->write(h, 0, a.length(), a, 0); + b.append_zero(2437120); + t->write(h, 669856, b.length(), b, 0); + + ECUtil::stripe_info_t sinfo(2, 8192); + auto plan = ECTransaction::get_write_plan( + sinfo, + std::move(t), + [&](const hobject_t &i) { + ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1)); + return ref; + }, + &dpp); + generic_derr << "to_read " << plan.to_read << dendl; + generic_derr << "will_write " << plan.will_write << dendl; + + ASSERT_EQ(0u, plan.to_read.size()); + ASSERT_EQ(1u, plan.will_write.size()); +} + +TEST(ectransaction, two_writes_nearby) +{ + hobject_t h; + PGTransactionUPtr t(new PGTransaction); + bufferlist a, b; + t->create(h); + + // two nearby writes, both partly touching the same 8192-byte stripe + ECUtil::stripe_info_t sinfo(2, 8192); + a.append_zero(565760); + t->write(h, 0, a.length(), a, 0); + b.append_zero(2437120); + t->write(h, 569856, b.length(), b, 0); + + auto plan = ECTransaction::get_write_plan( + sinfo, + std::move(t), + [&](const hobject_t &i) { + ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1)); + return ref; + }, + &dpp); + generic_derr << "to_read " << plan.to_read << dendl; + generic_derr << "will_write " << plan.will_write << dendl; + + ASSERT_EQ(0u, plan.to_read.size()); + ASSERT_EQ(1u, plan.will_write.size()); +} + +TEST(ectransaction, many_writes) +{ + hobject_t h; + PGTransactionUPtr t(new PGTransaction); + bufferlist a, 
b; + a.append_zero(512); + b.append_zero(4096); + t->create(h); + + ECUtil::stripe_info_t sinfo(2, 8192); + // write 2801664~512 + // write 2802176~512 + // write 2802688~512 + // write 2803200~512 + t->write(h, 2801664, a.length(), a, 0); + t->write(h, 2802176, a.length(), a, 0); + t->write(h, 2802688, a.length(), a, 0); + t->write(h, 2803200, a.length(), a, 0); + + // write 2805760~4096 + // write 2809856~4096 + // write 2813952~4096 + t->write(h, 2805760, b.length(), b, 0); + t->write(h, 2809856, b.length(), b, 0); + t->write(h, 2813952, b.length(), b, 0); + + auto plan = ECTransaction::get_write_plan( + sinfo, + std::move(t), + [&](const hobject_t &i) { + ECUtil::HashInfoRef ref(new ECUtil::HashInfo(1)); + return ref; + }, + &dpp); + generic_derr << "to_read " << plan.to_read << dendl; + generic_derr << "will_write " << plan.will_write << dendl; + + ASSERT_EQ(0u, plan.to_read.size()); + ASSERT_EQ(1u, plan.will_write.size()); +} diff --git a/src/test/osd/test_extent_cache.cc b/src/test/osd/test_extent_cache.cc new file mode 100644 index 000000000..04b638a9a --- /dev/null +++ b/src/test/osd/test_extent_cache.cc @@ -0,0 +1,280 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 Red Hat + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + + +#include <gtest/gtest.h> +#include "osd/ExtentCache.h" +#include <iostream> + +extent_map imap_from_vector(vector<pair<uint64_t, uint64_t> > &&in) +{ + extent_map out; + for (auto &&tup: in) { + bufferlist bl; + bl.append_zero(tup.second); + out.insert(tup.first, bl.length(), bl); + } + return out; +} + +extent_map imap_from_iset(const extent_set &set) +{ + extent_map out; + for (auto &&iter: set) { + bufferlist bl; + bl.append_zero(iter.second); + out.insert(iter.first, iter.second, bl); + } + return out; +} + +extent_set iset_from_vector(vector<pair<uint64_t, uint64_t> > &&in) +{ + extent_set out; + for (auto &&tup: in) { + out.insert(tup.first, tup.second); + } + return out; +} + +TEST(extentcache, simple_write) +{ + hobject_t oid; + + ExtentCache c; + ExtentCache::write_pin pin; + c.open_write_pin(pin); + + auto to_read = iset_from_vector( + {{0, 2}, {8, 2}, {20, 2}}); + auto to_write = iset_from_vector( + {{0, 10}, {20, 4}}); + auto must_read = c.reserve_extents_for_rmw( + oid, pin, to_write, to_read); + ASSERT_EQ( + must_read, + to_read); + + c.print(std::cerr); + + auto got = imap_from_iset(must_read); + auto pending_read = to_read; + pending_read.subtract(must_read); + + auto pending = c.get_remaining_extents_for_rmw( + oid, + pin, + pending_read); + ASSERT_TRUE(pending.empty()); + + auto write_map = imap_from_iset(to_write); + c.present_rmw_update( + oid, + pin, + write_map); + + c.release_write_pin(pin); +} + +TEST(extentcache, write_write_overlap) +{ + hobject_t oid; + + ExtentCache c; + ExtentCache::write_pin pin; + c.open_write_pin(pin); + + // start write 1 + auto to_read = iset_from_vector( + {{0, 2}, {8, 2}, {20, 2}}); + auto to_write = iset_from_vector( + {{0, 10}, {20, 4}}); + auto must_read = c.reserve_extents_for_rmw( + oid, pin, to_write, to_read); + ASSERT_EQ( + must_read, + to_read); + + c.print(std::cerr); + + // start write 2 + ExtentCache::write_pin pin2; + c.open_write_pin(pin2); + auto to_read2 = iset_from_vector( + {{2, 
4}, {10, 4}, {18, 4}}); + auto to_write2 = iset_from_vector( + {{2, 12}, {18, 12}}); + auto must_read2 = c.reserve_extents_for_rmw( + oid, pin2, to_write2, to_read2); + ASSERT_EQ( + must_read2, + iset_from_vector({{10, 4}, {18, 2}})); + + c.print(std::cerr); + + // complete read for write 1 and start commit + auto got = imap_from_iset(must_read); + auto pending_read = to_read; + pending_read.subtract(must_read); + auto pending = c.get_remaining_extents_for_rmw( + oid, + pin, + pending_read); + ASSERT_TRUE(pending.empty()); + + auto write_map = imap_from_iset(to_write); + c.present_rmw_update( + oid, + pin, + write_map); + + c.print(std::cerr); + + // complete read for write 2 and start commit + auto pending_read2 = to_read2; + pending_read2.subtract(must_read2); + auto pending2 = c.get_remaining_extents_for_rmw( + oid, + pin2, + pending_read2); + ASSERT_EQ( + pending2, + imap_from_iset(pending_read2)); + + auto write_map2 = imap_from_iset(to_write2); + c.present_rmw_update( + oid, + pin2, + write_map2); + + c.print(std::cerr); + + c.release_write_pin(pin); + + c.print(std::cerr); + + c.release_write_pin(pin2); +} + +TEST(extentcache, write_write_overlap2) +{ + hobject_t oid; + + ExtentCache c; + ExtentCache::write_pin pin; + c.open_write_pin(pin); + + // start write 1 + auto to_read = extent_set(); + auto to_write = iset_from_vector( + {{659456, 4096}}); + auto must_read = c.reserve_extents_for_rmw( + oid, pin, to_write, to_read); + ASSERT_EQ( + must_read, + to_read); + + c.print(std::cerr); + + // start write 2 + ExtentCache::write_pin pin2; + c.open_write_pin(pin2); + auto to_read2 = extent_set(); + auto to_write2 = iset_from_vector( + {{663552, 4096}}); + auto must_read2 = c.reserve_extents_for_rmw( + oid, pin2, to_write2, to_read2); + ASSERT_EQ( + must_read2, + to_read2); + + + // start write 3 + ExtentCache::write_pin pin3; + c.open_write_pin(pin3); + auto to_read3 = iset_from_vector({{659456, 8192}}); + auto to_write3 = iset_from_vector({{659456, 8192}}); + 
auto must_read3 = c.reserve_extents_for_rmw( + oid, pin3, to_write3, to_read3); + ASSERT_EQ( + must_read3, + extent_set()); + + c.print(std::cerr); + + // complete read for write 1 and start commit + auto got = imap_from_iset(must_read); + auto pending_read = to_read; + pending_read.subtract(must_read); + auto pending = c.get_remaining_extents_for_rmw( + oid, + pin, + pending_read); + ASSERT_TRUE(pending.empty()); + + auto write_map = imap_from_iset(to_write); + c.present_rmw_update( + oid, + pin, + write_map); + + c.print(std::cerr); + + // complete read for write 2 and start commit + auto pending_read2 = to_read2; + pending_read2.subtract(must_read2); + auto pending2 = c.get_remaining_extents_for_rmw( + oid, + pin2, + pending_read2); + ASSERT_EQ( + pending2, + imap_from_iset(pending_read2)); + + auto write_map2 = imap_from_iset(to_write2); + c.present_rmw_update( + oid, + pin2, + write_map2); + + // complete read for write 2 and start commit + auto pending_read3 = to_read3; + pending_read3.subtract(must_read3); + auto pending3 = c.get_remaining_extents_for_rmw( + oid, + pin3, + pending_read3); + ASSERT_EQ( + pending3, + imap_from_iset(pending_read3)); + + auto write_map3 = imap_from_iset(to_write3); + c.present_rmw_update( + oid, + pin3, + write_map3); + + + c.print(std::cerr); + + c.release_write_pin(pin); + + c.print(std::cerr); + + c.release_write_pin(pin2); + + c.print(std::cerr); + + c.release_write_pin(pin3); +} diff --git a/src/test/osd/test_pg_transaction.cc b/src/test/osd/test_pg_transaction.cc new file mode 100644 index 000000000..63b6197bf --- /dev/null +++ b/src/test/osd/test_pg_transaction.cc @@ -0,0 +1,129 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 Red Hat + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published 
by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <gtest/gtest.h> +#include "osd/PGTransaction.h" + +TEST(pgtransaction, simple) +{ + hobject_t h; + PGTransaction t; + ASSERT_TRUE(t.empty()); + t.nop(h); + ASSERT_FALSE(t.empty()); + unsigned num = 0; + t.safe_create_traverse( + [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) { + ASSERT_EQ(p.first, h); + using T = PGTransaction::ObjectOperation::Init; + ASSERT_TRUE(boost::get<T::None>(&p.second.init_type)); + ++num; + }); + ASSERT_EQ(num, 1u); +} + +TEST(pgtransaction, clone_safe_create_traverse) +{ + hobject_t h, h2; + h2.snap = 1; + PGTransaction t; + ASSERT_TRUE(t.empty()); + t.nop(h2); + ASSERT_FALSE(t.empty()); + t.clone(h, h2); + unsigned num = 0; + t.safe_create_traverse( + [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) { + using T = PGTransaction::ObjectOperation::Init; + if (num == 0) { + ASSERT_EQ(p.first, h); + ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type)); + ASSERT_EQ( + boost::get<T::Clone>(&p.second.init_type)->source, + h2); + } else if (num == 1) { + ASSERT_EQ(p.first, h2); + ASSERT_TRUE(boost::get<T::None>(&p.second.init_type)); + } else { + ASSERT_LT(num, 2u); + } + ++num; + }); +} + +TEST(pgtransaction, clone_safe_create_traverse2) +{ + hobject_t h, h2, h3; + h.snap = 10; + h2.snap = 5; + h3.snap = 3; + PGTransaction t; + ASSERT_TRUE(t.empty()); + t.nop(h3); + ASSERT_FALSE(t.empty()); + t.clone(h, h2); + t.remove(h2); + t.clone(h2, h3); + unsigned num = 0; + t.safe_create_traverse( + [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) { + using T = PGTransaction::ObjectOperation::Init; + if (num == 0) { + ASSERT_EQ(p.first, h); + ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type)); + ASSERT_EQ( + boost::get<T::Clone>(&p.second.init_type)->source, + h2); + } else if (num == 1) { + ASSERT_EQ(p.first, h2); + ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type)); + ASSERT_EQ( + 
boost::get<T::Clone>(&p.second.init_type)->source, + h3); + } else if (num == 2) { + ASSERT_EQ(p.first, h3); + ASSERT_TRUE(boost::get<T::None>(&p.second.init_type)); + } else { + ASSERT_LT(num, 3u); + } + ++num; + }); +} + +TEST(pgtransaction, clone_safe_create_traverse3) +{ + hobject_t h, h2, h3; + h.snap = 10; + h2.snap = 5; + h3.snap = 3; + PGTransaction t; + t.remove(h); + t.remove(h2); + t.clone(h2, h3); + unsigned num = 0; + t.safe_create_traverse( + [&](const pair<const hobject_t, PGTransaction::ObjectOperation> &p) { + using T = PGTransaction::ObjectOperation::Init; + if (p.first == h) { + ASSERT_TRUE(p.second.is_delete()); + } else if (p.first == h2) { + ASSERT_TRUE(boost::get<T::Clone>(&p.second.init_type)); + ASSERT_EQ( + boost::get<T::Clone>(&p.second.init_type)->source, + h3); + } + ASSERT_LT(num, 2u); + ++num; + }); +} diff --git a/src/test/osd/types.cc b/src/test/osd/types.cc new file mode 100644 index 000000000..c452176bc --- /dev/null +++ b/src/test/osd/types.cc @@ -0,0 +1,2202 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include "include/types.h" +#include "osd/osd_types.h" +#include "osd/OSDMap.h" +#include "gtest/gtest.h" +#include "include/coredumpctl.h" +#include "common/Thread.h" +#include "include/stringify.h" +#include "osd/ReplicatedBackend.h" +#include <sstream> + +TEST(hobject, prefixes0) +{ + uint32_t mask = 0xE947FA20; + uint32_t bits = 12; + int64_t pool = 0; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000000000000.02A")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes1) +{ + uint32_t mask = 0x0000000F; + uint32_t bits = 6; + int64_t pool = 20; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000000000014.F0")); + prefixes_correct.insert(string("0000000000000014.F4")); + prefixes_correct.insert(string("0000000000000014.F8")); + prefixes_correct.insert(string("0000000000000014.FC")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes2) +{ + uint32_t mask = 0xDEADBEAF; + uint32_t bits = 25; + int64_t pool = 0; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000000000000.FAEBDA0")); + prefixes_correct.insert(string("0000000000000000.FAEBDA2")); + prefixes_correct.insert(string("0000000000000000.FAEBDA4")); + prefixes_correct.insert(string("0000000000000000.FAEBDA6")); + prefixes_correct.insert(string("0000000000000000.FAEBDA8")); + prefixes_correct.insert(string("0000000000000000.FAEBDAA")); + prefixes_correct.insert(string("0000000000000000.FAEBDAC")); + prefixes_correct.insert(string("0000000000000000.FAEBDAE")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes3) +{ + uint32_t mask = 0xE947FA20; + uint32_t bits = 32; + int64_t pool = 0x23; + + set<string> prefixes_correct; + 
prefixes_correct.insert(string("0000000000000023.02AF749E")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes4) +{ + uint32_t mask = 0xE947FA20; + uint32_t bits = 0; + int64_t pool = 0x23; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000000000023.")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(hobject, prefixes5) +{ + uint32_t mask = 0xDEADBEAF; + uint32_t bits = 1; + int64_t pool = 0x34AC5D00; + + set<string> prefixes_correct; + prefixes_correct.insert(string("0000000034AC5D00.1")); + prefixes_correct.insert(string("0000000034AC5D00.3")); + prefixes_correct.insert(string("0000000034AC5D00.5")); + prefixes_correct.insert(string("0000000034AC5D00.7")); + prefixes_correct.insert(string("0000000034AC5D00.9")); + prefixes_correct.insert(string("0000000034AC5D00.B")); + prefixes_correct.insert(string("0000000034AC5D00.D")); + prefixes_correct.insert(string("0000000034AC5D00.F")); + + set<string> prefixes_out(hobject_t::get_prefixes(bits, mask, pool)); + ASSERT_EQ(prefixes_out, prefixes_correct); +} + +TEST(pg_interval_t, check_new_interval) +{ +// iterate through all 4 combinations +for (unsigned i = 0; i < 4; ++i) { + // + // Create a situation where osdmaps are the same so that + // each test case can diverge from it using minimal code. 
+ // + int osd_id = 1; + epoch_t epoch = 40; + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + std::shared_ptr<OSDMap> lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + epoch_t same_interval_since = epoch; + epoch_t last_epoch_clean = same_interval_since; + int64_t pool_id = 200; + int pg_num = 4; + __u8 min_size = 2; + boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(new ReplicatedBackend::RPCRecPred()); + { + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_pools[pool_id].set_pg_num_pending(pg_num); + inc.new_up_thru[osd_id] = epoch + 1; + osdmap->apply_incremental(inc); + lastmap->apply_incremental(inc); + } + vector<int> new_acting; + new_acting.push_back(osd_id); + new_acting.push_back(osd_id + 1); + vector<int> old_acting = new_acting; + int old_primary = osd_id; + int new_primary = osd_id; + vector<int> new_up; + new_up.push_back(osd_id); + int old_up_primary = osd_id; + int new_up_primary = osd_id; + vector<int> old_up = new_up; + pg_t pgid; + pgid.set_pool(pool_id); + + // + // Do nothing if there are no modifications in + // acting, up or pool size and that the pool is not + // being split + // + { + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_FALSE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + ASSERT_TRUE(past_intervals.empty()); + } + + // + // The acting set has changed + // + { + vector<int> new_acting; + int _new_primary = osd_id + 1; + new_acting.push_back(_new_primary); + + PastIntervals past_intervals; + + 
ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + old_primary = new_primary; + } + + // + // The up set has changed + // + { + vector<int> new_up; + int _new_primary = osd_id + 1; + new_up.push_back(_new_primary); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + } + + // + // The up primary has changed + // + { + vector<int> new_up; + int _new_up_primary = osd_id + 1; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + _new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + } + + // + // PG is splitting + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + int new_pg_num = pg_num ^ 2; + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(new_pg_num); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + 
&past_intervals)); + } + + // + // PG is pre-merge source + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1); + osdmap->apply_incremental(inc); + cout << "pg_num " << pg_num << std::endl; + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pg_t(pg_num - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG was pre-merge source + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1); + osdmap->apply_incremental(inc); + + cout << "pg_num " << pg_num << std::endl; + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + lastmap, // reverse order! 
+ osdmap, + pg_t(pg_num - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG is merge source + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num - 1); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pg_t(pg_num - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG is pre-merge target + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pg_t(pg_num / 2 - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG was pre-merge target + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num_pending(pg_num - 1); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + 
ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + lastmap, // reverse order! + osdmap, + pg_t(pg_num / 2 - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG is merge target + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num - 1); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pg_t(pg_num / 2 - 1, pool_id), + *recoverable, + &past_intervals)); + } + + // + // PG size has changed + // + { + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + __u8 new_min_size = min_size + 1; + inc.new_pools[pool_id].min_size = new_min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + osdmap->apply_incremental(inc); + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals)); + } + + // + // The old acting set was empty : the previous interval could not + // have been rw + // + { + vector<int> old_acting; + + PastIntervals past_intervals; + + ostringstream out; + + 
ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("acting set is too small")); + } + + // + // The old acting set did not have enough osd : it could + // not have been rw + // + { + vector<int> old_acting; + old_acting.push_back(osd_id); + + // + // see http://tracker.ceph.com/issues/5780 + // the size of the old acting set should be compared + // with the min_size of the old osdmap + // + // The new osdmap is created so that it triggers the + // bug. + // + std::shared_ptr<OSDMap> osdmap(new OSDMap()); + osdmap->set_max_osd(10); + osdmap->set_state(osd_id, CEPH_OSD_EXISTS); + osdmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + __u8 new_min_size = old_acting.size(); + inc.new_pools[pool_id].min_size = new_min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + osdmap->apply_incremental(inc); + + ostringstream out; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("acting set is too small")); + } + + // + // The acting set changes. The old acting set primary was up during the + // previous interval and may have been rw. 
+ // + { + vector<int> new_acting; + new_acting.push_back(osd_id + 4); + new_acting.push_back(osd_id + 5); + + ostringstream out; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("includes interval")); + } + // + // The acting set changes. The old acting set primary was not up + // during the old interval but last_epoch_clean is in the + // old interval and it may have been rw. + // + { + vector<int> new_acting; + new_acting.push_back(osd_id + 4); + new_acting.push_back(osd_id + 5); + + std::shared_ptr<OSDMap> lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_up_thru[osd_id] = epoch - 10; + lastmap->apply_incremental(inc); + + ostringstream out; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("presumed to have been rw")); + } + + // + // The acting set changes. The old acting set primary was not up + // during the old interval and last_epoch_clean is before the + // old interval : the previous interval could not possibly have + // been rw. 
+ // + { + vector<int> new_acting; + new_acting.push_back(osd_id + 4); + new_acting.push_back(osd_id + 5); + + epoch_t last_epoch_clean = epoch - 10; + + std::shared_ptr<OSDMap> lastmap(new OSDMap()); + lastmap->set_max_osd(10); + lastmap->set_state(osd_id, CEPH_OSD_EXISTS); + lastmap->set_epoch(epoch); + OSDMap::Incremental inc(epoch + 1); + inc.new_pools[pool_id].min_size = min_size; + inc.new_pools[pool_id].set_pg_num(pg_num); + inc.new_up_thru[osd_id] = last_epoch_clean; + lastmap->apply_incremental(inc); + + ostringstream out; + + PastIntervals past_intervals; + + ASSERT_TRUE(past_intervals.empty()); + ASSERT_TRUE(PastIntervals::check_new_interval(old_primary, + new_primary, + old_acting, + new_acting, + old_up_primary, + new_up_primary, + old_up, + new_up, + same_interval_since, + last_epoch_clean, + osdmap, + lastmap, + pgid, + *recoverable, + &past_intervals, + &out)); + ASSERT_NE(string::npos, out.str().find("does not include interval")); + } +} // end for, didn't want to reindent +} + +TEST(pg_t, get_ancestor) +{ + ASSERT_EQ(pg_t(0, 0), pg_t(16, 0).get_ancestor(16)); + ASSERT_EQ(pg_t(1, 0), pg_t(17, 0).get_ancestor(16)); + ASSERT_EQ(pg_t(0, 0), pg_t(16, 0).get_ancestor(8)); + ASSERT_EQ(pg_t(16, 0), pg_t(16, 0).get_ancestor(80)); + ASSERT_EQ(pg_t(16, 0), pg_t(16, 0).get_ancestor(83)); + ASSERT_EQ(pg_t(1, 0), pg_t(1321, 0).get_ancestor(123).get_ancestor(8)); + ASSERT_EQ(pg_t(3, 0), pg_t(1323, 0).get_ancestor(123).get_ancestor(8)); + ASSERT_EQ(pg_t(3, 0), pg_t(1323, 0).get_ancestor(8)); +} + +TEST(pg_t, split) +{ + pg_t pgid(0, 0); + set<pg_t> s; + bool b; + + s.clear(); + b = pgid.is_split(1, 1, &s); + ASSERT_TRUE(!b); + + s.clear(); + b = pgid.is_split(2, 4, NULL); + ASSERT_TRUE(b); + b = pgid.is_split(2, 4, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(2, 0))); + + s.clear(); + b = pgid.is_split(2, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(3u, s.size()); + ASSERT_TRUE(s.count(pg_t(2, 0))); + ASSERT_TRUE(s.count(pg_t(4, 0))); + 
ASSERT_TRUE(s.count(pg_t(6, 0))); + + s.clear(); + b = pgid.is_split(3, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(4, 0))); + + s.clear(); + b = pgid.is_split(6, 8, NULL); + ASSERT_TRUE(!b); + b = pgid.is_split(6, 8, &s); + ASSERT_TRUE(!b); + ASSERT_EQ(0u, s.size()); + + pgid = pg_t(1, 0); + + s.clear(); + b = pgid.is_split(2, 4, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(3, 0))); + + s.clear(); + b = pgid.is_split(2, 6, &s); + ASSERT_TRUE(b); + ASSERT_EQ(2u, s.size()); + ASSERT_TRUE(s.count(pg_t(3, 0))); + ASSERT_TRUE(s.count(pg_t(5, 0))); + + s.clear(); + b = pgid.is_split(2, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(3u, s.size()); + ASSERT_TRUE(s.count(pg_t(3, 0))); + ASSERT_TRUE(s.count(pg_t(5, 0))); + ASSERT_TRUE(s.count(pg_t(7, 0))); + + s.clear(); + b = pgid.is_split(4, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(5, 0))); + + s.clear(); + b = pgid.is_split(3, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(3u, s.size()); + ASSERT_TRUE(s.count(pg_t(3, 0))); + ASSERT_TRUE(s.count(pg_t(5, 0))); + ASSERT_TRUE(s.count(pg_t(7, 0))); + + s.clear(); + b = pgid.is_split(6, 8, &s); + ASSERT_TRUE(!b); + ASSERT_EQ(0u, s.size()); + + pgid = pg_t(3, 0); + + s.clear(); + b = pgid.is_split(7, 8, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(7, 0))); + + s.clear(); + b = pgid.is_split(7, 12, &s); + ASSERT_TRUE(b); + ASSERT_EQ(2u, s.size()); + ASSERT_TRUE(s.count(pg_t(7, 0))); + ASSERT_TRUE(s.count(pg_t(11, 0))); + + s.clear(); + b = pgid.is_split(7, 11, &s); + ASSERT_TRUE(b); + ASSERT_EQ(1u, s.size()); + ASSERT_TRUE(s.count(pg_t(7, 0))); + +} + +TEST(pg_t, merge) +{ + pg_t pgid, parent; + bool b; + + pgid = pg_t(7, 0); + b = pgid.is_merge_source(8, 7, &parent); + ASSERT_TRUE(b); + ASSERT_EQ(parent, pg_t(3, 0)); + ASSERT_TRUE(parent.is_merge_target(8, 7)); + + b = pgid.is_merge_source(8, 5, &parent); + ASSERT_TRUE(b); + ASSERT_EQ(parent, pg_t(3, 0)); + 
ASSERT_TRUE(parent.is_merge_target(8, 5)); + + b = pgid.is_merge_source(8, 4, &parent); + ASSERT_TRUE(b); + ASSERT_EQ(parent, pg_t(3, 0)); + ASSERT_TRUE(parent.is_merge_target(8, 4)); + + b = pgid.is_merge_source(8, 3, &parent); + ASSERT_TRUE(b); + ASSERT_EQ(parent, pg_t(1, 0)); + ASSERT_TRUE(parent.is_merge_target(8, 4)); + + b = pgid.is_merge_source(9, 8, &parent); + ASSERT_FALSE(b); + ASSERT_FALSE(parent.is_merge_target(9, 8)); +} + +TEST(ObjectCleanRegions, mark_data_region_dirty) +{ + ObjectCleanRegions clean_regions; + uint64_t offset_1, len_1, offset_2, len_2; + offset_1 = 4096; + len_1 = 8192; + offset_2 = 40960; + len_2 = 4096; + + interval_set<uint64_t> expect_dirty_region; + EXPECT_EQ(expect_dirty_region, clean_regions.get_dirty_regions()); + expect_dirty_region.insert(offset_1, len_1); + expect_dirty_region.insert(offset_2, len_2); + + clean_regions.mark_data_region_dirty(offset_1, len_1); + clean_regions.mark_data_region_dirty(offset_2, len_2); + EXPECT_EQ(expect_dirty_region, clean_regions.get_dirty_regions()); +} + +TEST(ObjectCleanRegions, mark_omap_dirty) +{ + ObjectCleanRegions clean_regions; + + EXPECT_FALSE(clean_regions.omap_is_dirty()); + clean_regions.mark_omap_dirty(); + EXPECT_TRUE(clean_regions.omap_is_dirty()); +} + +TEST(ObjectCleanRegions, merge) +{ + ObjectCleanRegions cr1, cr2; + interval_set<uint64_t> cr1_expect; + interval_set<uint64_t> cr2_expect; + ASSERT_EQ(cr1_expect, cr1.get_dirty_regions()); + ASSERT_EQ(cr2_expect, cr2.get_dirty_regions()); + + cr1.mark_data_region_dirty(4096, 4096); + cr1_expect.insert(4096, 4096); + ASSERT_EQ(cr1_expect, cr1.get_dirty_regions()); + cr1.mark_data_region_dirty(12288, 8192); + cr1_expect.insert(12288, 8192); + ASSERT_TRUE(cr1_expect.subset_of(cr1.get_dirty_regions())); + cr1.mark_data_region_dirty(32768, 10240); + cr1_expect.insert(32768, 10240); + cr1_expect.erase(4096, 4096); + ASSERT_TRUE(cr1_expect.subset_of(cr1.get_dirty_regions())); + + cr2.mark_data_region_dirty(20480, 12288); + 
cr2_expect.insert(20480, 12288); + ASSERT_EQ(cr2_expect, cr2.get_dirty_regions()); + cr2.mark_data_region_dirty(102400, 4096); + cr2_expect.insert(102400, 4096); + cr2.mark_data_region_dirty(204800, 8192); + cr2_expect.insert(204800, 8192); + cr2.mark_data_region_dirty(409600, 4096); + cr2_expect.insert(409600, 4096); + ASSERT_TRUE(cr2_expect.subset_of(cr2.get_dirty_regions())); + + ASSERT_FALSE(cr2.omap_is_dirty()); + cr2.mark_omap_dirty(); + ASSERT_FALSE(cr1.omap_is_dirty()); + ASSERT_TRUE(cr2.omap_is_dirty()); + + cr1.merge(cr2); + cr1_expect.insert(204800, 8192); + ASSERT_TRUE(cr1_expect.subset_of(cr1.get_dirty_regions())); + ASSERT_TRUE(cr1.omap_is_dirty()); +} + +TEST(pg_missing_t, constructor) +{ + pg_missing_t missing; + EXPECT_EQ((unsigned int)0, missing.num_missing()); + EXPECT_FALSE(missing.have_missing()); +} + +TEST(pg_missing_t, have_missing) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.have_missing()); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.have_missing()); +} + +TEST(pg_missing_t, claim) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.have_missing()); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.have_missing()); + + pg_missing_t other; + EXPECT_FALSE(other.have_missing()); + + other.claim(std::move(missing)); + EXPECT_TRUE(other.have_missing()); +} + +TEST(pg_missing_t, is_missing) +{ + // pg_missing_t::is_missing(const hobject_t& oid) const + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + } + + // bool pg_missing_t::is_missing(const hobject_t& oid, eversion_t v) const + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + eversion_t 
need(10,5); + EXPECT_FALSE(missing.is_missing(oid, eversion_t())); + missing.add(oid, need, eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_FALSE(missing.is_missing(oid, eversion_t())); + EXPECT_TRUE(missing.is_missing(oid, need)); + } +} + +TEST(pg_missing_t, add_next_event) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + hobject_t oid_other(object_t("other"), "key", 9123, 9456, 0, ""); + eversion_t version(10,5); + eversion_t prior_version(3,4); + pg_log_entry_t sample_e(pg_log_entry_t::DELETE, oid, version, prior_version, + 0, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999), + utime_t(8,9), 0); + + // new object (MODIFY) + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + e.prior_version = eversion_t(); + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + + // adding the same object replaces the previous one + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // new object (CLONE) + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::CLONE; + e.prior_version = eversion_t(); + EXPECT_TRUE(e.is_clone()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_FALSE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, 
missing.get_rmissing().size()); + + // adding the same object replaces the previous one + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // existing object (MODIFY) + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + e.prior_version = eversion_t(); + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + + // adding the same object with a different version + e.prior_version = prior_version; + missing.add_next_event(e); + EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // object with prior version (MODIFY) + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(prior_version, missing.get_items().at(oid).have); + EXPECT_EQ(version, missing.get_items().at(oid).need); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // adding a DELETE matching an existing event + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + 
EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + + e.op = pg_log_entry_t::DELETE; + EXPECT_TRUE(e.is_delete()); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_TRUE(missing.get_items().at(oid).is_delete()); + EXPECT_EQ(prior_version, missing.get_items().at(oid).have); + EXPECT_EQ(version, missing.get_items().at(oid).need); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } + + // adding a LOST_DELETE after an existing event + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::MODIFY; + EXPECT_TRUE(e.is_update()); + EXPECT_TRUE(e.object_is_indexed()); + EXPECT_TRUE(e.reqid_is_indexed()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_FALSE(missing.get_items().at(oid).is_delete()); + + e.op = pg_log_entry_t::LOST_DELETE; + e.version.version++; + EXPECT_TRUE(e.is_delete()); + missing.add_next_event(e); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_TRUE(missing.get_items().at(oid).is_delete()); + EXPECT_EQ(prior_version, missing.get_items().at(oid).have); + EXPECT_EQ(e.version, missing.get_items().at(oid).need); + EXPECT_EQ(oid, missing.get_rmissing().at(e.version.version)); + EXPECT_EQ(1U, missing.num_missing()); + EXPECT_EQ(1U, missing.get_rmissing().size()); + } +} + +TEST(pg_missing_t, revise_need) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + // create a new entry + EXPECT_FALSE(missing.is_missing(oid)); + eversion_t need(10,10); + missing.revise_need(oid, need, false); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(eversion_t(), missing.get_items().at(oid).have); + EXPECT_EQ(need, missing.get_items().at(oid).need); + // update an existing entry and preserve have + eversion_t 
have(1,1); + missing.revise_have(oid, have); + eversion_t new_need(10,12); + EXPECT_EQ(have, missing.get_items().at(oid).have); + missing.revise_need(oid, new_need, false); + EXPECT_EQ(have, missing.get_items().at(oid).have); + EXPECT_EQ(new_need, missing.get_items().at(oid).need); +} + +TEST(pg_missing_t, revise_have) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + // a non existing entry means noop + EXPECT_FALSE(missing.is_missing(oid)); + eversion_t have(1,1); + missing.revise_have(oid, have); + EXPECT_FALSE(missing.is_missing(oid)); + // update an existing entry + eversion_t need(10,12); + missing.add(oid, need, have, false); + EXPECT_TRUE(missing.is_missing(oid)); + eversion_t new_have(2,2); + EXPECT_EQ(have, missing.get_items().at(oid).have); + missing.revise_have(oid, new_have); + EXPECT_EQ(new_have, missing.get_items().at(oid).have); + EXPECT_EQ(need, missing.get_items().at(oid).need); +} + +TEST(pg_missing_t, add) +{ + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + eversion_t have(1,1); + eversion_t need(10,10); + missing.add(oid, need, have, false); + EXPECT_TRUE(missing.is_missing(oid)); + EXPECT_EQ(have, missing.get_items().at(oid).have); + EXPECT_EQ(need, missing.get_items().at(oid).need); +} + +TEST(pg_missing_t, rm) +{ + // void pg_missing_t::rm(const hobject_t& oid, eversion_t v) + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + epoch_t epoch = 10; + eversion_t need(epoch,10); + missing.add(oid, need, eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + // rm of an older version is a noop + missing.rm(oid, eversion_t(epoch / 2,20)); + EXPECT_TRUE(missing.is_missing(oid)); + // rm of a later version removes the object + missing.rm(oid, eversion_t(epoch * 2,20)); + EXPECT_FALSE(missing.is_missing(oid)); + } + // void 
pg_missing_t::rm(const std::map<hobject_t, pg_missing_item>::iterator &m) + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + auto m = missing.get_items().find(oid); + missing.rm(m); + EXPECT_FALSE(missing.is_missing(oid)); + } +} + +TEST(pg_missing_t, got) +{ + // void pg_missing_t::got(const hobject_t& oid, eversion_t v) + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + // assert if the oid does not exist + { + PrCtl unset_dumpable; + EXPECT_DEATH(missing.got(oid, eversion_t()), ""); + } + EXPECT_FALSE(missing.is_missing(oid)); + epoch_t epoch = 10; + eversion_t need(epoch,10); + missing.add(oid, need, eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + // assert if that the version to be removed is lower than the version of the object + { + PrCtl unset_dumpable; + EXPECT_DEATH(missing.got(oid, eversion_t(epoch / 2,20)), ""); + } + // remove of a later version removes the object + missing.got(oid, eversion_t(epoch * 2,20)); + EXPECT_FALSE(missing.is_missing(oid)); + } + // void pg_missing_t::got(const std::map<hobject_t, pg_missing_item>::iterator &m) + { + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + pg_missing_t missing; + EXPECT_FALSE(missing.is_missing(oid)); + missing.add(oid, eversion_t(), eversion_t(), false); + EXPECT_TRUE(missing.is_missing(oid)); + auto m = missing.get_items().find(oid); + missing.got(m); + EXPECT_FALSE(missing.is_missing(oid)); + } +} + +TEST(pg_missing_t, split_into) +{ + uint32_t hash1 = 1; + hobject_t oid1(object_t("objname"), "key1", 123, hash1, 0, ""); + uint32_t hash2 = 2; + hobject_t oid2(object_t("objname"), "key2", 123, hash2, 0, ""); + pg_missing_t missing; + missing.add(oid1, eversion_t(), eversion_t(), false); + missing.add(oid2, eversion_t(), eversion_t(), false); + pg_t 
child_pgid; + child_pgid.m_seed = 1; + pg_missing_t child; + unsigned split_bits = 1; + missing.split_into(child_pgid, split_bits, &child); + EXPECT_TRUE(child.is_missing(oid1)); + EXPECT_FALSE(child.is_missing(oid2)); + EXPECT_FALSE(missing.is_missing(oid1)); + EXPECT_TRUE(missing.is_missing(oid2)); +} + +TEST(pg_pool_t_test, get_pg_num_divisor) { + pg_pool_t p; + p.set_pg_num(16); + p.set_pgp_num(16); + + for (int i = 0; i < 16; ++i) + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(i, 1))); + + p.set_pg_num(12); + p.set_pgp_num(12); + + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(0, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(1, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(2, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(3, 1))); + ASSERT_EQ(8u, p.get_pg_num_divisor(pg_t(4, 1))); + ASSERT_EQ(8u, p.get_pg_num_divisor(pg_t(5, 1))); + ASSERT_EQ(8u, p.get_pg_num_divisor(pg_t(6, 1))); + ASSERT_EQ(8u, p.get_pg_num_divisor(pg_t(7, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(8, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(9, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(10, 1))); + ASSERT_EQ(16u, p.get_pg_num_divisor(pg_t(11, 1))); +} + +TEST(pg_pool_t_test, get_random_pg_position) { + srand(getpid()); + for (int i = 0; i < 100; ++i) { + pg_pool_t p; + p.set_pg_num(1 + (rand() % 1000)); + p.set_pgp_num(p.get_pg_num()); + pg_t pgid(rand() % p.get_pg_num(), 1); + uint32_t h = p.get_random_pg_position(pgid, rand()); + uint32_t ps = p.raw_hash_to_pg(h); + cout << p.get_pg_num() << " " << pgid << ": " + << h << " -> " << pg_t(ps, 1) << std::endl; + ASSERT_EQ(pgid.ps(), ps); + } +} + +TEST(shard_id_t, iostream) { + set<shard_id_t> shards; + shards.insert(shard_id_t(0)); + shards.insert(shard_id_t(1)); + shards.insert(shard_id_t(2)); + ostringstream out; + out << shards; + ASSERT_EQ(out.str(), "0,1,2"); + + shard_id_t noshard = shard_id_t::NO_SHARD; + shard_id_t zero(0); + ASSERT_GT(zero, noshard); +} + +TEST(spg_t, parse) { + spg_t a(pg_t(1,2), 
shard_id_t::NO_SHARD); + spg_t aa, bb; + spg_t b(pg_t(3,2), shard_id_t(2)); + std::string s = stringify(a); + ASSERT_TRUE(aa.parse(s.c_str())); + ASSERT_EQ(a, aa); + + s = stringify(b); + ASSERT_TRUE(bb.parse(s.c_str())); + ASSERT_EQ(b, bb); +} + +TEST(coll_t, parse) { + const char *ok[] = { + "meta", + "1.2_head", + "1.2_TEMP", + "1.2s3_head", + "1.3s2_TEMP", + "1.2s0_head", + 0 + }; + const char *bad[] = { + "foo", + "1.2_food", + "1.2_head ", + //" 1.2_head", // hrm, this parses, which is not ideal.. pg_t's fault? + "1.2_temp", + "1.2_HEAD", + "1.xS3_HEAD", + "1.2s_HEAD", + "1.2sfoo_HEAD", + 0 + }; + coll_t a; + for (int i = 0; ok[i]; ++i) { + cout << "check ok " << ok[i] << std::endl; + ASSERT_TRUE(a.parse(ok[i])); + ASSERT_EQ(string(ok[i]), a.to_str()); + } + for (int i = 0; bad[i]; ++i) { + cout << "check bad " << bad[i] << std::endl; + ASSERT_FALSE(a.parse(bad[i])); + } +} + +TEST(coll_t, temp) { + spg_t pgid; + coll_t foo(pgid); + ASSERT_EQ(foo.to_str(), string("0.0_head")); + + coll_t temp = foo.get_temp(); + ASSERT_EQ(temp.to_str(), string("0.0_TEMP")); + + spg_t pgid2; + ASSERT_TRUE(temp.is_temp()); + ASSERT_TRUE(temp.is_temp(&pgid2)); + ASSERT_EQ(pgid, pgid2); +} + +TEST(coll_t, assigment) { + spg_t pgid; + coll_t right(pgid); + ASSERT_EQ(right.to_str(), string("0.0_head")); + + coll_t left, middle; + + ASSERT_EQ(left.to_str(), string("meta")); + ASSERT_EQ(middle.to_str(), string("meta")); + + left = middle = right; + + ASSERT_EQ(left.to_str(), string("0.0_head")); + ASSERT_EQ(middle.to_str(), string("0.0_head")); + + ASSERT_NE(middle.c_str(), right.c_str()); + ASSERT_NE(left.c_str(), middle.c_str()); +} + +TEST(hobject_t, parse) { + const char *v[] = { + "MIN", + "MAX", + "-1:60c2fa6d:::inc_osdmap.1:0", + "-1:60c2fa6d:::inc_osdmap.1:333", + "0:00000000::::head", + "1:00000000:nspace:key:obj:head", + "-40:00000000:nspace::obj:head", + "20:00000000::key:obj:head", + "20:00000000:::o%fdj:head", + "20:00000000:::o%02fdj:head", + 
"20:00000000:::_zero_%00_:head", + NULL + }; + + for (unsigned i=0; v[i]; ++i) { + hobject_t o; + bool b = o.parse(v[i]); + if (!b) { + cout << "failed to parse " << v[i] << std::endl; + ASSERT_TRUE(false); + } + string s = stringify(o); + if (s != v[i]) { + cout << v[i] << " -> " << o << " -> " << s << std::endl; + ASSERT_EQ(s, string(v[i])); + } + } +} + +TEST(ghobject_t, cmp) { + ghobject_t min; + ghobject_t sep; + sep.set_shard(shard_id_t(1)); + sep.hobj.pool = -1; + cout << min << " < " << sep << std::endl; + ASSERT_TRUE(min < sep); + + sep.set_shard(shard_id_t::NO_SHARD); + cout << "sep shard " << sep.shard_id << std::endl; + ghobject_t o(hobject_t(object_t(), string(), CEPH_NOSNAP, 0x42, + 1, string())); + cout << "o " << o << std::endl; + ASSERT_TRUE(o > sep); +} + +TEST(ghobject_t, parse) { + const char *v[] = { + "GHMIN", + "GHMAX", + "13#0:00000000::::head#", + "13#0:00000000::::head#deadbeef", + "#-1:60c2fa6d:::inc_osdmap.1:333#deadbeef", + "#-1:60c2fa6d:::inc%02osdmap.1:333#deadbeef", + "#-1:60c2fa6d:::inc_osdmap.1:333#", + "1#MIN#deadbeefff", + "1#MAX#", + "#MAX#123", + "#-40:00000000:nspace::obj:head#", + NULL + }; + + for (unsigned i=0; v[i]; ++i) { + ghobject_t o; + bool b = o.parse(v[i]); + if (!b) { + cout << "failed to parse " << v[i] << std::endl; + ASSERT_TRUE(false); + } + string s = stringify(o); + if (s != v[i]) { + cout << v[i] << " -> " << o << " -> " << s << std::endl; + ASSERT_EQ(s, string(v[i])); + } + } +} + +TEST(pool_opts_t, invalid_opt) { + EXPECT_FALSE(pool_opts_t::is_opt_name("INVALID_OPT")); + PrCtl unset_dumpable; + EXPECT_DEATH(pool_opts_t::get_opt_desc("INVALID_OPT"), ""); +} + +TEST(pool_opts_t, scrub_min_interval) { + EXPECT_TRUE(pool_opts_t::is_opt_name("scrub_min_interval")); + EXPECT_EQ(pool_opts_t::get_opt_desc("scrub_min_interval"), + pool_opts_t::opt_desc_t(pool_opts_t::SCRUB_MIN_INTERVAL, + pool_opts_t::DOUBLE)); + + pool_opts_t opts; + EXPECT_FALSE(opts.is_set(pool_opts_t::SCRUB_MIN_INTERVAL)); + { + PrCtl 
unset_dumpable; + EXPECT_DEATH(opts.get(pool_opts_t::SCRUB_MIN_INTERVAL), ""); + } + double val; + EXPECT_FALSE(opts.get(pool_opts_t::SCRUB_MIN_INTERVAL, &val)); + opts.set(pool_opts_t::SCRUB_MIN_INTERVAL, static_cast<double>(2015)); + EXPECT_TRUE(opts.get(pool_opts_t::SCRUB_MIN_INTERVAL, &val)); + EXPECT_EQ(val, 2015); + opts.unset(pool_opts_t::SCRUB_MIN_INTERVAL); + EXPECT_FALSE(opts.is_set(pool_opts_t::SCRUB_MIN_INTERVAL)); +} + +TEST(pool_opts_t, scrub_max_interval) { + EXPECT_TRUE(pool_opts_t::is_opt_name("scrub_max_interval")); + EXPECT_EQ(pool_opts_t::get_opt_desc("scrub_max_interval"), + pool_opts_t::opt_desc_t(pool_opts_t::SCRUB_MAX_INTERVAL, + pool_opts_t::DOUBLE)); + + pool_opts_t opts; + EXPECT_FALSE(opts.is_set(pool_opts_t::SCRUB_MAX_INTERVAL)); + { + PrCtl unset_dumpable; + EXPECT_DEATH(opts.get(pool_opts_t::SCRUB_MAX_INTERVAL), ""); + } + double val; + EXPECT_FALSE(opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &val)); + opts.set(pool_opts_t::SCRUB_MAX_INTERVAL, static_cast<double>(2015)); + EXPECT_TRUE(opts.get(pool_opts_t::SCRUB_MAX_INTERVAL, &val)); + EXPECT_EQ(val, 2015); + opts.unset(pool_opts_t::SCRUB_MAX_INTERVAL); + EXPECT_FALSE(opts.is_set(pool_opts_t::SCRUB_MAX_INTERVAL)); +} + +TEST(pool_opts_t, deep_scrub_interval) { + EXPECT_TRUE(pool_opts_t::is_opt_name("deep_scrub_interval")); + EXPECT_EQ(pool_opts_t::get_opt_desc("deep_scrub_interval"), + pool_opts_t::opt_desc_t(pool_opts_t::DEEP_SCRUB_INTERVAL, + pool_opts_t::DOUBLE)); + + pool_opts_t opts; + EXPECT_FALSE(opts.is_set(pool_opts_t::DEEP_SCRUB_INTERVAL)); + { + PrCtl unset_dumpable; + EXPECT_DEATH(opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL), ""); + } + double val; + EXPECT_FALSE(opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &val)); + opts.set(pool_opts_t::DEEP_SCRUB_INTERVAL, static_cast<double>(2015)); + EXPECT_TRUE(opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &val)); + EXPECT_EQ(val, 2015); + opts.unset(pool_opts_t::DEEP_SCRUB_INTERVAL); + 
EXPECT_FALSE(opts.is_set(pool_opts_t::DEEP_SCRUB_INTERVAL)); +} + +/* Recoverability predicate used by the PITest cases: true when `have` holds at least required_size shards. */ struct RequiredPredicate : IsPGRecoverablePredicate { + unsigned required_size; + explicit RequiredPredicate(unsigned required_size) : required_size(required_size) {} + bool operator()(const set<pg_shard_t> &have) const override { + return have.size() >= required_size; + } +}; + +using namespace std; +/* Table-driven OSD state callback used by the PITest cases: returns the state stored for `osd` and, when lost_at is non-null, writes the stored epoch through it. The `start` argument is not used; `osd` is looked up with states.at(). */ struct MapPredicate { + map<int, pair<PastIntervals::osd_state_t, epoch_t>> states; + explicit MapPredicate( + const vector<pair<int, pair<PastIntervals::osd_state_t, epoch_t>>> &_states) + : states(_states.begin(), _states.end()) {} + PastIntervals::osd_state_t operator()(epoch_t start, int osd, epoch_t *lost_at) { + auto val = states.at(osd); + if (lost_at) + *lost_at = val.second; + return val.first; + } +}; + +/* Short aliases used by the PITest interval tables. */ using sit = shard_id_t; +using PI = PastIntervals; +using pst = pg_shard_t; +using ival = PastIntervals::pg_interval_t; +using ivallst = std::list<ival>; +const int N = 0x7fffffff /* CRUSH_ITEM_NONE, can't import crush.h here */; + +/* Fixture for prior-set tests. run() builds a PastIntervals from `intervals` with add_interval(), calls get_prior_set() on it with the given last_epoch_started, OSD state table, up and acting sets, and asserts the result equals a PI::PriorSet constructed directly from the expected probe, down, blocked_by and pg_down values. min_to_peer is the shard count required by the RequiredPredicate handed to both prior sets. */ struct PITest : ::testing::Test { + PITest() {} + void run( + bool ec_pool, + ivallst intervals, + epoch_t last_epoch_started, + unsigned min_to_peer, + vector<pair<int, pair<PastIntervals::osd_state_t, epoch_t>>> osd_states, + vector<int> up, + vector<int> acting, + set<pg_shard_t> probe, + set<int> down, + map<int, epoch_t> blocked_by, + bool pg_down) { + RequiredPredicate rec_pred(min_to_peer); + MapPredicate map_pred(osd_states); + + /* Expected result, built straight from the caller-supplied values. NOTE(review): the predicate is passed as a raw new; presumably PriorSet takes ownership of it -- confirm. */ PI::PriorSet correct( + ec_pool, + probe, + down, + blocked_by, + pg_down, + new RequiredPredicate(rec_pred)); + + /* Actual result, computed from the recorded intervals. */ PastIntervals compact; + for (auto &&i: intervals) { + compact.add_interval(ec_pool, i); + } + PI::PriorSet compact_ps = compact.get_prior_set( + ec_pool, + last_epoch_started, + new RequiredPredicate(rec_pred), + map_pred, + up, + acting, + nullptr); + ASSERT_EQ(correct, compact_ps); + } +}; + +/* Replicated pool with osd 2 DOWN at the end: expects probe {0, 1}, down {2}, empty blocked_by and pg_down false. */ TEST_F(PITest, past_intervals_rep) { + run( + /* ec_pool */ false, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, 
true, 0, 0} + , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1} + , ival{{ 2}, { 2}, 31, 35, false, 2, 2} + , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 1, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::DOWN , 0)) + }, + /* up */ {0, 1 }, + /* acting */ {0, 1 }, + /* probe */ {pst(0), pst(1)}, + /* down */ {2}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +/* Erasure-coded pool with osd 0 DOWN at the end: expects probe of shards (1, s1) and (2, s2), down {0}, empty blocked_by and pg_down false. */ TEST_F(PITest, past_intervals_ec) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0} + , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1} + }, + /* les */ 5, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::DOWN , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::UP , 0)) + }, + /* up */ {N, 1, 2}, + /* acting */ {N, 1, 2}, + /* probe */ {pst(1, sit(1)), pst(2, sit(2))}, + /* down */ {0}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +/* Replicated pool with osd 2 DOWN at the end: expects probe {0, 1}, down {2}, blocked_by osd 2 and pg_down true. */ TEST_F(PITest, past_intervals_rep_down) { + run( + /* ec_pool */ false, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0} + , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1} + , ival{{ 2}, { 2}, 31, 35, true, 2, 2} + , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 1, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::DOWN , 0)) + }, + /* up */ {0, 1 }, + /* acting */ {0, 1 }, + /* probe */ {pst(0), pst(1)}, + /* down */ {2}, + /* blocked_by */ {{2, 0}}, + /* pg_down */ true); +} + +/* Erasure-coded pool with osd 1 DOWN at the end: expects probe of shards (0, s0) and (2, s2), down {1}, blocked_by osd 1 and pg_down true. */ TEST_F(PITest, past_intervals_ec_down) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0} + , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1} + , ival{{N, N, 2}, {N, N, 2}, 31, 35, false, 2, 2} + }, + /* les */ 5, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + 
, make_pair(1, make_pair(PI::DOWN , 0)) + , make_pair(2, make_pair(PI::UP , 0)) + }, + /* acting */ {0, N, 2}, + /* up */ {0, N, 2}, + /* probe */ {pst(0, sit(0)), pst(2, sit(2))}, + /* down */ {1}, + /* blocked_by */ {{1, 0}}, + /* pg_down */ true); +} + +TEST_F(PITest, past_intervals_rep_no_subsets) { + run( + /* ec_pool */ false, + /* intervals */ + { ival{{0, 2}, {0, 2}, 10, 20, true, 0, 0} + , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1} + , ival{{0, 1 }, {0, 1 }, 31, 35, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 1, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::DOWN , 0)) + }, + /* acting */ {0, 1 }, + /* up */ {0, 1 }, + /* probe */ {pst(0), pst(1)}, + /* down */ {2}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +TEST_F(PITest, past_intervals_ec_no_subsets) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{0, N, 2}, {0, N, 2}, 10, 20, true, 0, 0} + , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1} + , ival{{0, 1, N}, {0, 1, N}, 31, 35, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::DOWN , 0)) + , make_pair(2, make_pair(PI::UP , 0)) + }, + /* acting */ {0, N, 2}, + /* up */ {0, N, 2}, + /* probe */ {pst(0, sit(0)), pst(2, sit(2))}, + /* down */ {1}, + /* blocked_by */ {{1, 0}}, + /* pg_down */ true); +} + +TEST_F(PITest, past_intervals_ec_no_subsets2) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{N, 1, 2}, {N, 1, 2}, 10, 20, true, 0, 0} + , ival{{0, N, 2}, {0, N, 2}, 21, 30, true, 1, 1} + , ival{{0, 3, N}, {0, 3, N}, 31, 35, true, 0, 0} + }, + /* les */ 31, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::DOWN , 0)) + , make_pair(2, make_pair(PI::UP , 0)) + , make_pair(3, make_pair(PI::UP , 0)) + }, + /* acting */ {0, N, 2}, + /* up */ {0, N, 2}, + /* probe */ {pst(0, 
sit(0)), pst(2, sit(2)), pst(3, sit(1))}, + /* down */ {1}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +TEST_F(PITest, past_intervals_rep_lost) { + run( + /* ec_pool */ false, + /* intervals */ + { ival{{0, 1, 2}, {0, 1, 2}, 10, 20, true, 0, 0} + , ival{{ 1, 2}, { 1, 2}, 21, 30, true, 1, 1} + , ival{{ 2}, { 2}, 31, 35, true, 2, 2} + , ival{{0, 2}, {0, 2}, 36, 50, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 1, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::UP , 0)) + , make_pair(2, make_pair(PI::LOST , 55)) + }, + /* acting */ {0, 1 }, + /* up */ {0, 1 }, + /* probe */ {pst(0), pst(1)}, + /* down */ {2}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +TEST_F(PITest, past_intervals_ec_lost) { + run( + /* ec_pool */ true, + /* intervals */ + { ival{{0, N, 2}, {0, N, 2}, 10, 20, true, 0, 0} + , ival{{N, 1, 2}, {N, 1, 2}, 21, 30, true, 1, 1} + , ival{{0, 1, N}, {0, 1, N}, 31, 35, true, 0, 0} + }, + /* les */ 5, + /* min_peer */ 2, + /* osd states at end */ + { make_pair(0, make_pair(PI::UP , 0)) + , make_pair(1, make_pair(PI::LOST , 36)) + , make_pair(2, make_pair(PI::UP , 0)) + }, + /* acting */ {0, N, 2}, + /* up */ {0, N, 2}, + /* probe */ {pst(0, sit(0)), pst(2, sit(2))}, + /* down */ {1}, + /* blocked_by */ {}, + /* pg_down */ false); +} + +void ci_ref_test( + object_manifest_t l, + object_manifest_t to_remove, + object_manifest_t g, + object_ref_delta_t expected_delta) +{ + { + object_ref_delta_t delta; + to_remove.calc_refs_to_drop_on_removal( + &l, + &g, + delta); + ASSERT_EQ( + expected_delta, + delta); + } + + // calc_refs_to_drop specifically handles nullptr identically to empty + // chunk_map + if (l.chunk_map.empty() || g.chunk_map.empty()) { + object_ref_delta_t delta; + to_remove.calc_refs_to_drop_on_removal( + l.chunk_map.empty() ? nullptr : &l, + g.chunk_map.empty() ? 
nullptr : &g, + delta); + ASSERT_EQ( + expected_delta, + delta); + } +} + +void ci_ref_test_on_modify( + object_manifest_t l, + object_manifest_t to_remove, + ObjectCleanRegions clean_regions, + object_ref_delta_t expected_delta) +{ + { + object_ref_delta_t delta; + to_remove.calc_refs_to_drop_on_modify( + &l, + clean_regions, + delta); + ASSERT_EQ( + expected_delta, + delta); + } +} + +void ci_ref_test_inc_on_set( + object_manifest_t l, + object_manifest_t added_set, + object_manifest_t g, + object_ref_delta_t expected_delta) +{ + { + object_ref_delta_t delta; + added_set.calc_refs_to_inc_on_set( + &l, + &g, + delta); + ASSERT_EQ( + expected_delta, + delta); + } +} + +hobject_t mk_hobject(string name) +{ + return hobject_t( + std::move(name), + string(), + CEPH_NOSNAP, + 0x42, + 1, + string()); +} + +object_manifest_t mk_manifest( + std::map<uint64_t, std::tuple<uint64_t, uint64_t, string>> m) +{ + object_manifest_t ret; + ret.type = object_manifest_t::TYPE_CHUNKED; + for (auto &[offset, tgt] : m) { + auto &[tgt_off, length, name] = tgt; + auto &ci = ret.chunk_map[offset]; + ci.offset = tgt_off; + ci.length = length; + ci.oid = mk_hobject(name); + } + return ret; +} + +object_ref_delta_t mk_delta(std::map<string, int> _m) { + std::map<hobject_t, int> m; + for (auto &[name, delta] : _m) { + m.insert( + std::make_pair( + mk_hobject(name), + delta)); + } + return object_ref_delta_t(std::move(m)); +} + +TEST(chunk_info_test, calc_refs_to_drop) { + ci_ref_test( + mk_manifest({}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({}), + mk_delta({{"foo", -1}})); + +} + + +TEST(chunk_info_test, calc_refs_to_drop_match) { + ci_ref_test( + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_delta({})); + +} + +TEST(chunk_info_test, calc_refs_to_drop_head_match) { + ci_ref_test( + mk_manifest({}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_delta({})); + +} + 
+TEST(chunk_info_test, calc_refs_to_drop_tail_match) { + ci_ref_test( + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({}), + mk_delta({})); + +} + +TEST(chunk_info_test, calc_refs_to_drop_second_reference) { + ci_ref_test( + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}, {4<<10, {0, 1<<10, "foo"}}}), + mk_manifest({}), + mk_delta({{"foo", -1}})); + +} + +TEST(chunk_info_test, calc_refs_offsets_dont_match) { + ci_ref_test( + mk_manifest({{0, {0, 1024, "foo"}}}), + mk_manifest({{512, {0, 1024, "foo"}}, {(4<<10) + 512, {0, 1<<10, "foo"}}}), + mk_manifest({}), + mk_delta({{"foo", -2}})); + +} + +TEST(chunk_info_test, calc_refs_g_l_match) { + ci_ref_test( + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "foo"}}, {4096, {0, 1024, "bar"}}}), + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_delta({{"foo", -2}, {"bar", -1}})); + +} + +TEST(chunk_info_test, calc_refs_g_l_match_no_this) { + ci_ref_test( + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "bar"}}}), + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_delta({{"foo", -1}, {"bar", -1}})); + +} + +TEST(chunk_info_test, calc_refs_modify_mismatch) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 1024); + clean_regions.mark_data_region_dirty(512, 1024); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{0, {0, 1024, "bar"}}, {512, {2048, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}, {"ttt", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 1024); + clean_regions.mark_data_region_dirty(512, 1024); + clean_regions.mark_data_region_dirty(4096, 1024); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + mk_manifest({{0, {0, 1024, "bar"}}, 
{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match_dirty_overlap) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 256); + clean_regions.mark_data_region_dirty(256, 4096); + ci_ref_test_on_modify( + mk_manifest({}), + mk_manifest({{0, {0, 256, "bar"}}, {512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}, {"foo", -1}, {"ttt", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match_dirty_overlap2) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 256); + clean_regions.mark_data_region_dirty(256, 1024); + clean_regions.mark_data_region_dirty(3584, 1024); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + mk_manifest({{0, {0, 256, "bar"}}, {512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match_dirty_overlap3) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 256); + clean_regions.mark_data_region_dirty(256, 4096); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + mk_manifest({{0, {0, 256, "bar"}}, {512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_match_clone_overlap) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 256); + clean_regions.mark_data_region_dirty(256, 1024); + clean_regions.mark_data_region_dirty(3584, 1024); + ci_ref_test_on_modify( + mk_manifest({{512, {2048, 1024, "foo"}}, {4096, {0, 1024, "ttt"}}}), + mk_manifest({{0, {0, 256, "bar"}}, {256, {2048, 1024, "foo"}}, {3584, {0, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}, {"foo", 
-1}, {"ttt", -1}})); +} + +TEST(chunk_info_test, calc_refs_modify_no_snap) { + ObjectCleanRegions clean_regions(0, 8192, false); + clean_regions.mark_data_region_dirty(0, 1024); + clean_regions.mark_data_region_dirty(512, 1024); + ci_ref_test_on_modify( + mk_manifest({}), + mk_manifest({{0, {0, 1024, "bar"}}, {512, {2048, 1024, "ttt"}}}), + clean_regions, + mk_delta({{"bar", -1}, {"ttt", -1}})); +} + +TEST(chunk_info_test, calc_refs_inc) { + ci_ref_test_inc_on_set( + mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{1024, {0, 1024, "bar"}}}), + mk_manifest({{4096, {0, 1024, "foo"}}}), + mk_delta({{"bar", 1}})); +} + +TEST(chunk_info_test, calc_refs_inc2) { + ci_ref_test_inc_on_set( + mk_manifest({{512, {0, 1024, "aaa"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{1024, {0, 1024, "bar"}}, {4096, {0, 1024, "bbb"}}}), + mk_manifest({{512, {0, 1024, "foo"}}}), + mk_delta({{"bar", 1}, {"bbb", 1}})); +} + +TEST(chunk_info_test, calc_refs_inc_no_l) { + ci_ref_test_inc_on_set( + mk_manifest({}), + mk_manifest({{1024, {0, 1024, "bar"}}, {4096, {0, 1024, "bbb"}}}), + mk_manifest({{512, {0, 1024, "foo"}}}), + mk_delta({{"bar", 1}, {"bbb", 1}})); +} + +TEST(chunk_info_test, calc_refs_inc_no_g) { + ci_ref_test_inc_on_set( + mk_manifest({{512, {0, 1024, "aaa"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{1024, {0, 1024, "bar"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({}), + mk_delta({{"bar", 1}})); +} + +TEST(chunk_info_test, calc_refs_inc_match_g_l) { + ci_ref_test_inc_on_set( + mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}), + mk_delta({{"aaa", -1}, {"foo", -1}})); +} + +TEST(chunk_info_test, calc_refs_inc_match) { + ci_ref_test_inc_on_set( + mk_manifest({{256, {0, 256, "bbb"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{256, {0, 256, "aaa"}}, {4096, {0, 1024, "foo"}}}), + mk_manifest({{256, 
{0, 256, "aaa"}}, {4096, {0, 1024, "ccc"}}}), + mk_delta({})); +} + +/* + * Local Variables: + * compile-command: "cd ../.. ; + * make unittest_osd_types ; + * ./unittest_osd_types # --gtest_filter=pg_missing_t.constructor + * " + * End: + */ diff --git a/src/test/osdc/CMakeLists.txt b/src/test/osdc/CMakeLists.txt new file mode 100644 index 000000000..297c2672c --- /dev/null +++ b/src/test/osdc/CMakeLists.txt @@ -0,0 +1,13 @@ +add_executable(ceph_test_objectcacher_stress + object_cacher_stress.cc + FakeWriteback.cc + MemWriteback.cc + ) +target_link_libraries(ceph_test_objectcacher_stress + osdc + global + ${EXTRALIBS} + ${CMAKE_DL_LIBS} + ) +install(TARGETS ceph_test_objectcacher_stress + DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/src/test/osdc/FakeWriteback.cc b/src/test/osdc/FakeWriteback.cc new file mode 100644 index 000000000..2f58965cc --- /dev/null +++ b/src/test/osdc/FakeWriteback.cc @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <errno.h> +#include <time.h> + +#include <thread> +#include "common/debug.h" +#include "common/Cond.h" +#include "common/Finisher.h" +#include "common/ceph_mutex.h" +#include "include/ceph_assert.h" +#include "common/ceph_time.h" + +#include "FakeWriteback.h" + +#define dout_subsys ceph_subsys_objectcacher +#undef dout_prefix +#define dout_prefix *_dout << "FakeWriteback(" << this << ") " + +class C_Delay : public Context { + CephContext *m_cct; + Context *m_con; + ceph::timespan m_delay; + ceph::mutex *m_lock; + bufferlist *m_bl; + uint64_t m_off; + +public: + C_Delay(CephContext *cct, Context *c, ceph::mutex *lock, uint64_t off, + bufferlist *pbl, uint64_t delay_ns=0) + : m_cct(cct), m_con(c), m_delay(delay_ns * std::chrono::nanoseconds(1)), + m_lock(lock), m_bl(pbl), m_off(off) {} + void finish(int r) override { + std::this_thread::sleep_for(m_delay); + if (m_bl) { + buffer::ptr bp(r); + bp.zero(); + m_bl->append(bp); + ldout(m_cct, 20) << 
"finished read " << m_off << "~" << r << dendl; + } + std::lock_guard locker{*m_lock}; + m_con->complete(r); + } +}; + +FakeWriteback::FakeWriteback(CephContext *cct, ceph::mutex *lock, uint64_t delay_ns) + : m_cct(cct), m_lock(lock), m_delay_ns(delay_ns) +{ + m_finisher = new Finisher(cct); + m_finisher->start(); +} + +FakeWriteback::~FakeWriteback() +{ + m_finisher->stop(); + delete m_finisher; +} + +void FakeWriteback::read(const object_t& oid, uint64_t object_no, + const object_locator_t& oloc, + uint64_t off, uint64_t len, snapid_t snapid, + bufferlist *pbl, uint64_t trunc_size, + __u32 trunc_seq, int op_flags, + const ZTracer::Trace &parent_trace, + Context *onfinish) +{ + C_Delay *wrapper = new C_Delay(m_cct, onfinish, m_lock, off, pbl, + m_delay_ns); + m_finisher->queue(wrapper, len); +} + +ceph_tid_t FakeWriteback::write(const object_t& oid, + const object_locator_t& oloc, + uint64_t off, uint64_t len, + const SnapContext& snapc, + const bufferlist &bl, ceph::real_time mtime, + uint64_t trunc_size, __u32 trunc_seq, + ceph_tid_t journal_tid, + const ZTracer::Trace &parent_trace, + Context *oncommit) +{ + C_Delay *wrapper = new C_Delay(m_cct, oncommit, m_lock, off, NULL, + m_delay_ns); + m_finisher->queue(wrapper, 0); + return ++m_tid; +} + +bool FakeWriteback::may_copy_on_write(const object_t&, uint64_t, uint64_t, + snapid_t) +{ + return false; +} diff --git a/src/test/osdc/FakeWriteback.h b/src/test/osdc/FakeWriteback.h new file mode 100644 index 000000000..11f78e813 --- /dev/null +++ b/src/test/osdc/FakeWriteback.h @@ -0,0 +1,47 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_TEST_OSDC_FAKEWRITEBACK_H +#define CEPH_TEST_OSDC_FAKEWRITEBACK_H + +#include "include/Context.h" +#include "include/types.h" +#include "osd/osd_types.h" +#include "osdc/WritebackHandler.h" + +#include <atomic> + +class Finisher; + +class FakeWriteback : public WritebackHandler { +public: + FakeWriteback(CephContext 
*cct, ceph::mutex *lock, uint64_t delay_ns); + ~FakeWriteback() override; + + void read(const object_t& oid, uint64_t object_no, + const object_locator_t& oloc, uint64_t off, uint64_t len, + snapid_t snapid, bufferlist *pbl, uint64_t trunc_size, + __u32 trunc_seq, int op_flags, + const ZTracer::Trace &parent_trace, + Context *onfinish) override; + + ceph_tid_t write(const object_t& oid, const object_locator_t& oloc, + uint64_t off, uint64_t len, + const SnapContext& snapc, const bufferlist &bl, + ceph::real_time mtime, uint64_t trunc_size, + __u32 trunc_seq, ceph_tid_t journal_tid, + const ZTracer::Trace &parent_trace, + Context *oncommit) override; + + using WritebackHandler::write; + + bool may_copy_on_write(const object_t&, uint64_t, uint64_t, + snapid_t) override; +private: + CephContext *m_cct; + ceph::mutex *m_lock; + uint64_t m_delay_ns; + std::atomic<unsigned> m_tid = { 0 }; + Finisher *m_finisher; +}; + +#endif diff --git a/src/test/osdc/MemWriteback.cc b/src/test/osdc/MemWriteback.cc new file mode 100644 index 000000000..4cb11291a --- /dev/null +++ b/src/test/osdc/MemWriteback.cc @@ -0,0 +1,166 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <errno.h> +#include <time.h> + +#include <thread> +#include "common/debug.h" +#include "common/Cond.h" +#include "common/Finisher.h" +#include "common/ceph_mutex.h" +#include "include/ceph_assert.h" +#include "common/ceph_time.h" + +#include "MemWriteback.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_objectcacher +#undef dout_prefix +#define dout_prefix *_dout << "MemWriteback(" << this << ") " + +class C_DelayRead : public Context { + MemWriteback *wb; + CephContext *m_cct; + Context *m_con; + ceph::timespan m_delay; + ceph::mutex *m_lock; + object_t m_oid; + uint64_t m_off; + uint64_t m_len; + bufferlist *m_bl; + +public: + C_DelayRead(MemWriteback *mwb, CephContext *cct, Context *c, ceph::mutex *lock, + const object_t& 
oid, uint64_t off, uint64_t len, bufferlist *pbl, + uint64_t delay_ns=0) + : wb(mwb), m_cct(cct), m_con(c), + m_delay(delay_ns * std::chrono::nanoseconds(1)), + m_lock(lock), m_oid(oid), m_off(off), m_len(len), m_bl(pbl) {} + void finish(int r) override { + std::this_thread::sleep_for(m_delay); + std::lock_guard locker{*m_lock}; + r = wb->read_object_data(m_oid, m_off, m_len, m_bl); + if (m_con) + m_con->complete(r); + } +}; + +class C_DelayWrite : public Context { + MemWriteback *wb; + CephContext *m_cct; + Context *m_con; + ceph::timespan m_delay; + ceph::mutex *m_lock; + object_t m_oid; + uint64_t m_off; + uint64_t m_len; + const bufferlist& m_bl; + +public: + C_DelayWrite(MemWriteback *mwb, CephContext *cct, Context *c, ceph::mutex *lock, + const object_t& oid, uint64_t off, uint64_t len, + const bufferlist& bl, uint64_t delay_ns=0) + : wb(mwb), m_cct(cct), m_con(c), + m_delay(delay_ns * std::chrono::nanoseconds(1)), + m_lock(lock), m_oid(oid), m_off(off), m_len(len), m_bl(bl) {} + void finish(int r) override { + std::this_thread::sleep_for(m_delay); + std::lock_guard locker{*m_lock}; + wb->write_object_data(m_oid, m_off, m_len, m_bl); + if (m_con) + m_con->complete(r); + } +}; + +MemWriteback::MemWriteback(CephContext *cct, ceph::mutex *lock, uint64_t delay_ns) + : m_cct(cct), m_lock(lock), m_delay_ns(delay_ns) +{ + m_finisher = new Finisher(cct); + m_finisher->start(); +} + +MemWriteback::~MemWriteback() +{ + m_finisher->stop(); + delete m_finisher; +} + +void MemWriteback::read(const object_t& oid, uint64_t object_no, + const object_locator_t& oloc, + uint64_t off, uint64_t len, snapid_t snapid, + bufferlist *pbl, uint64_t trunc_size, + __u32 trunc_seq, int op_flags, + const ZTracer::Trace &parent_trace, + Context *onfinish) +{ + ceph_assert(snapid == CEPH_NOSNAP); + C_DelayRead *wrapper = new C_DelayRead(this, m_cct, onfinish, m_lock, oid, + off, len, pbl, m_delay_ns); + m_finisher->queue(wrapper, len); +} + +ceph_tid_t MemWriteback::write(const object_t& 
oid, + const object_locator_t& oloc, + uint64_t off, uint64_t len, + const SnapContext& snapc, + const bufferlist &bl, ceph::real_time mtime, + uint64_t trunc_size, __u32 trunc_seq, + ceph_tid_t journal_tid, + const ZTracer::Trace &parent_trace, + Context *oncommit) +{ + ceph_assert(snapc.seq == 0); + C_DelayWrite *wrapper = new C_DelayWrite(this, m_cct, oncommit, m_lock, oid, + off, len, bl, m_delay_ns); + m_finisher->queue(wrapper, 0); + return ++m_tid; +} + +void MemWriteback::write_object_data(const object_t& oid, uint64_t off, uint64_t len, + const bufferlist& data_bl) +{ + dout(1) << "writing " << oid << " " << off << "~" << len << dendl; + ceph_assert(len == data_bl.length()); + bufferlist& obj_bl = object_data[oid]; + bufferlist new_obj_bl; + // ensure size, or set it if new object + if (off + len > obj_bl.length()) { + obj_bl.append_zero(off + len - obj_bl.length()); + } + + // beginning + new_obj_bl.substr_of(obj_bl, 0, off); + // overwritten bit + new_obj_bl.append(data_bl); + // tail bit + bufferlist tmp; + tmp.substr_of(obj_bl, off+len, obj_bl.length()-(off+len)); + new_obj_bl.append(tmp); + obj_bl.swap(new_obj_bl); + dout(1) << oid << " final size " << obj_bl.length() << dendl; +} + +int MemWriteback::read_object_data(const object_t& oid, uint64_t off, uint64_t len, + bufferlist *data_bl) +{ + dout(1) << "reading " << oid << " " << off << "~" << len << dendl; + auto obj_i = object_data.find(oid); + if (obj_i == object_data.end()) { + dout(1) << oid << "DNE!" 
<< dendl; + return -ENOENT; + } + + const bufferlist& obj_bl = obj_i->second; + dout(1) << "reading " << oid << " from total size " << obj_bl.length() << dendl; + + uint64_t read_len = std::min(len, obj_bl.length()-off); + data_bl->substr_of(obj_bl, off, read_len); + return 0; +} + +bool MemWriteback::may_copy_on_write(const object_t&, uint64_t, uint64_t, + snapid_t) +{ + return false; +} diff --git a/src/test/osdc/MemWriteback.h b/src/test/osdc/MemWriteback.h new file mode 100644 index 000000000..12c1ac3c0 --- /dev/null +++ b/src/test/osdc/MemWriteback.h @@ -0,0 +1,52 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#ifndef CEPH_TEST_OSDC_MEMWRITEBACK_H +#define CEPH_TEST_OSDC_MEMWRITEBACK_H + +#include "include/Context.h" +#include "include/types.h" +#include "osd/osd_types.h" +#include "osdc/WritebackHandler.h" + +#include <atomic> + +class Finisher; + +class MemWriteback : public WritebackHandler { +public: + MemWriteback(CephContext *cct, ceph::mutex *lock, uint64_t delay_ns); + ~MemWriteback() override; + + void read(const object_t& oid, uint64_t object_no, + const object_locator_t& oloc, uint64_t off, uint64_t len, + snapid_t snapid, bufferlist *pbl, uint64_t trunc_size, + __u32 trunc_seq, int op_flags, + const ZTracer::Trace &parent_trace, + Context *onfinish) override; + + ceph_tid_t write(const object_t& oid, const object_locator_t& oloc, + uint64_t off, uint64_t len, + const SnapContext& snapc, const bufferlist &bl, + ceph::real_time mtime, uint64_t trunc_size, + __u32 trunc_seq, ceph_tid_t journal_tid, + const ZTracer::Trace &parent_trace, + Context *oncommit) override; + + using WritebackHandler::write; + + bool may_copy_on_write(const object_t&, uint64_t, uint64_t, + snapid_t) override; + void write_object_data(const object_t& oid, uint64_t off, uint64_t len, + const bufferlist& data_bl); + int read_object_data(const object_t& oid, uint64_t off, uint64_t len, + bufferlist *data_bl); +private: + 
std::map<object_t, bufferlist> object_data; + CephContext *m_cct; + ceph::mutex *m_lock; + uint64_t m_delay_ns; + std::atomic<unsigned> m_tid = { 0 }; + Finisher *m_finisher; +}; + +#endif diff --git a/src/test/osdc/object_cacher_stress.cc b/src/test/osdc/object_cacher_stress.cc new file mode 100644 index 000000000..371117ef6 --- /dev/null +++ b/src/test/osdc/object_cacher_stress.cc @@ -0,0 +1,424 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <cstdlib> +#include <ctime> +#include <sstream> +#include <string> +#include <vector> +#include <boost/scoped_ptr.hpp> + +#include "common/ceph_argparse.h" +#include "common/ceph_mutex.h" +#include "common/common_init.h" +#include "common/config.h" +#include "common/snap_types.h" +#include "global/global_init.h" +#include "include/buffer.h" +#include "include/Context.h" +#include "include/stringify.h" +#include "osdc/ObjectCacher.h" + +#include "FakeWriteback.h" +#include "MemWriteback.h" + +#include <atomic> + +// XXX: Only tests default namespace +struct op_data { + op_data(const std::string &oid, uint64_t offset, uint64_t len, bool read) + : extent(oid, 0, offset, len, 0), is_read(read) + { + extent.oloc.pool = 0; + extent.buffer_extents.push_back(make_pair(0, len)); + } + + ObjectExtent extent; + bool is_read; + ceph::bufferlist result; + std::atomic<unsigned> done = { 0 }; +}; + +class C_Count : public Context { + op_data *m_op; + std::atomic<unsigned> *m_outstanding = nullptr; +public: + C_Count(op_data *op, std::atomic<unsigned> *outstanding) + : m_op(op), m_outstanding(outstanding) {} + void finish(int r) override { + m_op->done++; + ceph_assert(*m_outstanding > 0); + (*m_outstanding)--; + } +}; + +int stress_test(uint64_t num_ops, uint64_t num_objs, + uint64_t max_obj_size, uint64_t delay_ns, + uint64_t max_op_len, float percent_reads) +{ + ceph::mutex lock = ceph::make_mutex("object_cacher_stress::object_cacher"); + FakeWriteback 
writeback(g_ceph_context, &lock, delay_ns); + + ObjectCacher obc(g_ceph_context, "test", writeback, lock, NULL, NULL, + g_conf()->client_oc_size, + g_conf()->client_oc_max_objects, + g_conf()->client_oc_max_dirty, + g_conf()->client_oc_target_dirty, + g_conf()->client_oc_max_dirty_age, + true); + obc.start(); + + std::atomic<unsigned> outstanding_reads = { 0 }; + vector<std::shared_ptr<op_data> > ops; + ObjectCacher::ObjectSet object_set(NULL, 0, 0); + SnapContext snapc; + ceph::buffer::ptr bp(max_op_len); + ceph::bufferlist bl; + uint64_t journal_tid = 0; + bp.zero(); + bl.append(bp); + + // schedule ops + std::cout << "Test configuration:\n\n" + << setw(10) << "ops: " << num_ops << "\n" + << setw(10) << "objects: " << num_objs << "\n" + << setw(10) << "obj size: " << max_obj_size << "\n" + << setw(10) << "delay: " << delay_ns << "\n" + << setw(10) << "max op len: " << max_op_len << "\n" + << setw(10) << "percent reads: " << percent_reads << "\n\n"; + + for (uint64_t i = 0; i < num_ops; ++i) { + uint64_t offset = random() % max_obj_size; + uint64_t max_len = std::min(max_obj_size - offset, max_op_len); + // no zero-length operations + uint64_t length = random() % (std::max<uint64_t>(max_len - 1, 1)) + 1; + std::string oid = "test" + stringify(random() % num_objs); + bool is_read = random() < percent_reads * float(RAND_MAX); + std::shared_ptr<op_data> op(new op_data(oid, offset, length, is_read)); + ops.push_back(op); + std::cout << "op " << i << " " << (is_read ? 
"read" : "write") + << " " << op->extent << "\n"; + if (op->is_read) { + ObjectCacher::OSDRead *rd = obc.prepare_read(CEPH_NOSNAP, &op->result, 0); + rd->extents.push_back(op->extent); + outstanding_reads++; + Context *completion = new C_Count(op.get(), &outstanding_reads); + lock.lock(); + int r = obc.readx(rd, &object_set, completion); + lock.unlock(); + ceph_assert(r >= 0); + if ((uint64_t)r == length) + completion->complete(r); + else + ceph_assert(r == 0); + } else { + ObjectCacher::OSDWrite *wr = obc.prepare_write(snapc, bl, + ceph::real_time::min(), 0, + ++journal_tid); + wr->extents.push_back(op->extent); + lock.lock(); + obc.writex(wr, &object_set, NULL); + lock.unlock(); + } + } + + // check that all reads completed + for (uint64_t i = 0; i < num_ops; ++i) { + if (!ops[i]->is_read) + continue; + std::cout << "waiting for read " << i << ops[i]->extent << std::endl; + uint64_t done = 0; + while (done == 0) { + done = ops[i]->done; + if (!done) { + usleep(500); + } + } + if (done > 1) { + std::cout << "completion called more than once!\n" << std::endl; + return EXIT_FAILURE; + } + } + + lock.lock(); + obc.release_set(&object_set); + lock.unlock(); + + int r = 0; + ceph::mutex mylock = ceph::make_mutex("librbd::ImageCtx::flush_cache"); + ceph::condition_variable cond; + bool done; + Context *onfinish = new C_SafeCond(mylock, cond, &done, &r); + lock.lock(); + bool already_flushed = obc.flush_set(&object_set, onfinish); + std::cout << "already flushed = " << already_flushed << std::endl; + lock.unlock(); + { + std::unique_lock locker{mylock}; + cond.wait(locker, [&done] { return done; }); + } + lock.lock(); + bool unclean = obc.release_set(&object_set); + lock.unlock(); + + if (unclean) { + std::cout << "unclean buffers left over!" << std::endl; + return EXIT_FAILURE; + } + + obc.stop(); + + std::cout << "Test completed successfully." 
<< std::endl; + + return EXIT_SUCCESS; +} + +int correctness_test(uint64_t delay_ns) +{ + std::cerr << "starting correctness test" << std::endl; + ceph::mutex lock = ceph::make_mutex("object_cacher_stress::object_cacher"); + MemWriteback writeback(g_ceph_context, &lock, delay_ns); + + ObjectCacher obc(g_ceph_context, "test", writeback, lock, NULL, NULL, + 1<<21, // max cache size, 2MB + 1, // max objects, just one + 1<<18, // max dirty, 256KB + 1<<17, // target dirty, 128KB + g_conf()->client_oc_max_dirty_age, + true); + obc.start(); + std::cerr << "just start()ed ObjectCacher" << std::endl; + + SnapContext snapc; + ceph_tid_t journal_tid = 0; + std::string oid("correctness_test_obj"); + ObjectCacher::ObjectSet object_set(NULL, 0, 0); + ceph::bufferlist zeroes_bl; + zeroes_bl.append_zero(1<<20); + + // set up a 4MB all-zero object + std::cerr << "writing 4x1MB object" << std::endl; + std::map<int, C_SaferCond> create_finishers; + for (int i = 0; i < 4; ++i) { + ObjectCacher::OSDWrite *wr = obc.prepare_write(snapc, zeroes_bl, + ceph::real_time::min(), 0, + ++journal_tid); + ObjectExtent extent(oid, 0, zeroes_bl.length()*i, zeroes_bl.length(), 0); + extent.oloc.pool = 0; + extent.buffer_extents.push_back(make_pair(0, 1<<20)); + wr->extents.push_back(extent); + lock.lock(); + obc.writex(wr, &object_set, &create_finishers[i]); + lock.unlock(); + } + + // write some 1-valued bits at 256-KB intervals for checking consistency + std::cerr << "Writing some 0xff values" << std::endl; + ceph::buffer::ptr ones(1<<16); + memset(ones.c_str(), 0xff, ones.length()); + ceph::bufferlist ones_bl; + ones_bl.append(ones); + for (int i = 1<<18; i < 1<<22; i+=1<<18) { + ObjectCacher::OSDWrite *wr = obc.prepare_write(snapc, ones_bl, + ceph::real_time::min(), 0, + ++journal_tid); + ObjectExtent extent(oid, 0, i, ones_bl.length(), 0); + extent.oloc.pool = 0; + extent.buffer_extents.push_back(make_pair(0, 1<<16)); + wr->extents.push_back(extent); + lock.lock(); + obc.writex(wr, &object_set, 
&create_finishers[i]); + lock.unlock(); + } + + for (auto i = create_finishers.begin(); i != create_finishers.end(); ++i) { + i->second.wait(); + } + std::cout << "Finished setting up object" << std::endl; + lock.lock(); + C_SaferCond flushcond; + bool done = obc.flush_all(&flushcond); + if (!done) { + std::cout << "Waiting for flush" << std::endl; + lock.unlock(); + flushcond.wait(); + lock.lock(); + } + lock.unlock(); + + /* now read the back half of the object in, check consistency, + */ + std::cout << "Reading back half of object (1<<21~1<<21)" << std::endl; + bufferlist readbl; + C_SaferCond backreadcond; + ObjectCacher::OSDRead *back_half_rd = obc.prepare_read(CEPH_NOSNAP, &readbl, 0); + ObjectExtent back_half_extent(oid, 0, 1<<21, 1<<21, 0); + back_half_extent.oloc.pool = 0; + back_half_extent.buffer_extents.push_back(make_pair(0, 1<<21)); + back_half_rd->extents.push_back(back_half_extent); + lock.lock(); + int r = obc.readx(back_half_rd, &object_set, &backreadcond); + lock.unlock(); + ceph_assert(r >= 0); + if (r == 0) { + std::cout << "Waiting to read data into cache" << std::endl; + r = backreadcond.wait(); + } + + ceph_assert(r == 1<<21); + + /* Read the whole object in, + * verify we have to wait for it to complete, + * overwrite a small piece, (http://tracker.ceph.com/issues/16002), + * and check consistency */ + + readbl.clear(); + std::cout<< "Reading whole object (0~1<<22)" << std::endl; + C_SaferCond frontreadcond; + ObjectCacher::OSDRead *whole_rd = obc.prepare_read(CEPH_NOSNAP, &readbl, 0); + ObjectExtent whole_extent(oid, 0, 0, 1<<22, 0); + whole_extent.oloc.pool = 0; + whole_extent.buffer_extents.push_back(make_pair(0, 1<<22)); + whole_rd->extents.push_back(whole_extent); + lock.lock(); + r = obc.readx(whole_rd, &object_set, &frontreadcond); + // we cleared out the cache by reading back half, it shouldn't pass immediately! 
+ ceph_assert(r == 0); + std::cout << "Data (correctly) not available without fetching" << std::endl; + + ObjectCacher::OSDWrite *verify_wr = obc.prepare_write(snapc, ones_bl, + ceph::real_time::min(), 0, + ++journal_tid); + ObjectExtent verify_extent(oid, 0, (1<<18)+(1<<16), ones_bl.length(), 0); + verify_extent.oloc.pool = 0; + verify_extent.buffer_extents.push_back(make_pair(0, 1<<16)); + verify_wr->extents.push_back(verify_extent); + C_SaferCond verify_finisher; + obc.writex(verify_wr, &object_set, &verify_finisher); + lock.unlock(); + std::cout << "wrote dirtying data" << std::endl; + + std::cout << "Waiting to read data into cache" << std::endl; + frontreadcond.wait(); + verify_finisher.wait(); + + std::cout << "Validating data" << std::endl; + + for (int i = 1<<18; i < 1<<22; i+=1<<18) { + bufferlist ones_maybe; + ones_maybe.substr_of(readbl, i, ones_bl.length()); + ceph_assert(0 == memcmp(ones_maybe.c_str(), ones_bl.c_str(), ones_bl.length())); + } + bufferlist ones_maybe; + ones_maybe.substr_of(readbl, (1<<18)+(1<<16), ones_bl.length()); + ceph_assert(0 == memcmp(ones_maybe.c_str(), ones_bl.c_str(), ones_bl.length())); + + std::cout << "validated that data is 0xff where it should be" << std::endl; + + lock.lock(); + C_SaferCond flushcond2; + done = obc.flush_all(&flushcond2); + if (!done) { + std::cout << "Waiting for final write flush" << std::endl; + lock.unlock(); + flushcond2.wait(); + lock.lock(); + } + + bool unclean = obc.release_set(&object_set); + if (unclean) { + std::cout << "unclean buffers left over!" 
<< std::endl; + vector<ObjectExtent> discard_extents; + int i = 0; + for (auto oi = object_set.objects.begin(); !oi.end(); ++oi) { + discard_extents.emplace_back(oid, i++, 0, 1<<22, 0); + } + obc.discard_set(&object_set, discard_extents); + lock.unlock(); + obc.stop(); + goto fail; + } + lock.unlock(); + + obc.stop(); + + std::cout << "Testing ObjectCacher correctness complete" << std::endl; + return EXIT_SUCCESS; + + fail: + return EXIT_FAILURE; +} + +int main(int argc, const char **argv) +{ + std::vector<const char*> args; + argv_to_vec(argc, argv, args); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + + long long delay_ns = 0; + long long num_ops = 1000; + long long obj_bytes = 4 << 20; + long long max_len = 128 << 10; + long long num_objs = 10; + float percent_reads = 0.90; + int seed = time(0) % 100000; + bool stress = false; + bool correctness = false; + std::ostringstream err; + std::vector<const char*>::iterator i; + for (i = args.begin(); i != args.end();) { + if (ceph_argparse_witharg(args, i, &delay_ns, err, "--delay-ns", (char*)NULL)) { + if (!err.str().empty()) { + cerr << argv[0] << ": " << err.str() << std::endl; + return EXIT_FAILURE; + } + } else if (ceph_argparse_witharg(args, i, &num_ops, err, "--ops", (char*)NULL)) { + if (!err.str().empty()) { + cerr << argv[0] << ": " << err.str() << std::endl; + return EXIT_FAILURE; + } + } else if (ceph_argparse_witharg(args, i, &num_objs, err, "--objects", (char*)NULL)) { + if (!err.str().empty()) { + cerr << argv[0] << ": " << err.str() << std::endl; + return EXIT_FAILURE; + } + } else if (ceph_argparse_witharg(args, i, &obj_bytes, err, "--obj-size", (char*)NULL)) { + if (!err.str().empty()) { + cerr << argv[0] << ": " << err.str() << std::endl; + return EXIT_FAILURE; + } + } else if (ceph_argparse_witharg(args, i, &max_len, err, "--max-op-size", (char*)NULL)) { + if (!err.str().empty()) { + cerr << argv[0] << ": " << 
err.str() << std::endl; + return EXIT_FAILURE; + } + } else if (ceph_argparse_witharg(args, i, &percent_reads, err, "--percent-read", (char*)NULL)) { + if (!err.str().empty()) { + cerr << argv[0] << ": " << err.str() << std::endl; + return EXIT_FAILURE; + } + } else if (ceph_argparse_witharg(args, i, &seed, err, "--seed", (char*)NULL)) { + if (!err.str().empty()) { + cerr << argv[0] << ": " << err.str() << std::endl; + return EXIT_FAILURE; + } + } else if (ceph_argparse_flag(args, i, "--stress-test", NULL)) { + stress = true; + } else if (ceph_argparse_flag(args, i, "--correctness-test", NULL)) { + correctness = true; + } else { + cerr << "unknown option " << *i << std::endl; + return EXIT_FAILURE; + } + } + + if (stress) { + srandom(seed); + return stress_test(num_ops, num_objs, obj_bytes, delay_ns, max_len, percent_reads); + } + if (correctness) { + return correctness_test(delay_ns); + } +} |