diff options
Diffstat (limited to 'src/test/crimson/seastore')
20 files changed, 10171 insertions, 0 deletions
diff --git a/src/test/crimson/seastore/CMakeLists.txt b/src/test/crimson/seastore/CMakeLists.txt new file mode 100644 index 000000000..5c6c2771c --- /dev/null +++ b/src/test/crimson/seastore/CMakeLists.txt @@ -0,0 +1,128 @@ +add_executable(unittest-transaction-manager + test_block.cc + test_transaction_manager.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest-transaction-manager + --memory 256M --smp 1) +target_link_libraries( + unittest-transaction-manager + ${CMAKE_DL_LIBS} + crimson-seastore) + +add_executable(unittest-btree-lba-manager + test_btree_lba_manager.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest-btree-lba-manager + --memory 256M --smp 1) +target_link_libraries( + unittest-btree-lba-manager + ${CMAKE_DL_LIBS} + crimson-seastore) + +add_executable(unittest-seastore-journal + test_seastore_journal.cc) +add_ceph_test(unittest-seastore-journal + unittest-seastore-journal --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-journal + crimson::gtest + crimson-seastore) + +add_executable(unittest-seastore-cache + test_block.cc + test_seastore_cache.cc) +add_ceph_test(unittest-seastore-cache + unittest-seastore-cache --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-cache + crimson::gtest + crimson-seastore) + +add_executable(unittest-object-data-handler + test_object_data_handler.cc + ../gtest_seastar.cc + ${PROJECT_SOURCE_DIR}/src/crimson/osd/lsan_suppressions.cc) +add_ceph_unittest(unittest-object-data-handler + --memory 256M --smp 1) +target_link_libraries( + unittest-object-data-handler + crimson::gtest + crimson-seastore + crimson-os + crimson-common) + +add_executable(unittest-collection-manager + test_collection_manager.cc + ../gtest_seastar.cc + ${PROJECT_SOURCE_DIR}/src/crimson/osd/lsan_suppressions.cc) +add_ceph_test(unittest-collection-manager + unittest-collection-manager --memory 256M --smp 1) +target_link_libraries( + unittest-collection-manager + crimson::gtest + crimson-seastore + crimson-os + 
crimson-common) + +add_executable(unittest-omap-manager + test_omap_manager.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest-omap-manager + --memory 256M --smp 1) +target_link_libraries( + unittest-omap-manager + ${CMAKE_DL_LIBS} + crimson-seastore) + +add_executable(unittest-seastore + test_seastore.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest-seastore + --memory 256M --smp 1) +target_link_libraries( + unittest-seastore + ${CMAKE_DL_LIBS} + crimson-seastore + crimson-common) + +add_executable(unittest-seastore-randomblock-manager + test_randomblock_manager.cc) +add_ceph_test(unittest-seastore-randomblock-manager + unittest-seastore-randomblock-manager --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-randomblock-manager + crimson::gtest + ${CMAKE_DL_LIBS} + crimson-seastore) + +add_executable(unittest-seastore-nvmedevice + nvmedevice/test_nvmedevice.cc) +add_ceph_test(unittest-seastore-nvmedevice + unittest-seastore-nvmedevice --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-nvmedevice + crimson::gtest + crimson-seastore + aio) + +add_executable(unittest-seastore-cbjournal + test_cbjournal.cc) +add_ceph_test(unittest-seastore-cbjournal + unittest-seastore-cbjournal --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-cbjournal + crimson::gtest + crimson-seastore + aio) + +add_executable(unittest-seastore-extent-allocator + test_extent_allocator.cc) +add_ceph_test(unittest-seastore-extent-allocator + unittest-seastore-extent-allocator --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-extent-allocator + crimson::gtest + crimson-seastore + aio) + +add_subdirectory(onode_tree) diff --git a/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc new file mode 100644 index 000000000..9c2f4c246 --- /dev/null +++ b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc @@ -0,0 +1,105 @@ +//-*- mode:C++; tab-width:8; c-basic-offset:2; 
indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/buffer.h" +#include "crimson/os/seastore/random_block_manager/rbm_device.h" +#include "crimson/os/seastore/random_block_manager/nvme_block_device.h" +#include "test/crimson/gtest_seastar.h" +#include "include/stringify.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace random_block_device; +using namespace random_block_device::nvme; + +struct nvdev_test_t : seastar_test_suite_t { + std::unique_ptr<RBMDevice> device; + std::string dev_path; + + static const uint64_t DEV_SIZE = 1024 * 1024 * 1024; + + nvdev_test_t() : + device(nullptr), + dev_path("randomblock_manager.test_nvmedevice" + stringify(getpid())) { + int fd = ::open(dev_path.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644); + ceph_assert(fd >= 0); + ::ftruncate(fd, DEV_SIZE); + ::close(fd); + } + ~nvdev_test_t() { + ::unlink(dev_path.c_str()); + } +}; + +static const uint64_t BUF_SIZE = 1024; +static const uint64_t BLK_SIZE = 4096; + +struct nvdev_test_block_t { + uint8_t data[BUF_SIZE]; + + DENC(nvdev_test_block_t, v, p) { + DENC_START(1, 1, p); + for (uint64_t i = 0 ; i < BUF_SIZE; i++) + { + denc(v.data[i], p); + } + DENC_FINISH(p); + } +}; + +WRITE_CLASS_DENC_BOUNDED( + nvdev_test_block_t +) + +using crimson::common::local_conf; +TEST_F(nvdev_test_t, write_and_verify_test) +{ + run_async([this] { + device.reset(new random_block_device::nvme::NVMeBlockDevice(dev_path)); + local_conf().set_val("seastore_cbjournal_size", "1048576").get(); + device->start().get(); + device->mkfs( + device_config_t{ + true, + device_spec_t{ + (magic_t)std::rand(), + device_type_t::RANDOM_BLOCK_SSD, + static_cast<device_id_t>(DEVICE_ID_RANDOM_BLOCK_MIN)}, + seastore_meta_t{uuid_d()}, + secondary_device_set_t()} + ).unsafe_get(); + device->mount().unsafe_get(); + nvdev_test_block_t original_data; + std::minstd_rand0 generator; + uint8_t value = generator(); + memset(original_data.data, value, 
BUF_SIZE); + uint64_t bl_length = 0; + Device& d = device->get_sharded_device(); + { + bufferlist bl; + encode(original_data, bl); + bl_length = bl.length(); + auto write_buf = ceph::bufferptr(buffer::create_page_aligned(BLK_SIZE)); + bl.begin().copy(bl_length, write_buf.c_str()); + ((RBMDevice*)&d)->write(0, std::move(write_buf)).unsafe_get(); + } + + nvdev_test_block_t read_data; + { + auto read_buf = ceph::bufferptr(buffer::create_page_aligned(BLK_SIZE)); + ((RBMDevice*)&d)->read(0, read_buf).unsafe_get(); + bufferlist bl; + bl.push_back(read_buf); + auto bliter = bl.cbegin(); + decode(read_data, bliter); + } + + int ret = memcmp(original_data.data, read_data.data, BUF_SIZE); + ((RBMDevice*)&d)->close().unsafe_get(); + device->stop().get(); + ASSERT_TRUE(ret == 0); + device.reset(nullptr); + }); +} + diff --git a/src/test/crimson/seastore/onode_tree/CMakeLists.txt b/src/test/crimson/seastore/onode_tree/CMakeLists.txt new file mode 100644 index 000000000..bea208601 --- /dev/null +++ b/src/test/crimson/seastore/onode_tree/CMakeLists.txt @@ -0,0 +1,15 @@ +add_executable(unittest-staged-fltree + test_staged_fltree.cc + ../../gtest_seastar.cc) +add_ceph_unittest(unittest-staged-fltree + --memory 256M --smp 1) +target_link_libraries(unittest-staged-fltree + crimson-seastore) + +add_executable(unittest-fltree-onode-manager + test_fltree_onode_manager.cc + ../../gtest_seastar.cc) +add_ceph_unittest(unittest-fltree-onode-manager + --memory 256M --smp 1) +target_link_libraries(unittest-fltree-onode-manager + crimson-seastore) diff --git a/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc b/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc new file mode 100644 index 000000000..1f661cdca --- /dev/null +++ b/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc @@ -0,0 +1,330 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 smarttab + +#include <boost/range/combine.hpp> + +#include 
"test/crimson/gtest_seastar.h" + +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace crimson::os::seastore::onode; +using CTransaction = ceph::os::Transaction; +using namespace std; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct onode_item_t { + uint32_t size; + uint64_t id; + uint64_t block_size; + uint32_t cnt_modify = 0; + + void initialize(Transaction& t, Onode& value) const { + auto& layout = value.get_mutable_layout(t); + layout.size = size; + layout.omap_root.update(omap_root_t(id, cnt_modify, + value.get_metadata_hint(block_size))); + validate(value); + } + + void validate(Onode& value) const { + auto& layout = value.get_layout(); + ceph_assert(laddr_t(layout.size) == laddr_t{size}); + ceph_assert(layout.omap_root.get(value.get_metadata_hint(block_size)).addr == id); + ceph_assert(layout.omap_root.get(value.get_metadata_hint(block_size)).depth == cnt_modify); + } + + void modify(Transaction& t, Onode& value) { + validate(value); + ++cnt_modify; + initialize(t, value); + } + + static onode_item_t create(std::size_t size, std::size_t id, uint64_t block_size) { + ceph_assert(size <= std::numeric_limits<uint32_t>::max()); + return {(uint32_t)size, id, block_size}; + } +}; + +struct fltree_onode_manager_test_t + : public seastar_test_suite_t, TMTestState { + using iterator_t = typename KVPool<onode_item_t>::iterator_t; + + FLTreeOnodeManagerRef manager; + + seastar::future<> set_up_fut() final { + return tm_setup(); + } + + seastar::future<> tear_down_fut() final { + return tm_teardown(); + } + + virtual seastar::future<> _init() final { + return TMTestState::_init().then([this] { + manager.reset(new 
FLTreeOnodeManager(*tm)); + }); + } + + virtual seastar::future<> _destroy() final { + manager.reset(); + return TMTestState::_destroy(); + } + + virtual FuturizedStore::mkfs_ertr::future<> _mkfs() final { + return TMTestState::_mkfs( + ).safe_then([this] { + return restart_fut(); + }).safe_then([this] { + return repeat_eagain([this] { + return seastar::do_with( + create_mutate_transaction(), + [this](auto &ref_t) + { + return with_trans_intr(*ref_t, [&](auto &t) { + return manager->mkfs(t + ).si_then([this, &t] { + return submit_transaction_fut2(t); + }); + }); + }); + }); + }).handle_error( + crimson::ct_error::assert_all{"Invalid error in _mkfs"} + ); + } + + template <typename F> + void with_transaction(F&& f) { + auto t = create_mutate_transaction(); + std::invoke(f, *t); + submit_transaction(std::move(t)); + } + + template <typename F> + void with_onode_write(iterator_t& it, F&& f) { + with_transaction([this, &it, f=std::move(f)] (auto& t) { + auto p_kv = *it; + auto onode = with_trans_intr(t, [&](auto &t) { + return manager->get_or_create_onode(t, p_kv->key); + }).unsafe_get0(); + std::invoke(f, t, *onode, p_kv->value); + with_trans_intr(t, [&](auto &t) { + if (onode->is_alive()) { + return manager->write_dirty(t, {onode}); + } else { + return OnodeManager::write_dirty_iertr::now(); + } + }).unsafe_get0(); + }); + } + + void validate_onode(iterator_t& it) { + with_transaction([this, &it] (auto& t) { + auto p_kv = *it; + auto onode = with_trans_intr(t, [&](auto &t) { + return manager->get_onode(t, p_kv->key); + }).unsafe_get0(); + p_kv->value.validate(*onode); + }); + } + + void validate_erased(iterator_t& it) { + with_transaction([this, &it] (auto& t) { + auto p_kv = *it; + auto exist = with_trans_intr(t, [&](auto &t) { + return manager->contains_onode(t, p_kv->key); + }).unsafe_get0(); + ceph_assert(exist == false); + }); + } + + template <typename F> + void with_onodes_process( + const iterator_t& start, const iterator_t& end, F&& f) { + 
std::vector<ghobject_t> oids; + std::vector<onode_item_t*> items; + auto it = start; + while(it != end) { + auto p_kv = *it; + oids.emplace_back(p_kv->key); + items.emplace_back(&p_kv->value); + ++it; + } + with_transaction([&oids, &items, f=std::move(f)] (auto& t) mutable { + std::invoke(f, t, oids, items); + }); + } + + template <typename F> + void with_onodes_write( + const iterator_t& start, const iterator_t& end, F&& f) { + with_onodes_process(start, end, + [this, f=std::move(f)] (auto& t, auto& oids, auto& items) { + auto onodes = with_trans_intr(t, [&](auto &t) { + return manager->get_or_create_onodes(t, oids); + }).unsafe_get0(); + for (auto tup : boost::combine(onodes, items)) { + OnodeRef onode; + onode_item_t* p_item; + boost::tie(onode, p_item) = tup; + std::invoke(f, t, *onode, *p_item); + } + with_trans_intr(t, [&](auto &t) { + return manager->write_dirty(t, onodes); + }).unsafe_get0(); + }); + } + + void validate_onodes( + const iterator_t& start, const iterator_t& end) { + with_onodes_process(start, end, + [this] (auto& t, auto& oids, auto& items) { + for (auto tup : boost::combine(oids, items)) { + ghobject_t oid; + onode_item_t* p_item; + boost::tie(oid, p_item) = tup; + auto onode = with_trans_intr(t, [&](auto &t) { + return manager->get_onode(t, oid); + }).unsafe_get0(); + p_item->validate(*onode); + } + }); + } + + void validate_erased( + const iterator_t& start, const iterator_t& end) { + with_onodes_process(start, end, + [this] (auto& t, auto& oids, auto& items) { + for (auto& oid : oids) { + auto exist = with_trans_intr(t, [&](auto &t) { + return manager->contains_onode(t, oid); + }).unsafe_get0(); + ceph_assert(exist == false); + } + }); + } + + static constexpr uint64_t LIST_LIMIT = 10; + void validate_list_onodes(KVPool<onode_item_t>& pool) { + with_onodes_process(pool.begin(), pool.end(), + [this] (auto& t, auto& oids, auto& items) { + std::vector<ghobject_t> listed_oids; + auto start = ghobject_t(); + auto end = ghobject_t::get_max(); + 
assert(start < end); + assert(start < oids[0]); + assert(oids[0] < end); + while (start != end) { + auto [list_ret, list_end] = with_trans_intr(t, [&](auto &t) { + return manager->list_onodes(t, start, end, LIST_LIMIT); + }).unsafe_get0(); + listed_oids.insert(listed_oids.end(), list_ret.begin(), list_ret.end()); + start = list_end; + } + ceph_assert(oids.size() == listed_oids.size()); + }); + } + + fltree_onode_manager_test_t() {} +}; + +TEST_P(fltree_onode_manager_test_t, 1_single) +{ + run_async([this] { + uint64_t block_size = tm->get_block_size(); + auto pool = KVPool<onode_item_t>::create_range({0, 1}, {128, 256}, block_size); + auto iter = pool.begin(); + with_onode_write(iter, [](auto& t, auto& onode, auto& item) { + item.initialize(t, onode); + }); + validate_onode(iter); + + with_onode_write(iter, [](auto& t, auto& onode, auto& item) { + item.modify(t, onode); + }); + validate_onode(iter); + + validate_list_onodes(pool); + + with_onode_write(iter, [this](auto& t, auto& onode, auto& item) { + OnodeRef onode_ref = &onode; + with_trans_intr(t, [&](auto &t) { + return manager->erase_onode(t, onode_ref); + }).unsafe_get0(); + }); + validate_erased(iter); + }); +} + +TEST_P(fltree_onode_manager_test_t, 2_synthetic) +{ + run_async([this] { + uint64_t block_size = tm->get_block_size(); + auto pool = KVPool<onode_item_t>::create_range( + {0, 100}, {32, 64, 128, 256, 512}, block_size); + auto start = pool.begin(); + auto end = pool.end(); + with_onodes_write(start, end, + [](auto& t, auto& onode, auto& item) { + item.initialize(t, onode); + }); + validate_onodes(start, end); + + validate_list_onodes(pool); + + auto rd_start = pool.random_begin(); + auto rd_end = rd_start + 50; + with_onodes_write(rd_start, rd_end, + [](auto& t, auto& onode, auto& item) { + item.modify(t, onode); + }); + validate_onodes(start, end); + + pool.shuffle(); + rd_start = pool.random_begin(); + rd_end = rd_start + 50; + with_onodes_write(rd_start, rd_end, + [](auto& t, auto& onode, auto& 
item) { + item.modify(t, onode); + }); + validate_onodes(start, end); + + pool.shuffle(); + rd_start = pool.random_begin(); + rd_end = rd_start + 50; + with_onodes_write(rd_start, rd_end, + [this](auto& t, auto& onode, auto& item) { + OnodeRef onode_ref = &onode; + with_trans_intr(t, [&](auto &t) { + return manager->erase_onode(t, onode_ref); + }).unsafe_get0(); + }); + validate_erased(rd_start, rd_end); + pool.erase_from_random(rd_start, rd_end); + start = pool.begin(); + end = pool.end(); + validate_onodes(start, end); + + validate_list_onodes(pool); + }); +} + +INSTANTIATE_TEST_SUITE_P( + fltree_onode__manager_test, + fltree_onode_manager_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); diff --git a/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc new file mode 100644 index 000000000..7357b5ced --- /dev/null +++ b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc @@ -0,0 +1,1792 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 smarttab + +#include <array> +#include <cstring> +#include <memory> +#include <set> +#include <sstream> +#include <vector> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_layout.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/tree.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h" + +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" +#include "test_value.h" + +using namespace crimson::os::seastore::onode; + +#define INTR(fun, t) \ + with_trans_intr( \ + t, \ + [&] (auto &tr) { \ + return fun(tr); \ + } \ + 
) + +#define INTR_R(fun, t, args...) \ + with_trans_intr( \ + t, \ + [&] (auto &tr) { \ + return fun(tr, args); \ + } \ + ) + +#define INTR_WITH_PARAM(fun, c, b, v) \ + with_trans_intr( \ + c.t, \ + [=] (auto &t) { \ + return fun(c, L_ADDR_MIN, b, v); \ + } \ + ) + +namespace { + constexpr bool IS_DUMMY_SYNC = false; + using DummyManager = DummyNodeExtentManager<IS_DUMMY_SYNC>; + + using UnboundedBtree = Btree<UnboundedValue>; + + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } + + ghobject_t make_ghobj( + shard_t shard, pool_t pool, crush_hash_t crush, + std::string ns, std::string oid, snap_t snap, gen_t gen) { + return ghobject_t{shard_id_t{shard}, pool, crush, ns, oid, snap, gen}; + } + + // return a key_view_t and its underlying memory buffer. + // the buffer needs to be freed manually. + std::pair<key_view_t, void*> build_key_view(const ghobject_t& hobj) { + key_hobj_t key_hobj(hobj); + size_t key_size = sizeof(shard_pool_crush_t) + sizeof(snap_gen_t) + + ns_oid_view_t::estimate_size(key_hobj); + void* p_mem = std::malloc(key_size); + + key_view_t key_view; + char* p_fill = (char*)p_mem + key_size; + + auto spc = shard_pool_crush_t::from_key(key_hobj); + p_fill -= sizeof(shard_pool_crush_t); + std::memcpy(p_fill, &spc, sizeof(shard_pool_crush_t)); + key_view.set(*reinterpret_cast<const shard_pool_crush_t*>(p_fill)); + + auto p_ns_oid = p_fill; + ns_oid_view_t::test_append(key_hobj, p_fill); + ns_oid_view_t ns_oid_view(p_ns_oid); + key_view.set(ns_oid_view); + + auto sg = snap_gen_t::from_key(key_hobj); + p_fill -= sizeof(snap_gen_t); + ceph_assert(p_fill == (char*)p_mem); + std::memcpy(p_fill, &sg, sizeof(snap_gen_t)); + key_view.set(*reinterpret_cast<const snap_gen_t*>(p_fill)); + + return {key_view, p_mem}; + } +} + +struct a_basic_test_t : public seastar_test_suite_t {}; + +TEST_F(a_basic_test_t, 1_basic_sizes) +{ + logger().info("\n" + "Bytes of struct:\n" + " node_header_t: {}\n" + " shard_pool_t: {}\n" + 
" shard_pool_crush_t: {}\n" + " crush_t: {}\n" + " snap_gen_t: {}\n" + " slot_0_t: {}\n" + " slot_1_t: {}\n" + " slot_3_t: {}\n" + " node_fields_0_t: {}\n" + " node_fields_1_t: {}\n" + " node_fields_2_t: {}\n" + " internal_fields_3_t: {}\n" + " leaf_fields_3_t: {}\n" + " internal_sub_item_t: {}", + sizeof(node_header_t), sizeof(shard_pool_t), + sizeof(shard_pool_crush_t), sizeof(crush_t), sizeof(snap_gen_t), + sizeof(slot_0_t), sizeof(slot_1_t), sizeof(slot_3_t), + sizeof(node_fields_0_t), sizeof(node_fields_1_t), sizeof(node_fields_2_t), + sizeof(internal_fields_3_t), sizeof(leaf_fields_3_t), sizeof(internal_sub_item_t) + ); + + auto hobj = make_ghobj(0, 0, 0, "n", "o", 0, 0); + key_hobj_t key(hobj); + auto [key_view, p_mem] = build_key_view(hobj); + value_config_t value; + value.payload_size = 8; +#define _STAGE_T(NodeType) node_to_stage_t<typename NodeType::node_stage_t> +#define NXT_T(StageType) staged<typename StageType::next_param_t> + laddr_t i_value{0}; + logger().info("\n" + "Bytes of a key-value insertion (full-string):\n" + " s-p-c, 'n'-'o', s-g => value_payload(8): typically internal 43B, leaf 59B\n" + " InternalNode0: {} {} {}\n" + " InternalNode1: {} {} {}\n" + " InternalNode2: {} {}\n" + " InternalNode3: {}\n" + " LeafNode0: {} {} {}\n" + " LeafNode1: {} {} {}\n" + " LeafNode2: {} {}\n" + " LeafNode3: {}", + _STAGE_T(InternalNode0)::insert_size(key_view, i_value), + NXT_T(_STAGE_T(InternalNode0))::insert_size(key_view, i_value), + NXT_T(NXT_T(_STAGE_T(InternalNode0)))::insert_size(key_view, i_value), + _STAGE_T(InternalNode1)::insert_size(key_view, i_value), + NXT_T(_STAGE_T(InternalNode1))::insert_size(key_view, i_value), + NXT_T(NXT_T(_STAGE_T(InternalNode1)))::insert_size(key_view, i_value), + _STAGE_T(InternalNode2)::insert_size(key_view, i_value), + NXT_T(_STAGE_T(InternalNode2))::insert_size(key_view, i_value), + _STAGE_T(InternalNode3)::insert_size(key_view, i_value), + _STAGE_T(LeafNode0)::insert_size(key, value), + 
NXT_T(_STAGE_T(LeafNode0))::insert_size(key, value), + NXT_T(NXT_T(_STAGE_T(LeafNode0)))::insert_size(key, value), + _STAGE_T(LeafNode1)::insert_size(key, value), + NXT_T(_STAGE_T(LeafNode1))::insert_size(key, value), + NXT_T(NXT_T(_STAGE_T(LeafNode1)))::insert_size(key, value), + _STAGE_T(LeafNode2)::insert_size(key, value), + NXT_T(_STAGE_T(LeafNode2))::insert_size(key, value), + _STAGE_T(LeafNode3)::insert_size(key, value) + ); + std::free(p_mem); +} + +TEST_F(a_basic_test_t, 2_node_sizes) +{ + run_async([] { + auto nm = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + auto t = make_test_transaction(); + ValueBuilderImpl<UnboundedValue> vb; + context_t c{*nm, vb, *t}; + std::array<std::pair<NodeImplURef, NodeExtentMutable>, 16> nodes = { + INTR_WITH_PARAM(InternalNode0::allocate, c, false, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode1::allocate, c, false, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode2::allocate, c, false, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode3::allocate, c, false, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode0::allocate, c, true, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode1::allocate, c, true, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode2::allocate, c, true, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode3::allocate, c, true, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode0::allocate, c, false, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode1::allocate, c, false, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode2::allocate, c, false, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode3::allocate, c, false, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode0::allocate, c, true, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode1::allocate, c, true, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode2::allocate, c, true, 0u).unsafe_get0().make_pair(), + 
INTR_WITH_PARAM(LeafNode3::allocate, c, true, 0u).unsafe_get0().make_pair() + }; + std::ostringstream oss; + oss << "\nallocated nodes:"; + for (auto iter = nodes.begin(); iter != nodes.end(); ++iter) { + oss << "\n "; + auto& ref_node = iter->first; + ref_node->dump_brief(oss); + } + logger().info("{}", oss.str()); + }); +} + +struct b_dummy_tree_test_t : public seastar_test_suite_t { + TransactionRef ref_t; + std::unique_ptr<UnboundedBtree> tree; + + b_dummy_tree_test_t() = default; + + seastar::future<> set_up_fut() override final { + ref_t = make_test_transaction(); + tree.reset( + new UnboundedBtree(NodeExtentManager::create_dummy(IS_DUMMY_SYNC)) + ); + return INTR(tree->mkfs, *ref_t).handle_error( + crimson::ct_error::all_same_way([] { + ASSERT_FALSE("Unable to mkfs"); + }) + ); + } + + seastar::future<> tear_down_fut() final { + ref_t.reset(); + tree.reset(); + return seastar::now(); + } +}; + +TEST_F(b_dummy_tree_test_t, 3_random_insert_erase_leaf_node) +{ + run_async([this] { + logger().info("\n---------------------------------------------" + "\nrandomized leaf node insert:\n"); + auto key_s = ghobject_t(); + auto key_e = ghobject_t::get_max(); + ASSERT_TRUE(INTR_R(tree->find, *ref_t, key_s).unsafe_get0().is_end()); + ASSERT_TRUE(INTR(tree->begin, *ref_t).unsafe_get0().is_end()); + ASSERT_TRUE(INTR(tree->last, *ref_t).unsafe_get0().is_end()); + + std::map<ghobject_t, + std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history; + + auto f_validate_insert_new = [this, &insert_history] ( + const ghobject_t& key, const test_item_t& value) { + auto conf = UnboundedBtree::tree_value_config_t{value.get_payload_size()}; + auto [cursor, success] = INTR_R(tree->insert, + *ref_t, key, conf).unsafe_get0(); + initialize_cursor_from_item(*ref_t, key, value, cursor, success); + insert_history.emplace(key, std::make_tuple(value, cursor)); + auto cursor_ = INTR_R(tree->find, *ref_t, key).unsafe_get0(); + ceph_assert(cursor_ != tree->end()); + 
ceph_assert(cursor_.value() == cursor.value()); + validate_cursor_from_item(key, value, cursor_); + return cursor.value(); + }; + + auto f_validate_erase = [this, &insert_history] (const ghobject_t& key) { + auto cursor_erase = INTR_R(tree->find, *ref_t, key).unsafe_get0(); + auto cursor_next = INTR(cursor_erase.get_next, *ref_t).unsafe_get0(); + auto cursor_ret = INTR_R(tree->erase, *ref_t, cursor_erase).unsafe_get0(); + ceph_assert(cursor_erase.is_end()); + ceph_assert(cursor_ret == cursor_next); + auto cursor_lb = INTR_R(tree->lower_bound, *ref_t, key).unsafe_get0(); + ceph_assert(cursor_lb == cursor_next); + auto it = insert_history.find(key); + ceph_assert(std::get<1>(it->second).is_end()); + insert_history.erase(it); + }; + + auto f_insert_erase_insert = [&f_validate_insert_new, &f_validate_erase] ( + const ghobject_t& key, const test_item_t& value) { + f_validate_insert_new(key, value); + f_validate_erase(key); + return f_validate_insert_new(key, value); + }; + + auto values = Values<test_item_t>(15); + + // insert key1, value1 at STAGE_LEFT + auto key1 = make_ghobj(3, 3, 3, "ns3", "oid3", 3, 3); + auto value1 = values.pick(); + auto test_value1 = f_insert_erase_insert(key1, value1); + + // validate lookup + { + auto cursor1_s = INTR_R(tree->lower_bound, *ref_t, key_s).unsafe_get0(); + ASSERT_EQ(cursor1_s.get_ghobj(), key1); + ASSERT_EQ(cursor1_s.value(), test_value1); + auto cursor1_e = INTR_R(tree->lower_bound, *ref_t, key_e).unsafe_get0(); + ASSERT_TRUE(cursor1_e.is_end()); + } + + // insert the same key1 with a different value + { + auto value1_dup = values.pick(); + auto conf = UnboundedBtree::tree_value_config_t{value1_dup.get_payload_size()}; + auto [cursor1_dup, ret1_dup] = INTR_R(tree->insert, + *ref_t, key1, conf).unsafe_get0(); + ASSERT_FALSE(ret1_dup); + validate_cursor_from_item(key1, value1, cursor1_dup); + } + + // insert key2, value2 to key1's left at STAGE_LEFT + // insert node front at STAGE_LEFT + auto key2 = make_ghobj(2, 2, 2, "ns3", 
"oid3", 3, 3); + auto value2 = values.pick(); + f_insert_erase_insert(key2, value2); + + // insert key3, value3 to key1's right at STAGE_LEFT + // insert node last at STAGE_LEFT + auto key3 = make_ghobj(4, 4, 4, "ns3", "oid3", 3, 3); + auto value3 = values.pick(); + f_insert_erase_insert(key3, value3); + + // insert key4, value4 to key1's left at STAGE_STRING (collision) + auto key4 = make_ghobj(3, 3, 3, "ns2", "oid2", 3, 3); + auto value4 = values.pick(); + f_insert_erase_insert(key4, value4); + + // insert key5, value5 to key1's right at STAGE_STRING (collision) + auto key5 = make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3); + auto value5 = values.pick(); + f_insert_erase_insert(key5, value5); + + // insert key6, value6 to key1's left at STAGE_RIGHT + auto key6 = make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2); + auto value6 = values.pick(); + f_insert_erase_insert(key6, value6); + + // insert key7, value7 to key1's right at STAGE_RIGHT + auto key7 = make_ghobj(3, 3, 3, "ns3", "oid3", 4, 4); + auto value7 = values.pick(); + f_insert_erase_insert(key7, value7); + + // insert node front at STAGE_RIGHT + auto key8 = make_ghobj(2, 2, 2, "ns3", "oid3", 2, 2); + auto value8 = values.pick(); + f_insert_erase_insert(key8, value8); + + // insert node front at STAGE_STRING (collision) + auto key9 = make_ghobj(2, 2, 2, "ns2", "oid2", 3, 3); + auto value9 = values.pick(); + f_insert_erase_insert(key9, value9); + + // insert node last at STAGE_RIGHT + auto key10 = make_ghobj(4, 4, 4, "ns3", "oid3", 4, 4); + auto value10 = values.pick(); + f_insert_erase_insert(key10, value10); + + // insert node last at STAGE_STRING (collision) + auto key11 = make_ghobj(4, 4, 4, "ns4", "oid4", 3, 3); + auto value11 = values.pick(); + f_insert_erase_insert(key11, value11); + + // insert key, value randomly until a perfect 3-ary tree is formed + std::vector<std::pair<ghobject_t, test_item_t>> kvs{ + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2), values.pick()}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 4, 4), 
values.pick()}, + {make_ghobj(2, 2, 2, "ns3", "oid3", 4, 4), values.pick()}, + {make_ghobj(2, 2, 2, "ns4", "oid4", 2, 2), values.pick()}, + {make_ghobj(2, 2, 2, "ns4", "oid4", 3, 3), values.pick()}, + {make_ghobj(2, 2, 2, "ns4", "oid4", 4, 4), values.pick()}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2), values.pick()}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 4, 4), values.pick()}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2), values.pick()}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 4, 4), values.pick()}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2), values.pick()}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 3, 3), values.pick()}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 4, 4), values.pick()}, + {make_ghobj(4, 4, 4, "ns3", "oid3", 2, 2), values.pick()}, + {make_ghobj(4, 4, 4, "ns4", "oid4", 2, 2), values.pick()}, + {make_ghobj(4, 4, 4, "ns4", "oid4", 4, 4), values.pick()}}; + auto [smallest_key, smallest_value] = kvs[0]; + auto [largest_key, largest_value] = kvs[kvs.size() - 1]; + std::shuffle(kvs.begin(), kvs.end(), std::default_random_engine{}); + std::for_each(kvs.begin(), kvs.end(), [&f_insert_erase_insert] (auto& kv) { + f_insert_erase_insert(kv.first, kv.second); + }); + ASSERT_EQ(INTR(tree->height, *ref_t).unsafe_get0(), 1); + ASSERT_FALSE(tree->test_is_clean()); + + for (auto& [k, val] : insert_history) { + auto& [v, c] = val; + // validate values in tree keep intact + auto cursor = with_trans_intr(*ref_t, [this, &k=k](auto& tr) { + return tree->find(tr, k); + }).unsafe_get0(); + EXPECT_NE(cursor, tree->end()); + validate_cursor_from_item(k, v, cursor); + // validate values in cursors keep intact + validate_cursor_from_item(k, v, c); + } + { + auto cursor = INTR_R(tree->lower_bound, *ref_t, key_s).unsafe_get0(); + validate_cursor_from_item(smallest_key, smallest_value, cursor); + } + { + auto cursor = INTR(tree->begin, *ref_t).unsafe_get0(); + validate_cursor_from_item(smallest_key, smallest_value, cursor); + } + { + auto cursor = INTR(tree->last, *ref_t).unsafe_get0(); + 
validate_cursor_from_item(largest_key, largest_value, cursor); + } + + // validate range query + { + kvs.clear(); + for (auto& [k, val] : insert_history) { + auto& [v, c] = val; + kvs.emplace_back(k, v); + } + insert_history.clear(); + std::sort(kvs.begin(), kvs.end(), [](auto& l, auto& r) { + return l.first < r.first; + }); + auto cursor = INTR(tree->begin, *ref_t).unsafe_get0(); + for (auto& [k, v] : kvs) { + ASSERT_FALSE(cursor.is_end()); + validate_cursor_from_item(k, v, cursor); + cursor = INTR(cursor.get_next, *ref_t).unsafe_get0(); + } + ASSERT_TRUE(cursor.is_end()); + } + + std::ostringstream oss; + tree->dump(*ref_t, oss); + logger().info("\n{}\n", oss.str()); + + // randomized erase until empty + std::shuffle(kvs.begin(), kvs.end(), std::default_random_engine{}); + for (auto& [k, v] : kvs) { + auto e_size = with_trans_intr(*ref_t, [this, &k=k](auto& tr) { + return tree->erase(tr, k); + }).unsafe_get0(); + ASSERT_EQ(e_size, 1); + } + auto cursor = INTR(tree->begin, *ref_t).unsafe_get0(); + ASSERT_TRUE(cursor.is_end()); + ASSERT_EQ(INTR(tree->height, *ref_t).unsafe_get0(), 1); + }); +} + +static std::set<ghobject_t> build_key_set( + std::pair<unsigned, unsigned> range_2, + std::pair<unsigned, unsigned> range_1, + std::pair<unsigned, unsigned> range_0, + std::string padding = "", + bool is_internal = false) { + ceph_assert(range_1.second <= 10); + std::set<ghobject_t> ret; + ghobject_t key; + for (unsigned i = range_2.first; i < range_2.second; ++i) { + for (unsigned j = range_1.first; j < range_1.second; ++j) { + for (unsigned k = range_0.first; k < range_0.second; ++k) { + std::ostringstream os_ns; + os_ns << "ns" << j; + std::ostringstream os_oid; + os_oid << "oid" << j << padding; + key = make_ghobj(i, i, i, os_ns.str(), os_oid.str(), k, k); + ret.insert(key); + } + } + } + if (is_internal) { + ret.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9)); + } + return ret; +} + +class TestTree { + public: + TestTree() + : 
moved_nm{NodeExtentManager::create_dummy(IS_DUMMY_SYNC)}, + ref_t{make_test_transaction()}, + t{*ref_t}, + c{*moved_nm, vb, t}, + tree{std::move(moved_nm)}, + values{0} {} + + seastar::future<> build_tree( + std::pair<unsigned, unsigned> range_2, + std::pair<unsigned, unsigned> range_1, + std::pair<unsigned, unsigned> range_0, + size_t value_size) { + return seastar::async([this, range_2, range_1, range_0, value_size] { + INTR(tree.mkfs, t).unsafe_get0(); + //logger().info("\n---------------------------------------------" + // "\nbefore leaf node split:\n"); + auto keys = build_key_set(range_2, range_1, range_0); + for (auto& key : keys) { + auto value = values.create(value_size); + insert_tree(key, value).get0(); + } + ASSERT_EQ(INTR(tree.height, t).unsafe_get0(), 1); + ASSERT_FALSE(tree.test_is_clean()); + //std::ostringstream oss; + //tree.dump(t, oss); + //logger().info("\n{}\n", oss.str()); + }); + } + + seastar::future<> build_tree( + const std::vector<ghobject_t>& keys, const std::vector<test_item_t>& values) { + return seastar::async([this, keys, values] { + INTR(tree.mkfs, t).unsafe_get0(); + //logger().info("\n---------------------------------------------" + // "\nbefore leaf node split:\n"); + ASSERT_EQ(keys.size(), values.size()); + auto key_iter = keys.begin(); + auto value_iter = values.begin(); + while (key_iter != keys.end()) { + insert_tree(*key_iter, *value_iter).get0(); + ++key_iter; + ++value_iter; + } + ASSERT_EQ(INTR(tree.height, t).unsafe_get0(), 1); + ASSERT_FALSE(tree.test_is_clean()); + //std::ostringstream oss; + //tree.dump(t, oss); + //logger().info("\n{}\n", oss.str()); + }); + } + + seastar::future<> split_merge( + const ghobject_t& key, + const test_item_t& value, + const split_expectation_t& expected, + std::optional<ghobject_t> next_key) { + return seastar::async([this, key, value, expected, next_key] { + // clone + auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + auto p_dummy = 
static_cast<DummyManager*>(ref_dummy.get()); + UnboundedBtree tree_clone(std::move(ref_dummy)); + auto ref_t_clone = make_test_transaction(); + Transaction& t_clone = *ref_t_clone; + INTR_R(tree_clone.test_clone_from, t_clone, t, tree).unsafe_get0(); + + // insert and split + logger().info("\n\nINSERT-SPLIT {}:", key_hobj_t(key)); + auto conf = UnboundedBtree::tree_value_config_t{value.get_payload_size()}; + auto [cursor, success] = INTR_R(tree_clone.insert, + t_clone, key, conf).unsafe_get0(); + initialize_cursor_from_item(t, key, value, cursor, success); + + { + std::ostringstream oss; + tree_clone.dump(t_clone, oss); + logger().info("dump new root:\n{}", oss.str()); + } + EXPECT_EQ(INTR(tree_clone.height, t_clone).unsafe_get0(), 2); + + for (auto& [k, val] : insert_history) { + auto& [v, c] = val; + auto result = with_trans_intr(t_clone, [&tree_clone, &k=k] (auto& tr) { + return tree_clone.find(tr, k); + }).unsafe_get0(); + EXPECT_NE(result, tree_clone.end()); + validate_cursor_from_item(k, v, result); + } + auto result = INTR_R(tree_clone.find, t_clone, key).unsafe_get0(); + EXPECT_NE(result, tree_clone.end()); + validate_cursor_from_item(key, value, result); + EXPECT_TRUE(last_split.match(expected)); + EXPECT_EQ(p_dummy->size(), 3); + + // erase and merge + logger().info("\n\nERASE-MERGE {}:", key_hobj_t(key)); + auto nxt_cursor = with_trans_intr(t_clone, [&cursor=cursor](auto& tr) { + return cursor.erase<true>(tr); + }).unsafe_get0(); + + { + // track root again to dump + auto begin = INTR(tree_clone.begin, t_clone).unsafe_get0(); + std::ignore = begin; + std::ostringstream oss; + tree_clone.dump(t_clone, oss); + logger().info("dump root:\n{}", oss.str()); + } + + if (next_key.has_value()) { + auto found = insert_history.find(*next_key); + ceph_assert(found != insert_history.end()); + validate_cursor_from_item( + *next_key, std::get<0>(found->second), nxt_cursor); + } else { + EXPECT_TRUE(nxt_cursor.is_end()); + } + + for (auto& [k, val] : insert_history) { + 
auto& [v, c] = val; + auto result = with_trans_intr(t_clone, [&tree_clone, &k=k](auto& tr) { + return tree_clone.find(tr, k); + }).unsafe_get0(); + EXPECT_NE(result, tree_clone.end()); + validate_cursor_from_item(k, v, result); + } + EXPECT_EQ(INTR(tree_clone.height, t_clone).unsafe_get0(), 1); + EXPECT_EQ(p_dummy->size(), 1); + }); + } + + test_item_t create_value(size_t size) { + return values.create(size); + } + + private: + seastar::future<> insert_tree(const ghobject_t& key, const test_item_t& value) { + return seastar::async([this, &key, &value] { + auto conf = UnboundedBtree::tree_value_config_t{value.get_payload_size()}; + auto [cursor, success] = INTR_R(tree.insert, + t, key, conf).unsafe_get0(); + initialize_cursor_from_item(t, key, value, cursor, success); + insert_history.emplace(key, std::make_tuple(value, cursor)); + }); + } + + NodeExtentManagerURef moved_nm; + TransactionRef ref_t; + Transaction& t; + ValueBuilderImpl<UnboundedValue> vb; + context_t c; + UnboundedBtree tree; + Values<test_item_t> values; + std::map<ghobject_t, + std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history; +}; + +struct c_dummy_test_t : public seastar_test_suite_t {}; + +TEST_F(c_dummy_test_t, 4_split_merge_leaf_node) +{ + run_async([] { + { + TestTree test; + test.build_tree({2, 5}, {2, 5}, {2, 5}, 120).get0(); + + auto value = test.create_value(1144); + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to left front at stage 2, 1, 0\n"); + test.split_merge(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), value, + {2u, 2u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), value, + {2u, 1u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 2, 2, "ns2", "oid2", 1, 1), value, + {2u, 0u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + + 
logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to left back at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(2, 2, 2, "ns4", "oid4", 5, 5), value, + {2u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), value, + {2u, 1u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 3, 3, "ns3", "oid3", 3, 3), value, + {2u, 2u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), value, + {2u, 1u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 1, 1), value, + {2u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + + auto value0 = test.create_value(1416); + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to right front at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), value0, + {2u, 0u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), value0, + {2u, 1u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 4, 4, "ns3", "oid3", 3, 3), value0, + {2u, 2u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), value0, + {2u, 1u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), value0, + {2u, 0u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 
2; insert to right back at stage 0, 1, 2\n"); + test.split_merge(make_ghobj(4, 4, 4, "ns4", "oid4", 5, 5), value0, + {2u, 0u, false, InsertType::LAST}, + std::nullopt).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns5", "oid5", 3, 3), value0, + {2u, 1u, false, InsertType::LAST}, + std::nullopt).get0(); + test.split_merge(make_ghobj(5, 5, 5, "ns3", "oid3", 3, 3), value0, + {2u, 2u, false, InsertType::LAST}, + std::nullopt).get0(); + + auto value1 = test.create_value(316); + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to left middle at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(2, 2, 2, "ns4", "oid4", 5, 5), value1, + {1u, 0u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), value1, + {1u, 1u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 2, 3, "ns3", "oid3", 3, 3), value1, + {1u, 2u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), value1, + {1u, 1u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 1, 1), value1, + {1u, 0u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to left back at stage 0, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 5, 5), value1, + {1u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), value1, + {1u, 1u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3", 1, 1), value1, + {1u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns3", 
"oid3", 2, 2)}).get0(); + + auto value2 = test.create_value(452); + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to right front at stage 0, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3", 5, 5), value2, + {1u, 0u, false, InsertType::BEGIN}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid4", 3, 3), value2, + {1u, 1u, false, InsertType::BEGIN}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 1, 1), value2, + {1u, 0u, false, InsertType::BEGIN}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2)}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to right middle at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), value2, + {1u, 0u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), value2, + {1u, 1u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 4, "ns3", "oid3", 3, 3), value2, + {1u, 2u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), value2, + {1u, 1u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), value2, + {1u, 0u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + + auto value3 = test.create_value(834); + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to right middle at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), value3, + {0u, 0u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, 
"ns5", "oid5", 3, 3), value3, + {0u, 1u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 4, "ns3", "oid3", 3, 3), value3, + {0u, 2u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), value3, + {0u, 1u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), value3, + {0u, 0u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to right front at stage 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 2, 3), value3, + {0u, 0u, false, InsertType::BEGIN}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3)}).get0(); + + auto value4 = test.create_value(572); + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to left back at stage 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 3, 4), value4, + {0u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 4, 4)}).get0(); + } + + { + TestTree test; + test.build_tree({2, 4}, {2, 4}, {2, 4}, 232).get0(); + auto value = test.create_value(1996); + logger().info("\n---------------------------------------------" + "\nsplit at [0, 0, 0]; insert to left front at stage 2, 1, 0\n"); + test.split_merge(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), value, + {2u, 2u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}})); + test.split_merge(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), value, + {2u, 1u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}})); + test.split_merge(make_ghobj(2, 2, 2, "ns2", "oid2", 1, 1), value, + {2u, 0u, true, 
InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}})); + } + + { + TestTree test; + std::vector<ghobject_t> keys = { + make_ghobj(2, 2, 2, "ns3", "oid3", 3, 3), + make_ghobj(3, 3, 3, "ns3", "oid3", 3, 3)}; + std::vector<test_item_t> values = { + test.create_value(1360), + test.create_value(1632)}; + test.build_tree(keys, values).get0(); + auto value = test.create_value(1640); + logger().info("\n---------------------------------------------" + "\nsplit at [END, END, END]; insert to right at stage 0, 1, 2\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3", 4, 4), value, + {0u, 0u, false, InsertType::BEGIN}, + std::nullopt).get0(); + EXPECT_TRUE(last_split.match_split_pos({1, {0, {1}}})); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3), value, + {1u, 1u, false, InsertType::BEGIN}, + std::nullopt).get0(); + EXPECT_TRUE(last_split.match_split_pos({1, {1, {0}}})); + test.split_merge(make_ghobj(4, 4, 4, "ns3", "oid3", 3, 3), value, + {2u, 2u, false, InsertType::BEGIN}, + std::nullopt).get0(); + EXPECT_TRUE(last_split.match_split_pos({2, {0, {0}}})); + } + }); +} + +namespace crimson::os::seastore::onode { + +class DummyChildPool { + class DummyChildImpl final : public NodeImpl { + public: + using URef = std::unique_ptr<DummyChildImpl>; + DummyChildImpl(const std::set<ghobject_t>& keys, bool is_level_tail, laddr_t laddr) + : keys{keys}, _is_level_tail{is_level_tail}, _laddr{laddr} { + std::tie(key_view, p_mem_key_view) = build_key_view(*keys.crbegin()); + build_name(); + } + ~DummyChildImpl() override { + std::free(p_mem_key_view); + } + + const std::set<ghobject_t>& get_keys() const { return keys; } + + void reset(const std::set<ghobject_t>& _keys, bool level_tail) { + keys = _keys; + _is_level_tail = level_tail; + std::free(p_mem_key_view); + std::tie(key_view, p_mem_key_view) = build_key_view(*keys.crbegin()); + build_name(); + } + + public: + laddr_t laddr() const override { 
return _laddr; } + bool is_level_tail() const override { return _is_level_tail; } + std::optional<key_view_t> get_pivot_index() const override { return {key_view}; } + bool is_extent_retired() const override { return _is_extent_retired; } + const std::string& get_name() const override { return name; } + search_position_t make_tail() override { + _is_level_tail = true; + build_name(); + return search_position_t::end(); + } + eagain_ifuture<> retire_extent(context_t) override { + assert(!_is_extent_retired); + _is_extent_retired = true; + return eagain_iertr::now(); + } + + protected: + node_type_t node_type() const override { return node_type_t::LEAF; } + field_type_t field_type() const override { return field_type_t::N0; } + const char* read() const override { + ceph_abort("impossible path"); } + extent_len_t get_node_size() const override { + ceph_abort("impossible path"); } + nextent_state_t get_extent_state() const override { + ceph_abort("impossible path"); } + level_t level() const override { return 0u; } + void prepare_mutate(context_t) override { + ceph_abort("impossible path"); } + void validate_non_empty() const override { + ceph_abort("impossible path"); } + bool is_keys_empty() const override { + ceph_abort("impossible path"); } + bool has_single_value() const override { + ceph_abort("impossible path"); } + node_offset_t free_size() const override { + ceph_abort("impossible path"); } + extent_len_t total_size() const override { + ceph_abort("impossible path"); } + bool is_size_underflow() const override { + ceph_abort("impossible path"); } + std::tuple<match_stage_t, search_position_t> erase(const search_position_t&) override { + ceph_abort("impossible path"); } + std::tuple<match_stage_t, std::size_t> evaluate_merge(NodeImpl&) override { + ceph_abort("impossible path"); } + search_position_t merge(NodeExtentMutable&, NodeImpl&, match_stage_t, extent_len_t) override { + ceph_abort("impossible path"); } + eagain_ifuture<NodeExtentMutable> 
rebuild_extent(context_t) override { + ceph_abort("impossible path"); } + node_stats_t get_stats() const override { + ceph_abort("impossible path"); } + std::ostream& dump(std::ostream&) const override { + ceph_abort("impossible path"); } + std::ostream& dump_brief(std::ostream&) const override { + ceph_abort("impossible path"); } + void validate_layout() const override { + ceph_abort("impossible path"); } + void test_copy_to(NodeExtentMutable&) const override { + ceph_abort("impossible path"); } + void test_set_tail(NodeExtentMutable&) override { + ceph_abort("impossible path"); } + + private: + void build_name() { + std::ostringstream sos; + sos << "DummyNode" + << "@0x" << std::hex << laddr() << std::dec + << "Lv" << (unsigned)level() + << (is_level_tail() ? "$" : "") + << "(" << key_view << ")"; + name = sos.str(); + } + + std::set<ghobject_t> keys; + bool _is_level_tail; + laddr_t _laddr; + std::string name; + bool _is_extent_retired = false; + + key_view_t key_view; + void* p_mem_key_view; + }; + + class DummyChild final : public Node { + public: + ~DummyChild() override = default; + + key_view_t get_pivot_key() const { return *impl->get_pivot_index(); } + + eagain_ifuture<> populate_split( + context_t c, std::set<Ref<DummyChild>>& splitable_nodes) { + ceph_assert(can_split()); + ceph_assert(splitable_nodes.find(this) != splitable_nodes.end()); + + size_t index; + const auto& keys = impl->get_keys(); + if (keys.size() == 2) { + index = 1; + } else { + index = rd() % (keys.size() - 2) + 1; + } + auto iter = keys.begin(); + std::advance(iter, index); + + std::set<ghobject_t> left_keys(keys.begin(), iter); + std::set<ghobject_t> right_keys(iter, keys.end()); + bool right_is_tail = impl->is_level_tail(); + impl->reset(left_keys, false); + auto right_child = DummyChild::create_new(right_keys, right_is_tail, pool); + if (!can_split()) { + splitable_nodes.erase(this); + } + if (right_child->can_split()) { + splitable_nodes.insert(right_child); + } + Ref<Node> 
this_ref = this; + return apply_split_to_parent( + c, std::move(this_ref), std::move(right_child), false); + } + + eagain_ifuture<> insert_and_split( + context_t c, const ghobject_t& insert_key, + std::set<Ref<DummyChild>>& splitable_nodes) { + const auto& keys = impl->get_keys(); + ceph_assert(keys.size() == 1); + auto& key = *keys.begin(); + ceph_assert(insert_key < key); + + std::set<ghobject_t> new_keys; + new_keys.insert(insert_key); + new_keys.insert(key); + impl->reset(new_keys, impl->is_level_tail()); + + splitable_nodes.clear(); + splitable_nodes.insert(this); + auto fut = populate_split(c, splitable_nodes); + ceph_assert(splitable_nodes.size() == 0); + return fut; + } + + eagain_ifuture<> merge(context_t c, Ref<DummyChild>&& this_ref) { + return parent_info().ptr->get_child_peers(c, parent_info().position + ).si_then([c, this_ref = std::move(this_ref), this] (auto lr_nodes) mutable { + auto& [lnode, rnode] = lr_nodes; + if (rnode) { + lnode.reset(); + Ref<DummyChild> r_dummy(static_cast<DummyChild*>(rnode.get())); + rnode.reset(); + pool.untrack_node(r_dummy); + assert(r_dummy->use_count() == 1); + return do_merge(c, std::move(this_ref), std::move(r_dummy), true); + } else { + ceph_assert(lnode); + Ref<DummyChild> l_dummy(static_cast<DummyChild*>(lnode.get())); + pool.untrack_node(this_ref); + assert(this_ref->use_count() == 1); + return do_merge(c, std::move(l_dummy), std::move(this_ref), false); + } + }); + } + + eagain_ifuture<> fix_key(context_t c, const ghobject_t& new_key) { + const auto& keys = impl->get_keys(); + ceph_assert(keys.size() == 1); + assert(impl->is_level_tail() == false); + + std::set<ghobject_t> new_keys; + new_keys.insert(new_key); + impl->reset(new_keys, impl->is_level_tail()); + Ref<Node> this_ref = this; + return fix_parent_index<true>(c, std::move(this_ref), false); + } + + bool match_pos(const search_position_t& pos) const { + ceph_assert(!is_root()); + return pos == parent_info().position; + } + + static Ref<DummyChild> 
create( + const std::set<ghobject_t>& keys, bool is_level_tail, + laddr_t addr, DummyChildPool& pool) { + auto ref_impl = std::make_unique<DummyChildImpl>(keys, is_level_tail, addr); + return new DummyChild(ref_impl.get(), std::move(ref_impl), pool); + } + + static Ref<DummyChild> create_new( + const std::set<ghobject_t>& keys, bool is_level_tail, DummyChildPool& pool) { + static laddr_t seed = 0; + return create(keys, is_level_tail, seed++, pool); + } + + static eagain_ifuture<Ref<DummyChild>> create_initial( + context_t c, const std::set<ghobject_t>& keys, + DummyChildPool& pool, RootNodeTracker& root_tracker) { + auto initial = create_new(keys, true, pool); + return c.nm.get_super(c.t, root_tracker + ).handle_error_interruptible( + eagain_iertr::pass_further{}, + crimson::ct_error::assert_all{"Invalid error during create_initial()"} + ).si_then([c, initial](auto super) { + initial->make_root_new(c, std::move(super)); + return initial->upgrade_root(c, L_ADDR_MIN).si_then([initial] { + return initial; + }); + }); + } + + protected: + eagain_ifuture<> test_clone_non_root( + context_t, Ref<InternalNode> new_parent) const override { + ceph_assert(!is_root()); + auto p_pool_clone = pool.pool_clone_in_progress; + ceph_assert(p_pool_clone != nullptr); + auto clone = create( + impl->get_keys(), impl->is_level_tail(), impl->laddr(), *p_pool_clone); + clone->as_child(parent_info().position, new_parent); + return eagain_iertr::now(); + } + eagain_ifuture<Ref<tree_cursor_t>> lookup_smallest(context_t) override { + ceph_abort("impossible path"); } + eagain_ifuture<Ref<tree_cursor_t>> lookup_largest(context_t) override { + ceph_abort("impossible path"); } + eagain_ifuture<> test_clone_root(context_t, RootNodeTracker&) const override { + ceph_abort("impossible path"); } + eagain_ifuture<search_result_t> lower_bound_tracked( + context_t, const key_hobj_t&, MatchHistory&) override { + ceph_abort("impossible path"); } + eagain_ifuture<> do_get_tree_stats(context_t, tree_stats_t&) 
override { + ceph_abort("impossible path"); } + bool is_tracking() const override { return false; } + void track_merge(Ref<Node>, match_stage_t, search_position_t&) override { + ceph_abort("impossible path"); } + + private: + DummyChild(DummyChildImpl* impl, DummyChildImpl::URef&& ref, DummyChildPool& pool) + : Node(std::move(ref)), impl{impl}, pool{pool} { + pool.track_node(this); + } + + bool can_split() const { return impl->get_keys().size() > 1; } + + static eagain_ifuture<> do_merge( + context_t c, Ref<DummyChild>&& left, Ref<DummyChild>&& right, bool stole_key) { + assert(right->use_count() == 1); + assert(left->impl->get_keys().size() == 1); + assert(right->impl->get_keys().size() == 1); + bool left_is_tail = right->impl->is_level_tail(); + const std::set<ghobject_t>* p_keys; + if (stole_key) { + p_keys = &right->impl->get_keys(); + } else { + p_keys = &left->impl->get_keys(); + } + left->impl->reset(*p_keys, left_is_tail); + auto left_addr = left->impl->laddr(); + return left->parent_info().ptr->apply_children_merge<true>( + c, std::move(left), left_addr, std::move(right), !stole_key); + } + + DummyChildImpl* impl; + DummyChildPool& pool; + mutable std::random_device rd; + }; + + public: + DummyChildPool() = default; + ~DummyChildPool() { reset(); } + + auto build_tree(const std::set<ghobject_t>& keys) { + reset(); + // create tree + auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + p_dummy = static_cast<DummyManager*>(ref_dummy.get()); + p_btree.emplace(std::move(ref_dummy)); + return with_trans_intr(get_context().t, [this, &keys] (auto &tr) { + return DummyChild::create_initial(get_context(), keys, *this, *p_btree->root_tracker + ).si_then([this](auto initial_child) { + // split + splitable_nodes.insert(initial_child); + return trans_intr::repeat([this] () + -> eagain_ifuture<seastar::stop_iteration> { + if (splitable_nodes.empty()) { + return seastar::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::yes); + } + auto 
index = rd() % splitable_nodes.size(); + auto iter = splitable_nodes.begin(); + std::advance(iter, index); + Ref<DummyChild> child = *iter; + return child->populate_split(get_context(), splitable_nodes + ).si_then([] { + return seastar::stop_iteration::no; + }); + }); + }).si_then([this] { + //std::ostringstream oss; + //p_btree->dump(t(), oss); + //logger().info("\n{}\n", oss.str()); + return p_btree->height(t()); + }).si_then([](auto height) { + ceph_assert(height == 2); + }); + }); + } + + seastar::future<> split_merge(ghobject_t key, search_position_t pos, + const split_expectation_t& expected) { + return seastar::async([this, key, pos, expected] { + DummyChildPool pool_clone; + clone_to(pool_clone); + + // insert and split + logger().info("\n\nINSERT-SPLIT {} at pos({}):", key_hobj_t(key), pos); + auto node_to_split = pool_clone.get_node_by_pos(pos); + with_trans_intr(pool_clone.get_context().t, [&] (auto &t) { + return node_to_split->insert_and_split( + pool_clone.get_context(), key, pool_clone.splitable_nodes); + }).unsafe_get0(); + { + std::ostringstream oss; + pool_clone.p_btree->dump(pool_clone.t(), oss); + logger().info("dump new root:\n{}", oss.str()); + } + auto &pt = pool_clone.t(); + EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 3); + EXPECT_TRUE(last_split.match(expected)); + EXPECT_EQ(pool_clone.p_dummy->size(), 3); + + // erase and merge + [[maybe_unused]] auto pivot_key = node_to_split->get_pivot_key(); + logger().info("\n\nERASE-MERGE {}:", node_to_split->get_name()); + assert(pivot_key == key_hobj_t(key)); + with_trans_intr(pool_clone.get_context().t, [&] (auto &t) { + return node_to_split->merge( + pool_clone.get_context(), std::move(node_to_split)); + }).unsafe_get0(); + auto &pt2 = pool_clone.t(); + EXPECT_EQ(INTR(pool_clone.p_btree->height ,pt2).unsafe_get0(), 2); + EXPECT_EQ(pool_clone.p_dummy->size(), 1); + }); + } + + seastar::future<> fix_index( + ghobject_t new_key, search_position_t pos, bool expect_split) { + return 
seastar::async([this, new_key, pos, expect_split] { + DummyChildPool pool_clone; + clone_to(pool_clone); + + // fix + auto node_to_fix = pool_clone.get_node_by_pos(pos); + auto old_key = node_to_fix->get_pivot_key().to_ghobj(); + logger().info("\n\nFIX pos({}) from {} to {}, expect_split={}:", + pos, node_to_fix->get_name(), key_hobj_t(new_key), expect_split); + with_trans_intr(pool_clone.get_context().t, [&] (auto &t) { + return node_to_fix->fix_key(pool_clone.get_context(), new_key); + }).unsafe_get0(); + if (expect_split) { + std::ostringstream oss; + pool_clone.p_btree->dump(pool_clone.t(), oss); + logger().info("dump new root:\n{}", oss.str()); + auto &pt = pool_clone.t(); + EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 3); + EXPECT_EQ(pool_clone.p_dummy->size(), 3); + } else { + auto &pt = pool_clone.t(); + EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 2); + EXPECT_EQ(pool_clone.p_dummy->size(), 1); + } + + // fix back + logger().info("\n\nFIX pos({}) from {} back to {}:", + pos, node_to_fix->get_name(), key_hobj_t(old_key)); + with_trans_intr(pool_clone.get_context().t, [&] (auto &t) { + return node_to_fix->fix_key(pool_clone.get_context(), old_key); + }).unsafe_get0(); + auto &pt = pool_clone.t(); + EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 2); + EXPECT_EQ(pool_clone.p_dummy->size(), 1); + }); + } + + private: + void clone_to(DummyChildPool& pool_clone) { + pool_clone_in_progress = &pool_clone; + auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + pool_clone.p_dummy = static_cast<DummyManager*>(ref_dummy.get()); + pool_clone.p_btree.emplace(std::move(ref_dummy)); + auto &pt = pool_clone.t(); + [[maybe_unused]] auto &tr = t(); + INTR_R(pool_clone.p_btree->test_clone_from, + pt, tr, *p_btree).unsafe_get0(); + pool_clone_in_progress = nullptr; + } + + void reset() { + ceph_assert(pool_clone_in_progress == nullptr); + if (tracked_children.size()) { + ceph_assert(!p_btree->test_is_clean()); + 
/*
 * Internal-node split/merge scenarios driven through DummyChildPool.
 *
 * Every scenario follows the same shape: log a banner, assemble a key set
 * with build_key_set() plus a few explicit erase()/insert() adjustments,
 * build the tree with pool.build_tree(), then issue split_merge() or
 * fix_index() calls against it.
 *
 * NOTE(review): judging by the banners that precede each group, the last
 * brace-initialised argument of split_merge() looks like
 * {split stage, insert stage, insert-into-left flag, insert position};
 * confirm against DummyChildPool::split_merge, which is outside this view.
 * The padding lengths are presumably tuned so that the node fills up at the
 * intended position -- verify before changing any of them.
 */
TEST_F(c_dummy_test_t, 5_split_merge_internal_node)
{
  run_async([] {
    DummyChildPool pool;
    {
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert:\n");
      auto padding = std::string(250, '_');
      auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true);
      // Swap the first and the last generated group for keys that use a
      // different padding length (257 and 247 instead of 250).
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2));
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3));
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4));
      keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 2, 2));
      keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 3, 3));
      keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 4, 4));
      auto padding_s = std::string(257, '_');
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2));
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3));
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4));
      auto padding_e = std::string(247, '_');
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 2, 2));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 3, 3));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 4, 4));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to right front at stage 0, 1, 2, 1, 0\n");
      pool.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 5, 5), {2, {0, {0}}},
                       {2u, 0u, false, InsertType::BEGIN}).get();
      pool.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), {2, {0, {0}}},
                       {2u, 1u, false, InsertType::BEGIN}).get();
      pool.split_merge(make_ghobj(3, 4, 4, "ns3", "oid3", 3, 3), {2, {0, {0}}},
                       {2u, 2u, false, InsertType::BEGIN}).get();
      pool.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), {2, {0, {0}}},
                       {2u, 1u, false, InsertType::BEGIN}).get();
      pool.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2" + padding, 1, 1), {2, {0, {0}}},
                       {2u, 0u, false, InsertType::BEGIN}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to right middle at stage 0, 1, 2, 1, 0\n");
      pool.split_merge(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5), {3, {0, {0}}},
                       {2u, 0u, false, InsertType::MID}).get();
      pool.split_merge(make_ghobj(4, 4, 4, "ns5", "oid5", 3, 3), {3, {0, {0}}},
                       {2u, 1u, false, InsertType::MID}).get();
      pool.split_merge(make_ghobj(4, 4, 5, "ns3", "oid3", 3, 3), {3, {0, {0}}},
                       {2u, 2u, false, InsertType::MID}).get();
      pool.split_merge(make_ghobj(5, 5, 5, "ns1", "oid1", 3, 3), {3, {0, {0}}},
                       {2u, 1u, false, InsertType::MID}).get();
      pool.split_merge(make_ghobj(5, 5, 5, "ns2", "oid2" + padding, 1, 1), {3, {0, {0}}},
                       {2u, 0u, false, InsertType::MID}).get();

      // These cases target the end position rather than an explicit index.
      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to right back at stage 0, 1, 2\n");
      pool.split_merge(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 5, 5), search_position_t::end() ,
                       {2u, 0u, false, InsertType::LAST}).get();
      pool.split_merge(make_ghobj(5, 5, 5, "ns5", "oid5", 3, 3), search_position_t::end(),
                       {2u, 1u, false, InsertType::LAST}).get();
      pool.split_merge(make_ghobj(6, 6, 6, "ns3", "oid3", 3, 3), search_position_t::end(),
                       {2u, 2u, false, InsertType::LAST}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 0; insert to left front at stage 2, 1, 0\n");
      pool.split_merge(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), {0, {0, {0}}},
                       {0u, 2u, true, InsertType::BEGIN}).get();
      pool.split_merge(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), {0, {0, {0}}},
                       {0u, 1u, true, InsertType::BEGIN}).get();
      pool.split_merge(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 1, 1), {0, {0, {0}}},
                       {0u, 0u, true, InsertType::BEGIN}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 0; insert to left middle at stage 0, 1, 2, 1, 0\n");
      pool.split_merge(make_ghobj(2, 2, 2, "ns4", "oid4" + padding, 5, 5), {1, {0, {0}}},
                       {0u, 0u, true, InsertType::MID}).get();
      pool.split_merge(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), {1, {0, {0}}},
                       {0u, 1u, true, InsertType::MID}).get();
      pool.split_merge(make_ghobj(2, 2, 3, "ns3", "oid3" + std::string(80, '_'), 3, 3), {1, {0, {0}}} ,
                       {0u, 2u, true, InsertType::MID}).get();
      pool.split_merge(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), {1, {0, {0}}},
                       {0u, 1u, true, InsertType::MID}).get();
      pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 1, 1), {1, {0, {0}}},
                       {0u, 0u, true, InsertType::MID}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 0; insert to left back at stage 0\n");
      pool.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 3, 4), {1, {2, {2}}},
                       {0u, 0u, true, InsertType::LAST}).get();
    }

    {
      // Scenario (1): same key ranges as above with three extra keys
      // appended to the last group.
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (1):\n");
      auto padding = std::string(244, '_');
      auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true);
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 7, 7));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to left back at stage 0, 1, 2, 1\n");
      pool.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 5, 5), {2, {0, {0}}},
                       {2u, 0u, true, InsertType::LAST}).get();
      pool.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), {2, {0, {0}}},
                       {2u, 1u, true, InsertType::LAST}).get();
      pool.split_merge(make_ghobj(3, 4, 4, "n", "o", 3, 3), {2, {0, {0}}},
                       {2u, 2u, true, InsertType::LAST}).get();
      pool.split_merge(make_ghobj(4, 4, 4, "n", "o", 3, 3), {2, {0, {0}}},
                       {2u, 1u, true, InsertType::LAST}).get();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to left middle at stage 2\n");
      pool.split_merge(make_ghobj(2, 3, 3, "n", "o", 3, 3), {1, {0, {0}}},
                       {2u, 2u, true, InsertType::MID}).get();
    }

    {
      // Scenario (2): adds a short ("n", "o") key so the remaining
      // stage-0 left-back case can be exercised.
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (2):\n");
      auto padding = std::string(243, '_');
      auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true);
      keys.insert(make_ghobj(4, 4, 4, "n", "o", 3, 3));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5));
      keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 2; insert to left back at stage (0, 1, 2, 1,) 0\n");
      pool.split_merge(make_ghobj(4, 4, 4, "n", "o", 2, 2), {2, {0, {0}}},
                       {2u, 0u, true, InsertType::LAST}).get();
    }

    {
      // Scenario (3): narrower key range ({2, 5}) with one group removed.
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (3):\n");
      auto padding = std::string(419, '_');
      auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true);
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 2, 2));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 3, 3));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 4, 4));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 1; insert to right front at stage 0, 1, 0\n");
      pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 5, 5), {1, {1, {0}}},
                       {1u, 0u, false, InsertType::BEGIN}).get();
      pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), {1, {1, {0}}},
                       {1u, 1u, false, InsertType::BEGIN}).get();
      pool.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3" + padding, 1, 1), {1, {1, {0}}},
                       {1u, 0u, false, InsertType::BEGIN}).get();
    }

    {
      // Scenario (4): first group re-created with a longer padding (386).
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (4):\n");
      auto padding = std::string(361, '_');
      auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true);
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2));
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3));
      keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4));
      auto padding_s = std::string(386, '_');
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2));
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3));
      keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 1; insert to left back at stage 0, 1\n");
      pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 5, 5), {1, {1, {0}}},
                       {1u, 0u, true, InsertType::LAST}).get();
      pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), {1, {1, {0}}},
                       {1u, 1u, true, InsertType::LAST}).get();

      // fix_index() is driven against the same tree; only the first call
      // passes false as its last argument.
      logger().info("\n---------------------------------------------"
                    "\nfix end index from stage 0 to 0, 1, 2\n");
      auto padding1 = std::string(400, '_');
      pool.fix_index(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5),
                     {2, {2, {2}}}, false).get();
      pool.fix_index(make_ghobj(4, 4, 4, "ns5", "oid5" + padding1, 3, 3),
                     {2, {2, {2}}}, true).get();
      pool.fix_index(make_ghobj(5, 5, 5, "ns3", "oid3" + padding1, 3, 3),
                     {2, {2, {2}}}, true).get();
    }

    {
      // Scenario (5): build_key_set() is called without the trailing
      // `true` here and an explicit "~last" key is inserted instead.
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (5):\n");
      auto padding = std::string(412, '_');
      auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding);
      keys.insert(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3));
      keys.insert(make_ghobj(4, 4, 4, "ns3", "oid3" + padding, 5, 5));
      keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 2, 2));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 3, 3));
      keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 4, 4));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 1; insert to left back at stage (0, 1,) 0\n");
      pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 2, 2), {1, {1, {0}}},
                       {1u, 0u, true, InsertType::LAST}).get();
    }

    {
      // Scenario (6): one stage-0 split, then fix_index() at {3, {0, {0}}}.
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (6):\n");
      auto padding = std::string(328, '_');
      auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding);
      keys.insert(make_ghobj(5, 5, 5, "ns3", "oid3" + std::string(270, '_'), 3, 3));
      keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nsplit at stage 0; insert to right front at stage 0\n");
      pool.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3" + padding, 2, 3), {1, {1, {1}}},
                       {0u, 0u, false, InsertType::BEGIN}).get();

      logger().info("\n---------------------------------------------"
                    "\nfix end index from stage 2 to 0, 1, 2\n");
      auto padding1 = std::string(400, '_');
      pool.fix_index(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5),
                     {3, {0, {0}}}, false).get();
      pool.fix_index(make_ghobj(4, 4, 4, "ns5", "oid5" + padding1, 3, 3),
                     {3, {0, {0}}}, true).get();
      pool.fix_index(make_ghobj(5, 5, 5, "ns4", "oid4" + padding1, 3, 3),
                     {3, {0, {0}}}, true).get();
    }

    {
      // Scenario (7): fix_index() only, at position {2, {3, {0}}}.
      logger().info("\n---------------------------------------------"
                    "\nbefore internal node insert (7):\n");
      auto padding = std::string(323, '_');
      auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding);
      keys.insert(make_ghobj(4, 4, 4, "ns5", "oid5" + padding, 3, 3));
      keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9));
      pool.build_tree(keys).unsafe_get0();

      logger().info("\n---------------------------------------------"
                    "\nfix end index from stage 1 to 0, 1, 2\n");
      auto padding1 = std::string(400, '_');
      pool.fix_index(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5),
                     {2, {3, {0}}}, false).get();
      pool.fix_index(make_ghobj(4, 4, 4, "ns6", "oid6" + padding1, 3, 3),
                     {2, {3, {0}}}, true).get();
      pool.fix_index(make_ghobj(5, 5, 5, "ns3", "oid3" + padding1, 3, 3),
                     {2, {3, {0}}}, true).get();
    }

    // Impossible to split at {0, 0, 0}
    // Impossible to split at [END, END, END]
  });
}
+{ + run_async([this] { + constexpr bool TEST_SEASTORE = true; + constexpr bool TRACK_CURSORS = true; + auto kvs = KVPool<test_item_t>::create_raw_range( + {8, 11, 64, 256, 301, 320}, + {8, 11, 64, 256, 301, 320}, + {8, 16, 128, 512, 576, 640}, + {0, 16}, {0, 10}, {0, 4}); + auto moved_nm = (TEST_SEASTORE ? NodeExtentManager::create_seastore(*tm) + : NodeExtentManager::create_dummy(IS_DUMMY_SYNC)); + auto p_nm = moved_nm.get(); + auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, BoundedValue>>( + kvs, std::move(moved_nm)); + { + auto t = create_mutate_transaction(); + INTR(tree->bootstrap, *t).unsafe_get(); + submit_transaction(std::move(t)); + } + + // test insert + { + auto t = create_mutate_transaction(); + INTR(tree->insert, *t).unsafe_get(); + submit_transaction(std::move(t)); + } + { + auto t = create_read_transaction(); + INTR(tree->get_stats, *t).unsafe_get(); + } + if constexpr (TEST_SEASTORE) { + restart(); + tree->reload(NodeExtentManager::create_seastore(*tm)); + } + { + // Note: create_weak_transaction() can also work, but too slow. 
/*
 * Bootstrap, insert, look up and erase every key of a KVPool through a
 * seastore-backed node extent manager created with EAGAIN_PROBABILITY.
 *
 * Each logical operation is wrapped in repeat_eagain(). num_ops counts
 * logical operations, while num_ops_eagain is bumped on every invocation of
 * the wrapped lambda, so the value logged as "eagain rate" at the end is the
 * number of lambda invocations per logical operation.
 *
 * NOTE(review): repeat_eagain() presumably re-invokes the lambda when the
 * transaction fails with eagain -- its definition is outside this view.
 */
TEST_P(d_seastore_tm_test_t, 7_tree_insert_erase_eagain)
{
  run_async([this] {
    constexpr double EAGAIN_PROBABILITY = 0.1;
    constexpr bool TRACK_CURSORS = false;
    auto kvs = KVPool<test_item_t>::create_raw_range(
        {8, 11, 64, 128, 255, 256},
        {8, 13, 64, 512, 2035, 2048},
        {8, 16, 128, 576, 992, 1200},
        {0, 8}, {0, 10}, {0, 4});
    auto moved_nm = NodeExtentManager::create_seastore(
        *tm, L_ADDR_MIN, EAGAIN_PROBABILITY);
    // Keep a raw pointer before ownership moves into the TreeBuilder, so
    // eagain generation can be toggled later via set_generate_eagain().
    auto p_nm = static_cast<SeastoreNodeExtentManager<true>*>(moved_nm.get());
    auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, ExtendedValue>>(
        kvs, std::move(moved_nm));
    unsigned num_ops = 0;
    unsigned num_ops_eagain = 0;

    // bootstrap
    ++num_ops;
    repeat_eagain([this, &tree, &num_ops_eagain] {
      ++num_ops_eagain;
      // do_with() keeps the transaction alive for the whole continuation
      // chain; a fresh transaction is created on every invocation.
      return seastar::do_with(
        create_mutate_transaction(),
        [this, &tree](auto &t) {
          return INTR(tree->bootstrap, *t
          ).safe_then([this, &t] {
            return submit_transaction_fut(*t);
          });
        });
    }).unsafe_get0();
    epm->run_background_work_until_halt().get0();

    // insert
    logger().warn("start inserting {} kvs ...", kvs.size());
    {
      // One transaction per key, walking the pool's random iteration range.
      auto iter = kvs.random_begin();
      while (iter != kvs.random_end()) {
        ++num_ops;
        repeat_eagain([this, &tree, &num_ops_eagain, &iter] {
          ++num_ops_eagain;
          return seastar::do_with(
            create_mutate_transaction(),
            [this, &tree, &iter](auto &t) {
              return INTR_R(tree->insert_one, *t, iter
              ).safe_then([this, &t](auto cursor) {
                cursor.invalidate();
                return submit_transaction_fut(*t);
              });
            });
        }).unsafe_get0();
        epm->run_background_work_until_halt().get0();
        ++iter;
      }
    }

    {
      // get_stats runs outside repeat_eagain(), so eagain generation is
      // switched off around it and restored afterwards.
      p_nm->set_generate_eagain(false);
      auto t = create_read_transaction();
      INTR(tree->get_stats, *t).unsafe_get0();
      p_nm->set_generate_eagain(true);
    }

    // lookup
    logger().warn("start lookup {} kvs ...", kvs.size());
    {
      auto iter = kvs.begin();
      while (iter != kvs.end()) {
        ++num_ops;
        repeat_eagain([this, &tree, &num_ops_eagain, &iter] {
          ++num_ops_eagain;
          auto t = create_read_transaction();
          // The empty continuation only exists to own `t`, keeping the
          // read transaction alive until validate_one has resolved.
          return INTR_R(tree->validate_one, *t, iter
          ).safe_then([t=std::move(t)]{});
        }).unsafe_get0();
        ++iter;
      }
    }

    // erase
    logger().warn("start erase {} kvs ...", kvs.size());
    {
      kvs.shuffle();
      auto iter = kvs.random_begin();
      while (iter != kvs.random_end()) {
        ++num_ops;
        repeat_eagain([this, &tree, &num_ops_eagain, &iter] {
          ++num_ops_eagain;
          return seastar::do_with(
            create_mutate_transaction(),
            [this, &tree, &iter](auto &t) {
              return INTR_R(tree->erase_one, *t, iter
              ).safe_then([this, &t] () mutable {
                return submit_transaction_fut(*t);
              });
            });
        }).unsafe_get0();
        epm->run_background_work_until_halt().get0();
        ++iter;
      }
      // The pool itself is only trimmed after every key was erased from
      // the tree.
      kvs.erase_from_random(kvs.random_begin(), kvs.random_end());
    }

    {
      // Final checks with eagain generation off: the emptied tree must
      // validate and report a height of 1.
      p_nm->set_generate_eagain(false);
      auto t = create_read_transaction();
      INTR(tree->get_stats, *t).unsafe_get0();
      INTR(tree->validate, *t).unsafe_get0();
      EXPECT_EQ(INTR(tree->height,*t).unsafe_get0(), 1);
    }

    // we can adjust EAGAIN_PROBABILITY to get a proper eagain_rate
    double eagain_rate = num_ops_eagain;
    eagain_rate /= num_ops;
    logger().info("eagain rate: {}", eagain_rate);

    tree.reset();
  });
}
os, const delta_op_t op) { + switch (op) { + case delta_op_t::UPDATE_ID: + return os << "update_id"; + case delta_op_t::UPDATE_TAIL_MAGIC: + return os << "update_tail_magic"; + default: + return os << "unknown"; + } +} + +} // namespace crimson::os::seastore::onode + +#if FMT_VERSION >= 90000 +template<> struct fmt::formatter<crimson::os::seastore::onode::delta_op_t> : fmt::ostream_formatter {}; +#endif + +namespace crimson::os::seastore::onode { + +template <value_magic_t MAGIC, + string_size_t MAX_NS_SIZE, + string_size_t MAX_OID_SIZE, + value_size_t MAX_VALUE_PAYLOAD_SIZE, + extent_len_t INTERNAL_NODE_SIZE, + extent_len_t LEAF_NODE_SIZE, + bool DO_SPLIT_CHECK> +class TestValue final : public Value { + public: + static constexpr tree_conf_t TREE_CONF = { + MAGIC, + MAX_NS_SIZE, + MAX_OID_SIZE, + MAX_VALUE_PAYLOAD_SIZE, + INTERNAL_NODE_SIZE, + LEAF_NODE_SIZE, + DO_SPLIT_CHECK + }; + + using id_t = test_item_t::id_t; + using magic_t = test_item_t::magic_t; + struct magic_packed_t { + magic_t value; + } __attribute__((packed)); + + private: + struct payload_t { + id_t id; + } __attribute__((packed)); + + struct Replayable { + static void set_id(NodeExtentMutable& payload_mut, id_t id) { + auto p_payload = get_write(payload_mut); + p_payload->id = id; + } + + static void set_tail_magic(NodeExtentMutable& payload_mut, magic_t magic) { + auto length = payload_mut.get_length(); + auto offset_magic = length - sizeof(magic_t); + payload_mut.copy_in_relative(offset_magic, magic); + } + + private: + static payload_t* get_write(NodeExtentMutable& payload_mut) { + return reinterpret_cast<payload_t*>(payload_mut.get_write()); + } + }; + + public: + class Recorder final : public ValueDeltaRecorder { + + public: + Recorder(ceph::bufferlist& encoded) + : ValueDeltaRecorder(encoded) {} + ~Recorder() override = default; + + void encode_set_id(NodeExtentMutable& payload_mut, id_t id) { + auto& encoded = get_encoded(payload_mut); + ceph::encode(delta_op_t::UPDATE_ID, encoded); + 
ceph::encode(id, encoded); + } + + void encode_set_tail_magic(NodeExtentMutable& payload_mut, magic_t magic) { + auto& encoded = get_encoded(payload_mut); + ceph::encode(delta_op_t::UPDATE_TAIL_MAGIC, encoded); + ceph::encode(magic, encoded); + } + + protected: + value_magic_t get_header_magic() const override { + return TREE_CONF.value_magic; + } + + void apply_value_delta(ceph::bufferlist::const_iterator& delta, + NodeExtentMutable& payload_mut, + laddr_t value_addr) override { + delta_op_t op; + try { + ceph::decode(op, delta); + switch (op) { + case delta_op_t::UPDATE_ID: { + logger().debug("OTree::TestValue::Replay: decoding UPDATE_ID ..."); + id_t id; + ceph::decode(id, delta); + logger().debug("OTree::TestValue::Replay: apply id={} ...", id); + Replayable::set_id(payload_mut, id); + break; + } + case delta_op_t::UPDATE_TAIL_MAGIC: { + logger().debug("OTree::TestValue::Replay: decoding UPDATE_TAIL_MAGIC ..."); + magic_t magic; + ceph::decode(magic, delta); + logger().debug("OTree::TestValue::Replay: apply magic={} ...", magic); + Replayable::set_tail_magic(payload_mut, magic); + break; + } + default: + logger().error("OTree::TestValue::Replay: got unknown op {} when replay {:#x}+{:#x}", + op, value_addr, payload_mut.get_length()); + ceph_abort(); + } + } catch (buffer::error& e) { + logger().error("OTree::TestValue::Replay: got decode error {} when replay {:#x}+{:#x}", + e.what(), value_addr, payload_mut.get_length()); + ceph_abort(); + } + } + + private: + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } + }; + + TestValue(NodeExtentManager& nm, const ValueBuilder& vb, Ref<tree_cursor_t>& p_cursor) + : Value(nm, vb, p_cursor) {} + ~TestValue() override = default; + + id_t get_id() const { + return read_payload<payload_t>()->id; + } + void set_id_replayable(Transaction& t, id_t id) { + auto value_mutable = prepare_mutate_payload<payload_t, Recorder>(t); + if (value_mutable.second) { + 
value_mutable.second->encode_set_id(value_mutable.first, id); + } + Replayable::set_id(value_mutable.first, id); + } + + magic_t get_tail_magic() const { + auto p_payload = read_payload<payload_t>(); + auto offset_magic = get_payload_size() - sizeof(magic_t); + auto p_magic = reinterpret_cast<const char*>(p_payload) + offset_magic; + return reinterpret_cast<const magic_packed_t*>(p_magic)->value; + } + void set_tail_magic_replayable(Transaction& t, magic_t magic) { + auto value_mutable = prepare_mutate_payload<payload_t, Recorder>(t); + if (value_mutable.second) { + value_mutable.second->encode_set_tail_magic(value_mutable.first, magic); + } + Replayable::set_tail_magic(value_mutable.first, magic); + } + + /* + * tree_util.h related interfaces + */ + + using item_t = test_item_t; + + void initialize(Transaction& t, const item_t& item) { + ceph_assert(get_payload_size() + sizeof(value_header_t) == item.size); + set_id_replayable(t, item.id); + set_tail_magic_replayable(t, item.magic); + } + + void validate(const item_t& item) const { + ceph_assert(get_payload_size() + sizeof(value_header_t) == item.size); + ceph_assert(get_id() == item.id); + ceph_assert(get_tail_magic() == item.magic); + } +}; + +using UnboundedValue = TestValue< + value_magic_t::TEST_UNBOUND, 4096, 4096, 4096, 4096, 4096, false>; +using BoundedValue = TestValue< + value_magic_t::TEST_BOUNDED, 320, 320, 640, 4096, 4096, true>; +// should be the same configuration with FLTreeOnode +using ExtendedValue = TestValue< + value_magic_t::TEST_EXTENDED, 256, 2048, 1200, 8192, 16384, true>; + +} + +#if FMT_VERSION >= 90000 +template<> +struct fmt::formatter<crimson::os::seastore::onode::test_item_t> : fmt::ostream_formatter {}; +#endif diff --git a/src/test/crimson/seastore/test_block.cc b/src/test/crimson/seastore/test_block.cc new file mode 100644 index 000000000..f7a39b0ef --- /dev/null +++ b/src/test/crimson/seastore/test_block.cc @@ -0,0 +1,41 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; 
indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/seastore/test_block.h" + +namespace crimson::os::seastore { + + +ceph::bufferlist TestBlock::get_delta() { + ceph::bufferlist bl; + encode(delta, bl); + return bl; +} + + +void TestBlock::apply_delta(const ceph::bufferlist &bl) { + auto biter = bl.begin(); + decltype(delta) deltas; + decode(deltas, biter); + for (auto &&d : deltas) { + set_contents(d.val, d.offset, d.len); + } +} + +ceph::bufferlist TestBlockPhysical::get_delta() { + ceph::bufferlist bl; + encode(delta, bl); + return bl; +} + +void TestBlockPhysical::apply_delta_and_adjust_crc( + paddr_t, const ceph::bufferlist &bl) { + auto biter = bl.begin(); + decltype(delta) deltas; + decode(deltas, biter); + for (auto &&d : deltas) { + set_contents(d.val, d.offset, d.len); + } +} + +} diff --git a/src/test/crimson/seastore/test_block.h b/src/test/crimson/seastore/test_block.h new file mode 100644 index 000000000..ccdafb784 --- /dev/null +++ b/src/test/crimson/seastore/test_block.h @@ -0,0 +1,154 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <random> + +#include "crimson/os/seastore/transaction_manager.h" + +namespace crimson::os::seastore { + +struct test_extent_desc_t { + size_t len = 0; + unsigned checksum = 0; + + bool operator==(const test_extent_desc_t &rhs) const { + return (len == rhs.len && + checksum == rhs.checksum); + } + bool operator!=(const test_extent_desc_t &rhs) const { + return !(*this == rhs); + } +}; + +struct test_block_delta_t { + int8_t val = 0; + uint16_t offset = 0; + uint16_t len = 0; + + + DENC(test_block_delta_t, v, p) { + DENC_START(1, 1, p); + denc(v.val, p); + denc(v.offset, p); + denc(v.len, p); + DENC_FINISH(p); + } +}; + +inline std::ostream &operator<<( + std::ostream &lhs, const test_extent_desc_t &rhs) { + return lhs << "test_extent_desc_t(len=" << rhs.len + << ", checksum=" << rhs.checksum << ")"; +} + +struct 
TestBlock : crimson::os::seastore::LogicalCachedExtent { + constexpr static extent_len_t SIZE = 4<<10; + using Ref = TCachedExtentRef<TestBlock>; + + std::vector<test_block_delta_t> delta = {}; + + TestBlock(ceph::bufferptr &&ptr) + : LogicalCachedExtent(std::move(ptr)) {} + TestBlock(const TestBlock &other) + : LogicalCachedExtent(other) {} + + CachedExtentRef duplicate_for_write(Transaction&) final { + return CachedExtentRef(new TestBlock(*this)); + }; + + static constexpr extent_types_t TYPE = extent_types_t::TEST_BLOCK; + extent_types_t get_type() const final { + return TYPE; + } + + ceph::bufferlist get_delta() final; + + void set_contents(char c, uint16_t offset, uint16_t len) { + ::memset(get_bptr().c_str() + offset, c, len); + delta.push_back({c, offset, len}); + } + + void set_contents(char c) { + set_contents(c, 0, get_length()); + } + + test_extent_desc_t get_desc() { + return { get_length(), get_crc32c() }; + } + + void apply_delta(const ceph::bufferlist &bl) final; +}; +using TestBlockRef = TCachedExtentRef<TestBlock>; + +struct TestBlockPhysical : crimson::os::seastore::CachedExtent{ + constexpr static extent_len_t SIZE = 4<<10; + using Ref = TCachedExtentRef<TestBlockPhysical>; + + std::vector<test_block_delta_t> delta = {}; + + TestBlockPhysical(ceph::bufferptr &&ptr) + : CachedExtent(std::move(ptr)) {} + TestBlockPhysical(const TestBlockPhysical &other) + : CachedExtent(other) {} + + CachedExtentRef duplicate_for_write(Transaction&) final { + return CachedExtentRef(new TestBlockPhysical(*this)); + }; + + static constexpr extent_types_t TYPE = extent_types_t::TEST_BLOCK_PHYSICAL; + extent_types_t get_type() const final { + return TYPE; + } + + void set_contents(char c, uint16_t offset, uint16_t len) { + ::memset(get_bptr().c_str() + offset, c, len); + delta.push_back({c, offset, len}); + } + + void set_contents(char c) { + set_contents(c, 0, get_length()); + } + + ceph::bufferlist get_delta() final; + + void apply_delta_and_adjust_crc(paddr_t, const 
ceph::bufferlist &bl) final; +}; +using TestBlockPhysicalRef = TCachedExtentRef<TestBlockPhysical>; + +struct test_block_mutator_t { + std::uniform_int_distribution<int8_t> + contents_distribution = std::uniform_int_distribution<int8_t>( + std::numeric_limits<int8_t>::min(), + std::numeric_limits<int8_t>::max()); + + std::uniform_int_distribution<uint16_t> + offset_distribution = std::uniform_int_distribution<uint16_t>( + 0, TestBlock::SIZE - 1); + + std::uniform_int_distribution<uint16_t> length_distribution(uint16_t offset) { + return std::uniform_int_distribution<uint16_t>( + 0, TestBlock::SIZE - offset - 1); + } + + + template <typename generator_t> + void mutate(TestBlock &block, generator_t &gen) { + auto offset = offset_distribution(gen); + block.set_contents( + contents_distribution(gen), + offset, + length_distribution(offset)(gen)); + } +}; + +} + +WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::test_block_delta_t) + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter<crimson::os::seastore::test_extent_desc_t> : fmt::ostream_formatter {}; +template <> struct fmt::formatter<crimson::os::seastore::TestBlock> : fmt::ostream_formatter {}; +template <> struct fmt::formatter<crimson::os::seastore::TestBlockPhysical> : fmt::ostream_formatter {}; +#endif diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc new file mode 100644 index 000000000..f18c3ac67 --- /dev/null +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -0,0 +1,752 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include "crimson/common/log.h" + +#include "crimson/os/seastore/journal.h" +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/segment_manager/ephemeral.h" +#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" + +#include "test/crimson/seastore/test_block.h" + +namespace { 
+ [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace crimson::os::seastore::lba_manager; +using namespace crimson::os::seastore::lba_manager::btree; + +struct btree_test_base : + public seastar_test_suite_t, SegmentProvider, JournalTrimmer { + + segment_manager::EphemeralSegmentManagerRef segment_manager; + SegmentManagerGroupRef sms; + JournalRef journal; + ExtentPlacementManagerRef epm; + CacheRef cache; + + size_t block_size; + + WritePipeline pipeline; + + segment_id_t next; + + std::map<segment_id_t, segment_seq_t> segment_seqs; + std::map<segment_id_t, segment_type_t> segment_types; + + journal_seq_t dummy_tail; + + mutable segment_info_t tmp_info; + + btree_test_base() = default; + + /* + * JournalTrimmer interfaces + */ + journal_seq_t get_journal_head() const final { return dummy_tail; } + + void set_journal_head(journal_seq_t) final {} + + journal_seq_t get_dirty_tail() const final { return dummy_tail; } + + journal_seq_t get_alloc_tail() const final { return dummy_tail; } + + void update_journal_tails(journal_seq_t, journal_seq_t) final {} + + bool try_reserve_inline_usage(std::size_t) final { return true; } + + void release_inline_usage(std::size_t) final {} + + std::size_t get_trim_size_per_cycle() const final { + return 0; + } + + /* + * SegmentProvider interfaces + */ + const segment_info_t& get_seg_info(segment_id_t id) const final { + tmp_info = {}; + tmp_info.seq = segment_seqs.at(id); + tmp_info.type = segment_types.at(id); + return tmp_info; + } + + segment_id_t allocate_segment( + segment_seq_t seq, + segment_type_t type, + data_category_t, + rewrite_gen_t + ) final { + auto ret = next; + next = segment_id_t{ + segment_manager->get_device_id(), + next.device_segment_id() + 1}; + segment_seqs[ret] = seq; + segment_types[ret] = type; + return ret; + } + + void 
close_segment(segment_id_t) final {} + + void update_segment_avail_bytes(segment_type_t, paddr_t) final {} + + void update_modify_time(segment_id_t, sea_time_point, std::size_t) final {} + + SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); } + + virtual void complete_commit(Transaction &t) {} + seastar::future<> submit_transaction(TransactionRef t) + { + auto record = cache->prepare_record(*t, JOURNAL_SEQ_NULL, JOURNAL_SEQ_NULL); + return journal->submit_record(std::move(record), t->get_handle()).safe_then( + [this, t=std::move(t)](auto submit_result) mutable { + cache->complete_commit( + *t, + submit_result.record_block_base, + submit_result.write_result.start_seq); + complete_commit(*t); + }).handle_error(crimson::ct_error::assert_all{}); + } + + virtual LBAManager::mkfs_ret test_structure_setup(Transaction &t) = 0; + seastar::future<> set_up_fut() final { + segment_manager = segment_manager::create_test_ephemeral(); + return segment_manager->init( + ).safe_then([this] { + return segment_manager->mkfs( + segment_manager::get_ephemeral_device_config(0, 1, 0)); + }).safe_then([this] { + sms.reset(new SegmentManagerGroup()); + journal = journal::make_segmented(*this, *this); + epm.reset(new ExtentPlacementManager()); + cache.reset(new Cache(*epm)); + + block_size = segment_manager->get_block_size(); + next = segment_id_t{segment_manager->get_device_id(), 0}; + sms->add_segment_manager(segment_manager.get()); + epm->test_init_no_background(segment_manager.get()); + journal->set_write_pipeline(&pipeline); + + return journal->open_for_mkfs().discard_result(); + }).safe_then([this] { + dummy_tail = journal_seq_t{0, + paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0)}; + return epm->open_for_write(); + }).safe_then([this] { + return seastar::do_with( + cache->create_transaction( + Transaction::src_t::MUTATE, "test_set_up_fut", false), + [this](auto &ref_t) { + return with_trans_intr(*ref_t, [&](auto &t) { + 
cache->init(); + return cache->mkfs(t + ).si_then([this, &t] { + return test_structure_setup(t); + }); + }).safe_then([this, &ref_t] { + return submit_transaction(std::move(ref_t)); + }); + }); + }).handle_error( + crimson::ct_error::all_same_way([] { + ceph_assert(0 == "error"); + }) + ); + } + + virtual void test_structure_reset() {} + seastar::future<> tear_down_fut() final { + return cache->close( + ).safe_then([this] { + return journal->close(); + }).safe_then([this] { + return epm->close(); + }).safe_then([this] { + test_structure_reset(); + segment_manager.reset(); + sms.reset(); + journal.reset(); + epm.reset(); + cache.reset(); + }).handle_error( + crimson::ct_error::all_same_way([] { + ASSERT_FALSE("Unable to close"); + }) + ); + } +}; + +struct lba_btree_test : btree_test_base { + std::map<laddr_t, lba_map_val_t> check; + + auto get_op_context(Transaction &t) { + return op_context_t<laddr_t>{*cache, t}; + } + + LBAManager::mkfs_ret test_structure_setup(Transaction &t) final { + return cache->get_root( + t + ).si_then([this, &t](RootBlockRef croot) { + auto mut_croot = cache->duplicate_for_write( + t, croot + )->cast<RootBlock>(); + mut_croot->root.lba_root = + LBABtree::mkfs(mut_croot, get_op_context(t)); + }); + } + + template <typename F> + auto lba_btree_update(F &&f) { + auto tref = cache->create_transaction( + Transaction::src_t::MUTATE, "test_btree_update", false); + auto &t = *tref; + with_trans_intr( + t, + [this, tref=std::move(tref), f=std::forward<F>(f)](auto &t) mutable { + return cache->get_root( + t + ).si_then([f=std::move(f), &t](RootBlockRef croot) { + return seastar::do_with( + LBABtree(croot), + [f=std::move(f), &t](auto &btree) mutable { + return std::invoke( + std::move(f), btree, t + ); + }); + }).si_then([this, tref=std::move(tref)]() mutable { + return submit_transaction(std::move(tref)); + }); + }).unsafe_get0(); + } + + template <typename F> + auto lba_btree_read(F &&f) { + auto t = cache->create_transaction( + 
Transaction::src_t::READ, "test_btree_read", false); + return with_trans_intr( + *t, + [this, f=std::forward<F>(f)](auto &t) mutable { + return cache->get_root( + t + ).si_then([f=std::move(f), &t](RootBlockRef croot) mutable { + return seastar::do_with( + LBABtree(croot), + [f=std::move(f), &t](auto &btree) mutable { + return std::invoke( + std::move(f), btree, t + ); + }); + }); + }).unsafe_get0(); + } + + static auto get_map_val(extent_len_t len) { + return lba_map_val_t{0, (pladdr_t)P_ADDR_NULL, len, 0}; + } + + device_off_t next_off = 0; + paddr_t get_paddr() { + next_off += block_size; + return make_fake_paddr(next_off); + } + + void insert(laddr_t addr, extent_len_t len) { + ceph_assert(check.count(addr) == 0); + check.emplace(addr, get_map_val(len)); + lba_btree_update([=, this](auto &btree, auto &t) { + auto extent = cache->alloc_new_extent<TestBlock>( + t, + TestBlock::SIZE, + placement_hint_t::HOT, + 0, + get_paddr()); + return btree.insert( + get_op_context(t), addr, get_map_val(len), extent.get() + ).si_then([addr, extent](auto p){ + auto& [iter, inserted] = p; + assert(inserted); + extent->set_laddr(addr); + }); + }); + } + + void remove(laddr_t addr) { + auto iter = check.find(addr); + ceph_assert(iter != check.end()); + auto len = iter->second.len; + check.erase(iter++); + lba_btree_update([=, this](auto &btree, auto &t) { + return btree.lower_bound( + get_op_context(t), addr + ).si_then([this, len, addr, &btree, &t](auto iter) { + EXPECT_FALSE(iter.is_end()); + EXPECT_TRUE(iter.get_key() == addr); + EXPECT_TRUE(iter.get_val().len == len); + return btree.remove( + get_op_context(t), iter + ); + }); + }); + } + + void check_lower_bound(laddr_t addr) { + auto iter = check.lower_bound(addr); + auto result = lba_btree_read([=, this](auto &btree, auto &t) { + return btree.lower_bound( + get_op_context(t), addr + ).si_then([](auto iter) + -> std::optional<std::pair<const laddr_t, const lba_map_val_t>> { + if (iter.is_end()) { + return std::nullopt; + } 
else { + return std::make_optional( + std::make_pair(iter.get_key(), iter.get_val())); + } + }); + }); + if (iter == check.end()) { + EXPECT_FALSE(result); + } else { + EXPECT_TRUE(result); + decltype(result) to_check = *iter; + EXPECT_EQ(to_check, *result); + } + } +}; + +TEST_F(lba_btree_test, basic) +{ + run_async([this] { + constexpr unsigned total = 16<<10; + for (unsigned i = 0; i < total; i += 16) { + insert(i, 8); + } + + for (unsigned i = 0; i < total; i += 16) { + check_lower_bound(i); + check_lower_bound(i + 4); + check_lower_bound(i + 8); + check_lower_bound(i + 12); + } + }); +} + +struct btree_lba_manager_test : btree_test_base { + BtreeLBAManagerRef lba_manager; + + btree_lba_manager_test() = default; + + void complete_commit(Transaction &t) final {} + + LBAManager::mkfs_ret test_structure_setup(Transaction &t) final { + lba_manager.reset(new BtreeLBAManager(*cache)); + return lba_manager->mkfs(t); + } + + void test_structure_reset() final { + lba_manager.reset(); + } + + struct test_extent_t { + paddr_t addr; + size_t len = 0; + unsigned refcount = 0; + }; + using test_lba_mapping_t = std::map<laddr_t, test_extent_t>; + test_lba_mapping_t test_lba_mappings; + struct test_transaction_t { + TransactionRef t; + test_lba_mapping_t mappings; + }; + + auto create_transaction(bool create_fake_extent=true) { + auto t = test_transaction_t{ + cache->create_transaction( + Transaction::src_t::MUTATE, "test_mutate_lba", false), + test_lba_mappings + }; + if (create_fake_extent) { + cache->alloc_new_extent<TestBlockPhysical>( + *t.t, + TestBlockPhysical::SIZE, + placement_hint_t::HOT, + 0); + }; + return t; + } + + auto create_weak_transaction() { + auto t = test_transaction_t{ + cache->create_transaction( + Transaction::src_t::READ, "test_read_weak", true), + test_lba_mappings + }; + return t; + } + + void submit_test_transaction(test_transaction_t t) { + submit_transaction(std::move(t.t)).get(); + test_lba_mappings.swap(t.mappings); + } + + auto 
get_overlap(test_transaction_t &t, laddr_t addr, size_t len) { + auto bottom = t.mappings.upper_bound(addr); + if (bottom != t.mappings.begin()) + --bottom; + if (bottom != t.mappings.end() && + bottom->first + bottom->second.len <= addr) + ++bottom; + + auto top = t.mappings.lower_bound(addr + len); + return std::make_pair( + bottom, + top + ); + } + + device_off_t next_off = 0; + paddr_t get_paddr() { + next_off += block_size; + return make_fake_paddr(next_off); + } + + auto alloc_mapping( + test_transaction_t &t, + laddr_t hint, + size_t len) { + auto ret = with_trans_intr( + *t.t, + [=, this](auto &t) { + auto extent = cache->alloc_new_extent<TestBlock>( + t, + TestBlock::SIZE, + placement_hint_t::HOT, + 0, + get_paddr()); + return lba_manager->alloc_extent( + t, hint, len, extent->get_paddr(), *extent); + }).unsafe_get0(); + logger().debug("alloc'd: {}", *ret); + EXPECT_EQ(len, ret->get_length()); + auto [b, e] = get_overlap(t, ret->get_key(), len); + EXPECT_EQ(b, e); + t.mappings.emplace( + std::make_pair( + ret->get_key(), + test_extent_t{ + ret->get_val(), + ret->get_length(), + 1 + } + )); + return ret; + } + + auto decref_mapping( + test_transaction_t &t, + laddr_t addr) { + return decref_mapping(t, t.mappings.find(addr)); + } + + void decref_mapping( + test_transaction_t &t, + test_lba_mapping_t::iterator target) { + ceph_assert(target != t.mappings.end()); + ceph_assert(target->second.refcount > 0); + target->second.refcount--; + + (void) with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->decref_extent( + t, + target->first, + true + ).si_then([this, &t, target](auto result) { + EXPECT_EQ(result.refcount, target->second.refcount); + if (result.refcount == 0) { + return cache->retire_extent_addr( + t, result.addr.get_paddr(), result.length); + } + return Cache::retire_extent_iertr::now(); + }); + }).unsafe_get0(); + if (target->second.refcount == 0) { + t.mappings.erase(target); + } + } + + auto incref_mapping( + test_transaction_t &t, 
+ laddr_t addr) { + return incref_mapping(t, t.mappings.find(addr)); + } + + void incref_mapping( + test_transaction_t &t, + test_lba_mapping_t::iterator target) { + // the laddr overload forwards find()'s result unchecked, so reject end() + // before dereferencing it (same guard decref_mapping applies) + ceph_assert(target != t.mappings.end()); + ceph_assert(target->second.refcount > 0); + target->second.refcount++; + auto refcnt = with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->incref_extent( + t, + target->first); + }).unsafe_get0().refcount; + EXPECT_EQ(refcnt, target->second.refcount); + } + + std::vector<laddr_t> get_mapped_addresses() { + std::vector<laddr_t> addresses; + addresses.reserve(test_lba_mappings.size()); + for (auto &i: test_lba_mappings) { + addresses.push_back(i.first); + } + return addresses; + } + + std::vector<laddr_t> get_mapped_addresses(test_transaction_t &t) { + std::vector<laddr_t> addresses; + addresses.reserve(t.mappings.size()); + for (auto &i: t.mappings) { + addresses.push_back(i.first); + } + return addresses; + } + + void check_mappings() { + auto t = create_transaction(); + check_mappings(t); + } + + void check_mappings(test_transaction_t &t) { + (void)with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->check_child_trackers(t); + }).unsafe_get0(); + for (auto &&i: t.mappings) { + auto laddr = i.first; + auto len = i.second.len; + + auto ret_list = with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->get_mappings( + t, laddr, len); + }).unsafe_get0(); + EXPECT_EQ(ret_list.size(), 1); + auto &ret = *ret_list.begin(); + EXPECT_EQ(i.second.addr, ret->get_val()); + EXPECT_EQ(laddr, ret->get_key()); + EXPECT_EQ(len, ret->get_length()); + + auto ret_pin = with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->get_mapping( + t, laddr); + }).unsafe_get0(); + EXPECT_EQ(i.second.addr, ret_pin->get_val()); + EXPECT_EQ(laddr, ret_pin->get_key()); + EXPECT_EQ(len, ret_pin->get_length()); + } + with_trans_intr( + *t.t, + [=, &t, this](auto &) { + return lba_manager->scan_mappings( + *t.t, + 0, + L_ADDR_MAX, + [iter=t.mappings.begin(), &t](auto l, auto
p, auto len) mutable { + EXPECT_NE(iter, t.mappings.end()); + EXPECT_EQ(l, iter->first); + EXPECT_EQ(p, iter->second.addr); + EXPECT_EQ(len, iter->second.len); + ++iter; + }); + }).unsafe_get(); + } +}; + +TEST_F(btree_lba_manager_test, basic) +{ + run_async([this] { + laddr_t laddr = 0x12345678 * block_size; + { + // write initial mapping + auto t = create_transaction(); + check_mappings(t); // check in progress transaction sees mapping + check_mappings(); // check concurrent does not + auto ret = alloc_mapping(t, laddr, block_size); + submit_test_transaction(std::move(t)); + } + check_mappings(); // check new transaction post commit sees it + }); +} + +TEST_F(btree_lba_manager_test, force_split) +{ + run_async([this] { + for (unsigned i = 0; i < 40; ++i) { + auto t = create_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 5; ++j) { + auto ret = alloc_mapping(t, 0, block_size); + if ((i % 10 == 0) && (j == 3)) { + check_mappings(t); + check_mappings(); + } + } + logger().debug("submitting transaction"); + submit_test_transaction(std::move(t)); + check_mappings(); + } + }); +} + +TEST_F(btree_lba_manager_test, force_split_merge) +{ + run_async([this] { + for (unsigned i = 0; i < 80; ++i) { + auto t = create_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 5; ++j) { + auto ret = alloc_mapping(t, 0, block_size); + // just to speed things up a bit + if ((i % 100 == 0) && (j == 3)) { + check_mappings(t); + check_mappings(); + } + incref_mapping(t, ret->get_key()); + decref_mapping(t, ret->get_key()); + } + logger().debug("submitting transaction"); + submit_test_transaction(std::move(t)); + if (i % 50 == 0) { + check_mappings(); + } + } + { + auto addresses = get_mapped_addresses(); + auto t = create_transaction(); + for (unsigned i = 0; i != addresses.size(); ++i) { + if (i % 2 == 0) { + incref_mapping(t, addresses[i]); + decref_mapping(t, addresses[i]); + decref_mapping(t, addresses[i]); + } + 
logger().debug("submitting transaction"); + if (i % 7 == 0) { + submit_test_transaction(std::move(t)); + t = create_transaction(); + } + if (i % 13 == 0) { + check_mappings(); + check_mappings(t); + } + } + submit_test_transaction(std::move(t)); + } + { + auto addresses = get_mapped_addresses(); + auto t = create_transaction(); + for (unsigned i = 0; i != addresses.size(); ++i) { + incref_mapping(t, addresses[i]); + decref_mapping(t, addresses[i]); + decref_mapping(t, addresses[i]); + } + check_mappings(t); + submit_test_transaction(std::move(t)); + check_mappings(); + } + }); +} + +TEST_F(btree_lba_manager_test, single_transaction_split_merge) +{ + run_async([this] { + { + auto t = create_transaction(); + for (unsigned i = 0; i < 400; ++i) { + alloc_mapping(t, 0, block_size); + } + check_mappings(t); + submit_test_transaction(std::move(t)); + } + check_mappings(); + + { + auto addresses = get_mapped_addresses(); + auto t = create_transaction(); + for (unsigned i = 0; i != addresses.size(); ++i) { + if (i % 4 != 0) { + decref_mapping(t, addresses[i]); + } + } + check_mappings(t); + submit_test_transaction(std::move(t)); + } + check_mappings(); + + { + auto t = create_transaction(); + for (unsigned i = 0; i < 600; ++i) { + alloc_mapping(t, 0, block_size); + } + auto addresses = get_mapped_addresses(t); + for (unsigned i = 0; i != addresses.size(); ++i) { + decref_mapping(t, addresses[i]); + } + check_mappings(t); + submit_test_transaction(std::move(t)); + } + check_mappings(); + }); +} + +TEST_F(btree_lba_manager_test, split_merge_multi) +{ + run_async([this] { + auto iterate = [&](auto f) { + for (uint64_t i = 0; i < (1<<10); ++i) { + auto t = create_transaction(false); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 5; ++j) { + f(t, (i * 5) + j); + } + logger().debug("submitting transaction"); + submit_test_transaction(std::move(t)); + } + }; + iterate([&](auto &t, auto idx) { + alloc_mapping(t, idx * block_size, block_size); + }); + 
check_mappings(); + iterate([&](auto &t, auto idx) { + if ((idx % 32) > 0) { + decref_mapping(t, idx * block_size); + } + }); + check_mappings(); + iterate([&](auto &t, auto idx) { + if ((idx % 32) > 0) { + alloc_mapping(t, idx * block_size, block_size); + } + }); + check_mappings(); + iterate([&](auto &t, auto idx) { + decref_mapping(t, idx * block_size); + }); + check_mappings(); + }); +} diff --git a/src/test/crimson/seastore/test_cbjournal.cc b/src/test/crimson/seastore/test_cbjournal.cc new file mode 100644 index 000000000..0bf2d4135 --- /dev/null +++ b/src/test/crimson/seastore/test_cbjournal.cc @@ -0,0 +1,583 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include <random> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/async_cleaner.h" +#include "crimson/os/seastore/journal.h" +#include "crimson/os/seastore/journal/circular_bounded_journal.h" +#include "crimson/os/seastore/random_block_manager.h" +#include "crimson/os/seastore/random_block_manager/rbm_device.h" +#include "crimson/os/seastore/seastore_types.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" +#include "crimson/os/seastore/random_block_manager/block_rb_manager.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace crimson::os::seastore::journal; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +std::optional<record_t> decode_record( + bufferlist& bl) +{ + record_t record; + record_group_header_t r_header; + auto bliter = bl.cbegin(); + decode(r_header, bliter); + logger().debug(" decode_record mdlength {} records {}", + r_header.mdlength, r_header.records); + device_id_t d_id = 1 << (std::numeric_limits<device_id_t>::digits - 1); + + auto del_infos = try_decode_deltas(r_header, bl, + paddr_t::make_blk_paddr(d_id, 0)); + for 
(auto &iter : *del_infos) { + for (auto r : iter.deltas) { + record.deltas.push_back(r.second); + } + } + auto ex_infos = try_decode_extent_infos(r_header, bl); + auto bliter_ex = bl.cbegin(); + bliter_ex += r_header.mdlength; + for (auto &iter: *ex_infos) { + for (auto e : iter.extent_infos) { + extent_t ex; + auto bptr = bufferptr(ceph::buffer::create_page_aligned(e.len)); + logger().debug(" exten len {} remaining {} ", e.len, bliter_ex.get_remaining()); + bliter_ex.copy(e.len, bptr.c_str()); + ex.bl.append(bptr); + record.extents.push_back(ex); + } + } + return record; +} + +struct entry_validator_t { + bufferlist bl; + int entries; + record_t record; + segment_nonce_t magic = 0; + journal_seq_t seq; + + template <typename... T> + entry_validator_t(T&&... entry) : record(std::forward<T>(entry)...) {} + + void validate(record_t read) { + auto iter = read.extents.begin(); + for (auto &&block : record.extents) { + ASSERT_EQ( + iter->bl.length(), + block.bl.length()); + ASSERT_EQ( + iter->bl.begin().crc32c(iter->bl.length(), 1), + block.bl.begin().crc32c(block.bl.length(), 1)); + ++iter; + } + auto iter_delta = read.deltas.begin(); + for (auto &&block : record.deltas) { + ASSERT_EQ( + iter_delta->bl.length(), + block.bl.length()); + ASSERT_EQ( + iter_delta->bl.begin().crc32c(iter_delta->bl.length(), 1), + block.bl.begin().crc32c(block.bl.length(), 1)); + ++iter_delta; + } + } + void validate(CircularBoundedJournal &cbj) { + rbm_abs_addr offset = 0; + auto cursor = scan_valid_records_cursor(seq); + cbj.test_initialize_cursor(cursor); + for (int i = 0; i < entries; i++) { + paddr_t paddr = seq.offset.add_offset(offset); + cursor.seq.offset = paddr; + auto md = cbj.test_read_validate_record_metadata( + cursor, magic).unsafe_get0(); + assert(md); + auto& [header, md_bl] = *md; + auto dbuf = cbj.read( + paddr.add_offset(header.mdlength), + header.dlength).unsafe_get0(); + + bufferlist bl; + bl.append(md_bl); + bl.append(dbuf); + auto record = decode_record(bl); + 
validate(*record); + offset += header.mdlength + header.dlength; + cursor.last_committed = header.committed_to; + } + } + + rbm_abs_addr get_abs_addr() { + return convert_paddr_to_abs_addr(seq.offset); + } + + bool validate_delta(bufferlist bl) { + for (auto &&block : record.deltas) { + if (bl.begin().crc32c(bl.length(), 1) == + block.bl.begin().crc32c(block.bl.length(), 1)) { + return true; + } + } + return false; + } +}; + +struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer +{ + std::vector<entry_validator_t> entries; + std::unique_ptr<CircularBoundedJournal> cbj; + random_block_device::EphemeralRBMDeviceRef device; + + std::default_random_engine generator; + uint64_t block_size; + WritePipeline pipeline; + + cbjournal_test_t() = default; + + /* + * JournalTrimmer interfaces + */ + journal_seq_t get_journal_head() const { + return JOURNAL_SEQ_NULL; + } + + journal_seq_t get_dirty_tail() const final { + return JOURNAL_SEQ_NULL; + } + + journal_seq_t get_alloc_tail() const final { + return JOURNAL_SEQ_NULL; + } + + void set_journal_head(journal_seq_t head) final {} + + void update_journal_tails( + journal_seq_t dirty_tail, + journal_seq_t alloc_tail) final {} + + bool try_reserve_inline_usage(std::size_t) final { return true; } + + void release_inline_usage(std::size_t) final {} + + std::size_t get_trim_size_per_cycle() const final { + return 0; + } + + auto submit_record(record_t&& record) { + entries.push_back(record); + OrderingHandle handle = get_dummy_ordering_handle(); + auto [addr, w_result] = cbj->submit_record( + std::move(record), + handle).unsafe_get0(); + entries.back().seq = w_result.start_seq; + entries.back().entries = 1; + entries.back().magic = cbj->get_cjs().get_cbj_header().magic; + logger().debug("submit entry to addr {}", entries.back().seq); + return convert_paddr_to_abs_addr(entries.back().seq.offset); + } + + seastar::future<> tear_down_fut() final { + return close(); + } + + extent_t generate_extent(size_t blocks) { + 
// uniform_int_distribution<char> is undefined behaviour (char is not an + // allowed IntType); draw an int over char's range and narrow the result + std::uniform_int_distribution<int> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = static_cast<char>(distribution(generator)); + bufferlist bl; + bl.append(buffer::ptr(buffer::create(blocks * block_size, contents))); + return extent_t{extent_types_t::TEST_BLOCK, L_ADDR_NULL, bl}; + } + + delta_info_t generate_delta(size_t bytes) { + // uniform_int_distribution<char> is undefined behaviour (char is not an + // allowed IntType); draw an int over char's range and narrow the result + std::uniform_int_distribution<int> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = static_cast<char>(distribution(generator)); + bufferlist bl; + bl.append(buffer::ptr(buffer::create(bytes, contents))); + return delta_info_t{ + extent_types_t::TEST_BLOCK, + paddr_t{}, + L_ADDR_NULL, + 0, 0, + device->get_block_size(), + 1, + 0, + segment_type_t::JOURNAL, + bl + }; + } + + auto replay_and_check() { + for (auto &i : entries) { + i.validate(*(cbj.get())); + } + } + + auto replay() { + return cbj->replay( + [this](const auto &offsets, + const auto &e, + auto &dirty_seq, + auto &alloc_seq, + auto last_modified) { + bool found = false; + for (auto &i : entries) { + paddr_t base = offsets.write_result.start_seq.offset; + rbm_abs_addr addr = convert_paddr_to_abs_addr(base); + if (addr == i.get_abs_addr()) { + logger().debug(" compare addr: {} and i.addr {} ", base, i.get_abs_addr()); + found = i.validate_delta(e.bl); + break; + } + } + assert(found == true); + return Journal::replay_ertr::make_ready_future<bool>(true); + }); + } + + auto mkfs() { + device_config_t config = get_rbm_ephemeral_device_config(0, 1); + return device->mkfs(config + ).safe_then([this]() { + return device->mount( + ).safe_then([this]() { + return cbj->open_for_mkfs( + ).safe_then([](auto q) { + return seastar::now(); + }); + }); + }).safe_then([this] { + return cbj->close(); + }); + } + auto open() { + return cbj->open_for_mount( + ).safe_then([](auto q) { + return seastar::now(); + }); + } + seastar::future<> close() { + return cbj->close().handle_error(crimson::ct_error::assert_all{});
+ } + auto get_records_available_size() { + return cbj->get_cjs().get_records_available_size(); + } + auto get_records_total_size() { + return cbj->get_cjs().get_records_total_size(); + } + auto get_block_size() { + return device->get_block_size(); + } + auto get_written_to_rbm_addr() { + return cbj->get_rbm_addr(cbj->get_cjs().get_written_to()); + } + auto get_written_to() { + return cbj->get_cjs().get_written_to(); + } + auto get_journal_tail() { + return cbj->get_dirty_tail(); + } + auto get_records_used_size() { + return cbj->get_cjs().get_records_used_size(); + } + bool is_available_size(uint64_t size) { + return cbj->get_cjs().is_available_size(size); + } + void update_journal_tail(rbm_abs_addr addr, uint32_t len) { + paddr_t paddr = + convert_abs_addr_to_paddr( + addr + len, + cbj->get_device_id()); + journal_seq_t seq = {0, paddr}; + cbj->update_journal_tail( + seq, + seq + ).get0(); + } + void set_written_to(journal_seq_t seq) { + cbj->set_written_to(seq); + } + + seastar::future<> set_up_fut() final { + device = random_block_device::create_test_ephemeral( + random_block_device::DEFAULT_TEST_CBJOURNAL_SIZE, 0); + cbj.reset(new CircularBoundedJournal(*this, device.get(), std::string())); + block_size = device->get_block_size(); + cbj->set_write_pipeline(&pipeline); + return mkfs( + ).safe_then([this] { + return replay( + ).safe_then([this] { + return open( + ).safe_then([this] { + return replay(); + }); + }); + }).handle_error(crimson::ct_error::assert_all{}); + } +}; + +TEST_F(cbjournal_test_t, submit_one_record) +{ + run_async([this] { + submit_record( + record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(3), generate_delta(4) } + }); + replay_and_check(); + }); +} + +TEST_F(cbjournal_test_t, submit_three_records) +{ + run_async([this] { + submit_record( + record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(3), generate_delta(4) } + }); + submit_record( + record_t{ + { generate_extent(8), generate_extent(9) }, 
+ { generate_delta(20), generate_delta(21) } + }); + submit_record( + record_t{ + { generate_extent(5), generate_extent(6) }, + { generate_delta(200), generate_delta(210) } + }); + replay_and_check(); + }); +} + +TEST_F(cbjournal_test_t, submit_full_records) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + + update_journal_tail(entries.back().get_abs_addr(), record_total_size); + ASSERT_EQ(get_records_total_size(), + get_records_available_size()); + + // will be appended at the begining of log + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + ASSERT_TRUE(record_total_size > get_records_available_size()); + }); +} + +TEST_F(cbjournal_test_t, boudary_check_verify) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + + uint64_t avail = get_records_available_size(); + // forward 2 recod size here because 1 block is reserved between head and tail + 
update_journal_tail(entries.front().get_abs_addr(), record_total_size * 2); + entries.erase(entries.begin()); + entries.erase(entries.begin()); + ASSERT_EQ(avail + (record_total_size * 2), get_records_available_size()); + avail = get_records_available_size(); + // will be appended at the beginning of WAL + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + ASSERT_TRUE(avail - record_total_size >= get_records_available_size()); + replay_and_check(); + }); +} + +TEST_F(cbjournal_test_t, update_header) +{ + run_async([this] { + auto [header, _buf] = *(cbj->get_cjs().read_header().unsafe_get0()); + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + + update_journal_tail(entries.front().get_abs_addr(), record_total_size); + cbj->get_cjs().write_header().unsafe_get0(); + auto [update_header, update_buf2] = *(cbj->get_cjs().read_header().unsafe_get0()); + cbj->close().unsafe_get0(); + replay().unsafe_get0(); + + // compare the tail read back from the on-disk header with the tail the + // journal reports after replay (the old check compared a value to itself) + ASSERT_EQ(update_header.dirty_tail.offset, get_journal_tail().offset); + }); +} + +TEST_F(cbjournal_test_t, replay) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + // will be appended at the beginning of WAL + uint64_t avail = get_records_available_size(); + update_journal_tail(entries.front().get_abs_addr(), record_total_size * 2); + entries.erase(entries.begin()); +
entries.erase(entries.begin()); + ASSERT_EQ(avail + (record_total_size * 2), get_records_available_size()); + avail = get_records_available_size(); + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + ASSERT_TRUE(avail - record_total_size >= get_records_available_size()); + cbj->close().unsafe_get0(); + replay().unsafe_get0(); + }); +} + +TEST_F(cbjournal_test_t, replay_after_reset) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + auto old_written_to = get_written_to(); + auto old_used_size = get_records_used_size(); + set_written_to( + journal_seq_t{0, + convert_abs_addr_to_paddr( + cbj->get_records_start(), + cbj->get_device_id())}); + cbj->close().unsafe_get0(); + replay().unsafe_get0(); + ASSERT_EQ(old_written_to, get_written_to()); + ASSERT_EQ(old_used_size, + get_records_used_size()); + }); +} + +TEST_F(cbjournal_test_t, multiple_submit_at_end) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + update_journal_tail(entries.front().get_abs_addr(), record_total_size * 8); + for (int i = 0; i < 8; i++) { + entries.erase(entries.begin()); + } + seastar::parallel_for_each( 
+ boost::make_counting_iterator(0u), + boost::make_counting_iterator(4u), + [&](auto) { + return seastar::async([&] { + auto writes = 0; + while (writes < 2) { + record_t rec { + { generate_extent(1) }, + { generate_delta(20) } }; + submit_record(std::move(rec)); + writes++; + } + }); + }).get0(); + auto old_written_to = get_written_to(); + cbj->close().unsafe_get0(); + cbj->replay( + [](const auto &offsets, + const auto &e, + auto &dirty_seq, + auto &alloc_seq, + auto last_modified) { + return Journal::replay_ertr::make_ready_future<bool>(true); + }).unsafe_get0(); + assert(get_written_to() == old_written_to); + }); +} diff --git a/src/test/crimson/seastore/test_collection_manager.cc b/src/test/crimson/seastore/test_collection_manager.cc new file mode 100644 index 000000000..cedcc5e8f --- /dev/null +++ b/src/test/crimson/seastore/test_collection_manager.cc @@ -0,0 +1,195 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "os/ObjectStore.h" +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/collection_manager.h" + +#include "test/crimson/seastore/test_block.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + + +#define TEST_COLL_FORWARD(METHOD) \ + template <typename... Args> \ + auto METHOD(coll_root_t &root, Transaction &t, Args&&... args) const { \ + return with_trans_intr( \ + t, \ + [this](auto &t, auto &root, auto&&... 
args) { \ + return collection_manager->METHOD( \ + root, \ + t, \ + std::forward<decltype(args)>(args)...); \ + }, \ + root, \ + std::forward<Args>(args)...).unsafe_get0(); \ + } + +struct collection_manager_test_t : + public seastar_test_suite_t, + TMTestState { + + CollectionManagerRef collection_manager; + + collection_manager_test_t() {} + + seastar::future<> set_up_fut() final { + return tm_setup().then([this] { + collection_manager = collection_manager::create_coll_manager(*tm); + return seastar::now(); + }); + } + + seastar::future<> tear_down_fut() final { + return tm_teardown().then([this] { + collection_manager.reset(); + return seastar::now(); + }); + } + + using test_collection_t = std::map<coll_t, coll_info_t>; + test_collection_t test_coll_mappings; + + void replay() { + restart(); + collection_manager = collection_manager::create_coll_manager(*tm); + } + + auto get_root() { + auto tref = create_mutate_transaction(); + auto coll_root = with_trans_intr( + *tref, + [this](auto &t) { + return collection_manager->mkfs(t); + }).unsafe_get0(); + submit_transaction(std::move(tref)); + return coll_root; + } + + TEST_COLL_FORWARD(remove) + TEST_COLL_FORWARD(list) + TEST_COLL_FORWARD(create) + TEST_COLL_FORWARD(update) + + void checking_mappings(coll_root_t &coll_root, Transaction &t) { + auto coll_list = list(coll_root, t); + EXPECT_EQ(test_coll_mappings.size(), coll_list.size()); + for (std::pair<coll_t, coll_info_t> p : test_coll_mappings) { + EXPECT_NE( + std::find(coll_list.begin(), coll_list.end(), p), + coll_list.end()); + } + } + + void checking_mappings(coll_root_t &coll_root) { + auto t = create_read_transaction(); + checking_mappings(coll_root, *t); + } +}; + +TEST_P(collection_manager_test_t, basic) +{ + run_async([this] { + coll_root_t coll_root = get_root(); + { + auto t = create_mutate_transaction(); + for (int i = 0; i < 20; i++) { + coll_t cid(spg_t(pg_t(i+1,i+2), shard_id_t::NO_SHARD)); + create(coll_root, *t, cid, coll_info_t(i)); + 
test_coll_mappings.emplace(cid, coll_info_t(i)); + } + checking_mappings(coll_root, *t); + submit_transaction(std::move(t)); + EXPECT_EQ(test_coll_mappings.size(), 20); + } + + replay(); + checking_mappings(coll_root); + { + auto t = create_mutate_transaction(); + for (auto iter = test_coll_mappings.begin(); + iter != test_coll_mappings.end();) { + remove(coll_root, *t, iter->first); + iter = test_coll_mappings.erase(iter); + } + submit_transaction(std::move(t)); + } + replay(); + { + auto t = create_mutate_transaction(); + auto list_ret = list(coll_root, *t); + submit_transaction(std::move(t)); + EXPECT_EQ(list_ret.size(), test_coll_mappings.size()); + } + }); +} + +TEST_P(collection_manager_test_t, overflow) +{ + run_async([this] { + coll_root_t coll_root = get_root(); + auto old_location = coll_root.get_location(); + + auto t = create_mutate_transaction(); + for (int i = 0; i < 412; i++) { + coll_t cid(spg_t(pg_t(i+1,i+2), shard_id_t::NO_SHARD)); + create(coll_root, *t, cid, coll_info_t(i)); + test_coll_mappings.emplace(cid, coll_info_t(i)); + } + submit_transaction(std::move(t)); + EXPECT_NE(old_location, coll_root.get_location()); + checking_mappings(coll_root); + + replay(); + checking_mappings(coll_root); + }); +} + +TEST_P(collection_manager_test_t, update) +{ + run_async([this] { + coll_root_t coll_root = get_root(); + { + auto t = create_mutate_transaction(); + for (int i = 0; i < 2; i++) { + coll_t cid(spg_t(pg_t(1,i+1), shard_id_t::NO_SHARD)); + create(coll_root, *t, cid, coll_info_t(i)); + test_coll_mappings.emplace(cid, coll_info_t(i)); + } + submit_transaction(std::move(t)); + } + { + auto iter1= test_coll_mappings.begin(); + auto iter2 = std::next(test_coll_mappings.begin(), 1); + EXPECT_NE(iter1->second.split_bits, iter2->second.split_bits); + auto t = create_mutate_transaction(); + update(coll_root, *t, iter1->first, iter2->second); + submit_transaction(std::move(t)); + iter1->second.split_bits = iter2->second.split_bits; + } + replay(); + 
checking_mappings(coll_root); + }); +} + +INSTANTIATE_TEST_SUITE_P( + collection_manager_test, + collection_manager_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); diff --git a/src/test/crimson/seastore/test_extent_allocator.cc b/src/test/crimson/seastore/test_extent_allocator.cc new file mode 100644 index 000000000..8217e5a66 --- /dev/null +++ b/src/test/crimson/seastore/test_extent_allocator.cc @@ -0,0 +1,181 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <random> + +#include <boost/iterator/counting_iterator.hpp> + +#include "test/crimson/gtest_seastar.h" +#include "crimson/os/seastore/random_block_manager.h" +#include "crimson/os/seastore/random_block_manager/extent_allocator.h" +#include "crimson/os/seastore/random_block_manager/avlallocator.h" +#include "include/interval_set.h" + + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct allocator_test_t : + public seastar_test_suite_t, + ::testing::WithParamInterface<const char*> { + std::random_device rd; + std::mt19937 gen; + ExtentAllocatorRef allocator; + + allocator_test_t() + : gen(rd()) {} + + seastar::future<> set_up_fut() final { + std::string a_type = GetParam(); + if (a_type == "avl") { + allocator.reset(new AvlAllocator(false)); + return seastar::now(); + } + ceph_assert(0 == "no support"); + } + seastar::future<> tear_down_fut() final { + if (allocator) { + allocator->close(); + } + return seastar::now(); + } + void init_alloc(uint64_t block_size, uint64_t total_size) { + assert(allocator); + allocator->init(0, total_size, block_size); + } + void close() { + assert(allocator); + allocator->close(); + } + auto allocate(size_t size) { + return allocator->alloc_extent(size); + } + void free(uint64_t start, uint64_t length) { + 
allocator->free_extent(start, length);
+  }
+  // Returns a block-aligned value: block_size times a uniformly drawn
+  // integer in [0, capacity / block_size - 1].
+  rbm_abs_addr get_random_addr(size_t block_size, size_t capacity) {
+    return block_size *
+      std::uniform_int_distribution<>(0, (capacity / block_size) - 1)(gen);
+  }
+};
+
+// Re-initialises the allocator with several block size / capacity pairs and
+// checks the reported available size right after init and after a single
+// 8192-byte allocation.
+TEST_P(allocator_test_t, test_alloc_init)
+{
+  init_alloc(4096, 4096 * 64);
+  ASSERT_EQ((4096 * 64), allocator->get_available_size());
+  close();
+  init_alloc(8192, 8192 * 32);
+  allocate(8192);
+  ASSERT_EQ(8192 * 32 - 8192, allocator->get_available_size());
+  close();
+  init_alloc(4096, 4096 * 128);
+  allocate(8192);
+  ASSERT_EQ(4096 * 128 - 8192, allocator->get_available_size());
+}
+
+// Allocates one extent of get_max_alloc_size() bytes, then frees
+// [0, get_max_alloc_size()) and expects the available size to return to its
+// initial value.
+// NOTE(review): freeing from offset 0 presumes the allocation was placed at
+// the start of the device -- confirm against the allocator.
+TEST_P(allocator_test_t, test_init_alloc_free)
+{
+  uint64_t block_size = 4096;
+  uint64_t capacity = 4 * 1024 * block_size;
+
+  {
+    init_alloc(block_size, capacity);
+
+    auto free_length = allocator->get_available_size();
+    allocate(allocator->get_max_alloc_size());
+    ASSERT_EQ(free_length - allocator->get_max_alloc_size(),
+	      allocator->get_available_size());
+
+    free(0, allocator->get_max_alloc_size());
+    ASSERT_EQ(free_length, allocator->get_available_size());
+  }
+}
+
+// Marks blocks [0, 256) and [512, 768) as used and expects a 512-block
+// allocation to return no value; repeats the check after freeing [0, 256)
+// and marking [0, 512) as used.
+TEST_P(allocator_test_t, test_alloc_failure)
+{
+  uint64_t block_size = 8192;
+  uint64_t capacity = 1024 * block_size;
+
+  {
+    init_alloc(block_size, capacity);
+    allocator->mark_extent_used(0, block_size * 256);
+    allocator->mark_extent_used(block_size * 512, block_size * 256);
+
+    auto result = allocate(block_size * 512);
+    ASSERT_EQ(false, result.has_value());
+
+    free(0, block_size * 256);
+    allocator->mark_extent_used(0, block_size * 512);
+
+    result = allocate(block_size * 512);
+    ASSERT_EQ(false, result.has_value());
+  }
+}
+
+// Marks random, non-overlapping extents as used while mirroring them in
+// alloc_map, and checks that get_available_size() always matches the locally
+// tracked `avail` across mark / free cycles.
+TEST_P(allocator_test_t, test_random_alloc_verify)
+{
+  uint64_t block_size = 4096;
+  uint64_t capacity = 64 * 1024 * block_size;
+  uint64_t avail = capacity;
+  interval_set<rbm_abs_addr> alloc_map;
+  init_alloc(block_size, capacity);
+
+  {
+    for (int i = 0; i < 256; i++) {
+      auto addr = get_random_addr(block_size, capacity);
+      auto size = get_random_addr(block_size, capacity) % (4 << 20);
+      // Skip candidates that are empty, run past the device, or overlap an
+      // extent already recorded in alloc_map.
+      if (addr + size > capacity || size == 0 ||
+	  alloc_map.intersects(addr, size) ) continue;
+      allocator->mark_extent_used(addr, size);
+      alloc_map.insert(addr, size);
+      avail -= size;
+    }
+    ASSERT_EQ(avail, allocator->get_available_size());
+
+    // Release every tracked extent.  Always take the current first interval
+    // instead of range-iterating: erase() removes the very interval a
+    // range-for iterator would still be pointing at, so advancing that
+    // iterator afterwards is not safe.
+    while (!alloc_map.empty()) {
+      const auto p = *alloc_map.begin();
+      free(p.first, p.second);
+      avail += p.second;
+      alloc_map.erase(p.first, p.second);
+      ASSERT_EQ(avail, allocator->get_available_size());
+    }
+    ASSERT_EQ(capacity, allocator->get_available_size());
+
+    for (int i = 0; i < 100; i++) {
+      auto addr = get_random_addr(block_size, capacity);
+      auto size = get_random_addr(block_size, capacity) % (4 << 20);
+      if (addr + size > capacity || size == 0 ||
+	  alloc_map.intersects(addr, size) ) continue;
+      allocator->mark_extent_used(addr, size);
+      alloc_map.insert(addr, size);
+      avail -= size;
+    }
+
+    for (int i = 0; i < 50; i++) {
+      // begin() must not be dereferenced on an empty set; stop freeing if
+      // the random fill left nothing to release.
+      if (alloc_map.empty()) break;
+      const auto victim = *alloc_map.begin();
+      free(victim.first, victim.second);
+      avail += victim.second;
+      alloc_map.erase(victim.first, victim.second);
+      ASSERT_EQ(avail, allocator->get_available_size());
+
+      auto addr = get_random_addr(block_size, capacity);
+      auto size = get_random_addr(block_size, capacity) % (4 << 20);
+      if (addr + size > capacity || size == 0 ||
+	  alloc_map.intersects(addr, size) ) continue;
+      allocator->mark_extent_used(addr, size);
+      alloc_map.insert(addr, size);
+      avail -= size;
+    }
+    ASSERT_EQ(avail, allocator->get_available_size());
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+  allocator_test,
+  allocator_test_t,
+  ::testing::Values("avl"));
diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc
new file mode 100644
index 000000000..6510cb5d9
--- /dev/null
+++ b/src/test/crimson/seastore/test_object_data_handler.cc
@@ -0,0 +1,431 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/gtest_seastar.h"
+#include 
"test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/onode.h" +#include "crimson/os/seastore/object_data_handler.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +#define MAX_OBJECT_SIZE (16<<20) +#define DEFAULT_OBJECT_DATA_RESERVATION (16<<20) +#define DEFAULT_OBJECT_METADATA_RESERVATION (16<<20) + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +class TestOnode final : public Onode { + onode_layout_t layout; + bool dirty = false; + +public: + TestOnode(uint32_t ddr, uint32_t dmr) : Onode(ddr, dmr) {} + const onode_layout_t &get_layout() const final { + return layout; + } + onode_layout_t &get_mutable_layout(Transaction &t) final { + dirty = true; + return layout; + } + bool is_alive() const { + return true; + } + bool is_dirty() const { return dirty; } + laddr_t get_hint() const final {return L_ADDR_MIN; } + ~TestOnode() final = default; +}; + +struct object_data_handler_test_t: + public seastar_test_suite_t, + TMTestState { + OnodeRef onode; + + bufferptr known_contents; + extent_len_t size = 0; + + object_data_handler_test_t() {} + + void write(Transaction &t, objaddr_t offset, extent_len_t len, char fill) { + ceph_assert(offset + len <= known_contents.length()); + size = std::max<extent_len_t>(size, offset + len); + memset( + known_contents.c_str() + offset, + fill, + len); + bufferlist bl; + bl.append( + bufferptr( + known_contents, + offset, + len)); + with_trans_intr(t, [&](auto &t) { + return ObjectDataHandler(MAX_OBJECT_SIZE).write( + ObjectDataHandler::context_t{ + *tm, + t, + *onode, + }, + offset, + bl); + }).unsafe_get0(); + } + void write(objaddr_t offset, extent_len_t len, char fill) { + auto t = create_mutate_transaction(); + write(*t, offset, len, fill); + return submit_transaction(std::move(t)); + } + + void truncate(Transaction &t, objaddr_t offset) { + if (size > offset) { + memset( + 
known_contents.c_str() + offset, + 0, + size - offset); + with_trans_intr(t, [&](auto &t) { + return ObjectDataHandler(MAX_OBJECT_SIZE).truncate( + ObjectDataHandler::context_t{ + *tm, + t, + *onode + }, + offset); + }).unsafe_get0(); + } + size = offset; + } + void truncate(objaddr_t offset) { + auto t = create_mutate_transaction(); + truncate(*t, offset); + return submit_transaction(std::move(t)); + } + + void read(Transaction &t, objaddr_t offset, extent_len_t len) { + bufferlist bl = with_trans_intr(t, [&](auto &t) { + return ObjectDataHandler(MAX_OBJECT_SIZE).read( + ObjectDataHandler::context_t{ + *tm, + t, + *onode + }, + offset, + len); + }).unsafe_get0(); + bufferlist known; + known.append( + bufferptr( + known_contents, + offset, + len)); + EXPECT_EQ(bl.length(), known.length()); + EXPECT_EQ(bl, known); + } + void read(objaddr_t offset, extent_len_t len) { + auto t = create_read_transaction(); + read(*t, offset, len); + } + void read_near(objaddr_t offset, extent_len_t len, extent_len_t fuzz) { + auto fuzzes = std::vector<int32_t>{-1 * (int32_t)fuzz, 0, (int32_t)fuzz}; + for (auto left_fuzz : fuzzes) { + for (auto right_fuzz : fuzzes) { + read(offset + left_fuzz, len - left_fuzz + right_fuzz); + } + } + } + std::list<LBAMappingRef> get_mappings(objaddr_t offset, extent_len_t length) { + auto t = create_mutate_transaction(); + auto ret = with_trans_intr(*t, [&](auto &t) { + return tm->get_pins(t, offset, length); + }).unsafe_get0(); + return ret; + } + + seastar::future<> set_up_fut() final { + onode = new TestOnode( + DEFAULT_OBJECT_DATA_RESERVATION, + DEFAULT_OBJECT_METADATA_RESERVATION); + known_contents = buffer::create(4<<20 /* 4MB */); + memset(known_contents.c_str(), 0, known_contents.length()); + size = 0; + return tm_setup(); + } + + seastar::future<> tear_down_fut() final { + onode.reset(); + size = 0; + return tm_teardown(); + } +}; + +TEST_P(object_data_handler_test_t, single_write) +{ + run_async([this] { + write(1<<20, 8<<10, 'c'); + + 
read_near(1<<20, 8<<10, 1); + read_near(1<<20, 8<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, multi_write) +{ + run_async([this] { + write((1<<20) - (4<<10), 4<<10, 'a'); + write(1<<20, 4<<10, 'b'); + write((1<<20) + (4<<10), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20)-(4<<10), 12<<10, 1); + read_near((1<<20)-(4<<10), 12<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, write_hole) +{ + run_async([this] { + write((1<<20) - (4<<10), 4<<10, 'a'); + // hole at 1<<20 + write((1<<20) + (4<<10), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20)-(4<<10), 12<<10, 1); + read_near((1<<20)-(4<<10), 12<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, overwrite_single) +{ + run_async([this] { + write((1<<20), 4<<10, 'a'); + write((1<<20), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, overwrite_double) +{ + run_async([this] { + write((1<<20), 4<<10, 'a'); + write((1<<20)+(4<<10), 4<<10, 'c'); + write((1<<20), 8<<10, 'b'); + + read_near(1<<20, 8<<10, 1); + read_near(1<<20, 8<<10, 512); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20) + (4<<10), 4<<10, 1); + read_near((1<<20) + (4<<10), 4<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, overwrite_partial) +{ + run_async([this] { + write((1<<20), 12<<10, 'a'); + read_near(1<<20, 12<<10, 1); + + write((1<<20)+(8<<10), 4<<10, 'b'); + read_near(1<<20, 12<<10, 1); + + write((1<<20)+(4<<10), 4<<10, 'c'); + read_near(1<<20, 12<<10, 1); + + write((1<<20), 4<<10, 'd'); + + read_near(1<<20, 12<<10, 1); + read_near(1<<20, 12<<10, 512); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20) + (4<<10), 4<<10, 1); + read_near((1<<20) + (4<<10), 4<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, unaligned_write) +{ + run_async([this] { + objaddr_t base 
= 1<<20; + write(base, (4<<10)+(1<<10), 'a'); + read_near(base-(4<<10), 12<<10, 512); + + base = (1<<20) + (64<<10); + write(base+(1<<10), (4<<10)+(1<<10), 'b'); + read_near(base-(4<<10), 12<<10, 512); + + base = (1<<20) + (128<<10); + write(base-(1<<10), (4<<10)+(2<<20), 'c'); + read_near(base-(4<<10), 12<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, unaligned_overwrite) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, (128<<10) + (16<<10), 'x'); + + write(base, (4<<10)+(1<<10), 'a'); + read_near(base-(4<<10), 12<<10, 2<<10); + + base = (1<<20) + (64<<10); + write(base+(1<<10), (4<<10)+(1<<10), 'b'); + read_near(base-(4<<10), 12<<10, 2<<10); + + base = (1<<20) + (128<<10); + write(base-(1<<10), (4<<10)+(2<<20), 'c'); + read_near(base-(4<<10), 12<<10, 2<<10); + + read(base, (128<<10) + (16<<10)); + }); +} + +TEST_P(object_data_handler_test_t, truncate) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, 8<<10, 'a'); + write(base+(8<<10), 8<<10, 'b'); + write(base+(16<<10), 8<<10, 'c'); + + truncate(base + (32<<10)); + read(base, 64<<10); + + truncate(base + (24<<10)); + read(base, 64<<10); + + truncate(base + (12<<10)); + read(base, 64<<10); + + truncate(base - (12<<10)); + read(base, 64<<10); + }); +} + +TEST_P(object_data_handler_test_t, no_split) { + run_async([this] { + write(0, 8<<10, 'x'); + write(0, 8<<10, 'a'); + + auto pins = get_mappings(0, 8<<10); + EXPECT_EQ(pins.size(), 1); + + read(0, 8<<10); + }); +} + +TEST_P(object_data_handler_test_t, split_left) { + run_async([this] { + write(0, 128<<10, 'x'); + + write(64<<10, 60<<10, 'a'); + + auto pins = get_mappings(0, 128<<10); + EXPECT_EQ(pins.size(), 2); + + size_t res[2] = {0, 64<<10}; + auto base = pins.front()->get_key(); + int i = 0; + for (auto &pin : pins) { + EXPECT_EQ(pin->get_key() - base, res[i]); + i++; + } + read(0, 128<<10); + }); +} + +TEST_P(object_data_handler_test_t, split_right) { + run_async([this] { + write(0, 128<<10, 'x'); + write(4<<10, 60<<10, 
'a'); + + auto pins = get_mappings(0, 128<<10); + EXPECT_EQ(pins.size(), 2); + + size_t res[2] = {0, 64<<10}; + auto base = pins.front()->get_key(); + int i = 0; + for (auto &pin : pins) { + EXPECT_EQ(pin->get_key() - base, res[i]); + i++; + } + read(0, 128<<10); + }); +} +TEST_P(object_data_handler_test_t, split_left_right) { + run_async([this] { + write(0, 128<<10, 'x'); + write(48<<10, 32<<10, 'a'); + + auto pins = get_mappings(0, 128<<10); + EXPECT_EQ(pins.size(), 3); + + size_t res[3] = {0, 48<<10, 80<<10}; + auto base = pins.front()->get_key(); + int i = 0; + for (auto &pin : pins) { + EXPECT_EQ(pin->get_key() - base, res[i]); + i++; + } + }); +} +TEST_P(object_data_handler_test_t, multiple_split) { + run_async([this] { + write(0, 128<<10, 'x'); + + auto t = create_mutate_transaction(); + // normal split + write(*t, 120<<10, 4<<10, 'a'); + // not aligned right + write(*t, 4<<10, 5<<10, 'b'); + // split right extent of last split result + write(*t, 32<<10, 4<<10, 'c'); + // non aligned overwrite + write(*t, 13<<10, 4<<10, 'd'); + + write(*t, 64<<10, 32<<10, 'e'); + // not split right + write(*t, 60<<10, 8<<10, 'f'); + + submit_transaction(std::move(t)); + + auto pins = get_mappings(0, 128<<10); + EXPECT_EQ(pins.size(), 10); + + size_t res[10] = {0, 4<<10, 12<<10, 20<<10, 32<<10, + 36<<10, 60<<10, 96<<10, 120<<10, 124<<10}; + auto base = pins.front()->get_key(); + int i = 0; + for (auto &pin : pins) { + EXPECT_EQ(pin->get_key() - base, res[i]); + i++; + } + read(0, 128<<10); + }); +} + +INSTANTIATE_TEST_SUITE_P( + object_data_handler_test, + object_data_handler_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); + + diff --git a/src/test/crimson/seastore/test_omap_manager.cc b/src/test/crimson/seastore/test_omap_manager.cc new file mode 100644 index 000000000..ab2218565 --- /dev/null +++ b/src/test/crimson/seastore/test_omap_manager.cc @@ -0,0 +1,730 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 
sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/omap_manager.h" + +#include "test/crimson/seastore/test_block.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace std; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +const int STR_LEN = 50; + +std::string rand_name(const int len) +{ + std::string ret; + ret.reserve(len); + for (int i = 0; i < len; ++i) { + ret.append(1, (char)(rand() % ('z' - '0')) + '0'); + } + return ret; +} + +bufferlist rand_buffer(const int len) { + bufferptr ptr(len); + for (auto i = ptr.c_str(); i < ptr.c_str() + len; ++i) { + *i = (char)rand(); + } + bufferlist bl; + bl.append(ptr); + return bl; +} + +struct omap_manager_test_t : + public seastar_test_suite_t, + TMTestState { + + OMapManagerRef omap_manager; + + omap_manager_test_t() {} + + seastar::future<> set_up_fut() final { + return tm_setup().then([this] { + omap_manager = omap_manager::create_omap_manager(*tm); + return seastar::now(); + }); + } + + seastar::future<> tear_down_fut() final { + return tm_teardown().then([this] { + omap_manager.reset(); + return seastar::now(); + }); + } + + using test_omap_t = std::map<std::string, ceph::bufferlist>; + test_omap_t test_omap_mappings; + + void set_key( + omap_root_t &omap_root, + Transaction &t, + const string &key, + const bufferlist &val) { + with_trans_intr( + t, + [&, this](auto &t) { + return omap_manager->omap_set_key(omap_root, t, key, val); + }).unsafe_get0(); + test_omap_mappings[key] = val; + } + + void set_key( + omap_root_t &omap_root, + Transaction &t, + const string &key, + const string &val) { + bufferlist bl; + bl.append(val); + 
set_key(omap_root, t, key, bl);
+  }
+
+  // Stores a random STR_LEN-byte value under a random STR_LEN-character key
+  // (via set_key, which also records it in test_omap_mappings) and returns
+  // the key.
+  std::string set_random_key(
+    omap_root_t &omap_root,
+    Transaction &t) {
+    auto key = rand_name(STR_LEN);
+    set_key(
+      omap_root,
+      t,
+      key,
+      rand_buffer(STR_LEN));
+    return key;
+  }
+
+  // Reads `key` through omap_get_value and compares the outcome with
+  // test_omap_mappings: a key missing from the reference map must yield an
+  // empty result, a present key must yield an equal value.
+  void get_value(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const string &key) {
+    auto ret = with_trans_intr(
+      t,
+      [&, this](auto &t) {
+	return omap_manager->omap_get_value(omap_root, t, key);
+      }).unsafe_get0();
+    auto iter = test_omap_mappings.find(key);
+    if (iter == test_omap_mappings.end()) {
+      EXPECT_FALSE(ret);
+    } else {
+      EXPECT_TRUE(ret);
+      if (ret) {
+	EXPECT_TRUE(*ret == iter->second);
+      }
+    }
+  }
+
+  // Removes `key` through omap_rm_key and drops it from test_omap_mappings.
+  void rm_key(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const string &key) {
+    with_trans_intr(
+      t,
+      [&, this](auto &t) {
+	return omap_manager->omap_rm_key(omap_root, t, key);
+      }).unsafe_get0();
+    // Erase by key rather than by find() result: erasing the end() iterator
+    // of a std::map is undefined, while erase(key) is a no-op when the key
+    // is not tracked.
+    test_omap_mappings.erase(key);
+  }
+
+  // Removes the keys in [first, last) through omap_rm_key_range (list config:
+  // max 3000, first inclusive / last exclusive), applies the same removal to
+  // test_omap_mappings and returns the keys dropped from the reference map.
+  // The local removal stops once config.max_result_size keys were erased.
+  std::vector<std::string> rm_key_range(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const std::string &first,
+    const std::string &last) {
+    logger().debug("rm keys in range {} ~ {}", first, last);
+    auto config = OMapManager::omap_list_config_t()
+      .with_max(3000)
+      .with_inclusive(true, false);
+
+    with_trans_intr(
+      t,
+      [&, this](auto &t) {
+	return omap_manager->omap_rm_key_range(
+	  omap_root, t, first, last, config);
+      }).unsafe_get0();
+
+    std::vector<std::string> keys;
+    size_t count = 0;
+    for (auto iter = test_omap_mappings.begin();
+	 iter != test_omap_mappings.end(); ) {
+      if (iter->first >= first && iter->first < last) {
+	keys.push_back(iter->first);
+	// erase() hands back the next valid iterator, keeping the walk safe.
+	iter = test_omap_mappings.erase(iter);
+	count++;
+      } else {
+	iter++;
+      }
+      if (count == config.max_result_size) {
+	break;
+      }
+    }
+    return keys;
+  }
+
+  void list(
+    const omap_root_t &omap_root,
+    Transaction &t,
+    const std::optional<std::string> &first,
+    const std::optional<std::string> &last,
+    size_t max = 128,
+    bool inclusive = false) {
+
+    if (first && last) {
+      logger().debug("list on {} 
~ {}", *first, *last); + } else if (first) { + logger().debug("list on {} ~ end", *first); + } else if (last) { + logger().debug("list on start ~ {}", *last); + } else { + logger().debug("list on start ~ end"); + } + + auto config = OMapManager::omap_list_config_t() + .with_max(max) + .with_inclusive(inclusive, false); + + auto [complete, results] = with_trans_intr( + t, + [&, this](auto &t) { + return omap_manager->omap_list(omap_root, t, first, last, config); + }).unsafe_get0(); + + test_omap_t::iterator it, lit; + if (first) { + it = config.first_inclusive ? + test_omap_mappings.lower_bound(*first) : + test_omap_mappings.upper_bound(*first); + } else { + it = test_omap_mappings.begin(); + } + if (last) { + lit = config.last_inclusive ? + test_omap_mappings.upper_bound(*last) : + test_omap_mappings.lower_bound(*last); + } else { + lit = test_omap_mappings.end(); + } + + for (auto &&[k, v]: results) { + EXPECT_NE(it, test_omap_mappings.end()); + if (it == test_omap_mappings.end()) { + return; + } + EXPECT_EQ(k, it->first); + EXPECT_EQ(v, it->second); + it++; + } + if (it == lit) { + EXPECT_TRUE(complete); + } else { + EXPECT_EQ(results.size(), max); + } + } + + void clear( + omap_root_t &omap_root, + Transaction &t) { + with_trans_intr( + t, + [&, this](auto &t) { + return omap_manager->omap_clear(omap_root, t); + }).unsafe_get0(); + EXPECT_EQ(omap_root.get_location(), L_ADDR_NULL); + } + + void check_mappings(omap_root_t &omap_root, Transaction &t) { + for (const auto &i: test_omap_mappings){ + get_value(omap_root, t, i.first); + } + } + + void check_mappings(omap_root_t &omap_root) { + auto t = create_read_transaction(); + check_mappings(omap_root, *t); + } + + std::vector<std::string> get_mapped_keys() { + std::vector<std::string> mkeys; + mkeys.reserve(test_omap_mappings.size()); + for (auto &k: test_omap_mappings) { + mkeys.push_back(k.first); + } + return mkeys; + } + + void replay() { + restart(); + omap_manager = omap_manager::create_omap_manager(*tm); + } 
+ + auto initialize() { + auto t = create_mutate_transaction(); + omap_root_t omap_root = with_trans_intr( + *t, + [this](auto &t) { + return omap_manager->initialize_omap(t, L_ADDR_MIN); + }).unsafe_get0(); + submit_transaction(std::move(t)); + return omap_root; + } +}; + +TEST_P(omap_manager_test_t, basic) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + string key = "owner"; + string val = "test"; + + { + auto t = create_mutate_transaction(); + logger().debug("first transaction"); + set_key(omap_root, *t, key, val); + get_value(omap_root, *t, key); + submit_transaction(std::move(t)); + } + { + auto t = create_mutate_transaction(); + logger().debug("second transaction"); + get_value(omap_root, *t, key); + rm_key(omap_root, *t, key); + get_value(omap_root, *t, key); + submit_transaction(std::move(t)); + } + { + auto t = create_mutate_transaction(); + logger().debug("third transaction"); + get_value(omap_root, *t, key); + submit_transaction(std::move(t)); + } + }); +} + +TEST_P(omap_manager_test_t, force_leafnode_split) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 40; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 10; ++j) { + set_random_key(omap_root, *t); + if ((i % 20 == 0) && (j == 5)) { + check_mappings(omap_root, *t); + } + } + logger().debug("force split submit transaction i = {}", i); + submit_transaction(std::move(t)); + check_mappings(omap_root); + } + }); +} + +TEST_P(omap_manager_test_t, force_leafnode_split_merge) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 80; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened split_merge transaction"); + for (unsigned j = 0; j < 5; ++j) { + set_random_key(omap_root, *t); + if ((i % 10 == 0) && (j == 3)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction"); + 
submit_transaction(std::move(t)); + if (i % 50 == 0) { + check_mappings(omap_root); + } + } + auto mkeys = get_mapped_keys(); + auto t = create_mutate_transaction(); + for (unsigned i = 0; i < mkeys.size(); i++) { + if (i % 3 != 0) { + rm_key(omap_root, *t, mkeys[i]); + } + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + submit_transaction(std::move(t)); + t = create_mutate_transaction(); + } + if (i % 100 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + } + logger().debug("finally submitting transaction "); + submit_transaction(std::move(t)); + }); +} + +TEST_P(omap_manager_test_t, force_leafnode_split_merge_fullandbalanced) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 50; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened split_merge transaction"); + for (unsigned j = 0; j < 5; ++j) { + set_random_key(omap_root, *t); + if ((i % 10 == 0) && (j == 3)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction"); + submit_transaction(std::move(t)); + if (i % 50 == 0) { + check_mappings(omap_root); + } + } + auto mkeys = get_mapped_keys(); + auto t = create_mutate_transaction(); + for (unsigned i = 0; i < mkeys.size(); i++) { + if (30 < i && i < 100) { + rm_key(omap_root, *t, mkeys[i]); + } + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + submit_transaction(std::move(t)); + t = create_mutate_transaction(); + } + if (i % 50 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + if (i == 100) { + break; + } + } + logger().debug("finally submitting transaction "); + submit_transaction(std::move(t)); + check_mappings(omap_root); + }); +} + +TEST_P(omap_manager_test_t, force_split_listkeys_list_rmkey_range_clear) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + string 
first, last; + for (unsigned i = 0; i < 40; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 10; ++j) { + auto key = set_random_key(omap_root, *t); + if (i == 10) { + first = key; + } + if (i == 30) { + last = key; + if (first > last) { + std::swap(first, last); + } + } + if ((i % 20 == 0) && (j == 5)) { + check_mappings(omap_root, *t); + } + } + logger().debug("force split submit transaction i = {}", i); + submit_transaction(std::move(t)); + check_mappings(omap_root); + } + + std::optional<std::string> first_temp; + std::optional<std::string> last_temp; + { + auto t = create_read_transaction(); + first_temp = std::nullopt; + last_temp = std::nullopt; + list(omap_root, *t, first_temp, last_temp); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = std::nullopt; + list(omap_root, *t, first_temp, last_temp, 100); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = std::nullopt; + list(omap_root, *t, first_temp, last_temp, 100, true); + } + + { + auto t = create_read_transaction(); + first_temp = std::nullopt; + last_temp = last; + list(omap_root, *t, first_temp, last_temp, 10240); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = last; + list(omap_root, *t, first_temp, last_temp, 10240, true); + } + + { + auto t = create_read_transaction(); + list(omap_root, *t, first, last, 10240, true); + } + + { + auto t = create_mutate_transaction(); + auto keys = rm_key_range(omap_root, *t, first, last); + for (const auto& key : keys) { + get_value(omap_root, *t, key); + } + submit_transaction(std::move(t)); + } + + { + auto t = create_mutate_transaction(); + clear(omap_root, *t); + submit_transaction(std::move(t)); + } + }); +} + +TEST_P(omap_manager_test_t, force_inner_node_split_list_rmkey_range) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + string first = ""; + string last; + while 
(cache->get_omap_tree_depth() < 3) { + for (unsigned i = 0; i < 40; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 10; ++j) { + auto key = set_random_key(omap_root, *t); + if (key.compare(first) < 0 || !first.length()) { + first = key; + } + if (i == 10) { + last = key; + } + } + logger().debug("force split submit transaction i = {}", i); + submit_transaction(std::move(t)); + } + } + + std::optional<std::string> first_temp; + std::optional<std::string> last_temp; + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = std::nullopt; + list(omap_root, *t, first_temp, last_temp, 10240); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = std::nullopt; + list(omap_root, *t, first_temp, last_temp, 10240, true); + } + + { + auto t = create_read_transaction(); + first_temp = std::nullopt; + last_temp = last; + list(omap_root, *t, first_temp, last_temp, 10240); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = last; + list(omap_root, *t, first_temp, last_temp, 10240, true); + } + + { + auto t = create_mutate_transaction(); + auto keys = rm_key_range(omap_root, *t, first, last); + for (const auto& key : keys) { + get_value(omap_root, *t, key); + } + submit_transaction(std::move(t)); + } + + { + auto t = create_mutate_transaction(); + clear(omap_root, *t); + submit_transaction(std::move(t)); + } + }); +} + + +TEST_P(omap_manager_test_t, internal_force_split) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 10; i++) { + logger().debug("opened split transaction"); + auto t = create_mutate_transaction(); + + for (unsigned j = 0; j < 80; ++j) { + set_random_key(omap_root, *t); + if ((i % 2 == 0) && (j % 50 == 0)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction i = {}", i); + submit_transaction(std::move(t)); + } + check_mappings(omap_root); + }); 
+} + +TEST_P(omap_manager_test_t, internal_force_merge_fullandbalanced) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 8; i++) { + logger().debug("opened split transaction"); + auto t = create_mutate_transaction(); + + for (unsigned j = 0; j < 80; ++j) { + set_random_key(omap_root, *t); + if ((i % 2 == 0) && (j % 50 == 0)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction"); + submit_transaction(std::move(t)); + } + auto mkeys = get_mapped_keys(); + auto t = create_mutate_transaction(); + for (unsigned i = 0; i < mkeys.size(); i++) { + rm_key(omap_root, *t, mkeys[i]); + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + submit_transaction(std::move(t)); + t = create_mutate_transaction(); + } + if (i % 50 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + } + logger().debug("finally submitting transaction "); + submit_transaction(std::move(t)); + check_mappings(omap_root); + }); +} + +TEST_P(omap_manager_test_t, replay) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 8; i++) { + logger().debug("opened split transaction"); + auto t = create_mutate_transaction(); + + for (unsigned j = 0; j < 80; ++j) { + set_random_key(omap_root, *t); + if ((i % 2 == 0) && (j % 50 == 0)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction i = {}", i); + submit_transaction(std::move(t)); + } + replay(); + check_mappings(omap_root); + + auto mkeys = get_mapped_keys(); + auto t = create_mutate_transaction(); + for (unsigned i = 0; i < mkeys.size(); i++) { + rm_key(omap_root, *t, mkeys[i]); + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + submit_transaction(std::move(t)); + replay(); + t = create_mutate_transaction(); + } + if (i % 50 == 0) { + logger().debug("check_mappings i= {}", i); + 
check_mappings(omap_root, *t); + check_mappings(omap_root); + } + } + logger().debug("finally submitting transaction "); + submit_transaction(std::move(t)); + replay(); + check_mappings(omap_root); + }); +} + + +TEST_P(omap_manager_test_t, internal_force_split_to_root) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + logger().debug("set big keys"); + for (unsigned i = 0; i < 53; i++) { + auto t = create_mutate_transaction(); + + for (unsigned j = 0; j < 8; ++j) { + set_random_key(omap_root, *t); + } + logger().debug("submitting transaction i = {}", i); + submit_transaction(std::move(t)); + } + logger().debug("set small keys"); + for (unsigned i = 0; i < 100; i++) { + auto t = create_mutate_transaction(); + for (unsigned j = 0; j < 8; ++j) { + set_random_key(omap_root, *t); + } + logger().debug("submitting transaction last"); + submit_transaction(std::move(t)); + } + check_mappings(omap_root); + }); +} + +INSTANTIATE_TEST_SUITE_P( + omap_manager_test, + omap_manager_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); diff --git a/src/test/crimson/seastore/test_randomblock_manager.cc b/src/test/crimson/seastore/test_randomblock_manager.cc new file mode 100644 index 000000000..9ddb7f9ad --- /dev/null +++ b/src/test/crimson/seastore/test_randomblock_manager.cc @@ -0,0 +1,178 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include <random> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/random_block_manager/block_rb_manager.h" +#include "crimson/os/seastore/random_block_manager/rbm_device.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +constexpr uint64_t DEFAULT_TEST_SIZE = 1 << 20; + +struct rbm_test_t : + public seastar_test_suite_t { + 
std::unique_ptr<BlockRBManager> rbm_manager; + std::unique_ptr<random_block_device::RBMDevice> device; + + struct rbm_transaction { + void add_rbm_allocated_blocks(alloc_delta_t &d) { + allocated_blocks.push_back(d); + } + void clear_rbm_allocated_blocks() { + if (!allocated_blocks.empty()) { + allocated_blocks.clear(); + } + } + const auto &get_rbm_allocated_blocks() { + return allocated_blocks; + } + std::vector<alloc_delta_t> allocated_blocks; + }; + + std::default_random_engine generator; + + uint64_t block_size = 0; + uint64_t size = 0; + + device_config_t config; + + rbm_test_t() = default; + + seastar::future<> set_up_fut() final { + device = random_block_device::create_test_ephemeral( + random_block_device::DEFAULT_TEST_CBJOURNAL_SIZE, DEFAULT_TEST_SIZE); + block_size = device->get_block_size(); + size = device->get_available_size(); + rbm_manager.reset(new BlockRBManager(device.get(), std::string(), false)); + config = get_rbm_ephemeral_device_config(0, 1); + return device->mkfs(config).handle_error(crimson::ct_error::assert_all{} + ).then([this] { + return device->mount().handle_error(crimson::ct_error::assert_all{} + ).then([this] { + return rbm_manager->open().handle_error(crimson::ct_error::assert_all{}); + }); + }); + } + + seastar::future<> tear_down_fut() final { + rbm_manager->close().unsafe_get0(); + device->close().unsafe_get0(); + rbm_manager.reset(); + device.reset(); + return seastar::now(); + } + + auto mkfs() { + return device->mkfs(config).unsafe_get0(); + } + + auto read_rbm_header() { + return device->read_rbm_header(RBM_START_ADDRESS).unsafe_get0(); + } + + auto open() { + device->mount().unsafe_get0(); + return rbm_manager->open().unsafe_get0(); + } + + auto write(uint64_t addr, bufferptr &ptr) { + paddr_t paddr = convert_abs_addr_to_paddr( + addr, + rbm_manager->get_device_id()); + return rbm_manager->write(paddr, ptr).unsafe_get0(); + } + + auto read(uint64_t addr, bufferptr &ptr) { + paddr_t paddr = convert_abs_addr_to_paddr( + addr, 
+ rbm_manager->get_device_id()); + return rbm_manager->read(paddr, ptr).unsafe_get0(); + } + + bufferptr generate_extent(size_t blocks) { + std::uniform_int_distribution<char> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = distribution(generator); + return buffer::ptr(buffer::create(blocks * block_size, contents)); + } + + void close() { + rbm_manager->close().unsafe_get0(); + return; + } + +}; + +TEST_F(rbm_test_t, mkfs_test) +{ + run_async([this] { + auto super = read_rbm_header(); + ASSERT_TRUE( + super.block_size == block_size && + super.size == size + ); + config.spec.id = DEVICE_ID_NULL; + mkfs(); + super = read_rbm_header(); + ASSERT_TRUE( + super.config.spec.id == DEVICE_ID_NULL && + super.size == size + ); + }); +} + +TEST_F(rbm_test_t, open_read_write_test) +{ + run_async([this] { + auto content = generate_extent(1); + { + write( + block_size, + content + ); + auto bp = bufferptr(ceph::buffer::create_page_aligned(block_size)); + read( + block_size, + bp + ); + bufferlist bl; + bufferlist block; + bl.append(bp); + block.append(content); + ASSERT_EQ( + bl.begin().crc32c(bl.length(), 1), + block.begin().crc32c(block.length(), 1)); + } + close(); + open(); + { + auto bp = bufferptr(ceph::buffer::create_page_aligned(block_size)); + read( + block_size, + bp + ); + bufferlist bl; + bufferlist block; + bl.append(bp); + block.append(content); + ASSERT_EQ( + bl.begin().crc32c(bl.length(), 1), + block.begin().crc32c(block.length(), 1)); + } + }); +} + diff --git a/src/test/crimson/seastore/test_seastore.cc b/src/test/crimson/seastore/test_seastore.cc new file mode 100644 index 000000000..63bf4c51f --- /dev/null +++ b/src/test/crimson/seastore/test_seastore.cc @@ -0,0 +1,1268 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <string> +#include <iostream> +#include <sstream> + +#include "test/crimson/gtest_seastar.h" + +#include 
"test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/futurized_collection.h" +#include "crimson/os/seastore/seastore.h" +#include "crimson/os/seastore/onode.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using SeaStoreShard = FuturizedStore::Shard; +using CTransaction = ceph::os::Transaction; +using namespace std; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +ghobject_t make_oid(int i) { + stringstream ss; + ss << "object_" << i; + auto ret = ghobject_t( + hobject_t( + sobject_t(ss.str(), CEPH_NOSNAP))); + ret.set_shard(shard_id_t(shard_id_t::NO_SHARD)); + ret.hobj.nspace = "asdf"; + ret.hobj.pool = 0; + uint32_t reverse_hash = hobject_t::_reverse_bits(0); + ret.hobj.set_bitwise_key_u32(reverse_hash + i * 100); + return ret; +} + +ghobject_t make_temp_oid(int i) { + stringstream ss; + ss << "temp_object_" << i; + auto ret = ghobject_t( + hobject_t( + sobject_t(ss.str(), CEPH_NOSNAP))); + ret.set_shard(shard_id_t(shard_id_t::NO_SHARD)); + ret.hobj.nspace = "hjkl"; + ret.hobj.pool = -2ll; + uint32_t reverse_hash = hobject_t::_reverse_bits(0); + ret.hobj.set_bitwise_key_u32(reverse_hash + i * 100); + return ret; +} + +struct seastore_test_t : + public seastar_test_suite_t, + SeaStoreTestState { + + coll_t coll_name{spg_t{pg_t{0, 0}}}; + CollectionRef coll; + + seastore_test_t() {} + + seastar::future<> set_up_fut() final { + return tm_setup( + ).then([this] { + return sharded_seastore->create_new_collection(coll_name); + }).then([this](auto coll_ref) { + coll = coll_ref; + CTransaction t; + t.create_collection(coll_name, 0); + return sharded_seastore->do_transaction( + coll, + std::move(t)); + }); + } + + seastar::future<> tear_down_fut() final { + coll.reset(); + return tm_teardown(); + } + + void do_transaction(CTransaction &&t) { + return sharded_seastore->do_transaction( + coll, + std::move(t)).get0(); + } + + 
void set_meta( + const std::string& key, + const std::string& value) { + return seastore->write_meta(key, value).get0(); + } + + std::tuple<int, std::string> get_meta( + const std::string& key) { + return seastore->read_meta(key).get(); + } + + struct object_state_t { + const coll_t cid; + const CollectionRef coll; + const ghobject_t oid; + + std::map<string, bufferlist> omap; + bufferlist contents; + + std::map<snapid_t, bufferlist> clone_contents; + + void touch( + CTransaction &t) { + t.touch(cid, oid); + } + + void touch( + SeaStoreShard &sharded_seastore) { + CTransaction t; + touch(t); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void truncate( + CTransaction &t, + uint64_t off) { + t.truncate(cid, oid, off); + } + + void truncate( + SeaStoreShard &sharded_seastore, + uint64_t off) { + CTransaction t; + truncate(t, off); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + std::map<uint64_t, uint64_t> fiemap( + SeaStoreShard &sharded_seastore, + uint64_t off, + uint64_t len) { + return sharded_seastore.fiemap(coll, oid, off, len).unsafe_get0(); + } + + bufferlist readv( + SeaStoreShard &sharded_seastore, + interval_set<uint64_t>&m) { + return sharded_seastore.readv(coll, oid, m).unsafe_get0(); + } + + void remove( + CTransaction &t) { + t.remove(cid, oid); + t.remove_collection(cid); + } + + void remove( + SeaStoreShard &sharded_seastore) { + CTransaction t; + remove(t); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void set_omap( + CTransaction &t, + const string &key, + const bufferlist &val) { + omap[key] = val; + std::map<string, bufferlist> arg; + arg[key] = val; + t.omap_setkeys( + cid, + oid, + arg); + } + + void set_omap( + SeaStoreShard &sharded_seastore, + const string &key, + const bufferlist &val) { + CTransaction t; + set_omap(t, key, val); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void write( + SeaStoreShard &sharded_seastore, + 
CTransaction &t, + uint64_t offset, + bufferlist bl) { + bufferlist new_contents; + if (offset > 0 && contents.length()) { + new_contents.substr_of( + contents, + 0, + std::min<size_t>(offset, contents.length()) + ); + } + new_contents.append_zero(offset - new_contents.length()); + new_contents.append(bl); + + auto tail_offset = offset + bl.length(); + if (contents.length() > tail_offset) { + bufferlist tail; + tail.substr_of( + contents, + tail_offset, + contents.length() - tail_offset); + new_contents.append(tail); + } + contents.swap(new_contents); + + t.write( + cid, + oid, + offset, + bl.length(), + bl); + } + + void write( + SeaStoreShard &sharded_seastore, + uint64_t offset, + bufferlist bl) { + CTransaction t; + write(sharded_seastore, t, offset, bl); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void clone( + SeaStoreShard &sharded_seastore, + snapid_t snap) { + ghobject_t coid = oid; + coid.hobj.snap = snap; + CTransaction t; + t.clone(cid, oid, coid); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + clone_contents[snap].reserve(contents.length()); + auto it = contents.begin(); + it.copy_all(clone_contents[snap]); + } + + object_state_t get_clone(snapid_t snap) { + auto coid = oid; + coid.hobj.snap = snap; + auto clone_obj = object_state_t{cid, coll, coid}; + clone_obj.contents.reserve(clone_contents[snap].length()); + auto it = clone_contents[snap].begin(); + it.copy_all(clone_obj.contents); + return clone_obj; + } + + void write( + SeaStoreShard &sharded_seastore, + uint64_t offset, + size_t len, + char fill) { + auto buffer = bufferptr(buffer::create(len)); + ::memset(buffer.c_str(), fill, len); + bufferlist bl; + bl.append(buffer); + write(sharded_seastore, offset, bl); + } + + void zero( + SeaStoreShard &sharded_seastore, + CTransaction &t, + uint64_t offset, + size_t len) { + ceph::buffer::list bl; + bl.append_zero(len); + bufferlist new_contents; + if (offset > 0 && contents.length()) { + 
new_contents.substr_of( + contents, + 0, + std::min<size_t>(offset, contents.length()) + ); + } + new_contents.append_zero(offset - new_contents.length()); + new_contents.append(bl); + + auto tail_offset = offset + bl.length(); + if (contents.length() > tail_offset) { + bufferlist tail; + tail.substr_of( + contents, + tail_offset, + contents.length() - tail_offset); + new_contents.append(tail); + } + contents.swap(new_contents); + + t.zero( + cid, + oid, + offset, + len); + } + + void zero( + SeaStoreShard &sharded_seastore, + uint64_t offset, + size_t len) { + CTransaction t; + zero(sharded_seastore, t, offset, len); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void read( + SeaStoreShard &sharded_seastore, + uint64_t offset, + uint64_t len) { + bufferlist to_check; + if (contents.length() >= offset) { + to_check.substr_of( + contents, + offset, + std::min(len, (uint64_t)contents.length() - offset)); // clamp to the bytes left after offset, not to the whole length + } + auto ret = sharded_seastore.read( + coll, + oid, + offset, + len).unsafe_get0(); + EXPECT_EQ(ret.length(), to_check.length()); + EXPECT_EQ(ret, to_check); + } + + void check_size(SeaStoreShard &sharded_seastore) { + auto st = sharded_seastore.stat( + coll, + oid).get0(); + EXPECT_EQ(contents.length(), st.st_size); + } + + void set_attr( + SeaStoreShard &sharded_seastore, + std::string key, + bufferlist& val) { + CTransaction t; + t.setattr(cid, oid, key, val); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void rm_attr( + SeaStoreShard &sharded_seastore, + std::string key) { + CTransaction t; + t.rmattr(cid, oid, key); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void rm_attrs( + SeaStoreShard &sharded_seastore) { + CTransaction t; + t.rmattrs(cid, oid); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + SeaStoreShard::attrs_t get_attrs( + SeaStoreShard &sharded_seastore) { + return sharded_seastore.get_attrs(coll, oid) + 
.handle_error(SeaStoreShard::get_attrs_ertr::discard_all{}) + .get(); + } + + ceph::bufferlist get_attr( + SeaStoreShard& sharded_seastore, + std::string_view name) { + return sharded_seastore.get_attr(coll, oid, name) + .handle_error( + SeaStoreShard::get_attr_errorator::discard_all{}) + .get(); + } + + void check_omap_key( + SeaStoreShard &sharded_seastore, + const string &key) { + std::set<string> to_check; + to_check.insert(key); + auto result = sharded_seastore.omap_get_values( + coll, + oid, + to_check).unsafe_get0(); + if (result.empty()) { + EXPECT_EQ(omap.find(key), omap.end()); + } else { + auto iter = omap.find(key); + EXPECT_NE(iter, omap.end()); + if (iter != omap.end()) { + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(iter->second, result.begin()->second); + } + } + } + + void check_omap(SeaStoreShard &sharded_seastore) { + auto refiter = omap.begin(); + std::optional<std::string> start; + while(true) { + auto [done, kvs] = sharded_seastore.omap_get_values( + coll, + oid, + start).unsafe_get0(); + auto iter = kvs.begin(); + while (true) { + if ((done && iter == kvs.end()) && refiter == omap.end()) { + return; // finished + } else if (!done && iter == kvs.end()) { + break; // reload kvs + } + if (refiter != omap.end() && (iter == kvs.end() || refiter->first < iter->first)) { // guard first: never dereference omap.end() + logger().debug( + "check_omap: missing omap key {}", + refiter->first); + GTEST_FAIL() << "missing omap key " << refiter->first; + ++refiter; + } else if (refiter == omap.end() || refiter->first > iter->first) { + logger().debug( + "check_omap: extra omap key {}", + iter->first); + GTEST_FAIL() << "extra omap key " << iter->first; + ++iter; + } else { + EXPECT_EQ(iter->second, refiter->second); + ++iter; + ++refiter; + } + } + if (!done) { + start = kvs.rbegin()->first; + } + } + } + }; + + map<ghobject_t, object_state_t> test_objects; + object_state_t &get_object( + const ghobject_t &oid) { + return test_objects.emplace( + std::make_pair( + oid, + object_state_t{coll_name, coll, oid})).first->second; + } + + 
void remove_object( + object_state_t &sobj) { + + sobj.remove(*sharded_seastore); + auto erased = test_objects.erase(sobj.oid); + ceph_assert(erased == 1); + } + + void validate_objects() const { + std::vector<ghobject_t> oids; + for (auto& [oid, obj] : test_objects) { + oids.emplace_back(oid); + } + auto ret = sharded_seastore->list_objects( + coll, + ghobject_t(), + ghobject_t::get_max(), + std::numeric_limits<uint64_t>::max()).get0(); + EXPECT_EQ(std::get<1>(ret), ghobject_t::get_max()); + EXPECT_EQ(std::get<0>(ret), oids); + } + + // create temp objects + struct bound_t { + enum class type_t { + MIN, + MAX, + TEMP, + TEMP_END, + NORMAL_BEGIN, + NORMAL, + } type = type_t::MIN; + unsigned index = 0; + + static bound_t get_temp(unsigned index) { + return bound_t{type_t::TEMP, index}; + } + static bound_t get_normal(unsigned index) { + return bound_t{type_t::NORMAL, index}; + } + static bound_t get_min() { return bound_t{type_t::MIN}; } + static bound_t get_max() { return bound_t{type_t::MAX}; } + static bound_t get_temp_end() { return bound_t{type_t::TEMP_END}; } + static bound_t get_normal_begin() { + return bound_t{type_t::NORMAL_BEGIN}; + } + + ghobject_t get_oid(SeaStore &seastore, CollectionRef &coll) const { + switch (type) { + case type_t::MIN: + return ghobject_t(); + case type_t::MAX: + return ghobject_t::get_max(); + case type_t::TEMP: + return make_temp_oid(index); + case type_t::TEMP_END: + return seastore.get_objs_range(coll, 0).temp_end; + case type_t::NORMAL_BEGIN: + return seastore.get_objs_range(coll, 0).obj_begin; + case type_t::NORMAL: + return make_oid(index); + default: + assert(0 == "impossible"); + return ghobject_t(); + } + } + }; + struct list_test_case_t { + bound_t left; + bound_t right; + unsigned limit; + }; + // list_test_cases_t :: [<limit, left_bound, right_bound>] + using list_test_cases_t = std::list<std::tuple<unsigned, bound_t, bound_t>>; + + void test_list( + unsigned temp_to_create, /// create temp 0..temp_to_create-1 + 
unsigned normal_to_create, /// create normal 0..normal_to_create-1 + list_test_cases_t cases /// cases to test + ) { + std::vector<ghobject_t> objs; + + // setup + auto create = [this, &objs](ghobject_t hoid) { + objs.emplace_back(std::move(hoid)); + auto &obj = get_object(objs.back()); + obj.touch(*sharded_seastore); + obj.check_size(*sharded_seastore); + }; + for (unsigned i = 0; i < temp_to_create; ++i) { + create(make_temp_oid(i)); + } + for (unsigned i = 0; i < normal_to_create; ++i) { + create(make_oid(i)); + } + + // list and validate each case + for (auto [limit, in_left_bound, in_right_bound] : cases) { + auto left_bound = in_left_bound.get_oid(*seastore, coll); + auto right_bound = in_right_bound.get_oid(*seastore, coll); + + // get results from seastore + auto [listed, next] = sharded_seastore->list_objects( + coll, left_bound, right_bound, limit).get0(); + + // compute correct answer + auto correct_begin = std::find_if( + objs.begin(), objs.end(), + [&left_bound](const auto &in) { + return in >= left_bound; + }); + unsigned count = 0; + auto correct_end = correct_begin; + for (; count < limit && + correct_end != objs.end() && + *correct_end < right_bound; + ++correct_end, ++count); + + // validate return -- [correct_begin, correct_end) should match listed + decltype(objs) correct_listed(correct_begin, correct_end); + EXPECT_EQ(listed, correct_listed); + + if (count < limit) { + if (correct_end == objs.end()) { + // if listed extends to end of range, next should be >= right_bound + EXPECT_GE(next, right_bound); + } else { + // next <= *correct_end since *correct_end is the next object to list + EXPECT_LE(next, *correct_end); + // next > *(correct_end - 1) since we already listed it + EXPECT_GT(next, *(correct_end - 1)); + } + } else { + // we listed exactly limit objects + EXPECT_EQ(limit, listed.size()); + + EXPECT_GE(next, left_bound); + if (limit == 0) { + if (correct_end != objs.end()) { + // next <= *correct_end since *correct_end is the next object 
to list + EXPECT_LE(next, *correct_end); + } + } else { + // next > *(correct_end - 1) since we already listed it + EXPECT_GT(next, *(correct_end - 1)); + } + } + } + + // teardown + for (auto &&hoid : objs) { get_object(hoid).remove(*sharded_seastore); } + } +}; + +template <typename T, typename V> +auto contains(const T &t, const V &v) { + return std::find( + t.begin(), + t.end(), + v) != t.end(); +} + +TEST_P(seastore_test_t, collection_create_list_remove) +{ + run_async([this] { + coll_t test_coll{spg_t{pg_t{1, 0}}}; + { + sharded_seastore->create_new_collection(test_coll).get0(); + { + CTransaction t; + t.create_collection(test_coll, 4); + do_transaction(std::move(t)); + } + auto colls_cores = seastore->list_collections().get0(); + std::vector<coll_t> colls; + colls.resize(colls_cores.size()); + std::transform( + colls_cores.begin(), colls_cores.end(), colls.begin(), + [](auto p) { return p.first; }); + EXPECT_EQ(colls.size(), 2); + EXPECT_TRUE(contains(colls, coll_name)); + EXPECT_TRUE(contains(colls, test_coll)); + } + + { + { + CTransaction t; + t.remove_collection(test_coll); + do_transaction(std::move(t)); + } + auto colls_cores = seastore->list_collections().get0(); + std::vector<coll_t> colls; + colls.resize(colls_cores.size()); + std::transform( + colls_cores.begin(), colls_cores.end(), colls.begin(), + [](auto p) { return p.first; }); + EXPECT_EQ(colls.size(), 1); + EXPECT_TRUE(contains(colls, coll_name)); + } + }); +} + +TEST_P(seastore_test_t, meta) { + run_async([this] { + set_meta("key1", "value1"); + set_meta("key2", "value2"); + + const auto [ret1, value1] = get_meta("key1"); + const auto [ret2, value2] = get_meta("key2"); + EXPECT_EQ(ret1, 0); + EXPECT_EQ(ret2, 0); + EXPECT_EQ(value1, "value1"); + EXPECT_EQ(value2, "value2"); + }); +} + +TEST_P(seastore_test_t, touch_stat_list_remove) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + test_obj.check_size(*sharded_seastore); + 
validate_objects(); + + remove_object(test_obj); + validate_objects(); + }); +} + +using bound_t = seastore_test_t::bound_t; +constexpr unsigned MAX_LIMIT = std::numeric_limits<unsigned>::max(); +static const seastore_test_t::list_test_cases_t temp_list_cases{ + // list all temp, maybe overlap to normal on right + {MAX_LIMIT, bound_t::get_min() , bound_t::get_max() }, + { 5, bound_t::get_min() , bound_t::get_temp_end()}, + { 6, bound_t::get_min() , bound_t::get_temp_end()}, + { 6, bound_t::get_min() , bound_t::get_max() }, + + // list temp starting at min up to but not past boundary + { 3, bound_t::get_min() , bound_t::get_temp(3) }, + { 3, bound_t::get_min() , bound_t::get_temp(4) }, + { 3, bound_t::get_min() , bound_t::get_temp(2) }, + + // list temp starting > min up to or past boundary + { 3, bound_t::get_temp(2) , bound_t::get_temp_end()}, + { 3, bound_t::get_temp(2) , bound_t::get_max() }, + { 3, bound_t::get_temp(3) , bound_t::get_max() }, + { 3, bound_t::get_temp(1) , bound_t::get_max() }, + + // 0 limit + { 0, bound_t::get_min() , bound_t::get_max() }, + { 0, bound_t::get_temp(1) , bound_t::get_max() }, + { 0, bound_t::get_temp_end(), bound_t::get_max() }, +}; + +TEST_P(seastore_test_t, list_objects_temp_only) +{ + run_async([this] { test_list(5, 0, temp_list_cases); }); +} + +TEST_P(seastore_test_t, list_objects_temp_overlap) +{ + run_async([this] { test_list(5, 5, temp_list_cases); }); +} + +static const seastore_test_t::list_test_cases_t normal_list_cases{ + // list all normal, maybe overlap to temp on left + {MAX_LIMIT, bound_t::get_min() , bound_t::get_max() }, + { 5, bound_t::get_normal_begin(), bound_t::get_max() }, + { 6, bound_t::get_normal_begin(), bound_t::get_max() }, + { 6, bound_t::get_temp(4) , bound_t::get_max() }, + + // list normal starting <= normal_begin < end + { 3, bound_t::get_normal_begin(), bound_t::get_normal(3)}, + { 3, bound_t::get_normal_begin(), bound_t::get_normal(4)}, + { 3, bound_t::get_normal_begin(), 
bound_t::get_normal(2)}, + { 3, bound_t::get_temp(5) , bound_t::get_normal(2)}, + { 3, bound_t::get_temp(4) , bound_t::get_normal(2)}, + + // list normal starting > min up to end + { 3, bound_t::get_normal(2) , bound_t::get_max() }, + { 3, bound_t::get_normal(2) , bound_t::get_max() }, + { 3, bound_t::get_normal(3) , bound_t::get_max() }, + { 3, bound_t::get_normal(1) , bound_t::get_max() }, + + // 0 limit + { 0, bound_t::get_min() , bound_t::get_max() }, + { 0, bound_t::get_normal(1) , bound_t::get_max() }, + { 0, bound_t::get_normal_begin(), bound_t::get_max() }, +}; + +TEST_P(seastore_test_t, list_objects_normal_only) +{ + run_async([this] { test_list(5, 0, normal_list_cases); }); +} + +TEST_P(seastore_test_t, list_objects_normal_overlap) +{ + run_async([this] { test_list(5, 5, normal_list_cases); }); +} + +bufferlist make_bufferlist(size_t len) { + bufferptr ptr(len); + bufferlist bl; + bl.append(ptr); + return bl; +} + +TEST_P(seastore_test_t, omap_test_simple) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + test_obj.set_omap( + *sharded_seastore, + "asdf", + make_bufferlist(128)); + test_obj.check_omap_key( + *sharded_seastore, + "asdf"); + }); +} + +TEST_P(seastore_test_t, clone_aligned_extents) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.write(*sharded_seastore, 0, 4096, 'a'); + + test_obj.clone(*sharded_seastore, 10); + std::cout << "reading origin after clone10" << std::endl; + test_obj.read(*sharded_seastore, 0, 4096); + test_obj.write(*sharded_seastore, 0, 4096, 'b'); + test_obj.write(*sharded_seastore, 4096, 4096, 'c'); + std::cout << "reading origin after clone10 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 8192); + auto clone_obj10 = test_obj.get_clone(10); + std::cout << "reading clone after clone10 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 8192); + + test_obj.clone(*sharded_seastore, 20); + std::cout << "reading 
origin after clone20" << std::endl; + test_obj.read(*sharded_seastore, 0, 4096); + test_obj.write(*sharded_seastore, 0, 4096, 'd'); + test_obj.write(*sharded_seastore, 4096, 4096, 'e'); + test_obj.write(*sharded_seastore, 8192, 4096, 'f'); + std::cout << "reading origin after clone20 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 12288); + auto clone_obj20 = test_obj.get_clone(20); + std::cout << "reading clone after clone20 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 12288); + clone_obj20.read(*sharded_seastore, 0, 12288); + }); +} + +TEST_P(seastore_test_t, clone_unaligned_extents) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.write(*sharded_seastore, 0, 8192, 'a'); + test_obj.write(*sharded_seastore, 8192, 8192, 'b'); + test_obj.write(*sharded_seastore, 16384, 8192, 'c'); + + test_obj.clone(*sharded_seastore, 10); + test_obj.write(*sharded_seastore, 4096, 12288, 'd'); + std::cout << "reading origin after clone10 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 24576); + + auto clone_obj10 = test_obj.get_clone(10); + std::cout << "reading clone after clone10 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 24576); + + test_obj.clone(*sharded_seastore, 20); + test_obj.write(*sharded_seastore, 8192, 12288, 'e'); + std::cout << "reading origin after clone20 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 24576); + + auto clone_obj20 = test_obj.get_clone(20); + std::cout << "reading clone after clone20 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 24576); + clone_obj20.read(*sharded_seastore, 0, 24576); + + test_obj.write(*sharded_seastore, 0, 24576, 'f'); + test_obj.clone(*sharded_seastore, 30); + test_obj.write(*sharded_seastore, 8192, 4096, 'g'); + std::cout << "reading origin after clone30 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 24576); + + auto clone_obj30 = test_obj.get_clone(30); + std::cout << "reading 
clone after clone30 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 24576); + clone_obj20.read(*sharded_seastore, 0, 24576); + clone_obj30.read(*sharded_seastore, 0, 24576); + }); +} + +TEST_P(seastore_test_t, attr) +{ + run_async([this] { + auto& test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + { + std::string oi("asdfasdfasdf"); + bufferlist bl; + encode(oi, bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + std::string ss("fdsfdsfs"); + bl.clear(); + encode(ss, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + std::string test_val("ssssssssssss"); + bl.clear(); + encode(test_val, bl); + test_obj.set_attr(*sharded_seastore, "test_key", bl); + + auto attrs = test_obj.get_attrs(*sharded_seastore); + std::string oi2; + bufferlist bl2 = attrs[OI_ATTR]; + decode(oi2, bl2); + bl2.clear(); + bl2 = attrs[SS_ATTR]; + std::string ss2; + decode(ss2, bl2); + std::string test_val2; + bl2.clear(); + bl2 = attrs["test_key"]; + decode(test_val2, bl2); + EXPECT_EQ(ss, ss2); + EXPECT_EQ(oi, oi2); + EXPECT_EQ(test_val, test_val2); + + bl2.clear(); + bl2 = test_obj.get_attr(*sharded_seastore, "test_key"); + test_val2.clear(); + decode(test_val2, bl2); + EXPECT_EQ(test_val, test_val2); + //test rm_attrs + test_obj.rm_attrs(*sharded_seastore); + attrs = test_obj.get_attrs(*sharded_seastore); + EXPECT_EQ(attrs.find(OI_ATTR), attrs.end()); + EXPECT_EQ(attrs.find(SS_ATTR), attrs.end()); + EXPECT_EQ(attrs.find("test_key"), attrs.end()); + + std::cout << "test_key passed" << std::endl; + //create OI_ATTR with len > onode_layout_t::MAX_OI_LENGTH, rm OI_ATTR + //create SS_ATTR with len > onode_layout_t::MAX_SS_LENGTH, rm SS_ATTR + char oi_array[onode_layout_t::MAX_OI_LENGTH + 1] = {'a'}; + std::string oi_str(&oi_array[0], sizeof(oi_array)); + bl.clear(); + encode(oi_str, bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + char ss_array[onode_layout_t::MAX_SS_LENGTH + 1] = {'b'}; + std::string ss_str(&ss_array[0], 
sizeof(ss_array)); + bl.clear(); + encode(ss_str, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + attrs = test_obj.get_attrs(*sharded_seastore); + bl2.clear(); + bl2 = attrs[OI_ATTR]; + std::string oi_str2; + decode(oi_str2, bl2); + EXPECT_EQ(oi_str, oi_str2); + + bl2.clear(); + bl2 = attrs[SS_ATTR]; + std::string ss_str2; + decode(ss_str2, bl2); + EXPECT_EQ(ss_str, ss_str2); + + bl2.clear(); + ss_str2.clear(); + bl2 = test_obj.get_attr(*sharded_seastore, SS_ATTR); + decode(ss_str2, bl2); + EXPECT_EQ(ss_str, ss_str2); + + bl2.clear(); + oi_str2.clear(); + bl2 = test_obj.get_attr(*sharded_seastore, OI_ATTR); + decode(oi_str2, bl2); + EXPECT_EQ(oi_str, oi_str2); + + test_obj.rm_attr(*sharded_seastore, OI_ATTR); + test_obj.rm_attr(*sharded_seastore, SS_ATTR); + + attrs = test_obj.get_attrs(*sharded_seastore); + EXPECT_EQ(attrs.find(OI_ATTR), attrs.end()); + EXPECT_EQ(attrs.find(SS_ATTR), attrs.end()); + } + { + //create OI_ATTR with len <= onode_layout_t::MAX_OI_LENGTH, rm OI_ATTR + //create SS_ATTR with len <= onode_layout_t::MAX_SS_LENGTH, rm SS_ATTR + std::string oi("asdfasdfasdf"); + bufferlist bl; + encode(oi, bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + std::string ss("f"); + bl.clear(); + encode(ss, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + std::string test_val("ssssssssssss"); + bl.clear(); + encode(test_val, bl); + test_obj.set_attr(*sharded_seastore, "test_key", bl); + + auto attrs = test_obj.get_attrs(*sharded_seastore); + std::string oi2; + bufferlist bl2 = attrs[OI_ATTR]; + decode(oi2, bl2); + bl2.clear(); + bl2 = attrs[SS_ATTR]; + std::string ss2; + decode(ss2, bl2); + std::string test_val2; + bl2.clear(); + bl2 = attrs["test_key"]; + decode(test_val2, bl2); + EXPECT_EQ(ss, ss2); + EXPECT_EQ(oi, oi2); + EXPECT_EQ(test_val, test_val2); + + test_obj.rm_attr(*sharded_seastore, OI_ATTR); + test_obj.rm_attr(*sharded_seastore, SS_ATTR); + test_obj.rm_attr(*sharded_seastore, "test_key"); + + attrs = 
test_obj.get_attrs(*sharded_seastore); + EXPECT_EQ(attrs.find(OI_ATTR), attrs.end()); + EXPECT_EQ(attrs.find(SS_ATTR), attrs.end()); + EXPECT_EQ(attrs.find("test_key"), attrs.end()); + } + { + // create OI_ATTR with len > onode_layout_t::MAX_OI_LENGTH, then + // overwrite it with another OI_ATTR len of which < onode_layout_t::MAX_OI_LENGTH + // create SS_ATTR with len > onode_layout_t::MAX_SS_LENGTH, then + // overwrite it with another SS_ATTR len of which < onode_layout_t::MAX_SS_LENGTH + char oi_array[onode_layout_t::MAX_OI_LENGTH + 1] = {'a'}; + std::string oi(&oi_array[0], sizeof(oi_array)); + bufferlist bl; + encode(oi, bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + oi = "asdfasdfasdf"; + bl.clear(); + encode(oi, bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + char ss_array[onode_layout_t::MAX_SS_LENGTH + 1] = {'b'}; + std::string ss(&ss_array[0], sizeof(ss_array)); + bl.clear(); + encode(ss, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + ss = "f"; + bl.clear(); + encode(ss, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + auto attrs = test_obj.get_attrs(*sharded_seastore); + std::string oi2, ss2; + bufferlist bl2 = attrs[OI_ATTR]; + decode(oi2, bl2); + bl2.clear(); + bl2 = attrs[SS_ATTR]; + decode(ss2, bl2); + EXPECT_EQ(oi, oi2); + EXPECT_EQ(ss, ss2); + } + }); +} + +TEST_P(seastore_test_t, omap_test_iterator) +{ + run_async([this] { + auto make_key = [](unsigned i) { + std::stringstream ss; + ss << "key" << i; + return ss.str(); + }; + auto &test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + for (unsigned i = 0; i < 20; ++i) { + test_obj.set_omap( + *sharded_seastore, + make_key(i), + make_bufferlist(128)); + } + test_obj.check_omap(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, object_data_omap_remove) +{ + run_async([this] { + auto make_key = [](unsigned i) { + std::stringstream ss; + ss << "key" << i; + return ss.str(); + }; + auto &test_obj = 
get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + for (unsigned i = 0; i < 1024; ++i) { + test_obj.set_omap( + *sharded_seastore, + make_key(i), + make_bufferlist(128)); + } + test_obj.check_omap(*sharded_seastore); + + for (uint64_t i = 0; i < 16; i++) { + test_obj.write( + *sharded_seastore, + 4096 * i, + 4096, + 'a'); + } + test_obj.remove(*sharded_seastore); + }); +} + + +TEST_P(seastore_test_t, simple_extent_test) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.write( + *sharded_seastore, + 1024, + 1024, + 'a'); + test_obj.read( + *sharded_seastore, + 1024, + 1024); + test_obj.check_size(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, fiemap_empty) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + test_obj.truncate(*sharded_seastore, 100000); + + std::map<uint64_t, uint64_t> m; + m = test_obj.fiemap(*sharded_seastore, 0, 100000); + EXPECT_TRUE(m.empty()); + + test_obj.remove(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, fiemap_holes) +{ + run_async([this] { + const uint64_t MAX_EXTENTS = 100; + + // large enough to ensure that seastore will allocate each write seperately + const uint64_t SKIP_STEP = 16 << 10; + auto &test_obj = get_object(make_oid(0)); + bufferlist bl; + bl.append("foo"); + + test_obj.touch(*sharded_seastore); + for (uint64_t i = 0; i < MAX_EXTENTS; i++) { + test_obj.write(*sharded_seastore, SKIP_STEP * i, bl); + } + + { // fiemap test from 0 to SKIP_STEP * (MAX_EXTENTS - 1) + 3 + auto m = test_obj.fiemap( + *sharded_seastore, 0, SKIP_STEP * (MAX_EXTENTS - 1) + 3); + ASSERT_EQ(m.size(), MAX_EXTENTS); + for (uint64_t i = 0; i < MAX_EXTENTS; i++) { + ASSERT_TRUE(m.count(SKIP_STEP * i)); + ASSERT_GE(m[SKIP_STEP * i], bl.length()); + } + } + + { // fiemap test from SKIP_STEP to SKIP_STEP * (MAX_EXTENTS - 2) + 3 + auto m = test_obj.fiemap( + *sharded_seastore, SKIP_STEP, SKIP_STEP * (MAX_EXTENTS - 3) + 3); + ASSERT_EQ(m.size(), 
MAX_EXTENTS - 2); + for (uint64_t i = 1; i < MAX_EXTENTS - 1; i++) { + ASSERT_TRUE(m.count(SKIP_STEP * i)); + ASSERT_GE(m[SKIP_STEP * i], bl.length()); + } + } + + { // fiemap test SKIP_STEP + 1 to 2 * SKIP_STEP + 1 (partial overlap) + auto m = test_obj.fiemap( + *sharded_seastore, SKIP_STEP + 1, SKIP_STEP + 1); + ASSERT_EQ(m.size(), 2); + ASSERT_EQ(m.begin()->first, SKIP_STEP + 1); + ASSERT_GE(m.begin()->second, bl.length()); + ASSERT_LE(m.rbegin()->first, (2 * SKIP_STEP) + 1); + ASSERT_EQ(m.rbegin()->first + m.rbegin()->second, 2 * SKIP_STEP + 2); + } + + test_obj.remove(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, sparse_read) +{ + run_async([this] { + const uint64_t MAX_EXTENTS = 100; + const uint64_t SKIP_STEP = 16 << 10; + auto &test_obj = get_object(make_oid(0)); + bufferlist wbl; + wbl.append("foo"); + + test_obj.touch(*sharded_seastore); + for (uint64_t i = 0; i < MAX_EXTENTS; i++) { + test_obj.write(*sharded_seastore, SKIP_STEP * i, wbl); + } + interval_set<uint64_t> m; + m = interval_set<uint64_t>( + test_obj.fiemap(*sharded_seastore, 0, SKIP_STEP * (MAX_EXTENTS - 1) + 3)); + ASSERT_TRUE(!m.empty()); + uint64_t off = 0; + auto rbl = test_obj.readv(*sharded_seastore, m); + + for (auto &&miter : m) { + bufferlist subl; + subl.substr_of(rbl, off, std::min(miter.second, uint64_t(wbl.length()))); + ASSERT_TRUE(subl.contents_equal(wbl)); + off += miter.second; + } + test_obj.remove(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, zero) +{ + run_async([this] { + auto test_zero = [this]( + // [(off, len, repeat)] + std::vector<std::tuple<uint64_t, uint64_t, uint64_t>> writes, + uint64_t zero_off, uint64_t zero_len) { + + // Test zero within a block + auto &test_obj = get_object(make_oid(0)); + uint64_t size = 0; + for (auto &[off, len, repeat]: writes) { + for (decltype(repeat) i = 0; i < repeat; ++i) { + test_obj.write(*sharded_seastore, off + (len * repeat), len, 'a'); + } + size = off + (len * (repeat + 1)); + } + test_obj.read( + 
*sharded_seastore, + 0, + size); + test_obj.check_size(*sharded_seastore); + test_obj.zero(*sharded_seastore, zero_off, zero_len); + test_obj.read( + *sharded_seastore, + 0, + size); + test_obj.check_size(*sharded_seastore); + remove_object(test_obj); + }; + + const uint64_t BS = 4<<10; + + // Test zero within a block + test_zero( + {{1<<10, 1<<10, 1}}, + 1124, 200); + + // Multiple writes, partial on left, partial on right. + test_zero( + {{BS, BS, 10}}, + BS + 128, + BS * 4); + + // Single large write, block boundary on right, partial on left. + test_zero( + {{BS, BS * 10, 1}}, + BS + 128, + (BS * 4) - 128); + + // Multiple writes, block boundary on left, partial on right. + test_zero( + {{BS, BS, 10}}, + BS, + (BS * 4) + 128); + }); +} +INSTANTIATE_TEST_SUITE_P( + seastore_test, + seastore_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); diff --git a/src/test/crimson/seastore/test_seastore_cache.cc b/src/test/crimson/seastore/test_seastore_cache.cc new file mode 100644 index 000000000..b249d27e4 --- /dev/null +++ b/src/test/crimson/seastore/test_seastore_cache.cc @@ -0,0 +1,260 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include "crimson/common/log.h" +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/segment_manager/ephemeral.h" + +#include "test/crimson/seastore/test_block.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct cache_test_t : public seastar_test_suite_t { + segment_manager::EphemeralSegmentManagerRef segment_manager; + ExtentPlacementManagerRef epm; + CacheRef cache; + paddr_t current; + journal_seq_t seq = JOURNAL_SEQ_MIN; + + cache_test_t() = default; + + seastar::future<paddr_t> submit_transaction( + TransactionRef t) { + 
auto record = cache->prepare_record(*t, JOURNAL_SEQ_NULL, JOURNAL_SEQ_NULL); + + bufferlist bl; + for (auto &&block : record.extents) { + bl.append(block.bl); + } + + ceph_assert((segment_off_t)bl.length() < + segment_manager->get_segment_size()); + if (current.as_seg_paddr().get_segment_off() + (segment_off_t)bl.length() > + segment_manager->get_segment_size()) + current = paddr_t::make_seg_paddr( + segment_id_t( + current.as_seg_paddr().get_segment_id().device_id(), + current.as_seg_paddr().get_segment_id().device_segment_id() + 1), + 0); + + auto prev = current; + current.as_seg_paddr().set_segment_off( + current.as_seg_paddr().get_segment_off() + + bl.length()); + return segment_manager->segment_write( + prev, + std::move(bl), + true + ).safe_then( + [this, prev, t=std::move(t)]() mutable { + cache->complete_commit(*t, prev, seq /* TODO */); + return prev; + }, + crimson::ct_error::all_same_way([](auto e) { + ASSERT_FALSE("failed to submit"); + }) + ); + } + + auto get_transaction() { + return cache->create_transaction( + Transaction::src_t::MUTATE, "test_cache", false); + } + + template <typename T, typename... Args> + auto get_extent(Transaction &t, Args&&... args) { + return with_trans_intr( + t, + [this](auto &&... 
args) { + return cache->get_extent<T>(args...); + }, + std::forward<Args>(args)...); + } + + seastar::future<> set_up_fut() final { + segment_manager = segment_manager::create_test_ephemeral(); + return segment_manager->init( + ).safe_then([this] { + return segment_manager->mkfs( + segment_manager::get_ephemeral_device_config(0, 1, 0)); + }).safe_then([this] { + epm.reset(new ExtentPlacementManager()); + cache.reset(new Cache(*epm)); + current = paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0); + epm->test_init_no_background(segment_manager.get()); + return seastar::do_with( + get_transaction(), + [this](auto &ref_t) { + cache->init(); + return with_trans_intr(*ref_t, [&](auto &t) { + return cache->mkfs(t); + }).safe_then([this, &ref_t] { + return submit_transaction(std::move(ref_t) + ).then([](auto p) {}); + }); + }); + }).handle_error( + crimson::ct_error::all_same_way([](auto e) { + ASSERT_FALSE("failed to submit"); + }) + ); + } + + seastar::future<> tear_down_fut() final { + return cache->close( + ).safe_then([this] { + segment_manager.reset(); + epm.reset(); + cache.reset(); + }).handle_error( + Cache::close_ertr::assert_all{} + ); + } +}; + +TEST_F(cache_test_t, test_addr_fixup) +{ + run_async([this] { + paddr_t addr; + int csum = 0; + { + auto t = get_transaction(); + auto extent = cache->alloc_new_extent<TestBlockPhysical>( + *t, + TestBlockPhysical::SIZE, + placement_hint_t::HOT, + 0); + extent->set_contents('c'); + csum = extent->get_crc32c(); + submit_transaction(std::move(t)).get0(); + addr = extent->get_paddr(); + } + { + auto t = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_EQ(extent->get_paddr(), addr); + ASSERT_EQ(extent->get_crc32c(), csum); + } + }); +} + +TEST_F(cache_test_t, test_dirty_extent) +{ + run_async([this] { + paddr_t addr; + int csum = 0; + int csum2 = 0; + { + // write out initial test block + auto t = get_transaction(); 
+ auto extent = cache->alloc_new_extent<TestBlockPhysical>( + *t, + TestBlockPhysical::SIZE, + placement_hint_t::HOT, + 0); + extent->set_contents('c'); + csum = extent->get_crc32c(); + auto reladdr = extent->get_paddr(); + ASSERT_TRUE(reladdr.is_relative()); + { + // test that read with same transaction sees new block though + // uncommitted + auto extent = get_extent<TestBlockPhysical>( + *t, + reladdr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_TRUE(extent->is_clean()); + ASSERT_TRUE(extent->is_pending()); + ASSERT_TRUE(extent->get_paddr().is_relative()); + ASSERT_EQ(extent->get_version(), 0); + ASSERT_EQ(csum, extent->get_crc32c()); + } + submit_transaction(std::move(t)).get0(); + addr = extent->get_paddr(); + } + { + // test that consecutive reads on the same extent get the same ref + auto t = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + auto t2 = get_transaction(); + auto extent2 = get_extent<TestBlockPhysical>( + *t2, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_EQ(&*extent, &*extent2); + } + { + // read back test block + auto t = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + // duplicate and reset contents + extent = cache->duplicate_for_write(*t, extent)->cast<TestBlockPhysical>(); + extent->set_contents('c'); + csum2 = extent->get_crc32c(); + ASSERT_EQ(extent->get_paddr(), addr); + { + // test that concurrent read with fresh transaction sees old + // block + auto t2 = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t2, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_TRUE(extent->is_clean()); + ASSERT_FALSE(extent->is_pending()); + ASSERT_EQ(addr, extent->get_paddr()); + ASSERT_EQ(extent->get_version(), 0); + ASSERT_EQ(csum, extent->get_crc32c()); + } + { + // test that read with same transaction sees new block + auto extent = 
get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_TRUE(extent->is_dirty()); + ASSERT_TRUE(extent->is_pending()); + ASSERT_EQ(addr, extent->get_paddr()); + ASSERT_EQ(extent->get_version(), 1); + ASSERT_EQ(csum2, extent->get_crc32c()); + } + // submit transaction + submit_transaction(std::move(t)).get0(); + ASSERT_TRUE(extent->is_dirty()); + ASSERT_EQ(addr, extent->get_paddr()); + ASSERT_EQ(extent->get_version(), 1); + ASSERT_EQ(extent->get_crc32c(), csum2); + } + { + // test that fresh transaction now sees newly dirty block + auto t = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_TRUE(extent->is_dirty()); + ASSERT_EQ(addr, extent->get_paddr()); + ASSERT_EQ(extent->get_version(), 1); + ASSERT_EQ(csum2, extent->get_crc32c()); + } + }); +} diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc new file mode 100644 index 000000000..46ec723a3 --- /dev/null +++ b/src/test/crimson/seastore/test_seastore_journal.cc @@ -0,0 +1,343 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include <random> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/async_cleaner.h" +#include "crimson/os/seastore/journal.h" +#include "crimson/os/seastore/segment_manager/ephemeral.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct record_validator_t { + record_t record; + paddr_t record_final_offset; + + template <typename... T> + record_validator_t(T&&... record) : record(std::forward<T>(record)...) 
{} + + void validate(SegmentManager &manager) { + paddr_t addr = make_record_relative_paddr(0); + for (auto &&block : record.extents) { + auto test = manager.read( + record_final_offset.add_relative(addr), + block.bl.length()).unsafe_get0(); + addr = addr.add_offset(block.bl.length()); + bufferlist bl; + bl.push_back(test); + ASSERT_EQ( + bl.length(), + block.bl.length()); + ASSERT_EQ( + bl.begin().crc32c(bl.length(), 1), + block.bl.begin().crc32c(block.bl.length(), 1)); + } + } + + auto get_replay_handler() { + auto checker = [this, iter=record.deltas.begin()] ( + paddr_t base, + const delta_info_t &di) mutable { + EXPECT_EQ(base, record_final_offset); + ceph_assert(iter != record.deltas.end()); + EXPECT_EQ(di, *iter++); + EXPECT_EQ(base, record_final_offset); + return iter != record.deltas.end(); + }; + if (record.deltas.size()) { + return std::make_optional(std::move(checker)); + } else { + return std::optional<decltype(checker)>(); + } + } +}; + +struct journal_test_t : seastar_test_suite_t, SegmentProvider, JournalTrimmer { + segment_manager::EphemeralSegmentManagerRef segment_manager; + WritePipeline pipeline; + JournalRef journal; + + std::vector<record_validator_t> records; + + std::default_random_engine generator; + + extent_len_t block_size; + + SegmentManagerGroupRef sms; + + segment_id_t next; + + std::map<segment_id_t, segment_seq_t> segment_seqs; + std::map<segment_id_t, segment_type_t> segment_types; + + journal_seq_t dummy_tail; + + mutable segment_info_t tmp_info; + + journal_test_t() = default; + + /* + * JournalTrimmer interfaces + */ + journal_seq_t get_journal_head() const final { return dummy_tail; } + + void set_journal_head(journal_seq_t) final {} + + journal_seq_t get_dirty_tail() const final { return dummy_tail; } + + journal_seq_t get_alloc_tail() const final { return dummy_tail; } + + void update_journal_tails(journal_seq_t, journal_seq_t) final {} + + bool try_reserve_inline_usage(std::size_t) final { return true; } + + void 
release_inline_usage(std::size_t) final {} + + std::size_t get_trim_size_per_cycle() const final { + return 0; + } + + /* + * SegmentProvider interfaces + */ + const segment_info_t& get_seg_info(segment_id_t id) const final { + tmp_info = {}; + tmp_info.seq = segment_seqs.at(id); + tmp_info.type = segment_types.at(id); + return tmp_info; + } + + segment_id_t allocate_segment( + segment_seq_t seq, + segment_type_t type, + data_category_t, + rewrite_gen_t + ) final { + auto ret = next; + next = segment_id_t{ + segment_manager->get_device_id(), + next.device_segment_id() + 1}; + segment_seqs[ret] = seq; + segment_types[ret] = type; + return ret; + } + + void close_segment(segment_id_t) final {} + + void update_segment_avail_bytes(segment_type_t, paddr_t) final {} + + void update_modify_time(segment_id_t, sea_time_point, std::size_t) final {} + + SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); } + + seastar::future<> set_up_fut() final { + segment_manager = segment_manager::create_test_ephemeral(); + return segment_manager->init( + ).safe_then([this] { + return segment_manager->mkfs( + segment_manager::get_ephemeral_device_config(0, 1, 0)); + }).safe_then([this] { + block_size = segment_manager->get_block_size(); + sms.reset(new SegmentManagerGroup()); + next = segment_id_t(segment_manager->get_device_id(), 0); + journal = journal::make_segmented(*this, *this); + journal->set_write_pipeline(&pipeline); + sms->add_segment_manager(segment_manager.get()); + return journal->open_for_mkfs(); + }).safe_then([this](auto) { + dummy_tail = journal_seq_t{0, + paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0)}; + }, crimson::ct_error::all_same_way([] { + ASSERT_FALSE("Unable to mount"); + })); + } + + seastar::future<> tear_down_fut() final { + return journal->close( + ).safe_then([this] { + segment_manager.reset(); + sms.reset(); + journal.reset(); + }).handle_error( + crimson::ct_error::all_same_way([](auto e) { + 
ASSERT_FALSE("Unable to close"); + }) + ); + } + + template <typename T> + auto replay(T &&f) { + return journal->close( + ).safe_then([this, f=std::move(f)]() mutable { + journal = journal::make_segmented(*this, *this); + journal->set_write_pipeline(&pipeline); + return journal->replay(std::forward<T>(std::move(f))); + }).safe_then([this] { + return journal->open_for_mount(); + }); + } + + auto replay_and_check() { + auto record_iter = records.begin(); + decltype(record_iter->get_replay_handler()) delta_checker = std::nullopt; + auto advance = [this, &record_iter, &delta_checker] { + ceph_assert(!delta_checker); + while (record_iter != records.end()) { + auto checker = record_iter->get_replay_handler(); + record_iter++; + if (checker) { + delta_checker.emplace(std::move(*checker)); + break; + } + } + }; + advance(); + replay( + [&advance, + &delta_checker] + (const auto &offsets, + const auto &di, + const journal_seq_t &, + const journal_seq_t &, + auto t) mutable { + if (!delta_checker) { + EXPECT_FALSE("No Deltas Left"); + } + if (!(*delta_checker)(offsets.record_block_base, di)) { + delta_checker = std::nullopt; + advance(); + } + return Journal::replay_ertr::make_ready_future<bool>(true); + }).unsafe_get0(); + ASSERT_EQ(record_iter, records.end()); + for (auto &i : records) { + i.validate(*segment_manager); + } + } + + template <typename... T> + auto submit_record(T&&... 
_record) { + auto record{std::forward<T>(_record)...}; + records.push_back(record); + OrderingHandle handle = get_dummy_ordering_handle(); + auto [addr, _] = journal->submit_record( + std::move(record), + handle).unsafe_get0(); + records.back().record_final_offset = addr; + return addr; + } + + extent_t generate_extent(size_t blocks) { + std::uniform_int_distribution<char> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = distribution(generator); + bufferlist bl; + bl.append(buffer::ptr(buffer::create(blocks * block_size, contents))); + return extent_t{ + extent_types_t::TEST_BLOCK, + L_ADDR_NULL, + bl}; + } + + delta_info_t generate_delta(size_t bytes) { + std::uniform_int_distribution<char> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = distribution(generator); + bufferlist bl; + bl.append(buffer::ptr(buffer::create(bytes, contents))); + return delta_info_t{ + extent_types_t::TEST_BLOCK, + paddr_t{}, + L_ADDR_NULL, + 0, 0, + block_size, + 1, + MAX_SEG_SEQ, + segment_type_t::NULL_SEG, + bl + }; + } +}; + +TEST_F(journal_test_t, replay_one_journal_segment) +{ + run_async([this] { + submit_record(record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(23), generate_delta(30) } + }); + replay_and_check(); + }); +} + +TEST_F(journal_test_t, replay_two_records) +{ + run_async([this] { + submit_record(record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(23), generate_delta(30) } + }); + submit_record(record_t{ + { generate_extent(4), generate_extent(1) }, + { generate_delta(23), generate_delta(400) } + }); + replay_and_check(); + }); +} + +TEST_F(journal_test_t, replay_twice) +{ + run_async([this] { + submit_record(record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(23), generate_delta(30) } + }); + submit_record(record_t{ + { generate_extent(4), generate_extent(1) }, + { 
generate_delta(23), generate_delta(400) } + }); + replay_and_check(); + submit_record(record_t{ + { generate_extent(2), generate_extent(5) }, + { generate_delta(230), generate_delta(40) } + }); + replay_and_check(); + }); +} + +TEST_F(journal_test_t, roll_journal_and_replay) +{ + run_async([this] { + paddr_t current = submit_record( + record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(23), generate_delta(30) } + }); + auto starting_segment = current.as_seg_paddr().get_segment_id(); + unsigned so_far = 0; + while (current.as_seg_paddr().get_segment_id() == starting_segment) { + current = submit_record(record_t{ + { generate_extent(512), generate_extent(512) }, + { generate_delta(23), generate_delta(400) } + }); + ++so_far; + ASSERT_FALSE(so_far > 10); + } + replay_and_check(); + }); +} diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc new file mode 100644 index 000000000..1148884a0 --- /dev/null +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -0,0 +1,1995 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <random> + +#include <boost/iterator/counting_iterator.hpp> + +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/segment_manager/ephemeral.h" +#include "crimson/os/seastore/segment_manager.h" + +#include "test/crimson/seastore/test_block.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct test_extent_record_t { + test_extent_desc_t desc; + unsigned refcount = 0; + test_extent_record_t() = default; + test_extent_record_t( + const 
test_extent_desc_t &desc, + unsigned refcount) : desc(desc), refcount(refcount) {} + + void update(const test_extent_desc_t &to) { + desc = to; + } + + bool operator==(const test_extent_desc_t &rhs) const { + return desc == rhs; + } + bool operator!=(const test_extent_desc_t &rhs) const { + return desc != rhs; + } +}; + +template<> +struct fmt::formatter<test_extent_record_t> : fmt::formatter<std::string_view> { + template <typename FormatContext> + auto format(const test_extent_record_t& r, FormatContext& ctx) const { + return fmt::format_to(ctx.out(), "test_extent_record_t({}, refcount={})", + r.desc, r.refcount); + } +}; + +struct transaction_manager_test_t : + public seastar_test_suite_t, + TMTestState { + + std::random_device rd; + std::mt19937 gen; + + transaction_manager_test_t(std::size_t num_main_devices, std::size_t num_cold_devices) + : TMTestState(num_main_devices, num_cold_devices), gen(rd()) { + } + + laddr_t get_random_laddr(size_t block_size, laddr_t limit) { + return block_size * + std::uniform_int_distribution<>(0, (limit / block_size) - 1)(gen); + } + + char get_random_contents() { + return static_cast<char>(std::uniform_int_distribution<>(0, 255)(gen)); + } + + seastar::future<> set_up_fut() final { + return tm_setup(); + } + + seastar::future<> tear_down_fut() final { + return tm_teardown(); + } + + struct test_extents_t : std::map<laddr_t, test_extent_record_t> { + using delta_t = std::map<laddr_t, std::optional<test_extent_record_t>>; + std::map<laddr_t, uint64_t> laddr_write_seq; + + struct delta_overlay_t { + const test_extents_t &extents; + const delta_t δ + + delta_overlay_t( + const test_extents_t &extents, + const delta_t &delta) + : extents(extents), delta(delta) {} + + + class iterator { + friend class test_extents_t; + + const delta_overlay_t &parent; + test_extents_t::const_iterator biter; + delta_t::const_iterator oiter; + std::optional<std::pair<laddr_t, test_extent_record_t>> cur; + + iterator( + const delta_overlay_t &parent, + 
test_extents_t::const_iterator biter, + delta_t::const_iterator oiter) + : parent(parent), biter(biter), oiter(oiter) {} + + laddr_t get_bkey() { + return biter == parent.extents.end() ? L_ADDR_MAX : biter->first; + } + + laddr_t get_okey() { + return oiter == parent.delta.end() ? L_ADDR_MAX : oiter->first; + } + + bool is_end() { + return oiter == parent.delta.end() && biter == parent.extents.end(); + } + + bool is_valid() { + return is_end() || + ((get_okey() < get_bkey()) && (oiter->second)) || + (get_okey() > get_bkey()); + } + + auto get_pair() { + assert(is_valid()); + assert(!is_end()); + auto okey = get_okey(); + auto bkey = get_bkey(); + return ( + bkey < okey ? + std::pair<laddr_t, test_extent_record_t>(*biter) : + std::make_pair(okey, *(oiter->second))); + } + + void adjust() { + while (!is_valid()) { + if (get_okey() < get_bkey()) { + assert(!oiter->second); + ++oiter; + } else { + assert(get_okey() == get_bkey()); + ++biter; + } + } + assert(is_valid()); + if (!is_end()) { + cur = get_pair(); + } else { + cur = std::nullopt; + } + } + + public: + iterator(const iterator &) = default; + iterator(iterator &&) = default; + + iterator &operator++() { + assert(is_valid()); + assert(!is_end()); + if (get_bkey() < get_okey()) { + ++biter; + } else { + ++oiter; + } + adjust(); + return *this; + } + + bool operator==(const iterator &o) const { + return o.biter == biter && o.oiter == oiter; + } + bool operator!=(const iterator &o) const { + return !(*this == o); + } + + auto operator*() { + assert(!is_end()); + return *cur; + } + auto operator->() { + assert(!is_end()); + return &*cur; + } + }; + + iterator begin() { + auto ret = iterator{*this, extents.begin(), delta.begin()}; + ret.adjust(); + return ret; + } + + iterator end() { + auto ret = iterator{*this, extents.end(), delta.end()}; + // adjust unnecessary + return ret; + } + + iterator lower_bound(laddr_t l) { + auto ret = iterator{*this, extents.lower_bound(l), delta.lower_bound(l)}; + ret.adjust(); + 
return ret; + } + + iterator upper_bound(laddr_t l) { + auto ret = iterator{*this, extents.upper_bound(l), delta.upper_bound(l)}; + ret.adjust(); + return ret; + } + + iterator find(laddr_t l) { + auto ret = lower_bound(l); + if (ret == end() || ret->first != l) { + return end(); + } else { + return ret; + } + } + }; + private: + void check_available( + laddr_t addr, extent_len_t len, const delta_t &delta + ) const { + delta_overlay_t overlay(*this, delta); + for (const auto &i: overlay) { + if (i.first < addr) { + EXPECT_FALSE(i.first + i.second.desc.len > addr); + } else { + EXPECT_FALSE(addr + len > i.first); + } + } + } + + void check_hint( + laddr_t hint, + laddr_t addr, + extent_len_t len, + delta_t &delta) const { + delta_overlay_t overlay(*this, delta); + auto iter = overlay.lower_bound(hint); + laddr_t last = hint; + while (true) { + if (iter == overlay.end() || iter->first > addr) { + EXPECT_EQ(addr, last); + break; + } + EXPECT_FALSE(iter->first - last > len); + last = iter->first + iter->second.desc.len; + ++iter; + } + } + + std::optional<test_extent_record_t> &populate_delta( + laddr_t addr, delta_t &delta, const test_extent_desc_t *desc) const { + auto diter = delta.find(addr); + if (diter != delta.end()) + return diter->second; + + auto iter = find(addr); + if (iter == end()) { + assert(desc); + auto ret = delta.emplace( + std::make_pair(addr, test_extent_record_t{*desc, 0})); + assert(ret.second); + return ret.first->second; + } else { + auto ret = delta.emplace(*iter); + assert(ret.second); + return ret.first->second; + } + } + public: + delta_overlay_t get_overlay(const delta_t &delta) const { + return delta_overlay_t{*this, delta}; + } + + void insert(TestBlock &extent, delta_t &delta) const { + check_available(extent.get_laddr(), extent.get_length(), delta); + delta[extent.get_laddr()] = + test_extent_record_t{extent.get_desc(), 1}; + } + + void alloced(laddr_t hint, TestBlock &extent, delta_t &delta) const { + check_hint(hint, 
extent.get_laddr(), extent.get_length(), delta); + insert(extent, delta); + } + + bool contains(laddr_t addr, const delta_t &delta) const { + delta_overlay_t overlay(*this, delta); + return overlay.find(addr) != overlay.end(); + } + + test_extent_record_t get(laddr_t addr, const delta_t &delta) const { + delta_overlay_t overlay(*this, delta); + auto iter = overlay.find(addr); + assert(iter != overlay.end()); + return iter->second; + } + + void update( + laddr_t addr, + const test_extent_desc_t &desc, + delta_t &delta) const { + auto &rec = populate_delta(addr, delta, &desc); + assert(rec); + rec->desc = desc; + } + + int inc_ref( + laddr_t addr, + delta_t &delta) const { + auto &rec = populate_delta(addr, delta, nullptr); + assert(rec); + return ++rec->refcount; + } + + int dec_ref( + laddr_t addr, + delta_t &delta) const { + auto &rec = populate_delta(addr, delta, nullptr); + assert(rec); + assert(rec->refcount > 0); + rec->refcount--; + if (rec->refcount == 0) { + delta[addr] = std::nullopt; + return 0; + } else { + return rec->refcount; + } + } + + void consume(const delta_t &delta, const uint64_t write_seq = 0) { + for (const auto &i : delta) { + if (i.second) { + if (laddr_write_seq.find(i.first) == laddr_write_seq.end() || + laddr_write_seq[i.first] <= write_seq) { + (*this)[i.first] = *i.second; + laddr_write_seq[i.first] = write_seq; + } + } else { + erase(i.first); + } + } + } + + } test_mappings; + + struct test_transaction_t { + TransactionRef t; + test_extents_t::delta_t mapping_delta; + }; + + test_transaction_t create_transaction() { + return { create_mutate_transaction(), {} }; + } + + test_transaction_t create_read_test_transaction() { + return {create_read_transaction(), {} }; + } + + test_transaction_t create_weak_test_transaction() { + return { create_weak_transaction(), {} }; + } + + TestBlockRef alloc_extent( + test_transaction_t &t, + laddr_t hint, + extent_len_t len, + char contents) { + auto extent = with_trans_intr(*(t.t), [&](auto& trans) 
{ + return tm->alloc_extent<TestBlock>(trans, hint, len); + }).unsafe_get0(); + extent->set_contents(contents); + EXPECT_FALSE(test_mappings.contains(extent->get_laddr(), t.mapping_delta)); + EXPECT_EQ(len, extent->get_length()); + test_mappings.alloced(hint, *extent, t.mapping_delta); + return extent; + } + + TestBlockRef alloc_extent( + test_transaction_t &t, + laddr_t hint, + extent_len_t len) { + return alloc_extent( + t, + hint, + len, + get_random_contents()); + } + + bool check_usage() { + return epm->check_usage(); + } + + void replay() { + EXPECT_TRUE(check_usage()); + restart(); + } + + void check() { + check_mappings(); + check_usage(); + } + + void check_mappings() { + auto t = create_weak_test_transaction(); + check_mappings(t); + } + + TestBlockRef get_extent( + test_transaction_t &t, + laddr_t addr, + extent_len_t len) { + ceph_assert(test_mappings.contains(addr, t.mapping_delta)); + ceph_assert(test_mappings.get(addr, t.mapping_delta).desc.len == len); + + auto ext = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->read_extent<TestBlock>(trans, addr, len); + }).unsafe_get0(); + EXPECT_EQ(addr, ext->get_laddr()); + return ext; + } + + TestBlockRef try_get_extent( + test_transaction_t &t, + laddr_t addr) { + ceph_assert(test_mappings.contains(addr, t.mapping_delta)); + + using ertr = with_trans_ertr<TransactionManager::read_extent_iertr>; + using ret = ertr::future<TestBlockRef>; + auto ext = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->read_extent<TestBlock>(trans, addr); + }).safe_then([](auto ext) -> ret { + return ertr::make_ready_future<TestBlockRef>(ext); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<TestBlockRef>(); + }, + crimson::ct_error::assert_all{ + "get_extent got invalid error" + } + ).get0(); + if (ext) { + EXPECT_EQ(addr, ext->get_laddr()); + } + return ext; + } + + TestBlockRef try_get_extent( + test_transaction_t &t, + laddr_t addr, + extent_len_t len) { + 
ceph_assert(test_mappings.contains(addr, t.mapping_delta)); + ceph_assert(test_mappings.get(addr, t.mapping_delta).desc.len == len); + + using ertr = with_trans_ertr<TransactionManager::read_extent_iertr>; + using ret = ertr::future<TestBlockRef>; + auto ext = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->read_extent<TestBlock>(trans, addr, len); + }).safe_then([](auto ext) -> ret { + return ertr::make_ready_future<TestBlockRef>(ext); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<TestBlockRef>(); + }, + crimson::ct_error::assert_all{ + "get_extent got invalid error" + } + ).get0(); + if (ext) { + EXPECT_EQ(addr, ext->get_laddr()); + } + return ext; + } + + TestBlockRef try_read_pin( + test_transaction_t &t, + LBAMappingRef &&pin) { + using ertr = with_trans_ertr<TransactionManager::base_iertr>; + using ret = ertr::future<TestBlockRef>; + auto addr = pin->get_key(); + auto ext = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->read_pin<TestBlock>(trans, std::move(pin)); + }).safe_then([](auto ext) -> ret { + return ertr::make_ready_future<TestBlockRef>(ext); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<TestBlockRef>(); + }, + crimson::ct_error::assert_all{ + "read_pin got invalid error" + } + ).get0(); + if (ext) { + EXPECT_EQ(addr, ext->get_laddr()); + } + if (t.t->is_conflicted()) { + return nullptr; + } + return ext; + } + + test_block_mutator_t mutator; + TestBlockRef mutate_extent( + test_transaction_t &t, + TestBlockRef ref) { + ceph_assert(test_mappings.contains(ref->get_laddr(), t.mapping_delta)); + ceph_assert( + test_mappings.get(ref->get_laddr(), t.mapping_delta).desc.len == + ref->get_length()); + + auto ext = tm->get_mutable_extent(*t.t, ref)->cast<TestBlock>(); + EXPECT_EQ(ext->get_laddr(), ref->get_laddr()); + EXPECT_EQ(ext->get_desc(), ref->get_desc()); + mutator.mutate(*ext, gen); + + test_mappings.update(ext->get_laddr(), 
ext->get_desc(), t.mapping_delta); + return ext; + } + + TestBlockRef mutate_addr( + test_transaction_t &t, + laddr_t offset, + size_t length) { + auto ext = get_extent(t, offset, length); + mutate_extent(t, ext); + return ext; + } + + LBAMappingRef get_pin( + test_transaction_t &t, + laddr_t offset) { + ceph_assert(test_mappings.contains(offset, t.mapping_delta)); + auto pin = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->get_pin(trans, offset); + }).unsafe_get0(); + EXPECT_EQ(offset, pin->get_key()); + return pin; + } + + LBAMappingRef try_get_pin( + test_transaction_t &t, + laddr_t offset) { + ceph_assert(test_mappings.contains(offset, t.mapping_delta)); + using ertr = with_trans_ertr<TransactionManager::get_pin_iertr>; + using ret = ertr::future<LBAMappingRef>; + auto pin = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->get_pin(trans, offset); + }).safe_then([](auto pin) -> ret { + return ertr::make_ready_future<LBAMappingRef>(std::move(pin)); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<LBAMappingRef>(); + }, + crimson::ct_error::assert_all{ + "get_extent got invalid error" + } + ).get0(); + if (pin) { + EXPECT_EQ(offset, pin->get_key()); + } + return pin; + } + + void inc_ref(test_transaction_t &t, laddr_t offset) { + ceph_assert(test_mappings.contains(offset, t.mapping_delta)); + ceph_assert(test_mappings.get(offset, t.mapping_delta).refcount > 0); + + auto refcnt = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->inc_ref(trans, offset); + }).unsafe_get0(); + auto check_refcnt = test_mappings.inc_ref(offset, t.mapping_delta); + EXPECT_EQ(refcnt, check_refcnt); + } + + void dec_ref(test_transaction_t &t, laddr_t offset) { + ceph_assert(test_mappings.contains(offset, t.mapping_delta)); + ceph_assert(test_mappings.get(offset, t.mapping_delta).refcount > 0); + + auto refcnt = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->dec_ref(trans, offset); + }).unsafe_get0(); + auto 
check_refcnt = test_mappings.dec_ref(offset, t.mapping_delta); + EXPECT_EQ(refcnt, check_refcnt); + if (refcnt == 0) + logger().debug("dec_ref: {} at refcount 0", offset); + } + + void check_mappings(test_transaction_t &t) { + auto overlay = test_mappings.get_overlay(t.mapping_delta); + for (const auto &i: overlay) { + logger().debug("check_mappings: {}->{}", i.first, i.second); + auto ext = get_extent(t, i.first, i.second.desc.len); + EXPECT_EQ(i.second, ext->get_desc()); + } + with_trans_intr( + *t.t, + [this, &overlay](auto &t) { + return lba_manager->scan_mappings( + t, + 0, + L_ADDR_MAX, + [iter=overlay.begin(), &overlay](auto l, auto p, auto len) mutable { + EXPECT_NE(iter, overlay.end()); + logger().debug( + "check_mappings: scan {}", + l); + EXPECT_EQ(l, iter->first); + ++iter; + }); + }).unsafe_get0(); + (void)with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->check_child_trackers(t); + }).unsafe_get0(); + } + + bool try_submit_transaction(test_transaction_t t) { + using ertr = with_trans_ertr<TransactionManager::submit_transaction_iertr>; + using ret = ertr::future<bool>; + uint64_t write_seq = 0; + bool success = submit_transaction_fut_with_seq(*t.t + ).safe_then([&write_seq](auto seq) -> ret { + write_seq = seq; + return ertr::make_ready_future<bool>(true); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<bool>(false); + }, + crimson::ct_error::assert_all{ + "try_submit_transaction hit invalid error" + } + ).then([this](auto ret) { + return epm->run_background_work_until_halt( + ).then([ret] { return ret; }); + }).get0(); + + if (success) { + test_mappings.consume(t.mapping_delta, write_seq); + } + + return success; + } + + void submit_transaction(test_transaction_t &&t) { + bool success = try_submit_transaction(std::move(t)); + EXPECT_TRUE(success); + } + + void submit_transaction_expect_conflict(test_transaction_t &&t) { + bool success = try_submit_transaction(std::move(t)); + 
EXPECT_FALSE(success); + } + + auto allocate_sequentially(const size_t size, const int num, bool run_clean = true) { + return repeat_eagain([this, size, num] { + return seastar::do_with( + create_transaction(), + [this, size, num](auto &t) { + return with_trans_intr( + *t.t, + [&t, this, size, num](auto &) { + return trans_intr::do_for_each( + boost::make_counting_iterator(0), + boost::make_counting_iterator(num), + [&t, this, size](auto) { + return tm->alloc_extent<TestBlock>( + *(t.t), L_ADDR_MIN, size + ).si_then([&t, this, size](auto extent) { + extent->set_contents(get_random_contents()); + EXPECT_FALSE( + test_mappings.contains(extent->get_laddr(), t.mapping_delta)); + EXPECT_EQ(size, extent->get_length()); + test_mappings.alloced(extent->get_laddr(), *extent, t.mapping_delta); + return seastar::now(); + }); + }).si_then([&t, this] { + return tm->submit_transaction(*t.t); + }); + }).safe_then([&t, this] { + test_mappings.consume(t.mapping_delta); + }); + }); + }).safe_then([this, run_clean]() { + if (run_clean) { + return epm->run_background_work_until_halt(); + } else { + return epm->background_process.trimmer->trim(); + } + }).handle_error( + crimson::ct_error::assert_all{ + "Invalid error in SeaStore::list_collections" + } + ); + } + + void test_parallel_extent_read() { + constexpr size_t TOTAL = 4<<20; + constexpr size_t BSIZE = 4<<10; + constexpr size_t BLOCKS = TOTAL / BSIZE; + run_async([this] { + for (unsigned i = 0; i < BLOCKS; ++i) { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + i * BSIZE, + BSIZE); + ASSERT_EQ(i * BSIZE, extent->get_laddr()); + submit_transaction(std::move(t)); + } + + seastar::do_with( + create_read_test_transaction(), + [this](auto &t) { + return with_trans_intr(*(t.t), [this](auto &t) { + return trans_intr::parallel_for_each( + boost::make_counting_iterator(0lu), + boost::make_counting_iterator(BLOCKS), + [this, &t](auto i) { + return tm->read_extent<TestBlock>(t, i * BSIZE, BSIZE + ).si_then([](auto) { + 
return seastar::now(); + }); + }); + }); + }).unsafe_get0(); + }); + } + + void test_random_writes_concurrent() { + constexpr unsigned WRITE_STREAMS = 256; + + constexpr size_t TOTAL = 4<<20; + constexpr size_t BSIZE = 4<<10; + constexpr size_t BLOCKS = TOTAL / BSIZE; + run_async([this] { + std::for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(WRITE_STREAMS), + [&](auto idx) { + for (unsigned i = idx; i < BLOCKS; i += WRITE_STREAMS) { + while (true) { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + i * BSIZE, + BSIZE); + ASSERT_EQ(i * BSIZE, extent->get_laddr()); + if (try_submit_transaction(std::move(t))) + break; + } + } + }); + + int writes = 0; + unsigned failures = 0; + seastar::parallel_for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(WRITE_STREAMS), + [&](auto) { + return seastar::async([&] { + while (writes < 300) { + auto t = create_transaction(); + auto ext = try_get_extent( + t, + get_random_laddr(BSIZE, TOTAL), + BSIZE); + if (!ext){ + failures++; + continue; + } + auto mut = mutate_extent(t, ext); + auto success = try_submit_transaction(std::move(t)); + writes += success; + failures += !success; + } + }); + }).get0(); + replay(); + logger().info("random_writes_concurrent: checking"); + check(); + logger().info( + "random_writes_concurrent: {} suceeded, {} failed", + writes, + failures + ); + }); + } + + void test_evict() { + // only support segmented backend currently + ASSERT_EQ(epm->get_main_backend_type(), backend_type_t::SEGMENTED); + ASSERT_TRUE(epm->background_process.has_cold_tier()); + constexpr size_t device_size = + segment_manager::DEFAULT_TEST_EPHEMERAL.size; + constexpr size_t block_size = + segment_manager::DEFAULT_TEST_EPHEMERAL.block_size; + constexpr size_t segment_size = + segment_manager::DEFAULT_TEST_EPHEMERAL.segment_size; + ASSERT_GE(segment_size, block_size * 20); + + run_async([this] { + // indicates there is no available segments to reclaim + 
double stop_ratio = (double)segment_size / (double)device_size / 2; + // 1 segment + double default_ratio = stop_ratio * 2; + // 1.25 segment + double fast_ratio = stop_ratio * 2.5; + + epm->background_process + .eviction_state + .init(stop_ratio, default_ratio, fast_ratio); + + // these variables are described in + // EPM::BackgroundProcess::eviction_state_t::maybe_update_eviction_mode + size_t ratio_A_size = segment_size / 2 - block_size * 10; + size_t ratio_B_size = segment_size / 2 + block_size * 10; + size_t ratio_C_size = segment_size + block_size; + size_t ratio_D_size = segment_size * 1.25 + block_size; + + auto run_until = [this](size_t size) -> seastar::future<> { + return seastar::repeat([this, size] { + size_t current_size = epm->background_process + .main_cleaner->get_stat().data_stored; + if (current_size >= size) { + return seastar::futurize_invoke([] { + return seastar::stop_iteration::yes; + }); + } else { + int num = (size - current_size) / block_size; + return seastar::do_for_each( + boost::make_counting_iterator(0), + boost::make_counting_iterator(num), + [this](auto) { + // don't start background process to test the behavior + // of generation changes during alloc new extents + return allocate_sequentially(block_size, 1, false); + }).then([] { + return seastar::stop_iteration::no; + }); + } + }); + }; + + std::vector<extent_types_t> all_extent_types{ + extent_types_t::ROOT, + extent_types_t::LADDR_INTERNAL, + extent_types_t::LADDR_LEAF, + extent_types_t::OMAP_INNER, + extent_types_t::OMAP_LEAF, + extent_types_t::ONODE_BLOCK_STAGED, + extent_types_t::COLL_BLOCK, + extent_types_t::OBJECT_DATA_BLOCK, + extent_types_t::RETIRED_PLACEHOLDER, + extent_types_t::ALLOC_INFO, + extent_types_t::JOURNAL_TAIL, + extent_types_t::TEST_BLOCK, + extent_types_t::TEST_BLOCK_PHYSICAL, + extent_types_t::BACKREF_INTERNAL, + extent_types_t::BACKREF_LEAF + }; + + std::vector<rewrite_gen_t> all_generations; + for (auto i = INIT_GENERATION; i < REWRITE_GENERATIONS; i++) 
{ + all_generations.push_back(i); + } + + // input target-generation -> expected generation after the adjustment + using generation_mapping_t = std::map<rewrite_gen_t, rewrite_gen_t>; + std::map<extent_types_t, generation_mapping_t> expected_generations; + + // this loop should be consistent with EPM::adjust_generation + for (auto t : all_extent_types) { + expected_generations[t] = {}; + if (!is_logical_type(t)) { + for (auto gen : all_generations) { + expected_generations[t][gen] = INLINE_GENERATION; + } + } else { + if (get_extent_category(t) == data_category_t::METADATA) { + expected_generations[t][INIT_GENERATION] = INLINE_GENERATION; + } else { + expected_generations[t][INIT_GENERATION] = OOL_GENERATION; + } + + for (auto i = INIT_GENERATION + 1; i < REWRITE_GENERATIONS; i++) { + expected_generations[t][i] = i; + } + } + } + + auto update_data_gen_mapping = [&](std::function<rewrite_gen_t(rewrite_gen_t)> func) { + for (auto t : all_extent_types) { + if (!is_logical_type(t)) { + continue; + } + for (auto i = INIT_GENERATION + 1; i < REWRITE_GENERATIONS; i++) { + expected_generations[t][i] = func(i); + } + } + // since background process didn't start in allocate_sequentially + // we update eviction mode manually. 
+ epm->background_process.maybe_update_eviction_mode(); + }; + + auto test_gen = [&](const char *caller) { + for (auto t : all_extent_types) { + for (auto gen : all_generations) { + auto epm_gen = epm->adjust_generation( + get_extent_category(t), + t, + placement_hint_t::HOT, + gen); + if (expected_generations[t][gen] != epm_gen) { + logger().error("caller: {}, extent type: {}, input generation: {}, " + "expected generation : {}, adjust result from EPM: {}", + caller, t, gen, expected_generations[t][gen], epm_gen); + } + EXPECT_EQ(expected_generations[t][gen], epm_gen); + } + } + }; + + // verify that no data should go to the cold tier + update_data_gen_mapping([](rewrite_gen_t gen) -> rewrite_gen_t { + if (gen == MIN_COLD_GENERATION) { + return MIN_COLD_GENERATION - 1; + } else { + return gen; + } + }); + test_gen("init"); + + run_until(ratio_A_size).get(); + EXPECT_TRUE(epm->background_process.eviction_state.is_stop_mode()); + test_gen("exceed ratio A"); + epm->run_background_work_until_halt().get(); + + run_until(ratio_B_size).get(); + EXPECT_TRUE(epm->background_process.eviction_state.is_stop_mode()); + test_gen("exceed ratio B"); + epm->run_background_work_until_halt().get(); + + // verify that data may go to the cold tier + run_until(ratio_C_size).get(); + update_data_gen_mapping([](rewrite_gen_t gen) { return gen; }); + EXPECT_TRUE(epm->background_process.eviction_state.is_default_mode()); + test_gen("exceed ratio C"); + epm->run_background_work_until_halt().get(); + + // verify that data must go to the cold tier + run_until(ratio_D_size).get(); + update_data_gen_mapping([](rewrite_gen_t gen) { + if (gen >= MIN_REWRITE_GENERATION && gen < MIN_COLD_GENERATION) { + return MIN_COLD_GENERATION; + } else { + return gen; + } + }); + EXPECT_TRUE(epm->background_process.eviction_state.is_fast_mode()); + test_gen("exceed ratio D"); + + auto main_size = epm->background_process.main_cleaner->get_stat().data_stored; + auto cold_size = 
epm->background_process.cold_cleaner->get_stat().data_stored; + EXPECT_EQ(cold_size, 0); + epm->run_background_work_until_halt().get(); + auto new_main_size = epm->background_process.main_cleaner->get_stat().data_stored; + auto new_cold_size = epm->background_process.cold_cleaner->get_stat().data_stored; + EXPECT_GE(main_size, new_main_size); + EXPECT_NE(new_cold_size, 0); + + update_data_gen_mapping([](rewrite_gen_t gen) { return gen; }); + EXPECT_TRUE(epm->background_process.eviction_state.is_default_mode()); + test_gen("finish evict"); + }); + } + + using remap_entry = TransactionManager::remap_entry; + LBAMappingRef remap_pin( + test_transaction_t &t, + LBAMappingRef &&opin, + extent_len_t new_offset, + extent_len_t new_len) { + if (t.t->is_conflicted()) { + return nullptr; + } + auto o_laddr = opin->get_key(); + auto pin = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->remap_pin<TestBlock>( + trans, std::move(opin), std::array{ + remap_entry(new_offset, new_len)} + ).si_then([](auto ret) { + return std::move(ret[0]); + }); + }).handle_error(crimson::ct_error::eagain::handle([] { + LBAMappingRef t = nullptr; + return t; + }), crimson::ct_error::pass_further_all{}).unsafe_get0(); + if (t.t->is_conflicted()) { + return nullptr; + } + test_mappings.dec_ref(o_laddr, t.mapping_delta); + EXPECT_FALSE(test_mappings.contains(o_laddr, t.mapping_delta)); + EXPECT_TRUE(pin); + EXPECT_EQ(pin->get_length(), new_len); + EXPECT_EQ(pin->get_key(), o_laddr + new_offset); + + auto extent = try_read_pin(t, pin->duplicate()); + if (extent) { + test_mappings.alloced(pin->get_key(), *extent, t.mapping_delta); + EXPECT_TRUE(extent->is_exist_clean()); + } else { + ceph_assert(t.t->is_conflicted()); + return nullptr; + } + return pin; + } + + using _overwrite_pin_iertr = TransactionManager::get_pin_iertr; + using _overwrite_pin_ret = _overwrite_pin_iertr::future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>; + _overwrite_pin_ret _overwrite_pin( + Transaction &t, + 
LBAMappingRef &&opin, + extent_len_t new_offset, + extent_len_t new_len, + ceph::bufferlist &bl) { + auto o_laddr = opin->get_key(); + auto o_len = opin->get_length(); + if (new_offset != 0 && o_len != new_offset + new_len) { + return tm->remap_pin<TestBlock, 2>( + t, + std::move(opin), + std::array{ + remap_entry( + 0, + new_offset), + remap_entry( + new_offset + new_len, + o_len - new_offset - new_len) + } + ).si_then([this, new_offset, new_len, o_laddr, &t, &bl](auto ret) { + return tm->alloc_extent<TestBlock>(t, o_laddr + new_offset, new_len + ).si_then([this, ret = std::move(ret), new_len, + new_offset, o_laddr, &t, &bl](auto ext) mutable { + ceph_assert(ret.size() == 2); + auto iter = bl.cbegin(); + iter.copy(new_len, ext->get_bptr().c_str()); + auto r_laddr = o_laddr + new_offset + new_len; + // old pins expired after alloc new extent, need to get it. + return tm->get_pin(t, o_laddr + ).si_then([this, &t, ext = std::move(ext), r_laddr](auto lpin) mutable { + return tm->get_pin(t, r_laddr + ).si_then([lpin = std::move(lpin), ext = std::move(ext)] + (auto rpin) mutable { + return _overwrite_pin_iertr::make_ready_future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>( + std::make_tuple( + std::move(lpin), std::move(ext), std::move(rpin))); + }); + }); + }); + }); + } else if (new_offset == 0 && o_len != new_offset + new_len) { + return tm->remap_pin<TestBlock, 1>( + t, + std::move(opin), + std::array{ + remap_entry( + new_offset + new_len, + o_len - new_offset - new_len) + } + ).si_then([this, new_offset, new_len, o_laddr, &t, &bl](auto ret) { + return tm->alloc_extent<TestBlock>(t, o_laddr + new_offset, new_len + ).si_then([this, ret = std::move(ret), new_offset, new_len, + o_laddr, &t, &bl](auto ext) mutable { + ceph_assert(ret.size() == 1); + auto iter = bl.cbegin(); + iter.copy(new_len, ext->get_bptr().c_str()); + auto r_laddr = o_laddr + new_offset + new_len; + return tm->get_pin(t, r_laddr + ).si_then([ext = std::move(ext)](auto rpin) mutable { 
+ return _overwrite_pin_iertr::make_ready_future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>( + std::make_tuple( + nullptr, std::move(ext), std::move(rpin))); + }); + }); + }); + } else if (new_offset != 0 && o_len == new_offset + new_len) { + return tm->remap_pin<TestBlock, 1>( + t, + std::move(opin), + std::array{ + remap_entry( + 0, + new_offset) + } + ).si_then([this, new_offset, new_len, o_laddr, &t, &bl](auto ret) { + return tm->alloc_extent<TestBlock>(t, o_laddr + new_offset, new_len + ).si_then([this, ret = std::move(ret), new_len, o_laddr, &t, &bl] + (auto ext) mutable { + ceph_assert(ret.size() == 1); + auto iter = bl.cbegin(); + iter.copy(new_len, ext->get_bptr().c_str()); + return tm->get_pin(t, o_laddr + ).si_then([ext = std::move(ext)](auto lpin) mutable { + return _overwrite_pin_iertr::make_ready_future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>( + std::make_tuple( + std::move(lpin), std::move(ext), nullptr)); + }); + }); + }); + } else { + ceph_abort("impossible"); + return _overwrite_pin_iertr::make_ready_future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>( + std::make_tuple(nullptr, nullptr, nullptr)); + } + } + + using overwrite_pin_ret = std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>; + overwrite_pin_ret overwrite_pin( + test_transaction_t &t, + LBAMappingRef &&opin, + extent_len_t new_offset, + extent_len_t new_len, + ceph::bufferlist &bl) { + if (t.t->is_conflicted()) { + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + } + auto o_laddr = opin->get_key(); + auto o_paddr = opin->get_val(); + auto o_len = opin->get_length(); + auto res = with_trans_intr(*(t.t), [&](auto& trans) { + return _overwrite_pin( + trans, std::move(opin), new_offset, new_len, bl); + }).handle_error(crimson::ct_error::eagain::handle([] { + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + }), 
crimson::ct_error::pass_further_all{}).unsafe_get0(); + if (t.t->is_conflicted()) { + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + } + test_mappings.dec_ref(o_laddr, t.mapping_delta); + EXPECT_FALSE(test_mappings.contains(o_laddr, t.mapping_delta)); + auto &[lpin, ext, rpin] = res; + + EXPECT_TRUE(ext); + EXPECT_TRUE(lpin || rpin); + EXPECT_TRUE(o_len > ext->get_length()); + if (lpin) { + EXPECT_EQ(lpin->get_key(), o_laddr); + EXPECT_EQ(lpin->get_val(), o_paddr); + EXPECT_EQ(lpin->get_length(), new_offset); + auto lext = try_read_pin(t, lpin->duplicate()); + if (lext) { + test_mappings.alloced(lpin->get_key(), *lext, t.mapping_delta); + EXPECT_TRUE(lext->is_exist_clean()); + } else { + ceph_assert(t.t->is_conflicted()); + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + } + } + EXPECT_EQ(ext->get_laddr(), o_laddr + new_offset); + EXPECT_EQ(ext->get_length(), new_len); + test_mappings.alloced(ext->get_laddr(), *ext, t.mapping_delta); + if (rpin) { + EXPECT_EQ(rpin->get_key(), o_laddr + new_offset + new_len); + EXPECT_EQ(rpin->get_val(), o_paddr.add_offset(new_offset) + .add_offset(new_len)); + EXPECT_EQ(rpin->get_length(), o_len - new_offset - new_len); + auto rext = try_read_pin(t, rpin->duplicate()); + if (rext) { + test_mappings.alloced(rpin->get_key(), *rext, t.mapping_delta); + EXPECT_TRUE(rext->is_exist_clean()); + } else { + ceph_assert(t.t->is_conflicted()); + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + } + } + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + std::move(lpin), std::move(ext), std::move(rpin)); + } + + void test_remap_pin() { + run_async([this] { + constexpr size_t l_offset = 32 << 10; + constexpr size_t l_len = 32 << 10; + constexpr size_t r_offset = 64 << 10; + constexpr size_t r_len = 32 << 10; + { + auto t = create_transaction(); + auto lext = alloc_extent(t, 
l_offset, l_len); + lext->set_contents('l', 0, 16 << 10); + auto rext = alloc_extent(t, r_offset, r_len); + rext->set_contents('r', 16 << 10, 16 << 10); + submit_transaction(std::move(t)); + } + { + auto t = create_transaction(); + auto lpin = get_pin(t, l_offset); + auto rpin = get_pin(t, r_offset); + //split left + auto pin1 = remap_pin(t, std::move(lpin), 0, 16 << 10); + ASSERT_TRUE(pin1); + auto pin2 = remap_pin(t, std::move(pin1), 0, 8 << 10); + ASSERT_TRUE(pin2); + auto pin3 = remap_pin(t, std::move(pin2), 0, 4 << 10); + ASSERT_TRUE(pin3); + auto lext = get_extent(t, pin3->get_key(), pin3->get_length()); + EXPECT_EQ('l', lext->get_bptr().c_str()[0]); + auto mlext = mutate_extent(t, lext); + ASSERT_TRUE(mlext->is_exist_mutation_pending()); + ASSERT_TRUE(mlext.get() == lext.get()); + + //split right + auto pin4 = remap_pin(t, std::move(rpin), 16 << 10, 16 << 10); + ASSERT_TRUE(pin4); + auto pin5 = remap_pin(t, std::move(pin4), 8 << 10, 8 << 10); + ASSERT_TRUE(pin5); + auto pin6 = remap_pin(t, std::move(pin5), 4 << 10, 4 << 10); + ASSERT_TRUE(pin6); + auto rext = get_extent(t, pin6->get_key(), pin6->get_length()); + EXPECT_EQ('r', rext->get_bptr().c_str()[0]); + auto mrext = mutate_extent(t, rext); + ASSERT_TRUE(mrext->is_exist_mutation_pending()); + ASSERT_TRUE(mrext.get() == rext.get()); + + submit_transaction(std::move(t)); + check(); + } + replay(); + check(); + }); + } + + void test_overwrite_pin() { + run_async([this] { + constexpr size_t m_offset = 8 << 10; + constexpr size_t m_len = 56 << 10; + constexpr size_t l_offset = 64 << 10; + constexpr size_t l_len = 64 << 10; + constexpr size_t r_offset = 128 << 10; + constexpr size_t r_len = 64 << 10; + { + auto t = create_transaction(); + auto m_ext = alloc_extent(t, m_offset, m_len); + m_ext->set_contents('a', 0 << 10, 8 << 10); + m_ext->set_contents('b', 16 << 10, 4 << 10); + m_ext->set_contents('c', 36 << 10, 4 << 10); + m_ext->set_contents('d', 52 << 10, 4 << 10); + + auto l_ext = alloc_extent(t, l_offset, 
l_len); + auto r_ext = alloc_extent(t, r_offset, r_len); + submit_transaction(std::move(t)); + } + { + auto t = create_transaction(); + auto mpin = get_pin(t, m_offset); + auto lpin = get_pin(t, l_offset); + auto rpin = get_pin(t, r_offset); + + bufferlist mbl1, mbl2, mbl3; + mbl1.append(ceph::bufferptr(ceph::buffer::create(8 << 10, 0))); + mbl2.append(ceph::bufferptr(ceph::buffer::create(16 << 10, 0))); + mbl3.append(ceph::bufferptr(ceph::buffer::create(12 << 10, 0))); + auto [mlp1, mext1, mrp1] = overwrite_pin( + t, std::move(mpin), 8 << 10 , 8 << 10, mbl1); + auto [mlp2, mext2, mrp2] = overwrite_pin( + t, std::move(mrp1), 4 << 10 , 16 << 10, mbl2); + auto [mlpin3, me3, mrpin3] = overwrite_pin( + t, std::move(mrp2), 4 << 10 , 12 << 10, mbl3); + auto mlext1 = get_extent(t, mlp1->get_key(), mlp1->get_length()); + auto mlext2 = get_extent(t, mlp2->get_key(), mlp2->get_length()); + auto mlext3 = get_extent(t, mlpin3->get_key(), mlpin3->get_length()); + auto mrext3 = get_extent(t, mrpin3->get_key(), mrpin3->get_length()); + EXPECT_EQ('a', mlext1->get_bptr().c_str()[0]); + EXPECT_EQ('b', mlext2->get_bptr().c_str()[0]); + EXPECT_EQ('c', mlext3->get_bptr().c_str()[0]); + EXPECT_EQ('d', mrext3->get_bptr().c_str()[0]); + auto mutate_mlext1 = mutate_extent(t, mlext1); + auto mutate_mlext2 = mutate_extent(t, mlext2); + auto mutate_mlext3 = mutate_extent(t, mlext3); + auto mutate_mrext3 = mutate_extent(t, mrext3); + ASSERT_TRUE(mutate_mlext1->is_exist_mutation_pending()); + ASSERT_TRUE(mutate_mlext2->is_exist_mutation_pending()); + ASSERT_TRUE(mutate_mlext3->is_exist_mutation_pending()); + ASSERT_TRUE(mutate_mrext3->is_exist_mutation_pending()); + ASSERT_TRUE(mutate_mlext1.get() == mlext1.get()); + ASSERT_TRUE(mutate_mlext2.get() == mlext2.get()); + ASSERT_TRUE(mutate_mlext3.get() == mlext3.get()); + ASSERT_TRUE(mutate_mrext3.get() == mrext3.get()); + + bufferlist lbl1, rbl1; + lbl1.append(ceph::bufferptr(ceph::buffer::create(32 << 10, 0))); + auto [llp1, lext1, lrp1] = 
overwrite_pin( + t, std::move(lpin), 0 , 32 << 10, lbl1); + EXPECT_FALSE(llp1); + EXPECT_TRUE(lrp1); + EXPECT_TRUE(lext1); + + rbl1.append(ceph::bufferptr(ceph::buffer::create(32 << 10, 0))); + auto [rlp1, rext1, rrp1] = overwrite_pin( + t, std::move(rpin), 32 << 10 , 32 << 10, rbl1); + EXPECT_TRUE(rlp1); + EXPECT_TRUE(rext1); + EXPECT_FALSE(rrp1); + + submit_transaction(std::move(t)); + check(); + } + replay(); + check(); + }); + } + + void test_remap_pin_concurrent() { + run_async([this] { + constexpr unsigned REMAP_NUM = 32; + constexpr size_t offset = 0; + constexpr size_t length = 256 << 10; + { + auto t = create_transaction(); + auto extent = alloc_extent(t, offset, length); + ASSERT_EQ(length, extent->get_length()); + submit_transaction(std::move(t)); + } + int success = 0; + int early_exit = 0; + int conflicted = 0; + + seastar::parallel_for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(REMAP_NUM), + [&](auto) { + return seastar::async([&] { + uint32_t pieces = std::uniform_int_distribution<>(6, 31)(gen); + std::set<uint32_t> split_points; + for (uint32_t i = 0; i < pieces; i++) { + auto p = std::uniform_int_distribution<>(1, 256)(gen); + split_points.insert(p - p % 4); + } + + auto t = create_transaction(); + auto pin0 = try_get_pin(t, offset); + if (!pin0 || pin0->get_length() != length) { + early_exit++; + return; + } + + auto last_pin = pin0->duplicate(); + ASSERT_TRUE(!split_points.empty()); + for (auto off : split_points) { + if (off == 0 || off >= 255) { + continue; + } + auto new_off = (off << 10) - last_pin->get_key(); + auto new_len = last_pin->get_length() - new_off; + //always remap right extent at new split_point + auto pin = remap_pin(t, std::move(last_pin), new_off, new_len); + if (!pin) { + conflicted++; + return; + } + last_pin = pin->duplicate(); + } + auto last_ext = try_get_extent(t, last_pin->get_key()); + if (last_ext) { + auto last_ext1 = mutate_extent(t, last_ext); + 
ASSERT_TRUE(last_ext1->is_exist_mutation_pending()); + } else { + conflicted++; + return; + } + + if (try_submit_transaction(std::move(t))) { + success++; + logger().info("transaction {} submit the transction", + static_cast<void*>(t.t.get())); + } else { + conflicted++; + } + }); + }).handle_exception([](std::exception_ptr e) { + logger().info("{}", e); + }).get0(); + logger().info("test_remap_pin_concurrent: " + "early_exit {} conflicted {} success {}", + early_exit, conflicted, success); + ASSERT_TRUE(success == 1); + ASSERT_EQ(success + conflicted + early_exit, REMAP_NUM); + replay(); + check(); + }); + } + + void test_overwrite_pin_concurrent() { + run_async([this] { + constexpr unsigned REMAP_NUM = 32; + constexpr size_t offset = 0; + constexpr size_t length = 256 << 10; + { + auto t = create_transaction(); + auto extent = alloc_extent(t, offset, length); + ASSERT_EQ(length, extent->get_length()); + submit_transaction(std::move(t)); + } + int success = 0; + int early_exit = 0; + int conflicted = 0; + + seastar::parallel_for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(REMAP_NUM), + [&](auto) { + return seastar::async([&] { + uint32_t pieces = std::uniform_int_distribution<>(6, 31)(gen); + if (pieces % 2 == 1) { + pieces++; + } + std::list<uint32_t> split_points; + for (uint32_t i = 0; i < pieces; i++) { + auto p = std::uniform_int_distribution<>(1, 120)(gen); + split_points.push_back(p - p % 4); + } + split_points.sort(); + + auto t = create_transaction(); + auto pin0 = try_get_pin(t, offset); + if (!pin0 || pin0->get_length() != length) { + early_exit++; + return; + } + + auto empty_transaction = true; + auto last_rpin = pin0->duplicate(); + ASSERT_TRUE(!split_points.empty()); + while(!split_points.empty()) { + // new overwrite area: start_off ~ end_off + auto start_off = split_points.front(); + split_points.pop_front(); + auto end_off = split_points.front(); + split_points.pop_front(); + ASSERT_TRUE(start_off <= end_off); + if 
(((end_off << 10) == pin0->get_key() + pin0->get_length()) + || (start_off == end_off)) { + if (split_points.empty() && empty_transaction) { + early_exit++; + return; + } + continue; + } + empty_transaction = false; + auto new_off = (start_off << 10) - last_rpin->get_key(); + auto new_len = (end_off - start_off) << 10; + bufferlist bl; + bl.append(ceph::bufferptr(ceph::buffer::create(new_len, 0))); + auto [lpin, ext, rpin] = overwrite_pin( + t, last_rpin->duplicate(), new_off, new_len, bl); + if (!ext) { + conflicted++; + return; + } + // lpin is nullptr might not cause by confliction, + // it might just not exist. + if (lpin) { + auto lext = try_get_extent(t, lpin->get_key()); + if (!lext) { + conflicted++; + return; + } + if (get_random_contents() % 2 == 0) { + auto lext1 = mutate_extent(t, lext); + ASSERT_TRUE(lext1->is_exist_mutation_pending()); + } + } + ASSERT_TRUE(rpin); + last_rpin = rpin->duplicate(); + } + auto last_rext = try_get_extent(t, last_rpin->get_key()); + if (!last_rext) { + conflicted++; + return; + } + if (get_random_contents() % 2 == 0) { + auto last_rext1 = mutate_extent(t, last_rext); + ASSERT_TRUE(last_rext1->is_exist_mutation_pending()); + } + + if (try_submit_transaction(std::move(t))) { + success++; + logger().info("transaction {} submit the transction", + static_cast<void*>(t.t.get())); + } else { + conflicted++; + } + }); + }).handle_exception([](std::exception_ptr e) { + logger().info("{}", e); + }).get0(); + logger().info("test_overwrite_pin_concurrent: " + "early_exit {} conflicted {} success {}", + early_exit, conflicted, success); + ASSERT_TRUE(success == 1 || early_exit == REMAP_NUM); + ASSERT_EQ(success + conflicted + early_exit, REMAP_NUM); + replay(); + check(); + }); + } +}; + +struct tm_single_device_test_t : + public transaction_manager_test_t { + + tm_single_device_test_t() : transaction_manager_test_t(1, 0) {} +}; + +struct tm_multi_device_test_t : + public transaction_manager_test_t { + + tm_multi_device_test_t() : 
// Allocate one extent, replay, mutate that extent in a second transaction,
// and replay again. Mappings and usage are checked after each step.
TEST_P(tm_single_device_test_t, mutate)
{
  constexpr laddr_t SIZE = 4096;
  run_async([this] {
    constexpr laddr_t ADDR = 0xFF * SIZE;

    // Step 1: create the extent filled with 'a' and commit it.
    {
      auto txn = create_transaction();
      auto allocated = alloc_extent(txn, ADDR, SIZE, 'a');
      ASSERT_EQ(ADDR, allocated->get_laddr());
      check_mappings(txn);
      check();
      submit_transaction(std::move(txn));
      check();
    }
    ASSERT_TRUE(check_usage());
    replay();

    // Step 2: read the extent back and mutate it in a fresh transaction.
    {
      auto txn = create_transaction();
      auto existing = get_extent(txn, ADDR, SIZE);
      auto mutated = mutate_extent(txn, existing);
      check_mappings(txn);
      check();
      submit_transaction(std::move(txn));
      check();
    }
    ASSERT_TRUE(check_usage());
    replay();
    check();
  });
}
constexpr laddr_t SIZE = 4096; + run_async([this] { + { + auto t = create_transaction(); + for (unsigned i = 0; i < 300; ++i) { + auto extent = alloc_extent( + t, + laddr_t(i * SIZE), + SIZE); + } + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + + constexpr laddr_t ADDR = 150 * SIZE; + { + auto t = create_transaction(); + auto t2 = create_transaction(); + + mutate_addr(t, ADDR, SIZE); + mutate_addr(t2, ADDR, SIZE); + + submit_transaction(std::move(t)); + submit_transaction_expect_conflict(std::move(t2)); + } + check(); + + { + auto t = create_transaction(); + mutate_addr(t, ADDR, SIZE); + submit_transaction(std::move(t)); + } + check(); + }); +} + +TEST_P(tm_single_device_test_t, concurrent_mutate_lba_no_conflict) +{ + constexpr laddr_t SIZE = 4096; + constexpr size_t NUM = 500; + constexpr laddr_t addr = 0; + constexpr laddr_t addr2 = SIZE * (NUM - 1); + run_async([this] { + { + auto t = create_transaction(); + for (unsigned i = 0; i < NUM; ++i) { + auto extent = alloc_extent( + t, + laddr_t(i * SIZE), + SIZE); + } + submit_transaction(std::move(t)); + } + + { + auto t = create_transaction(); + auto t2 = create_transaction(); + + mutate_addr(t, addr, SIZE); + mutate_addr(t2, addr2, SIZE); + + submit_transaction(std::move(t)); + submit_transaction(std::move(t2)); + } + check(); + }); +} + +TEST_P(tm_single_device_test_t, create_remove_same_transaction) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + constexpr laddr_t ADDR = 0xFF * SIZE; + { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + ADDR, + SIZE, + 'a'); + ASSERT_EQ(ADDR, extent->get_laddr()); + check_mappings(t); + dec_ref(t, ADDR); + check_mappings(t); + + extent = alloc_extent( + t, + ADDR, + SIZE, + 'a'); + + submit_transaction(std::move(t)); + check(); + } + replay(); + check(); + }); +} + +TEST_P(tm_single_device_test_t, split_merge_read_same_transaction) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + { + auto t = create_transaction(); 
+ for (unsigned i = 0; i < 300; ++i) { + auto extent = alloc_extent( + t, + laddr_t(i * SIZE), + SIZE); + } + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + { + auto t = create_transaction(); + for (unsigned i = 0; i < 240; ++i) { + dec_ref( + t, + laddr_t(i * SIZE)); + } + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + }); +} + +TEST_P(tm_single_device_test_t, inc_dec_ref) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + constexpr laddr_t ADDR = 0xFF * SIZE; + { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + ADDR, + SIZE, + 'a'); + ASSERT_EQ(ADDR, extent->get_laddr()); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + replay(); + { + auto t = create_transaction(); + inc_ref(t, ADDR); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + { + auto t = create_transaction(); + dec_ref(t, ADDR); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + replay(); + { + auto t = create_transaction(); + dec_ref(t, ADDR); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + }); +} + +TEST_P(tm_single_device_test_t, cause_lba_split) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + for (unsigned i = 0; i < 200; ++i) { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + i * SIZE, + SIZE, + (char)(i & 0xFF)); + ASSERT_EQ(i * SIZE, extent->get_laddr()); + submit_transaction(std::move(t)); + } + check(); + }); +} + +TEST_P(tm_single_device_test_t, random_writes) +{ + constexpr size_t TOTAL = 4<<20; + constexpr size_t BSIZE = 4<<10; + constexpr size_t PADDING_SIZE = 256<<10; + constexpr size_t BLOCKS = TOTAL / BSIZE; + run_async([this] { + for (unsigned i = 0; i < BLOCKS; ++i) { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + i * BSIZE, + BSIZE); + ASSERT_EQ(i * BSIZE, extent->get_laddr()); + submit_transaction(std::move(t)); 
+ } + + for (unsigned i = 0; i < 4; ++i) { + for (unsigned j = 0; j < 65; ++j) { + auto t = create_transaction(); + for (unsigned k = 0; k < 2; ++k) { + auto ext = get_extent( + t, + get_random_laddr(BSIZE, TOTAL), + BSIZE); + auto mut = mutate_extent(t, ext); + // pad out transaction + auto padding = alloc_extent( + t, + TOTAL + (k * PADDING_SIZE), + PADDING_SIZE); + dec_ref(t, padding->get_laddr()); + } + submit_transaction(std::move(t)); + } + replay(); + logger().info("random_writes: {} checking", i); + check(); + logger().info("random_writes: {} done replaying/checking", i); + } + }); +} + +TEST_P(tm_single_device_test_t, find_hole_assert_trigger) +{ + constexpr unsigned max = 10; + constexpr size_t BSIZE = 4<<10; + int num = 40; + run([&, this] { + return seastar::parallel_for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(max), + [&, this](auto idx) { + return allocate_sequentially(BSIZE, num); + }); + }); +} + +TEST_P(tm_single_device_test_t, remap_lazy_read) +{ + constexpr laddr_t offset = 0; + constexpr size_t length = 256 << 10; + run_async([this, offset] { + { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + offset, + length, + 'a'); + ASSERT_EQ(offset, extent->get_laddr()); + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + replay(); + { + auto t = create_transaction(); + auto pin = get_pin(t, offset); + auto rpin = remap_pin(t, std::move(pin), 0, 128 << 10); + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + replay(); + { + auto t = create_transaction(); + auto pin = get_pin(t, offset); + bufferlist bl; + bl.append(ceph::bufferptr(ceph::buffer::create(64 << 10, 0))); + auto [lpin, ext, rpin] = overwrite_pin( + t, std::move(pin), 4 << 10 , 64 << 10, bl); + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + replay(); + }); +} + +TEST_P(tm_single_device_test_t, random_writes_concurrent) +{ + test_random_writes_concurrent(); +} + 
+TEST_P(tm_multi_device_test_t, random_writes_concurrent) +{ + test_random_writes_concurrent(); +} + +TEST_P(tm_multi_tier_device_test_t, evict) +{ + test_evict(); +} + +TEST_P(tm_single_device_test_t, parallel_extent_read) +{ + test_parallel_extent_read(); +} + +TEST_P(tm_single_device_test_t, test_remap_pin) +{ + test_remap_pin(); +} + +TEST_P(tm_single_device_test_t, test_overwrite_pin) +{ + test_overwrite_pin(); +} + +TEST_P(tm_single_device_test_t, test_remap_pin_concurrent) +{ + test_remap_pin_concurrent(); +} + +TEST_P(tm_single_device_test_t, test_overwrite_pin_concurrent) +{ + test_overwrite_pin_concurrent(); +} + +INSTANTIATE_TEST_SUITE_P( + transaction_manager_test, + tm_single_device_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); + +INSTANTIATE_TEST_SUITE_P( + transaction_manager_test, + tm_multi_device_test_t, + ::testing::Values ( + "segmented" + ) +); + +INSTANTIATE_TEST_SUITE_P( + transaction_manager_test, + tm_multi_tier_device_test_t, + ::testing::Values ( + "segmented" + ) +); diff --git a/src/test/crimson/seastore/transaction_manager_test_state.h b/src/test/crimson/seastore/transaction_manager_test_state.h new file mode 100644 index 000000000..81200b1db --- /dev/null +++ b/src/test/crimson/seastore/transaction_manager_test_state.h @@ -0,0 +1,450 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <random> +#include <boost/iterator/counting_iterator.hpp> + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/extent_placement_manager.h" +#include "crimson/os/seastore/logging.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/segment_manager/ephemeral.h" +#include "crimson/os/seastore/seastore.h" +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/collection_manager/flat_collection_manager.h" +#include 
"crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h" +#include "crimson/os/seastore/random_block_manager/rbm_device.h" +#include "crimson/os/seastore/journal/circular_bounded_journal.h" +#include "crimson/os/seastore/random_block_manager/block_rb_manager.h" +#ifdef UNIT_TESTS_BUILT +#include "test/crimson/gtest_seastar.h" +#endif + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +class EphemeralDevices { +public: + virtual seastar::future<> setup() = 0; + virtual void remount() = 0; + virtual std::size_t get_num_devices() const = 0; + virtual void reset() = 0; + virtual std::vector<Device*> get_secondary_devices() = 0; + virtual ~EphemeralDevices() {} + virtual Device* get_primary_device() = 0; + virtual DeviceRef get_primary_device_ref() = 0; + virtual void set_primary_device_ref(DeviceRef) = 0; +}; +using EphemeralDevicesRef = std::unique_ptr<EphemeralDevices>; + +class EphemeralSegmentedDevices : public EphemeralDevices { + segment_manager::EphemeralSegmentManagerRef segment_manager; + std::list<segment_manager::EphemeralSegmentManagerRef> secondary_segment_managers; + std::size_t num_main_device_managers; + std::size_t num_cold_device_managers; + +public: + EphemeralSegmentedDevices(std::size_t num_main_devices, + std::size_t num_cold_devices) + : num_main_device_managers(num_main_devices), + num_cold_device_managers(num_cold_devices) + { + auto num_device_managers = num_main_device_managers + num_cold_device_managers; + assert(num_device_managers > 0); + secondary_segment_managers.resize(num_device_managers - 1); + } + + seastar::future<> setup() final { + segment_manager = segment_manager::create_test_ephemeral(); + for (auto &sec_sm : secondary_segment_managers) { + sec_sm = segment_manager::create_test_ephemeral(); + } + return segment_manager->init( + ).safe_then([this] { + return crimson::do_for_each( + secondary_segment_managers.begin(), + secondary_segment_managers.end(), + [](auto 
&sec_sm) + { + return sec_sm->init(); + }); + }).safe_then([this] { + return segment_manager->mkfs( + segment_manager::get_ephemeral_device_config( + 0, num_main_device_managers, num_cold_device_managers)); + }).safe_then([this] { + return seastar::do_with(std::size_t(0), [this](auto &cnt) { + return crimson::do_for_each( + secondary_segment_managers.begin(), + secondary_segment_managers.end(), + [this, &cnt](auto &sec_sm) + { + ++cnt; + return sec_sm->mkfs( + segment_manager::get_ephemeral_device_config( + cnt, num_main_device_managers, num_cold_device_managers)); + }); + }); + }).handle_error( + crimson::ct_error::assert_all{} + ); + } + + void remount() final { + segment_manager->remount(); + for (auto &sec_sm : secondary_segment_managers) { + sec_sm->remount(); + } + } + + std::size_t get_num_devices() const final { + return secondary_segment_managers.size() + 1; + } + + void reset() final { + segment_manager.reset(); + for (auto &sec_sm : secondary_segment_managers) { + sec_sm.reset(); + } + } + + std::vector<Device*> get_secondary_devices() final { + std::vector<Device*> sec_devices; + for (auto &sec_sm : secondary_segment_managers) { + sec_devices.emplace_back(sec_sm.get()); + } + return sec_devices; + } + + Device* get_primary_device() final { + return segment_manager.get(); + } + DeviceRef get_primary_device_ref() final; + void set_primary_device_ref(DeviceRef) final; +}; + +class EphemeralRandomBlockDevices : public EphemeralDevices { + random_block_device::RBMDeviceRef rb_device; + std::list<random_block_device::RBMDeviceRef> secondary_rb_devices; + +public: + EphemeralRandomBlockDevices(std::size_t num_device_managers) { + assert(num_device_managers > 0); + secondary_rb_devices.resize(num_device_managers - 1); + } + + seastar::future<> setup() final { + rb_device = random_block_device::create_test_ephemeral(); + device_config_t config = get_rbm_ephemeral_device_config(0, 1); + return rb_device->mkfs(config).handle_error(crimson::ct_error::assert_all{}); 
+ } + + void remount() final {} + + std::size_t get_num_devices() const final { + return secondary_rb_devices.size() + 1; + } + + void reset() final { + rb_device.reset(); + for (auto &sec_rb : secondary_rb_devices) { + sec_rb.reset(); + } + } + + std::vector<Device*> get_secondary_devices() final { + std::vector<Device*> sec_devices; + for (auto &sec_rb : secondary_rb_devices) { + sec_devices.emplace_back(sec_rb.get()); + } + return sec_devices; + } + + Device* get_primary_device() final { + return rb_device.get(); + } + DeviceRef get_primary_device_ref() final; + void set_primary_device_ref(DeviceRef) final; +}; + +class EphemeralTestState +#ifdef UNIT_TESTS_BUILT + : public ::testing::WithParamInterface<const char*> { +#else + { +#endif +protected: + journal_type_t journal_type; + size_t num_main_device_managers = 0; + size_t num_cold_device_managers = 0; + EphemeralDevicesRef devices; + bool secondary_is_cold; + EphemeralTestState(std::size_t num_main_device_managers, + std::size_t num_cold_device_managers) : + num_main_device_managers(num_main_device_managers), + num_cold_device_managers(num_cold_device_managers) {} + + virtual seastar::future<> _init() = 0; + + virtual seastar::future<> _destroy() = 0; + virtual seastar::future<> _teardown() = 0; + seastar::future<> teardown() { + return _teardown().then([this] { + return _destroy(); + }); + } + + virtual FuturizedStore::mkfs_ertr::future<> _mkfs() = 0; + virtual FuturizedStore::mount_ertr::future<> _mount() = 0; + + seastar::future<> restart_fut() { + LOG_PREFIX(EphemeralTestState::restart_fut); + SUBINFO(test, "begin ..."); + return teardown().then([this] { + devices->remount(); + return _init().then([this] { + return _mount().handle_error(crimson::ct_error::assert_all{}); + }); + }).then([FNAME] { + SUBINFO(test, "finish"); + }); + } + + void restart() { + restart_fut().get0(); + } + + seastar::future<> tm_setup() { + LOG_PREFIX(EphemeralTestState::tm_setup); +#ifdef UNIT_TESTS_BUILT + std::string j_type = 
GetParam(); +#else + std::string j_type = "segmented"; +#endif + if (j_type == "circularbounded") { + //TODO: multiple devices + ceph_assert(num_main_device_managers == 1); + ceph_assert(num_cold_device_managers == 0); + devices.reset(new EphemeralRandomBlockDevices(1)); + } else { + // segmented by default + devices.reset(new + EphemeralSegmentedDevices( + num_main_device_managers, num_cold_device_managers)); + } + SUBINFO(test, "begin with {} devices ...", devices->get_num_devices()); + return devices->setup( + ).then([this] { + return _init(); + }).then([this, FNAME] { + return _mkfs( + ).safe_then([this] { + return restart_fut(); + }).handle_error( + crimson::ct_error::assert_all{} + ).then([FNAME] { + SUBINFO(test, "finish"); + }); + }); + } + + seastar::future<> tm_teardown() { + LOG_PREFIX(EphemeralTestState::tm_teardown); + SUBINFO(test, "begin"); + return teardown().then([this, FNAME] { + devices->reset(); + SUBINFO(test, "finish"); + }); + } +}; + +class TMTestState : public EphemeralTestState { +protected: + TransactionManagerRef tm; + LBAManager *lba_manager; + Cache* cache; + ExtentPlacementManager *epm; + uint64_t seq = 0; + + TMTestState() : EphemeralTestState(1, 0) {} + + TMTestState(std::size_t num_main_devices, std::size_t num_cold_devices) + : EphemeralTestState(num_main_devices, num_cold_devices) {} + + virtual seastar::future<> _init() override { + auto sec_devices = devices->get_secondary_devices(); + auto p_dev = devices->get_primary_device(); + tm = make_transaction_manager(p_dev, sec_devices, true); + epm = tm->get_epm(); + lba_manager = tm->get_lba_manager(); + cache = tm->get_cache(); + return seastar::now(); + } + + virtual seastar::future<> _destroy() override { + epm = nullptr; + lba_manager = nullptr; + cache = nullptr; + tm.reset(); + return seastar::now(); + } + + virtual seastar::future<> _teardown() { + return tm->close().handle_error( + crimson::ct_error::assert_all{"Error in teardown"} + ); + } + + virtual 
FuturizedStore::mount_ertr::future<> _mount() { + return tm->mount( + ).handle_error( + crimson::ct_error::assert_all{"Error in mount"} + ).then([this] { + return epm->stop_background(); + }).then([this] { + return epm->run_background_work_until_halt(); + }); + } + + virtual FuturizedStore::mkfs_ertr::future<> _mkfs() { + return tm->mkfs( + ).handle_error( + crimson::ct_error::assert_all{"Error in mkfs"} + ); + } + + auto create_mutate_transaction() { + return tm->create_transaction( + Transaction::src_t::MUTATE, "test_mutate"); + } + + auto create_read_transaction() { + return tm->create_transaction( + Transaction::src_t::READ, "test_read"); + } + + auto create_weak_transaction() { + return tm->create_transaction( + Transaction::src_t::READ, "test_read_weak", true); + } + + auto submit_transaction_fut2(Transaction& t) { + return tm->submit_transaction(t); + } + + auto submit_transaction_fut(Transaction &t) { + return with_trans_intr( + t, + [this](auto &t) { + return tm->submit_transaction(t); + }); + } + auto submit_transaction_fut_with_seq(Transaction &t) { + using ertr = TransactionManager::base_iertr; + return with_trans_intr( + t, + [this](auto &t) { + return tm->submit_transaction(t + ).si_then([this] { + return ertr::make_ready_future<uint64_t>(seq++); + }); + }); + } + + void submit_transaction(TransactionRef t) { + submit_transaction_fut(*t).unsafe_get0(); + epm->run_background_work_until_halt().get0(); + } +}; + + +DeviceRef EphemeralSegmentedDevices::get_primary_device_ref() { + return std::move(segment_manager); +} + +DeviceRef EphemeralRandomBlockDevices::get_primary_device_ref() { + return std::move(rb_device); +} + +void EphemeralSegmentedDevices::set_primary_device_ref(DeviceRef dev) { + segment_manager = + segment_manager::EphemeralSegmentManagerRef( + static_cast<segment_manager::EphemeralSegmentManager*>(dev.release())); +} + +void EphemeralRandomBlockDevices::set_primary_device_ref(DeviceRef dev) { + rb_device = + 
random_block_device::RBMDeviceRef( + static_cast<random_block_device::RBMDevice*>(dev.release())); +} + +class SeaStoreTestState : public EphemeralTestState { + class TestMDStoreState { + std::map<std::string, std::string> md; + public: + class Store final : public SeaStore::MDStore { + TestMDStoreState &parent; + public: + Store(TestMDStoreState &parent) : parent(parent) {} + + write_meta_ret write_meta( + const std::string& key, const std::string& value) final { + parent.md[key] = value; + return seastar::now(); + } + + read_meta_ret read_meta(const std::string& key) final { + auto iter = parent.md.find(key); + if (iter != parent.md.end()) { + return read_meta_ret( + read_meta_ertr::ready_future_marker{}, + iter->second); + } else { + return read_meta_ret( + read_meta_ertr::ready_future_marker{}, + std::nullopt); + } + } + }; + Store get_mdstore() { + return Store(*this); + } + } mdstore_state; + +protected: + std::unique_ptr<SeaStore> seastore; + FuturizedStore::Shard *sharded_seastore; + + SeaStoreTestState() : EphemeralTestState(1, 0) {} + + virtual seastar::future<> _init() final { + seastore = make_test_seastore( + std::make_unique<TestMDStoreState::Store>(mdstore_state.get_mdstore())); + return seastore->test_start(devices->get_primary_device_ref() + ).then([this] { + sharded_seastore = &(seastore->get_sharded_store()); + }); + } + + virtual seastar::future<> _destroy() final { + devices->set_primary_device_ref(seastore->get_primary_device_ref()); + return seastore->stop().then([this] { + seastore.reset(); + }); + } + + virtual seastar::future<> _teardown() final { + return seastore->umount(); + } + + virtual FuturizedStore::mount_ertr::future<> _mount() final { + return seastore->test_mount(); + } + + virtual FuturizedStore::mkfs_ertr::future<> _mkfs() final { + return seastore->test_mkfs(uuid_d{}); + } +}; |