diff options
Diffstat (limited to '')
45 files changed, 18741 insertions, 0 deletions
diff --git a/src/test/crimson/CMakeLists.txt b/src/test/crimson/CMakeLists.txt new file mode 100644 index 000000000..b1851cca2 --- /dev/null +++ b/src/test/crimson/CMakeLists.txt @@ -0,0 +1,105 @@ +# the crimson's backfill doesn't need nor use seastar +add_executable(unittest-crimson-backfill + test_backfill.cc + ${PROJECT_SOURCE_DIR}/src/auth/Crypto.cc + ${PROJECT_SOURCE_DIR}/src/crimson/osd/backfill_state.cc + ${PROJECT_SOURCE_DIR}/src/osd/recovery_types.cc) +add_ceph_unittest(unittest-crimson-backfill + --memory 256M --smp 1) +target_link_libraries(unittest-crimson-backfill crimson GTest::Main) + +add_executable(unittest-seastar-buffer + test_buffer.cc) +add_ceph_unittest(unittest-seastar-buffer + --memory 256M --smp 1) +target_link_libraries(unittest-seastar-buffer crimson) + +add_executable(unittest-seastar-denc + test_denc.cc) +add_ceph_unittest(unittest-seastar-denc --memory 256M --smp 1) +target_link_libraries(unittest-seastar-denc crimson GTest::Main) + +add_executable(unittest-seastar-socket test_socket.cc) +add_ceph_unittest(unittest-seastar-socket + --memory 256M --smp 4) +target_link_libraries(unittest-seastar-socket crimson) + +add_executable(unittest-seastar-messenger test_messenger.cc) +add_ceph_unittest(unittest-seastar-messenger + --memory 256M --smp 4) +target_link_libraries(unittest-seastar-messenger crimson) + +add_executable(test-seastar-messenger-peer test_messenger_peer.cc) +target_link_libraries(test-seastar-messenger-peer ceph-common global ${ALLOC_LIBS}) + +add_executable(test-seastar-echo + test_alien_echo.cc) +target_link_libraries(test-seastar-echo crimson) + +add_executable(test-async-echo + test_async_echo.cc) +target_link_libraries(test-async-echo ceph-common global) + +add_executable(unittest-seastar-alienstore-thread-pool + test_alienstore_thread_pool.cc + ${PROJECT_SOURCE_DIR}/src/crimson/osd/lsan_suppressions.cc) +add_ceph_unittest(unittest-seastar-alienstore-thread-pool + --memory 256M --smp 1) 
+target_link_libraries(unittest-seastar-alienstore-thread-pool + crimson-alienstore + crimson) + +add_executable(unittest-seastar-config + test_config.cc) +add_ceph_unittest(unittest-seastar-config + --memory 256M --smp 4) +target_link_libraries(unittest-seastar-config crimson) + +add_executable(unittest-seastar-monc + test_monc.cc) +target_link_libraries(unittest-seastar-monc crimson) + +add_executable(unittest-seastar-perfcounters + test_perfcounters.cc) +add_ceph_unittest(unittest-seastar-perfcounters + --memory 256M --smp 1) +target_link_libraries(unittest-seastar-perfcounters crimson) + +add_executable(unittest-seastar-lru + test_lru.cc) +add_ceph_unittest(unittest-seastar-lru + --memory 256M --smp 1) +target_link_libraries(unittest-seastar-lru crimson GTest::Main) + +add_executable(unittest-fixed-kv-node-layout + test_fixed_kv_node_layout.cc) +add_ceph_unittest(unittest-fixed-kv-node-layout) + +add_executable(unittest-interruptible-future + test_interruptible_future.cc + gtest_seastar.cc) +add_ceph_unittest(unittest-interruptible-future + --memory 256M --smp 1) +target_link_libraries( + unittest-interruptible-future + crimson-common) + +add_executable(unittest-seastar-messenger-thrash test_messenger_thrash.cc) +add_ceph_unittest(unittest-seastar-messenger-thrash + --memory 256M --smp 1) +target_link_libraries(unittest-seastar-messenger-thrash crimson) + +add_subdirectory(seastore) + +add_library(crimson-gtest STATIC + gtest_seastar.cc) +target_link_libraries(crimson-gtest crimson-common GTest::GTest) +add_library(crimson::gtest ALIAS crimson-gtest) + +add_executable(unittest-seastar-errorator + test_errorator.cc) +target_link_libraries( + unittest-seastar-errorator + crimson::gtest) +add_ceph_unittest(unittest-seastar-errorator + --memory 256M --smp 1) diff --git a/src/test/crimson/cbt/radosbench_4K_read.yaml b/src/test/crimson/cbt/radosbench_4K_read.yaml new file mode 100644 index 000000000..219ce643a --- /dev/null +++ 
b/src/test/crimson/cbt/radosbench_4K_read.yaml @@ -0,0 +1,36 @@ +meta: +- desc: | + Run radosbench benchmark using cbt. + 4K read workload. + +tasks: +- cbt: + benchmarks: + radosbench: + concurrent_ops: 16 + concurrent_procs: 2 + op_size: [4096] + pool_profile: 'replicated' + read_time: 30 + read_only: true + readmode: 'rand' + prefill_time: 3 + acceptable: + bandwidth: '(or (greater) (near 0.05))' + iops_avg: '(or (greater) (near 0.05))' + iops_stddev: '(or (less) (near 2.00))' + latency_avg: '(or (less) (near 0.05))' + cpu_cycles_per_op: '(or (less) (near 0.05))' + monitoring_profiles: + perf: + nodes: + - osds + args: 'stat -p {pid} -o {perf_dir}/perf_stat.{pid}' + cluster: + osds_per_node: 3 + iterations: 1 + pool_profiles: + replicated: + pg_size: 128 + pgp_size: 128 + replication: 'replicated' diff --git a/src/test/crimson/cbt/radosbench_4K_write.yaml b/src/test/crimson/cbt/radosbench_4K_write.yaml new file mode 100644 index 000000000..526982b10 --- /dev/null +++ b/src/test/crimson/cbt/radosbench_4K_write.yaml @@ -0,0 +1,34 @@ +meta: +- desc: | + Run radosbench benchmark using cbt. + 4K write workload. 
class Translator(object):
    """Translate the ``cbt`` task of a teuthology YAML into a CBT config.

    :param build_dir: directory that holds ``ceph.conf``, ``bin/`` and
                      ``out/`` of the local build; every path written to
                      the CBT "cluster" section is derived from it.
    """

    def __init__(self, build_dir):
        self.build_dir = build_dir

    def translate(self, config):
        """Return the CBT configuration for a single ``cbt`` task.

        :param config: the mapping found under the ``cbt`` key of a task
        :returns: dict with the ``cluster``, ``benchmarks`` and
                  ``monitoring_profiles`` sections
        """
        cluster = config.get('cluster', {})
        benchmarks = config.get('benchmarks', [])
        monitoring_profiles = config.get('monitoring_profiles', {})
        return dict(cluster=self._create_cluster_config(cluster),
                    benchmarks=benchmarks,
                    monitoring_profiles=monitoring_profiles)

    def _create_cluster_config(self, cluster):
        """Prepare the "cluster" section consumed by CBT.

        Only ``iterations`` and ``pool_profiles`` are copied from the input;
        every role (head, osds, mons, clients) is mapped to the local host.
        """
        localhost = socket.getfqdn()
        num_osds = cluster.get('osds_per_node', 3)
        items_to_copy = ['iterations', 'pool_profiles']
        conf = dict((k, cluster[k]) for k in items_to_copy if k in cluster)
        conf.update(dict(
            head=localhost,
            osds=[localhost],
            osds_per_node=num_osds,
            mons=[localhost],
            clients=[localhost],
            rebuild_every_test=False,
            conf_file=os.path.join(self.build_dir, 'ceph.conf'),
            ceph_cmd=os.path.join(self.build_dir, 'bin', 'ceph'),
            rados_cmd=os.path.join(self.build_dir, 'bin', 'rados'),
            pid_dir=os.path.join(self.build_dir, 'out')
        ))
        return conf


def get_cbt_tasks(path):
    """Yield the configuration of every ``cbt`` entry in the "tasks" list.

    :param path: path to the teuthology YAML file
    """
    with open(path) as stream:
        # safe_load: the input is plain data, and yaml.load() without an
        # explicit Loader is rejected by PyYAML >= 6 (and unsafe before that).
        teuthology_config = yaml.safe_load(stream)
    # An empty document loads as None and "tasks" may be absent; yield nothing
    # in both cases so that main() can report "cbt not found".
    tasks = (teuthology_config or {}).get('tasks') or []
    for task in tasks:
        for name, conf in task.items():
            if name == 'cbt':
                yield conf


def main():
    """Command line entry point: read --input, write the CBT YAML to --output."""
    parser = argparse.ArgumentParser(description='translate teuthology yaml to CBT yaml')
    parser.add_argument('--build-dir',
                        default=os.getcwd(),
                        required=False,
                        help='Directory where CMakeCache.txt is located')
    parser.add_argument('--input',
                        required=True,
                        help='The path to the input YAML file')
    parser.add_argument('--output',
                        required=True,
                        help='The path to the output YAML file')
    options = parser.parse_args(sys.argv[1:])
    cbt_tasks = list(get_cbt_tasks(options.input))
    # exactly one cbt task is expected; sys.exit() raises, so no "return"
    if not cbt_tasks:
        print('cbt not found in "tasks" section', file=sys.stderr)
        sys.exit(1)
    elif len(cbt_tasks) > 1:
        print('more than one cbt task found in "tasks" section', file=sys.stderr)
        sys.exit(1)
    translator = Translator(options.build_dir)
    cbt_config = translator.translate(cbt_tasks[0])
    with open(options.output, 'w') as output:
        yaml.dump(cbt_config, output)


if __name__ == '__main__':
    main()
"--default-log-level"; + for (int i = 0; i < argc; ++i) { + if (std::strncmp(argv[i], prefix_log_level, + std::strlen(prefix_log_level)) == 0) { + global_log_level_is_set = true; + } + args.push_back(argv[i]); + } + // HACK: differentiate between the `make check` bot and human user + // for the sake of log flooding + if (!global_log_level_is_set && !std::getenv("FOR_MAKE_CHECK")) { + std::cout << "WARNING: set default seastar log level to debug" << std::endl; + ++argc; + args.push_back("--default-log-level=debug"); + } + + auto app_argv = const_cast<char**>(args.data()); + auto app_argc = static_cast<int>(args.size()); + ::testing::InitGoogleTest(&app_argc, app_argv); + + int ret = seastar_test_suite_t::seastar_env.init(app_argc, app_argv); + if (ret != 0) { + seastar_test_suite_t::seastar_env.stop(); + return ret; + } + + seastar_test_suite_t::seastar_env.run([] { + return crimson::common::sharded_conf().start( + EntityName{}, std::string_view{"ceph"} + ).then([] { + return crimson::common::sharded_perf_coll().start(); + }); + }); + + ret = RUN_ALL_TESTS(); + + seastar_test_suite_t::seastar_env.run([] { + return crimson::common::sharded_perf_coll().stop().then([] { + return crimson::common::sharded_conf().stop(); + }); + }); + + seastar_test_suite_t::seastar_env.stop(); + return ret; +} diff --git a/src/test/crimson/gtest_seastar.h b/src/test/crimson/gtest_seastar.h new file mode 100644 index 000000000..20709a3ee --- /dev/null +++ b/src/test/crimson/gtest_seastar.h @@ -0,0 +1,35 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "gtest/gtest.h" + +#include "seastar_runner.h" + +struct seastar_test_suite_t : public ::testing::Test { + static SeastarRunner seastar_env; + + template <typename Func> + void run(Func &&func) { + return seastar_env.run(std::forward<Func>(func)); + } + + template <typename Func> + void run_async(Func &&func) { + run( + [func=std::forward<Func>(func)]() mutable 
{ + return seastar::async(std::forward<Func>(func)); + }); + } + + virtual seastar::future<> set_up_fut() { return seastar::now(); } + void SetUp() final { + return run([this] { return set_up_fut(); }); + } + + virtual seastar::future<> tear_down_fut() { return seastar::now(); } + void TearDown() final { + return run([this] { return tear_down_fut(); }); + } +}; diff --git a/src/test/crimson/seastar_runner.h b/src/test/crimson/seastar_runner.h new file mode 100644 index 000000000..58d3f8119 --- /dev/null +++ b/src/test/crimson/seastar_runner.h @@ -0,0 +1,102 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <stdio.h> +#include <signal.h> +#include <thread> + +#include <seastar/core/app-template.hh> +#include <seastar/core/future-util.hh> +#include <seastar/core/reactor.hh> +#include <seastar/core/alien.hh> +#include <seastar/core/thread.hh> + +struct SeastarRunner { + static constexpr eventfd_t APP_RUNNING = 1; + static constexpr eventfd_t APP_NOT_RUN = 2; + + seastar::app_template app; + seastar::file_desc begin_fd; + std::unique_ptr<seastar::readable_eventfd> on_end; + + std::thread thread; + + bool begin_signaled = false; + + SeastarRunner() : + begin_fd{seastar::file_desc::eventfd(0, 0)} {} + + ~SeastarRunner() {} + + bool is_running() const { + return !!on_end; + } + + int init(int argc, char **argv) + { + thread = std::thread([argc, argv, this] { reactor(argc, argv); }); + eventfd_t result; + if (int r = ::eventfd_read(begin_fd.get(), &result); r < 0) { + std::cerr << "unable to eventfd_read():" << errno << std::endl; + return r; + } + assert(begin_signaled == true); + if (result == APP_RUNNING) { + assert(is_running()); + return 0; + } else { + assert(result == APP_NOT_RUN); + assert(!is_running()); + return 1; + } + } + + void stop() + { + if (is_running()) { + run([this] { + on_end->write_side().signal(1); + return seastar::now(); + }); + } + thread.join(); + } + + void 
reactor(int argc, char **argv) + { + auto ret = app.run(argc, argv, [this] { + on_end.reset(new seastar::readable_eventfd); + return seastar::now().then([this] { + begin_signaled = true; + [[maybe_unused]] auto r = ::eventfd_write(begin_fd.get(), APP_RUNNING); + assert(r == 0); + return seastar::now(); + }).then([this] { + return on_end->wait().then([](size_t){}); + }).handle_exception([](auto ep) { + std::cerr << "Error: " << ep << std::endl; + }).finally([this] { + on_end.reset(); + }); + }); + if (ret != 0) { + std::cerr << "Seastar app returns " << ret << std::endl; + } + if (!begin_signaled) { + begin_signaled = true; + ::eventfd_write(begin_fd.get(), APP_NOT_RUN); + } + } + + template <typename Func> + void run(Func &&func) { + assert(is_running()); + auto fut = seastar::alien::submit_to(app.alien(), 0, + std::forward<Func>(func)); + fut.get(); + } +}; + + diff --git a/src/test/crimson/seastore/CMakeLists.txt b/src/test/crimson/seastore/CMakeLists.txt new file mode 100644 index 000000000..5c6c2771c --- /dev/null +++ b/src/test/crimson/seastore/CMakeLists.txt @@ -0,0 +1,128 @@ +add_executable(unittest-transaction-manager + test_block.cc + test_transaction_manager.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest-transaction-manager + --memory 256M --smp 1) +target_link_libraries( + unittest-transaction-manager + ${CMAKE_DL_LIBS} + crimson-seastore) + +add_executable(unittest-btree-lba-manager + test_btree_lba_manager.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest-btree-lba-manager + --memory 256M --smp 1) +target_link_libraries( + unittest-btree-lba-manager + ${CMAKE_DL_LIBS} + crimson-seastore) + +add_executable(unittest-seastore-journal + test_seastore_journal.cc) +add_ceph_test(unittest-seastore-journal + unittest-seastore-journal --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-journal + crimson::gtest + crimson-seastore) + +add_executable(unittest-seastore-cache + test_block.cc + test_seastore_cache.cc) 
+add_ceph_test(unittest-seastore-cache + unittest-seastore-cache --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-cache + crimson::gtest + crimson-seastore) + +add_executable(unittest-object-data-handler + test_object_data_handler.cc + ../gtest_seastar.cc + ${PROJECT_SOURCE_DIR}/src/crimson/osd/lsan_suppressions.cc) +add_ceph_unittest(unittest-object-data-handler + --memory 256M --smp 1) +target_link_libraries( + unittest-object-data-handler + crimson::gtest + crimson-seastore + crimson-os + crimson-common) + +add_executable(unittest-collection-manager + test_collection_manager.cc + ../gtest_seastar.cc + ${PROJECT_SOURCE_DIR}/src/crimson/osd/lsan_suppressions.cc) +add_ceph_test(unittest-collection-manager + unittest-collection-manager --memory 256M --smp 1) +target_link_libraries( + unittest-collection-manager + crimson::gtest + crimson-seastore + crimson-os + crimson-common) + +add_executable(unittest-omap-manager + test_omap_manager.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest-omap-manager + --memory 256M --smp 1) +target_link_libraries( + unittest-omap-manager + ${CMAKE_DL_LIBS} + crimson-seastore) + +add_executable(unittest-seastore + test_seastore.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest-seastore + --memory 256M --smp 1) +target_link_libraries( + unittest-seastore + ${CMAKE_DL_LIBS} + crimson-seastore + crimson-common) + +add_executable(unittest-seastore-randomblock-manager + test_randomblock_manager.cc) +add_ceph_test(unittest-seastore-randomblock-manager + unittest-seastore-randomblock-manager --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-randomblock-manager + crimson::gtest + ${CMAKE_DL_LIBS} + crimson-seastore) + +add_executable(unittest-seastore-nvmedevice + nvmedevice/test_nvmedevice.cc) +add_ceph_test(unittest-seastore-nvmedevice + unittest-seastore-nvmedevice --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-nvmedevice + crimson::gtest + crimson-seastore + aio) + 
+add_executable(unittest-seastore-cbjournal + test_cbjournal.cc) +add_ceph_test(unittest-seastore-cbjournal + unittest-seastore-cbjournal --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-cbjournal + crimson::gtest + crimson-seastore + aio) + +add_executable(unittest-seastore-extent-allocator + test_extent_allocator.cc) +add_ceph_test(unittest-seastore-extent-allocator + unittest-seastore-extent-allocator --memory 256M --smp 1) +target_link_libraries( + unittest-seastore-extent-allocator + crimson::gtest + crimson-seastore + aio) + +add_subdirectory(onode_tree) diff --git a/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc new file mode 100644 index 000000000..9c2f4c246 --- /dev/null +++ b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc @@ -0,0 +1,105 @@ +//-*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "include/buffer.h" +#include "crimson/os/seastore/random_block_manager/rbm_device.h" +#include "crimson/os/seastore/random_block_manager/nvme_block_device.h" +#include "test/crimson/gtest_seastar.h" +#include "include/stringify.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace random_block_device; +using namespace random_block_device::nvme; + +struct nvdev_test_t : seastar_test_suite_t { + std::unique_ptr<RBMDevice> device; + std::string dev_path; + + static const uint64_t DEV_SIZE = 1024 * 1024 * 1024; + + nvdev_test_t() : + device(nullptr), + dev_path("randomblock_manager.test_nvmedevice" + stringify(getpid())) { + int fd = ::open(dev_path.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644); + ceph_assert(fd >= 0); + ::ftruncate(fd, DEV_SIZE); + ::close(fd); + } + ~nvdev_test_t() { + ::unlink(dev_path.c_str()); + } +}; + +static const uint64_t BUF_SIZE = 1024; +static const uint64_t BLK_SIZE = 4096; + +struct nvdev_test_block_t { + uint8_t data[BUF_SIZE]; + + 
DENC(nvdev_test_block_t, v, p) { + DENC_START(1, 1, p); + for (uint64_t i = 0 ; i < BUF_SIZE; i++) + { + denc(v.data[i], p); + } + DENC_FINISH(p); + } +}; + +WRITE_CLASS_DENC_BOUNDED( + nvdev_test_block_t +) + +using crimson::common::local_conf; +TEST_F(nvdev_test_t, write_and_verify_test) +{ + run_async([this] { + device.reset(new random_block_device::nvme::NVMeBlockDevice(dev_path)); + local_conf().set_val("seastore_cbjournal_size", "1048576").get(); + device->start().get(); + device->mkfs( + device_config_t{ + true, + device_spec_t{ + (magic_t)std::rand(), + device_type_t::RANDOM_BLOCK_SSD, + static_cast<device_id_t>(DEVICE_ID_RANDOM_BLOCK_MIN)}, + seastore_meta_t{uuid_d()}, + secondary_device_set_t()} + ).unsafe_get(); + device->mount().unsafe_get(); + nvdev_test_block_t original_data; + std::minstd_rand0 generator; + uint8_t value = generator(); + memset(original_data.data, value, BUF_SIZE); + uint64_t bl_length = 0; + Device& d = device->get_sharded_device(); + { + bufferlist bl; + encode(original_data, bl); + bl_length = bl.length(); + auto write_buf = ceph::bufferptr(buffer::create_page_aligned(BLK_SIZE)); + bl.begin().copy(bl_length, write_buf.c_str()); + ((RBMDevice*)&d)->write(0, std::move(write_buf)).unsafe_get(); + } + + nvdev_test_block_t read_data; + { + auto read_buf = ceph::bufferptr(buffer::create_page_aligned(BLK_SIZE)); + ((RBMDevice*)&d)->read(0, read_buf).unsafe_get(); + bufferlist bl; + bl.push_back(read_buf); + auto bliter = bl.cbegin(); + decode(read_data, bliter); + } + + int ret = memcmp(original_data.data, read_data.data, BUF_SIZE); + ((RBMDevice*)&d)->close().unsafe_get(); + device->stop().get(); + ASSERT_TRUE(ret == 0); + device.reset(nullptr); + }); +} + diff --git a/src/test/crimson/seastore/onode_tree/CMakeLists.txt b/src/test/crimson/seastore/onode_tree/CMakeLists.txt new file mode 100644 index 000000000..bea208601 --- /dev/null +++ b/src/test/crimson/seastore/onode_tree/CMakeLists.txt @@ -0,0 +1,15 @@ 
+add_executable(unittest-staged-fltree + test_staged_fltree.cc + ../../gtest_seastar.cc) +add_ceph_unittest(unittest-staged-fltree + --memory 256M --smp 1) +target_link_libraries(unittest-staged-fltree + crimson-seastore) + +add_executable(unittest-fltree-onode-manager + test_fltree_onode_manager.cc + ../../gtest_seastar.cc) +add_ceph_unittest(unittest-fltree-onode-manager + --memory 256M --smp 1) +target_link_libraries(unittest-fltree-onode-manager + crimson-seastore) diff --git a/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc b/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc new file mode 100644 index 000000000..1f661cdca --- /dev/null +++ b/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc @@ -0,0 +1,330 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 smarttab + +#include <boost/range/combine.hpp> + +#include "test/crimson/gtest_seastar.h" + +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace crimson::os::seastore::onode; +using CTransaction = ceph::os::Transaction; +using namespace std; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct onode_item_t { + uint32_t size; + uint64_t id; + uint64_t block_size; + uint32_t cnt_modify = 0; + + void initialize(Transaction& t, Onode& value) const { + auto& layout = value.get_mutable_layout(t); + layout.size = size; + layout.omap_root.update(omap_root_t(id, cnt_modify, + value.get_metadata_hint(block_size))); + validate(value); + } + + void validate(Onode& value) const { + auto& layout = value.get_layout(); + ceph_assert(laddr_t(layout.size) == laddr_t{size}); + 
ceph_assert(layout.omap_root.get(value.get_metadata_hint(block_size)).addr == id); + ceph_assert(layout.omap_root.get(value.get_metadata_hint(block_size)).depth == cnt_modify); + } + + void modify(Transaction& t, Onode& value) { + validate(value); + ++cnt_modify; + initialize(t, value); + } + + static onode_item_t create(std::size_t size, std::size_t id, uint64_t block_size) { + ceph_assert(size <= std::numeric_limits<uint32_t>::max()); + return {(uint32_t)size, id, block_size}; + } +}; + +struct fltree_onode_manager_test_t + : public seastar_test_suite_t, TMTestState { + using iterator_t = typename KVPool<onode_item_t>::iterator_t; + + FLTreeOnodeManagerRef manager; + + seastar::future<> set_up_fut() final { + return tm_setup(); + } + + seastar::future<> tear_down_fut() final { + return tm_teardown(); + } + + virtual seastar::future<> _init() final { + return TMTestState::_init().then([this] { + manager.reset(new FLTreeOnodeManager(*tm)); + }); + } + + virtual seastar::future<> _destroy() final { + manager.reset(); + return TMTestState::_destroy(); + } + + virtual FuturizedStore::mkfs_ertr::future<> _mkfs() final { + return TMTestState::_mkfs( + ).safe_then([this] { + return restart_fut(); + }).safe_then([this] { + return repeat_eagain([this] { + return seastar::do_with( + create_mutate_transaction(), + [this](auto &ref_t) + { + return with_trans_intr(*ref_t, [&](auto &t) { + return manager->mkfs(t + ).si_then([this, &t] { + return submit_transaction_fut2(t); + }); + }); + }); + }); + }).handle_error( + crimson::ct_error::assert_all{"Invalid error in _mkfs"} + ); + } + + template <typename F> + void with_transaction(F&& f) { + auto t = create_mutate_transaction(); + std::invoke(f, *t); + submit_transaction(std::move(t)); + } + + template <typename F> + void with_onode_write(iterator_t& it, F&& f) { + with_transaction([this, &it, f=std::move(f)] (auto& t) { + auto p_kv = *it; + auto onode = with_trans_intr(t, [&](auto &t) { + return manager->get_or_create_onode(t, 
p_kv->key); + }).unsafe_get0(); + std::invoke(f, t, *onode, p_kv->value); + with_trans_intr(t, [&](auto &t) { + if (onode->is_alive()) { + return manager->write_dirty(t, {onode}); + } else { + return OnodeManager::write_dirty_iertr::now(); + } + }).unsafe_get0(); + }); + } + + void validate_onode(iterator_t& it) { + with_transaction([this, &it] (auto& t) { + auto p_kv = *it; + auto onode = with_trans_intr(t, [&](auto &t) { + return manager->get_onode(t, p_kv->key); + }).unsafe_get0(); + p_kv->value.validate(*onode); + }); + } + + void validate_erased(iterator_t& it) { + with_transaction([this, &it] (auto& t) { + auto p_kv = *it; + auto exist = with_trans_intr(t, [&](auto &t) { + return manager->contains_onode(t, p_kv->key); + }).unsafe_get0(); + ceph_assert(exist == false); + }); + } + + template <typename F> + void with_onodes_process( + const iterator_t& start, const iterator_t& end, F&& f) { + std::vector<ghobject_t> oids; + std::vector<onode_item_t*> items; + auto it = start; + while(it != end) { + auto p_kv = *it; + oids.emplace_back(p_kv->key); + items.emplace_back(&p_kv->value); + ++it; + } + with_transaction([&oids, &items, f=std::move(f)] (auto& t) mutable { + std::invoke(f, t, oids, items); + }); + } + + template <typename F> + void with_onodes_write( + const iterator_t& start, const iterator_t& end, F&& f) { + with_onodes_process(start, end, + [this, f=std::move(f)] (auto& t, auto& oids, auto& items) { + auto onodes = with_trans_intr(t, [&](auto &t) { + return manager->get_or_create_onodes(t, oids); + }).unsafe_get0(); + for (auto tup : boost::combine(onodes, items)) { + OnodeRef onode; + onode_item_t* p_item; + boost::tie(onode, p_item) = tup; + std::invoke(f, t, *onode, *p_item); + } + with_trans_intr(t, [&](auto &t) { + return manager->write_dirty(t, onodes); + }).unsafe_get0(); + }); + } + + void validate_onodes( + const iterator_t& start, const iterator_t& end) { + with_onodes_process(start, end, + [this] (auto& t, auto& oids, auto& items) { + for 
(auto tup : boost::combine(oids, items)) { + ghobject_t oid; + onode_item_t* p_item; + boost::tie(oid, p_item) = tup; + auto onode = with_trans_intr(t, [&](auto &t) { + return manager->get_onode(t, oid); + }).unsafe_get0(); + p_item->validate(*onode); + } + }); + } + + void validate_erased( + const iterator_t& start, const iterator_t& end) { + with_onodes_process(start, end, + [this] (auto& t, auto& oids, auto& items) { + for (auto& oid : oids) { + auto exist = with_trans_intr(t, [&](auto &t) { + return manager->contains_onode(t, oid); + }).unsafe_get0(); + ceph_assert(exist == false); + } + }); + } + + static constexpr uint64_t LIST_LIMIT = 10; + void validate_list_onodes(KVPool<onode_item_t>& pool) { + with_onodes_process(pool.begin(), pool.end(), + [this] (auto& t, auto& oids, auto& items) { + std::vector<ghobject_t> listed_oids; + auto start = ghobject_t(); + auto end = ghobject_t::get_max(); + assert(start < end); + assert(start < oids[0]); + assert(oids[0] < end); + while (start != end) { + auto [list_ret, list_end] = with_trans_intr(t, [&](auto &t) { + return manager->list_onodes(t, start, end, LIST_LIMIT); + }).unsafe_get0(); + listed_oids.insert(listed_oids.end(), list_ret.begin(), list_ret.end()); + start = list_end; + } + ceph_assert(oids.size() == listed_oids.size()); + }); + } + + fltree_onode_manager_test_t() {} +}; + +TEST_P(fltree_onode_manager_test_t, 1_single) +{ + run_async([this] { + uint64_t block_size = tm->get_block_size(); + auto pool = KVPool<onode_item_t>::create_range({0, 1}, {128, 256}, block_size); + auto iter = pool.begin(); + with_onode_write(iter, [](auto& t, auto& onode, auto& item) { + item.initialize(t, onode); + }); + validate_onode(iter); + + with_onode_write(iter, [](auto& t, auto& onode, auto& item) { + item.modify(t, onode); + }); + validate_onode(iter); + + validate_list_onodes(pool); + + with_onode_write(iter, [this](auto& t, auto& onode, auto& item) { + OnodeRef onode_ref = &onode; + with_trans_intr(t, [&](auto &t) { + 
return manager->erase_onode(t, onode_ref); + }).unsafe_get0(); + }); + validate_erased(iter); + }); +} + +TEST_P(fltree_onode_manager_test_t, 2_synthetic) +{ + run_async([this] { + uint64_t block_size = tm->get_block_size(); + auto pool = KVPool<onode_item_t>::create_range( + {0, 100}, {32, 64, 128, 256, 512}, block_size); + auto start = pool.begin(); + auto end = pool.end(); + with_onodes_write(start, end, + [](auto& t, auto& onode, auto& item) { + item.initialize(t, onode); + }); + validate_onodes(start, end); + + validate_list_onodes(pool); + + auto rd_start = pool.random_begin(); + auto rd_end = rd_start + 50; + with_onodes_write(rd_start, rd_end, + [](auto& t, auto& onode, auto& item) { + item.modify(t, onode); + }); + validate_onodes(start, end); + + pool.shuffle(); + rd_start = pool.random_begin(); + rd_end = rd_start + 50; + with_onodes_write(rd_start, rd_end, + [](auto& t, auto& onode, auto& item) { + item.modify(t, onode); + }); + validate_onodes(start, end); + + pool.shuffle(); + rd_start = pool.random_begin(); + rd_end = rd_start + 50; + with_onodes_write(rd_start, rd_end, + [this](auto& t, auto& onode, auto& item) { + OnodeRef onode_ref = &onode; + with_trans_intr(t, [&](auto &t) { + return manager->erase_onode(t, onode_ref); + }).unsafe_get0(); + }); + validate_erased(rd_start, rd_end); + pool.erase_from_random(rd_start, rd_end); + start = pool.begin(); + end = pool.end(); + validate_onodes(start, end); + + validate_list_onodes(pool); + }); +} + +INSTANTIATE_TEST_SUITE_P( + fltree_onode__manager_test, + fltree_onode_manager_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); diff --git a/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc new file mode 100644 index 000000000..7357b5ced --- /dev/null +++ b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc @@ -0,0 +1,1792 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: 
ts=8 sw=2 smarttab + +#include <array> +#include <cstring> +#include <memory> +#include <set> +#include <sstream> +#include <vector> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_layout.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/tree.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h" + +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" +#include "test_value.h" + +using namespace crimson::os::seastore::onode; + +#define INTR(fun, t) \ + with_trans_intr( \ + t, \ + [&] (auto &tr) { \ + return fun(tr); \ + } \ + ) + +#define INTR_R(fun, t, args...) \ + with_trans_intr( \ + t, \ + [&] (auto &tr) { \ + return fun(tr, args); \ + } \ + ) + +#define INTR_WITH_PARAM(fun, c, b, v) \ + with_trans_intr( \ + c.t, \ + [=] (auto &t) { \ + return fun(c, L_ADDR_MIN, b, v); \ + } \ + ) + +namespace { + constexpr bool IS_DUMMY_SYNC = false; + using DummyManager = DummyNodeExtentManager<IS_DUMMY_SYNC>; + + using UnboundedBtree = Btree<UnboundedValue>; + + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } + + ghobject_t make_ghobj( + shard_t shard, pool_t pool, crush_hash_t crush, + std::string ns, std::string oid, snap_t snap, gen_t gen) { + return ghobject_t{shard_id_t{shard}, pool, crush, ns, oid, snap, gen}; + } + + // return a key_view_t and its underlying memory buffer. + // the buffer needs to be freed manually. 
+ std::pair<key_view_t, void*> build_key_view(const ghobject_t& hobj) { + key_hobj_t key_hobj(hobj); + size_t key_size = sizeof(shard_pool_crush_t) + sizeof(snap_gen_t) + + ns_oid_view_t::estimate_size(key_hobj); + void* p_mem = std::malloc(key_size); + + key_view_t key_view; + char* p_fill = (char*)p_mem + key_size; + + auto spc = shard_pool_crush_t::from_key(key_hobj); + p_fill -= sizeof(shard_pool_crush_t); + std::memcpy(p_fill, &spc, sizeof(shard_pool_crush_t)); + key_view.set(*reinterpret_cast<const shard_pool_crush_t*>(p_fill)); + + auto p_ns_oid = p_fill; + ns_oid_view_t::test_append(key_hobj, p_fill); + ns_oid_view_t ns_oid_view(p_ns_oid); + key_view.set(ns_oid_view); + + auto sg = snap_gen_t::from_key(key_hobj); + p_fill -= sizeof(snap_gen_t); + ceph_assert(p_fill == (char*)p_mem); + std::memcpy(p_fill, &sg, sizeof(snap_gen_t)); + key_view.set(*reinterpret_cast<const snap_gen_t*>(p_fill)); + + return {key_view, p_mem}; + } +} + +struct a_basic_test_t : public seastar_test_suite_t {}; + +TEST_F(a_basic_test_t, 1_basic_sizes) +{ + logger().info("\n" + "Bytes of struct:\n" + " node_header_t: {}\n" + " shard_pool_t: {}\n" + " shard_pool_crush_t: {}\n" + " crush_t: {}\n" + " snap_gen_t: {}\n" + " slot_0_t: {}\n" + " slot_1_t: {}\n" + " slot_3_t: {}\n" + " node_fields_0_t: {}\n" + " node_fields_1_t: {}\n" + " node_fields_2_t: {}\n" + " internal_fields_3_t: {}\n" + " leaf_fields_3_t: {}\n" + " internal_sub_item_t: {}", + sizeof(node_header_t), sizeof(shard_pool_t), + sizeof(shard_pool_crush_t), sizeof(crush_t), sizeof(snap_gen_t), + sizeof(slot_0_t), sizeof(slot_1_t), sizeof(slot_3_t), + sizeof(node_fields_0_t), sizeof(node_fields_1_t), sizeof(node_fields_2_t), + sizeof(internal_fields_3_t), sizeof(leaf_fields_3_t), sizeof(internal_sub_item_t) + ); + + auto hobj = make_ghobj(0, 0, 0, "n", "o", 0, 0); + key_hobj_t key(hobj); + auto [key_view, p_mem] = build_key_view(hobj); + value_config_t value; + value.payload_size = 8; +#define _STAGE_T(NodeType) 
node_to_stage_t<typename NodeType::node_stage_t> +#define NXT_T(StageType) staged<typename StageType::next_param_t> + laddr_t i_value{0}; + logger().info("\n" + "Bytes of a key-value insertion (full-string):\n" + " s-p-c, 'n'-'o', s-g => value_payload(8): typically internal 43B, leaf 59B\n" + " InternalNode0: {} {} {}\n" + " InternalNode1: {} {} {}\n" + " InternalNode2: {} {}\n" + " InternalNode3: {}\n" + " LeafNode0: {} {} {}\n" + " LeafNode1: {} {} {}\n" + " LeafNode2: {} {}\n" + " LeafNode3: {}", + _STAGE_T(InternalNode0)::insert_size(key_view, i_value), + NXT_T(_STAGE_T(InternalNode0))::insert_size(key_view, i_value), + NXT_T(NXT_T(_STAGE_T(InternalNode0)))::insert_size(key_view, i_value), + _STAGE_T(InternalNode1)::insert_size(key_view, i_value), + NXT_T(_STAGE_T(InternalNode1))::insert_size(key_view, i_value), + NXT_T(NXT_T(_STAGE_T(InternalNode1)))::insert_size(key_view, i_value), + _STAGE_T(InternalNode2)::insert_size(key_view, i_value), + NXT_T(_STAGE_T(InternalNode2))::insert_size(key_view, i_value), + _STAGE_T(InternalNode3)::insert_size(key_view, i_value), + _STAGE_T(LeafNode0)::insert_size(key, value), + NXT_T(_STAGE_T(LeafNode0))::insert_size(key, value), + NXT_T(NXT_T(_STAGE_T(LeafNode0)))::insert_size(key, value), + _STAGE_T(LeafNode1)::insert_size(key, value), + NXT_T(_STAGE_T(LeafNode1))::insert_size(key, value), + NXT_T(NXT_T(_STAGE_T(LeafNode1)))::insert_size(key, value), + _STAGE_T(LeafNode2)::insert_size(key, value), + NXT_T(_STAGE_T(LeafNode2))::insert_size(key, value), + _STAGE_T(LeafNode3)::insert_size(key, value) + ); + std::free(p_mem); +} + +TEST_F(a_basic_test_t, 2_node_sizes) +{ + run_async([] { + auto nm = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + auto t = make_test_transaction(); + ValueBuilderImpl<UnboundedValue> vb; + context_t c{*nm, vb, *t}; + std::array<std::pair<NodeImplURef, NodeExtentMutable>, 16> nodes = { + INTR_WITH_PARAM(InternalNode0::allocate, c, false, 1u).unsafe_get0().make_pair(), + 
INTR_WITH_PARAM(InternalNode1::allocate, c, false, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode2::allocate, c, false, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode3::allocate, c, false, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode0::allocate, c, true, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode1::allocate, c, true, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode2::allocate, c, true, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(InternalNode3::allocate, c, true, 1u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode0::allocate, c, false, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode1::allocate, c, false, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode2::allocate, c, false, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode3::allocate, c, false, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode0::allocate, c, true, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode1::allocate, c, true, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode2::allocate, c, true, 0u).unsafe_get0().make_pair(), + INTR_WITH_PARAM(LeafNode3::allocate, c, true, 0u).unsafe_get0().make_pair() + }; + std::ostringstream oss; + oss << "\nallocated nodes:"; + for (auto iter = nodes.begin(); iter != nodes.end(); ++iter) { + oss << "\n "; + auto& ref_node = iter->first; + ref_node->dump_brief(oss); + } + logger().info("{}", oss.str()); + }); +} + +struct b_dummy_tree_test_t : public seastar_test_suite_t { + TransactionRef ref_t; + std::unique_ptr<UnboundedBtree> tree; + + b_dummy_tree_test_t() = default; + + seastar::future<> set_up_fut() override final { + ref_t = make_test_transaction(); + tree.reset( + new UnboundedBtree(NodeExtentManager::create_dummy(IS_DUMMY_SYNC)) + ); + return INTR(tree->mkfs, *ref_t).handle_error( + crimson::ct_error::all_same_way([] { + ASSERT_FALSE("Unable to mkfs"); + }) + ); + } + + seastar::future<> tear_down_fut() final { + 
ref_t.reset(); + tree.reset(); + return seastar::now(); + } +}; + +TEST_F(b_dummy_tree_test_t, 3_random_insert_erase_leaf_node) +{ + run_async([this] { + logger().info("\n---------------------------------------------" + "\nrandomized leaf node insert:\n"); + auto key_s = ghobject_t(); + auto key_e = ghobject_t::get_max(); + ASSERT_TRUE(INTR_R(tree->find, *ref_t, key_s).unsafe_get0().is_end()); + ASSERT_TRUE(INTR(tree->begin, *ref_t).unsafe_get0().is_end()); + ASSERT_TRUE(INTR(tree->last, *ref_t).unsafe_get0().is_end()); + + std::map<ghobject_t, + std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history; + + auto f_validate_insert_new = [this, &insert_history] ( + const ghobject_t& key, const test_item_t& value) { + auto conf = UnboundedBtree::tree_value_config_t{value.get_payload_size()}; + auto [cursor, success] = INTR_R(tree->insert, + *ref_t, key, conf).unsafe_get0(); + initialize_cursor_from_item(*ref_t, key, value, cursor, success); + insert_history.emplace(key, std::make_tuple(value, cursor)); + auto cursor_ = INTR_R(tree->find, *ref_t, key).unsafe_get0(); + ceph_assert(cursor_ != tree->end()); + ceph_assert(cursor_.value() == cursor.value()); + validate_cursor_from_item(key, value, cursor_); + return cursor.value(); + }; + + auto f_validate_erase = [this, &insert_history] (const ghobject_t& key) { + auto cursor_erase = INTR_R(tree->find, *ref_t, key).unsafe_get0(); + auto cursor_next = INTR(cursor_erase.get_next, *ref_t).unsafe_get0(); + auto cursor_ret = INTR_R(tree->erase, *ref_t, cursor_erase).unsafe_get0(); + ceph_assert(cursor_erase.is_end()); + ceph_assert(cursor_ret == cursor_next); + auto cursor_lb = INTR_R(tree->lower_bound, *ref_t, key).unsafe_get0(); + ceph_assert(cursor_lb == cursor_next); + auto it = insert_history.find(key); + ceph_assert(std::get<1>(it->second).is_end()); + insert_history.erase(it); + }; + + auto f_insert_erase_insert = [&f_validate_insert_new, &f_validate_erase] ( + const ghobject_t& key, const test_item_t& value) { + 
f_validate_insert_new(key, value); + f_validate_erase(key); + return f_validate_insert_new(key, value); + }; + + auto values = Values<test_item_t>(15); + + // insert key1, value1 at STAGE_LEFT + auto key1 = make_ghobj(3, 3, 3, "ns3", "oid3", 3, 3); + auto value1 = values.pick(); + auto test_value1 = f_insert_erase_insert(key1, value1); + + // validate lookup + { + auto cursor1_s = INTR_R(tree->lower_bound, *ref_t, key_s).unsafe_get0(); + ASSERT_EQ(cursor1_s.get_ghobj(), key1); + ASSERT_EQ(cursor1_s.value(), test_value1); + auto cursor1_e = INTR_R(tree->lower_bound, *ref_t, key_e).unsafe_get0(); + ASSERT_TRUE(cursor1_e.is_end()); + } + + // insert the same key1 with a different value + { + auto value1_dup = values.pick(); + auto conf = UnboundedBtree::tree_value_config_t{value1_dup.get_payload_size()}; + auto [cursor1_dup, ret1_dup] = INTR_R(tree->insert, + *ref_t, key1, conf).unsafe_get0(); + ASSERT_FALSE(ret1_dup); + validate_cursor_from_item(key1, value1, cursor1_dup); + } + + // insert key2, value2 to key1's left at STAGE_LEFT + // insert node front at STAGE_LEFT + auto key2 = make_ghobj(2, 2, 2, "ns3", "oid3", 3, 3); + auto value2 = values.pick(); + f_insert_erase_insert(key2, value2); + + // insert key3, value3 to key1's right at STAGE_LEFT + // insert node last at STAGE_LEFT + auto key3 = make_ghobj(4, 4, 4, "ns3", "oid3", 3, 3); + auto value3 = values.pick(); + f_insert_erase_insert(key3, value3); + + // insert key4, value4 to key1's left at STAGE_STRING (collision) + auto key4 = make_ghobj(3, 3, 3, "ns2", "oid2", 3, 3); + auto value4 = values.pick(); + f_insert_erase_insert(key4, value4); + + // insert key5, value5 to key1's right at STAGE_STRING (collision) + auto key5 = make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3); + auto value5 = values.pick(); + f_insert_erase_insert(key5, value5); + + // insert key6, value6 to key1's left at STAGE_RIGHT + auto key6 = make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2); + auto value6 = values.pick(); + f_insert_erase_insert(key6, 
value6); + + // insert key7, value7 to key1's right at STAGE_RIGHT + auto key7 = make_ghobj(3, 3, 3, "ns3", "oid3", 4, 4); + auto value7 = values.pick(); + f_insert_erase_insert(key7, value7); + + // insert node front at STAGE_RIGHT + auto key8 = make_ghobj(2, 2, 2, "ns3", "oid3", 2, 2); + auto value8 = values.pick(); + f_insert_erase_insert(key8, value8); + + // insert node front at STAGE_STRING (collision) + auto key9 = make_ghobj(2, 2, 2, "ns2", "oid2", 3, 3); + auto value9 = values.pick(); + f_insert_erase_insert(key9, value9); + + // insert node last at STAGE_RIGHT + auto key10 = make_ghobj(4, 4, 4, "ns3", "oid3", 4, 4); + auto value10 = values.pick(); + f_insert_erase_insert(key10, value10); + + // insert node last at STAGE_STRING (collision) + auto key11 = make_ghobj(4, 4, 4, "ns4", "oid4", 3, 3); + auto value11 = values.pick(); + f_insert_erase_insert(key11, value11); + + // insert key, value randomly until a perfect 3-ary tree is formed + std::vector<std::pair<ghobject_t, test_item_t>> kvs{ + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2), values.pick()}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 4, 4), values.pick()}, + {make_ghobj(2, 2, 2, "ns3", "oid3", 4, 4), values.pick()}, + {make_ghobj(2, 2, 2, "ns4", "oid4", 2, 2), values.pick()}, + {make_ghobj(2, 2, 2, "ns4", "oid4", 3, 3), values.pick()}, + {make_ghobj(2, 2, 2, "ns4", "oid4", 4, 4), values.pick()}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2), values.pick()}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 4, 4), values.pick()}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2), values.pick()}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 4, 4), values.pick()}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2), values.pick()}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 3, 3), values.pick()}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 4, 4), values.pick()}, + {make_ghobj(4, 4, 4, "ns3", "oid3", 2, 2), values.pick()}, + {make_ghobj(4, 4, 4, "ns4", "oid4", 2, 2), values.pick()}, + {make_ghobj(4, 4, 4, "ns4", "oid4", 4, 4), values.pick()}}; + auto 
[smallest_key, smallest_value] = kvs[0]; + auto [largest_key, largest_value] = kvs[kvs.size() - 1]; + std::shuffle(kvs.begin(), kvs.end(), std::default_random_engine{}); + std::for_each(kvs.begin(), kvs.end(), [&f_insert_erase_insert] (auto& kv) { + f_insert_erase_insert(kv.first, kv.second); + }); + ASSERT_EQ(INTR(tree->height, *ref_t).unsafe_get0(), 1); + ASSERT_FALSE(tree->test_is_clean()); + + for (auto& [k, val] : insert_history) { + auto& [v, c] = val; + // validate values in tree keep intact + auto cursor = with_trans_intr(*ref_t, [this, &k=k](auto& tr) { + return tree->find(tr, k); + }).unsafe_get0(); + EXPECT_NE(cursor, tree->end()); + validate_cursor_from_item(k, v, cursor); + // validate values in cursors keep intact + validate_cursor_from_item(k, v, c); + } + { + auto cursor = INTR_R(tree->lower_bound, *ref_t, key_s).unsafe_get0(); + validate_cursor_from_item(smallest_key, smallest_value, cursor); + } + { + auto cursor = INTR(tree->begin, *ref_t).unsafe_get0(); + validate_cursor_from_item(smallest_key, smallest_value, cursor); + } + { + auto cursor = INTR(tree->last, *ref_t).unsafe_get0(); + validate_cursor_from_item(largest_key, largest_value, cursor); + } + + // validate range query + { + kvs.clear(); + for (auto& [k, val] : insert_history) { + auto& [v, c] = val; + kvs.emplace_back(k, v); + } + insert_history.clear(); + std::sort(kvs.begin(), kvs.end(), [](auto& l, auto& r) { + return l.first < r.first; + }); + auto cursor = INTR(tree->begin, *ref_t).unsafe_get0(); + for (auto& [k, v] : kvs) { + ASSERT_FALSE(cursor.is_end()); + validate_cursor_from_item(k, v, cursor); + cursor = INTR(cursor.get_next, *ref_t).unsafe_get0(); + } + ASSERT_TRUE(cursor.is_end()); + } + + std::ostringstream oss; + tree->dump(*ref_t, oss); + logger().info("\n{}\n", oss.str()); + + // randomized erase until empty + std::shuffle(kvs.begin(), kvs.end(), std::default_random_engine{}); + for (auto& [k, v] : kvs) { + auto e_size = with_trans_intr(*ref_t, [this, &k=k](auto& tr) { 
+ return tree->erase(tr, k); + }).unsafe_get0(); + ASSERT_EQ(e_size, 1); + } + auto cursor = INTR(tree->begin, *ref_t).unsafe_get0(); + ASSERT_TRUE(cursor.is_end()); + ASSERT_EQ(INTR(tree->height, *ref_t).unsafe_get0(), 1); + }); +} + +static std::set<ghobject_t> build_key_set( + std::pair<unsigned, unsigned> range_2, + std::pair<unsigned, unsigned> range_1, + std::pair<unsigned, unsigned> range_0, + std::string padding = "", + bool is_internal = false) { + ceph_assert(range_1.second <= 10); + std::set<ghobject_t> ret; + ghobject_t key; + for (unsigned i = range_2.first; i < range_2.second; ++i) { + for (unsigned j = range_1.first; j < range_1.second; ++j) { + for (unsigned k = range_0.first; k < range_0.second; ++k) { + std::ostringstream os_ns; + os_ns << "ns" << j; + std::ostringstream os_oid; + os_oid << "oid" << j << padding; + key = make_ghobj(i, i, i, os_ns.str(), os_oid.str(), k, k); + ret.insert(key); + } + } + } + if (is_internal) { + ret.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9)); + } + return ret; +} + +class TestTree { + public: + TestTree() + : moved_nm{NodeExtentManager::create_dummy(IS_DUMMY_SYNC)}, + ref_t{make_test_transaction()}, + t{*ref_t}, + c{*moved_nm, vb, t}, + tree{std::move(moved_nm)}, + values{0} {} + + seastar::future<> build_tree( + std::pair<unsigned, unsigned> range_2, + std::pair<unsigned, unsigned> range_1, + std::pair<unsigned, unsigned> range_0, + size_t value_size) { + return seastar::async([this, range_2, range_1, range_0, value_size] { + INTR(tree.mkfs, t).unsafe_get0(); + //logger().info("\n---------------------------------------------" + // "\nbefore leaf node split:\n"); + auto keys = build_key_set(range_2, range_1, range_0); + for (auto& key : keys) { + auto value = values.create(value_size); + insert_tree(key, value).get0(); + } + ASSERT_EQ(INTR(tree.height, t).unsafe_get0(), 1); + ASSERT_FALSE(tree.test_is_clean()); + //std::ostringstream oss; + //tree.dump(t, oss); + //logger().info("\n{}\n", oss.str()); + 
}); + } + + seastar::future<> build_tree( + const std::vector<ghobject_t>& keys, const std::vector<test_item_t>& values) { + return seastar::async([this, keys, values] { + INTR(tree.mkfs, t).unsafe_get0(); + //logger().info("\n---------------------------------------------" + // "\nbefore leaf node split:\n"); + ASSERT_EQ(keys.size(), values.size()); + auto key_iter = keys.begin(); + auto value_iter = values.begin(); + while (key_iter != keys.end()) { + insert_tree(*key_iter, *value_iter).get0(); + ++key_iter; + ++value_iter; + } + ASSERT_EQ(INTR(tree.height, t).unsafe_get0(), 1); + ASSERT_FALSE(tree.test_is_clean()); + //std::ostringstream oss; + //tree.dump(t, oss); + //logger().info("\n{}\n", oss.str()); + }); + } + + seastar::future<> split_merge( + const ghobject_t& key, + const test_item_t& value, + const split_expectation_t& expected, + std::optional<ghobject_t> next_key) { + return seastar::async([this, key, value, expected, next_key] { + // clone + auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + auto p_dummy = static_cast<DummyManager*>(ref_dummy.get()); + UnboundedBtree tree_clone(std::move(ref_dummy)); + auto ref_t_clone = make_test_transaction(); + Transaction& t_clone = *ref_t_clone; + INTR_R(tree_clone.test_clone_from, t_clone, t, tree).unsafe_get0(); + + // insert and split + logger().info("\n\nINSERT-SPLIT {}:", key_hobj_t(key)); + auto conf = UnboundedBtree::tree_value_config_t{value.get_payload_size()}; + auto [cursor, success] = INTR_R(tree_clone.insert, + t_clone, key, conf).unsafe_get0(); + initialize_cursor_from_item(t, key, value, cursor, success); + + { + std::ostringstream oss; + tree_clone.dump(t_clone, oss); + logger().info("dump new root:\n{}", oss.str()); + } + EXPECT_EQ(INTR(tree_clone.height, t_clone).unsafe_get0(), 2); + + for (auto& [k, val] : insert_history) { + auto& [v, c] = val; + auto result = with_trans_intr(t_clone, [&tree_clone, &k=k] (auto& tr) { + return tree_clone.find(tr, k); + }).unsafe_get0(); + 
EXPECT_NE(result, tree_clone.end()); + validate_cursor_from_item(k, v, result); + } + auto result = INTR_R(tree_clone.find, t_clone, key).unsafe_get0(); + EXPECT_NE(result, tree_clone.end()); + validate_cursor_from_item(key, value, result); + EXPECT_TRUE(last_split.match(expected)); + EXPECT_EQ(p_dummy->size(), 3); + + // erase and merge + logger().info("\n\nERASE-MERGE {}:", key_hobj_t(key)); + auto nxt_cursor = with_trans_intr(t_clone, [&cursor=cursor](auto& tr) { + return cursor.erase<true>(tr); + }).unsafe_get0(); + + { + // track root again to dump + auto begin = INTR(tree_clone.begin, t_clone).unsafe_get0(); + std::ignore = begin; + std::ostringstream oss; + tree_clone.dump(t_clone, oss); + logger().info("dump root:\n{}", oss.str()); + } + + if (next_key.has_value()) { + auto found = insert_history.find(*next_key); + ceph_assert(found != insert_history.end()); + validate_cursor_from_item( + *next_key, std::get<0>(found->second), nxt_cursor); + } else { + EXPECT_TRUE(nxt_cursor.is_end()); + } + + for (auto& [k, val] : insert_history) { + auto& [v, c] = val; + auto result = with_trans_intr(t_clone, [&tree_clone, &k=k](auto& tr) { + return tree_clone.find(tr, k); + }).unsafe_get0(); + EXPECT_NE(result, tree_clone.end()); + validate_cursor_from_item(k, v, result); + } + EXPECT_EQ(INTR(tree_clone.height, t_clone).unsafe_get0(), 1); + EXPECT_EQ(p_dummy->size(), 1); + }); + } + + test_item_t create_value(size_t size) { + return values.create(size); + } + + private: + seastar::future<> insert_tree(const ghobject_t& key, const test_item_t& value) { + return seastar::async([this, &key, &value] { + auto conf = UnboundedBtree::tree_value_config_t{value.get_payload_size()}; + auto [cursor, success] = INTR_R(tree.insert, + t, key, conf).unsafe_get0(); + initialize_cursor_from_item(t, key, value, cursor, success); + insert_history.emplace(key, std::make_tuple(value, cursor)); + }); + } + + NodeExtentManagerURef moved_nm; + TransactionRef ref_t; + Transaction& t; + 
ValueBuilderImpl<UnboundedValue> vb; + context_t c; + UnboundedBtree tree; + Values<test_item_t> values; + std::map<ghobject_t, + std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history; +}; + +struct c_dummy_test_t : public seastar_test_suite_t {}; + +TEST_F(c_dummy_test_t, 4_split_merge_leaf_node) +{ + run_async([] { + { + TestTree test; + test.build_tree({2, 5}, {2, 5}, {2, 5}, 120).get0(); + + auto value = test.create_value(1144); + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to left front at stage 2, 1, 0\n"); + test.split_merge(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), value, + {2u, 2u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), value, + {2u, 1u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 2, 2, "ns2", "oid2", 1, 1), value, + {2u, 0u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to left back at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(2, 2, 2, "ns4", "oid4", 5, 5), value, + {2u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), value, + {2u, 1u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 3, 3, "ns3", "oid3", 3, 3), value, + {2u, 2u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), value, + {2u, 1u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 1, 1), value, + {2u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + + auto 
value0 = test.create_value(1416); + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to right front at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), value0, + {2u, 0u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), value0, + {2u, 1u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 4, 4, "ns3", "oid3", 3, 3), value0, + {2u, 2u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), value0, + {2u, 1u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), value0, + {2u, 0u, false, InsertType::BEGIN}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to right back at stage 0, 1, 2\n"); + test.split_merge(make_ghobj(4, 4, 4, "ns4", "oid4", 5, 5), value0, + {2u, 0u, false, InsertType::LAST}, + std::nullopt).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns5", "oid5", 3, 3), value0, + {2u, 1u, false, InsertType::LAST}, + std::nullopt).get0(); + test.split_merge(make_ghobj(5, 5, 5, "ns3", "oid3", 3, 3), value0, + {2u, 2u, false, InsertType::LAST}, + std::nullopt).get0(); + + auto value1 = test.create_value(316); + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to left middle at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(2, 2, 2, "ns4", "oid4", 5, 5), value1, + {1u, 0u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), value1, + {1u, 1u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); 
+ test.split_merge(make_ghobj(2, 2, 3, "ns3", "oid3", 3, 3), value1, + {1u, 2u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), value1, + {1u, 1u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 1, 1), value1, + {1u, 0u, true, InsertType::MID}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to left back at stage 0, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 5, 5), value1, + {1u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), value1, + {1u, 1u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3", 1, 1), value1, + {1u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2)}).get0(); + + auto value2 = test.create_value(452); + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to right front at stage 0, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3", 5, 5), value2, + {1u, 0u, false, InsertType::BEGIN}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid4", 3, 3), value2, + {1u, 1u, false, InsertType::BEGIN}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 1, 1), value2, + {1u, 0u, false, InsertType::BEGIN}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2)}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to right middle at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), value2, + {1u, 0u, false, InsertType::MID}, + 
{make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), value2, + {1u, 1u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 4, "ns3", "oid3", 3, 3), value2, + {1u, 2u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), value2, + {1u, 1u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), value2, + {1u, 0u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + + auto value3 = test.create_value(834); + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to right middle at stage 0, 1, 2, 1, 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), value3, + {0u, 0u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), value3, + {0u, 1u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(3, 3, 4, "ns3", "oid3", 3, 3), value3, + {0u, 2u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), value3, + {0u, 1u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + test.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), value3, + {0u, 0u, false, InsertType::MID}, + {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to right front at stage 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 2, 3), value3, + {0u, 0u, false, InsertType::BEGIN}, + {make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3)}).get0(); + + auto value4 = test.create_value(572); + 
logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to left back at stage 0\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 3, 4), value4, + {0u, 0u, true, InsertType::LAST}, + {make_ghobj(3, 3, 3, "ns2", "oid2", 4, 4)}).get0(); + } + + { + TestTree test; + test.build_tree({2, 4}, {2, 4}, {2, 4}, 232).get0(); + auto value = test.create_value(1996); + logger().info("\n---------------------------------------------" + "\nsplit at [0, 0, 0]; insert to left front at stage 2, 1, 0\n"); + test.split_merge(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), value, + {2u, 2u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}})); + test.split_merge(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), value, + {2u, 1u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}})); + test.split_merge(make_ghobj(2, 2, 2, "ns2", "oid2", 1, 1), value, + {2u, 0u, true, InsertType::BEGIN}, + {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0(); + EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}})); + } + + { + TestTree test; + std::vector<ghobject_t> keys = { + make_ghobj(2, 2, 2, "ns3", "oid3", 3, 3), + make_ghobj(3, 3, 3, "ns3", "oid3", 3, 3)}; + std::vector<test_item_t> values = { + test.create_value(1360), + test.create_value(1632)}; + test.build_tree(keys, values).get0(); + auto value = test.create_value(1640); + logger().info("\n---------------------------------------------" + "\nsplit at [END, END, END]; insert to right at stage 0, 1, 2\n"); + test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3", 4, 4), value, + {0u, 0u, false, InsertType::BEGIN}, + std::nullopt).get0(); + EXPECT_TRUE(last_split.match_split_pos({1, {0, {1}}})); + test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3), value, + {1u, 1u, false, InsertType::BEGIN}, + std::nullopt).get0(); + 
EXPECT_TRUE(last_split.match_split_pos({1, {1, {0}}})); + test.split_merge(make_ghobj(4, 4, 4, "ns3", "oid3", 3, 3), value, + {2u, 2u, false, InsertType::BEGIN}, + std::nullopt).get0(); + EXPECT_TRUE(last_split.match_split_pos({2, {0, {0}}})); + } + }); +} + +namespace crimson::os::seastore::onode { + +class DummyChildPool { + class DummyChildImpl final : public NodeImpl { + public: + using URef = std::unique_ptr<DummyChildImpl>; + DummyChildImpl(const std::set<ghobject_t>& keys, bool is_level_tail, laddr_t laddr) + : keys{keys}, _is_level_tail{is_level_tail}, _laddr{laddr} { + std::tie(key_view, p_mem_key_view) = build_key_view(*keys.crbegin()); + build_name(); + } + ~DummyChildImpl() override { + std::free(p_mem_key_view); + } + + const std::set<ghobject_t>& get_keys() const { return keys; } + + void reset(const std::set<ghobject_t>& _keys, bool level_tail) { + keys = _keys; + _is_level_tail = level_tail; + std::free(p_mem_key_view); + std::tie(key_view, p_mem_key_view) = build_key_view(*keys.crbegin()); + build_name(); + } + + public: + laddr_t laddr() const override { return _laddr; } + bool is_level_tail() const override { return _is_level_tail; } + std::optional<key_view_t> get_pivot_index() const override { return {key_view}; } + bool is_extent_retired() const override { return _is_extent_retired; } + const std::string& get_name() const override { return name; } + search_position_t make_tail() override { + _is_level_tail = true; + build_name(); + return search_position_t::end(); + } + eagain_ifuture<> retire_extent(context_t) override { + assert(!_is_extent_retired); + _is_extent_retired = true; + return eagain_iertr::now(); + } + + protected: + node_type_t node_type() const override { return node_type_t::LEAF; } + field_type_t field_type() const override { return field_type_t::N0; } + const char* read() const override { + ceph_abort("impossible path"); } + extent_len_t get_node_size() const override { + ceph_abort("impossible path"); } + nextent_state_t 
get_extent_state() const override { + ceph_abort("impossible path"); } + level_t level() const override { return 0u; } + void prepare_mutate(context_t) override { + ceph_abort("impossible path"); } + void validate_non_empty() const override { + ceph_abort("impossible path"); } + bool is_keys_empty() const override { + ceph_abort("impossible path"); } + bool has_single_value() const override { + ceph_abort("impossible path"); } + node_offset_t free_size() const override { + ceph_abort("impossible path"); } + extent_len_t total_size() const override { + ceph_abort("impossible path"); } + bool is_size_underflow() const override { + ceph_abort("impossible path"); } + std::tuple<match_stage_t, search_position_t> erase(const search_position_t&) override { + ceph_abort("impossible path"); } + std::tuple<match_stage_t, std::size_t> evaluate_merge(NodeImpl&) override { + ceph_abort("impossible path"); } + search_position_t merge(NodeExtentMutable&, NodeImpl&, match_stage_t, extent_len_t) override { + ceph_abort("impossible path"); } + eagain_ifuture<NodeExtentMutable> rebuild_extent(context_t) override { + ceph_abort("impossible path"); } + node_stats_t get_stats() const override { + ceph_abort("impossible path"); } + std::ostream& dump(std::ostream&) const override { + ceph_abort("impossible path"); } + std::ostream& dump_brief(std::ostream&) const override { + ceph_abort("impossible path"); } + void validate_layout() const override { + ceph_abort("impossible path"); } + void test_copy_to(NodeExtentMutable&) const override { + ceph_abort("impossible path"); } + void test_set_tail(NodeExtentMutable&) override { + ceph_abort("impossible path"); } + + private: + void build_name() { + std::ostringstream sos; + sos << "DummyNode" + << "@0x" << std::hex << laddr() << std::dec + << "Lv" << (unsigned)level() + << (is_level_tail() ? 
"$" : "") + << "(" << key_view << ")"; + name = sos.str(); + } + + std::set<ghobject_t> keys; + bool _is_level_tail; + laddr_t _laddr; + std::string name; + bool _is_extent_retired = false; + + key_view_t key_view; + /* Buffer handed back by build_key_view(); released with std::free() in reset() and the destructor. */ void* p_mem_key_view; + }; + + /* DummyChild: Node subclass wrapping a DummyChildImpl. It registers itself with the owning pool in its constructor. */ class DummyChild final : public Node { + public: + ~DummyChild() override = default; + + key_view_t get_pivot_key() const { return *impl->get_pivot_index(); } + + /* Splits this node's key set at an interior index (1 when there are exactly two keys, otherwise picked with rd() from [1, size-2]). This node keeps the left keys and is no longer tail; a new right child takes the remaining keys and the previous tail flag. splitable_nodes is updated for both halves before the split is applied to the parent. */ eagain_ifuture<> populate_split( + context_t c, std::set<Ref<DummyChild>>& splitable_nodes) { + ceph_assert(can_split()); + ceph_assert(splitable_nodes.find(this) != splitable_nodes.end()); + + size_t index; + const auto& keys = impl->get_keys(); + if (keys.size() == 2) { + index = 1; + } else { + index = rd() % (keys.size() - 2) + 1; + } + auto iter = keys.begin(); + std::advance(iter, index); + + std::set<ghobject_t> left_keys(keys.begin(), iter); + std::set<ghobject_t> right_keys(iter, keys.end()); + bool right_is_tail = impl->is_level_tail(); + impl->reset(left_keys, false); + auto right_child = DummyChild::create_new(right_keys, right_is_tail, pool); + if (!can_split()) { + splitable_nodes.erase(this); + } + if (right_child->can_split()) { + splitable_nodes.insert(right_child); + } + Ref<Node> this_ref = this; + return apply_split_to_parent( + c, std::move(this_ref), std::move(right_child), false); + } + + /* Requires exactly one existing key that is greater than insert_key. Adds insert_key and splits, leaving one key on each side; splitable_nodes is expected to be empty afterwards. */ eagain_ifuture<> insert_and_split( + context_t c, const ghobject_t& insert_key, + std::set<Ref<DummyChild>>& splitable_nodes) { + const auto& keys = impl->get_keys(); + ceph_assert(keys.size() == 1); + auto& key = *keys.begin(); + ceph_assert(insert_key < key); + + std::set<ghobject_t> new_keys; + new_keys.insert(insert_key); + new_keys.insert(key); + impl->reset(new_keys, impl->is_level_tail()); + + splitable_nodes.clear(); + splitable_nodes.insert(this); + auto fut = populate_split(c, splitable_nodes); + ceph_assert(splitable_nodes.size() == 0); + return fut; + } + + /* Merges with the right peer when one exists, otherwise with the left peer. The node being merged away is untracked from the pool first. */ eagain_ifuture<> merge(context_t c, Ref<DummyChild>&& this_ref) { + return 
parent_info().ptr->get_child_peers(c, parent_info().position + ).si_then([c, this_ref = std::move(this_ref), this] (auto lr_nodes) mutable { + auto& [lnode, rnode] = lr_nodes; + if (rnode) { + /* Right peer present: it is the node merged away, so drop the other references to it before do_merge() checks use_count() == 1. */ lnode.reset(); + Ref<DummyChild> r_dummy(static_cast<DummyChild*>(rnode.get())); + rnode.reset(); + pool.untrack_node(r_dummy); + assert(r_dummy->use_count() == 1); + return do_merge(c, std::move(this_ref), std::move(r_dummy), true); + } else { + /* No right peer: this node is the right-hand side and is merged into the left peer. */ ceph_assert(lnode); + Ref<DummyChild> l_dummy(static_cast<DummyChild*>(lnode.get())); + pool.untrack_node(this_ref); + assert(this_ref->use_count() == 1); + return do_merge(c, std::move(l_dummy), std::move(this_ref), false); + } + }); + } + + /* Replaces the single key of this (non-tail) node with new_key, then asks the parent to fix its index for this child. */ eagain_ifuture<> fix_key(context_t c, const ghobject_t& new_key) { + const auto& keys = impl->get_keys(); + ceph_assert(keys.size() == 1); + assert(impl->is_level_tail() == false); + + std::set<ghobject_t> new_keys; + new_keys.insert(new_key); + impl->reset(new_keys, impl->is_level_tail()); + Ref<Node> this_ref = this; + return fix_parent_index<true>(c, std::move(this_ref), false); + } + + /* True when pos equals this node's position in its parent; must not be called on a root. */ bool match_pos(const search_position_t& pos) const { + ceph_assert(!is_root()); + return pos == parent_info().position; + } + + /* Builds the impl and the node. The raw impl pointer is taken before ownership of the unique_ptr moves into the node. */ static Ref<DummyChild> create( + const std::set<ghobject_t>& keys, bool is_level_tail, + laddr_t addr, DummyChildPool& pool) { + auto ref_impl = std::make_unique<DummyChildImpl>(keys, is_level_tail, addr); + return new DummyChild(ref_impl.get(), std::move(ref_impl), pool); + } + + /* Same as create(), with the laddr taken from a function-local static counter that is incremented on every call. */ static Ref<DummyChild> create_new( + const std::set<ghobject_t>& keys, bool is_level_tail, DummyChildPool& pool) { + static laddr_t seed = 0; + return create(keys, is_level_tail, seed++, pool); + } + + /* Creates the first child (as level tail), installs it as a new root using the super obtained from the node manager, then upgrades the root and returns the child. */ static eagain_ifuture<Ref<DummyChild>> create_initial( + context_t c, const std::set<ghobject_t>& keys, + DummyChildPool& pool, RootNodeTracker& root_tracker) { + auto initial = create_new(keys, true, pool); + return c.nm.get_super(c.t, root_tracker + ).handle_error_interruptible( + 
eagain_iertr::pass_further{}, + crimson::ct_error::assert_all{"Invalid error during create_initial()"} + ).si_then([c, initial](auto super) { + initial->make_root_new(c, std::move(super)); + return initial->upgrade_root(c, L_ADDR_MIN).si_then([initial] { + return initial; + }); + }); + } + + protected: + /* Recreates this child (same keys, tail flag and laddr) inside the pool currently being cloned into, and attaches the copy to new_parent at the same position. Requires pool.pool_clone_in_progress to be set. */ eagain_ifuture<> test_clone_non_root( + context_t, Ref<InternalNode> new_parent) const override { + ceph_assert(!is_root()); + auto p_pool_clone = pool.pool_clone_in_progress; + ceph_assert(p_pool_clone != nullptr); + auto clone = create( + impl->get_keys(), impl->is_level_tail(), impl->laddr(), *p_pool_clone); + clone->as_child(parent_info().position, new_parent); + return eagain_iertr::now(); + } + /* The Node overrides below abort with "impossible path", except is_tracking() which reports false. */ eagain_ifuture<Ref<tree_cursor_t>> lookup_smallest(context_t) override { + ceph_abort("impossible path"); } + eagain_ifuture<Ref<tree_cursor_t>> lookup_largest(context_t) override { + ceph_abort("impossible path"); } + eagain_ifuture<> test_clone_root(context_t, RootNodeTracker&) const override { + ceph_abort("impossible path"); } + eagain_ifuture<search_result_t> lower_bound_tracked( + context_t, const key_hobj_t&, MatchHistory&) override { + ceph_abort("impossible path"); } + eagain_ifuture<> do_get_tree_stats(context_t, tree_stats_t&) override { + ceph_abort("impossible path"); } + bool is_tracking() const override { return false; } + void track_merge(Ref<Node>, match_stage_t, search_position_t&) override { + ceph_abort("impossible path"); } + + private: + /* Private: instances are made through create(). Registers the new node with the pool. */ DummyChild(DummyChildImpl* impl, DummyChildImpl::URef&& ref, DummyChildPool& pool) + : Node(std::move(ref)), impl{impl}, pool{pool} { + pool.track_node(this); + } + + /* A node can be split only while it holds more than one key. */ bool can_split() const { return impl->get_keys().size() > 1; } + + /* Merges right into left; both must hold exactly one key and right must be uniquely referenced. left takes over right's tail flag, and takes right's key set when stole_key is true (otherwise it keeps its own). */ static eagain_ifuture<> do_merge( + context_t c, Ref<DummyChild>&& left, Ref<DummyChild>&& right, bool stole_key) { + assert(right->use_count() == 1); + assert(left->impl->get_keys().size() == 1); + assert(right->impl->get_keys().size() == 1); + bool left_is_tail = 
right->impl->is_level_tail(); + const std::set<ghobject_t>* p_keys; + if (stole_key) { + p_keys = &right->impl->get_keys(); + } else { + p_keys = &left->impl->get_keys(); + } + left->impl->reset(*p_keys, left_is_tail); + /* Read the address before left is moved into the call below. */ auto left_addr = left->impl->laddr(); + return left->parent_info().ptr->apply_children_merge<true>( + c, std::move(left), left_addr, std::move(right), !stole_key); + } + + DummyChildImpl* impl; + DummyChildPool& pool; + mutable std::random_device rd; + }; + + public: + DummyChildPool() = default; + ~DummyChildPool() { reset(); } + + /* Resets the pool, creates a btree over a dummy NodeExtentManager, creates the initial child holding all keys, then keeps splitting a child picked with rd() from splitable_nodes until that set is empty. Finally asserts the tree height is 2. */ auto build_tree(const std::set<ghobject_t>& keys) { + reset(); + // create tree + auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + p_dummy = static_cast<DummyManager*>(ref_dummy.get()); + p_btree.emplace(std::move(ref_dummy)); + return with_trans_intr(get_context().t, [this, &keys] (auto &tr) { + return DummyChild::create_initial(get_context(), keys, *this, *p_btree->root_tracker + ).si_then([this](auto initial_child) { + // split + splitable_nodes.insert(initial_child); + return trans_intr::repeat([this] () + -> eagain_ifuture<seastar::stop_iteration> { + if (splitable_nodes.empty()) { + return seastar::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::yes); + } + auto index = rd() % splitable_nodes.size(); + auto iter = splitable_nodes.begin(); + std::advance(iter, index); + Ref<DummyChild> child = *iter; + return child->populate_split(get_context(), splitable_nodes + ).si_then([] { + return seastar::stop_iteration::no; + }); + }); + }).si_then([this] { + //std::ostringstream oss; + //p_btree->dump(t(), oss); + //logger().info("\n{}\n", oss.str()); + return p_btree->height(t()); + }).si_then([](auto height) { + ceph_assert(height == 2); + }); + }); + } + + /* Works on a clone of this pool: inserts key into the child at pos (forcing a split), expects height 3, p_dummy->size() 3 and last_split matching expected; then merges the node back and expects height 2 and size 1. */ seastar::future<> split_merge(ghobject_t key, search_position_t pos, + const split_expectation_t& expected) { + return seastar::async([this, key, pos, expected] { + DummyChildPool pool_clone; + clone_to(pool_clone); + + // 
insert and split + logger().info("\n\nINSERT-SPLIT {} at pos({}):", key_hobj_t(key), pos); + auto node_to_split = pool_clone.get_node_by_pos(pos); + with_trans_intr(pool_clone.get_context().t, [&] (auto &t) { + return node_to_split->insert_and_split( + pool_clone.get_context(), key, pool_clone.splitable_nodes); + }).unsafe_get0(); + { + std::ostringstream oss; + pool_clone.p_btree->dump(pool_clone.t(), oss); + logger().info("dump new root:\n{}", oss.str()); + } + auto &pt = pool_clone.t(); + EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 3); + EXPECT_TRUE(last_split.match(expected)); + EXPECT_EQ(pool_clone.p_dummy->size(), 3); + + // erase and merge + /* pivot_key is only consumed by the assert below, hence [[maybe_unused]]. */ [[maybe_unused]] auto pivot_key = node_to_split->get_pivot_key(); + logger().info("\n\nERASE-MERGE {}:", node_to_split->get_name()); + assert(pivot_key == key_hobj_t(key)); + with_trans_intr(pool_clone.get_context().t, [&] (auto &t) { + return node_to_split->merge( + pool_clone.get_context(), std::move(node_to_split)); + }).unsafe_get0(); + auto &pt2 = pool_clone.t(); + EXPECT_EQ(INTR(pool_clone.p_btree->height ,pt2).unsafe_get0(), 2); + EXPECT_EQ(pool_clone.p_dummy->size(), 1); + }); + } + + /* Works on a clone of this pool: changes the key of the child at pos to new_key and expects height/size 3/3 when expect_split is true, 2/1 otherwise; then changes the key back to the old one and expects 2/1. */ seastar::future<> fix_index( + ghobject_t new_key, search_position_t pos, bool expect_split) { + return seastar::async([this, new_key, pos, expect_split] { + DummyChildPool pool_clone; + clone_to(pool_clone); + + // fix + auto node_to_fix = pool_clone.get_node_by_pos(pos); + auto old_key = node_to_fix->get_pivot_key().to_ghobj(); + logger().info("\n\nFIX pos({}) from {} to {}, expect_split={}:", + pos, node_to_fix->get_name(), key_hobj_t(new_key), expect_split); + with_trans_intr(pool_clone.get_context().t, [&] (auto &t) { + return node_to_fix->fix_key(pool_clone.get_context(), new_key); + }).unsafe_get0(); + if (expect_split) { + std::ostringstream oss; + pool_clone.p_btree->dump(pool_clone.t(), oss); + logger().info("dump new root:\n{}", oss.str()); + auto &pt = pool_clone.t(); + 
EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 3); + EXPECT_EQ(pool_clone.p_dummy->size(), 3); + } else { + auto &pt = pool_clone.t(); + EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 2); + EXPECT_EQ(pool_clone.p_dummy->size(), 1); + } + + // fix back + logger().info("\n\nFIX pos({}) from {} back to {}:", + pos, node_to_fix->get_name(), key_hobj_t(old_key)); + with_trans_intr(pool_clone.get_context().t, [&] (auto &t) { + return node_to_fix->fix_key(pool_clone.get_context(), old_key); + }).unsafe_get0(); + auto &pt = pool_clone.t(); + EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 2); + EXPECT_EQ(pool_clone.p_dummy->size(), 1); + }); + } + + private: + /* Gives pool_clone its own dummy manager and btree, then clones this pool's btree into it. pool_clone_in_progress points at pool_clone only for the duration of the clone. */ void clone_to(DummyChildPool& pool_clone) { + pool_clone_in_progress = &pool_clone; + auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); + pool_clone.p_dummy = static_cast<DummyManager*>(ref_dummy.get()); + pool_clone.p_btree.emplace(std::move(ref_dummy)); + auto &pt = pool_clone.t(); + [[maybe_unused]] auto &tr = t(); + INTR_R(pool_clone.p_btree->test_clone_from, + pt, tr, *p_btree).unsafe_get0(); + pool_clone_in_progress = nullptr; + } + + /* Releases tracked children and the btree. With children present, the btree is asserted not clean before they are cleared and clean afterwards; with none, no btree may exist. Must not run while a clone is in progress. */ void reset() { + ceph_assert(pool_clone_in_progress == nullptr); + if (tracked_children.size()) { + ceph_assert(!p_btree->test_is_clean()); + tracked_children.clear(); + ceph_assert(p_btree->test_is_clean()); + p_dummy = nullptr; + p_btree.reset(); + } else { + ceph_assert(!p_btree.has_value()); + } + splitable_nodes.clear(); + } + + /* Adds node to tracked_children; asserts it was not tracked yet. */ void track_node(Ref<DummyChild> node) { + ceph_assert(tracked_children.find(node) == tracked_children.end()); + tracked_children.insert(node); + } + + /* Removes node from tracked_children; asserts exactly one entry was erased. */ void untrack_node(Ref<DummyChild> node) { + auto ret = tracked_children.erase(node); + ceph_assert(ret == 1); + } + + /* Returns the first tracked child whose parent position equals pos; asserts that one exists. */ Ref<DummyChild> get_node_by_pos(const search_position_t& pos) const { + auto iter = std::find_if( + tracked_children.begin(), tracked_children.end(), [&pos](auto& child) { + return child->match_pos(pos); + }); + ceph_assert(iter 
!= tracked_children.end()); + return *iter; + } + + /* Builds a context_t from the dummy manager, the value builder and the pool's transaction; requires p_dummy to be set. */ context_t get_context() { + ceph_assert(p_dummy != nullptr); + return {*p_dummy, vb, t()}; + } + + Transaction& t() const { return *ref_t; } + + std::set<Ref<DummyChild>> tracked_children; + std::optional<UnboundedBtree> p_btree; + DummyManager* p_dummy = nullptr; + ValueBuilderImpl<UnboundedValue> vb; + TransactionRef ref_t = make_test_transaction(); + + std::random_device rd; + std::set<Ref<DummyChild>> splitable_nodes; + + /* Non-null only while clone_to() runs; read by DummyChild::test_clone_non_root() to find the destination pool. */ DummyChildPool* pool_clone_in_progress = nullptr; +}; + +} + /* Test 5: drives DummyChildPool::split_merge() and fix_index() over several hand-built key sets. Each braced section below rebuilds the tree with pool.build_tree() before running its cases. */ +TEST_F(c_dummy_test_t, 5_split_merge_internal_node) +{ + run_async([] { + DummyChildPool pool; + { + logger().info("\n---------------------------------------------" + "\nbefore internal node insert:\n"); + auto padding = std::string(250, '_'); + auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true); + /* Swap the oid2 keys for ones with 257-character padding and the oid4 keys at (5, 5, 5) for ones with 247-character padding. */ keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2)); + keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3)); + keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4)); + keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 2, 2)); + keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 3, 3)); + keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 4, 4)); + auto padding_s = std::string(257, '_'); + keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2)); + keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3)); + keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4)); + auto padding_e = std::string(247, '_'); + keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 2, 2)); + keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 3, 3)); + keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 4, 4)); + pool.build_tree(keys).unsafe_get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to right front at stage 0, 1, 2, 1, 0\n"); + pool.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 5, 
5), {2, {0, {0}}}, + {2u, 0u, false, InsertType::BEGIN}).get(); + /* split_merge() arguments: the key to insert, the position of the child that receives it, and the split_expectation_t that last_split must match. */ pool.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), {2, {0, {0}}}, + {2u, 1u, false, InsertType::BEGIN}).get(); + pool.split_merge(make_ghobj(3, 4, 4, "ns3", "oid3", 3, 3), {2, {0, {0}}}, + {2u, 2u, false, InsertType::BEGIN}).get(); + pool.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), {2, {0, {0}}}, + {2u, 1u, false, InsertType::BEGIN}).get(); + pool.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2" + padding, 1, 1), {2, {0, {0}}}, + {2u, 0u, false, InsertType::BEGIN}).get(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to right middle at stage 0, 1, 2, 1, 0\n"); + pool.split_merge(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5), {3, {0, {0}}}, + {2u, 0u, false, InsertType::MID}).get(); + pool.split_merge(make_ghobj(4, 4, 4, "ns5", "oid5", 3, 3), {3, {0, {0}}}, + {2u, 1u, false, InsertType::MID}).get(); + pool.split_merge(make_ghobj(4, 4, 5, "ns3", "oid3", 3, 3), {3, {0, {0}}}, + {2u, 2u, false, InsertType::MID}).get(); + pool.split_merge(make_ghobj(5, 5, 5, "ns1", "oid1", 3, 3), {3, {0, {0}}}, + {2u, 1u, false, InsertType::MID}).get(); + pool.split_merge(make_ghobj(5, 5, 5, "ns2", "oid2" + padding, 1, 1), {3, {0, {0}}}, + {2u, 0u, false, InsertType::MID}).get(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to right back at stage 0, 1, 2\n"); + /* These cases target the child at the end position. */ pool.split_merge(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 5, 5), search_position_t::end() , + {2u, 0u, false, InsertType::LAST}).get(); + pool.split_merge(make_ghobj(5, 5, 5, "ns5", "oid5", 3, 3), search_position_t::end(), + {2u, 1u, false, InsertType::LAST}).get(); + pool.split_merge(make_ghobj(6, 6, 6, "ns3", "oid3", 3, 3), search_position_t::end(), + {2u, 2u, false, InsertType::LAST}).get(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to left front at stage 2, 1, 0\n"); + 
pool.split_merge(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), {0, {0, {0}}}, + {0u, 2u, true, InsertType::BEGIN}).get(); + pool.split_merge(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), {0, {0, {0}}}, + {0u, 1u, true, InsertType::BEGIN}).get(); + pool.split_merge(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 1, 1), {0, {0, {0}}}, + {0u, 0u, true, InsertType::BEGIN}).get(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to left middle at stage 0, 1, 2, 1, 0\n"); + pool.split_merge(make_ghobj(2, 2, 2, "ns4", "oid4" + padding, 5, 5), {1, {0, {0}}}, + {0u, 0u, true, InsertType::MID}).get(); + pool.split_merge(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), {1, {0, {0}}}, + {0u, 1u, true, InsertType::MID}).get(); + /* This case uses an oid with its own 80-character padding instead of the section-wide padding. */ pool.split_merge(make_ghobj(2, 2, 3, "ns3", "oid3" + std::string(80, '_'), 3, 3), {1, {0, {0}}} , + {0u, 2u, true, InsertType::MID}).get(); + pool.split_merge(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), {1, {0, {0}}}, + {0u, 1u, true, InsertType::MID}).get(); + pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 1, 1), {1, {0, {0}}}, + {0u, 0u, true, InsertType::MID}).get(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to left back at stage 0\n"); + pool.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 3, 4), {1, {2, {2}}}, + {0u, 0u, true, InsertType::LAST}).get(); + } + + { + /* Section (1): rebuilds the tree with 244-character padding and three additional keys inserted before build_tree(). */ logger().info("\n---------------------------------------------" + "\nbefore internal node insert (1):\n"); + auto padding = std::string(244, '_'); + auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true); + keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5)); + keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6)); + keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 7, 7)); + pool.build_tree(keys).unsafe_get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to left back at stage 0, 1, 2, 
1\n"); + pool.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 5, 5), {2, {0, {0}}}, + {2u, 0u, true, InsertType::LAST}).get(); + pool.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), {2, {0, {0}}}, + {2u, 1u, true, InsertType::LAST}).get(); + pool.split_merge(make_ghobj(3, 4, 4, "n", "o", 3, 3), {2, {0, {0}}}, + {2u, 2u, true, InsertType::LAST}).get(); + pool.split_merge(make_ghobj(4, 4, 4, "n", "o", 3, 3), {2, {0, {0}}}, + {2u, 1u, true, InsertType::LAST}).get(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to left middle at stage 2\n"); + pool.split_merge(make_ghobj(2, 3, 3, "n", "o", 3, 3), {1, {0, {0}}}, + {2u, 2u, true, InsertType::MID}).get(); + } + + { + /* Section (2): 243-character padding; three additional keys are inserted, one of them with the short names "n"/"o". */ logger().info("\n---------------------------------------------" + "\nbefore internal node insert (2):\n"); + auto padding = std::string(243, '_'); + auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true); + keys.insert(make_ghobj(4, 4, 4, "n", "o", 3, 3)); + keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5)); + keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6)); + pool.build_tree(keys).unsafe_get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 2; insert to left back at stage (0, 1, 2, 1,) 0\n"); + pool.split_merge(make_ghobj(4, 4, 4, "n", "o", 2, 2), {2, {0, {0}}}, + {2u, 0u, true, InsertType::LAST}).get(); + } + + { + /* Section (3): 419-character padding over a {2, 5} range; the three oid4 keys at (4, 4, 4) are erased before building. */ logger().info("\n---------------------------------------------" + "\nbefore internal node insert (3):\n"); + auto padding = std::string(419, '_'); + auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true); + keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 2, 2)); + keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 3, 3)); + keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 4, 4)); + pool.build_tree(keys).unsafe_get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to 
right front at stage 0, 1, 0\n"); + pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 5, 5), {1, {1, {0}}}, + {1u, 0u, false, InsertType::BEGIN}).get(); + pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), {1, {1, {0}}}, + {1u, 1u, false, InsertType::BEGIN}).get(); + pool.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3" + padding, 1, 1), {1, {1, {0}}}, + {1u, 0u, false, InsertType::BEGIN}).get(); + } + + { + /* Section (4): 361-character padding; the oid2 keys at (2, 2, 2) are replaced by ones with 386-character padding. */ logger().info("\n---------------------------------------------" + "\nbefore internal node insert (4):\n"); + auto padding = std::string(361, '_'); + auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true); + keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2)); + keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3)); + keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4)); + auto padding_s = std::string(386, '_'); + keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2)); + keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3)); + keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4)); + pool.build_tree(keys).unsafe_get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to left back at stage 0, 1\n"); + pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 5, 5), {1, {1, {0}}}, + {1u, 0u, true, InsertType::LAST}).get(); + pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), {1, {1, {0}}}, + {1u, 1u, true, InsertType::LAST}).get(); + + logger().info("\n---------------------------------------------" + "\nfix end index from stage 0 to 0, 1, 2\n"); + /* fix_index() arguments: the replacement key, the position of the child to fix, and whether a split is expected. */ auto padding1 = std::string(400, '_'); + pool.fix_index(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5), + {2, {2, {2}}}, false).get(); + pool.fix_index(make_ghobj(4, 4, 4, "ns5", "oid5" + padding1, 3, 3), + {2, {2, {2}}}, true).get(); + pool.fix_index(make_ghobj(5, 5, 5, "ns3", "oid3" + padding1, 3, 3), + {2, {2, {2}}}, true).get(); + } + + { + 
logger().info("\n---------------------------------------------" + "\nbefore internal node insert (5):\n"); + auto padding = std::string(412, '_'); + /* Sections (5)-(7) call build_key_set() without the trailing boolean used above and insert an explicit "ns~last"/"oid~last" key at (9, 9, 9). */ auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding); + keys.insert(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3)); + keys.insert(make_ghobj(4, 4, 4, "ns3", "oid3" + padding, 5, 5)); + keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9)); + keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 2, 2)); + keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 3, 3)); + keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 4, 4)); + pool.build_tree(keys).unsafe_get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 1; insert to left back at stage (0, 1,) 0\n"); + pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 2, 2), {1, {1, {0}}}, + {1u, 0u, true, InsertType::LAST}).get(); + } + + { + /* Section (6): 328-character padding, plus one key with 270-character padding and the (9, 9, 9) last key. */ logger().info("\n---------------------------------------------" + "\nbefore internal node insert (6):\n"); + auto padding = std::string(328, '_'); + auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding); + keys.insert(make_ghobj(5, 5, 5, "ns3", "oid3" + std::string(270, '_'), 3, 3)); + keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9)); + pool.build_tree(keys).unsafe_get0(); + + logger().info("\n---------------------------------------------" + "\nsplit at stage 0; insert to right front at stage 0\n"); + pool.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3" + padding, 2, 3), {1, {1, {1}}}, + {0u, 0u, false, InsertType::BEGIN}).get(); + + logger().info("\n---------------------------------------------" + "\nfix end index from stage 2 to 0, 1, 2\n"); + auto padding1 = std::string(400, '_'); + pool.fix_index(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5), + {3, {0, {0}}}, false).get(); + pool.fix_index(make_ghobj(4, 4, 4, "ns5", "oid5" + padding1, 3, 3), + {3, {0, {0}}}, true).get(); + pool.fix_index(make_ghobj(5, 5, 5, "ns4", "oid4" + padding1, 3, 3), + {3, {0, {0}}}, 
true).get(); + } + + { + /* Section (7): 323-character padding, plus one ns5/oid5 key and the (9, 9, 9) last key; only fix_index() cases are run here. */ logger().info("\n---------------------------------------------" + "\nbefore internal node insert (7):\n"); + auto padding = std::string(323, '_'); + auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding); + keys.insert(make_ghobj(4, 4, 4, "ns5", "oid5" + padding, 3, 3)); + keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9)); + pool.build_tree(keys).unsafe_get0(); + + logger().info("\n---------------------------------------------" + "\nfix end index from stage 1 to 0, 1, 2\n"); + auto padding1 = std::string(400, '_'); + pool.fix_index(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5), + {2, {3, {0}}}, false).get(); + pool.fix_index(make_ghobj(4, 4, 4, "ns6", "oid6" + padding1, 3, 3), + {2, {3, {0}}}, true).get(); + pool.fix_index(make_ghobj(5, 5, 5, "ns3", "oid3" + padding1, 3, 3), + {2, {3, {0}}}, true).get(); + } + + // Impossible to split at {0, 0, 0} + // Impossible to split at [END, END, END] + }); +} + /* Fixture for the tests below: set-up and tear-down simply delegate to tm_setup() and tm_teardown(). */ +struct d_seastore_tm_test_t : + public seastar_test_suite_t, TMTestState { + seastar::future<> set_up_fut() override final { + return tm_setup(); + } + seastar::future<> tear_down_fut() override final { + return tm_teardown(); + } +}; + /* Test 6: bootstraps a tree, inserts every kv, erases three quarters of them and then the rest. After each phase it collects stats, optionally restarts and reloads (TEST_SEASTORE), and validates; the final height is expected to be 1. */ +TEST_P(d_seastore_tm_test_t, 6_random_tree_insert_erase) +{ + run_async([this] { + constexpr bool TEST_SEASTORE = true; + constexpr bool TRACK_CURSORS = true; + auto kvs = KVPool<test_item_t>::create_raw_range( + {8, 11, 64, 256, 301, 320}, + {8, 11, 64, 256, 301, 320}, + {8, 16, 128, 512, 576, 640}, + {0, 16}, {0, 10}, {0, 4}); + auto moved_nm = (TEST_SEASTORE ? 
NodeExtentManager::create_seastore(*tm) + : NodeExtentManager::create_dummy(IS_DUMMY_SYNC)); + /* Keep a raw pointer: moved_nm is moved into the TreeBuilder on the next statement, and p_nm is used again in the !TEST_SEASTORE check at the end. */ auto p_nm = moved_nm.get(); + auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, BoundedValue>>( + kvs, std::move(moved_nm)); + { + auto t = create_mutate_transaction(); + INTR(tree->bootstrap, *t).unsafe_get(); + submit_transaction(std::move(t)); + } + + // test insert + { + auto t = create_mutate_transaction(); + INTR(tree->insert, *t).unsafe_get(); + submit_transaction(std::move(t)); + } + { + auto t = create_read_transaction(); + INTR(tree->get_stats, *t).unsafe_get(); + } + if constexpr (TEST_SEASTORE) { + restart(); + tree->reload(NodeExtentManager::create_seastore(*tm)); + } + { + // Note: create_weak_transaction() can also work, but too slow. + auto t = create_read_transaction(); + INTR(tree->validate, *t).unsafe_get(); + } + + // test erase 3/4 + { + auto t = create_mutate_transaction(); + auto size = kvs.size() / 4 * 3; + INTR_R(tree->erase, *t, size).unsafe_get(); + submit_transaction(std::move(t)); + } + { + auto t = create_read_transaction(); + INTR(tree->get_stats, *t).unsafe_get(); + } + if constexpr (TEST_SEASTORE) { + restart(); + tree->reload(NodeExtentManager::create_seastore(*tm)); + } + { + auto t = create_read_transaction(); + INTR(tree->validate, *t).unsafe_get(); + } + + // test erase remaining + { + auto t = create_mutate_transaction(); + auto size = kvs.size(); + INTR_R(tree->erase, *t, size).unsafe_get(); + submit_transaction(std::move(t)); + } + { + auto t = create_read_transaction(); + INTR(tree->get_stats, *t).unsafe_get(); + } + if constexpr (TEST_SEASTORE) { + restart(); + tree->reload(NodeExtentManager::create_seastore(*tm)); + } + { + auto t = create_read_transaction(); + INTR(tree->validate, *t).unsafe_get(); + EXPECT_EQ(INTR(tree->height, *t).unsafe_get0(), 1); + } + + if constexpr (!TEST_SEASTORE) { + auto p_dummy = static_cast<DummyManager*>(p_nm); + EXPECT_EQ(p_dummy->size(), 1); + } + tree.reset(); + }); +} + 
/* Test 7: runs bootstrap, insert, lookup and erase against a seastore NodeExtentManager created with EAGAIN_PROBABILITY. Every operation is wrapped in repeat_eagain(); num_ops counts operations and num_ops_eagain counts attempts. */ +TEST_P(d_seastore_tm_test_t, 7_tree_insert_erase_eagain) +{ + run_async([this] { + constexpr double EAGAIN_PROBABILITY = 0.1; + constexpr bool TRACK_CURSORS = false; + auto kvs = KVPool<test_item_t>::create_raw_range( + {8, 11, 64, 128, 255, 256}, + {8, 13, 64, 512, 2035, 2048}, + {8, 16, 128, 576, 992, 1200}, + {0, 8}, {0, 10}, {0, 4}); + auto moved_nm = NodeExtentManager::create_seastore( + *tm, L_ADDR_MIN, EAGAIN_PROBABILITY); + auto p_nm = static_cast<SeastoreNodeExtentManager<true>*>(moved_nm.get()); + auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, ExtendedValue>>( + kvs, std::move(moved_nm)); + unsigned num_ops = 0; + unsigned num_ops_eagain = 0; + + // bootstrap + ++num_ops; + repeat_eagain([this, &tree, &num_ops_eagain] { + ++num_ops_eagain; + return seastar::do_with( + create_mutate_transaction(), + [this, &tree](auto &t) { + return INTR(tree->bootstrap, *t + ).safe_then([this, &t] { + return submit_transaction_fut(*t); + }); + }); + }).unsafe_get0(); + epm->run_background_work_until_halt().get0(); + + // insert + logger().warn("start inserting {} kvs ...", kvs.size()); + { + auto iter = kvs.random_begin(); + while (iter != kvs.random_end()) { + ++num_ops; + repeat_eagain([this, &tree, &num_ops_eagain, &iter] { + ++num_ops_eagain; + return seastar::do_with( + create_mutate_transaction(), + [this, &tree, &iter](auto &t) { + return INTR_R(tree->insert_one, *t, iter + ).safe_then([this, &t](auto cursor) { + cursor.invalidate(); + return submit_transaction_fut(*t); + }); + }); + }).unsafe_get0(); + epm->run_background_work_until_halt().get0(); + ++iter; + } + } + + { + /* EAGAIN generation is switched off around get_stats() and switched back on afterwards. */ p_nm->set_generate_eagain(false); + auto t = create_read_transaction(); + INTR(tree->get_stats, *t).unsafe_get0(); + p_nm->set_generate_eagain(true); + } + + // lookup + logger().warn("start lookup {} kvs ...", kvs.size()); + { + auto iter = kvs.begin(); + while (iter != kvs.end()) { + ++num_ops; + repeat_eagain([this, &tree, &num_ops_eagain, &iter] { + ++num_ops_eagain; + auto t = 
create_read_transaction(); + return INTR_R(tree->validate_one, *t, iter + ).safe_then([t=std::move(t)]{}); + }).unsafe_get0(); + ++iter; + } + } + + // erase + logger().warn("start erase {} kvs ...", kvs.size()); + { + kvs.shuffle(); + auto iter = kvs.random_begin(); + while (iter != kvs.random_end()) { + ++num_ops; + repeat_eagain([this, &tree, &num_ops_eagain, &iter] { + ++num_ops_eagain; + return seastar::do_with( + create_mutate_transaction(), + [this, &tree, &iter](auto &t) { + return INTR_R(tree->erase_one, *t, iter + ).safe_then([this, &t] () mutable { + return submit_transaction_fut(*t); + }); + }); + }).unsafe_get0(); + epm->run_background_work_until_halt().get0(); + ++iter; + } + kvs.erase_from_random(kvs.random_begin(), kvs.random_end()); + } + + { + /* Final check with EAGAIN generation off: stats, full validation, and the tree height is expected to be 1. */ p_nm->set_generate_eagain(false); + auto t = create_read_transaction(); + INTR(tree->get_stats, *t).unsafe_get0(); + INTR(tree->validate, *t).unsafe_get0(); + EXPECT_EQ(INTR(tree->height,*t).unsafe_get0(), 1); + } + + // we can adjust EAGAIN_PROBABILITY to get a proper eagain_rate + /* eagain_rate = attempts / operations, computed in double. */ double eagain_rate = num_ops_eagain; + eagain_rate /= num_ops; + logger().info("eagain rate: {}", eagain_rate); + + tree.reset(); + }); +} + /* Instantiates the d_seastore_tm_test_t suite once per string parameter listed below. */ +INSTANTIATE_TEST_SUITE_P( + d_seastore_tm_test, + d_seastore_tm_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); diff --git a/src/test/crimson/seastore/onode_tree/test_value.h b/src/test/crimson/seastore/onode_tree/test_value.h new file mode 100644 index 000000000..98249f8c9 --- /dev/null +++ b/src/test/crimson/seastore/onode_tree/test_value.h @@ -0,0 +1,240 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <fmt/format.h> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/value.h" + +namespace crimson::os::seastore::onode { + /* test_item_t: description of one test value, made of a total size, an id and a magic number. */ +struct test_item_t { + using id_t = uint16_t; + using magic_t = uint32_t; + + value_size_t size; + id_t id; + magic_t 
magic; + + /* size includes the value header; the payload size is size minus sizeof(value_header_t). */ value_size_t get_payload_size() const { + assert(size > sizeof(value_header_t)); + return static_cast<value_size_t>(size - sizeof(value_header_t)); + } + + /* Builds an item after asserting that _size fits value_size_t and exceeds the value header, and that _id fits id_t. The magic is derived as id * 137 computed in magic_t. */ static test_item_t create(std::size_t _size, std::size_t _id) { + ceph_assert(_size <= std::numeric_limits<value_size_t>::max()); + ceph_assert(_size > sizeof(value_header_t)); + value_size_t size = _size; + + ceph_assert(_id <= std::numeric_limits<id_t>::max()); + id_t id = _id; + + return {size, id, (magic_t)id * 137}; + } +}; +inline std::ostream& operator<<(std::ostream& os, const test_item_t& item) { + return os << "TestItem(#" << item.id << ", " << item.size << "B)"; +} + /* Delta operations that TestValue::Recorder encodes and replays. */ +enum class delta_op_t : uint8_t { + UPDATE_ID, + UPDATE_TAIL_MAGIC, +}; + +inline std::ostream& operator<<(std::ostream& os, const delta_op_t op) { + switch (op) { + case delta_op_t::UPDATE_ID: + return os << "update_id"; + case delta_op_t::UPDATE_TAIL_MAGIC: + return os << "update_tail_magic"; + default: + return os << "unknown"; + } +} + +} // namespace crimson::os::seastore::onode + +#if FMT_VERSION >= 90000 +template<> struct fmt::formatter<crimson::os::seastore::onode::delta_op_t> : fmt::ostream_formatter {}; +#endif + +namespace crimson::os::seastore::onode { + /* TestValue: Value subclass parameterised by the tree configuration. Its payload starts with an id (payload_t) and its last sizeof(magic_t) bytes hold a magic number. */ +template <value_magic_t MAGIC, + string_size_t MAX_NS_SIZE, + string_size_t MAX_OID_SIZE, + value_size_t MAX_VALUE_PAYLOAD_SIZE, + extent_len_t INTERNAL_NODE_SIZE, + extent_len_t LEAF_NODE_SIZE, + bool DO_SPLIT_CHECK> +class TestValue final : public Value { + public: + static constexpr tree_conf_t TREE_CONF = { + MAGIC, + MAX_NS_SIZE, + MAX_OID_SIZE, + MAX_VALUE_PAYLOAD_SIZE, + INTERNAL_NODE_SIZE, + LEAF_NODE_SIZE, + DO_SPLIT_CHECK + }; + + using id_t = test_item_t::id_t; + using magic_t = test_item_t::magic_t; + struct magic_packed_t { + magic_t value; + } __attribute__((packed)); + + private: + struct payload_t { + id_t id; + } __attribute__((packed)); + + struct Replayable { + static void set_id(NodeExtentMutable& payload_mut, id_t id) { + auto p_payload = 
get_write(payload_mut); + p_payload->id = id; + } + + /* Copies magic into the last sizeof(magic_t) bytes of the payload. */ static void set_tail_magic(NodeExtentMutable& payload_mut, magic_t magic) { + auto length = payload_mut.get_length(); + auto offset_magic = length - sizeof(magic_t); + payload_mut.copy_in_relative(offset_magic, magic); + } + + private: + static payload_t* get_write(NodeExtentMutable& payload_mut) { + return reinterpret_cast<payload_t*>(payload_mut.get_write()); + } + }; + + public: + /* Encodes the two TestValue delta operations and applies them again on replay. */ class Recorder final : public ValueDeltaRecorder { + + public: + Recorder(ceph::bufferlist& encoded) + : ValueDeltaRecorder(encoded) {} + ~Recorder() override = default; + + /* Appends an UPDATE_ID tag followed by the id. */ void encode_set_id(NodeExtentMutable& payload_mut, id_t id) { + auto& encoded = get_encoded(payload_mut); + ceph::encode(delta_op_t::UPDATE_ID, encoded); + ceph::encode(id, encoded); + } + + /* Appends an UPDATE_TAIL_MAGIC tag followed by the magic. */ void encode_set_tail_magic(NodeExtentMutable& payload_mut, magic_t magic) { + auto& encoded = get_encoded(payload_mut); + ceph::encode(delta_op_t::UPDATE_TAIL_MAGIC, encoded); + ceph::encode(magic, encoded); + } + + protected: + value_magic_t get_header_magic() const override { + return TREE_CONF.value_magic; + } + + /* Decodes one op tag from delta and applies the matching Replayable setter to payload_mut. */ void apply_value_delta(ceph::bufferlist::const_iterator& delta, + NodeExtentMutable& payload_mut, + laddr_t value_addr) override { + delta_op_t op; + try { + ceph::decode(op, delta); + switch (op) { + case delta_op_t::UPDATE_ID: { + logger().debug("OTree::TestValue::Replay: decoding UPDATE_ID ..."); + id_t id; + ceph::decode(id, delta); + logger().debug("OTree::TestValue::Replay: apply id={} ...", id); + Replayable::set_id(payload_mut, id); + break; + } + case delta_op_t::UPDATE_TAIL_MAGIC: { + logger().debug("OTree::TestValue::Replay: decoding UPDATE_TAIL_MAGIC ..."); + magic_t magic; + ceph::decode(magic, delta); + logger().debug("OTree::TestValue::Replay: apply magic={} ...", magic); + Replayable::set_tail_magic(payload_mut, magic); + break; + } + default: + logger().error("OTree::TestValue::Replay: got unknown op {} when replay {:#x}+{:#x}", + op,
value_addr, payload_mut.get_length()); + ceph_abort(); + } + } catch (buffer::error& e) { + logger().error("OTree::TestValue::Replay: got decode error {} when replay {:#x}+{:#x}", + e.what(), value_addr, payload_mut.get_length()); + ceph_abort(); + } + } + + private: + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } + }; + + TestValue(NodeExtentManager& nm, const ValueBuilder& vb, Ref<tree_cursor_t>& p_cursor) + : Value(nm, vb, p_cursor) {} + ~TestValue() override = default; + + id_t get_id() const { + return read_payload<payload_t>()->id; + } + void set_id_replayable(Transaction& t, id_t id) { + auto value_mutable = prepare_mutate_payload<payload_t, Recorder>(t); + if (value_mutable.second) { + value_mutable.second->encode_set_id(value_mutable.first, id); + } + Replayable::set_id(value_mutable.first, id); + } + + magic_t get_tail_magic() const { + auto p_payload = read_payload<payload_t>(); + auto offset_magic = get_payload_size() - sizeof(magic_t); + auto p_magic = reinterpret_cast<const char*>(p_payload) + offset_magic; + return reinterpret_cast<const magic_packed_t*>(p_magic)->value; + } + void set_tail_magic_replayable(Transaction& t, magic_t magic) { + auto value_mutable = prepare_mutate_payload<payload_t, Recorder>(t); + if (value_mutable.second) { + value_mutable.second->encode_set_tail_magic(value_mutable.first, magic); + } + Replayable::set_tail_magic(value_mutable.first, magic); + } + + /* + * tree_util.h related interfaces + */ + + using item_t = test_item_t; + + void initialize(Transaction& t, const item_t& item) { + ceph_assert(get_payload_size() + sizeof(value_header_t) == item.size); + set_id_replayable(t, item.id); + set_tail_magic_replayable(t, item.magic); + } + + void validate(const item_t& item) const { + ceph_assert(get_payload_size() + sizeof(value_header_t) == item.size); + ceph_assert(get_id() == item.id); + ceph_assert(get_tail_magic() == item.magic); + } +}; + +using UnboundedValue = TestValue< + 
value_magic_t::TEST_UNBOUND, 4096, 4096, 4096, 4096, 4096, false>; +using BoundedValue = TestValue< + value_magic_t::TEST_BOUNDED, 320, 320, 640, 4096, 4096, true>; +// should be the same configuration as FLTreeOnode +using ExtendedValue = TestValue< + value_magic_t::TEST_EXTENDED, 256, 2048, 1200, 8192, 16384, true>; + +} + +#if FMT_VERSION >= 90000 +template<> +struct fmt::formatter<crimson::os::seastore::onode::test_item_t> : fmt::ostream_formatter {}; +#endif diff --git a/src/test/crimson/seastore/test_block.cc b/src/test/crimson/seastore/test_block.cc new file mode 100644 index 000000000..f7a39b0ef --- /dev/null +++ b/src/test/crimson/seastore/test_block.cc @@ -0,0 +1,41 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/seastore/test_block.h" + +namespace crimson::os::seastore { + + /* Encodes the accumulated delta vector into a fresh bufferlist. */ +ceph::bufferlist TestBlock::get_delta() { + ceph::bufferlist bl; + encode(delta, bl); + return bl; +} + + /* Decodes a delta vector from bl and replays every entry through set_contents. NOTE(review): set_contents also appends to delta, so replay re-records each applied entry; confirm this is intended. */ +void TestBlock::apply_delta(const ceph::bufferlist &bl) { + auto biter = bl.begin(); + decltype(delta) deltas; + decode(deltas, biter); + for (auto &&d : deltas) { + set_contents(d.val, d.offset, d.len); + } +} + /* Encodes the accumulated delta vector into a fresh bufferlist. */ +ceph::bufferlist TestBlockPhysical::get_delta() { + ceph::bufferlist bl; + encode(delta, bl); + return bl; +} + /* Same replay as TestBlock::apply_delta; the paddr_t argument is unused. */ +void TestBlockPhysical::apply_delta_and_adjust_crc( + paddr_t, const ceph::bufferlist &bl) { + auto biter = bl.begin(); + decltype(delta) deltas; + decode(deltas, biter); + for (auto &&d : deltas) { + set_contents(d.val, d.offset, d.len); + } +} + +} diff --git a/src/test/crimson/seastore/test_block.h b/src/test/crimson/seastore/test_block.h new file mode 100644 index 000000000..ccdafb784 --- /dev/null +++ b/src/test/crimson/seastore/test_block.h @@ -0,0 +1,154 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <random> + +#include "crimson/os/seastore/transaction_manager.h" + +namespace
crimson::os::seastore { + /* Length and checksum pair, as returned by TestBlock::get_desc(). */ +struct test_extent_desc_t { + size_t len = 0; + unsigned checksum = 0; + + bool operator==(const test_extent_desc_t &rhs) const { + return (len == rhs.len && + checksum == rhs.checksum); + } + bool operator!=(const test_extent_desc_t &rhs) const { + return !(*this == rhs); + } +}; + /* One recorded set_contents call: fill value, offset and length. */ +struct test_block_delta_t { + int8_t val = 0; + uint16_t offset = 0; + uint16_t len = 0; + + + DENC(test_block_delta_t, v, p) { + DENC_START(1, 1, p); + denc(v.val, p); + denc(v.offset, p); + denc(v.len, p); + DENC_FINISH(p); + } +}; + +inline std::ostream &operator<<( + std::ostream &lhs, const test_extent_desc_t &rhs) { + return lhs << "test_extent_desc_t(len=" << rhs.len + << ", checksum=" << rhs.checksum << ")"; +} + /* Logical test extent that records every set_contents call in delta. */ +struct TestBlock : crimson::os::seastore::LogicalCachedExtent { + constexpr static extent_len_t SIZE = 4<<10; + using Ref = TCachedExtentRef<TestBlock>; + + std::vector<test_block_delta_t> delta = {}; + + TestBlock(ceph::bufferptr &&ptr) + : LogicalCachedExtent(std::move(ptr)) {} + /* The copy starts with an empty delta vector; only the base class is copied. */ TestBlock(const TestBlock &other) + : LogicalCachedExtent(other) {} + + CachedExtentRef duplicate_for_write(Transaction&) final { + return CachedExtentRef(new TestBlock(*this)); + }; + + static constexpr extent_types_t TYPE = extent_types_t::TEST_BLOCK; + extent_types_t get_type() const final { + return TYPE; + } + + ceph::bufferlist get_delta() final; + + /* Fills len bytes starting at offset with c and appends the write to delta. NOTE(review): offset + len is not checked against the buffer length here; presumably callers stay within SIZE, confirm. */ void set_contents(char c, uint16_t offset, uint16_t len) { + ::memset(get_bptr().c_str() + offset, c, len); + delta.push_back({c, offset, len}); + } + + /* Fills the whole extent; get_length() is narrowed to the uint16_t len parameter. */ void set_contents(char c) { + set_contents(c, 0, get_length()); + } + + test_extent_desc_t get_desc() { + return { get_length(), get_crc32c() }; + } + + void apply_delta(const ceph::bufferlist &bl) final; +}; +using TestBlockRef = TCachedExtentRef<TestBlock>; + /* Counterpart of TestBlock that derives from CachedExtent directly. */ +struct TestBlockPhysical : crimson::os::seastore::CachedExtent{ + constexpr static extent_len_t SIZE = 4<<10; + using Ref = TCachedExtentRef<TestBlockPhysical>; + + std::vector<test_block_delta_t> delta
= {}; + + TestBlockPhysical(ceph::bufferptr &&ptr) + : CachedExtent(std::move(ptr)) {} + /* The copy starts with an empty delta vector; only the base class is copied. */ TestBlockPhysical(const TestBlockPhysical &other) + : CachedExtent(other) {} + + CachedExtentRef duplicate_for_write(Transaction&) final { + return CachedExtentRef(new TestBlockPhysical(*this)); + }; + + static constexpr extent_types_t TYPE = extent_types_t::TEST_BLOCK_PHYSICAL; + extent_types_t get_type() const final { + return TYPE; + } + + /* Fills len bytes starting at offset with c and appends the write to delta. */ void set_contents(char c, uint16_t offset, uint16_t len) { + ::memset(get_bptr().c_str() + offset, c, len); + delta.push_back({c, offset, len}); + } + + void set_contents(char c) { + set_contents(c, 0, get_length()); + } + + ceph::bufferlist get_delta() final; + + void apply_delta_and_adjust_crc(paddr_t, const ceph::bufferlist &bl) final; +}; +using TestBlockPhysicalRef = TCachedExtentRef<TestBlockPhysical>; + /* Applies one random set_contents() call to a TestBlock. The fill value is drawn from an int distribution over the int8_t range because std::uniform_int_distribution is only specified for short, int, long, long long and their unsigned forms; the drawn value is narrowed to char at the call site. */ +struct test_block_mutator_t { + std::uniform_int_distribution<int> + contents_distribution = std::uniform_int_distribution<int>( + std::numeric_limits<int8_t>::min(), + std::numeric_limits<int8_t>::max()); + + std::uniform_int_distribution<uint16_t> + offset_distribution = std::uniform_int_distribution<uint16_t>( + 0, TestBlock::SIZE - 1); + + /* Length range chosen so that offset + length stays below TestBlock::SIZE. */ std::uniform_int_distribution<uint16_t> length_distribution(uint16_t offset) { + return std::uniform_int_distribution<uint16_t>( + 0, TestBlock::SIZE - offset - 1); + } + + + template <typename generator_t> + void mutate(TestBlock &block, generator_t &gen) { + auto offset = offset_distribution(gen); + block.set_contents( + static_cast<char>(contents_distribution(gen)), + offset, + length_distribution(offset)(gen)); + } +}; + +} + +WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::test_block_delta_t) + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter<crimson::os::seastore::test_extent_desc_t> : fmt::ostream_formatter {}; +template <> struct fmt::formatter<crimson::os::seastore::TestBlock> : fmt::ostream_formatter {}; +template <> struct fmt::formatter<crimson::os::seastore::TestBlockPhysical> :
fmt::ostream_formatter {}; +#endif diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc new file mode 100644 index 000000000..f18c3ac67 --- /dev/null +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -0,0 +1,752 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include "crimson/common/log.h" + +#include "crimson/os/seastore/journal.h" +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/segment_manager/ephemeral.h" +#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" + +#include "test/crimson/seastore/test_block.h" + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace crimson::os::seastore::lba_manager; +using namespace crimson::os::seastore::lba_manager::btree; + /* Shared fixture for the tests in this file: it owns the segment manager, journal, epm and cache, and implements the SegmentProvider and JournalTrimmer hooks with stub bodies. */ +struct btree_test_base : + public seastar_test_suite_t, SegmentProvider, JournalTrimmer { + + segment_manager::EphemeralSegmentManagerRef segment_manager; + SegmentManagerGroupRef sms; + JournalRef journal; + ExtentPlacementManagerRef epm; + CacheRef cache; + + /* assigned in set_up_fut from segment_manager->get_block_size() */ size_t block_size; + + WritePipeline pipeline; + + /* id handed out by the next allocate_segment call */ segment_id_t next; + + std::map<segment_id_t, segment_seq_t> segment_seqs; + std::map<segment_id_t, segment_type_t> segment_types; + + journal_seq_t dummy_tail; + + mutable segment_info_t tmp_info; + + btree_test_base() = default; + + /* + * JournalTrimmer interfaces + */ + journal_seq_t get_journal_head() const final { return dummy_tail; } + + void set_journal_head(journal_seq_t) final {} + + journal_seq_t get_dirty_tail() const final { return dummy_tail; } + + journal_seq_t get_alloc_tail() const final { return dummy_tail; } + + void update_journal_tails(journal_seq_t, journal_seq_t) final {} + + bool
try_reserve_inline_usage(std::size_t) final { return true; } + + void release_inline_usage(std::size_t) final {} + + std::size_t get_trim_size_per_cycle() const final { + return 0; + } + + /* + * SegmentProvider interfaces + */ + /* Rebuilds tmp_info from the recorded seq and type maps; map::at throws for an id that was never allocated. */ const segment_info_t& get_seg_info(segment_id_t id) const final { + tmp_info = {}; + tmp_info.seq = segment_seqs.at(id); + tmp_info.type = segment_types.at(id); + return tmp_info; + } + + /* Returns the current value of next, records its seq and type, and advances next to the following device segment id. */ segment_id_t allocate_segment( + segment_seq_t seq, + segment_type_t type, + data_category_t, + rewrite_gen_t + ) final { + auto ret = next; + next = segment_id_t{ + segment_manager->get_device_id(), + next.device_segment_id() + 1}; + segment_seqs[ret] = seq; + segment_types[ret] = type; + return ret; + } + + void close_segment(segment_id_t) final {} + + void update_segment_avail_bytes(segment_type_t, paddr_t) final {} + + void update_modify_time(segment_id_t, sea_time_point, std::size_t) final {} + + SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); } + + /* Hook called after the cache commit completes; empty by default. */ virtual void complete_commit(Transaction &t) {} + /* Prepares the record in the cache, submits it to the journal, then completes the commit in the cache and calls complete_commit(). Any error asserts. */ seastar::future<> submit_transaction(TransactionRef t) + { + auto record = cache->prepare_record(*t, JOURNAL_SEQ_NULL, JOURNAL_SEQ_NULL); + return journal->submit_record(std::move(record), t->get_handle()).safe_then( + [this, t=std::move(t)](auto submit_result) mutable { + cache->complete_commit( + *t, + submit_result.record_block_base, + submit_result.write_result.start_seq); + complete_commit(*t); + }).handle_error(crimson::ct_error::assert_all{}); + } + + /* Implemented by each derived fixture to create the structure under test during set-up. */ virtual LBAManager::mkfs_ret test_structure_setup(Transaction &t) = 0; + seastar::future<> set_up_fut() final { + segment_manager = segment_manager::create_test_ephemeral(); + return segment_manager->init( + ).safe_then([this] { + return segment_manager->mkfs( + segment_manager::get_ephemeral_device_config(0, 1, 0)); + }).safe_then([this] { + sms.reset(new SegmentManagerGroup()); + journal = journal::make_segmented(*this, *this); + epm.reset(new ExtentPlacementManager()); +
cache.reset(new Cache(*epm)); + + block_size = segment_manager->get_block_size(); + next = segment_id_t{segment_manager->get_device_id(), 0}; + sms->add_segment_manager(segment_manager.get()); + epm->test_init_no_background(segment_manager.get()); + journal->set_write_pipeline(&pipeline); + + return journal->open_for_mkfs().discard_result(); + }).safe_then([this] { + dummy_tail = journal_seq_t{0, + paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0)}; + return epm->open_for_write(); + }).safe_then([this] { + /* the initial transaction runs cache->mkfs and test_structure_setup, then is submitted */ return seastar::do_with( + cache->create_transaction( + Transaction::src_t::MUTATE, "test_set_up_fut", false), + [this](auto &ref_t) { + return with_trans_intr(*ref_t, [&](auto &t) { + cache->init(); + return cache->mkfs(t + ).si_then([this, &t] { + return test_structure_setup(t); + }); + }).safe_then([this, &ref_t] { + return submit_transaction(std::move(ref_t)); + }); + }); + }).handle_error( + crimson::ct_error::all_same_way([] { + ceph_assert(0 == "error"); + }) + ); + } + + /* Hook that lets a derived fixture drop its own state during tear-down; empty by default. */ virtual void test_structure_reset() {} + /* Closes cache, journal and epm in that order, then releases every owned object. */ seastar::future<> tear_down_fut() final { + return cache->close( + ).safe_then([this] { + return journal->close(); + }).safe_then([this] { + return epm->close(); + }).safe_then([this] { + test_structure_reset(); + segment_manager.reset(); + sms.reset(); + journal.reset(); + epm.reset(); + cache.reset(); + }).handle_error( + crimson::ct_error::all_same_way([] { + ASSERT_FALSE("Unable to close"); + }) + ); + } +}; + /* Fixture that drives LBABtree directly and mirrors the expected contents in check. */ +struct lba_btree_test : btree_test_base { + std::map<laddr_t, lba_map_val_t> check; + + auto get_op_context(Transaction &t) { + return op_context_t<laddr_t>{*cache, t}; + } + + /* Duplicates the root block for write and stores a freshly made LBA btree root in it. */ LBAManager::mkfs_ret test_structure_setup(Transaction &t) final { + return cache->get_root( + t + ).si_then([this, &t](RootBlockRef croot) { + auto mut_croot = cache->duplicate_for_write( + t, croot + )->cast<RootBlock>(); + mut_croot->root.lba_root = + LBABtree::mkfs(mut_croot, get_op_context(t)); + }); + } + + template <typename F> +
/* Runs f(btree, t) inside a MUTATE transaction, submits the transaction and waits for the result. */ auto lba_btree_update(F &&f) { + auto tref = cache->create_transaction( + Transaction::src_t::MUTATE, "test_btree_update", false); + auto &t = *tref; + with_trans_intr( + t, + [this, tref=std::move(tref), f=std::forward<F>(f)](auto &t) mutable { + return cache->get_root( + t + /* mutable so that std::move(f) below moves instead of copying, as in lba_btree_read */ ).si_then([f=std::move(f), &t](RootBlockRef croot) mutable { + return seastar::do_with( + LBABtree(croot), + [f=std::move(f), &t](auto &btree) mutable { + return std::invoke( + std::move(f), btree, t + ); + }); + }).si_then([this, tref=std::move(tref)]() mutable { + return submit_transaction(std::move(tref)); + }); + }).unsafe_get0(); + } + + template <typename F> + /* Runs f(btree, t) inside a READ transaction and returns its result. */ auto lba_btree_read(F &&f) { + auto t = cache->create_transaction( + Transaction::src_t::READ, "test_btree_read", false); + return with_trans_intr( + *t, + [this, f=std::forward<F>(f)](auto &t) mutable { + return cache->get_root( + t + ).si_then([f=std::move(f), &t](RootBlockRef croot) mutable { + return seastar::do_with( + LBABtree(croot), + [f=std::move(f), &t](auto &btree) mutable { + return std::invoke( + std::move(f), btree, t + ); + }); + }); + }).unsafe_get0(); + } + + static auto get_map_val(extent_len_t len) { + return lba_map_val_t{0, (pladdr_t)P_ADDR_NULL, len, 0}; + } + + device_off_t next_off = 0; + /* Returns a new fake paddr, block_size past the previous one. */ paddr_t get_paddr() { + next_off += block_size; + return make_fake_paddr(next_off); + } + + /* Records addr in check and inserts the same mapping into the btree; addr must not be present yet. */ void insert(laddr_t addr, extent_len_t len) { + ceph_assert(check.count(addr) == 0); + check.emplace(addr, get_map_val(len)); + lba_btree_update([=, this](auto &btree, auto &t) { + auto extent = cache->alloc_new_extent<TestBlock>( + t, + TestBlock::SIZE, + placement_hint_t::HOT, + 0, + get_paddr()); + return btree.insert( + get_op_context(t), addr, get_map_val(len), extent.get() + ).si_then([addr, extent](auto p){ + auto& [iter, inserted] = p; + /* ceph_assert, like the check above, so the insertion is verified in NDEBUG builds too */ ceph_assert(inserted); + extent->set_laddr(addr); + }); + }); + } + + /* Erases addr from check, then removes the matching entry from the btree. */ void remove(laddr_t addr) { + auto iter = check.find(addr); + ceph_assert(iter != check.end()); + auto len = iter->second.len; +
check.erase(iter++); + lba_btree_update([=, this](auto &btree, auto &t) { + return btree.lower_bound( + get_op_context(t), addr + ).si_then([this, len, addr, &btree, &t](auto iter) { + EXPECT_FALSE(iter.is_end()); + EXPECT_TRUE(iter.get_key() == addr); + EXPECT_TRUE(iter.get_val().len == len); + return btree.remove( + get_op_context(t), iter + ); + }); + }); + } + + /* Compares the btree's lower_bound(addr) with std::map::lower_bound on check. */ void check_lower_bound(laddr_t addr) { + auto iter = check.lower_bound(addr); + auto result = lba_btree_read([=, this](auto &btree, auto &t) { + return btree.lower_bound( + get_op_context(t), addr + ).si_then([](auto iter) + -> std::optional<std::pair<const laddr_t, const lba_map_val_t>> { + if (iter.is_end()) { + return std::nullopt; + } else { + return std::make_optional( + std::make_pair(iter.get_key(), iter.get_val())); + } + }); + }); + if (iter == check.end()) { + EXPECT_FALSE(result); + } else { + EXPECT_TRUE(result); + decltype(result) to_check = *iter; + EXPECT_EQ(to_check, *result); + } + } +}; + /* Inserts a length-8 mapping at every 16th address below 16<<10, then probes lower_bound at four offsets per stride. */ +TEST_F(lba_btree_test, basic) +{ + run_async([this] { + constexpr unsigned total = 16<<10; + for (unsigned i = 0; i < total; i += 16) { + insert(i, 8); + } + + for (unsigned i = 0; i < total; i += 16) { + check_lower_bound(i); + check_lower_bound(i + 4); + check_lower_bound(i + 8); + check_lower_bound(i + 12); + } + }); +} + /* Fixture that exercises BtreeLBAManager and tracks the expected mappings in test_lba_mappings. */ +struct btree_lba_manager_test : btree_test_base { + BtreeLBAManagerRef lba_manager; + + btree_lba_manager_test() = default; + + void complete_commit(Transaction &t) final {} + + LBAManager::mkfs_ret test_structure_setup(Transaction &t) final { + lba_manager.reset(new BtreeLBAManager(*cache)); + return lba_manager->mkfs(t); + } + + void test_structure_reset() final { + lba_manager.reset(); + } + + /* Expected state of one mapping: address, length and reference count. */ struct test_extent_t { + paddr_t addr; + size_t len = 0; + unsigned refcount = 0; + }; + using test_lba_mapping_t = std::map<laddr_t, test_extent_t>; + test_lba_mapping_t test_lba_mappings; + /* A transaction paired with its own copy of the expected mappings. */ struct test_transaction_t { + TransactionRef t; + test_lba_mapping_t mappings; + }; + + auto
create_transaction(bool create_fake_extent=true) { + auto t = test_transaction_t{ + cache->create_transaction( + Transaction::src_t::MUTATE, "test_mutate_lba", false), + test_lba_mappings + }; + /* optionally allocates one TestBlockPhysical extent in the new transaction */ if (create_fake_extent) { + cache->alloc_new_extent<TestBlockPhysical>( + *t.t, + TestBlockPhysical::SIZE, + placement_hint_t::HOT, + 0); + }; + return t; + } + + auto create_weak_transaction() { + auto t = test_transaction_t{ + cache->create_transaction( + Transaction::src_t::READ, "test_read_weak", true), + test_lba_mappings + }; + return t; + } + + /* Submits the transaction and, once done, adopts its mappings as the committed expectation. */ void submit_test_transaction(test_transaction_t t) { + submit_transaction(std::move(t.t)).get(); + test_lba_mappings.swap(t.mappings); + } + + /* Returns the iterator range of tracked mappings that overlap the interval starting at addr with length len. */ auto get_overlap(test_transaction_t &t, laddr_t addr, size_t len) { + auto bottom = t.mappings.upper_bound(addr); + if (bottom != t.mappings.begin()) + --bottom; + if (bottom != t.mappings.end() && + bottom->first + bottom->second.len <= addr) + ++bottom; + + auto top = t.mappings.lower_bound(addr + len); + return std::make_pair( + bottom, + top + ); + } + + device_off_t next_off = 0; + paddr_t get_paddr() { + next_off += block_size; + return make_fake_paddr(next_off); + } + + /* Allocates a mapping through lba_manager, expects it not to overlap any tracked mapping, and records it with refcount 1. */ auto alloc_mapping( + test_transaction_t &t, + laddr_t hint, + size_t len) { + auto ret = with_trans_intr( + *t.t, + [=, this](auto &t) { + auto extent = cache->alloc_new_extent<TestBlock>( + t, + TestBlock::SIZE, + placement_hint_t::HOT, + 0, + get_paddr()); + return lba_manager->alloc_extent( + t, hint, len, extent->get_paddr(), *extent); + }).unsafe_get0(); + logger().debug("alloc'd: {}", *ret); + EXPECT_EQ(len, ret->get_length()); + auto [b, e] = get_overlap(t, ret->get_key(), len); + EXPECT_EQ(b, e); + t.mappings.emplace( + std::make_pair( + ret->get_key(), + test_extent_t{ + ret->get_val(), + ret->get_length(), + 1 + } + )); + return ret; + } + + /* Address overload: looks the mapping up and forwards to the iterator overload. */ auto decref_mapping( + test_transaction_t &t, + laddr_t addr) { + return decref_mapping(t, t.mappings.find(addr)); + } + + void decref_mapping( + test_transaction_t
&t, + test_lba_mapping_t::iterator target) { + ceph_assert(target != t.mappings.end()); + ceph_assert(target->second.refcount > 0); + target->second.refcount--; + + /* decrements in the LBA manager, compares refcounts, and retires the extent once the count reaches zero */ (void) with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->decref_extent( + t, + target->first, + true + ).si_then([this, &t, target](auto result) { + EXPECT_EQ(result.refcount, target->second.refcount); + if (result.refcount == 0) { + return cache->retire_extent_addr( + t, result.addr.get_paddr(), result.length); + } + return Cache::retire_extent_iertr::now(); + }); + }).unsafe_get0(); + if (target->second.refcount == 0) { + t.mappings.erase(target); + } + } + + /* Address overload: looks the mapping up and forwards to the iterator overload. */ auto incref_mapping( + test_transaction_t &t, + laddr_t addr) { + return incref_mapping(t, t.mappings.find(addr)); + } + + /* Increments the expected refcount and the LBA manager's, then compares them. */ void incref_mapping( + test_transaction_t &t, + test_lba_mapping_t::iterator target) { + /* target must be a tracked mapping, same precondition as decref_mapping */ ceph_assert(target != t.mappings.end()); + ceph_assert(target->second.refcount > 0); + target->second.refcount++; + auto refcnt = with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->incref_extent( + t, + target->first); + }).unsafe_get0().refcount; + EXPECT_EQ(refcnt, target->second.refcount); + } + + /* Keys of the committed expected mappings, in map order. */ std::vector<laddr_t> get_mapped_addresses() { + std::vector<laddr_t> addresses; + addresses.reserve(test_lba_mappings.size()); + for (auto &i: test_lba_mappings) { + addresses.push_back(i.first); + } + return addresses; + } + + /* Keys of the mappings expected inside transaction t, in map order. */ std::vector<laddr_t> get_mapped_addresses(test_transaction_t &t) { + std::vector<laddr_t> addresses; + addresses.reserve(t.mappings.size()); + for (auto &i: t.mappings) { + addresses.push_back(i.first); + } + return addresses; + } + + /* Checks the committed expectation through a fresh transaction. */ void check_mappings() { + auto t = create_transaction(); + check_mappings(t); + } + + void check_mappings(test_transaction_t &t) { + (void)with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->check_child_trackers(t); + }).unsafe_get0(); + for (auto &&i: t.mappings) { + auto laddr = i.first; + auto len = i.second.len; + + auto ret_list = with_trans_intr( + *t.t, + [=, +
this](auto &t) { + return lba_manager->get_mappings( + t, laddr, len); + }).unsafe_get0(); + EXPECT_EQ(ret_list.size(), 1); + auto &ret = *ret_list.begin(); + EXPECT_EQ(i.second.addr, ret->get_val()); + EXPECT_EQ(laddr, ret->get_key()); + EXPECT_EQ(len, ret->get_length()); + + auto ret_pin = with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->get_mapping( + t, laddr); + }).unsafe_get0(); + EXPECT_EQ(i.second.addr, ret_pin->get_val()); + EXPECT_EQ(laddr, ret_pin->get_key()); + EXPECT_EQ(len, ret_pin->get_length()); + } + /* the scan is expected to report the tracked mappings one by one in key order */ with_trans_intr( + *t.t, + [=, &t, this](auto &) { + return lba_manager->scan_mappings( + *t.t, + 0, + L_ADDR_MAX, + [iter=t.mappings.begin(), &t](auto l, auto p, auto len) mutable { + EXPECT_NE(iter, t.mappings.end()); + EXPECT_EQ(l, iter->first); + EXPECT_EQ(p, iter->second.addr); + EXPECT_EQ(len, iter->second.len); + ++iter; + }); + }).unsafe_get(); + } +}; + /* Allocates one mapping and checks the expectations before and after the commit. */ +TEST_F(btree_lba_manager_test, basic) +{ + run_async([this] { + laddr_t laddr = 0x12345678 * block_size; + { + // write initial mapping + auto t = create_transaction(); + check_mappings(t); // check in progress transaction sees mapping + check_mappings(); // check concurrent does not + auto ret = alloc_mapping(t, laddr, block_size); + submit_test_transaction(std::move(t)); + } + check_mappings(); // check new transaction post commit sees it + }); +} + /* 40 transactions of 5 allocations each, checked after every commit. */ +TEST_F(btree_lba_manager_test, force_split) +{ + run_async([this] { + for (unsigned i = 0; i < 40; ++i) { + auto t = create_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 5; ++j) { + auto ret = alloc_mapping(t, 0, block_size); + if ((i % 10 == 0) && (j == 3)) { + check_mappings(t); + check_mappings(); + } + } + logger().debug("submitting transaction"); + submit_test_transaction(std::move(t)); + check_mappings(); + } + }); +} + +TEST_F(btree_lba_manager_test, force_split_merge) +{ + run_async([this] { + for (unsigned i = 0; i < 80; ++i) { + auto t = create_transaction(); +
logger().debug("opened transaction"); + for (unsigned j = 0; j < 5; ++j) { + auto ret = alloc_mapping(t, 0, block_size); + // just to speed things up a bit + if ((i % 100 == 0) && (j == 3)) { + check_mappings(t); + check_mappings(); + } + incref_mapping(t, ret->get_key()); + decref_mapping(t, ret->get_key()); + } + logger().debug("submitting transaction"); + submit_test_transaction(std::move(t)); + if (i % 50 == 0) { + check_mappings(); + } + } + { + /* second phase: remove every other committed mapping, committing every 7th index */ auto addresses = get_mapped_addresses(); + auto t = create_transaction(); + for (unsigned i = 0; i != addresses.size(); ++i) { + if (i % 2 == 0) { + incref_mapping(t, addresses[i]); + decref_mapping(t, addresses[i]); + decref_mapping(t, addresses[i]); + } + logger().debug("submitting transaction"); + if (i % 7 == 0) { + submit_test_transaction(std::move(t)); + t = create_transaction(); + } + if (i % 13 == 0) { + check_mappings(); + check_mappings(t); + } + } + submit_test_transaction(std::move(t)); + } + { + /* third phase: remove all remaining mappings in one transaction */ auto addresses = get_mapped_addresses(); + auto t = create_transaction(); + for (unsigned i = 0; i != addresses.size(); ++i) { + incref_mapping(t, addresses[i]); + decref_mapping(t, addresses[i]); + decref_mapping(t, addresses[i]); + } + check_mappings(t); + submit_test_transaction(std::move(t)); + check_mappings(); + } + }); +} + /* Allocates 400 mappings, removes three of every four, then allocates 600 more and removes everything; each step is one transaction. */ +TEST_F(btree_lba_manager_test, single_transaction_split_merge) +{ + run_async([this] { + { + auto t = create_transaction(); + for (unsigned i = 0; i < 400; ++i) { + alloc_mapping(t, 0, block_size); + } + check_mappings(t); + submit_test_transaction(std::move(t)); + } + check_mappings(); + + { + auto addresses = get_mapped_addresses(); + auto t = create_transaction(); + for (unsigned i = 0; i != addresses.size(); ++i) { + if (i % 4 != 0) { + decref_mapping(t, addresses[i]); + } + } + check_mappings(t); + submit_test_transaction(std::move(t)); + } + check_mappings(); + + { + auto t = create_transaction(); + for (unsigned i = 0; i < 600; ++i) { + alloc_mapping(t, 0, block_size); + } + auto +
addresses = get_mapped_addresses(t); + for (unsigned i = 0; i != addresses.size(); ++i) { + decref_mapping(t, addresses[i]); + } + check_mappings(t); + submit_test_transaction(std::move(t)); + } + check_mappings(); + }); +} + /* Four passes of 1024 transactions with 5 indices each: allocate every index, remove all but every 32nd, allocate the removed ones again, then remove everything. */ +TEST_F(btree_lba_manager_test, split_merge_multi) +{ + run_async([this] { + /* calls f(t, idx) for 5 consecutive indices per transaction and submits each transaction */ auto iterate = [&](auto f) { + for (uint64_t i = 0; i < (1<<10); ++i) { + auto t = create_transaction(false); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 5; ++j) { + f(t, (i * 5) + j); + } + logger().debug("submitting transaction"); + submit_test_transaction(std::move(t)); + } + }; + iterate([&](auto &t, auto idx) { + alloc_mapping(t, idx * block_size, block_size); + }); + check_mappings(); + iterate([&](auto &t, auto idx) { + if ((idx % 32) > 0) { + decref_mapping(t, idx * block_size); + } + }); + check_mappings(); + iterate([&](auto &t, auto idx) { + if ((idx % 32) > 0) { + alloc_mapping(t, idx * block_size, block_size); + } + }); + check_mappings(); + iterate([&](auto &t, auto idx) { + decref_mapping(t, idx * block_size); + }); + check_mappings(); + }); +} diff --git a/src/test/crimson/seastore/test_cbjournal.cc b/src/test/crimson/seastore/test_cbjournal.cc new file mode 100644 index 000000000..0bf2d4135 --- /dev/null +++ b/src/test/crimson/seastore/test_cbjournal.cc @@ -0,0 +1,583 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include <random> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/async_cleaner.h" +#include "crimson/os/seastore/journal.h" +#include "crimson/os/seastore/journal/circular_bounded_journal.h" +#include "crimson/os/seastore/random_block_manager.h" +#include "crimson/os/seastore/random_block_manager/rbm_device.h" +#include "crimson/os/seastore/seastore_types.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" +#include "crimson/os/seastore/random_block_manager/block_rb_manager.h" +
+using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace crimson::os::seastore::journal; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + /* Decodes the deltas and extents of one record group from bl into a record_t. Returns std::nullopt when either the delta or the extent-info section fails to decode. */ +std::optional<record_t> decode_record( + bufferlist& bl) +{ + record_t record; + record_group_header_t r_header; + auto bliter = bl.cbegin(); + decode(r_header, bliter); + logger().debug(" decode_record mdlength {} records {}", + r_header.mdlength, r_header.records); + device_id_t d_id = 1 << (std::numeric_limits<device_id_t>::digits - 1); + + auto del_infos = try_decode_deltas(r_header, bl, + paddr_t::make_blk_paddr(d_id, 0)); + /* do not dereference an empty decode result */ if (!del_infos) { + return std::nullopt; + } + for (auto &iter : *del_infos) { + for (auto r : iter.deltas) { + record.deltas.push_back(r.second); + } + } + auto ex_infos = try_decode_extent_infos(r_header, bl); + /* do not dereference an empty decode result */ if (!ex_infos) { + return std::nullopt; + } + /* extent data starts right after the mdlength bytes of metadata */ auto bliter_ex = bl.cbegin(); + bliter_ex += r_header.mdlength; + for (auto &iter: *ex_infos) { + for (auto e : iter.extent_infos) { + extent_t ex; + auto bptr = bufferptr(ceph::buffer::create_page_aligned(e.len)); + logger().debug(" exten len {} remaining {} ", e.len, bliter_ex.get_remaining()); + bliter_ex.copy(e.len, bptr.c_str()); + ex.bl.append(bptr); + record.extents.push_back(ex); + } + } + return record; +} + /* Keeps a submitted record together with where it was written so it can be read back and compared. */ +struct entry_validator_t { + bufferlist bl; + /* number of records validate() reads back; starts at 0 until the submitter sets it */ int entries = 0; + record_t record; + segment_nonce_t magic = 0; + journal_seq_t seq; + + template <typename... T> + entry_validator_t(T&&... entry) : record(std::forward<T>(entry)...)
{} + + /* Compares length and crc32c of every extent and delta in the stored record with the corresponding entry of read. */ void validate(record_t read) { + auto iter = read.extents.begin(); + for (auto &&block : record.extents) { + /* fail instead of reading past the end when read holds fewer extents */ ASSERT_NE(iter, read.extents.end()); + ASSERT_EQ( + iter->bl.length(), + block.bl.length()); + ASSERT_EQ( + iter->bl.begin().crc32c(iter->bl.length(), 1), + block.bl.begin().crc32c(block.bl.length(), 1)); + ++iter; + } + auto iter_delta = read.deltas.begin(); + for (auto &&block : record.deltas) { + /* fail instead of reading past the end when read holds fewer deltas */ ASSERT_NE(iter_delta, read.deltas.end()); + ASSERT_EQ( + iter_delta->bl.length(), + block.bl.length()); + ASSERT_EQ( + iter_delta->bl.begin().crc32c(iter_delta->bl.length(), 1), + block.bl.begin().crc32c(block.bl.length(), 1)); + ++iter_delta; + } + } + /* Reads `entries` records back from cbj starting at seq, decodes each one and validates it against the stored record. */ void validate(CircularBoundedJournal &cbj) { + rbm_abs_addr offset = 0; + auto cursor = scan_valid_records_cursor(seq); + cbj.test_initialize_cursor(cursor); + for (int i = 0; i < entries; i++) { + paddr_t paddr = seq.offset.add_offset(offset); + cursor.seq.offset = paddr; + auto md = cbj.test_read_validate_record_metadata( + cursor, magic).unsafe_get0(); + assert(md); + auto& [header, md_bl] = *md; + auto dbuf = cbj.read( + paddr.add_offset(header.mdlength), + header.dlength).unsafe_get0(); + + bufferlist bl; + bl.append(md_bl); + bl.append(dbuf); + auto record = decode_record(bl); + /* decode_record returns an optional; check it before dereferencing, as done for md above */ assert(record); + validate(*record); + offset += header.mdlength + header.dlength; + cursor.last_committed = header.committed_to; + } + } + + rbm_abs_addr get_abs_addr() { + return convert_paddr_to_abs_addr(seq.offset); + } + + /* Returns true when the crc32c of bl equals the crc32c of any delta in the stored record. */ bool validate_delta(bufferlist bl) { + for (auto &&block : record.deltas) { + if (bl.begin().crc32c(bl.length(), 1) == + block.bl.begin().crc32c(block.bl.length(), 1)) { + return true; + } + } + return false; + } +}; + +struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer +{ + std::vector<entry_validator_t> entries; + std::unique_ptr<CircularBoundedJournal> cbj; + random_block_device::EphemeralRBMDeviceRef device; + + std::default_random_engine generator; + uint64_t block_size; + WritePipeline pipeline; + + cbjournal_test_t() = default; + + /* + * JournalTrimmer interfaces + */ +
journal_seq_t get_journal_head() const { + return JOURNAL_SEQ_NULL; + } + + journal_seq_t get_dirty_tail() const final { + return JOURNAL_SEQ_NULL; + } + + journal_seq_t get_alloc_tail() const final { + return JOURNAL_SEQ_NULL; + } + + void set_journal_head(journal_seq_t head) final {} + + void update_journal_tails( + journal_seq_t dirty_tail, + journal_seq_t alloc_tail) final {} + + bool try_reserve_inline_usage(std::size_t) final { return true; } + + void release_inline_usage(std::size_t) final {} + + std::size_t get_trim_size_per_cycle() const final { + return 0; + } + + auto submit_record(record_t&& record) { + entries.push_back(record); + OrderingHandle handle = get_dummy_ordering_handle(); + auto [addr, w_result] = cbj->submit_record( + std::move(record), + handle).unsafe_get0(); + entries.back().seq = w_result.start_seq; + entries.back().entries = 1; + entries.back().magic = cbj->get_cjs().get_cbj_header().magic; + logger().debug("submit entry to addr {}", entries.back().seq); + return convert_paddr_to_abs_addr(entries.back().seq.offset); + } + + seastar::future<> tear_down_fut() final { + return close(); + } + + extent_t generate_extent(size_t blocks) { + std::uniform_int_distribution<char> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = distribution(generator); + bufferlist bl; + bl.append(buffer::ptr(buffer::create(blocks * block_size, contents))); + return extent_t{extent_types_t::TEST_BLOCK, L_ADDR_NULL, bl}; + } + + delta_info_t generate_delta(size_t bytes) { + std::uniform_int_distribution<char> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = distribution(generator); + bufferlist bl; + bl.append(buffer::ptr(buffer::create(bytes, contents))); + return delta_info_t{ + extent_types_t::TEST_BLOCK, + paddr_t{}, + L_ADDR_NULL, + 0, 0, + device->get_block_size(), + 1, + 0, + segment_type_t::JOURNAL, + bl + }; + } + + auto replay_and_check() 
{ + for (auto &i : entries) { + i.validate(*(cbj.get())); + } + } + + auto replay() { + return cbj->replay( + [this](const auto &offsets, + const auto &e, + auto &dirty_seq, + auto &alloc_seq, + auto last_modified) { + bool found = false; + for (auto &i : entries) { + paddr_t base = offsets.write_result.start_seq.offset; + rbm_abs_addr addr = convert_paddr_to_abs_addr(base); + if (addr == i.get_abs_addr()) { + logger().debug(" compare addr: {} and i.addr {} ", base, i.get_abs_addr()); + found = i.validate_delta(e.bl); + break; + } + } + assert(found == true); + return Journal::replay_ertr::make_ready_future<bool>(true); + }); + } + + auto mkfs() { + device_config_t config = get_rbm_ephemeral_device_config(0, 1); + return device->mkfs(config + ).safe_then([this]() { + return device->mount( + ).safe_then([this]() { + return cbj->open_for_mkfs( + ).safe_then([](auto q) { + return seastar::now(); + }); + }); + }).safe_then([this] { + return cbj->close(); + }); + } + auto open() { + return cbj->open_for_mount( + ).safe_then([](auto q) { + return seastar::now(); + }); + } + seastar::future<> close() { + return cbj->close().handle_error(crimson::ct_error::assert_all{}); + } + auto get_records_available_size() { + return cbj->get_cjs().get_records_available_size(); + } + auto get_records_total_size() { + return cbj->get_cjs().get_records_total_size(); + } + auto get_block_size() { + return device->get_block_size(); + } + auto get_written_to_rbm_addr() { + return cbj->get_rbm_addr(cbj->get_cjs().get_written_to()); + } + auto get_written_to() { + return cbj->get_cjs().get_written_to(); + } + auto get_journal_tail() { + return cbj->get_dirty_tail(); + } + auto get_records_used_size() { + return cbj->get_cjs().get_records_used_size(); + } + bool is_available_size(uint64_t size) { + return cbj->get_cjs().is_available_size(size); + } + void update_journal_tail(rbm_abs_addr addr, uint32_t len) { + paddr_t paddr = + convert_abs_addr_to_paddr( + addr + len, + cbj->get_device_id()); 
+ journal_seq_t seq = {0, paddr}; + cbj->update_journal_tail( + seq, + seq + ).get0(); + } + void set_written_to(journal_seq_t seq) { + cbj->set_written_to(seq); + } + + seastar::future<> set_up_fut() final { + device = random_block_device::create_test_ephemeral( + random_block_device::DEFAULT_TEST_CBJOURNAL_SIZE, 0); + cbj.reset(new CircularBoundedJournal(*this, device.get(), std::string())); + block_size = device->get_block_size(); + cbj->set_write_pipeline(&pipeline); + return mkfs( + ).safe_then([this] { + return replay( + ).safe_then([this] { + return open( + ).safe_then([this] { + return replay(); + }); + }); + }).handle_error(crimson::ct_error::assert_all{}); + } +}; + +TEST_F(cbjournal_test_t, submit_one_record) +{ + run_async([this] { + submit_record( + record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(3), generate_delta(4) } + }); + replay_and_check(); + }); +} + +TEST_F(cbjournal_test_t, submit_three_records) +{ + run_async([this] { + submit_record( + record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(3), generate_delta(4) } + }); + submit_record( + record_t{ + { generate_extent(8), generate_extent(9) }, + { generate_delta(20), generate_delta(21) } + }); + submit_record( + record_t{ + { generate_extent(5), generate_extent(6) }, + { generate_delta(200), generate_delta(210) } + }); + replay_and_check(); + }); +} + +TEST_F(cbjournal_test_t, submit_full_records) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + + update_journal_tail(entries.back().get_abs_addr(), record_total_size); + 
ASSERT_EQ(get_records_total_size(), + get_records_available_size()); + + // will be appended at the beginning of log + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + ASSERT_TRUE(record_total_size > get_records_available_size()); + }); +} + +TEST_F(cbjournal_test_t, boudary_check_verify) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + + uint64_t avail = get_records_available_size(); + // forward 2 record sizes here because 1 block is reserved between head and tail + update_journal_tail(entries.front().get_abs_addr(), record_total_size * 2); + entries.erase(entries.begin()); + entries.erase(entries.begin()); + ASSERT_EQ(avail + (record_total_size * 2), get_records_available_size()); + avail = get_records_available_size(); + // will be appended at the beginning of WAL + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + ASSERT_TRUE(avail - record_total_size >= get_records_available_size()); + replay_and_check(); + }); +} + +TEST_F(cbjournal_test_t, update_header) +{ + run_async([this] { + auto [header, _buf] = *(cbj->get_cjs().read_header().unsafe_get0()); + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + 
auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + + update_journal_tail(entries.front().get_abs_addr(), record_total_size); + cbj->get_cjs().write_header().unsafe_get0(); + auto [update_header, update_buf2] = *(cbj->get_cjs().read_header().unsafe_get0()); + cbj->close().unsafe_get0(); + replay().unsafe_get0(); + + // the re-read header must carry the tail that update_journal_tail() computed + // (front record address + one record length); comparing the field with itself + // could never fail + ASSERT_EQ( + convert_abs_addr_to_paddr( + entries.front().get_abs_addr() + record_total_size, + cbj->get_device_id()), + update_header.dirty_tail.offset); + }); +} + +TEST_F(cbjournal_test_t, replay) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + // will be appended at the beginning of WAL + uint64_t avail = get_records_available_size(); + update_journal_tail(entries.front().get_abs_addr(), record_total_size * 2); + entries.erase(entries.begin()); + entries.erase(entries.begin()); + ASSERT_EQ(avail + (record_total_size * 2), get_records_available_size()); + avail = get_records_available_size(); + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + ASSERT_TRUE(avail - record_total_size >= get_records_available_size()); + cbj->close().unsafe_get0(); + replay().unsafe_get0(); + }); +} + +TEST_F(cbjournal_test_t, replay_after_reset) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), 
generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + auto old_written_to = get_written_to(); + auto old_used_size = get_records_used_size(); + set_written_to( + journal_seq_t{0, + convert_abs_addr_to_paddr( + cbj->get_records_start(), + cbj->get_device_id())}); + cbj->close().unsafe_get0(); + replay().unsafe_get0(); + ASSERT_EQ(old_written_to, get_written_to()); + ASSERT_EQ(old_used_size, + get_records_used_size()); + }); +} + +TEST_F(cbjournal_test_t, multiple_submit_at_end) +{ + run_async([this] { + record_t rec { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }; + auto r_size = record_group_size_t(rec.size, block_size); + auto record_total_size = r_size.get_encoded_length(); + submit_record(std::move(rec)); + while (is_available_size(record_total_size)) { + submit_record( + record_t { + { generate_extent(1), generate_extent(2) }, + { generate_delta(20), generate_delta(21) } + }); + } + update_journal_tail(entries.front().get_abs_addr(), record_total_size * 8); + for (int i = 0; i < 8; i++) { + entries.erase(entries.begin()); + } + seastar::parallel_for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(4u), + [&](auto) { + return seastar::async([&] { + auto writes = 0; + while (writes < 2) { + record_t rec { + { generate_extent(1) }, + { generate_delta(20) } }; + submit_record(std::move(rec)); + writes++; + } + }); + }).get0(); + auto old_written_to = get_written_to(); + cbj->close().unsafe_get0(); + cbj->replay( + [](const auto &offsets, + const auto &e, + auto &dirty_seq, + auto &alloc_seq, + auto last_modified) { + return Journal::replay_ertr::make_ready_future<bool>(true); + }).unsafe_get0(); + assert(get_written_to() == old_written_to); + }); +} diff --git a/src/test/crimson/seastore/test_collection_manager.cc b/src/test/crimson/seastore/test_collection_manager.cc new file mode 100644 index 000000000..cedcc5e8f --- /dev/null +++ 
b/src/test/crimson/seastore/test_collection_manager.cc @@ -0,0 +1,195 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "os/ObjectStore.h" +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/collection_manager.h" + +#include "test/crimson/seastore/test_block.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + + +#define TEST_COLL_FORWARD(METHOD) \ + template <typename... Args> \ + auto METHOD(coll_root_t &root, Transaction &t, Args&&... args) const { \ + return with_trans_intr( \ + t, \ + [this](auto &t, auto &root, auto&&... args) { \ + return collection_manager->METHOD( \ + root, \ + t, \ + std::forward<decltype(args)>(args)...); \ + }, \ + root, \ + std::forward<Args>(args)...).unsafe_get0(); \ + } + +struct collection_manager_test_t : + public seastar_test_suite_t, + TMTestState { + + CollectionManagerRef collection_manager; + + collection_manager_test_t() {} + + seastar::future<> set_up_fut() final { + return tm_setup().then([this] { + collection_manager = collection_manager::create_coll_manager(*tm); + return seastar::now(); + }); + } + + seastar::future<> tear_down_fut() final { + return tm_teardown().then([this] { + collection_manager.reset(); + return seastar::now(); + }); + } + + using test_collection_t = std::map<coll_t, coll_info_t>; + test_collection_t test_coll_mappings; + + void replay() { + restart(); + collection_manager = collection_manager::create_coll_manager(*tm); + } + + auto get_root() { + auto tref = create_mutate_transaction(); + auto coll_root = with_trans_intr( + *tref, + 
[this](auto &t) { + return collection_manager->mkfs(t); + }).unsafe_get0(); + submit_transaction(std::move(tref)); + return coll_root; + } + + TEST_COLL_FORWARD(remove) + TEST_COLL_FORWARD(list) + TEST_COLL_FORWARD(create) + TEST_COLL_FORWARD(update) + + void checking_mappings(coll_root_t &coll_root, Transaction &t) { + auto coll_list = list(coll_root, t); + EXPECT_EQ(test_coll_mappings.size(), coll_list.size()); + for (std::pair<coll_t, coll_info_t> p : test_coll_mappings) { + EXPECT_NE( + std::find(coll_list.begin(), coll_list.end(), p), + coll_list.end()); + } + } + + void checking_mappings(coll_root_t &coll_root) { + auto t = create_read_transaction(); + checking_mappings(coll_root, *t); + } +}; + +TEST_P(collection_manager_test_t, basic) +{ + run_async([this] { + coll_root_t coll_root = get_root(); + { + auto t = create_mutate_transaction(); + for (int i = 0; i < 20; i++) { + coll_t cid(spg_t(pg_t(i+1,i+2), shard_id_t::NO_SHARD)); + create(coll_root, *t, cid, coll_info_t(i)); + test_coll_mappings.emplace(cid, coll_info_t(i)); + } + checking_mappings(coll_root, *t); + submit_transaction(std::move(t)); + EXPECT_EQ(test_coll_mappings.size(), 20); + } + + replay(); + checking_mappings(coll_root); + { + auto t = create_mutate_transaction(); + for (auto iter = test_coll_mappings.begin(); + iter != test_coll_mappings.end();) { + remove(coll_root, *t, iter->first); + iter = test_coll_mappings.erase(iter); + } + submit_transaction(std::move(t)); + } + replay(); + { + auto t = create_mutate_transaction(); + auto list_ret = list(coll_root, *t); + submit_transaction(std::move(t)); + EXPECT_EQ(list_ret.size(), test_coll_mappings.size()); + } + }); +} + +TEST_P(collection_manager_test_t, overflow) +{ + run_async([this] { + coll_root_t coll_root = get_root(); + auto old_location = coll_root.get_location(); + + auto t = create_mutate_transaction(); + for (int i = 0; i < 412; i++) { + coll_t cid(spg_t(pg_t(i+1,i+2), shard_id_t::NO_SHARD)); + create(coll_root, *t, cid, 
coll_info_t(i)); + test_coll_mappings.emplace(cid, coll_info_t(i)); + } + submit_transaction(std::move(t)); + EXPECT_NE(old_location, coll_root.get_location()); + checking_mappings(coll_root); + + replay(); + checking_mappings(coll_root); + }); +} + +TEST_P(collection_manager_test_t, update) +{ + run_async([this] { + coll_root_t coll_root = get_root(); + { + auto t = create_mutate_transaction(); + for (int i = 0; i < 2; i++) { + coll_t cid(spg_t(pg_t(1,i+1), shard_id_t::NO_SHARD)); + create(coll_root, *t, cid, coll_info_t(i)); + test_coll_mappings.emplace(cid, coll_info_t(i)); + } + submit_transaction(std::move(t)); + } + { + auto iter1= test_coll_mappings.begin(); + auto iter2 = std::next(test_coll_mappings.begin(), 1); + EXPECT_NE(iter1->second.split_bits, iter2->second.split_bits); + auto t = create_mutate_transaction(); + update(coll_root, *t, iter1->first, iter2->second); + submit_transaction(std::move(t)); + iter1->second.split_bits = iter2->second.split_bits; + } + replay(); + checking_mappings(coll_root); + }); +} + +INSTANTIATE_TEST_SUITE_P( + collection_manager_test, + collection_manager_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); diff --git a/src/test/crimson/seastore/test_extent_allocator.cc b/src/test/crimson/seastore/test_extent_allocator.cc new file mode 100644 index 000000000..8217e5a66 --- /dev/null +++ b/src/test/crimson/seastore/test_extent_allocator.cc @@ -0,0 +1,181 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <random> + +#include <boost/iterator/counting_iterator.hpp> + +#include "test/crimson/gtest_seastar.h" +#include "crimson/os/seastore/random_block_manager.h" +#include "crimson/os/seastore/random_block_manager/extent_allocator.h" +#include "crimson/os/seastore/random_block_manager/avlallocator.h" +#include "include/interval_set.h" + + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace 
{ + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct allocator_test_t : + public seastar_test_suite_t, + ::testing::WithParamInterface<const char*> { + std::random_device rd; + std::mt19937 gen; + ExtentAllocatorRef allocator; + + allocator_test_t() + : gen(rd()) {} + + seastar::future<> set_up_fut() final { + std::string a_type = GetParam(); + if (a_type == "avl") { + allocator.reset(new AvlAllocator(false)); + return seastar::now(); + } + ceph_assert(0 == "no support"); + } + seastar::future<> tear_down_fut() final { + if (allocator) { + allocator->close(); + } + return seastar::now(); + } + void init_alloc(uint64_t block_size, uint64_t total_size) { + assert(allocator); + allocator->init(0, total_size, block_size); + } + void close() { + assert(allocator); + allocator->close(); + } + auto allocate(size_t size) { + return allocator->alloc_extent(size); + } + void free(uint64_t start, uint64_t length) { + allocator->free_extent(start, length); + } + rbm_abs_addr get_random_addr(size_t block_size, size_t capacity) { + return block_size * + std::uniform_int_distribution<>(0, (capacity / block_size) - 1)(gen); + } +}; + +TEST_P(allocator_test_t, test_alloc_init) +{ + init_alloc(4096, 4096 * 64); + ASSERT_EQ((4096 * 64), allocator->get_available_size()); + close(); + init_alloc(8192, 8192 * 32); + allocate(8192); + ASSERT_EQ(8192 * 32 - 8192, allocator->get_available_size()); + close(); + init_alloc(4096, 4096 * 128); + allocate(8192); + ASSERT_EQ(4096 * 128 - 8192, allocator->get_available_size()); +} + +TEST_P(allocator_test_t, test_init_alloc_free) +{ + uint64_t block_size = 4096; + uint64_t capacity = 4 * 1024 * block_size; + + { + init_alloc(block_size, capacity); + + auto free_length = allocator->get_available_size(); + allocate(allocator->get_max_alloc_size()); + ASSERT_EQ(free_length - allocator->get_max_alloc_size(), + allocator->get_available_size()); + + free(0, allocator->get_max_alloc_size()); + 
ASSERT_EQ(free_length, allocator->get_available_size()); + } +} + +TEST_P(allocator_test_t, test_alloc_failure) +{ + uint64_t block_size = 8192; + uint64_t capacity = 1024 * block_size; + + { + init_alloc(block_size, capacity); + allocator->mark_extent_used(0, block_size * 256); + allocator->mark_extent_used(block_size * 512, block_size * 256); + + auto result = allocate(block_size * 512); + ASSERT_EQ(false, result.has_value()); + + free(0, block_size * 256); + allocator->mark_extent_used(0, block_size * 512); + + result = allocate(block_size * 512); + ASSERT_EQ(false, result.has_value()); + } +} + +TEST_P(allocator_test_t, test_random_alloc_verify) +{ + uint64_t block_size = 4096; + uint64_t capacity = 64 * 1024 * block_size; + uint64_t avail = capacity; + interval_set<rbm_abs_addr> alloc_map; + init_alloc(block_size, capacity); + + { + for (int i = 0; i < 256; i++) { + auto addr = get_random_addr(block_size, capacity); + auto size = get_random_addr(block_size, capacity) % (4 << 20); + if (addr + size > capacity || size == 0 || + alloc_map.intersects(addr, size) ) continue; + allocator->mark_extent_used(addr, size); + alloc_map.insert(addr, size); + avail -= size; + } + ASSERT_EQ(avail, allocator->get_available_size()); + + // drain from the front: erasing inside a range-for over alloc_map would + // invalidate the iterator the loop is still advancing + while (!alloc_map.empty()) { + const auto p = *alloc_map.begin(); + free(p.first, p.second); + avail += p.second; + alloc_map.erase(p.first, p.second); + ASSERT_EQ(avail, allocator->get_available_size()); + } + ASSERT_EQ(capacity, allocator->get_available_size()); + + for (int i = 0; i < 100; i++) { + auto addr = get_random_addr(block_size, capacity); + auto size = get_random_addr(block_size, capacity) % (4 << 20); + if (addr + size > capacity || size == 0 || + alloc_map.intersects(addr, size) ) continue; + allocator->mark_extent_used(addr, size); + alloc_map.insert(addr, size); + avail -= size; + } + + for (int i = 0; i < 50; i++) { + free((*alloc_map.begin()).first, (*alloc_map.begin()).second); + avail += (*alloc_map.begin()).second; + alloc_map.erase((*alloc_map.begin()).first, 
(*alloc_map.begin()).second); + ASSERT_EQ(avail, allocator->get_available_size()); + + auto addr = get_random_addr(block_size, capacity); + auto size = get_random_addr(block_size, capacity) % (4 << 20); + if (addr + size > capacity || size == 0 || + alloc_map.intersects(addr, size) ) continue; + allocator->mark_extent_used(addr, size); + alloc_map.insert(addr, size); + avail -= size; + } + ASSERT_EQ(avail, allocator->get_available_size()); + } +} + +INSTANTIATE_TEST_SUITE_P( + allocator_test, + allocator_test_t, + ::testing::Values("avl")); diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc new file mode 100644 index 000000000..6510cb5d9 --- /dev/null +++ b/src/test/crimson/seastore/test_object_data_handler.cc @@ -0,0 +1,431 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/onode.h" +#include "crimson/os/seastore/object_data_handler.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +#define MAX_OBJECT_SIZE (16<<20) +#define DEFAULT_OBJECT_DATA_RESERVATION (16<<20) +#define DEFAULT_OBJECT_METADATA_RESERVATION (16<<20) + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +class TestOnode final : public Onode { + onode_layout_t layout; + bool dirty = false; + +public: + TestOnode(uint32_t ddr, uint32_t dmr) : Onode(ddr, dmr) {} + const onode_layout_t &get_layout() const final { + return layout; + } + onode_layout_t &get_mutable_layout(Transaction &t) final { + dirty = true; + return layout; + } + bool is_alive() const { + return true; + } + bool is_dirty() const { return dirty; } + laddr_t get_hint() const final {return L_ADDR_MIN; } + ~TestOnode() final = default; +}; + +struct 
object_data_handler_test_t: + public seastar_test_suite_t, + TMTestState { + OnodeRef onode; + + bufferptr known_contents; + extent_len_t size = 0; + + object_data_handler_test_t() {} + + void write(Transaction &t, objaddr_t offset, extent_len_t len, char fill) { + ceph_assert(offset + len <= known_contents.length()); + size = std::max<extent_len_t>(size, offset + len); + memset( + known_contents.c_str() + offset, + fill, + len); + bufferlist bl; + bl.append( + bufferptr( + known_contents, + offset, + len)); + with_trans_intr(t, [&](auto &t) { + return ObjectDataHandler(MAX_OBJECT_SIZE).write( + ObjectDataHandler::context_t{ + *tm, + t, + *onode, + }, + offset, + bl); + }).unsafe_get0(); + } + void write(objaddr_t offset, extent_len_t len, char fill) { + auto t = create_mutate_transaction(); + write(*t, offset, len, fill); + return submit_transaction(std::move(t)); + } + + void truncate(Transaction &t, objaddr_t offset) { + if (size > offset) { + memset( + known_contents.c_str() + offset, + 0, + size - offset); + with_trans_intr(t, [&](auto &t) { + return ObjectDataHandler(MAX_OBJECT_SIZE).truncate( + ObjectDataHandler::context_t{ + *tm, + t, + *onode + }, + offset); + }).unsafe_get0(); + } + size = offset; + } + void truncate(objaddr_t offset) { + auto t = create_mutate_transaction(); + truncate(*t, offset); + return submit_transaction(std::move(t)); + } + + void read(Transaction &t, objaddr_t offset, extent_len_t len) { + bufferlist bl = with_trans_intr(t, [&](auto &t) { + return ObjectDataHandler(MAX_OBJECT_SIZE).read( + ObjectDataHandler::context_t{ + *tm, + t, + *onode + }, + offset, + len); + }).unsafe_get0(); + bufferlist known; + known.append( + bufferptr( + known_contents, + offset, + len)); + EXPECT_EQ(bl.length(), known.length()); + EXPECT_EQ(bl, known); + } + void read(objaddr_t offset, extent_len_t len) { + auto t = create_read_transaction(); + read(*t, offset, len); + } + void read_near(objaddr_t offset, extent_len_t len, extent_len_t fuzz) { + auto 
fuzzes = std::vector<int32_t>{-1 * (int32_t)fuzz, 0, (int32_t)fuzz}; + for (auto left_fuzz : fuzzes) { + for (auto right_fuzz : fuzzes) { + read(offset + left_fuzz, len - left_fuzz + right_fuzz); + } + } + } + std::list<LBAMappingRef> get_mappings(objaddr_t offset, extent_len_t length) { + auto t = create_mutate_transaction(); + auto ret = with_trans_intr(*t, [&](auto &t) { + return tm->get_pins(t, offset, length); + }).unsafe_get0(); + return ret; + } + + seastar::future<> set_up_fut() final { + onode = new TestOnode( + DEFAULT_OBJECT_DATA_RESERVATION, + DEFAULT_OBJECT_METADATA_RESERVATION); + known_contents = buffer::create(4<<20 /* 4MB */); + memset(known_contents.c_str(), 0, known_contents.length()); + size = 0; + return tm_setup(); + } + + seastar::future<> tear_down_fut() final { + onode.reset(); + size = 0; + return tm_teardown(); + } +}; + +TEST_P(object_data_handler_test_t, single_write) +{ + run_async([this] { + write(1<<20, 8<<10, 'c'); + + read_near(1<<20, 8<<10, 1); + read_near(1<<20, 8<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, multi_write) +{ + run_async([this] { + write((1<<20) - (4<<10), 4<<10, 'a'); + write(1<<20, 4<<10, 'b'); + write((1<<20) + (4<<10), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20)-(4<<10), 12<<10, 1); + read_near((1<<20)-(4<<10), 12<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, write_hole) +{ + run_async([this] { + write((1<<20) - (4<<10), 4<<10, 'a'); + // hole at 1<<20 + write((1<<20) + (4<<10), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20)-(4<<10), 12<<10, 1); + read_near((1<<20)-(4<<10), 12<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, overwrite_single) +{ + run_async([this] { + write((1<<20), 4<<10, 'a'); + write((1<<20), 4<<10, 'c'); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, overwrite_double) +{ + run_async([this] 
{ + write((1<<20), 4<<10, 'a'); + write((1<<20)+(4<<10), 4<<10, 'c'); + write((1<<20), 8<<10, 'b'); + + read_near(1<<20, 8<<10, 1); + read_near(1<<20, 8<<10, 512); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20) + (4<<10), 4<<10, 1); + read_near((1<<20) + (4<<10), 4<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, overwrite_partial) +{ + run_async([this] { + write((1<<20), 12<<10, 'a'); + read_near(1<<20, 12<<10, 1); + + write((1<<20)+(8<<10), 4<<10, 'b'); + read_near(1<<20, 12<<10, 1); + + write((1<<20)+(4<<10), 4<<10, 'c'); + read_near(1<<20, 12<<10, 1); + + write((1<<20), 4<<10, 'd'); + + read_near(1<<20, 12<<10, 1); + read_near(1<<20, 12<<10, 512); + + read_near(1<<20, 4<<10, 1); + read_near(1<<20, 4<<10, 512); + + read_near((1<<20) + (4<<10), 4<<10, 1); + read_near((1<<20) + (4<<10), 4<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, unaligned_write) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, (4<<10)+(1<<10), 'a'); + read_near(base-(4<<10), 12<<10, 512); + + base = (1<<20) + (64<<10); + write(base+(1<<10), (4<<10)+(1<<10), 'b'); + read_near(base-(4<<10), 12<<10, 512); + + base = (1<<20) + (128<<10); + write(base-(1<<10), (4<<10)+(2<<20), 'c'); + read_near(base-(4<<10), 12<<10, 512); + }); +} + +TEST_P(object_data_handler_test_t, unaligned_overwrite) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, (128<<10) + (16<<10), 'x'); + + write(base, (4<<10)+(1<<10), 'a'); + read_near(base-(4<<10), 12<<10, 2<<10); + + base = (1<<20) + (64<<10); + write(base+(1<<10), (4<<10)+(1<<10), 'b'); + read_near(base-(4<<10), 12<<10, 2<<10); + + base = (1<<20) + (128<<10); + write(base-(1<<10), (4<<10)+(2<<20), 'c'); + read_near(base-(4<<10), 12<<10, 2<<10); + + read(base, (128<<10) + (16<<10)); + }); +} + +TEST_P(object_data_handler_test_t, truncate) +{ + run_async([this] { + objaddr_t base = 1<<20; + write(base, 8<<10, 'a'); + write(base+(8<<10), 8<<10, 'b'); + write(base+(16<<10), 8<<10, 
'c'); + + truncate(base + (32<<10)); + read(base, 64<<10); + + truncate(base + (24<<10)); + read(base, 64<<10); + + truncate(base + (12<<10)); + read(base, 64<<10); + + truncate(base - (12<<10)); + read(base, 64<<10); + }); +} + +TEST_P(object_data_handler_test_t, no_split) { + run_async([this] { + write(0, 8<<10, 'x'); + write(0, 8<<10, 'a'); + + auto pins = get_mappings(0, 8<<10); + EXPECT_EQ(pins.size(), 1); + + read(0, 8<<10); + }); +} + +TEST_P(object_data_handler_test_t, split_left) { + run_async([this] { + write(0, 128<<10, 'x'); + + write(64<<10, 60<<10, 'a'); + + auto pins = get_mappings(0, 128<<10); + EXPECT_EQ(pins.size(), 2); + + size_t res[2] = {0, 64<<10}; + auto base = pins.front()->get_key(); + int i = 0; + for (auto &pin : pins) { + EXPECT_EQ(pin->get_key() - base, res[i]); + i++; + } + read(0, 128<<10); + }); +} + +TEST_P(object_data_handler_test_t, split_right) { + run_async([this] { + write(0, 128<<10, 'x'); + write(4<<10, 60<<10, 'a'); + + auto pins = get_mappings(0, 128<<10); + EXPECT_EQ(pins.size(), 2); + + size_t res[2] = {0, 64<<10}; + auto base = pins.front()->get_key(); + int i = 0; + for (auto &pin : pins) { + EXPECT_EQ(pin->get_key() - base, res[i]); + i++; + } + read(0, 128<<10); + }); +} +TEST_P(object_data_handler_test_t, split_left_right) { + run_async([this] { + write(0, 128<<10, 'x'); + write(48<<10, 32<<10, 'a'); + + auto pins = get_mappings(0, 128<<10); + EXPECT_EQ(pins.size(), 3); + + size_t res[3] = {0, 48<<10, 80<<10}; + auto base = pins.front()->get_key(); + int i = 0; + for (auto &pin : pins) { + EXPECT_EQ(pin->get_key() - base, res[i]); + i++; + } + }); +} +TEST_P(object_data_handler_test_t, multiple_split) { + run_async([this] { + write(0, 128<<10, 'x'); + + auto t = create_mutate_transaction(); + // normal split + write(*t, 120<<10, 4<<10, 'a'); + // not aligned right + write(*t, 4<<10, 5<<10, 'b'); + // split right extent of last split result + write(*t, 32<<10, 4<<10, 'c'); + // non aligned overwrite + write(*t, 
13<<10, 4<<10, 'd'); + + write(*t, 64<<10, 32<<10, 'e'); + // not split right + write(*t, 60<<10, 8<<10, 'f'); + + submit_transaction(std::move(t)); + + auto pins = get_mappings(0, 128<<10); + EXPECT_EQ(pins.size(), 10); + + size_t res[10] = {0, 4<<10, 12<<10, 20<<10, 32<<10, + 36<<10, 60<<10, 96<<10, 120<<10, 124<<10}; + auto base = pins.front()->get_key(); + int i = 0; + for (auto &pin : pins) { + EXPECT_EQ(pin->get_key() - base, res[i]); + i++; + } + read(0, 128<<10); + }); +} + +INSTANTIATE_TEST_SUITE_P( + object_data_handler_test, + object_data_handler_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); + + diff --git a/src/test/crimson/seastore/test_omap_manager.cc b/src/test/crimson/seastore/test_omap_manager.cc new file mode 100644 index 000000000..ab2218565 --- /dev/null +++ b/src/test/crimson/seastore/test_omap_manager.cc @@ -0,0 +1,730 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/omap_manager.h" + +#include "test/crimson/seastore/test_block.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace std; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +const int STR_LEN = 50; + +std::string rand_name(const int len) +{ + std::string ret; + ret.reserve(len); + for (int i = 0; i < len; ++i) { + ret.append(1, (char)(rand() % ('z' - '0')) + '0'); + } + return ret; +} + +bufferlist rand_buffer(const int len) { + bufferptr ptr(len); + for (auto i = ptr.c_str(); i < ptr.c_str() + len; ++i) { + *i = (char)rand(); + } + bufferlist bl; + bl.append(ptr); + return bl; +} + 
+struct omap_manager_test_t : + public seastar_test_suite_t, + TMTestState { + + OMapManagerRef omap_manager; + + omap_manager_test_t() {} + + seastar::future<> set_up_fut() final { + return tm_setup().then([this] { + omap_manager = omap_manager::create_omap_manager(*tm); + return seastar::now(); + }); + } + + seastar::future<> tear_down_fut() final { + return tm_teardown().then([this] { + omap_manager.reset(); + return seastar::now(); + }); + } + + using test_omap_t = std::map<std::string, ceph::bufferlist>; + test_omap_t test_omap_mappings; + + void set_key( + omap_root_t &omap_root, + Transaction &t, + const string &key, + const bufferlist &val) { + with_trans_intr( + t, + [&, this](auto &t) { + return omap_manager->omap_set_key(omap_root, t, key, val); + }).unsafe_get0(); + test_omap_mappings[key] = val; + } + + void set_key( + omap_root_t &omap_root, + Transaction &t, + const string &key, + const string &val) { + bufferlist bl; + bl.append(val); + set_key(omap_root, t, key, bl); + } + + std::string set_random_key( + omap_root_t &omap_root, + Transaction &t) { + auto key = rand_name(STR_LEN); + set_key( + omap_root, + t, + key, + rand_buffer(STR_LEN)); + return key; + } + + void get_value( + omap_root_t &omap_root, + Transaction &t, + const string &key) { + auto ret = with_trans_intr( + t, + [&, this](auto &t) { + return omap_manager->omap_get_value(omap_root, t, key); + }).unsafe_get0(); + auto iter = test_omap_mappings.find(key); + if (iter == test_omap_mappings.end()) { + EXPECT_FALSE(ret); + } else { + EXPECT_TRUE(ret); + if (ret) { + EXPECT_TRUE(*ret == iter->second); + } + } + } + + void rm_key( + omap_root_t &omap_root, + Transaction &t, + const string &key) { + with_trans_intr( + t, + [&, this](auto &t) { + return omap_manager->omap_rm_key(omap_root, t, key); + }).unsafe_get0(); + test_omap_mappings.erase(test_omap_mappings.find(key)); + } + + std::vector<std::string> rm_key_range( + omap_root_t &omap_root, + Transaction &t, + const std::string &first, 
+ const std::string &last) { + logger().debug("rm keys in range {} ~ {}", first, last); + auto config = OMapManager::omap_list_config_t() + .with_max(3000) + .with_inclusive(true, false); + + with_trans_intr( + t, + [&, this](auto &t) { + return omap_manager->omap_rm_key_range( + omap_root, t, first, last, config); + }).unsafe_get0(); + + std::vector<std::string> keys; + size_t count = 0; + for (auto iter = test_omap_mappings.begin(); + iter != test_omap_mappings.end(); ) { + if (iter->first >= first && iter->first < last) { + keys.push_back(iter->first); + iter = test_omap_mappings.erase(iter); + count++; + } else { + iter++; + } + if (count == config.max_result_size) { + break; + } + } + return keys; + } + + void list( + const omap_root_t &omap_root, + Transaction &t, + const std::optional<std::string> &first, + const std::optional<std::string> &last, + size_t max = 128, + bool inclusive = false) { + + if (first && last) { + logger().debug("list on {} ~ {}", *first, *last); + } else if (first) { + logger().debug("list on {} ~ end", *first); + } else if (last) { + logger().debug("list on start ~ {}", *last); + } else { + logger().debug("list on start ~ end"); + } + + auto config = OMapManager::omap_list_config_t() + .with_max(max) + .with_inclusive(inclusive, false); + + auto [complete, results] = with_trans_intr( + t, + [&, this](auto &t) { + return omap_manager->omap_list(omap_root, t, first, last, config); + }).unsafe_get0(); + + test_omap_t::iterator it, lit; + if (first) { + it = config.first_inclusive ? + test_omap_mappings.lower_bound(*first) : + test_omap_mappings.upper_bound(*first); + } else { + it = test_omap_mappings.begin(); + } + if (last) { + lit = config.last_inclusive ? 
+ test_omap_mappings.upper_bound(*last) : + test_omap_mappings.lower_bound(*last); + } else { + lit = test_omap_mappings.end(); + } + + for (auto &&[k, v]: results) { + EXPECT_NE(it, test_omap_mappings.end()); + if (it == test_omap_mappings.end()) { + return; + } + EXPECT_EQ(k, it->first); + EXPECT_EQ(v, it->second); + it++; + } + if (it == lit) { + EXPECT_TRUE(complete); + } else { + EXPECT_EQ(results.size(), max); + } + } + + void clear( + omap_root_t &omap_root, + Transaction &t) { + with_trans_intr( + t, + [&, this](auto &t) { + return omap_manager->omap_clear(omap_root, t); + }).unsafe_get0(); + EXPECT_EQ(omap_root.get_location(), L_ADDR_NULL); + } + + void check_mappings(omap_root_t &omap_root, Transaction &t) { + for (const auto &i: test_omap_mappings){ + get_value(omap_root, t, i.first); + } + } + + void check_mappings(omap_root_t &omap_root) { + auto t = create_read_transaction(); + check_mappings(omap_root, *t); + } + + std::vector<std::string> get_mapped_keys() { + std::vector<std::string> mkeys; + mkeys.reserve(test_omap_mappings.size()); + for (auto &k: test_omap_mappings) { + mkeys.push_back(k.first); + } + return mkeys; + } + + void replay() { + restart(); + omap_manager = omap_manager::create_omap_manager(*tm); + } + + auto initialize() { + auto t = create_mutate_transaction(); + omap_root_t omap_root = with_trans_intr( + *t, + [this](auto &t) { + return omap_manager->initialize_omap(t, L_ADDR_MIN); + }).unsafe_get0(); + submit_transaction(std::move(t)); + return omap_root; + } +}; + +TEST_P(omap_manager_test_t, basic) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + string key = "owner"; + string val = "test"; + + { + auto t = create_mutate_transaction(); + logger().debug("first transaction"); + set_key(omap_root, *t, key, val); + get_value(omap_root, *t, key); + submit_transaction(std::move(t)); + } + { + auto t = create_mutate_transaction(); + logger().debug("second transaction"); + get_value(omap_root, *t, key); + 
rm_key(omap_root, *t, key); + get_value(omap_root, *t, key); + submit_transaction(std::move(t)); + } + { + auto t = create_mutate_transaction(); + logger().debug("third transaction"); + get_value(omap_root, *t, key); + submit_transaction(std::move(t)); + } + }); +} + +TEST_P(omap_manager_test_t, force_leafnode_split) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 40; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 10; ++j) { + set_random_key(omap_root, *t); + if ((i % 20 == 0) && (j == 5)) { + check_mappings(omap_root, *t); + } + } + logger().debug("force split submit transaction i = {}", i); + submit_transaction(std::move(t)); + check_mappings(omap_root); + } + }); +} + +TEST_P(omap_manager_test_t, force_leafnode_split_merge) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 80; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened split_merge transaction"); + for (unsigned j = 0; j < 5; ++j) { + set_random_key(omap_root, *t); + if ((i % 10 == 0) && (j == 3)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction"); + submit_transaction(std::move(t)); + if (i % 50 == 0) { + check_mappings(omap_root); + } + } + auto mkeys = get_mapped_keys(); + auto t = create_mutate_transaction(); + for (unsigned i = 0; i < mkeys.size(); i++) { + if (i % 3 != 0) { + rm_key(omap_root, *t, mkeys[i]); + } + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + submit_transaction(std::move(t)); + t = create_mutate_transaction(); + } + if (i % 100 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + } + logger().debug("finally submitting transaction "); + submit_transaction(std::move(t)); + }); +} + +TEST_P(omap_manager_test_t, force_leafnode_split_merge_fullandbalanced) +{ + run_async([this] { + 
omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 50; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened split_merge transaction"); + for (unsigned j = 0; j < 5; ++j) { + set_random_key(omap_root, *t); + if ((i % 10 == 0) && (j == 3)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction"); + submit_transaction(std::move(t)); + if (i % 50 == 0) { + check_mappings(omap_root); + } + } + auto mkeys = get_mapped_keys(); + auto t = create_mutate_transaction(); + for (unsigned i = 0; i < mkeys.size(); i++) { + if (30 < i && i < 100) { + rm_key(omap_root, *t, mkeys[i]); + } + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + submit_transaction(std::move(t)); + t = create_mutate_transaction(); + } + if (i % 50 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + if (i == 100) { + break; + } + } + logger().debug("finally submitting transaction "); + submit_transaction(std::move(t)); + check_mappings(omap_root); + }); +} + +TEST_P(omap_manager_test_t, force_split_listkeys_list_rmkey_range_clear) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + string first, last; + for (unsigned i = 0; i < 40; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 10; ++j) { + auto key = set_random_key(omap_root, *t); + if (i == 10) { + first = key; + } + if (i == 30) { + last = key; + if (first > last) { + std::swap(first, last); + } + } + if ((i % 20 == 0) && (j == 5)) { + check_mappings(omap_root, *t); + } + } + logger().debug("force split submit transaction i = {}", i); + submit_transaction(std::move(t)); + check_mappings(omap_root); + } + + std::optional<std::string> first_temp; + std::optional<std::string> last_temp; + { + auto t = create_read_transaction(); + first_temp = std::nullopt; + last_temp = std::nullopt; + list(omap_root, *t, first_temp, 
last_temp); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = std::nullopt; + list(omap_root, *t, first_temp, last_temp, 100); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = std::nullopt; + list(omap_root, *t, first_temp, last_temp, 100, true); + } + + { + auto t = create_read_transaction(); + first_temp = std::nullopt; + last_temp = last; + list(omap_root, *t, first_temp, last_temp, 10240); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = last; + list(omap_root, *t, first_temp, last_temp, 10240, true); + } + + { + auto t = create_read_transaction(); + list(omap_root, *t, first, last, 10240, true); + } + + { + auto t = create_mutate_transaction(); + auto keys = rm_key_range(omap_root, *t, first, last); + for (const auto& key : keys) { + get_value(omap_root, *t, key); + } + submit_transaction(std::move(t)); + } + + { + auto t = create_mutate_transaction(); + clear(omap_root, *t); + submit_transaction(std::move(t)); + } + }); +} + +TEST_P(omap_manager_test_t, force_inner_node_split_list_rmkey_range) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + string first = ""; + string last; + while (cache->get_omap_tree_depth() < 3) { + for (unsigned i = 0; i < 40; i++) { + auto t = create_mutate_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 10; ++j) { + auto key = set_random_key(omap_root, *t); + if (key.compare(first) < 0 || !first.length()) { + first = key; + } + if (i == 10) { + last = key; + } + } + logger().debug("force split submit transaction i = {}", i); + submit_transaction(std::move(t)); + } + } + + std::optional<std::string> first_temp; + std::optional<std::string> last_temp; + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = std::nullopt; + list(omap_root, *t, first_temp, last_temp, 10240); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = 
std::nullopt; + list(omap_root, *t, first_temp, last_temp, 10240, true); + } + + { + auto t = create_read_transaction(); + first_temp = std::nullopt; + last_temp = last; + list(omap_root, *t, first_temp, last_temp, 10240); + } + + { + auto t = create_read_transaction(); + first_temp = first; + last_temp = last; + list(omap_root, *t, first_temp, last_temp, 10240, true); + } + + { + auto t = create_mutate_transaction(); + auto keys = rm_key_range(omap_root, *t, first, last); + for (const auto& key : keys) { + get_value(omap_root, *t, key); + } + submit_transaction(std::move(t)); + } + + { + auto t = create_mutate_transaction(); + clear(omap_root, *t); + submit_transaction(std::move(t)); + } + }); +} + + +TEST_P(omap_manager_test_t, internal_force_split) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 10; i++) { + logger().debug("opened split transaction"); + auto t = create_mutate_transaction(); + + for (unsigned j = 0; j < 80; ++j) { + set_random_key(omap_root, *t); + if ((i % 2 == 0) && (j % 50 == 0)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction i = {}", i); + submit_transaction(std::move(t)); + } + check_mappings(omap_root); + }); +} + +TEST_P(omap_manager_test_t, internal_force_merge_fullandbalanced) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 8; i++) { + logger().debug("opened split transaction"); + auto t = create_mutate_transaction(); + + for (unsigned j = 0; j < 80; ++j) { + set_random_key(omap_root, *t); + if ((i % 2 == 0) && (j % 50 == 0)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction"); + submit_transaction(std::move(t)); + } + auto mkeys = get_mapped_keys(); + auto t = create_mutate_transaction(); + for (unsigned i = 0; i < mkeys.size(); i++) { + rm_key(omap_root, *t, mkeys[i]); + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + 
submit_transaction(std::move(t)); + t = create_mutate_transaction(); + } + if (i % 50 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + } + logger().debug("finally submitting transaction "); + submit_transaction(std::move(t)); + check_mappings(omap_root); + }); +} + +TEST_P(omap_manager_test_t, replay) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + for (unsigned i = 0; i < 8; i++) { + logger().debug("opened split transaction"); + auto t = create_mutate_transaction(); + + for (unsigned j = 0; j < 80; ++j) { + set_random_key(omap_root, *t); + if ((i % 2 == 0) && (j % 50 == 0)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction i = {}", i); + submit_transaction(std::move(t)); + } + replay(); + check_mappings(omap_root); + + auto mkeys = get_mapped_keys(); + auto t = create_mutate_transaction(); + for (unsigned i = 0; i < mkeys.size(); i++) { + rm_key(omap_root, *t, mkeys[i]); + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + submit_transaction(std::move(t)); + replay(); + t = create_mutate_transaction(); + } + if (i % 50 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + } + logger().debug("finally submitting transaction "); + submit_transaction(std::move(t)); + replay(); + check_mappings(omap_root); + }); +} + + +TEST_P(omap_manager_test_t, internal_force_split_to_root) +{ + run_async([this] { + omap_root_t omap_root = initialize(); + + logger().debug("set big keys"); + for (unsigned i = 0; i < 53; i++) { + auto t = create_mutate_transaction(); + + for (unsigned j = 0; j < 8; ++j) { + set_random_key(omap_root, *t); + } + logger().debug("submitting transaction i = {}", i); + submit_transaction(std::move(t)); + } + logger().debug("set small keys"); + for (unsigned i = 0; i < 100; i++) { + auto t = create_mutate_transaction(); + for (unsigned j = 0; j 
< 8; ++j) { + set_random_key(omap_root, *t); + } + logger().debug("submitting transaction last"); + submit_transaction(std::move(t)); + } + check_mappings(omap_root); + }); +} + +INSTANTIATE_TEST_SUITE_P( + omap_manager_test, + omap_manager_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); diff --git a/src/test/crimson/seastore/test_randomblock_manager.cc b/src/test/crimson/seastore/test_randomblock_manager.cc new file mode 100644 index 000000000..9ddb7f9ad --- /dev/null +++ b/src/test/crimson/seastore/test_randomblock_manager.cc @@ -0,0 +1,178 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include <random> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/random_block_manager/block_rb_manager.h" +#include "crimson/os/seastore/random_block_manager/rbm_device.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +constexpr uint64_t DEFAULT_TEST_SIZE = 1 << 20; + +struct rbm_test_t : + public seastar_test_suite_t { + std::unique_ptr<BlockRBManager> rbm_manager; + std::unique_ptr<random_block_device::RBMDevice> device; + + struct rbm_transaction { + void add_rbm_allocated_blocks(alloc_delta_t &d) { + allocated_blocks.push_back(d); + } + void clear_rbm_allocated_blocks() { + if (!allocated_blocks.empty()) { + allocated_blocks.clear(); + } + } + const auto &get_rbm_allocated_blocks() { + return allocated_blocks; + } + std::vector<alloc_delta_t> allocated_blocks; + }; + + std::default_random_engine generator; + + uint64_t block_size = 0; + uint64_t size = 0; + + device_config_t config; + + rbm_test_t() = default; + + seastar::future<> set_up_fut() final { + device = random_block_device::create_test_ephemeral( + random_block_device::DEFAULT_TEST_CBJOURNAL_SIZE, 
DEFAULT_TEST_SIZE); + block_size = device->get_block_size(); + size = device->get_available_size(); + rbm_manager.reset(new BlockRBManager(device.get(), std::string(), false)); + config = get_rbm_ephemeral_device_config(0, 1); + return device->mkfs(config).handle_error(crimson::ct_error::assert_all{} + ).then([this] { + return device->mount().handle_error(crimson::ct_error::assert_all{} + ).then([this] { + return rbm_manager->open().handle_error(crimson::ct_error::assert_all{}); + }); + }); + } + + seastar::future<> tear_down_fut() final { + rbm_manager->close().unsafe_get0(); + device->close().unsafe_get0(); + rbm_manager.reset(); + device.reset(); + return seastar::now(); + } + + auto mkfs() { + return device->mkfs(config).unsafe_get0(); + } + + auto read_rbm_header() { + return device->read_rbm_header(RBM_START_ADDRESS).unsafe_get0(); + } + + auto open() { + device->mount().unsafe_get0(); + return rbm_manager->open().unsafe_get0(); + } + + auto write(uint64_t addr, bufferptr &ptr) { + paddr_t paddr = convert_abs_addr_to_paddr( + addr, + rbm_manager->get_device_id()); + return rbm_manager->write(paddr, ptr).unsafe_get0(); + } + + auto read(uint64_t addr, bufferptr &ptr) { + paddr_t paddr = convert_abs_addr_to_paddr( + addr, + rbm_manager->get_device_id()); + return rbm_manager->read(paddr, ptr).unsafe_get0(); + } + + bufferptr generate_extent(size_t blocks) { + std::uniform_int_distribution<char> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = distribution(generator); + return buffer::ptr(buffer::create(blocks * block_size, contents)); + } + + void close() { + rbm_manager->close().unsafe_get0(); + return; + } + +}; + +TEST_F(rbm_test_t, mkfs_test) +{ + run_async([this] { + auto super = read_rbm_header(); + ASSERT_TRUE( + super.block_size == block_size && + super.size == size + ); + config.spec.id = DEVICE_ID_NULL; + mkfs(); + super = read_rbm_header(); + ASSERT_TRUE( + super.config.spec.id == 
DEVICE_ID_NULL && + super.size == size + ); + }); +} + +TEST_F(rbm_test_t, open_read_write_test) +{ + run_async([this] { + auto content = generate_extent(1); + { + write( + block_size, + content + ); + auto bp = bufferptr(ceph::buffer::create_page_aligned(block_size)); + read( + block_size, + bp + ); + bufferlist bl; + bufferlist block; + bl.append(bp); + block.append(content); + ASSERT_EQ( + bl.begin().crc32c(bl.length(), 1), + block.begin().crc32c(block.length(), 1)); + } + close(); + open(); + { + auto bp = bufferptr(ceph::buffer::create_page_aligned(block_size)); + read( + block_size, + bp + ); + bufferlist bl; + bufferlist block; + bl.append(bp); + block.append(content); + ASSERT_EQ( + bl.begin().crc32c(bl.length(), 1), + block.begin().crc32c(block.length(), 1)); + } + }); +} + diff --git a/src/test/crimson/seastore/test_seastore.cc b/src/test/crimson/seastore/test_seastore.cc new file mode 100644 index 000000000..63bf4c51f --- /dev/null +++ b/src/test/crimson/seastore/test_seastore.cc @@ -0,0 +1,1268 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <string> +#include <iostream> +#include <sstream> + +#include "test/crimson/gtest_seastar.h" + +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/futurized_collection.h" +#include "crimson/os/seastore/seastore.h" +#include "crimson/os/seastore/onode.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using SeaStoreShard = FuturizedStore::Shard; +using CTransaction = ceph::os::Transaction; +using namespace std; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +ghobject_t make_oid(int i) { + stringstream ss; + ss << "object_" << i; + auto ret = ghobject_t( + hobject_t( + sobject_t(ss.str(), CEPH_NOSNAP))); + ret.set_shard(shard_id_t(shard_id_t::NO_SHARD)); + ret.hobj.nspace = "asdf"; + 
ret.hobj.pool = 0; + uint32_t reverse_hash = hobject_t::_reverse_bits(0); + ret.hobj.set_bitwise_key_u32(reverse_hash + i * 100); + return ret; +} + +ghobject_t make_temp_oid(int i) { + stringstream ss; + ss << "temp_object_" << i; + auto ret = ghobject_t( + hobject_t( + sobject_t(ss.str(), CEPH_NOSNAP))); + ret.set_shard(shard_id_t(shard_id_t::NO_SHARD)); + ret.hobj.nspace = "hjkl"; + ret.hobj.pool = -2ll; + uint32_t reverse_hash = hobject_t::_reverse_bits(0); + ret.hobj.set_bitwise_key_u32(reverse_hash + i * 100); + return ret; +} + +struct seastore_test_t : + public seastar_test_suite_t, + SeaStoreTestState { + + coll_t coll_name{spg_t{pg_t{0, 0}}}; + CollectionRef coll; + + seastore_test_t() {} + + seastar::future<> set_up_fut() final { + return tm_setup( + ).then([this] { + return sharded_seastore->create_new_collection(coll_name); + }).then([this](auto coll_ref) { + coll = coll_ref; + CTransaction t; + t.create_collection(coll_name, 0); + return sharded_seastore->do_transaction( + coll, + std::move(t)); + }); + } + + seastar::future<> tear_down_fut() final { + coll.reset(); + return tm_teardown(); + } + + void do_transaction(CTransaction &&t) { + return sharded_seastore->do_transaction( + coll, + std::move(t)).get0(); + } + + void set_meta( + const std::string& key, + const std::string& value) { + return seastore->write_meta(key, value).get0(); + } + + std::tuple<int, std::string> get_meta( + const std::string& key) { + return seastore->read_meta(key).get(); + } + + struct object_state_t { + const coll_t cid; + const CollectionRef coll; + const ghobject_t oid; + + std::map<string, bufferlist> omap; + bufferlist contents; + + std::map<snapid_t, bufferlist> clone_contents; + + void touch( + CTransaction &t) { + t.touch(cid, oid); + } + + void touch( + SeaStoreShard &sharded_seastore) { + CTransaction t; + touch(t); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void truncate( + CTransaction &t, + uint64_t off) { + t.truncate(cid, 
oid, off); + } + + void truncate( + SeaStoreShard &sharded_seastore, + uint64_t off) { + CTransaction t; + truncate(t, off); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + std::map<uint64_t, uint64_t> fiemap( + SeaStoreShard &sharded_seastore, + uint64_t off, + uint64_t len) { + return sharded_seastore.fiemap(coll, oid, off, len).unsafe_get0(); + } + + bufferlist readv( + SeaStoreShard &sharded_seastore, + interval_set<uint64_t>&m) { + return sharded_seastore.readv(coll, oid, m).unsafe_get0(); + } + + void remove( + CTransaction &t) { + t.remove(cid, oid); + t.remove_collection(cid); + } + + void remove( + SeaStoreShard &sharded_seastore) { + CTransaction t; + remove(t); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void set_omap( + CTransaction &t, + const string &key, + const bufferlist &val) { + omap[key] = val; + std::map<string, bufferlist> arg; + arg[key] = val; + t.omap_setkeys( + cid, + oid, + arg); + } + + void set_omap( + SeaStoreShard &sharded_seastore, + const string &key, + const bufferlist &val) { + CTransaction t; + set_omap(t, key, val); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void write( + SeaStoreShard &sharded_seastore, + CTransaction &t, + uint64_t offset, + bufferlist bl) { + bufferlist new_contents; + if (offset > 0 && contents.length()) { + new_contents.substr_of( + contents, + 0, + std::min<size_t>(offset, contents.length()) + ); + } + new_contents.append_zero(offset - new_contents.length()); + new_contents.append(bl); + + auto tail_offset = offset + bl.length(); + if (contents.length() > tail_offset) { + bufferlist tail; + tail.substr_of( + contents, + tail_offset, + contents.length() - tail_offset); + new_contents.append(tail); + } + contents.swap(new_contents); + + t.write( + cid, + oid, + offset, + bl.length(), + bl); + } + + void write( + SeaStoreShard &sharded_seastore, + uint64_t offset, + bufferlist bl) { + CTransaction t; + 
write(sharded_seastore, t, offset, bl); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void clone( + SeaStoreShard &sharded_seastore, + snapid_t snap) { + ghobject_t coid = oid; + coid.hobj.snap = snap; + CTransaction t; + t.clone(cid, oid, coid); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + clone_contents[snap].reserve(contents.length()); + auto it = contents.begin(); + it.copy_all(clone_contents[snap]); + } + + object_state_t get_clone(snapid_t snap) { + auto coid = oid; + coid.hobj.snap = snap; + auto clone_obj = object_state_t{cid, coll, coid}; + clone_obj.contents.reserve(clone_contents[snap].length()); + auto it = clone_contents[snap].begin(); + it.copy_all(clone_obj.contents); + return clone_obj; + } + + void write( + SeaStoreShard &sharded_seastore, + uint64_t offset, + size_t len, + char fill) { + auto buffer = bufferptr(buffer::create(len)); + ::memset(buffer.c_str(), fill, len); + bufferlist bl; + bl.append(buffer); + write(sharded_seastore, offset, bl); + } + + void zero( + SeaStoreShard &sharded_seastore, + CTransaction &t, + uint64_t offset, + size_t len) { + ceph::buffer::list bl; + bl.append_zero(len); + bufferlist new_contents; + if (offset > 0 && contents.length()) { + new_contents.substr_of( + contents, + 0, + std::min<size_t>(offset, contents.length()) + ); + } + new_contents.append_zero(offset - new_contents.length()); + new_contents.append(bl); + + auto tail_offset = offset + bl.length(); + if (contents.length() > tail_offset) { + bufferlist tail; + tail.substr_of( + contents, + tail_offset, + contents.length() - tail_offset); + new_contents.append(tail); + } + contents.swap(new_contents); + + t.zero( + cid, + oid, + offset, + len); + } + + void zero( + SeaStoreShard &sharded_seastore, + uint64_t offset, + size_t len) { + CTransaction t; + zero(sharded_seastore, t, offset, len); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void read( + SeaStoreShard 
&sharded_seastore, + uint64_t offset, + uint64_t len) { + bufferlist to_check; + if (contents.length() >= offset) { + to_check.substr_of( + contents, + offset, + std::min(len, (uint64_t)contents.length())); + } + auto ret = sharded_seastore.read( + coll, + oid, + offset, + len).unsafe_get0(); + EXPECT_EQ(ret.length(), to_check.length()); + EXPECT_EQ(ret, to_check); + } + + void check_size(SeaStoreShard &sharded_seastore) { + auto st = sharded_seastore.stat( + coll, + oid).get0(); + EXPECT_EQ(contents.length(), st.st_size); + } + + void set_attr( + SeaStoreShard &sharded_seastore, + std::string key, + bufferlist& val) { + CTransaction t; + t.setattr(cid, oid, key, val); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void rm_attr( + SeaStoreShard &sharded_seastore, + std::string key) { + CTransaction t; + t.rmattr(cid, oid, key); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + void rm_attrs( + SeaStoreShard &sharded_seastore) { + CTransaction t; + t.rmattrs(cid, oid); + sharded_seastore.do_transaction( + coll, + std::move(t)).get0(); + } + + SeaStoreShard::attrs_t get_attrs( + SeaStoreShard &sharded_seastore) { + return sharded_seastore.get_attrs(coll, oid) + .handle_error(SeaStoreShard::get_attrs_ertr::discard_all{}) + .get(); + } + + ceph::bufferlist get_attr( + SeaStoreShard& sharded_seastore, + std::string_view name) { + return sharded_seastore.get_attr(coll, oid, name) + .handle_error( + SeaStoreShard::get_attr_errorator::discard_all{}) + .get(); + } + + void check_omap_key( + SeaStoreShard &sharded_seastore, + const string &key) { + std::set<string> to_check; + to_check.insert(key); + auto result = sharded_seastore.omap_get_values( + coll, + oid, + to_check).unsafe_get0(); + if (result.empty()) { + EXPECT_EQ(omap.find(key), omap.end()); + } else { + auto iter = omap.find(key); + EXPECT_NE(iter, omap.end()); + if (iter != omap.end()) { + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(iter->second, 
result.begin()->second); + } + } + } + + void check_omap(SeaStoreShard &sharded_seastore) { + auto refiter = omap.begin(); + std::optional<std::string> start; + while(true) { + auto [done, kvs] = sharded_seastore.omap_get_values( + coll, + oid, + start).unsafe_get0(); + auto iter = kvs.begin(); + while (true) { + if ((done && iter == kvs.end()) && refiter == omap.end()) { + return; // finished + } else if (!done && iter == kvs.end()) { + break; // reload kvs + } + if (iter == kvs.end() || refiter->first < iter->first) { + logger().debug( + "check_omap: missing omap key {}", + refiter->first); + GTEST_FAIL() << "missing omap key " << refiter->first; + ++refiter; + } else if (refiter == omap.end() || refiter->first > iter->first) { + logger().debug( + "check_omap: extra omap key {}", + iter->first); + GTEST_FAIL() << "extra omap key " << iter->first; + ++iter; + } else { + EXPECT_EQ(iter->second, refiter->second); + ++iter; + ++refiter; + } + } + if (!done) { + start = kvs.rbegin()->first; + } + } + } + }; + + map<ghobject_t, object_state_t> test_objects; + object_state_t &get_object( + const ghobject_t &oid) { + return test_objects.emplace( + std::make_pair( + oid, + object_state_t{coll_name, coll, oid})).first->second; + } + + void remove_object( + object_state_t &sobj) { + + sobj.remove(*sharded_seastore); + auto erased = test_objects.erase(sobj.oid); + ceph_assert(erased == 1); + } + + void validate_objects() const { + std::vector<ghobject_t> oids; + for (auto& [oid, obj] : test_objects) { + oids.emplace_back(oid); + } + auto ret = sharded_seastore->list_objects( + coll, + ghobject_t(), + ghobject_t::get_max(), + std::numeric_limits<uint64_t>::max()).get0(); + EXPECT_EQ(std::get<1>(ret), ghobject_t::get_max()); + EXPECT_EQ(std::get<0>(ret), oids); + } + + // create temp objects + struct bound_t { + enum class type_t { + MIN, + MAX, + TEMP, + TEMP_END, + NORMAL_BEGIN, + NORMAL, + } type = type_t::MIN; + unsigned index = 0; + + static bound_t get_temp(unsigned 
index) { + return bound_t{type_t::TEMP, index}; + } + static bound_t get_normal(unsigned index) { + return bound_t{type_t::NORMAL, index}; + } + static bound_t get_min() { return bound_t{type_t::MIN}; } + static bound_t get_max() { return bound_t{type_t::MAX}; } + static bound_t get_temp_end() { return bound_t{type_t::TEMP_END}; } + static bound_t get_normal_begin() { + return bound_t{type_t::NORMAL_BEGIN}; + } + + ghobject_t get_oid(SeaStore &seastore, CollectionRef &coll) const { + switch (type) { + case type_t::MIN: + return ghobject_t(); + case type_t::MAX: + return ghobject_t::get_max(); + case type_t::TEMP: + return make_temp_oid(index); + case type_t::TEMP_END: + return seastore.get_objs_range(coll, 0).temp_end; + case type_t::NORMAL_BEGIN: + return seastore.get_objs_range(coll, 0).obj_begin; + case type_t::NORMAL: + return make_oid(index); + default: + assert(0 == "impossible"); + return ghobject_t(); + } + } + }; + struct list_test_case_t { + bound_t left; + bound_t right; + unsigned limit; + }; + // list_test_cases_t :: [<limit, left_bound, right_bound>] + using list_test_cases_t = std::list<std::tuple<unsigned, bound_t, bound_t>>; + + void test_list( + unsigned temp_to_create, /// create temp 0..temp_to_create-1 + unsigned normal_to_create, /// create normal 0..normal_to_create-1 + list_test_cases_t cases /// cases to test + ) { + std::vector<ghobject_t> objs; + + // setup + auto create = [this, &objs](ghobject_t hoid) { + objs.emplace_back(std::move(hoid)); + auto &obj = get_object(objs.back()); + obj.touch(*sharded_seastore); + obj.check_size(*sharded_seastore); + }; + for (unsigned i = 0; i < temp_to_create; ++i) { + create(make_temp_oid(i)); + } + for (unsigned i = 0; i < normal_to_create; ++i) { + create(make_oid(i)); + } + + // list and validate each case + for (auto [limit, in_left_bound, in_right_bound] : cases) { + auto left_bound = in_left_bound.get_oid(*seastore, coll); + auto right_bound = in_right_bound.get_oid(*seastore, coll); + + // get 
results from seastore + auto [listed, next] = sharded_seastore->list_objects( + coll, left_bound, right_bound, limit).get0(); + + // compute correct answer + auto correct_begin = std::find_if( + objs.begin(), objs.end(), + [&left_bound](const auto &in) { + return in >= left_bound; + }); + unsigned count = 0; + auto correct_end = correct_begin; + for (; count < limit && + correct_end != objs.end() && + *correct_end < right_bound; + ++correct_end, ++count); + + // validate return -- [correct_begin, correct_end) should match listed + decltype(objs) correct_listed(correct_begin, correct_end); + EXPECT_EQ(listed, correct_listed); + + if (count < limit) { + if (correct_end == objs.end()) { + // if listed extends to end of range, next should be >= right_bound + EXPECT_GE(next, right_bound); + } else { + // next <= *correct_end since *correct_end is the next object to list + EXPECT_LE(next, *correct_end); + // next > *(correct_end - 1) since we already listed it + EXPECT_GT(next, *(correct_end - 1)); + } + } else { + // we listed exactly limit objects + EXPECT_EQ(limit, listed.size()); + + EXPECT_GE(next, left_bound); + if (limit == 0) { + if (correct_end != objs.end()) { + // next <= *correct_end since *correct_end is the next object to list + EXPECT_LE(next, *correct_end); + } + } else { + // next > *(correct_end - 1) since we already listed it + EXPECT_GT(next, *(correct_end - 1)); + } + } + } + + // teardown + for (auto &&hoid : objs) { get_object(hoid).remove(*sharded_seastore); } + } +}; + +template <typename T, typename V> +auto contains(const T &t, const V &v) { + return std::find( + t.begin(), + t.end(), + v) != t.end(); +} + +TEST_P(seastore_test_t, collection_create_list_remove) +{ + run_async([this] { + coll_t test_coll{spg_t{pg_t{1, 0}}}; + { + sharded_seastore->create_new_collection(test_coll).get0(); + { + CTransaction t; + t.create_collection(test_coll, 4); + do_transaction(std::move(t)); + } + auto colls_cores = seastore->list_collections().get0(); + 
std::vector<coll_t> colls; + colls.resize(colls_cores.size()); + std::transform( + colls_cores.begin(), colls_cores.end(), colls.begin(), + [](auto p) { return p.first; }); + EXPECT_EQ(colls.size(), 2); + EXPECT_TRUE(contains(colls, coll_name)); + EXPECT_TRUE(contains(colls, test_coll)); + } + + { + { + CTransaction t; + t.remove_collection(test_coll); + do_transaction(std::move(t)); + } + auto colls_cores = seastore->list_collections().get0(); + std::vector<coll_t> colls; + colls.resize(colls_cores.size()); + std::transform( + colls_cores.begin(), colls_cores.end(), colls.begin(), + [](auto p) { return p.first; }); + EXPECT_EQ(colls.size(), 1); + EXPECT_TRUE(contains(colls, coll_name)); + } + }); +} + +TEST_P(seastore_test_t, meta) { + run_async([this] { + set_meta("key1", "value1"); + set_meta("key2", "value2"); + + const auto [ret1, value1] = get_meta("key1"); + const auto [ret2, value2] = get_meta("key2"); + EXPECT_EQ(ret1, 0); + EXPECT_EQ(ret2, 0); + EXPECT_EQ(value1, "value1"); + EXPECT_EQ(value2, "value2"); + }); +} + +TEST_P(seastore_test_t, touch_stat_list_remove) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + test_obj.check_size(*sharded_seastore); + validate_objects(); + + remove_object(test_obj); + validate_objects(); + }); +} + +using bound_t = seastore_test_t::bound_t; +constexpr unsigned MAX_LIMIT = std::numeric_limits<unsigned>::max(); +static const seastore_test_t::list_test_cases_t temp_list_cases{ + // list all temp, maybe overlap to normal on right + {MAX_LIMIT, bound_t::get_min() , bound_t::get_max() }, + { 5, bound_t::get_min() , bound_t::get_temp_end()}, + { 6, bound_t::get_min() , bound_t::get_temp_end()}, + { 6, bound_t::get_min() , bound_t::get_max() }, + + // list temp starting at min up to but not past boundary + { 3, bound_t::get_min() , bound_t::get_temp(3) }, + { 3, bound_t::get_min() , bound_t::get_temp(4) }, + { 3, bound_t::get_min() , bound_t::get_temp(2) }, + + // list 
temp starting > min up to or past boundary + { 3, bound_t::get_temp(2) , bound_t::get_temp_end()}, + { 3, bound_t::get_temp(2) , bound_t::get_max() }, + { 3, bound_t::get_temp(3) , bound_t::get_max() }, + { 3, bound_t::get_temp(1) , bound_t::get_max() }, + + // 0 limit + { 0, bound_t::get_min() , bound_t::get_max() }, + { 0, bound_t::get_temp(1) , bound_t::get_max() }, + { 0, bound_t::get_temp_end(), bound_t::get_max() }, +}; + +TEST_P(seastore_test_t, list_objects_temp_only) +{ + run_async([this] { test_list(5, 0, temp_list_cases); }); +} + +TEST_P(seastore_test_t, list_objects_temp_overlap) +{ + run_async([this] { test_list(5, 5, temp_list_cases); }); +} + +static const seastore_test_t::list_test_cases_t normal_list_cases{ + // list all normal, maybe overlap to temp on left + {MAX_LIMIT, bound_t::get_min() , bound_t::get_max() }, + { 5, bound_t::get_normal_begin(), bound_t::get_max() }, + { 6, bound_t::get_normal_begin(), bound_t::get_max() }, + { 6, bound_t::get_temp(4) , bound_t::get_max() }, + + // list normal starting <= normal_begin < end + { 3, bound_t::get_normal_begin(), bound_t::get_normal(3)}, + { 3, bound_t::get_normal_begin(), bound_t::get_normal(4)}, + { 3, bound_t::get_normal_begin(), bound_t::get_normal(2)}, + { 3, bound_t::get_temp(5) , bound_t::get_normal(2)}, + { 3, bound_t::get_temp(4) , bound_t::get_normal(2)}, + + // list normal starting > min up to end + { 3, bound_t::get_normal(2) , bound_t::get_max() }, + { 3, bound_t::get_normal(2) , bound_t::get_max() }, + { 3, bound_t::get_normal(3) , bound_t::get_max() }, + { 3, bound_t::get_normal(1) , bound_t::get_max() }, + + // 0 limit + { 0, bound_t::get_min() , bound_t::get_max() }, + { 0, bound_t::get_normal(1) , bound_t::get_max() }, + { 0, bound_t::get_normal_begin(), bound_t::get_max() }, +}; + +TEST_P(seastore_test_t, list_objects_normal_only) +{ + run_async([this] { test_list(5, 0, normal_list_cases); }); +} + +TEST_P(seastore_test_t, list_objects_normal_overlap) +{ + run_async([this] { 
test_list(5, 5, normal_list_cases); }); +} + +bufferlist make_bufferlist(size_t len) { + bufferptr ptr(len); + bufferlist bl; + bl.append(ptr); + return bl; +} + +TEST_P(seastore_test_t, omap_test_simple) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + test_obj.set_omap( + *sharded_seastore, + "asdf", + make_bufferlist(128)); + test_obj.check_omap_key( + *sharded_seastore, + "asdf"); + }); +} + +TEST_P(seastore_test_t, clone_aligned_extents) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.write(*sharded_seastore, 0, 4096, 'a'); + + test_obj.clone(*sharded_seastore, 10); + std::cout << "reading origin after clone10" << std::endl; + test_obj.read(*sharded_seastore, 0, 4096); + test_obj.write(*sharded_seastore, 0, 4096, 'b'); + test_obj.write(*sharded_seastore, 4096, 4096, 'c'); + std::cout << "reading origin after clone10 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 8192); + auto clone_obj10 = test_obj.get_clone(10); + std::cout << "reading clone after clone10 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 8192); + + test_obj.clone(*sharded_seastore, 20); + std::cout << "reading origin after clone20" << std::endl; + test_obj.read(*sharded_seastore, 0, 4096); + test_obj.write(*sharded_seastore, 0, 4096, 'd'); + test_obj.write(*sharded_seastore, 4096, 4096, 'e'); + test_obj.write(*sharded_seastore, 8192, 4096, 'f'); + std::cout << "reading origin after clone20 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 12288); + auto clone_obj20 = test_obj.get_clone(20); + std::cout << "reading clone after clone20 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 12288); + clone_obj20.read(*sharded_seastore, 0, 12288); + }); +} + +TEST_P(seastore_test_t, clone_unaligned_extents) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.write(*sharded_seastore, 0, 8192, 'a'); + 
test_obj.write(*sharded_seastore, 8192, 8192, 'b'); + test_obj.write(*sharded_seastore, 16384, 8192, 'c'); + + test_obj.clone(*sharded_seastore, 10); + test_obj.write(*sharded_seastore, 4096, 12288, 'd'); + std::cout << "reading origin after clone10 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 24576); + + auto clone_obj10 = test_obj.get_clone(10); + std::cout << "reading clone after clone10 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 24576); + + test_obj.clone(*sharded_seastore, 20); + test_obj.write(*sharded_seastore, 8192, 12288, 'e'); + std::cout << "reading origin after clone20 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 24576); + + auto clone_obj20 = test_obj.get_clone(20); + std::cout << "reading clone after clone20 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 24576); + clone_obj20.read(*sharded_seastore, 0, 24576); + + test_obj.write(*sharded_seastore, 0, 24576, 'f'); + test_obj.clone(*sharded_seastore, 30); + test_obj.write(*sharded_seastore, 8192, 4096, 'g'); + std::cout << "reading origin after clone30 and write" << std::endl; + test_obj.read(*sharded_seastore, 0, 24576); + + auto clone_obj30 = test_obj.get_clone(30); + std::cout << "reading clone after clone30 and write" << std::endl; + clone_obj10.read(*sharded_seastore, 0, 24576); + clone_obj20.read(*sharded_seastore, 0, 24576); + clone_obj30.read(*sharded_seastore, 0, 24576); + }); +} + +TEST_P(seastore_test_t, attr) +{ + run_async([this] { + auto& test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + { + std::string oi("asdfasdfasdf"); + bufferlist bl; + encode(oi, bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + std::string ss("fdsfdsfs"); + bl.clear(); + encode(ss, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + std::string test_val("ssssssssssss"); + bl.clear(); + encode(test_val, bl); + test_obj.set_attr(*sharded_seastore, "test_key", bl); + + auto attrs = 
test_obj.get_attrs(*sharded_seastore); + std::string oi2; + bufferlist bl2 = attrs[OI_ATTR]; + decode(oi2, bl2); + bl2.clear(); + bl2 = attrs[SS_ATTR]; + std::string ss2; + decode(ss2, bl2); + std::string test_val2; + bl2.clear(); + bl2 = attrs["test_key"]; + decode(test_val2, bl2); + EXPECT_EQ(ss, ss2); + EXPECT_EQ(oi, oi2); + EXPECT_EQ(test_val, test_val2); + + bl2.clear(); + bl2 = test_obj.get_attr(*sharded_seastore, "test_key"); + test_val2.clear(); + decode(test_val2, bl2); + EXPECT_EQ(test_val, test_val2); + //test rm_attrs + test_obj.rm_attrs(*sharded_seastore); + attrs = test_obj.get_attrs(*sharded_seastore); + EXPECT_EQ(attrs.find(OI_ATTR), attrs.end()); + EXPECT_EQ(attrs.find(SS_ATTR), attrs.end()); + EXPECT_EQ(attrs.find("test_key"), attrs.end()); + + std::cout << "test_key passed" << std::endl; + //create OI_ATTR with len > onode_layout_t::MAX_OI_LENGTH, rm OI_ATTR + //create SS_ATTR with len > onode_layout_t::MAX_SS_LENGTH, rm SS_ATTR + char oi_array[onode_layout_t::MAX_OI_LENGTH + 1] = {'a'}; + std::string oi_str(&oi_array[0], sizeof(oi_array)); + bl.clear(); + encode(oi_str, bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + char ss_array[onode_layout_t::MAX_SS_LENGTH + 1] = {'b'}; + std::string ss_str(&ss_array[0], sizeof(ss_array)); + bl.clear(); + encode(ss_str, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + attrs = test_obj.get_attrs(*sharded_seastore); + bl2.clear(); + bl2 = attrs[OI_ATTR]; + std::string oi_str2; + decode(oi_str2, bl2); + EXPECT_EQ(oi_str, oi_str2); + + bl2.clear(); + bl2 = attrs[SS_ATTR]; + std::string ss_str2; + decode(ss_str2, bl2); + EXPECT_EQ(ss_str, ss_str2); + + bl2.clear(); + ss_str2.clear(); + bl2 = test_obj.get_attr(*sharded_seastore, SS_ATTR); + decode(ss_str2, bl2); + EXPECT_EQ(ss_str, ss_str2); + + bl2.clear(); + oi_str2.clear(); + bl2 = test_obj.get_attr(*sharded_seastore, OI_ATTR); + decode(oi_str2, bl2); + EXPECT_EQ(oi_str, oi_str2); + + test_obj.rm_attr(*sharded_seastore, OI_ATTR); + 
test_obj.rm_attr(*sharded_seastore, SS_ATTR); + + attrs = test_obj.get_attrs(*sharded_seastore); + EXPECT_EQ(attrs.find(OI_ATTR), attrs.end()); + EXPECT_EQ(attrs.find(SS_ATTR), attrs.end()); + } + { + //create OI_ATTR with len <= onode_layout_t::MAX_OI_LENGTH, rm OI_ATTR + //create SS_ATTR with len <= onode_layout_t::MAX_SS_LENGTH, rm SS_ATTR + std::string oi("asdfasdfasdf"); + bufferlist bl; + encode(oi, bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + std::string ss("f"); + bl.clear(); + encode(ss, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + std::string test_val("ssssssssssss"); + bl.clear(); + encode(test_val, bl); + test_obj.set_attr(*sharded_seastore, "test_key", bl); + + auto attrs = test_obj.get_attrs(*sharded_seastore); + std::string oi2; + bufferlist bl2 = attrs[OI_ATTR]; + decode(oi2, bl2); + bl2.clear(); + bl2 = attrs[SS_ATTR]; + std::string ss2; + decode(ss2, bl2); + std::string test_val2; + bl2.clear(); + bl2 = attrs["test_key"]; + decode(test_val2, bl2); + EXPECT_EQ(ss, ss2); + EXPECT_EQ(oi, oi2); + EXPECT_EQ(test_val, test_val2); + + test_obj.rm_attr(*sharded_seastore, OI_ATTR); + test_obj.rm_attr(*sharded_seastore, SS_ATTR); + test_obj.rm_attr(*sharded_seastore, "test_key"); + + attrs = test_obj.get_attrs(*sharded_seastore); + EXPECT_EQ(attrs.find(OI_ATTR), attrs.end()); + EXPECT_EQ(attrs.find(SS_ATTR), attrs.end()); + EXPECT_EQ(attrs.find("test_key"), attrs.end()); + } + { + // create OI_ATTR with len > onode_layout_t::MAX_OI_LENGTH, then + // overwrite it with another OI_ATTR len of which < onode_layout_t::MAX_OI_LENGTH + // create SS_ATTR with len > onode_layout_t::MAX_SS_LENGTH, then + // overwrite it with another SS_ATTR len of which < onode_layout_t::MAX_SS_LENGTH + char oi_array[onode_layout_t::MAX_OI_LENGTH + 1] = {'a'}; + std::string oi(&oi_array[0], sizeof(oi_array)); + bufferlist bl; + encode(oi, bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + oi = "asdfasdfasdf"; + bl.clear(); + encode(oi, 
bl); + test_obj.set_attr(*sharded_seastore, OI_ATTR, bl); + + char ss_array[onode_layout_t::MAX_SS_LENGTH + 1] = {'b'}; + std::string ss(&ss_array[0], sizeof(ss_array)); + bl.clear(); + encode(ss, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + ss = "f"; + bl.clear(); + encode(ss, bl); + test_obj.set_attr(*sharded_seastore, SS_ATTR, bl); + + auto attrs = test_obj.get_attrs(*sharded_seastore); + std::string oi2, ss2; + bufferlist bl2 = attrs[OI_ATTR]; + decode(oi2, bl2); + bl2.clear(); + bl2 = attrs[SS_ATTR]; + decode(ss2, bl2); + EXPECT_EQ(oi, oi2); + EXPECT_EQ(ss, ss2); + } + }); +} + +TEST_P(seastore_test_t, omap_test_iterator) +{ + run_async([this] { + auto make_key = [](unsigned i) { + std::stringstream ss; + ss << "key" << i; + return ss.str(); + }; + auto &test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + for (unsigned i = 0; i < 20; ++i) { + test_obj.set_omap( + *sharded_seastore, + make_key(i), + make_bufferlist(128)); + } + test_obj.check_omap(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, object_data_omap_remove) +{ + run_async([this] { + auto make_key = [](unsigned i) { + std::stringstream ss; + ss << "key" << i; + return ss.str(); + }; + auto &test_obj = get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + for (unsigned i = 0; i < 1024; ++i) { + test_obj.set_omap( + *sharded_seastore, + make_key(i), + make_bufferlist(128)); + } + test_obj.check_omap(*sharded_seastore); + + for (uint64_t i = 0; i < 16; i++) { + test_obj.write( + *sharded_seastore, + 4096 * i, + 4096, + 'a'); + } + test_obj.remove(*sharded_seastore); + }); +} + + +TEST_P(seastore_test_t, simple_extent_test) +{ + run_async([this] { + auto &test_obj = get_object(make_oid(0)); + test_obj.write( + *sharded_seastore, + 1024, + 1024, + 'a'); + test_obj.read( + *sharded_seastore, + 1024, + 1024); + test_obj.check_size(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, fiemap_empty) +{ + run_async([this] { + auto &test_obj = 
get_object(make_oid(0)); + test_obj.touch(*sharded_seastore); + test_obj.truncate(*sharded_seastore, 100000); + + std::map<uint64_t, uint64_t> m; + m = test_obj.fiemap(*sharded_seastore, 0, 100000); + EXPECT_TRUE(m.empty()); + + test_obj.remove(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, fiemap_holes) +{ + run_async([this] { + const uint64_t MAX_EXTENTS = 100; + + // large enough to ensure that seastore will allocate each write separately + const uint64_t SKIP_STEP = 16 << 10; + auto &test_obj = get_object(make_oid(0)); + bufferlist bl; + bl.append("foo"); + + test_obj.touch(*sharded_seastore); + for (uint64_t i = 0; i < MAX_EXTENTS; i++) { + test_obj.write(*sharded_seastore, SKIP_STEP * i, bl); + } + + { // fiemap test from 0 to SKIP_STEP * (MAX_EXTENTS - 1) + 3 + auto m = test_obj.fiemap( + *sharded_seastore, 0, SKIP_STEP * (MAX_EXTENTS - 1) + 3); + ASSERT_EQ(m.size(), MAX_EXTENTS); + for (uint64_t i = 0; i < MAX_EXTENTS; i++) { + ASSERT_TRUE(m.count(SKIP_STEP * i)); + ASSERT_GE(m[SKIP_STEP * i], bl.length()); + } + } + + { // fiemap test from SKIP_STEP to SKIP_STEP * (MAX_EXTENTS - 2) + 3 + auto m = test_obj.fiemap( + *sharded_seastore, SKIP_STEP, SKIP_STEP * (MAX_EXTENTS - 3) + 3); + ASSERT_EQ(m.size(), MAX_EXTENTS - 2); + for (uint64_t i = 1; i < MAX_EXTENTS - 1; i++) { + ASSERT_TRUE(m.count(SKIP_STEP * i)); + ASSERT_GE(m[SKIP_STEP * i], bl.length()); + } + } + + { // fiemap test SKIP_STEP + 1 to 2 * SKIP_STEP + 1 (partial overlap) + auto m = test_obj.fiemap( + *sharded_seastore, SKIP_STEP + 1, SKIP_STEP + 1); + ASSERT_EQ(m.size(), 2); + ASSERT_EQ(m.begin()->first, SKIP_STEP + 1); + ASSERT_GE(m.begin()->second, bl.length()); + ASSERT_LE(m.rbegin()->first, (2 * SKIP_STEP) + 1); + ASSERT_EQ(m.rbegin()->first + m.rbegin()->second, 2 * SKIP_STEP + 2); + } + + test_obj.remove(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, sparse_read) +{ + run_async([this] { + const uint64_t MAX_EXTENTS = 100; + const uint64_t SKIP_STEP = 16 << 10; + auto
&test_obj = get_object(make_oid(0)); + bufferlist wbl; + wbl.append("foo"); + + test_obj.touch(*sharded_seastore); + for (uint64_t i = 0; i < MAX_EXTENTS; i++) { + test_obj.write(*sharded_seastore, SKIP_STEP * i, wbl); + } + interval_set<uint64_t> m; + m = interval_set<uint64_t>( + test_obj.fiemap(*sharded_seastore, 0, SKIP_STEP * (MAX_EXTENTS - 1) + 3)); + ASSERT_TRUE(!m.empty()); + uint64_t off = 0; + auto rbl = test_obj.readv(*sharded_seastore, m); + + for (auto &&miter : m) { + bufferlist subl; + subl.substr_of(rbl, off, std::min(miter.second, uint64_t(wbl.length()))); + ASSERT_TRUE(subl.contents_equal(wbl)); + off += miter.second; + } + test_obj.remove(*sharded_seastore); + }); +} + +TEST_P(seastore_test_t, zero) +{ + run_async([this] { + auto test_zero = [this]( + // [(off, len, repeat)] + std::vector<std::tuple<uint64_t, uint64_t, uint64_t>> writes, + uint64_t zero_off, uint64_t zero_len) { + + // Test zero within a block + auto &test_obj = get_object(make_oid(0)); + uint64_t size = 0; + for (auto &[off, len, repeat]: writes) { + for (decltype(repeat) i = 0; i < repeat; ++i) { + test_obj.write(*sharded_seastore, off + (len * repeat), len, 'a'); + } + size = off + (len * (repeat + 1)); + } + test_obj.read( + *sharded_seastore, + 0, + size); + test_obj.check_size(*sharded_seastore); + test_obj.zero(*sharded_seastore, zero_off, zero_len); + test_obj.read( + *sharded_seastore, + 0, + size); + test_obj.check_size(*sharded_seastore); + remove_object(test_obj); + }; + + const uint64_t BS = 4<<10; + + // Test zero within a block + test_zero( + {{1<<10, 1<<10, 1}}, + 1124, 200); + + // Multiple writes, partial on left, partial on right. + test_zero( + {{BS, BS, 10}}, + BS + 128, + BS * 4); + + // Single large write, block boundary on right, partial on left. + test_zero( + {{BS, BS * 10, 1}}, + BS + 128, + (BS * 4) - 128); + + // Multiple writes, block boundary on left, partial on right. 
+ test_zero( + {{BS, BS, 10}}, + BS, + (BS * 4) + 128); + }); +} +INSTANTIATE_TEST_SUITE_P( + seastore_test, + seastore_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); diff --git a/src/test/crimson/seastore/test_seastore_cache.cc b/src/test/crimson/seastore/test_seastore_cache.cc new file mode 100644 index 000000000..b249d27e4 --- /dev/null +++ b/src/test/crimson/seastore/test_seastore_cache.cc @@ -0,0 +1,260 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include "crimson/common/log.h" +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/segment_manager/ephemeral.h" + +#include "test/crimson/seastore/test_block.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct cache_test_t : public seastar_test_suite_t { + segment_manager::EphemeralSegmentManagerRef segment_manager; + ExtentPlacementManagerRef epm; + CacheRef cache; + paddr_t current; + journal_seq_t seq = JOURNAL_SEQ_MIN; + + cache_test_t() = default; + + seastar::future<paddr_t> submit_transaction( + TransactionRef t) { + auto record = cache->prepare_record(*t, JOURNAL_SEQ_NULL, JOURNAL_SEQ_NULL); + + bufferlist bl; + for (auto &&block : record.extents) { + bl.append(block.bl); + } + + ceph_assert((segment_off_t)bl.length() < + segment_manager->get_segment_size()); + if (current.as_seg_paddr().get_segment_off() + (segment_off_t)bl.length() > + segment_manager->get_segment_size()) + current = paddr_t::make_seg_paddr( + segment_id_t( + current.as_seg_paddr().get_segment_id().device_id(), + current.as_seg_paddr().get_segment_id().device_segment_id() + 1), + 0); + + auto prev = current; + current.as_seg_paddr().set_segment_off( + current.as_seg_paddr().get_segment_off() + + bl.length()); + return 
segment_manager->segment_write( + prev, + std::move(bl), + true + ).safe_then( + [this, prev, t=std::move(t)]() mutable { + cache->complete_commit(*t, prev, seq /* TODO */); + return prev; + }, + crimson::ct_error::all_same_way([](auto e) { + ASSERT_FALSE("failed to submit"); + }) + ); + } + + auto get_transaction() { + return cache->create_transaction( + Transaction::src_t::MUTATE, "test_cache", false); + } + + template <typename T, typename... Args> + auto get_extent(Transaction &t, Args&&... args) { + return with_trans_intr( + t, + [this](auto &&... args) { + return cache->get_extent<T>(args...); + }, + std::forward<Args>(args)...); + } + + seastar::future<> set_up_fut() final { + segment_manager = segment_manager::create_test_ephemeral(); + return segment_manager->init( + ).safe_then([this] { + return segment_manager->mkfs( + segment_manager::get_ephemeral_device_config(0, 1, 0)); + }).safe_then([this] { + epm.reset(new ExtentPlacementManager()); + cache.reset(new Cache(*epm)); + current = paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0); + epm->test_init_no_background(segment_manager.get()); + return seastar::do_with( + get_transaction(), + [this](auto &ref_t) { + cache->init(); + return with_trans_intr(*ref_t, [&](auto &t) { + return cache->mkfs(t); + }).safe_then([this, &ref_t] { + return submit_transaction(std::move(ref_t) + ).then([](auto p) {}); + }); + }); + }).handle_error( + crimson::ct_error::all_same_way([](auto e) { + ASSERT_FALSE("failed to submit"); + }) + ); + } + + seastar::future<> tear_down_fut() final { + return cache->close( + ).safe_then([this] { + segment_manager.reset(); + epm.reset(); + cache.reset(); + }).handle_error( + Cache::close_ertr::assert_all{} + ); + } +}; + +TEST_F(cache_test_t, test_addr_fixup) +{ + run_async([this] { + paddr_t addr; + int csum = 0; + { + auto t = get_transaction(); + auto extent = cache->alloc_new_extent<TestBlockPhysical>( + *t, + TestBlockPhysical::SIZE, + 
placement_hint_t::HOT, + 0); + extent->set_contents('c'); + csum = extent->get_crc32c(); + submit_transaction(std::move(t)).get0(); + addr = extent->get_paddr(); + } + { + auto t = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_EQ(extent->get_paddr(), addr); + ASSERT_EQ(extent->get_crc32c(), csum); + } + }); +} + +TEST_F(cache_test_t, test_dirty_extent) +{ + run_async([this] { + paddr_t addr; + int csum = 0; + int csum2 = 0; + { + // write out initial test block + auto t = get_transaction(); + auto extent = cache->alloc_new_extent<TestBlockPhysical>( + *t, + TestBlockPhysical::SIZE, + placement_hint_t::HOT, + 0); + extent->set_contents('c'); + csum = extent->get_crc32c(); + auto reladdr = extent->get_paddr(); + ASSERT_TRUE(reladdr.is_relative()); + { + // test that read with same transaction sees new block though + // uncommitted + auto extent = get_extent<TestBlockPhysical>( + *t, + reladdr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_TRUE(extent->is_clean()); + ASSERT_TRUE(extent->is_pending()); + ASSERT_TRUE(extent->get_paddr().is_relative()); + ASSERT_EQ(extent->get_version(), 0); + ASSERT_EQ(csum, extent->get_crc32c()); + } + submit_transaction(std::move(t)).get0(); + addr = extent->get_paddr(); + } + { + // test that consecutive reads on the same extent get the same ref + auto t = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + auto t2 = get_transaction(); + auto extent2 = get_extent<TestBlockPhysical>( + *t2, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_EQ(&*extent, &*extent2); + } + { + // read back test block + auto t = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + // duplicate and reset contents + extent = cache->duplicate_for_write(*t, extent)->cast<TestBlockPhysical>(); + extent->set_contents('c'); 
+ csum2 = extent->get_crc32c(); + ASSERT_EQ(extent->get_paddr(), addr); + { + // test that concurrent read with fresh transaction sees old + // block + auto t2 = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t2, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_TRUE(extent->is_clean()); + ASSERT_FALSE(extent->is_pending()); + ASSERT_EQ(addr, extent->get_paddr()); + ASSERT_EQ(extent->get_version(), 0); + ASSERT_EQ(csum, extent->get_crc32c()); + } + { + // test that read with same transaction sees new block + auto extent = get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_TRUE(extent->is_dirty()); + ASSERT_TRUE(extent->is_pending()); + ASSERT_EQ(addr, extent->get_paddr()); + ASSERT_EQ(extent->get_version(), 1); + ASSERT_EQ(csum2, extent->get_crc32c()); + } + // submit transaction + submit_transaction(std::move(t)).get0(); + ASSERT_TRUE(extent->is_dirty()); + ASSERT_EQ(addr, extent->get_paddr()); + ASSERT_EQ(extent->get_version(), 1); + ASSERT_EQ(extent->get_crc32c(), csum2); + } + { + // test that fresh transaction now sees newly dirty block + auto t = get_transaction(); + auto extent = get_extent<TestBlockPhysical>( + *t, + addr, + TestBlockPhysical::SIZE).unsafe_get0(); + ASSERT_TRUE(extent->is_dirty()); + ASSERT_EQ(addr, extent->get_paddr()); + ASSERT_EQ(extent->get_version(), 1); + ASSERT_EQ(csum2, extent->get_crc32c()); + } + }); +} diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc new file mode 100644 index 000000000..46ec723a3 --- /dev/null +++ b/src/test/crimson/seastore/test_seastore_journal.cc @@ -0,0 +1,343 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include <random> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/async_cleaner.h" +#include "crimson/os/seastore/journal.h" +#include 
"crimson/os/seastore/segment_manager/ephemeral.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct record_validator_t { + record_t record; + paddr_t record_final_offset; + + template <typename... T> + record_validator_t(T&&... record) : record(std::forward<T>(record)...) {} + + void validate(SegmentManager &manager) { + paddr_t addr = make_record_relative_paddr(0); + for (auto &&block : record.extents) { + auto test = manager.read( + record_final_offset.add_relative(addr), + block.bl.length()).unsafe_get0(); + addr = addr.add_offset(block.bl.length()); + bufferlist bl; + bl.push_back(test); + ASSERT_EQ( + bl.length(), + block.bl.length()); + ASSERT_EQ( + bl.begin().crc32c(bl.length(), 1), + block.bl.begin().crc32c(block.bl.length(), 1)); + } + } + + auto get_replay_handler() { + auto checker = [this, iter=record.deltas.begin()] ( + paddr_t base, + const delta_info_t &di) mutable { + EXPECT_EQ(base, record_final_offset); + ceph_assert(iter != record.deltas.end()); + EXPECT_EQ(di, *iter++); + EXPECT_EQ(base, record_final_offset); + return iter != record.deltas.end(); + }; + if (record.deltas.size()) { + return std::make_optional(std::move(checker)); + } else { + return std::optional<decltype(checker)>(); + } + } +}; + +struct journal_test_t : seastar_test_suite_t, SegmentProvider, JournalTrimmer { + segment_manager::EphemeralSegmentManagerRef segment_manager; + WritePipeline pipeline; + JournalRef journal; + + std::vector<record_validator_t> records; + + std::default_random_engine generator; + + extent_len_t block_size; + + SegmentManagerGroupRef sms; + + segment_id_t next; + + std::map<segment_id_t, segment_seq_t> segment_seqs; + std::map<segment_id_t, segment_type_t> segment_types; + + journal_seq_t dummy_tail; + + mutable segment_info_t tmp_info; + + journal_test_t() = default; + + /* + * 
JournalTrimmer interfaces + */ + journal_seq_t get_journal_head() const final { return dummy_tail; } + + void set_journal_head(journal_seq_t) final {} + + journal_seq_t get_dirty_tail() const final { return dummy_tail; } + + journal_seq_t get_alloc_tail() const final { return dummy_tail; } + + void update_journal_tails(journal_seq_t, journal_seq_t) final {} + + bool try_reserve_inline_usage(std::size_t) final { return true; } + + void release_inline_usage(std::size_t) final {} + + std::size_t get_trim_size_per_cycle() const final { + return 0; + } + + /* + * SegmentProvider interfaces + */ + const segment_info_t& get_seg_info(segment_id_t id) const final { + tmp_info = {}; + tmp_info.seq = segment_seqs.at(id); + tmp_info.type = segment_types.at(id); + return tmp_info; + } + + segment_id_t allocate_segment( + segment_seq_t seq, + segment_type_t type, + data_category_t, + rewrite_gen_t + ) final { + auto ret = next; + next = segment_id_t{ + segment_manager->get_device_id(), + next.device_segment_id() + 1}; + segment_seqs[ret] = seq; + segment_types[ret] = type; + return ret; + } + + void close_segment(segment_id_t) final {} + + void update_segment_avail_bytes(segment_type_t, paddr_t) final {} + + void update_modify_time(segment_id_t, sea_time_point, std::size_t) final {} + + SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); } + + seastar::future<> set_up_fut() final { + segment_manager = segment_manager::create_test_ephemeral(); + return segment_manager->init( + ).safe_then([this] { + return segment_manager->mkfs( + segment_manager::get_ephemeral_device_config(0, 1, 0)); + }).safe_then([this] { + block_size = segment_manager->get_block_size(); + sms.reset(new SegmentManagerGroup()); + next = segment_id_t(segment_manager->get_device_id(), 0); + journal = journal::make_segmented(*this, *this); + journal->set_write_pipeline(&pipeline); + sms->add_segment_manager(segment_manager.get()); + return journal->open_for_mkfs(); + 
}).safe_then([this](auto) { + dummy_tail = journal_seq_t{0, + paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0)}; + }, crimson::ct_error::all_same_way([] { + ASSERT_FALSE("Unable to mount"); + })); + } + + seastar::future<> tear_down_fut() final { + return journal->close( + ).safe_then([this] { + segment_manager.reset(); + sms.reset(); + journal.reset(); + }).handle_error( + crimson::ct_error::all_same_way([](auto e) { + ASSERT_FALSE("Unable to close"); + }) + ); + } + + template <typename T> + auto replay(T &&f) { + return journal->close( + ).safe_then([this, f=std::move(f)]() mutable { + journal = journal::make_segmented(*this, *this); + journal->set_write_pipeline(&pipeline); + return journal->replay(std::forward<T>(std::move(f))); + }).safe_then([this] { + return journal->open_for_mount(); + }); + } + + auto replay_and_check() { + auto record_iter = records.begin(); + decltype(record_iter->get_replay_handler()) delta_checker = std::nullopt; + auto advance = [this, &record_iter, &delta_checker] { + ceph_assert(!delta_checker); + while (record_iter != records.end()) { + auto checker = record_iter->get_replay_handler(); + record_iter++; + if (checker) { + delta_checker.emplace(std::move(*checker)); + break; + } + } + }; + advance(); + replay( + [&advance, + &delta_checker] + (const auto &offsets, + const auto &di, + const journal_seq_t &, + const journal_seq_t &, + auto t) mutable { + if (!delta_checker) { + EXPECT_FALSE("No Deltas Left"); + } + if (!(*delta_checker)(offsets.record_block_base, di)) { + delta_checker = std::nullopt; + advance(); + } + return Journal::replay_ertr::make_ready_future<bool>(true); + }).unsafe_get0(); + ASSERT_EQ(record_iter, records.end()); + for (auto &i : records) { + i.validate(*segment_manager); + } + } + + template <typename... T> + auto submit_record(T&&... 
_record) { + auto record{std::forward<T>(_record)...}; + records.push_back(record); + OrderingHandle handle = get_dummy_ordering_handle(); + auto [addr, _] = journal->submit_record( + std::move(record), + handle).unsafe_get0(); + records.back().record_final_offset = addr; + return addr; + } + + extent_t generate_extent(size_t blocks) { + std::uniform_int_distribution<char> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = distribution(generator); + bufferlist bl; + bl.append(buffer::ptr(buffer::create(blocks * block_size, contents))); + return extent_t{ + extent_types_t::TEST_BLOCK, + L_ADDR_NULL, + bl}; + } + + delta_info_t generate_delta(size_t bytes) { + std::uniform_int_distribution<char> distribution( + std::numeric_limits<char>::min(), + std::numeric_limits<char>::max() + ); + char contents = distribution(generator); + bufferlist bl; + bl.append(buffer::ptr(buffer::create(bytes, contents))); + return delta_info_t{ + extent_types_t::TEST_BLOCK, + paddr_t{}, + L_ADDR_NULL, + 0, 0, + block_size, + 1, + MAX_SEG_SEQ, + segment_type_t::NULL_SEG, + bl + }; + } +}; + +TEST_F(journal_test_t, replay_one_journal_segment) +{ + run_async([this] { + submit_record(record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(23), generate_delta(30) } + }); + replay_and_check(); + }); +} + +TEST_F(journal_test_t, replay_two_records) +{ + run_async([this] { + submit_record(record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(23), generate_delta(30) } + }); + submit_record(record_t{ + { generate_extent(4), generate_extent(1) }, + { generate_delta(23), generate_delta(400) } + }); + replay_and_check(); + }); +} + +TEST_F(journal_test_t, replay_twice) +{ + run_async([this] { + submit_record(record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(23), generate_delta(30) } + }); + submit_record(record_t{ + { generate_extent(4), generate_extent(1) }, + { 
generate_delta(23), generate_delta(400) } + }); + replay_and_check(); + submit_record(record_t{ + { generate_extent(2), generate_extent(5) }, + { generate_delta(230), generate_delta(40) } + }); + replay_and_check(); + }); +} + +TEST_F(journal_test_t, roll_journal_and_replay) +{ + run_async([this] { + paddr_t current = submit_record( + record_t{ + { generate_extent(1), generate_extent(2) }, + { generate_delta(23), generate_delta(30) } + }); + auto starting_segment = current.as_seg_paddr().get_segment_id(); + unsigned so_far = 0; + while (current.as_seg_paddr().get_segment_id() == starting_segment) { + current = submit_record(record_t{ + { generate_extent(512), generate_extent(512) }, + { generate_delta(23), generate_delta(400) } + }); + ++so_far; + ASSERT_FALSE(so_far > 10); + } + replay_and_check(); + }); +} diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc new file mode 100644 index 000000000..1148884a0 --- /dev/null +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -0,0 +1,1995 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <random> + +#include <boost/iterator/counting_iterator.hpp> + +#include "test/crimson/gtest_seastar.h" +#include "test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/segment_manager/ephemeral.h" +#include "crimson/os/seastore/segment_manager.h" + +#include "test/crimson/seastore/test_block.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct test_extent_record_t { + test_extent_desc_t desc; + unsigned refcount = 0; + test_extent_record_t() = default; + test_extent_record_t( + const 
test_extent_desc_t &desc, + unsigned refcount) : desc(desc), refcount(refcount) {} + + void update(const test_extent_desc_t &to) { + desc = to; + } + + bool operator==(const test_extent_desc_t &rhs) const { + return desc == rhs; + } + bool operator!=(const test_extent_desc_t &rhs) const { + return desc != rhs; + } +}; + +template<> +struct fmt::formatter<test_extent_record_t> : fmt::formatter<std::string_view> { + template <typename FormatContext> + auto format(const test_extent_record_t& r, FormatContext& ctx) const { + return fmt::format_to(ctx.out(), "test_extent_record_t({}, refcount={})", + r.desc, r.refcount); + } +}; + +struct transaction_manager_test_t : + public seastar_test_suite_t, + TMTestState { + + std::random_device rd; + std::mt19937 gen; + + transaction_manager_test_t(std::size_t num_main_devices, std::size_t num_cold_devices) + : TMTestState(num_main_devices, num_cold_devices), gen(rd()) { + } + + laddr_t get_random_laddr(size_t block_size, laddr_t limit) { + return block_size * + std::uniform_int_distribution<>(0, (limit / block_size) - 1)(gen); + } + + char get_random_contents() { + return static_cast<char>(std::uniform_int_distribution<>(0, 255)(gen)); + } + + seastar::future<> set_up_fut() final { + return tm_setup(); + } + + seastar::future<> tear_down_fut() final { + return tm_teardown(); + } + + struct test_extents_t : std::map<laddr_t, test_extent_record_t> { + using delta_t = std::map<laddr_t, std::optional<test_extent_record_t>>; + std::map<laddr_t, uint64_t> laddr_write_seq; + + struct delta_overlay_t { + const test_extents_t &extents; + const delta_t δ + + delta_overlay_t( + const test_extents_t &extents, + const delta_t &delta) + : extents(extents), delta(delta) {} + + + class iterator { + friend class test_extents_t; + + const delta_overlay_t &parent; + test_extents_t::const_iterator biter; + delta_t::const_iterator oiter; + std::optional<std::pair<laddr_t, test_extent_record_t>> cur; + + iterator( + const delta_overlay_t &parent, + 
test_extents_t::const_iterator biter, + delta_t::const_iterator oiter) + : parent(parent), biter(biter), oiter(oiter) {} + + laddr_t get_bkey() { + return biter == parent.extents.end() ? L_ADDR_MAX : biter->first; + } + + laddr_t get_okey() { + return oiter == parent.delta.end() ? L_ADDR_MAX : oiter->first; + } + + bool is_end() { + return oiter == parent.delta.end() && biter == parent.extents.end(); + } + + bool is_valid() { + return is_end() || + ((get_okey() < get_bkey()) && (oiter->second)) || + (get_okey() > get_bkey()); + } + + auto get_pair() { + assert(is_valid()); + assert(!is_end()); + auto okey = get_okey(); + auto bkey = get_bkey(); + return ( + bkey < okey ? + std::pair<laddr_t, test_extent_record_t>(*biter) : + std::make_pair(okey, *(oiter->second))); + } + + void adjust() { + while (!is_valid()) { + if (get_okey() < get_bkey()) { + assert(!oiter->second); + ++oiter; + } else { + assert(get_okey() == get_bkey()); + ++biter; + } + } + assert(is_valid()); + if (!is_end()) { + cur = get_pair(); + } else { + cur = std::nullopt; + } + } + + public: + iterator(const iterator &) = default; + iterator(iterator &&) = default; + + iterator &operator++() { + assert(is_valid()); + assert(!is_end()); + if (get_bkey() < get_okey()) { + ++biter; + } else { + ++oiter; + } + adjust(); + return *this; + } + + bool operator==(const iterator &o) const { + return o.biter == biter && o.oiter == oiter; + } + bool operator!=(const iterator &o) const { + return !(*this == o); + } + + auto operator*() { + assert(!is_end()); + return *cur; + } + auto operator->() { + assert(!is_end()); + return &*cur; + } + }; + + iterator begin() { + auto ret = iterator{*this, extents.begin(), delta.begin()}; + ret.adjust(); + return ret; + } + + iterator end() { + auto ret = iterator{*this, extents.end(), delta.end()}; + // adjust unnecessary + return ret; + } + + iterator lower_bound(laddr_t l) { + auto ret = iterator{*this, extents.lower_bound(l), delta.lower_bound(l)}; + ret.adjust(); + 
return ret; + } + + iterator upper_bound(laddr_t l) { + auto ret = iterator{*this, extents.upper_bound(l), delta.upper_bound(l)}; + ret.adjust(); + return ret; + } + + iterator find(laddr_t l) { + auto ret = lower_bound(l); + if (ret == end() || ret->first != l) { + return end(); + } else { + return ret; + } + } + }; + private: + void check_available( + laddr_t addr, extent_len_t len, const delta_t &delta + ) const { + delta_overlay_t overlay(*this, delta); + for (const auto &i: overlay) { + if (i.first < addr) { + EXPECT_FALSE(i.first + i.second.desc.len > addr); + } else { + EXPECT_FALSE(addr + len > i.first); + } + } + } + + void check_hint( + laddr_t hint, + laddr_t addr, + extent_len_t len, + delta_t &delta) const { + delta_overlay_t overlay(*this, delta); + auto iter = overlay.lower_bound(hint); + laddr_t last = hint; + while (true) { + if (iter == overlay.end() || iter->first > addr) { + EXPECT_EQ(addr, last); + break; + } + EXPECT_FALSE(iter->first - last > len); + last = iter->first + iter->second.desc.len; + ++iter; + } + } + + std::optional<test_extent_record_t> &populate_delta( + laddr_t addr, delta_t &delta, const test_extent_desc_t *desc) const { + auto diter = delta.find(addr); + if (diter != delta.end()) + return diter->second; + + auto iter = find(addr); + if (iter == end()) { + assert(desc); + auto ret = delta.emplace( + std::make_pair(addr, test_extent_record_t{*desc, 0})); + assert(ret.second); + return ret.first->second; + } else { + auto ret = delta.emplace(*iter); + assert(ret.second); + return ret.first->second; + } + } + public: + delta_overlay_t get_overlay(const delta_t &delta) const { + return delta_overlay_t{*this, delta}; + } + + void insert(TestBlock &extent, delta_t &delta) const { + check_available(extent.get_laddr(), extent.get_length(), delta); + delta[extent.get_laddr()] = + test_extent_record_t{extent.get_desc(), 1}; + } + + void alloced(laddr_t hint, TestBlock &extent, delta_t &delta) const { + check_hint(hint, 
extent.get_laddr(), extent.get_length(), delta); + insert(extent, delta); + } + + bool contains(laddr_t addr, const delta_t &delta) const { + delta_overlay_t overlay(*this, delta); + return overlay.find(addr) != overlay.end(); + } + + test_extent_record_t get(laddr_t addr, const delta_t &delta) const { + delta_overlay_t overlay(*this, delta); + auto iter = overlay.find(addr); + assert(iter != overlay.end()); + return iter->second; + } + + void update( + laddr_t addr, + const test_extent_desc_t &desc, + delta_t &delta) const { + auto &rec = populate_delta(addr, delta, &desc); + assert(rec); + rec->desc = desc; + } + + int inc_ref( + laddr_t addr, + delta_t &delta) const { + auto &rec = populate_delta(addr, delta, nullptr); + assert(rec); + return ++rec->refcount; + } + + int dec_ref( + laddr_t addr, + delta_t &delta) const { + auto &rec = populate_delta(addr, delta, nullptr); + assert(rec); + assert(rec->refcount > 0); + rec->refcount--; + if (rec->refcount == 0) { + delta[addr] = std::nullopt; + return 0; + } else { + return rec->refcount; + } + } + + void consume(const delta_t &delta, const uint64_t write_seq = 0) { + for (const auto &i : delta) { + if (i.second) { + if (laddr_write_seq.find(i.first) == laddr_write_seq.end() || + laddr_write_seq[i.first] <= write_seq) { + (*this)[i.first] = *i.second; + laddr_write_seq[i.first] = write_seq; + } + } else { + erase(i.first); + } + } + } + + } test_mappings; + + struct test_transaction_t { + TransactionRef t; + test_extents_t::delta_t mapping_delta; + }; + + test_transaction_t create_transaction() { + return { create_mutate_transaction(), {} }; + } + + test_transaction_t create_read_test_transaction() { + return {create_read_transaction(), {} }; + } + + test_transaction_t create_weak_test_transaction() { + return { create_weak_transaction(), {} }; + } + + TestBlockRef alloc_extent( + test_transaction_t &t, + laddr_t hint, + extent_len_t len, + char contents) { + auto extent = with_trans_intr(*(t.t), [&](auto& trans) 
{ + return tm->alloc_extent<TestBlock>(trans, hint, len); + }).unsafe_get0(); + extent->set_contents(contents); + EXPECT_FALSE(test_mappings.contains(extent->get_laddr(), t.mapping_delta)); + EXPECT_EQ(len, extent->get_length()); + test_mappings.alloced(hint, *extent, t.mapping_delta); + return extent; + } + + TestBlockRef alloc_extent( + test_transaction_t &t, + laddr_t hint, + extent_len_t len) { + return alloc_extent( + t, + hint, + len, + get_random_contents()); + } + + bool check_usage() { + return epm->check_usage(); + } + + void replay() { + EXPECT_TRUE(check_usage()); + restart(); + } + + void check() { + check_mappings(); + check_usage(); + } + + void check_mappings() { + auto t = create_weak_test_transaction(); + check_mappings(t); + } + + TestBlockRef get_extent( + test_transaction_t &t, + laddr_t addr, + extent_len_t len) { + ceph_assert(test_mappings.contains(addr, t.mapping_delta)); + ceph_assert(test_mappings.get(addr, t.mapping_delta).desc.len == len); + + auto ext = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->read_extent<TestBlock>(trans, addr, len); + }).unsafe_get0(); + EXPECT_EQ(addr, ext->get_laddr()); + return ext; + } + + TestBlockRef try_get_extent( + test_transaction_t &t, + laddr_t addr) { + ceph_assert(test_mappings.contains(addr, t.mapping_delta)); + + using ertr = with_trans_ertr<TransactionManager::read_extent_iertr>; + using ret = ertr::future<TestBlockRef>; + auto ext = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->read_extent<TestBlock>(trans, addr); + }).safe_then([](auto ext) -> ret { + return ertr::make_ready_future<TestBlockRef>(ext); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<TestBlockRef>(); + }, + crimson::ct_error::assert_all{ + "get_extent got invalid error" + } + ).get0(); + if (ext) { + EXPECT_EQ(addr, ext->get_laddr()); + } + return ext; + } + + TestBlockRef try_get_extent( + test_transaction_t &t, + laddr_t addr, + extent_len_t len) { + 
ceph_assert(test_mappings.contains(addr, t.mapping_delta)); + ceph_assert(test_mappings.get(addr, t.mapping_delta).desc.len == len); + + using ertr = with_trans_ertr<TransactionManager::read_extent_iertr>; + using ret = ertr::future<TestBlockRef>; + auto ext = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->read_extent<TestBlock>(trans, addr, len); + }).safe_then([](auto ext) -> ret { + return ertr::make_ready_future<TestBlockRef>(ext); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<TestBlockRef>(); + }, + crimson::ct_error::assert_all{ + "get_extent got invalid error" + } + ).get0(); + if (ext) { + EXPECT_EQ(addr, ext->get_laddr()); + } + return ext; + } + + TestBlockRef try_read_pin( + test_transaction_t &t, + LBAMappingRef &&pin) { + using ertr = with_trans_ertr<TransactionManager::base_iertr>; + using ret = ertr::future<TestBlockRef>; + auto addr = pin->get_key(); + auto ext = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->read_pin<TestBlock>(trans, std::move(pin)); + }).safe_then([](auto ext) -> ret { + return ertr::make_ready_future<TestBlockRef>(ext); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<TestBlockRef>(); + }, + crimson::ct_error::assert_all{ + "read_pin got invalid error" + } + ).get0(); + if (ext) { + EXPECT_EQ(addr, ext->get_laddr()); + } + if (t.t->is_conflicted()) { + return nullptr; + } + return ext; + } + + test_block_mutator_t mutator; + TestBlockRef mutate_extent( + test_transaction_t &t, + TestBlockRef ref) { + ceph_assert(test_mappings.contains(ref->get_laddr(), t.mapping_delta)); + ceph_assert( + test_mappings.get(ref->get_laddr(), t.mapping_delta).desc.len == + ref->get_length()); + + auto ext = tm->get_mutable_extent(*t.t, ref)->cast<TestBlock>(); + EXPECT_EQ(ext->get_laddr(), ref->get_laddr()); + EXPECT_EQ(ext->get_desc(), ref->get_desc()); + mutator.mutate(*ext, gen); + + test_mappings.update(ext->get_laddr(), 
ext->get_desc(), t.mapping_delta); + return ext; + } + + TestBlockRef mutate_addr( + test_transaction_t &t, + laddr_t offset, + size_t length) { + auto ext = get_extent(t, offset, length); + mutate_extent(t, ext); + return ext; + } + + LBAMappingRef get_pin( + test_transaction_t &t, + laddr_t offset) { + ceph_assert(test_mappings.contains(offset, t.mapping_delta)); + auto pin = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->get_pin(trans, offset); + }).unsafe_get0(); + EXPECT_EQ(offset, pin->get_key()); + return pin; + } + + LBAMappingRef try_get_pin( + test_transaction_t &t, + laddr_t offset) { + ceph_assert(test_mappings.contains(offset, t.mapping_delta)); + using ertr = with_trans_ertr<TransactionManager::get_pin_iertr>; + using ret = ertr::future<LBAMappingRef>; + auto pin = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->get_pin(trans, offset); + }).safe_then([](auto pin) -> ret { + return ertr::make_ready_future<LBAMappingRef>(std::move(pin)); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<LBAMappingRef>(); + }, + crimson::ct_error::assert_all{ + "get_extent got invalid error" + } + ).get0(); + if (pin) { + EXPECT_EQ(offset, pin->get_key()); + } + return pin; + } + + void inc_ref(test_transaction_t &t, laddr_t offset) { + ceph_assert(test_mappings.contains(offset, t.mapping_delta)); + ceph_assert(test_mappings.get(offset, t.mapping_delta).refcount > 0); + + auto refcnt = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->inc_ref(trans, offset); + }).unsafe_get0(); + auto check_refcnt = test_mappings.inc_ref(offset, t.mapping_delta); + EXPECT_EQ(refcnt, check_refcnt); + } + + void dec_ref(test_transaction_t &t, laddr_t offset) { + ceph_assert(test_mappings.contains(offset, t.mapping_delta)); + ceph_assert(test_mappings.get(offset, t.mapping_delta).refcount > 0); + + auto refcnt = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->dec_ref(trans, offset); + }).unsafe_get0(); + auto 
check_refcnt = test_mappings.dec_ref(offset, t.mapping_delta); + EXPECT_EQ(refcnt, check_refcnt); + if (refcnt == 0) + logger().debug("dec_ref: {} at refcount 0", offset); + } + + void check_mappings(test_transaction_t &t) { + auto overlay = test_mappings.get_overlay(t.mapping_delta); + for (const auto &i: overlay) { + logger().debug("check_mappings: {}->{}", i.first, i.second); + auto ext = get_extent(t, i.first, i.second.desc.len); + EXPECT_EQ(i.second, ext->get_desc()); + } + with_trans_intr( + *t.t, + [this, &overlay](auto &t) { + return lba_manager->scan_mappings( + t, + 0, + L_ADDR_MAX, + [iter=overlay.begin(), &overlay](auto l, auto p, auto len) mutable { + EXPECT_NE(iter, overlay.end()); + logger().debug( + "check_mappings: scan {}", + l); + EXPECT_EQ(l, iter->first); + ++iter; + }); + }).unsafe_get0(); + (void)with_trans_intr( + *t.t, + [=, this](auto &t) { + return lba_manager->check_child_trackers(t); + }).unsafe_get0(); + } + + bool try_submit_transaction(test_transaction_t t) { + using ertr = with_trans_ertr<TransactionManager::submit_transaction_iertr>; + using ret = ertr::future<bool>; + uint64_t write_seq = 0; + bool success = submit_transaction_fut_with_seq(*t.t + ).safe_then([&write_seq](auto seq) -> ret { + write_seq = seq; + return ertr::make_ready_future<bool>(true); + }).handle_error( + [](const crimson::ct_error::eagain &e) { + return seastar::make_ready_future<bool>(false); + }, + crimson::ct_error::assert_all{ + "try_submit_transaction hit invalid error" + } + ).then([this](auto ret) { + return epm->run_background_work_until_halt( + ).then([ret] { return ret; }); + }).get0(); + + if (success) { + test_mappings.consume(t.mapping_delta, write_seq); + } + + return success; + } + + void submit_transaction(test_transaction_t &&t) { + bool success = try_submit_transaction(std::move(t)); + EXPECT_TRUE(success); + } + + void submit_transaction_expect_conflict(test_transaction_t &&t) { + bool success = try_submit_transaction(std::move(t)); + 
EXPECT_FALSE(success); + } + + auto allocate_sequentially(const size_t size, const int num, bool run_clean = true) { + return repeat_eagain([this, size, num] { + return seastar::do_with( + create_transaction(), + [this, size, num](auto &t) { + return with_trans_intr( + *t.t, + [&t, this, size, num](auto &) { + return trans_intr::do_for_each( + boost::make_counting_iterator(0), + boost::make_counting_iterator(num), + [&t, this, size](auto) { + return tm->alloc_extent<TestBlock>( + *(t.t), L_ADDR_MIN, size + ).si_then([&t, this, size](auto extent) { + extent->set_contents(get_random_contents()); + EXPECT_FALSE( + test_mappings.contains(extent->get_laddr(), t.mapping_delta)); + EXPECT_EQ(size, extent->get_length()); + test_mappings.alloced(extent->get_laddr(), *extent, t.mapping_delta); + return seastar::now(); + }); + }).si_then([&t, this] { + return tm->submit_transaction(*t.t); + }); + }).safe_then([&t, this] { + test_mappings.consume(t.mapping_delta); + }); + }); + }).safe_then([this, run_clean]() { + if (run_clean) { + return epm->run_background_work_until_halt(); + } else { + return epm->background_process.trimmer->trim(); + } + }).handle_error( + crimson::ct_error::assert_all{ + "Invalid error in SeaStore::list_collections" + } + ); + } + + void test_parallel_extent_read() { + constexpr size_t TOTAL = 4<<20; + constexpr size_t BSIZE = 4<<10; + constexpr size_t BLOCKS = TOTAL / BSIZE; + run_async([this] { + for (unsigned i = 0; i < BLOCKS; ++i) { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + i * BSIZE, + BSIZE); + ASSERT_EQ(i * BSIZE, extent->get_laddr()); + submit_transaction(std::move(t)); + } + + seastar::do_with( + create_read_test_transaction(), + [this](auto &t) { + return with_trans_intr(*(t.t), [this](auto &t) { + return trans_intr::parallel_for_each( + boost::make_counting_iterator(0lu), + boost::make_counting_iterator(BLOCKS), + [this, &t](auto i) { + return tm->read_extent<TestBlock>(t, i * BSIZE, BSIZE + ).si_then([](auto) { + 
return seastar::now(); + }); + }); + }); + }).unsafe_get0(); + }); + } + + void test_random_writes_concurrent() { + constexpr unsigned WRITE_STREAMS = 256; + + constexpr size_t TOTAL = 4<<20; + constexpr size_t BSIZE = 4<<10; + constexpr size_t BLOCKS = TOTAL / BSIZE; + run_async([this] { + std::for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(WRITE_STREAMS), + [&](auto idx) { + for (unsigned i = idx; i < BLOCKS; i += WRITE_STREAMS) { + while (true) { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + i * BSIZE, + BSIZE); + ASSERT_EQ(i * BSIZE, extent->get_laddr()); + if (try_submit_transaction(std::move(t))) + break; + } + } + }); + + int writes = 0; + unsigned failures = 0; + seastar::parallel_for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(WRITE_STREAMS), + [&](auto) { + return seastar::async([&] { + while (writes < 300) { + auto t = create_transaction(); + auto ext = try_get_extent( + t, + get_random_laddr(BSIZE, TOTAL), + BSIZE); + if (!ext){ + failures++; + continue; + } + auto mut = mutate_extent(t, ext); + auto success = try_submit_transaction(std::move(t)); + writes += success; + failures += !success; + } + }); + }).get0(); + replay(); + logger().info("random_writes_concurrent: checking"); + check(); + logger().info( + "random_writes_concurrent: {} suceeded, {} failed", + writes, + failures + ); + }); + } + + void test_evict() { + // only support segmented backend currently + ASSERT_EQ(epm->get_main_backend_type(), backend_type_t::SEGMENTED); + ASSERT_TRUE(epm->background_process.has_cold_tier()); + constexpr size_t device_size = + segment_manager::DEFAULT_TEST_EPHEMERAL.size; + constexpr size_t block_size = + segment_manager::DEFAULT_TEST_EPHEMERAL.block_size; + constexpr size_t segment_size = + segment_manager::DEFAULT_TEST_EPHEMERAL.segment_size; + ASSERT_GE(segment_size, block_size * 20); + + run_async([this] { + // indicates there is no available segments to reclaim + 
double stop_ratio = (double)segment_size / (double)device_size / 2; + // 1 segment + double default_ratio = stop_ratio * 2; + // 1.25 segment + double fast_ratio = stop_ratio * 2.5; + + epm->background_process + .eviction_state + .init(stop_ratio, default_ratio, fast_ratio); + + // these variables are described in + // EPM::BackgroundProcess::eviction_state_t::maybe_update_eviction_mode + size_t ratio_A_size = segment_size / 2 - block_size * 10; + size_t ratio_B_size = segment_size / 2 + block_size * 10; + size_t ratio_C_size = segment_size + block_size; + size_t ratio_D_size = segment_size * 1.25 + block_size; + + auto run_until = [this](size_t size) -> seastar::future<> { + return seastar::repeat([this, size] { + size_t current_size = epm->background_process + .main_cleaner->get_stat().data_stored; + if (current_size >= size) { + return seastar::futurize_invoke([] { + return seastar::stop_iteration::yes; + }); + } else { + int num = (size - current_size) / block_size; + return seastar::do_for_each( + boost::make_counting_iterator(0), + boost::make_counting_iterator(num), + [this](auto) { + // don't start background process to test the behavior + // of generation changes during alloc new extents + return allocate_sequentially(block_size, 1, false); + }).then([] { + return seastar::stop_iteration::no; + }); + } + }); + }; + + std::vector<extent_types_t> all_extent_types{ + extent_types_t::ROOT, + extent_types_t::LADDR_INTERNAL, + extent_types_t::LADDR_LEAF, + extent_types_t::OMAP_INNER, + extent_types_t::OMAP_LEAF, + extent_types_t::ONODE_BLOCK_STAGED, + extent_types_t::COLL_BLOCK, + extent_types_t::OBJECT_DATA_BLOCK, + extent_types_t::RETIRED_PLACEHOLDER, + extent_types_t::ALLOC_INFO, + extent_types_t::JOURNAL_TAIL, + extent_types_t::TEST_BLOCK, + extent_types_t::TEST_BLOCK_PHYSICAL, + extent_types_t::BACKREF_INTERNAL, + extent_types_t::BACKREF_LEAF + }; + + std::vector<rewrite_gen_t> all_generations; + for (auto i = INIT_GENERATION; i < REWRITE_GENERATIONS; i++) 
{ + all_generations.push_back(i); + } + + // input target-generation -> expected generation after the adjustment + using generation_mapping_t = std::map<rewrite_gen_t, rewrite_gen_t>; + std::map<extent_types_t, generation_mapping_t> expected_generations; + + // this loop should be consistent with EPM::adjust_generation + for (auto t : all_extent_types) { + expected_generations[t] = {}; + if (!is_logical_type(t)) { + for (auto gen : all_generations) { + expected_generations[t][gen] = INLINE_GENERATION; + } + } else { + if (get_extent_category(t) == data_category_t::METADATA) { + expected_generations[t][INIT_GENERATION] = INLINE_GENERATION; + } else { + expected_generations[t][INIT_GENERATION] = OOL_GENERATION; + } + + for (auto i = INIT_GENERATION + 1; i < REWRITE_GENERATIONS; i++) { + expected_generations[t][i] = i; + } + } + } + + auto update_data_gen_mapping = [&](std::function<rewrite_gen_t(rewrite_gen_t)> func) { + for (auto t : all_extent_types) { + if (!is_logical_type(t)) { + continue; + } + for (auto i = INIT_GENERATION + 1; i < REWRITE_GENERATIONS; i++) { + expected_generations[t][i] = func(i); + } + } + // since background process didn't start in allocate_sequentially + // we update eviction mode manually. 
+ epm->background_process.maybe_update_eviction_mode(); + }; + + auto test_gen = [&](const char *caller) { + for (auto t : all_extent_types) { + for (auto gen : all_generations) { + auto epm_gen = epm->adjust_generation( + get_extent_category(t), + t, + placement_hint_t::HOT, + gen); + if (expected_generations[t][gen] != epm_gen) { + logger().error("caller: {}, extent type: {}, input generation: {}, " + "expected generation : {}, adjust result from EPM: {}", + caller, t, gen, expected_generations[t][gen], epm_gen); + } + EXPECT_EQ(expected_generations[t][gen], epm_gen); + } + } + }; + + // verify that no data should go to the cold tier + update_data_gen_mapping([](rewrite_gen_t gen) -> rewrite_gen_t { + if (gen == MIN_COLD_GENERATION) { + return MIN_COLD_GENERATION - 1; + } else { + return gen; + } + }); + test_gen("init"); + + run_until(ratio_A_size).get(); + EXPECT_TRUE(epm->background_process.eviction_state.is_stop_mode()); + test_gen("exceed ratio A"); + epm->run_background_work_until_halt().get(); + + run_until(ratio_B_size).get(); + EXPECT_TRUE(epm->background_process.eviction_state.is_stop_mode()); + test_gen("exceed ratio B"); + epm->run_background_work_until_halt().get(); + + // verify that data may go to the cold tier + run_until(ratio_C_size).get(); + update_data_gen_mapping([](rewrite_gen_t gen) { return gen; }); + EXPECT_TRUE(epm->background_process.eviction_state.is_default_mode()); + test_gen("exceed ratio C"); + epm->run_background_work_until_halt().get(); + + // verify that data must go to the cold tier + run_until(ratio_D_size).get(); + update_data_gen_mapping([](rewrite_gen_t gen) { + if (gen >= MIN_REWRITE_GENERATION && gen < MIN_COLD_GENERATION) { + return MIN_COLD_GENERATION; + } else { + return gen; + } + }); + EXPECT_TRUE(epm->background_process.eviction_state.is_fast_mode()); + test_gen("exceed ratio D"); + + auto main_size = epm->background_process.main_cleaner->get_stat().data_stored; + auto cold_size = 
epm->background_process.cold_cleaner->get_stat().data_stored; + EXPECT_EQ(cold_size, 0); + epm->run_background_work_until_halt().get(); + auto new_main_size = epm->background_process.main_cleaner->get_stat().data_stored; + auto new_cold_size = epm->background_process.cold_cleaner->get_stat().data_stored; + EXPECT_GE(main_size, new_main_size); + EXPECT_NE(new_cold_size, 0); + + update_data_gen_mapping([](rewrite_gen_t gen) { return gen; }); + EXPECT_TRUE(epm->background_process.eviction_state.is_default_mode()); + test_gen("finish evict"); + }); + } + + using remap_entry = TransactionManager::remap_entry; + LBAMappingRef remap_pin( + test_transaction_t &t, + LBAMappingRef &&opin, + extent_len_t new_offset, + extent_len_t new_len) { + if (t.t->is_conflicted()) { + return nullptr; + } + auto o_laddr = opin->get_key(); + auto pin = with_trans_intr(*(t.t), [&](auto& trans) { + return tm->remap_pin<TestBlock>( + trans, std::move(opin), std::array{ + remap_entry(new_offset, new_len)} + ).si_then([](auto ret) { + return std::move(ret[0]); + }); + }).handle_error(crimson::ct_error::eagain::handle([] { + LBAMappingRef t = nullptr; + return t; + }), crimson::ct_error::pass_further_all{}).unsafe_get0(); + if (t.t->is_conflicted()) { + return nullptr; + } + test_mappings.dec_ref(o_laddr, t.mapping_delta); + EXPECT_FALSE(test_mappings.contains(o_laddr, t.mapping_delta)); + EXPECT_TRUE(pin); + EXPECT_EQ(pin->get_length(), new_len); + EXPECT_EQ(pin->get_key(), o_laddr + new_offset); + + auto extent = try_read_pin(t, pin->duplicate()); + if (extent) { + test_mappings.alloced(pin->get_key(), *extent, t.mapping_delta); + EXPECT_TRUE(extent->is_exist_clean()); + } else { + ceph_assert(t.t->is_conflicted()); + return nullptr; + } + return pin; + } + + using _overwrite_pin_iertr = TransactionManager::get_pin_iertr; + using _overwrite_pin_ret = _overwrite_pin_iertr::future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>; + _overwrite_pin_ret _overwrite_pin( + Transaction &t, + 
LBAMappingRef &&opin, + extent_len_t new_offset, + extent_len_t new_len, + ceph::bufferlist &bl) { + auto o_laddr = opin->get_key(); + auto o_len = opin->get_length(); + if (new_offset != 0 && o_len != new_offset + new_len) { + return tm->remap_pin<TestBlock, 2>( + t, + std::move(opin), + std::array{ + remap_entry( + 0, + new_offset), + remap_entry( + new_offset + new_len, + o_len - new_offset - new_len) + } + ).si_then([this, new_offset, new_len, o_laddr, &t, &bl](auto ret) { + return tm->alloc_extent<TestBlock>(t, o_laddr + new_offset, new_len + ).si_then([this, ret = std::move(ret), new_len, + new_offset, o_laddr, &t, &bl](auto ext) mutable { + ceph_assert(ret.size() == 2); + auto iter = bl.cbegin(); + iter.copy(new_len, ext->get_bptr().c_str()); + auto r_laddr = o_laddr + new_offset + new_len; + // old pins expired after alloc new extent, need to get it. + return tm->get_pin(t, o_laddr + ).si_then([this, &t, ext = std::move(ext), r_laddr](auto lpin) mutable { + return tm->get_pin(t, r_laddr + ).si_then([lpin = std::move(lpin), ext = std::move(ext)] + (auto rpin) mutable { + return _overwrite_pin_iertr::make_ready_future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>( + std::make_tuple( + std::move(lpin), std::move(ext), std::move(rpin))); + }); + }); + }); + }); + } else if (new_offset == 0 && o_len != new_offset + new_len) { + return tm->remap_pin<TestBlock, 1>( + t, + std::move(opin), + std::array{ + remap_entry( + new_offset + new_len, + o_len - new_offset - new_len) + } + ).si_then([this, new_offset, new_len, o_laddr, &t, &bl](auto ret) { + return tm->alloc_extent<TestBlock>(t, o_laddr + new_offset, new_len + ).si_then([this, ret = std::move(ret), new_offset, new_len, + o_laddr, &t, &bl](auto ext) mutable { + ceph_assert(ret.size() == 1); + auto iter = bl.cbegin(); + iter.copy(new_len, ext->get_bptr().c_str()); + auto r_laddr = o_laddr + new_offset + new_len; + return tm->get_pin(t, r_laddr + ).si_then([ext = std::move(ext)](auto rpin) mutable { 
+ return _overwrite_pin_iertr::make_ready_future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>( + std::make_tuple( + nullptr, std::move(ext), std::move(rpin))); + }); + }); + }); + } else if (new_offset != 0 && o_len == new_offset + new_len) { + return tm->remap_pin<TestBlock, 1>( + t, + std::move(opin), + std::array{ + remap_entry( + 0, + new_offset) + } + ).si_then([this, new_offset, new_len, o_laddr, &t, &bl](auto ret) { + return tm->alloc_extent<TestBlock>(t, o_laddr + new_offset, new_len + ).si_then([this, ret = std::move(ret), new_len, o_laddr, &t, &bl] + (auto ext) mutable { + ceph_assert(ret.size() == 1); + auto iter = bl.cbegin(); + iter.copy(new_len, ext->get_bptr().c_str()); + return tm->get_pin(t, o_laddr + ).si_then([ext = std::move(ext)](auto lpin) mutable { + return _overwrite_pin_iertr::make_ready_future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>( + std::make_tuple( + std::move(lpin), std::move(ext), nullptr)); + }); + }); + }); + } else { + ceph_abort("impossible"); + return _overwrite_pin_iertr::make_ready_future< + std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>( + std::make_tuple(nullptr, nullptr, nullptr)); + } + } + + using overwrite_pin_ret = std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>; + overwrite_pin_ret overwrite_pin( + test_transaction_t &t, + LBAMappingRef &&opin, + extent_len_t new_offset, + extent_len_t new_len, + ceph::bufferlist &bl) { + if (t.t->is_conflicted()) { + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + } + auto o_laddr = opin->get_key(); + auto o_paddr = opin->get_val(); + auto o_len = opin->get_length(); + auto res = with_trans_intr(*(t.t), [&](auto& trans) { + return _overwrite_pin( + trans, std::move(opin), new_offset, new_len, bl); + }).handle_error(crimson::ct_error::eagain::handle([] { + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + }), 
crimson::ct_error::pass_further_all{}).unsafe_get0(); + if (t.t->is_conflicted()) { + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + } + test_mappings.dec_ref(o_laddr, t.mapping_delta); + EXPECT_FALSE(test_mappings.contains(o_laddr, t.mapping_delta)); + auto &[lpin, ext, rpin] = res; + + EXPECT_TRUE(ext); + EXPECT_TRUE(lpin || rpin); + EXPECT_TRUE(o_len > ext->get_length()); + if (lpin) { + EXPECT_EQ(lpin->get_key(), o_laddr); + EXPECT_EQ(lpin->get_val(), o_paddr); + EXPECT_EQ(lpin->get_length(), new_offset); + auto lext = try_read_pin(t, lpin->duplicate()); + if (lext) { + test_mappings.alloced(lpin->get_key(), *lext, t.mapping_delta); + EXPECT_TRUE(lext->is_exist_clean()); + } else { + ceph_assert(t.t->is_conflicted()); + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + } + } + EXPECT_EQ(ext->get_laddr(), o_laddr + new_offset); + EXPECT_EQ(ext->get_length(), new_len); + test_mappings.alloced(ext->get_laddr(), *ext, t.mapping_delta); + if (rpin) { + EXPECT_EQ(rpin->get_key(), o_laddr + new_offset + new_len); + EXPECT_EQ(rpin->get_val(), o_paddr.add_offset(new_offset) + .add_offset(new_len)); + EXPECT_EQ(rpin->get_length(), o_len - new_offset - new_len); + auto rext = try_read_pin(t, rpin->duplicate()); + if (rext) { + test_mappings.alloced(rpin->get_key(), *rext, t.mapping_delta); + EXPECT_TRUE(rext->is_exist_clean()); + } else { + ceph_assert(t.t->is_conflicted()); + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + nullptr, nullptr, nullptr); + } + } + return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>( + std::move(lpin), std::move(ext), std::move(rpin)); + } + + void test_remap_pin() { + run_async([this] { + constexpr size_t l_offset = 32 << 10; + constexpr size_t l_len = 32 << 10; + constexpr size_t r_offset = 64 << 10; + constexpr size_t r_len = 32 << 10; + { + auto t = create_transaction(); + auto lext = alloc_extent(t, 
l_offset, l_len); + lext->set_contents('l', 0, 16 << 10); + auto rext = alloc_extent(t, r_offset, r_len); + rext->set_contents('r', 16 << 10, 16 << 10); + submit_transaction(std::move(t)); + } + { + auto t = create_transaction(); + auto lpin = get_pin(t, l_offset); + auto rpin = get_pin(t, r_offset); + //split left + auto pin1 = remap_pin(t, std::move(lpin), 0, 16 << 10); + ASSERT_TRUE(pin1); + auto pin2 = remap_pin(t, std::move(pin1), 0, 8 << 10); + ASSERT_TRUE(pin2); + auto pin3 = remap_pin(t, std::move(pin2), 0, 4 << 10); + ASSERT_TRUE(pin3); + auto lext = get_extent(t, pin3->get_key(), pin3->get_length()); + EXPECT_EQ('l', lext->get_bptr().c_str()[0]); + auto mlext = mutate_extent(t, lext); + ASSERT_TRUE(mlext->is_exist_mutation_pending()); + ASSERT_TRUE(mlext.get() == lext.get()); + + //split right + auto pin4 = remap_pin(t, std::move(rpin), 16 << 10, 16 << 10); + ASSERT_TRUE(pin4); + auto pin5 = remap_pin(t, std::move(pin4), 8 << 10, 8 << 10); + ASSERT_TRUE(pin5); + auto pin6 = remap_pin(t, std::move(pin5), 4 << 10, 4 << 10); + ASSERT_TRUE(pin6); + auto rext = get_extent(t, pin6->get_key(), pin6->get_length()); + EXPECT_EQ('r', rext->get_bptr().c_str()[0]); + auto mrext = mutate_extent(t, rext); + ASSERT_TRUE(mrext->is_exist_mutation_pending()); + ASSERT_TRUE(mrext.get() == rext.get()); + + submit_transaction(std::move(t)); + check(); + } + replay(); + check(); + }); + } + + void test_overwrite_pin() { + run_async([this] { + constexpr size_t m_offset = 8 << 10; + constexpr size_t m_len = 56 << 10; + constexpr size_t l_offset = 64 << 10; + constexpr size_t l_len = 64 << 10; + constexpr size_t r_offset = 128 << 10; + constexpr size_t r_len = 64 << 10; + { + auto t = create_transaction(); + auto m_ext = alloc_extent(t, m_offset, m_len); + m_ext->set_contents('a', 0 << 10, 8 << 10); + m_ext->set_contents('b', 16 << 10, 4 << 10); + m_ext->set_contents('c', 36 << 10, 4 << 10); + m_ext->set_contents('d', 52 << 10, 4 << 10); + + auto l_ext = alloc_extent(t, l_offset, 
l_len); + auto r_ext = alloc_extent(t, r_offset, r_len); + submit_transaction(std::move(t)); + } + { + auto t = create_transaction(); + auto mpin = get_pin(t, m_offset); + auto lpin = get_pin(t, l_offset); + auto rpin = get_pin(t, r_offset); + + bufferlist mbl1, mbl2, mbl3; + mbl1.append(ceph::bufferptr(ceph::buffer::create(8 << 10, 0))); + mbl2.append(ceph::bufferptr(ceph::buffer::create(16 << 10, 0))); + mbl3.append(ceph::bufferptr(ceph::buffer::create(12 << 10, 0))); + auto [mlp1, mext1, mrp1] = overwrite_pin( + t, std::move(mpin), 8 << 10 , 8 << 10, mbl1); + auto [mlp2, mext2, mrp2] = overwrite_pin( + t, std::move(mrp1), 4 << 10 , 16 << 10, mbl2); + auto [mlpin3, me3, mrpin3] = overwrite_pin( + t, std::move(mrp2), 4 << 10 , 12 << 10, mbl3); + auto mlext1 = get_extent(t, mlp1->get_key(), mlp1->get_length()); + auto mlext2 = get_extent(t, mlp2->get_key(), mlp2->get_length()); + auto mlext3 = get_extent(t, mlpin3->get_key(), mlpin3->get_length()); + auto mrext3 = get_extent(t, mrpin3->get_key(), mrpin3->get_length()); + EXPECT_EQ('a', mlext1->get_bptr().c_str()[0]); + EXPECT_EQ('b', mlext2->get_bptr().c_str()[0]); + EXPECT_EQ('c', mlext3->get_bptr().c_str()[0]); + EXPECT_EQ('d', mrext3->get_bptr().c_str()[0]); + auto mutate_mlext1 = mutate_extent(t, mlext1); + auto mutate_mlext2 = mutate_extent(t, mlext2); + auto mutate_mlext3 = mutate_extent(t, mlext3); + auto mutate_mrext3 = mutate_extent(t, mrext3); + ASSERT_TRUE(mutate_mlext1->is_exist_mutation_pending()); + ASSERT_TRUE(mutate_mlext2->is_exist_mutation_pending()); + ASSERT_TRUE(mutate_mlext3->is_exist_mutation_pending()); + ASSERT_TRUE(mutate_mrext3->is_exist_mutation_pending()); + ASSERT_TRUE(mutate_mlext1.get() == mlext1.get()); + ASSERT_TRUE(mutate_mlext2.get() == mlext2.get()); + ASSERT_TRUE(mutate_mlext3.get() == mlext3.get()); + ASSERT_TRUE(mutate_mrext3.get() == mrext3.get()); + + bufferlist lbl1, rbl1; + lbl1.append(ceph::bufferptr(ceph::buffer::create(32 << 10, 0))); + auto [llp1, lext1, lrp1] = 
overwrite_pin( + t, std::move(lpin), 0 , 32 << 10, lbl1); + EXPECT_FALSE(llp1); + EXPECT_TRUE(lrp1); + EXPECT_TRUE(lext1); + + rbl1.append(ceph::bufferptr(ceph::buffer::create(32 << 10, 0))); + auto [rlp1, rext1, rrp1] = overwrite_pin( + t, std::move(rpin), 32 << 10 , 32 << 10, rbl1); + EXPECT_TRUE(rlp1); + EXPECT_TRUE(rext1); + EXPECT_FALSE(rrp1); + + submit_transaction(std::move(t)); + check(); + } + replay(); + check(); + }); + } + + void test_remap_pin_concurrent() { + run_async([this] { + constexpr unsigned REMAP_NUM = 32; + constexpr size_t offset = 0; + constexpr size_t length = 256 << 10; + { + auto t = create_transaction(); + auto extent = alloc_extent(t, offset, length); + ASSERT_EQ(length, extent->get_length()); + submit_transaction(std::move(t)); + } + int success = 0; + int early_exit = 0; + int conflicted = 0; + + seastar::parallel_for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(REMAP_NUM), + [&](auto) { + return seastar::async([&] { + uint32_t pieces = std::uniform_int_distribution<>(6, 31)(gen); + std::set<uint32_t> split_points; + for (uint32_t i = 0; i < pieces; i++) { + auto p = std::uniform_int_distribution<>(1, 256)(gen); + split_points.insert(p - p % 4); + } + + auto t = create_transaction(); + auto pin0 = try_get_pin(t, offset); + if (!pin0 || pin0->get_length() != length) { + early_exit++; + return; + } + + auto last_pin = pin0->duplicate(); + ASSERT_TRUE(!split_points.empty()); + for (auto off : split_points) { + if (off == 0 || off >= 255) { + continue; + } + auto new_off = (off << 10) - last_pin->get_key(); + auto new_len = last_pin->get_length() - new_off; + //always remap right extent at new split_point + auto pin = remap_pin(t, std::move(last_pin), new_off, new_len); + if (!pin) { + conflicted++; + return; + } + last_pin = pin->duplicate(); + } + auto last_ext = try_get_extent(t, last_pin->get_key()); + if (last_ext) { + auto last_ext1 = mutate_extent(t, last_ext); + 
ASSERT_TRUE(last_ext1->is_exist_mutation_pending()); + } else { + conflicted++; + return; + } + + if (try_submit_transaction(std::move(t))) { + success++; + logger().info("transaction {} submit the transction", + static_cast<void*>(t.t.get())); + } else { + conflicted++; + } + }); + }).handle_exception([](std::exception_ptr e) { + logger().info("{}", e); + }).get0(); + logger().info("test_remap_pin_concurrent: " + "early_exit {} conflicted {} success {}", + early_exit, conflicted, success); + ASSERT_TRUE(success == 1); + ASSERT_EQ(success + conflicted + early_exit, REMAP_NUM); + replay(); + check(); + }); + } + + void test_overwrite_pin_concurrent() { + run_async([this] { + constexpr unsigned REMAP_NUM = 32; + constexpr size_t offset = 0; + constexpr size_t length = 256 << 10; + { + auto t = create_transaction(); + auto extent = alloc_extent(t, offset, length); + ASSERT_EQ(length, extent->get_length()); + submit_transaction(std::move(t)); + } + int success = 0; + int early_exit = 0; + int conflicted = 0; + + seastar::parallel_for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(REMAP_NUM), + [&](auto) { + return seastar::async([&] { + uint32_t pieces = std::uniform_int_distribution<>(6, 31)(gen); + if (pieces % 2 == 1) { + pieces++; + } + std::list<uint32_t> split_points; + for (uint32_t i = 0; i < pieces; i++) { + auto p = std::uniform_int_distribution<>(1, 120)(gen); + split_points.push_back(p - p % 4); + } + split_points.sort(); + + auto t = create_transaction(); + auto pin0 = try_get_pin(t, offset); + if (!pin0 || pin0->get_length() != length) { + early_exit++; + return; + } + + auto empty_transaction = true; + auto last_rpin = pin0->duplicate(); + ASSERT_TRUE(!split_points.empty()); + while(!split_points.empty()) { + // new overwrite area: start_off ~ end_off + auto start_off = split_points.front(); + split_points.pop_front(); + auto end_off = split_points.front(); + split_points.pop_front(); + ASSERT_TRUE(start_off <= end_off); + if 
(((end_off << 10) == pin0->get_key() + pin0->get_length()) + || (start_off == end_off)) { + if (split_points.empty() && empty_transaction) { + early_exit++; + return; + } + continue; + } + empty_transaction = false; + auto new_off = (start_off << 10) - last_rpin->get_key(); + auto new_len = (end_off - start_off) << 10; + bufferlist bl; + bl.append(ceph::bufferptr(ceph::buffer::create(new_len, 0))); + auto [lpin, ext, rpin] = overwrite_pin( + t, last_rpin->duplicate(), new_off, new_len, bl); + if (!ext) { + conflicted++; + return; + } + // lpin is nullptr might not cause by confliction, + // it might just not exist. + if (lpin) { + auto lext = try_get_extent(t, lpin->get_key()); + if (!lext) { + conflicted++; + return; + } + if (get_random_contents() % 2 == 0) { + auto lext1 = mutate_extent(t, lext); + ASSERT_TRUE(lext1->is_exist_mutation_pending()); + } + } + ASSERT_TRUE(rpin); + last_rpin = rpin->duplicate(); + } + auto last_rext = try_get_extent(t, last_rpin->get_key()); + if (!last_rext) { + conflicted++; + return; + } + if (get_random_contents() % 2 == 0) { + auto last_rext1 = mutate_extent(t, last_rext); + ASSERT_TRUE(last_rext1->is_exist_mutation_pending()); + } + + if (try_submit_transaction(std::move(t))) { + success++; + logger().info("transaction {} submit the transction", + static_cast<void*>(t.t.get())); + } else { + conflicted++; + } + }); + }).handle_exception([](std::exception_ptr e) { + logger().info("{}", e); + }).get0(); + logger().info("test_overwrite_pin_concurrent: " + "early_exit {} conflicted {} success {}", + early_exit, conflicted, success); + ASSERT_TRUE(success == 1 || early_exit == REMAP_NUM); + ASSERT_EQ(success + conflicted + early_exit, REMAP_NUM); + replay(); + check(); + }); + } +}; + +struct tm_single_device_test_t : + public transaction_manager_test_t { + + tm_single_device_test_t() : transaction_manager_test_t(1, 0) {} +}; + +struct tm_multi_device_test_t : + public transaction_manager_test_t { + + tm_multi_device_test_t() : 
transaction_manager_test_t(3, 0) {} +}; + +struct tm_multi_tier_device_test_t : + public transaction_manager_test_t { + + tm_multi_tier_device_test_t() : transaction_manager_test_t(1, 2) {} +}; + +TEST_P(tm_single_device_test_t, basic) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + constexpr laddr_t ADDR = 0xFF * SIZE; + { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + ADDR, + SIZE, + 'a'); + ASSERT_EQ(ADDR, extent->get_laddr()); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + }); +} + +TEST_P(tm_single_device_test_t, mutate) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + constexpr laddr_t ADDR = 0xFF * SIZE; + { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + ADDR, + SIZE, + 'a'); + ASSERT_EQ(ADDR, extent->get_laddr()); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + ASSERT_TRUE(check_usage()); + replay(); + { + auto t = create_transaction(); + auto ext = get_extent( + t, + ADDR, + SIZE); + auto mut = mutate_extent(t, ext); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + ASSERT_TRUE(check_usage()); + replay(); + check(); + }); +} + +TEST_P(tm_single_device_test_t, allocate_lba_conflict) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + constexpr laddr_t ADDR = 0xFF * SIZE; + constexpr laddr_t ADDR2 = 0xFE * SIZE; + auto t = create_transaction(); + auto t2 = create_transaction(); + + // These should conflict as they should both modify the lba root + auto extent = alloc_extent( + t, + ADDR, + SIZE, + 'a'); + ASSERT_EQ(ADDR, extent->get_laddr()); + check_mappings(t); + check(); + + auto extent2 = alloc_extent( + t2, + ADDR2, + SIZE, + 'a'); + ASSERT_EQ(ADDR2, extent2->get_laddr()); + check_mappings(t2); + extent2.reset(); + + submit_transaction(std::move(t2)); + submit_transaction_expect_conflict(std::move(t)); + }); +} + +TEST_P(tm_single_device_test_t, mutate_lba_conflict) +{ + 
constexpr laddr_t SIZE = 4096; + run_async([this] { + { + auto t = create_transaction(); + for (unsigned i = 0; i < 300; ++i) { + auto extent = alloc_extent( + t, + laddr_t(i * SIZE), + SIZE); + } + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + + constexpr laddr_t ADDR = 150 * SIZE; + { + auto t = create_transaction(); + auto t2 = create_transaction(); + + mutate_addr(t, ADDR, SIZE); + mutate_addr(t2, ADDR, SIZE); + + submit_transaction(std::move(t)); + submit_transaction_expect_conflict(std::move(t2)); + } + check(); + + { + auto t = create_transaction(); + mutate_addr(t, ADDR, SIZE); + submit_transaction(std::move(t)); + } + check(); + }); +} + +TEST_P(tm_single_device_test_t, concurrent_mutate_lba_no_conflict) +{ + constexpr laddr_t SIZE = 4096; + constexpr size_t NUM = 500; + constexpr laddr_t addr = 0; + constexpr laddr_t addr2 = SIZE * (NUM - 1); + run_async([this] { + { + auto t = create_transaction(); + for (unsigned i = 0; i < NUM; ++i) { + auto extent = alloc_extent( + t, + laddr_t(i * SIZE), + SIZE); + } + submit_transaction(std::move(t)); + } + + { + auto t = create_transaction(); + auto t2 = create_transaction(); + + mutate_addr(t, addr, SIZE); + mutate_addr(t2, addr2, SIZE); + + submit_transaction(std::move(t)); + submit_transaction(std::move(t2)); + } + check(); + }); +} + +TEST_P(tm_single_device_test_t, create_remove_same_transaction) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + constexpr laddr_t ADDR = 0xFF * SIZE; + { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + ADDR, + SIZE, + 'a'); + ASSERT_EQ(ADDR, extent->get_laddr()); + check_mappings(t); + dec_ref(t, ADDR); + check_mappings(t); + + extent = alloc_extent( + t, + ADDR, + SIZE, + 'a'); + + submit_transaction(std::move(t)); + check(); + } + replay(); + check(); + }); +} + +TEST_P(tm_single_device_test_t, split_merge_read_same_transaction) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + { + auto t = create_transaction(); 
+ for (unsigned i = 0; i < 300; ++i) { + auto extent = alloc_extent( + t, + laddr_t(i * SIZE), + SIZE); + } + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + { + auto t = create_transaction(); + for (unsigned i = 0; i < 240; ++i) { + dec_ref( + t, + laddr_t(i * SIZE)); + } + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + }); +} + +TEST_P(tm_single_device_test_t, inc_dec_ref) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + constexpr laddr_t ADDR = 0xFF * SIZE; + { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + ADDR, + SIZE, + 'a'); + ASSERT_EQ(ADDR, extent->get_laddr()); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + replay(); + { + auto t = create_transaction(); + inc_ref(t, ADDR); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + { + auto t = create_transaction(); + dec_ref(t, ADDR); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + replay(); + { + auto t = create_transaction(); + dec_ref(t, ADDR); + check_mappings(t); + check(); + submit_transaction(std::move(t)); + check(); + } + }); +} + +TEST_P(tm_single_device_test_t, cause_lba_split) +{ + constexpr laddr_t SIZE = 4096; + run_async([this] { + for (unsigned i = 0; i < 200; ++i) { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + i * SIZE, + SIZE, + (char)(i & 0xFF)); + ASSERT_EQ(i * SIZE, extent->get_laddr()); + submit_transaction(std::move(t)); + } + check(); + }); +} + +TEST_P(tm_single_device_test_t, random_writes) +{ + constexpr size_t TOTAL = 4<<20; + constexpr size_t BSIZE = 4<<10; + constexpr size_t PADDING_SIZE = 256<<10; + constexpr size_t BLOCKS = TOTAL / BSIZE; + run_async([this] { + for (unsigned i = 0; i < BLOCKS; ++i) { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + i * BSIZE, + BSIZE); + ASSERT_EQ(i * BSIZE, extent->get_laddr()); + submit_transaction(std::move(t)); 
+ } + + for (unsigned i = 0; i < 4; ++i) { + for (unsigned j = 0; j < 65; ++j) { + auto t = create_transaction(); + for (unsigned k = 0; k < 2; ++k) { + auto ext = get_extent( + t, + get_random_laddr(BSIZE, TOTAL), + BSIZE); + auto mut = mutate_extent(t, ext); + // pad out transaction + auto padding = alloc_extent( + t, + TOTAL + (k * PADDING_SIZE), + PADDING_SIZE); + dec_ref(t, padding->get_laddr()); + } + submit_transaction(std::move(t)); + } + replay(); + logger().info("random_writes: {} checking", i); + check(); + logger().info("random_writes: {} done replaying/checking", i); + } + }); +} + +TEST_P(tm_single_device_test_t, find_hole_assert_trigger) +{ + constexpr unsigned max = 10; + constexpr size_t BSIZE = 4<<10; + int num = 40; + run([&, this] { + return seastar::parallel_for_each( + boost::make_counting_iterator(0u), + boost::make_counting_iterator(max), + [&, this](auto idx) { + return allocate_sequentially(BSIZE, num); + }); + }); +} + +TEST_P(tm_single_device_test_t, remap_lazy_read) +{ + constexpr laddr_t offset = 0; + constexpr size_t length = 256 << 10; + run_async([this, offset] { + { + auto t = create_transaction(); + auto extent = alloc_extent( + t, + offset, + length, + 'a'); + ASSERT_EQ(offset, extent->get_laddr()); + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + replay(); + { + auto t = create_transaction(); + auto pin = get_pin(t, offset); + auto rpin = remap_pin(t, std::move(pin), 0, 128 << 10); + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + replay(); + { + auto t = create_transaction(); + auto pin = get_pin(t, offset); + bufferlist bl; + bl.append(ceph::bufferptr(ceph::buffer::create(64 << 10, 0))); + auto [lpin, ext, rpin] = overwrite_pin( + t, std::move(pin), 4 << 10 , 64 << 10, bl); + check_mappings(t); + submit_transaction(std::move(t)); + check(); + } + replay(); + }); +} + +TEST_P(tm_single_device_test_t, random_writes_concurrent) +{ + test_random_writes_concurrent(); +} + 
+TEST_P(tm_multi_device_test_t, random_writes_concurrent) +{ + test_random_writes_concurrent(); +} + +TEST_P(tm_multi_tier_device_test_t, evict) +{ + test_evict(); +} + +TEST_P(tm_single_device_test_t, parallel_extent_read) +{ + test_parallel_extent_read(); +} + +TEST_P(tm_single_device_test_t, test_remap_pin) +{ + test_remap_pin(); +} + +TEST_P(tm_single_device_test_t, test_overwrite_pin) +{ + test_overwrite_pin(); +} + +TEST_P(tm_single_device_test_t, test_remap_pin_concurrent) +{ + test_remap_pin_concurrent(); +} + +TEST_P(tm_single_device_test_t, test_overwrite_pin_concurrent) +{ + test_overwrite_pin_concurrent(); +} + +INSTANTIATE_TEST_SUITE_P( + transaction_manager_test, + tm_single_device_test_t, + ::testing::Values ( + "segmented", + "circularbounded" + ) +); + +INSTANTIATE_TEST_SUITE_P( + transaction_manager_test, + tm_multi_device_test_t, + ::testing::Values ( + "segmented" + ) +); + +INSTANTIATE_TEST_SUITE_P( + transaction_manager_test, + tm_multi_tier_device_test_t, + ::testing::Values ( + "segmented" + ) +); diff --git a/src/test/crimson/seastore/transaction_manager_test_state.h b/src/test/crimson/seastore/transaction_manager_test_state.h new file mode 100644 index 000000000..81200b1db --- /dev/null +++ b/src/test/crimson/seastore/transaction_manager_test_state.h @@ -0,0 +1,450 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <random> +#include <boost/iterator/counting_iterator.hpp> + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/extent_placement_manager.h" +#include "crimson/os/seastore/logging.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/segment_manager/ephemeral.h" +#include "crimson/os/seastore/seastore.h" +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/collection_manager/flat_collection_manager.h" +#include 
"crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h" +#include "crimson/os/seastore/random_block_manager/rbm_device.h" +#include "crimson/os/seastore/journal/circular_bounded_journal.h" +#include "crimson/os/seastore/random_block_manager/block_rb_manager.h" +#ifdef UNIT_TESTS_BUILT +#include "test/crimson/gtest_seastar.h" +#endif + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; + +class EphemeralDevices { +public: + virtual seastar::future<> setup() = 0; + virtual void remount() = 0; + virtual std::size_t get_num_devices() const = 0; + virtual void reset() = 0; + virtual std::vector<Device*> get_secondary_devices() = 0; + virtual ~EphemeralDevices() {} + virtual Device* get_primary_device() = 0; + virtual DeviceRef get_primary_device_ref() = 0; + virtual void set_primary_device_ref(DeviceRef) = 0; +}; +using EphemeralDevicesRef = std::unique_ptr<EphemeralDevices>; + +class EphemeralSegmentedDevices : public EphemeralDevices { + segment_manager::EphemeralSegmentManagerRef segment_manager; + std::list<segment_manager::EphemeralSegmentManagerRef> secondary_segment_managers; + std::size_t num_main_device_managers; + std::size_t num_cold_device_managers; + +public: + EphemeralSegmentedDevices(std::size_t num_main_devices, + std::size_t num_cold_devices) + : num_main_device_managers(num_main_devices), + num_cold_device_managers(num_cold_devices) + { + auto num_device_managers = num_main_device_managers + num_cold_device_managers; + assert(num_device_managers > 0); + secondary_segment_managers.resize(num_device_managers - 1); + } + + seastar::future<> setup() final { + segment_manager = segment_manager::create_test_ephemeral(); + for (auto &sec_sm : secondary_segment_managers) { + sec_sm = segment_manager::create_test_ephemeral(); + } + return segment_manager->init( + ).safe_then([this] { + return crimson::do_for_each( + secondary_segment_managers.begin(), + secondary_segment_managers.end(), + [](auto 
&sec_sm) + { + return sec_sm->init(); + }); + }).safe_then([this] { + return segment_manager->mkfs( + segment_manager::get_ephemeral_device_config( + 0, num_main_device_managers, num_cold_device_managers)); + }).safe_then([this] { + return seastar::do_with(std::size_t(0), [this](auto &cnt) { + return crimson::do_for_each( + secondary_segment_managers.begin(), + secondary_segment_managers.end(), + [this, &cnt](auto &sec_sm) + { + ++cnt; + return sec_sm->mkfs( + segment_manager::get_ephemeral_device_config( + cnt, num_main_device_managers, num_cold_device_managers)); + }); + }); + }).handle_error( + crimson::ct_error::assert_all{} + ); + } + + void remount() final { + segment_manager->remount(); + for (auto &sec_sm : secondary_segment_managers) { + sec_sm->remount(); + } + } + + std::size_t get_num_devices() const final { + return secondary_segment_managers.size() + 1; + } + + void reset() final { + segment_manager.reset(); + for (auto &sec_sm : secondary_segment_managers) { + sec_sm.reset(); + } + } + + std::vector<Device*> get_secondary_devices() final { + std::vector<Device*> sec_devices; + for (auto &sec_sm : secondary_segment_managers) { + sec_devices.emplace_back(sec_sm.get()); + } + return sec_devices; + } + + Device* get_primary_device() final { + return segment_manager.get(); + } + DeviceRef get_primary_device_ref() final; + void set_primary_device_ref(DeviceRef) final; +}; + +class EphemeralRandomBlockDevices : public EphemeralDevices { + random_block_device::RBMDeviceRef rb_device; + std::list<random_block_device::RBMDeviceRef> secondary_rb_devices; + +public: + EphemeralRandomBlockDevices(std::size_t num_device_managers) { + assert(num_device_managers > 0); + secondary_rb_devices.resize(num_device_managers - 1); + } + + seastar::future<> setup() final { + rb_device = random_block_device::create_test_ephemeral(); + device_config_t config = get_rbm_ephemeral_device_config(0, 1); + return rb_device->mkfs(config).handle_error(crimson::ct_error::assert_all{}); 
+ } + + void remount() final {} + + std::size_t get_num_devices() const final { + return secondary_rb_devices.size() + 1; + } + + void reset() final { + rb_device.reset(); + for (auto &sec_rb : secondary_rb_devices) { + sec_rb.reset(); + } + } + + std::vector<Device*> get_secondary_devices() final { + std::vector<Device*> sec_devices; + for (auto &sec_rb : secondary_rb_devices) { + sec_devices.emplace_back(sec_rb.get()); + } + return sec_devices; + } + + Device* get_primary_device() final { + return rb_device.get(); + } + DeviceRef get_primary_device_ref() final; + void set_primary_device_ref(DeviceRef) final; +}; + +class EphemeralTestState +#ifdef UNIT_TESTS_BUILT + : public ::testing::WithParamInterface<const char*> { +#else + { +#endif +protected: + journal_type_t journal_type; + size_t num_main_device_managers = 0; + size_t num_cold_device_managers = 0; + EphemeralDevicesRef devices; + bool secondary_is_cold; + EphemeralTestState(std::size_t num_main_device_managers, + std::size_t num_cold_device_managers) : + num_main_device_managers(num_main_device_managers), + num_cold_device_managers(num_cold_device_managers) {} + + virtual seastar::future<> _init() = 0; + + virtual seastar::future<> _destroy() = 0; + virtual seastar::future<> _teardown() = 0; + seastar::future<> teardown() { + return _teardown().then([this] { + return _destroy(); + }); + } + + virtual FuturizedStore::mkfs_ertr::future<> _mkfs() = 0; + virtual FuturizedStore::mount_ertr::future<> _mount() = 0; + + seastar::future<> restart_fut() { + LOG_PREFIX(EphemeralTestState::restart_fut); + SUBINFO(test, "begin ..."); + return teardown().then([this] { + devices->remount(); + return _init().then([this] { + return _mount().handle_error(crimson::ct_error::assert_all{}); + }); + }).then([FNAME] { + SUBINFO(test, "finish"); + }); + } + + void restart() { + restart_fut().get0(); + } + + seastar::future<> tm_setup() { + LOG_PREFIX(EphemeralTestState::tm_setup); +#ifdef UNIT_TESTS_BUILT + std::string j_type = 
GetParam(); +#else + std::string j_type = "segmented"; +#endif + if (j_type == "circularbounded") { + //TODO: multiple devices + ceph_assert(num_main_device_managers == 1); + ceph_assert(num_cold_device_managers == 0); + devices.reset(new EphemeralRandomBlockDevices(1)); + } else { + // segmented by default + devices.reset(new + EphemeralSegmentedDevices( + num_main_device_managers, num_cold_device_managers)); + } + SUBINFO(test, "begin with {} devices ...", devices->get_num_devices()); + return devices->setup( + ).then([this] { + return _init(); + }).then([this, FNAME] { + return _mkfs( + ).safe_then([this] { + return restart_fut(); + }).handle_error( + crimson::ct_error::assert_all{} + ).then([FNAME] { + SUBINFO(test, "finish"); + }); + }); + } + + seastar::future<> tm_teardown() { + LOG_PREFIX(EphemeralTestState::tm_teardown); + SUBINFO(test, "begin"); + return teardown().then([this, FNAME] { + devices->reset(); + SUBINFO(test, "finish"); + }); + } +}; + +class TMTestState : public EphemeralTestState { +protected: + TransactionManagerRef tm; + LBAManager *lba_manager; + Cache* cache; + ExtentPlacementManager *epm; + uint64_t seq = 0; + + TMTestState() : EphemeralTestState(1, 0) {} + + TMTestState(std::size_t num_main_devices, std::size_t num_cold_devices) + : EphemeralTestState(num_main_devices, num_cold_devices) {} + + virtual seastar::future<> _init() override { + auto sec_devices = devices->get_secondary_devices(); + auto p_dev = devices->get_primary_device(); + tm = make_transaction_manager(p_dev, sec_devices, true); + epm = tm->get_epm(); + lba_manager = tm->get_lba_manager(); + cache = tm->get_cache(); + return seastar::now(); + } + + virtual seastar::future<> _destroy() override { + epm = nullptr; + lba_manager = nullptr; + cache = nullptr; + tm.reset(); + return seastar::now(); + } + + virtual seastar::future<> _teardown() { + return tm->close().handle_error( + crimson::ct_error::assert_all{"Error in teardown"} + ); + } + + virtual 
FuturizedStore::mount_ertr::future<> _mount() { + return tm->mount( + ).handle_error( + crimson::ct_error::assert_all{"Error in mount"} + ).then([this] { + return epm->stop_background(); + }).then([this] { + return epm->run_background_work_until_halt(); + }); + } + + virtual FuturizedStore::mkfs_ertr::future<> _mkfs() { + return tm->mkfs( + ).handle_error( + crimson::ct_error::assert_all{"Error in mkfs"} + ); + } + + auto create_mutate_transaction() { + return tm->create_transaction( + Transaction::src_t::MUTATE, "test_mutate"); + } + + auto create_read_transaction() { + return tm->create_transaction( + Transaction::src_t::READ, "test_read"); + } + + auto create_weak_transaction() { + return tm->create_transaction( + Transaction::src_t::READ, "test_read_weak", true); + } + + auto submit_transaction_fut2(Transaction& t) { + return tm->submit_transaction(t); + } + + auto submit_transaction_fut(Transaction &t) { + return with_trans_intr( + t, + [this](auto &t) { + return tm->submit_transaction(t); + }); + } + auto submit_transaction_fut_with_seq(Transaction &t) { + using ertr = TransactionManager::base_iertr; + return with_trans_intr( + t, + [this](auto &t) { + return tm->submit_transaction(t + ).si_then([this] { + return ertr::make_ready_future<uint64_t>(seq++); + }); + }); + } + + void submit_transaction(TransactionRef t) { + submit_transaction_fut(*t).unsafe_get0(); + epm->run_background_work_until_halt().get0(); + } +}; + + +DeviceRef EphemeralSegmentedDevices::get_primary_device_ref() { + return std::move(segment_manager); +} + +DeviceRef EphemeralRandomBlockDevices::get_primary_device_ref() { + return std::move(rb_device); +} + +void EphemeralSegmentedDevices::set_primary_device_ref(DeviceRef dev) { + segment_manager = + segment_manager::EphemeralSegmentManagerRef( + static_cast<segment_manager::EphemeralSegmentManager*>(dev.release())); +} + +void EphemeralRandomBlockDevices::set_primary_device_ref(DeviceRef dev) { + rb_device = + 
random_block_device::RBMDeviceRef( + static_cast<random_block_device::RBMDevice*>(dev.release())); +} + +class SeaStoreTestState : public EphemeralTestState { + class TestMDStoreState { + std::map<std::string, std::string> md; + public: + class Store final : public SeaStore::MDStore { + TestMDStoreState &parent; + public: + Store(TestMDStoreState &parent) : parent(parent) {} + + write_meta_ret write_meta( + const std::string& key, const std::string& value) final { + parent.md[key] = value; + return seastar::now(); + } + + read_meta_ret read_meta(const std::string& key) final { + auto iter = parent.md.find(key); + if (iter != parent.md.end()) { + return read_meta_ret( + read_meta_ertr::ready_future_marker{}, + iter->second); + } else { + return read_meta_ret( + read_meta_ertr::ready_future_marker{}, + std::nullopt); + } + } + }; + Store get_mdstore() { + return Store(*this); + } + } mdstore_state; + +protected: + std::unique_ptr<SeaStore> seastore; + FuturizedStore::Shard *sharded_seastore; + + SeaStoreTestState() : EphemeralTestState(1, 0) {} + + virtual seastar::future<> _init() final { + seastore = make_test_seastore( + std::make_unique<TestMDStoreState::Store>(mdstore_state.get_mdstore())); + return seastore->test_start(devices->get_primary_device_ref() + ).then([this] { + sharded_seastore = &(seastore->get_sharded_store()); + }); + } + + virtual seastar::future<> _destroy() final { + devices->set_primary_device_ref(seastore->get_primary_device_ref()); + return seastore->stop().then([this] { + seastore.reset(); + }); + } + + virtual seastar::future<> _teardown() final { + return seastore->umount(); + } + + virtual FuturizedStore::mount_ertr::future<> _mount() final { + return seastore->test_mount(); + } + + virtual FuturizedStore::mkfs_ertr::future<> _mkfs() final { + return seastore->test_mkfs(uuid_d{}); + } +}; diff --git a/src/test/crimson/test_alien_echo.cc b/src/test/crimson/test_alien_echo.cc new file mode 100644 index 000000000..8bef5e651 --- /dev/null 
+++ b/src/test/crimson/test_alien_echo.cc @@ -0,0 +1,294 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- + +#include "auth/Auth.h" +#include "messages/MPing.h" +#include "common/ceph_argparse.h" +#include "crimson/auth/DummyAuth.h" +#include "crimson/common/throttle.h" +#include "crimson/net/Connection.h" +#include "crimson/net/Dispatcher.h" +#include "crimson/net/Messenger.h" + +#include <seastar/core/alien.hh> +#include <seastar/core/app-template.hh> +#include <seastar/core/future-util.hh> +#include <seastar/core/internal/pollable_fd.hh> +#include <seastar/core/posix.hh> +#include <seastar/core/reactor.hh> + +using crimson::common::local_conf; + +enum class echo_role { + as_server, + as_client, +}; + +namespace seastar_pingpong { +struct DummyAuthAuthorizer : public AuthAuthorizer { + DummyAuthAuthorizer() + : AuthAuthorizer(CEPH_AUTH_CEPHX) + {} + bool verify_reply(bufferlist::const_iterator&, + std::string *connection_secret) override { + return true; + } + bool add_challenge(CephContext*, const bufferlist&) override { + return true; + } +}; + +struct Server { + crimson::common::Throttle byte_throttler; + crimson::net::MessengerRef msgr; + crimson::auth::DummyAuthClientServer dummy_auth; + struct ServerDispatcher final : crimson::net::Dispatcher { + unsigned count = 0; + seastar::condition_variable on_reply; + std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef c, + MessageRef m) final + { + std::cout << "server got ping " << *m << std::endl; + // reply with a pong + return c->send(crimson::make_message<MPing>()).then([this] { + ++count; + on_reply.signal(); + return seastar::now(); + }); + } + } dispatcher; + Server(crimson::net::MessengerRef msgr) + : byte_throttler(local_conf()->osd_client_message_size_cap), + msgr{msgr} + { } +}; + +struct Client { + crimson::common::Throttle byte_throttler; + crimson::net::MessengerRef msgr; + crimson::auth::DummyAuthClientServer dummy_auth; + struct ClientDispatcher final 
: crimson::net::Dispatcher { + unsigned count = 0; + seastar::condition_variable on_reply; + std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef c, + MessageRef m) final + { + std::cout << "client got pong " << *m << std::endl; + ++count; + on_reply.signal(); + return seastar::now(); + } + } dispatcher; + Client(crimson::net::MessengerRef msgr) + : byte_throttler(local_conf()->osd_client_message_size_cap), + msgr{msgr} + { } +}; +} // namespace seastar_pingpong + +class SeastarContext { + int begin_fd; + seastar::file_desc on_end; + +public: + SeastarContext() + : begin_fd{eventfd(0, 0)}, + on_end{seastar::file_desc::eventfd(0, 0)} + {} + + template<class Func> + std::thread with_seastar(Func&& func) { + return std::thread{[this, on_end = on_end.get(), + func = std::forward<Func>(func)] { + // alien: are you ready? + wait_for_seastar(); + // alien: could you help me apply(func)? + func(); + // alien: i've sent my request. have you replied it? + // wait_for_seastar(); + // alien: you are free to go! + ::eventfd_write(on_end, 1); + }}; + } + + void run(seastar::app_template& app, int argc, char** argv) { + app.run(argc, argv, [this] { + std::vector<const char*> args; + std::string cluster; + std::string conf_file_list; + auto init_params = ceph_argparse_early_args(args, + CEPH_ENTITY_TYPE_CLIENT, + &cluster, + &conf_file_list); + return crimson::common::sharded_conf().start(init_params.name, cluster) + .then([conf_file_list] { + return local_conf().parse_config_files(conf_file_list); + }).then([this] { + return set_seastar_ready(); + }).then([on_end = std::move(on_end)] () mutable { + // seastar: let me know once i am free to leave. 
+ return seastar::do_with(seastar::pollable_fd(std::move(on_end)), [] + (seastar::pollable_fd& on_end_fds) { + return on_end_fds.readable().then([&on_end_fds] { + eventfd_t result = 0; + on_end_fds.get_file_desc().read(&result, sizeof(result)); + return seastar::make_ready_future<>(); + }); + }); + }).then([]() { + return crimson::common::sharded_conf().stop(); + }).handle_exception([](auto ep) { + std::cerr << "Error: " << ep << std::endl; + }).finally([] { + seastar::engine().exit(0); + }); + }); + } + + seastar::future<> set_seastar_ready() { + // seastar: i am ready to serve! + ::eventfd_write(begin_fd, 1); + return seastar::now(); + } + +private: + void wait_for_seastar() { + eventfd_t result = 0; + if (int r = ::eventfd_read(begin_fd, &result); r < 0) { + std::cerr << "unable to eventfd_read():" << errno << std::endl; + } + } +}; + +static seastar::future<> +seastar_echo(const entity_addr_t addr, echo_role role, unsigned count) +{ + std::cout << "seastar/"; + if (role == echo_role::as_server) { + return seastar::do_with( + seastar_pingpong::Server{crimson::net::Messenger::create( + entity_name_t::OSD(0), "server", addr.get_nonce(), true)}, + [addr, count](auto& server) mutable { + std::cout << "server listening at " << addr << std::endl; + // bind the server + server.msgr->set_default_policy(crimson::net::SocketPolicy::stateless_server(0)); + server.msgr->set_policy_throttler(entity_name_t::TYPE_OSD, + &server.byte_throttler); + server.msgr->set_auth_client(&server.dummy_auth); + server.msgr->set_auth_server(&server.dummy_auth); + return server.msgr->bind(entity_addrvec_t{addr} + ).safe_then([&server] { + return server.msgr->start({&server.dispatcher}); + }, crimson::net::Messenger::bind_ertr::all_same_way([](auto& e) { + ceph_abort_msg("bind failed"); + })).then([&dispatcher=server.dispatcher, count] { + return dispatcher.on_reply.wait([&dispatcher, count] { + return dispatcher.count >= count; + }); + }).finally([&server] { + std::cout << "server shutting 
down" << std::endl; + server.msgr->stop(); + return server.msgr->shutdown(); + }); + }); + } else { + return seastar::do_with( + seastar_pingpong::Client{crimson::net::Messenger::create( + entity_name_t::OSD(1), "client", addr.get_nonce(), true)}, + [addr, count](auto& client) { + std::cout << "client sending to " << addr << std::endl; + client.msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0)); + client.msgr->set_policy_throttler(entity_name_t::TYPE_OSD, + &client.byte_throttler); + client.msgr->set_auth_client(&client.dummy_auth); + client.msgr->set_auth_server(&client.dummy_auth); + return client.msgr->start({&client.dispatcher}).then( + [addr, &client, &disp=client.dispatcher, count] { + auto conn = client.msgr->connect(addr, entity_name_t::TYPE_OSD); + return seastar::do_until( + [&disp,count] { return disp.count >= count; }, + [&disp,conn] { + return conn->send(crimson::make_message<MPing>()).then([&] { + return disp.on_reply.wait(); + }); + } + ); + }).finally([&client] { + std::cout << "client shutting down" << std::endl; + client.msgr->stop(); + return client.msgr->shutdown(); + }); + }); + } +} + +int main(int argc, char** argv) +{ + namespace po = boost::program_options; + po::options_description desc{"Allowed options"}; + desc.add_options() + ("help,h", "show help message") + ("role", po::value<std::string>()->default_value("pong"), + "role to play (ping | pong)") + ("port", po::value<uint16_t>()->default_value(9010), + "port #") + ("nonce", po::value<uint32_t>()->default_value(42), + "a unique number to identify the pong server") + ("count", po::value<unsigned>()->default_value(10), + "stop after sending/echoing <count> MPing messages"); + po::variables_map vm; + std::vector<std::string> unrecognized_options; + try { + auto parsed = po::command_line_parser(argc, argv) + .options(desc) + .allow_unregistered() + .run(); + po::store(parsed, vm); + if (vm.count("help")) { + std::cout << desc << std::endl; + return 0; + } + 
po::notify(vm); + unrecognized_options = po::collect_unrecognized(parsed.options, po::include_positional); + } catch(const po::error& e) { + std::cerr << "error: " << e.what() << std::endl; + return 1; + } + + entity_addr_t addr; + addr.set_type(entity_addr_t::TYPE_MSGR2); + addr.set_family(AF_INET); + addr.set_port(vm["port"].as<std::uint16_t>()); + addr.set_nonce(vm["nonce"].as<std::uint32_t>()); + + echo_role role = echo_role::as_server; + if (vm["role"].as<std::string>() == "ping") { + role = echo_role::as_client; + } + + auto count = vm["count"].as<unsigned>(); + seastar::app_template app; + SeastarContext sc; + auto job = sc.with_seastar([&] { + auto fut = seastar::alien::submit_to(app.alien(), 0, [addr, role, count] { + return seastar_echo(addr, role, count); + }); + fut.wait(); + }); + std::vector<char*> av{argv[0]}; + std::transform(begin(unrecognized_options), + end(unrecognized_options), + std::back_inserter(av), + [](auto& s) { + return const_cast<char*>(s.c_str()); + }); + sc.run(app, av.size(), av.data()); + job.join(); +} + +/* + * Local Variables: + * compile-command: "make -j4 \ + * -C ../../../build \ + * unittest_seastar_echo" + * End: + */ diff --git a/src/test/crimson/test_alienstore_thread_pool.cc b/src/test/crimson/test_alienstore_thread_pool.cc new file mode 100644 index 000000000..dbeed26cd --- /dev/null +++ b/src/test/crimson/test_alienstore_thread_pool.cc @@ -0,0 +1,78 @@ +#include <chrono> +#include <iostream> +#include <numeric> +#include <seastar/core/app-template.hh> +#include "common/ceph_argparse.h" +#include "crimson/common/config_proxy.h" +#include "crimson/os/alienstore/thread_pool.h" +#include "include/msgr.h" + +using namespace std::chrono_literals; +using ThreadPool = crimson::os::ThreadPool; +using crimson::common::local_conf; + +seastar::future<> test_accumulate(ThreadPool& tp) { + static constexpr auto N = 5; + static constexpr auto M = 1; + auto slow_plus = [&tp](int i) { + return tp.submit(::rand() % 2, [=] { + 
std::this_thread::sleep_for(10ns); + return i + M; + }); + }; + return seastar::map_reduce( + boost::irange(0, N), slow_plus, 0, std::plus{}).then([] (int sum) { + auto r = boost::irange(0 + M, N + M); + if (sum != std::accumulate(r.begin(), r.end(), 0)) { + throw std::runtime_error("test_accumulate failed"); + } + }); +} + +seastar::future<> test_void_return(ThreadPool& tp) { + return tp.submit(::rand() % 2, [=] { + std::this_thread::sleep_for(10ns); + }); +} + +int main(int argc, char** argv) +{ + seastar::app_template app; + return app.run(argc, argv, [] { + std::vector<const char*> args; + std::string cluster; + std::string conf_file_list; + auto init_params = ceph_argparse_early_args(args, + CEPH_ENTITY_TYPE_CLIENT, + &cluster, + &conf_file_list); + return crimson::common::sharded_conf().start(init_params.name, cluster) + .then([conf_file_list] { + return local_conf().parse_config_files(conf_file_list); + }).then([] { + return seastar::do_with(std::make_unique<crimson::os::ThreadPool>(2, 128, seastar::resource::cpuset{0}), + [](auto& tp) { + return tp->start().then([&tp] { + return test_accumulate(*tp); + }).then([&tp] { + return test_void_return(*tp); + }).finally([&tp] { + return tp->stop(); + }); + }); + }).finally([] { + return crimson::common::sharded_conf().stop(); + }).handle_exception([](auto e) { + std::cerr << "Error: " << e << std::endl; + seastar::engine().exit(1); + }); + }); +} + +/* + * Local Variables: + * compile-command: "make -j4 \ + * -C ../../../build \ + * unittest_seastar_thread_pool" + * End: + */ diff --git a/src/test/crimson/test_async_echo.cc b/src/test/crimson/test_async_echo.cc new file mode 100644 index 000000000..758bcf626 --- /dev/null +++ b/src/test/crimson/test_async_echo.cc @@ -0,0 +1,234 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- + +#include <boost/program_options/variables_map.hpp> +#include <boost/program_options/parsers.hpp> + +#include "auth/Auth.h" +#include "global/global_init.h" 
+#include "messages/MPing.h" +#include "msg/Dispatcher.h" +#include "msg/Messenger.h" + +#include "auth/DummyAuth.h" + +enum class echo_role { + as_server, + as_client, +}; + +namespace native_pingpong { + +constexpr int CEPH_OSD_PROTOCOL = 10; + +struct Server { + Server(CephContext* cct, const entity_inst_t& entity) + : dummy_auth(cct), dispatcher(cct) + { + msgr.reset(Messenger::create(cct, "async", entity.name, "pong", entity.addr.get_nonce())); + dummy_auth.auth_registry.refresh_config(); + msgr->set_cluster_protocol(CEPH_OSD_PROTOCOL); + msgr->set_default_policy(Messenger::Policy::stateless_server(0)); + msgr->set_auth_client(&dummy_auth); + msgr->set_auth_server(&dummy_auth); + } + DummyAuthClientServer dummy_auth; + std::unique_ptr<Messenger> msgr; + struct ServerDispatcher : Dispatcher { + std::mutex mutex; + std::condition_variable on_reply; + bool replied = false; + ServerDispatcher(CephContext* cct) + : Dispatcher(cct) + {} + bool ms_can_fast_dispatch_any() const override { + return true; + } + bool ms_can_fast_dispatch(const Message* m) const override { + return m->get_type() == CEPH_MSG_PING; + } + void ms_fast_dispatch(Message* m) override { + m->get_connection()->send_message(new MPing); + m->put(); + { + std::lock_guard lock{mutex}; + replied = true; + } + on_reply.notify_one(); + } + bool ms_dispatch(Message*) override { + ceph_abort(); + } + bool ms_handle_reset(Connection*) override { + return true; + } + void ms_handle_remote_reset(Connection*) override { + } + bool ms_handle_refused(Connection*) override { + return true; + } + void echo() { + replied = false; + std::unique_lock lock{mutex}; + return on_reply.wait(lock, [this] { return replied; }); + } + } dispatcher; + void echo() { + dispatcher.echo(); + } +}; + +struct Client { + std::unique_ptr<Messenger> msgr; + Client(CephContext *cct) + : dummy_auth(cct), dispatcher(cct) + { + msgr.reset(Messenger::create(cct, "async", entity_name_t::CLIENT(-1), "ping", getpid())); + 
dummy_auth.auth_registry.refresh_config(); + msgr->set_cluster_protocol(CEPH_OSD_PROTOCOL); + msgr->set_default_policy(Messenger::Policy::lossy_client(0)); + msgr->set_auth_client(&dummy_auth); + msgr->set_auth_server(&dummy_auth); + } + DummyAuthClientServer dummy_auth; + struct ClientDispatcher : Dispatcher { + std::mutex mutex; + std::condition_variable on_reply; + bool replied = false; + + ClientDispatcher(CephContext* cct) + : Dispatcher(cct) + {} + bool ms_can_fast_dispatch_any() const override { + return true; + } + bool ms_can_fast_dispatch(const Message* m) const override { + return m->get_type() == CEPH_MSG_PING; + } + void ms_fast_dispatch(Message* m) override { + m->put(); + { + std::lock_guard lock{mutex}; + replied = true; + } + on_reply.notify_one(); + } + bool ms_dispatch(Message*) override { + ceph_abort(); + } + bool ms_handle_reset(Connection *) override { + return true; + } + void ms_handle_remote_reset(Connection*) override { + } + bool ms_handle_refused(Connection*) override { + return true; + } + bool ping(Messenger* msgr, const entity_inst_t& peer) { + using namespace std::chrono_literals; + auto conn = msgr->connect_to(peer.name.type(), + entity_addrvec_t{peer.addr}); + replied = false; + conn->send_message(new MPing); + std::unique_lock lock{mutex}; + return on_reply.wait_for(lock, 500ms, [&] { + return replied; + }); + } + } dispatcher; + void ping(const entity_inst_t& peer) { + dispatcher.ping(msgr.get(), peer); + } +}; +} // namespace native_pingpong + +static void ceph_echo(CephContext* cct, + entity_addr_t addr, echo_role role, unsigned count) +{ + std::cout << "ceph/"; + entity_inst_t entity{entity_name_t::OSD(0), addr}; + if (role == echo_role::as_server) { + std::cout << "server listening at " << addr << std::endl; + native_pingpong::Server server{cct, entity}; + server.msgr->bind(addr); + server.msgr->add_dispatcher_head(&server.dispatcher); + server.msgr->start(); + for (unsigned i = 0; i < count; i++) { + server.echo(); + } + 
server.msgr->shutdown(); + server.msgr->wait(); + } else { + std::cout << "client sending to " << addr << std::endl; + native_pingpong::Client client{cct}; + client.msgr->add_dispatcher_head(&client.dispatcher); + client.msgr->start(); + auto conn = client.msgr->connect_to(entity.name.type(), + entity_addrvec_t{entity.addr}); + for (unsigned i = 0; i < count; i++) { + std::cout << "seq=" << i << std::endl; + client.ping(entity); + } + client.msgr->shutdown(); + client.msgr->wait(); + } +} + +int main(int argc, char** argv) +{ + namespace po = boost::program_options; + po::options_description desc{"Allowed options"}; + desc.add_options() + ("help,h", "show help message") + ("role", po::value<std::string>()->default_value("pong"), + "role to play (ping | pong)") + ("port", po::value<uint16_t>()->default_value(9010), + "port #") + ("nonce", po::value<uint32_t>()->default_value(42), + "a unique number to identify the pong server") + ("count", po::value<unsigned>()->default_value(10), + "stop after sending/echoing <count> MPing messages") + ("v2", po::value<bool>()->default_value(false), + "using msgr v2 protocol"); + po::variables_map vm; + std::vector<std::string> unrecognized_options; + try { + auto parsed = po::command_line_parser(argc, argv) + .options(desc) + .allow_unregistered() + .run(); + po::store(parsed, vm); + if (vm.count("help")) { + std::cout << desc << std::endl; + return 0; + } + po::notify(vm); + unrecognized_options = po::collect_unrecognized(parsed.options, po::include_positional); + } catch(const po::error& e) { + std::cerr << "error: " << e.what() << std::endl; + return 1; + } + + entity_addr_t addr; + if (vm["v2"].as<bool>()) { + addr.set_type(entity_addr_t::TYPE_MSGR2); + } else { + addr.set_type(entity_addr_t::TYPE_LEGACY); + } + addr.set_family(AF_INET); + addr.set_port(vm["port"].as<std::uint16_t>()); + addr.set_nonce(vm["nonce"].as<std::uint32_t>()); + + echo_role role = echo_role::as_server; + if (vm["role"].as<std::string>() == "ping") { + 
role = echo_role::as_client; + } + + auto count = vm["count"].as<unsigned>(); + std::vector<const char*> args(argv, argv + argc); + auto cct = global_init(nullptr, args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_MON_CONFIG); + common_init_finish(cct.get()); + ceph_echo(cct.get(), addr, role, count); +} diff --git a/src/test/crimson/test_backfill.cc b/src/test/crimson/test_backfill.cc new file mode 100644 index 000000000..6d7d62ce5 --- /dev/null +++ b/src/test/crimson/test_backfill.cc @@ -0,0 +1,501 @@ +#include <algorithm> +#include <cstdlib> +#include <deque> +#include <functional> +#include <initializer_list> +#include <iostream> +#include <iterator> +#include <limits> +#include <map> +#include <set> +#include <string> + +#include <boost/statechart/event_base.hpp> +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "common/hobject.h" +#include "crimson/osd/backfill_state.h" +#include "osd/recovery_types.h" + + +// The sole purpose is to convert from the string representation. +// An alternative approach could use boost::range in FakeStore's +// constructor. 
+struct improved_hobject_t : hobject_t { + improved_hobject_t(const char parsable_name[]) { + this->parse(parsable_name); + } + improved_hobject_t(const hobject_t& obj) + : hobject_t(obj) { + } + bool operator==(const improved_hobject_t& rhs) const { + return static_cast<const hobject_t&>(*this) == \ + static_cast<const hobject_t&>(rhs); + } +}; + + +struct FakeStore { + using objs_t = std::map<improved_hobject_t, eversion_t>; + + objs_t objs; + + void push(const hobject_t& obj, eversion_t version) { + objs[obj] = version; + } + + void drop(const hobject_t& obj, const eversion_t version) { + auto it = objs.find(obj); + ceph_assert(it != std::end(objs)); + ceph_assert(it->second == version); + objs.erase(it); + } + + template <class Func> + hobject_t list(const hobject_t& start, Func&& per_entry) const { + auto it = objs.lower_bound(start); + for (auto max = std::numeric_limits<std::uint64_t>::max(); + it != std::end(objs) && max > 0; + ++it, --max) { + per_entry(*it); + } + return it != std::end(objs) ? 
static_cast<const hobject_t&>(it->first) + : hobject_t::get_max(); + } + + bool operator==(const FakeStore& rhs) const { + return std::size(objs) == std::size(rhs.objs) && \ + std::equal(std::begin(objs), std::end(objs), std::begin(rhs.objs)); + } + bool operator!=(const FakeStore& rhs) const { + return !(*this == rhs); + } +}; + + +struct FakeReplica { + FakeStore store; + hobject_t last_backfill; + + FakeReplica(FakeStore&& store) + : store(std::move(store)) { + } +}; + +struct FakePrimary { + FakeStore store; + eversion_t last_update; + eversion_t projected_last_update; + eversion_t log_tail; + + FakePrimary(FakeStore&& store) + : store(std::move(store)) { + } +}; + +class BackfillFixture : public crimson::osd::BackfillState::BackfillListener { + friend class BackfillFixtureBuilder; + + FakePrimary backfill_source; + std::map<pg_shard_t, FakeReplica> backfill_targets; + std::map<pg_shard_t, + std::vector<std::pair<hobject_t, eversion_t>>> enqueued_drops; + std::deque< + boost::intrusive_ptr< + const boost::statechart::event_base>> events_to_dispatch; + crimson::osd::BackfillState backfill_state; + + BackfillFixture(FakePrimary&& backfill_source, + std::map<pg_shard_t, FakeReplica>&& backfill_targets); + + template <class EventT> + void schedule_event(const EventT& event) { + events_to_dispatch.emplace_back(event.intrusive_from_this()); + } + + // BackfillListener { + void request_replica_scan( + const pg_shard_t& target, + const hobject_t& begin, + const hobject_t& end) override; + + void request_primary_scan( + const hobject_t& begin) override; + + void enqueue_push( + const hobject_t& obj, + const eversion_t& v) override; + + void enqueue_drop( + const pg_shard_t& target, + const hobject_t& obj, + const eversion_t& v) override; + + void maybe_flush() override; + + void update_peers_last_backfill( + const hobject_t& new_last_backfill) override; + + bool budget_available() const override; + +public: + MOCK_METHOD(void, backfilled, (), (override)); + // } + + 
void next_round(std::size_t how_many=1) { + ceph_assert(events_to_dispatch.size() >= how_many); + while (how_many-- > 0) { + backfill_state.process_event(std::move(events_to_dispatch.front())); + events_to_dispatch.pop_front(); + } + } + + void next_till_done() { + while (!events_to_dispatch.empty()) { + next_round(); + } + } + + bool all_stores_look_like(const FakeStore& reference) const { + const bool all_replica_match = std::all_of( + std::begin(backfill_targets), std::end(backfill_targets), + [&reference] (const auto kv) { + return kv.second.store == reference; + }); + return backfill_source.store == reference && all_replica_match; + } + + struct PeeringFacade; + struct PGFacade; +}; + +struct BackfillFixture::PeeringFacade + : public crimson::osd::BackfillState::PeeringFacade { + FakePrimary& backfill_source; + std::map<pg_shard_t, FakeReplica>& backfill_targets; + // sorry, this is duplicative but that's the interface + std::set<pg_shard_t> backfill_targets_as_set; + + PeeringFacade(FakePrimary& backfill_source, + std::map<pg_shard_t, FakeReplica>& backfill_targets) + : backfill_source(backfill_source), + backfill_targets(backfill_targets) { + std::transform( + std::begin(backfill_targets), std::end(backfill_targets), + std::inserter(backfill_targets_as_set, std::end(backfill_targets_as_set)), + [](auto pair) { + return pair.first; + }); + } + + hobject_t earliest_backfill() const override { + hobject_t e = hobject_t::get_max(); + for (const auto& kv : backfill_targets) { + e = std::min(kv.second.last_backfill, e); + } + return e; + } + const std::set<pg_shard_t>& get_backfill_targets() const override { + return backfill_targets_as_set; + } + const hobject_t& get_peer_last_backfill(pg_shard_t peer) const override { + return backfill_targets.at(peer).last_backfill; + } + const eversion_t& get_last_update() const override { + return backfill_source.last_update; + } + const eversion_t& get_log_tail() const override { + return backfill_source.log_tail; + } + + 
void scan_log_after(eversion_t, scan_log_func_t) const override { + /* NOP */ + } + + bool is_backfill_target(pg_shard_t peer) const override { + return backfill_targets.count(peer) == 1; + } + void update_complete_backfill_object_stats(const hobject_t &hoid, + const pg_stat_t &stats) override { + } + bool is_backfilling() const override { + return true; + } +}; + +struct BackfillFixture::PGFacade : public crimson::osd::BackfillState::PGFacade { + FakePrimary& backfill_source; + + PGFacade(FakePrimary& backfill_source) + : backfill_source(backfill_source) { + } + + const eversion_t& get_projected_last_update() const override { + return backfill_source.projected_last_update; + } +}; + +BackfillFixture::BackfillFixture( + FakePrimary&& backfill_source, + std::map<pg_shard_t, FakeReplica>&& backfill_targets) + : backfill_source(std::move(backfill_source)), + backfill_targets(std::move(backfill_targets)), + backfill_state(*this, + std::make_unique<PeeringFacade>(this->backfill_source, + this->backfill_targets), + std::make_unique<PGFacade>(this->backfill_source)) +{ + backfill_state.process_event(crimson::osd::BackfillState::Triggered{}.intrusive_from_this()); +} + +void BackfillFixture::request_replica_scan( + const pg_shard_t& target, + const hobject_t& begin, + const hobject_t& end) +{ + BackfillInterval bi; + bi.end = backfill_targets.at(target).store.list(begin, [&bi](auto kv) { + bi.objects.insert(std::move(kv)); + }); + bi.begin = begin; + bi.version = backfill_source.last_update; + + schedule_event(crimson::osd::BackfillState::ReplicaScanned{ target, std::move(bi) }); +} + +void BackfillFixture::request_primary_scan( + const hobject_t& begin) +{ + BackfillInterval bi; + bi.end = backfill_source.store.list(begin, [&bi](auto kv) { + bi.objects.insert(std::move(kv)); + }); + bi.begin = begin; + bi.version = backfill_source.last_update; + + schedule_event(crimson::osd::BackfillState::PrimaryScanned{ std::move(bi) }); +} + +void BackfillFixture::enqueue_push( + 
const hobject_t& obj, + const eversion_t& v) +{ + for (auto& [ _, bt ] : backfill_targets) { + bt.store.push(obj, v); + } + schedule_event(crimson::osd::BackfillState::ObjectPushed{ obj }); +} + +void BackfillFixture::enqueue_drop( + const pg_shard_t& target, + const hobject_t& obj, + const eversion_t& v) +{ + enqueued_drops[target].emplace_back(obj, v); +} + +void BackfillFixture::maybe_flush() +{ + for (const auto& [target, versioned_objs] : enqueued_drops) { + for (const auto& [obj, v] : versioned_objs) { + backfill_targets.at(target).store.drop(obj, v); + } + } + enqueued_drops.clear(); +} + +void BackfillFixture::update_peers_last_backfill( + const hobject_t& new_last_backfill) +{ +} + +bool BackfillFixture::budget_available() const +{ + return true; +} + +struct BackfillFixtureBuilder { + FakeStore backfill_source; + std::map<pg_shard_t, FakeReplica> backfill_targets; + + static BackfillFixtureBuilder add_source(FakeStore::objs_t objs) { + BackfillFixtureBuilder bfb; + bfb.backfill_source = FakeStore{ std::move(objs) }; + return bfb; + } + + BackfillFixtureBuilder&& add_target(FakeStore::objs_t objs) && { + const auto new_osd_num = std::size(backfill_targets); + const auto [ _, inserted ] = backfill_targets.emplace( + new_osd_num, FakeReplica{ FakeStore{std::move(objs)} }); + ceph_assert(inserted); + return std::move(*this); + } + + BackfillFixture get_result() && { + return BackfillFixture{ std::move(backfill_source), + std::move(backfill_targets) }; + } +}; + +// The straightest case: single primary, single replica. All have the same +// content in their object stores, so the entire backfill boils into just +// `request_primary_scan()` and `request_replica_scan()`. 
+TEST(backfill, same_primary_same_replica) +{ + const auto reference_store = FakeStore{ { + { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} }, + { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} }, + { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} }, + }}; + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + reference_store.objs + ).get_result(); + + cluster_fixture.next_round(); + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_round(); + EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} + +TEST(backfill, one_empty_replica) +{ + const auto reference_store = FakeStore{ { + { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} }, + { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} }, + { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} }, + }}; + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + { /* nothing */ } + ).get_result(); + + cluster_fixture.next_round(); + cluster_fixture.next_round(); + cluster_fixture.next_round(2); + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_round(); + EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} + +TEST(backfill, two_empty_replicas) +{ + const auto reference_store = FakeStore{ { + { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} }, + { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} }, + { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} }, + }}; + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + { /* nothing 1 */ } + ).add_target( + { /* nothing 2 */ } + ).get_result(); + + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_till_done(); + + 
EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} + +namespace StoreRandomizer { + // FIXME: copied & pasted from test/test_snap_mapper.cc. We need to + // find a way to avoid code duplication in test. A static library? + std::string random_string(std::size_t size) { + std::string name; + for (size_t j = 0; j < size; ++j) { + name.push_back('a' + (std::rand() % 26)); + } + return name; + } + + hobject_t random_hobject() { + uint32_t mask{0}; + uint32_t bits{0}; + return hobject_t( + random_string(1+(std::rand() % 16)), + random_string(1+(std::rand() % 16)), + snapid_t(std::rand() % 1000), + (std::rand() & ((~0)<<bits)) | (mask & ~((~0)<<bits)), + 0, random_string(std::rand() % 16)); + } + + eversion_t random_eversion() { + return eversion_t{ std::rand() % 512U, std::rand() % 256UL }; + } + + FakeStore create() { + FakeStore store; + for (std::size_t i = std::rand() % 2048; i > 0; --i) { + store.push(random_hobject(), random_eversion()); + } + return store; + } + + template <class... Args> + void execute_random(Args&&... args) { + std::array<std::function<void()>, sizeof...(Args)> funcs = { + std::forward<Args>(args)... + }; + return std::move(funcs[std::rand() % std::size(funcs)])(); + } + + FakeStore mutate(const FakeStore& source_store) { + FakeStore mutated_store; + source_store.list(hobject_t{}, [&] (const auto& kv) { + const auto &oid = kv.first; + const auto &version = kv.second; + execute_random( + [] { /* just drop the entry */ }, + [&] { mutated_store.push(oid, version); }, + [&] { mutated_store.push(oid, random_eversion()); }, + [&] { mutated_store.push(random_hobject(), version); }, + [&] { + for (auto how_many = std::rand() % 8; how_many > 0; --how_many) { + mutated_store.push(random_hobject(), random_eversion()); + } + } + ); + }); + return mutated_store; + } +} + +// The name might suggest randomness is involved here. Well, that's true +// but till we know the seed the test still is repeatable. 
+TEST(backfill, one_pseudorandomized_replica) +{ + const auto reference_store = StoreRandomizer::create(); + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + StoreRandomizer::mutate(reference_store).objs + ).get_result(); + + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_till_done(); + + EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} + +TEST(backfill, two_pseudorandomized_replicas) +{ + const auto reference_store = StoreRandomizer::create(); + auto cluster_fixture = BackfillFixtureBuilder::add_source( + reference_store.objs + ).add_target( + StoreRandomizer::mutate(reference_store).objs + ).add_target( + StoreRandomizer::mutate(reference_store).objs + ).get_result(); + + EXPECT_CALL(cluster_fixture, backfilled); + cluster_fixture.next_till_done(); + + EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store)); +} diff --git a/src/test/crimson/test_buffer.cc b/src/test/crimson/test_buffer.cc new file mode 100644 index 000000000..64a815bd2 --- /dev/null +++ b/src/test/crimson/test_buffer.cc @@ -0,0 +1,50 @@ +#include <iostream> +#include <seastar/core/app-template.hh> +#include <seastar/core/future-util.hh> +#include <seastar/core/reactor.hh> +#include "include/buffer.h" + +// allocate a foreign buffer on each cpu, collect them all into a bufferlist, +// and destruct it on this cpu +seastar::future<> test_foreign_bufferlist() +{ + auto make_foreign_buffer = [] (unsigned cpu) { + return seastar::smp::submit_to(cpu, [=] { + bufferlist bl; + seastar::temporary_buffer<char> buf("abcd", 4); + bl.append(buffer::create(std::move(buf))); + return bl; + }); + }; + auto reduce = [] (bufferlist&& lhs, bufferlist&& rhs) { + bufferlist bl; + bl.claim_append(lhs); + bl.claim_append(rhs); + return bl; + }; + return seastar::map_reduce(seastar::smp::all_cpus(), make_foreign_buffer, + bufferlist(), reduce).then( + [] (bufferlist&& bl) { + if (bl.length() != 4 * seastar::smp::count) { 
+ auto e = std::make_exception_ptr(std::runtime_error("wrong buffer size")); + return seastar::make_exception_future<>(e); + } + bl.clear(); + return seastar::make_ready_future<>(); + }); +} + +int main(int argc, char** argv) +{ + seastar::app_template app; + return app.run(argc, argv, [] { + return seastar::now().then( + &test_foreign_bufferlist + ).then([] { + std::cout << "All tests succeeded" << std::endl; + }).handle_exception([] (auto eptr) { + std::cout << "Test failure" << std::endl; + return seastar::make_exception_future<>(eptr); + }); + }); +} diff --git a/src/test/crimson/test_config.cc b/src/test/crimson/test_config.cc new file mode 100644 index 000000000..7541c0931 --- /dev/null +++ b/src/test/crimson/test_config.cc @@ -0,0 +1,109 @@ +#include <chrono> +#include <string> +#include <numeric> +#include <seastar/core/app-template.hh> +#include <seastar/core/sharded.hh> +#include "common/ceph_argparse.h" +#include "common/config_obs.h" +#include "crimson/common/config_proxy.h" + +using namespace std::literals; +using Config = crimson::common::ConfigProxy; +const std::string test_uint_option = "osd_max_pgls"; +const uint64_t INVALID_VALUE = (uint64_t)(-1); +const uint64_t EXPECTED_VALUE = 42; + +class ConfigObs : public ceph::md_config_obs_impl<Config> { + uint64_t last_change = INVALID_VALUE; + uint64_t num_changes = 0; + + const char** get_tracked_conf_keys() const override { + static const char* keys[] = { + test_uint_option.c_str(), + nullptr, + }; + return keys; + } + void handle_conf_change(const Config& conf, + const std::set <std::string> &changes) override{ + if (changes.count(test_uint_option)) { + last_change = conf.get_val<uint64_t>(test_uint_option); + num_changes += 1; + } + } +public: + ConfigObs() { + crimson::common::local_conf().add_observer(this); + } + + uint64_t get_last_change() const { return last_change; } + uint64_t get_num_changes() const { return num_changes; } + seastar::future<> stop() { + 
crimson::common::local_conf().remove_observer(this); + return seastar::make_ready_future<>(); + } +}; + +seastar::sharded<ConfigObs> sharded_cobs; + +static seastar::future<> test_config() +{ + return crimson::common::sharded_conf().start(EntityName{}, "ceph"sv).then([] { + std::vector<const char*> args; + std::string cluster; + std::string conf_file_list; + auto init_params = ceph_argparse_early_args(args, + CEPH_ENTITY_TYPE_CLIENT, + &cluster, + &conf_file_list); + auto& conf = crimson::common::local_conf(); + conf->name = init_params.name; + conf->cluster = cluster; + return conf.parse_config_files(conf_file_list); + }).then([] { + return crimson::common::sharded_conf().invoke_on(0, &Config::start); + }).then([] { + return sharded_cobs.start(); + }).then([] { + auto& conf = crimson::common::local_conf(); + return conf.set_val(test_uint_option, std::to_string(EXPECTED_VALUE)); + }).then([] { + return crimson::common::sharded_conf().invoke_on_all([](Config& config) { + if (config.get_val<uint64_t>(test_uint_option) != EXPECTED_VALUE) { + throw std::runtime_error("configurations don't match"); + } + if (sharded_cobs.local().get_last_change() != EXPECTED_VALUE) { + throw std::runtime_error("last applied changes don't match the latest config"); + } + if (sharded_cobs.local().get_num_changes() != 1) { + throw std::runtime_error("num changes don't match actual changes"); + } + }); + }).finally([] { + return sharded_cobs.stop(); + }).finally([] { + return crimson::common::sharded_conf().stop(); + }); +} + +int main(int argc, char** argv) +{ + seastar::app_template app; + return app.run(argc, argv, [&] { + return test_config().then([] { + std::cout << "All tests succeeded" << std::endl; + }).handle_exception([] (auto eptr) { + std::cout << "Test failure" << std::endl; + return seastar::make_exception_future<>(eptr); + }); + }); +} + + +/* + * Local Variables: + * compile-command: "make -j4 \ + * -C ../../../build \ + * unittest_seastar_config" + * End: + */ diff --git 
a/src/test/crimson/test_denc.cc b/src/test/crimson/test_denc.cc new file mode 100644 index 000000000..10ebd6dce --- /dev/null +++ b/src/test/crimson/test_denc.cc @@ -0,0 +1,53 @@ +#include <string> +#include <seastar/core/temporary_buffer.hh> +#include <gtest/gtest.h> +#include "include/denc.h" +#include "common/buffer_seastar.h" + +using temporary_buffer = seastar::temporary_buffer<char>; +using buffer_iterator = seastar_buffer_iterator; +using const_buffer_iterator = const_seastar_buffer_iterator; + +template<typename T> +void test_denc(T v) { + // estimate + size_t s = 0; + denc(v, s); + ASSERT_NE(s, 0u); + + // encode + temporary_buffer buf{s}; + buffer_iterator enc{buf}; + denc(v, enc); + size_t len = enc.get() - buf.begin(); + ASSERT_LE(len, s); + + // decode + T out; + temporary_buffer encoded = buf.share(); + encoded.trim(len); + const_buffer_iterator dec{encoded}; + denc(out, dec); + ASSERT_EQ(v, out); + ASSERT_EQ(dec.get(), enc.get()); +} + +TEST(denc, simple) +{ + test_denc((uint8_t)4); + test_denc((int8_t)-5); + test_denc((uint16_t)6); + test_denc((int16_t)-7); + test_denc((uint32_t)8); + test_denc((int32_t)-9); + test_denc((uint64_t)10); + test_denc((int64_t)-11); +} + +TEST(denc, string) +{ + std::string a, b("hi"), c("multi\nline\n"); + test_denc(a); + test_denc(b); + test_denc(c); +} diff --git a/src/test/crimson/test_errorator.cc b/src/test/crimson/test_errorator.cc new file mode 100644 index 000000000..939c6cde8 --- /dev/null +++ b/src/test/crimson/test_errorator.cc @@ -0,0 +1,99 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 smarttab + +#include <boost/iterator/counting_iterator.hpp> +#include <numeric> + +#include "test/crimson/gtest_seastar.h" + +#include "crimson/common/errorator.h" +#include "crimson/common/errorator-loop.h" +#include "crimson/common/log.h" +#include "seastar/core/sleep.hh" + +struct errorator_test_t : public seastar_test_suite_t { + using ertr = 
crimson::errorator<crimson::ct_error::invarg>; + ertr::future<> test_do_until() { + return crimson::repeat([i=0]() mutable { + if (i < 5) { + ++i; + return ertr::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::no); + } else { + return ertr::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::yes); + } + }); + } + static constexpr int SIZE = 42; + ertr::future<> test_parallel_for_each() { + auto sum = std::make_unique<int>(0); + return ertr::parallel_for_each( + boost::make_counting_iterator(0), + boost::make_counting_iterator(SIZE), + [sum=sum.get()](int i) { + *sum += i; + }).safe_then([sum=std::move(sum)] { + int expected = std::accumulate(boost::make_counting_iterator(0), + boost::make_counting_iterator(SIZE), + 0); + ASSERT_EQ(*sum, expected); + }); + } + struct noncopyable_t { + constexpr noncopyable_t() = default; + ~noncopyable_t() = default; + noncopyable_t(noncopyable_t&&) = default; + private: + noncopyable_t(const noncopyable_t&) = delete; + noncopyable_t& operator=(const noncopyable_t&) = delete; + }; + ertr::future<> test_non_copy_then() { + return create_noncopyable().safe_then([](auto t) { + return ertr::now(); + }); + } + ertr::future<int> test_futurization() { + // we don't want to be enforced to always do `make_ready_future(...)`. + // as in seastar::future, the futurization should take care about + // turning non-future types (e.g. int) into futurized ones (e.g. + // ertr::future<int>). 
+ return ertr::now().safe_then([] { + return 42; + }).safe_then([](int life) { + return ertr::make_ready_future<int>(life); + }); + } +private: + ertr::future<noncopyable_t> create_noncopyable() { + return ertr::make_ready_future<noncopyable_t>(); + } +}; + +TEST_F(errorator_test_t, basic) +{ + run_async([this] { + test_do_until().unsafe_get0(); + }); +} + +TEST_F(errorator_test_t, parallel_for_each) +{ + run_async([this] { + test_parallel_for_each().unsafe_get0(); + }); +} + +TEST_F(errorator_test_t, non_copy_then) +{ + run_async([this] { + test_non_copy_then().unsafe_get0(); + }); +} + +TEST_F(errorator_test_t, test_futurization) +{ + run_async([this] { + test_futurization().unsafe_get0(); + }); +} diff --git a/src/test/crimson/test_fixed_kv_node_layout.cc b/src/test/crimson/test_fixed_kv_node_layout.cc new file mode 100644 index 000000000..e6377ec14 --- /dev/null +++ b/src/test/crimson/test_fixed_kv_node_layout.cc @@ -0,0 +1,376 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <stdio.h> +#include <iostream> + +#include "gtest/gtest.h" + +#include "crimson/common/fixed_kv_node_layout.h" + +using namespace crimson; +using namespace crimson::common; + +struct test_val_t { + uint32_t t1 = 0; + int32_t t2 = 0; + + bool operator==(const test_val_t &rhs) const { + return rhs.t1 == t1 && rhs.t2 == t2; + } + bool operator!=(const test_val_t &rhs) const { + return !(*this == rhs); + } +}; + +struct test_val_le_t { + ceph_le32 t1{0}; + ceph_les32 t2{0}; + + test_val_le_t() = default; + test_val_le_t(const test_val_le_t &) = default; + test_val_le_t(const test_val_t &nv) + : t1(nv.t1), t2(nv.t2) {} + + operator test_val_t() const { + return test_val_t{t1, t2}; + } +}; + +struct test_meta_t { + uint32_t t1 = 0; + uint32_t t2 = 0; + + bool operator==(const test_meta_t &rhs) const { + return rhs.t1 == t1 && rhs.t2 == t2; + } + bool operator!=(const test_meta_t &rhs) const { + return !(*this == rhs); + } + + 
std::pair<test_meta_t, test_meta_t> split_into(uint32_t pivot) const { + return std::make_pair( + test_meta_t{t1, pivot}, + test_meta_t{pivot, t2}); + } + + static test_meta_t merge_from(const test_meta_t &lhs, const test_meta_t &rhs) { + return test_meta_t{lhs.t1, rhs.t2}; + } + + static std::pair<test_meta_t, test_meta_t> + rebalance(const test_meta_t &lhs, const test_meta_t &rhs, uint32_t pivot) { + return std::make_pair( + test_meta_t{lhs.t1, pivot}, + test_meta_t{pivot, rhs.t2}); + } +}; + +struct test_meta_le_t { + ceph_le32 t1{0}; + ceph_le32 t2{0}; + + test_meta_le_t() = default; + test_meta_le_t(const test_meta_le_t &) = default; + test_meta_le_t(const test_meta_t &nv) + : t1(nv.t1), t2(nv.t2) {} + + operator test_meta_t() const { + return test_meta_t{t1, t2}; + } +}; + +constexpr size_t CAPACITY = 339; + +struct TestNode : FixedKVNodeLayout< + CAPACITY, + test_meta_t, test_meta_le_t, + uint32_t, ceph_le32, + test_val_t, test_val_le_t> { + char buf[4096]; + TestNode() : FixedKVNodeLayout(buf) { + memset(buf, 0, sizeof(buf)); + set_meta({0, std::numeric_limits<uint32_t>::max()}); + } + TestNode(const TestNode &rhs) + : FixedKVNodeLayout(buf) { + ::memcpy(buf, rhs.buf, sizeof(buf)); + } + + TestNode &operator=(const TestNode &rhs) { + memcpy(buf, rhs.buf, sizeof(buf)); + return *this; + } +}; + +TEST(FixedKVNodeTest, basic) { + auto node = TestNode(); + ASSERT_EQ(node.get_size(), 0); + + auto val = test_val_t{ 1, 1 }; + node.journal_insert(node.begin(), 1, val, nullptr); + ASSERT_EQ(node.get_size(), 1); + + auto iter = node.begin(); + ASSERT_EQ(iter.get_key(), 1); + ASSERT_EQ(val, iter.get_val()); + + ASSERT_EQ(std::numeric_limits<uint32_t>::max(), iter.get_next_key_or_max()); +} + +TEST(FixedKVNodeTest, at_capacity) { + auto node = TestNode(); + ASSERT_EQ(CAPACITY, node.get_capacity()); + + ASSERT_EQ(node.get_size(), 0); + + unsigned short num = 0; + auto iter = node.begin(); + while (num < CAPACITY) { + node.journal_insert(iter, num, test_val_t{num, num}, 
nullptr); + ++num; + ++iter; + } + ASSERT_EQ(node.get_size(), CAPACITY); + + num = 0; + for (auto &i : node) { + ASSERT_EQ(i.get_key(), num); + ASSERT_EQ(i.get_val(), (test_val_t{num, num})); + if (num < (CAPACITY - 1)) { + ASSERT_EQ(i.get_next_key_or_max(), num + 1); + } else { + ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max()); + } + ++num; + } +} + +TEST(FixedKVNodeTest, split) { + auto node = TestNode(); + + ASSERT_EQ(node.get_size(), 0); + + unsigned short num = 0; + auto iter = node.begin(); + while (num < CAPACITY) { + node.journal_insert(iter, num, test_val_t{num, num}, nullptr); + ++num; + ++iter; + } + ASSERT_EQ(node.get_size(), CAPACITY); + + auto split_left = TestNode(); + auto split_right = TestNode(); + node.split_into(split_left, split_right); + + ASSERT_EQ(split_left.get_size() + split_right.get_size(), CAPACITY); + ASSERT_EQ(split_left.get_meta().t1, split_left.begin()->get_key()); + ASSERT_EQ(split_left.get_meta().t2, split_right.get_meta().t1); + ASSERT_EQ(split_right.get_meta().t2, std::numeric_limits<uint32_t>::max()); + + num = 0; + for (auto &i : split_left) { + ASSERT_EQ(i.get_key(), num); + ASSERT_EQ(i.get_val(), (test_val_t{num, num})); + if (num < split_left.get_size() - 1) { + ASSERT_EQ(i.get_next_key_or_max(), num + 1); + } else { + ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max()); + } + ++num; + } + for (auto &i : split_right) { + ASSERT_EQ(i.get_key(), num); + ASSERT_EQ(i.get_val(), (test_val_t{num, num})); + if (num < CAPACITY - 1) { + ASSERT_EQ(i.get_next_key_or_max(), num + 1); + } else { + ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max()); + } + ++num; + } + ASSERT_EQ(num, CAPACITY); +} + +TEST(FixedKVNodeTest, merge) { + auto node = TestNode(); + auto node2 = TestNode(); + + ASSERT_EQ(node.get_size(), 0); + ASSERT_EQ(node2.get_size(), 0); + + unsigned short num = 0; + auto iter = node.begin(); + while (num < CAPACITY/2) { + node.journal_insert(iter, num, 
test_val_t{num, num}, nullptr); + ++num; + ++iter; + } + node.set_meta({0, num}); + node2.set_meta({num, std::numeric_limits<uint32_t>::max()}); + iter = node2.begin(); + while (num < (2 * (CAPACITY / 2))) { + node2.journal_insert(iter, num, test_val_t{num, num}, nullptr); + ++num; + ++iter; + } + + ASSERT_EQ(node.get_size(), CAPACITY / 2); + ASSERT_EQ(node2.get_size(), CAPACITY / 2); + + auto total = node.get_size() + node2.get_size(); + + auto node_merged = TestNode(); + node_merged.merge_from(node, node2); + + ASSERT_EQ( + node_merged.get_meta(), + (test_meta_t{0, std::numeric_limits<uint32_t>::max()})); + + ASSERT_EQ(node_merged.get_size(), total); + num = 0; + for (auto &i : node_merged) { + ASSERT_EQ(i.get_key(), num); + ASSERT_EQ(i.get_val(), (test_val_t{num, num})); + if (num < node_merged.get_size() - 1) { + ASSERT_EQ(i.get_next_key_or_max(), num + 1); + } else { + ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max()); + } + ++num; + } + ASSERT_EQ(num, total); +} + +void run_balance_test(unsigned left, unsigned right, bool prefer_left) +{ + auto node = TestNode(); + auto node2 = TestNode(); + + ASSERT_EQ(node.get_size(), 0); + ASSERT_EQ(node2.get_size(), 0); + + unsigned short num = 0; + auto iter = node.begin(); + while (num < left) { + node.journal_insert(iter, num, test_val_t{num, num}, nullptr); + ++num; + ++iter; + } + node.set_meta({0, num}); + node2.set_meta({num, std::numeric_limits<uint32_t>::max()}); + iter = node2.begin(); + while (num < (left + right)) { + node2.journal_insert(iter, num, test_val_t{num, num}, nullptr); + ++num; + ++iter; + } + + ASSERT_EQ(node.get_size(), left); + ASSERT_EQ(node2.get_size(), right); + + auto total = node.get_size() + node2.get_size(); + + auto node_balanced = TestNode(); + auto node_balanced2 = TestNode(); + auto pivot = TestNode::balance_into_new_nodes( + node, + node2, + prefer_left, + node_balanced, + node_balanced2); + + ASSERT_EQ(total, node_balanced.get_size() + 
node_balanced2.get_size()); + + unsigned left_size, right_size; + if (total % 2) { + if (prefer_left) { + left_size = (total/2) + 1; + right_size = total/2; + } else { + left_size = total/2; + right_size = (total/2) + 1; + } + } else { + left_size = right_size = total/2; + } + ASSERT_EQ(pivot, left_size); + ASSERT_EQ(left_size, node_balanced.get_size()); + ASSERT_EQ(right_size, node_balanced2.get_size()); + + ASSERT_EQ( + node_balanced.get_meta(), + (test_meta_t{0, left_size})); + ASSERT_EQ( + node_balanced2.get_meta(), + (test_meta_t{left_size, std::numeric_limits<uint32_t>::max()})); + + num = 0; + for (auto &i: node_balanced) { + ASSERT_EQ(i.get_key(), num); + ASSERT_EQ(i.get_val(), (test_val_t{num, num})); + if (num < node_balanced.get_size() - 1) { + ASSERT_EQ(i.get_next_key_or_max(), num + 1); + } else { + ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max()); + } + ++num; + } + for (auto &i: node_balanced2) { + ASSERT_EQ(i.get_key(), num); + ASSERT_EQ(i.get_val(), (test_val_t{num, num})); + if (num < total - 1) { + ASSERT_EQ(i.get_next_key_or_max(), num + 1); + } else { + ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max()); + } + ++num; + } +} + +TEST(FixedKVNodeTest, balanced) { + run_balance_test(CAPACITY / 2, CAPACITY, true); + run_balance_test(CAPACITY / 2, CAPACITY, false); + run_balance_test(CAPACITY, CAPACITY / 2, true); + run_balance_test(CAPACITY, CAPACITY / 2, false); + run_balance_test(CAPACITY - 1, CAPACITY / 2, true); + run_balance_test(CAPACITY / 2, CAPACITY - 1, false); + run_balance_test(CAPACITY / 2, CAPACITY / 2, false); +} + +void run_replay_test( + std::vector<std::function<void(TestNode&, TestNode::delta_buffer_t&)>> &&f +) { + TestNode node; + for (unsigned i = 0; i < f.size(); ++i) { + TestNode::delta_buffer_t buf; + TestNode replayed = node; + f[i](node, buf); + buf.replay(replayed); + ASSERT_EQ(node.get_size(), replayed.get_size()); + ASSERT_EQ(node, replayed); + } +} + +TEST(FixedKVNodeTest, 
replay) { + run_replay_test({ + [](auto &n, auto &b) { + n.journal_insert(n.lower_bound(1), 1, test_val_t{1, 1}, &b); + ASSERT_EQ(1, n.get_size()); + }, + [](auto &n, auto &b) { + n.journal_insert(n.lower_bound(3), 3, test_val_t{1, 2}, &b); + ASSERT_EQ(2, n.get_size()); + }, + [](auto &n, auto &b) { + n.journal_remove(n.find(3), &b); + ASSERT_EQ(1, n.get_size()); + }, + [](auto &n, auto &b) { + n.journal_insert(n.lower_bound(2), 2, test_val_t{5, 1}, &b); + ASSERT_EQ(2, n.get_size()); + } + }); + +} diff --git a/src/test/crimson/test_interruptible_future.cc b/src/test/crimson/test_interruptible_future.cc new file mode 100644 index 000000000..bb938de24 --- /dev/null +++ b/src/test/crimson/test_interruptible_future.cc @@ -0,0 +1,301 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <seastar/core/sleep.hh> + +#include "test/crimson/gtest_seastar.h" + +#include "crimson/common/interruptible_future.h" +#include "crimson/common/log.h" + +using namespace crimson; + +class test_interruption : public std::exception +{}; + +class TestInterruptCondition { +public: + TestInterruptCondition(bool interrupt) + : interrupt(interrupt) {} + + template <typename T> + std::optional<T> may_interrupt() { + if (interrupt) { + return seastar::futurize<T>::make_exception_future(test_interruption()); + } else { + return std::optional<T>(); + } + } + + template <typename T> + static constexpr bool is_interruption_v = std::is_same_v<T, test_interruption>; + + static bool is_interruption(std::exception_ptr& eptr) { + if (*eptr.__cxa_exception_type() == typeid(test_interruption)) + return true; + return false; + } +private: + bool interrupt = false; +}; + +namespace crimson::interruptible { +template +thread_local interrupt_cond_t<TestInterruptCondition> +interrupt_cond<TestInterruptCondition>; +} + +TEST_F(seastar_test_suite_t, basic) +{ + using interruptor = + interruptible::interruptor<TestInterruptCondition>; + run_async([] { + 
interruptor::with_interruption( + [] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return interruptor::make_interruptible(seastar::now()) + .then_interruptible([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + }).then_interruptible([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return errorator<ct_error::enoent>::make_ready_future<>(); + }).safe_then_interruptible([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return seastar::now(); + }, errorator<ct_error::enoent>::all_same_way([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + }) + ); + }, [](std::exception_ptr) {}, false).get0(); + + interruptor::with_interruption( + [] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return interruptor::make_interruptible(seastar::now()) + .then_interruptible([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + }); + }, [](std::exception_ptr) { + ceph_assert(!interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return seastar::now(); + }, true).get0(); + + + }); +} + +TEST_F(seastar_test_suite_t, loops) +{ + using interruptor = + interruptible::interruptor<TestInterruptCondition>; + std::cout << "testing interruptible loops" << std::endl; + run_async([] { + std::cout << "beginning" << std::endl; + interruptor::with_interruption( + [] { + std::cout << "interruptiion enabled" << std::endl; + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return interruptor::make_interruptible(seastar::now()) + .then_interruptible([] { + std::cout << "test seastar future do_for_each" << std::endl; + std::vector<int> vec = {1, 2}; + return seastar::do_with(std::move(vec), [](auto& vec) { + return interruptor::do_for_each(std::begin(vec), std::end(vec), 
[](int) { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return seastar::now(); + }); + }); + }).then_interruptible([] { + std::cout << "test interruptible seastar future do_for_each" << std::endl; + std::vector<int> vec = {1, 2}; + return seastar::do_with(std::move(vec), [](auto& vec) { + return interruptor::do_for_each(std::begin(vec), std::end(vec), [](int) { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return interruptor::make_interruptible(seastar::now()); + }); + }); + }).then_interruptible([] { + std::cout << "test seastar future repeat" << std::endl; + return interruptor::repeat([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return interruptor::make_interruptible( + seastar::make_ready_future< + seastar::stop_iteration>( + seastar::stop_iteration::yes)); + }); + }).then_interruptible([] { + std::cout << "test interruptible seastar future repeat" << std::endl; + return interruptor::repeat([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return seastar::make_ready_future< + seastar::stop_iteration>( + seastar::stop_iteration::yes); + }); + }).then_interruptible([] { + std::cout << "test interruptible errorated future do_for_each" << std::endl; + std::vector<int> vec = {1, 2}; + return seastar::do_with(std::move(vec), [](auto& vec) { + using namespace std::chrono_literals; + return interruptor::make_interruptible(seastar::now()).then_interruptible([&vec] { + return interruptor::do_for_each(std::begin(vec), std::end(vec), [](int) { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return interruptor::make_interruptible( + errorator<ct_error::enoent>::make_ready_future<>()); + }).safe_then_interruptible([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return seastar::now(); + }, 
errorator<ct_error::enoent>::all_same_way([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + })); + }); + }); + }).then_interruptible([] { + std::cout << "test errorated future do_for_each" << std::endl; + std::vector<int> vec; + // set a big enough iteration times to test if there is stack overflow in do_for_each + for (int i = 0; i < 1000000; i++) { + vec.push_back(i); + } + return seastar::do_with(std::move(vec), [](auto& vec) { + using namespace std::chrono_literals; + return interruptor::make_interruptible(seastar::now()).then_interruptible([&vec] { + return interruptor::do_for_each(std::begin(vec), std::end(vec), [](int) { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return errorator<ct_error::enoent>::make_ready_future<>(); + }).safe_then_interruptible([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return seastar::now(); + }, errorator<ct_error::enoent>::all_same_way([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + })); + }); + }); + }).then_interruptible([] { + ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond); + return seastar::now(); + }); + }, [](std::exception_ptr) {}, false).get0(); + }); +} + +using base_intr = interruptible::interruptor<TestInterruptCondition>; + +using base_ertr = errorator<ct_error::enoent, ct_error::eagain>; +using base_iertr = interruptible::interruptible_errorator< + TestInterruptCondition, + base_ertr>; + +using base2_ertr = base_ertr::extend<ct_error::input_output_error>; +using base2_iertr = interruptible::interruptible_errorator< + TestInterruptCondition, + base2_ertr>; + +template <typename F> +auto with_intr(F &&f) { + return base_intr::with_interruption_to_error<ct_error::eagain>( + std::forward<F>(f), + TestInterruptCondition(false)); +} + +TEST_F(seastar_test_suite_t, errorated) +{ + run_async([] { + base_ertr::future<> 
ret = with_intr( + []() { + return base_iertr::now(); + } + ); + ret.unsafe_get0(); + }); +} + +TEST_F(seastar_test_suite_t, errorated_value) +{ + run_async([] { + base_ertr::future<int> ret = with_intr( + []() { + return base_iertr::make_ready_future<int>( + 1 + ); + }); + EXPECT_EQ(ret.unsafe_get0(), 1); + }); +} + +TEST_F(seastar_test_suite_t, expand_errorated_value) +{ + run_async([] { + base2_ertr::future<> ret = with_intr( + []() { + return base_iertr::make_ready_future<int>( + 1 + ).si_then([](auto) { + return base2_iertr::make_ready_future<>(); + }); + }); + ret.unsafe_get0(); + }); +} + +TEST_F(seastar_test_suite_t, interruptible_async) +{ + using interruptor = + interruptible::interruptor<TestInterruptCondition>; + + run_async([] { + interruptor::with_interruption([] { + auto fut = interruptor::async([] { + interruptor::make_interruptible( + seastar::sleep(std::chrono::milliseconds(10))).get(); + ceph_assert(interruptible::interrupt_cond< + TestInterruptCondition>.interrupt_cond); + ceph_assert(interruptible::interrupt_cond< + TestInterruptCondition>.ref_count == 1); + }); + ceph_assert(interruptible::interrupt_cond< + TestInterruptCondition>.interrupt_cond); + ceph_assert(interruptible::interrupt_cond< + TestInterruptCondition>.ref_count == 1); + return fut; + }, [](std::exception_ptr) {}, false).get0(); + }); +} + +TEST_F(seastar_test_suite_t, DISABLED_nested_interruptors) +{ + run_async([] { + base_ertr::future<> ret = with_intr( + []() { + return base_iertr::now().safe_then_interruptible([]() { + return with_intr( + []() { + return base_iertr::now(); + } + ); + }); + } + ); + ret.unsafe_get0(); + }); +} + +#if 0 +// This seems to cause a hang in the gcc-9 linker on bionic +TEST_F(seastar_test_suite_t, handle_error) +{ + run_async([] { + base_ertr::future<> ret = with_intr( + []() { + return base2_iertr::make_ready_future<int>( + 1 + ).handle_error_interruptible( + base_iertr::pass_further{}, + ct_error::assert_all{"crash on eio"} + ).si_then([](auto) 
{ + return base_iertr::now(); + }); + }); + ret.unsafe_get0(); + }); +} +#endif diff --git a/src/test/crimson/test_lru.cc b/src/test/crimson/test_lru.cc new file mode 100644 index 000000000..40ab41539 --- /dev/null +++ b/src/test/crimson/test_lru.cc @@ -0,0 +1,213 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com> + * + * Author: Loic Dachary <loic@dachary.org> + * Cheng Cheng <ccheng.leo@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library Public License for more details. 
+ * + */ + +#include <stdio.h> +#include "gtest/gtest.h" +#include "crimson/common/shared_lru.h" + +class LRUTest : public SharedLRU<unsigned int, int> { +public: + auto add(unsigned int key, int value, bool* existed = nullptr) { + auto pv = new int{value}; + auto ptr = insert(key, std::unique_ptr<int>{pv}); + if (existed) { + *existed = (ptr.get() != pv); + } + return ptr; + } +}; + +TEST(LRU, add) { + LRUTest cache; + unsigned int key = 1; + int value1 = 2; + bool existed = false; + { + auto ptr = cache.add(key, value1, &existed); + ASSERT_TRUE(ptr); + ASSERT_TRUE(ptr.get()); + ASSERT_EQ(value1, *ptr); + ASSERT_FALSE(existed); + } + { + auto ptr = cache.add(key, 3, &existed); + ASSERT_EQ(value1, *ptr); + ASSERT_TRUE(existed); + } +} + +TEST(LRU, empty) { + LRUTest cache; + unsigned int key = 1; + bool existed = false; + + ASSERT_TRUE(cache.empty()); + { + int value1 = 2; + auto ptr = cache.add(key, value1, &existed); + ASSERT_EQ(value1, *ptr); + ASSERT_FALSE(existed); + } + ASSERT_FALSE(cache.empty()); + + cache.clear(); + ASSERT_TRUE(cache.empty()); +} + +TEST(LRU, lookup) { + LRUTest cache; + unsigned int key = 1; + { + int value = 2; + auto ptr = cache.add(key, value); + ASSERT_TRUE(ptr); + ASSERT_TRUE(ptr.get()); + ASSERT_TRUE(cache.find(key).get()); + ASSERT_EQ(value, *cache.find(key)); + } + ASSERT_TRUE(cache.find(key).get()); +} + +TEST(LRU, lookup_or_create) { + LRUTest cache; + { + int value = 2; + unsigned int key = 1; + ASSERT_TRUE(cache.add(key, value).get()); + ASSERT_TRUE(cache[key].get()); + ASSERT_EQ(value, *cache.find(key)); + } + { + unsigned int key = 2; + ASSERT_TRUE(cache[key].get()); + ASSERT_EQ(0, *cache.find(key)); + } + ASSERT_TRUE(cache.find(1).get()); + ASSERT_TRUE(cache.find(2).get()); +} + +TEST(LRU, lower_bound) { + LRUTest cache; + + { + unsigned int key = 1; + ASSERT_FALSE(cache.lower_bound(key)); + int value = 2; + + ASSERT_TRUE(cache.add(key, value).get()); + ASSERT_TRUE(cache.lower_bound(key).get()); + EXPECT_EQ(value, 
*cache.lower_bound(key)); + } +} + +TEST(LRU, get_next) { + + { + LRUTest cache; + const unsigned int key = 0; + EXPECT_FALSE(cache.upper_bound(key)); + } + { + LRUTest cache; + const unsigned int key1 = 111; + auto ptr1 = cache[key1]; + const unsigned int key2 = 222; + auto ptr2 = cache[key2]; + + auto i = cache.upper_bound(0); + ASSERT_TRUE(i); + EXPECT_EQ(i->first, key1); + auto j = cache.upper_bound(i->first); + ASSERT_TRUE(j); + EXPECT_EQ(j->first, key2); + } +} + +TEST(LRU, clear) { + LRUTest cache; + unsigned int key = 1; + int value = 2; + cache.add(key, value); + { + auto found = cache.find(key); + ASSERT_TRUE(found); + ASSERT_EQ(value, *found); + } + ASSERT_TRUE(cache.find(key).get()); + cache.clear(); + ASSERT_FALSE(cache.find(key)); + ASSERT_TRUE(cache.empty()); +} + +TEST(LRU, eviction) { + LRUTest cache{5}; + bool existed; + // add a bunch of elements, some of them will be evicted + for (size_t i = 0; i < 2 * cache.capacity(); ++i) { + cache.add(i, i, &existed); + ASSERT_FALSE(existed); + } + size_t i = 0; + for (; i < cache.capacity(); ++i) { + ASSERT_FALSE(cache.find(i)); + } + for (; i < 2 * cache.capacity(); ++i) { + ASSERT_TRUE(cache.find(i)); + } +} + +TEST(LRU, track_weak) { + constexpr int SIZE = 5; + LRUTest cache{SIZE}; + + bool existed = false; + // strong reference to keep 0 alive + auto ptr = cache.add(0, 0, &existed); + ASSERT_FALSE(existed); + + // add a bunch of elements to get 0 evicted + for (size_t i = 1; i < 2 * cache.capacity(); ++i) { + cache.add(i, i, &existed); + ASSERT_FALSE(existed); + } + // 0 is still reachable via the cache + ASSERT_TRUE(cache.find(0)); + ASSERT_TRUE(cache.find(0).get()); + ASSERT_EQ(0, *cache.find(0)); + + // [0..SIZE) are evicted when adding [SIZE..2*SIZE) + // [SIZE..SIZE * 2) were still in the cache before accessing 0, + // but SIZE got evicted when accessing 0 + ASSERT_FALSE(cache.find(SIZE-1)); + ASSERT_FALSE(cache.find(SIZE)); + ASSERT_TRUE(cache.find(SIZE+1)); + 
ASSERT_TRUE(cache.find(SIZE+1).get()); + ASSERT_EQ((int)SIZE+1, *cache.find(SIZE+1)); + + ptr.reset(); + // 0 is still reachable, as it is now put back into LRU cache + ASSERT_TRUE(cache.find(0)); +} + +// Local Variables: +// compile-command: "cmake --build ../../../build -j 8 --target unittest_seastar_lru && ctest -R unittest_seastar_lru # --gtest_filter=*.* --log-to-stderr=true" +// End: diff --git a/src/test/crimson/test_messenger.cc b/src/test/crimson/test_messenger.cc new file mode 100644 index 000000000..a42572246 --- /dev/null +++ b/src/test/crimson/test_messenger.cc @@ -0,0 +1,3874 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/ceph_argparse.h" +#include "common/ceph_time.h" +#include "messages/MPing.h" +#include "messages/MCommand.h" +#include "messages/MCommandReply.h" +#include "messages/MOSDOp.h" +#include "messages/MOSDOpReply.h" +#include "crimson/auth/DummyAuth.h" +#include "crimson/common/log.h" +#include "crimson/net/Connection.h" +#include "crimson/net/Dispatcher.h" +#include "crimson/net/Messenger.h" +#include "crimson/net/Interceptor.h" + +#include <map> +#include <random> +#include <boost/program_options.hpp> +#include <fmt/format.h> +#include <fmt/ostream.h> +#include <seastar/core/app-template.hh> +#include <seastar/core/do_with.hh> +#include <seastar/core/future-util.hh> +#include <seastar/core/gate.hh> +#include <seastar/core/reactor.hh> +#include <seastar/core/sleep.hh> +#include <seastar/core/with_timeout.hh> + +#include "test_messenger.h" + +using namespace std::chrono_literals; +namespace bpo = boost::program_options; +using crimson::common::local_conf; + +namespace { + +seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); +} + +static std::random_device rd; +static std::default_random_engine rng{rd()}; +static bool verbose = false; + +static entity_addr_t get_server_addr() { + static int port = 9030; + ++port; + entity_addr_t saddr; + 
saddr.parse("127.0.0.1", nullptr); + saddr.set_port(port); + return saddr; +} + +template <typename T, typename... Args> +seastar::future<T*> create_sharded(Args... args) { + // we should only construct/stop shards on #0 + return seastar::smp::submit_to(0, [=] { + auto sharded_obj = seastar::make_lw_shared<seastar::sharded<T>>(); + return sharded_obj->start(args... + ).then([sharded_obj] { + seastar::engine().at_exit([sharded_obj] { + return sharded_obj->stop().then([sharded_obj] {}); + }); + return sharded_obj.get(); + }); + }).then([](seastar::sharded<T> *ptr_shard) { + return &ptr_shard->local(); + }); +} + +class ShardedGates + : public seastar::peering_sharded_service<ShardedGates> { +public: + ShardedGates() = default; + ~ShardedGates() { + assert(gate.is_closed()); + } + + template <typename Func> + void dispatch_in_background(const char *what, Func &&f) { + std::ignore = seastar::with_gate( + container().local().gate, std::forward<Func>(f) + ).handle_exception([what](std::exception_ptr eptr) { + try { + std::rethrow_exception(eptr); + } catch (std::exception &e) { + logger().error("ShardedGates::dispatch_in_background: " + "{} got exxception {}", what, e.what()); + } + }); + } + + seastar::future<> close() { + return container().invoke_on_all([](auto &local) { + return local.gate.close(); + }); + } + + static seastar::future<ShardedGates*> create() { + return create_sharded<ShardedGates>(); + } + + // seastar::future<> stop() is intentially not implemented + +private: + seastar::gate gate; +}; + +static seastar::future<> test_echo(unsigned rounds, + double keepalive_ratio) +{ + struct test_state { + struct Server final + : public crimson::net::Dispatcher { + ShardedGates &gates; + crimson::net::MessengerRef msgr; + crimson::auth::DummyAuthClientServer dummy_auth; + + Server(ShardedGates &gates) : gates{gates} {} + + void ms_handle_accept( + crimson::net::ConnectionRef conn, + seastar::shard_id prv_shard, + bool is_replace) override { + logger().info("server 
accepted {}", *conn); + ceph_assert(prv_shard == seastar::this_shard_id()); + ceph_assert(!is_replace); + } + + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef c, MessageRef m) override { + if (verbose) { + logger().info("server got {}", *m); + } + // reply with a pong + gates.dispatch_in_background("echo_send_pong", [c] { + return c->send(crimson::make_message<MPing>()); + }); + return {seastar::now()}; + } + + seastar::future<> init(const entity_name_t& name, + const std::string& lname, + const uint64_t nonce, + const entity_addr_t& addr) { + msgr = crimson::net::Messenger::create( + name, lname, nonce, false); + msgr->set_default_policy(crimson::net::SocketPolicy::stateless_server(0)); + msgr->set_auth_client(&dummy_auth); + msgr->set_auth_server(&dummy_auth); + return msgr->bind(entity_addrvec_t{addr}).safe_then([this] { + return msgr->start({this}); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [addr] (const std::error_code& e) { + logger().error("test_echo(): " + "there is another instance running at {}", addr); + ceph_abort(); + })); + } + seastar::future<> shutdown() { + ceph_assert(msgr); + msgr->stop(); + return msgr->shutdown(); + } + }; + + class Client final + : public crimson::net::Dispatcher, + public seastar::peering_sharded_service<Client> { + public: + Client(seastar::shard_id primary_sid, + unsigned rounds, + double keepalive_ratio, + ShardedGates *gates) + : primary_sid{primary_sid}, + keepalive_dist(std::bernoulli_distribution{keepalive_ratio}), + rounds(rounds), + gates{*gates} {} + + seastar::future<> init(const entity_name_t& name, + const std::string& lname, + const uint64_t nonce) { + assert(seastar::this_shard_id() == primary_sid); + msgr = crimson::net::Messenger::create( + name, lname, nonce, false); + msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0)); + msgr->set_auth_client(&dummy_auth); + msgr->set_auth_server(&dummy_auth); + return msgr->start({this}); + } + + 
seastar::future<> shutdown() { + assert(seastar::this_shard_id() == primary_sid); + ceph_assert(msgr); + msgr->stop(); + return msgr->shutdown(); + } + + seastar::future<> dispatch_pingpong(const entity_addr_t& peer_addr) { + assert(seastar::this_shard_id() == primary_sid); + mono_time start_time = mono_clock::now(); + auto conn = msgr->connect(peer_addr, entity_name_t::TYPE_OSD); + return seastar::futurize_invoke([this, conn] { + return do_dispatch_pingpong(conn); + }).then([] { + // 500ms should be enough to establish the connection + return seastar::sleep(500ms); + }).then([this, conn, start_time] { + return container().invoke_on( + conn->get_shard_id(), + [pconn=&*conn, start_time](auto &local) { + assert(pconn->is_connected()); + auto session = local.find_session(pconn); + std::chrono::duration<double> dur_handshake = session->connected_time - start_time; + std::chrono::duration<double> dur_pingpong = session->finish_time - session->connected_time; + logger().info("{}: handshake {}, pingpong {}", + *pconn, dur_handshake.count(), dur_pingpong.count()); + }).then([conn] {}); + }); + } + + static seastar::future<Client*> create( + unsigned rounds, + double keepalive_ratio, + ShardedGates *gates) { + return create_sharded<Client>( + seastar::this_shard_id(), + rounds, + keepalive_ratio, + gates); + } + + private: + struct PingSession : public seastar::enable_shared_from_this<PingSession> { + unsigned count = 0u; + mono_time connected_time; + mono_time finish_time; + }; + using PingSessionRef = seastar::shared_ptr<PingSession>; + + void ms_handle_connect( + crimson::net::ConnectionRef conn, + seastar::shard_id prv_shard) override { + auto &local = container().local(); + assert(prv_shard == seastar::this_shard_id()); + auto session = seastar::make_shared<PingSession>(); + auto [i, added] = local.sessions.emplace(&*conn, session); + std::ignore = i; + ceph_assert(added); + session->connected_time = mono_clock::now(); + } + + std::optional<seastar::future<>> 
ms_dispatch( + crimson::net::ConnectionRef c, MessageRef m) override { + auto &local = container().local(); + auto session = local.find_session(&*c); + ++(session->count); + if (verbose) { + logger().info("client ms_dispatch {}", session->count); + } + + if (session->count > rounds) { + logger().error("{}: got {} pongs, more than expected {}", *c, session->count, rounds); + ceph_abort(); + } else if (session->count == rounds) { + logger().info("{}: finished receiving {} pongs", *c, session->count); + session->finish_time = mono_clock::now(); + gates.dispatch_in_background("echo_notify_done", [c, this] { + return container().invoke_on(primary_sid, [pconn=&*c](auto &local) { + auto found = local.pending_conns.find(pconn); + ceph_assert(found != local.pending_conns.end()); + found->second.set_value(); + }).then([c] {}); + }); + } + return {seastar::now()}; + } + + PingSessionRef find_session(crimson::net::Connection *c) { + auto found = sessions.find(c); + if (found == sessions.end()) { + ceph_assert(false); + } + return found->second; + } + + seastar::future<> do_dispatch_pingpong(crimson::net::ConnectionRef conn) { + auto [i, added] = pending_conns.emplace(&*conn, seastar::promise<>()); + std::ignore = i; + ceph_assert(added); + return seastar::do_with(0u, 0u, + [this, conn](auto &count_ping, auto &count_keepalive) { + return seastar::do_until( + [this, conn, &count_ping, &count_keepalive] { + bool stop = (count_ping == rounds); + if (stop) { + logger().info("{}: finished sending {} pings with {} keepalives", + *conn, count_ping, count_keepalive); + } + return stop; + }, + [this, conn, &count_ping, &count_keepalive] { + return seastar::repeat([this, conn, &count_ping, &count_keepalive] { + if (keepalive_dist(rng)) { + return conn->send_keepalive( + ).then([&count_keepalive] { + count_keepalive += 1; + return seastar::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::no); + }); + } else { + return conn->send(crimson::make_message<MPing>() + 
).then([&count_ping] { + count_ping += 1; + return seastar::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::yes); + }); + } + }); + }).then([this, conn] { + auto found = pending_conns.find(&*conn); + assert(found != pending_conns.end()); + return found->second.get_future(); + } + ); + }); + } + + private: + // primary shard only + const seastar::shard_id primary_sid; + std::bernoulli_distribution keepalive_dist; + crimson::net::MessengerRef msgr; + std::map<crimson::net::Connection*, seastar::promise<>> pending_conns; + crimson::auth::DummyAuthClientServer dummy_auth; + + // per shard + const unsigned rounds; + std::map<crimson::net::Connection*, PingSessionRef> sessions; + ShardedGates &gates; + }; + }; + + logger().info("test_echo(rounds={}, keepalive_ratio={}):", + rounds, keepalive_ratio); + return ShardedGates::create( + ).then([rounds, keepalive_ratio](auto *gates) { + return seastar::when_all_succeed( + test_state::Client::create(rounds, keepalive_ratio, gates), + test_state::Client::create(rounds, keepalive_ratio, gates), + seastar::make_ready_future<ShardedGates*>(gates)); + }).then_unpack([](auto *client1, auto *client2, auto *gates) { + auto server1 = seastar::make_shared<test_state::Server>(*gates); + auto server2 = seastar::make_shared<test_state::Server>(*gates); + // start servers and clients + auto addr1 = get_server_addr(); + auto addr2 = get_server_addr(); + addr1.set_type(entity_addr_t::TYPE_MSGR2); + addr2.set_type(entity_addr_t::TYPE_MSGR2); + return seastar::when_all_succeed( + server1->init(entity_name_t::OSD(0), "server1", 1, addr1), + server2->init(entity_name_t::OSD(1), "server2", 2, addr2), + client1->init(entity_name_t::OSD(2), "client1", 3), + client2->init(entity_name_t::OSD(3), "client2", 4) + // dispatch pingpong + ).then_unpack([client1, client2, server1, server2] { + return seastar::when_all_succeed( + // test connecting in parallel, accepting in parallel + 
client1->dispatch_pingpong(server1->msgr->get_myaddr()), + client1->dispatch_pingpong(server2->msgr->get_myaddr()), + client2->dispatch_pingpong(server1->msgr->get_myaddr()), + client2->dispatch_pingpong(server2->msgr->get_myaddr())); + // shutdown + }).then_unpack([client1] { + logger().info("client1 shutdown..."); + return client1->shutdown(); + }).then([client2] { + logger().info("client2 shutdown..."); + return client2->shutdown(); + }).then([server1] { + logger().info("server1 shutdown..."); + return server1->shutdown(); + }).then([server2] { + logger().info("server2 shutdown..."); + return server2->shutdown(); + }).then([] { + logger().info("test_echo() done!\n"); + }).handle_exception([](auto eptr) { + logger().error("test_echo() failed: got exception {}", eptr); + throw; + }).finally([gates, server1, server2] { + return gates->close(); + }); + }); +} + +seastar::future<> test_preemptive_shutdown() { + struct test_state { + class Server final + : public crimson::net::Dispatcher { + crimson::net::MessengerRef msgr; + crimson::auth::DummyAuthClientServer dummy_auth; + + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef c, MessageRef m) override { + std::ignore = c->send(crimson::make_message<MPing>()); + return {seastar::now()}; + } + + public: + seastar::future<> init(const entity_name_t& name, + const std::string& lname, + const uint64_t nonce, + const entity_addr_t& addr) { + msgr = crimson::net::Messenger::create( + name, lname, nonce, true); + msgr->set_default_policy(crimson::net::SocketPolicy::stateless_server(0)); + msgr->set_auth_client(&dummy_auth); + msgr->set_auth_server(&dummy_auth); + return msgr->bind(entity_addrvec_t{addr}).safe_then([this] { + return msgr->start({this}); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [addr] (const std::error_code& e) { + logger().error("test_preemptive_shutdown(): " + "there is another instance running at {}", addr); + ceph_abort(); + })); + } + entity_addr_t get_addr() 
const { + return msgr->get_myaddr(); + } + seastar::future<> shutdown() { + msgr->stop(); + return msgr->shutdown(); + } + }; + + class Client final + : public crimson::net::Dispatcher { + crimson::net::MessengerRef msgr; + crimson::auth::DummyAuthClientServer dummy_auth; + + bool stop_send = false; + seastar::promise<> stopped_send_promise; + + std::optional<seastar::future<>> ms_dispatch( + crimson::net::ConnectionRef, MessageRef m) override { + return {seastar::now()}; + } + + public: + seastar::future<> init(const entity_name_t& name, + const std::string& lname, + const uint64_t nonce) { + msgr = crimson::net::Messenger::create( + name, lname, nonce, true); + msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0)); + msgr->set_auth_client(&dummy_auth); + msgr->set_auth_server(&dummy_auth); + return msgr->start({this}); + } + void send_pings(const entity_addr_t& addr) { + auto conn = msgr->connect(addr, entity_name_t::TYPE_OSD); + // forwarded to stopped_send_promise + (void) seastar::do_until( + [this] { return stop_send; }, + [conn] { + return conn->send(crimson::make_message<MPing>()).then([] { + return seastar::sleep(0ms); + }); + } + ).then_wrapped([this, conn] (auto fut) { + fut.forward_to(std::move(stopped_send_promise)); + }); + } + seastar::future<> shutdown() { + msgr->stop(); + return msgr->shutdown().then([this] { + stop_send = true; + return stopped_send_promise.get_future(); + }); + } + }; + }; + + logger().info("test_preemptive_shutdown():"); + auto server = seastar::make_shared<test_state::Server>(); + auto client = seastar::make_shared<test_state::Client>(); + auto addr = get_server_addr(); + addr.set_type(entity_addr_t::TYPE_MSGR2); + addr.set_family(AF_INET); + return seastar::when_all_succeed( + server->init(entity_name_t::OSD(6), "server4", 7, addr), + client->init(entity_name_t::OSD(7), "client4", 8) + ).then_unpack([server, client] { + client->send_pings(server->get_addr()); + return seastar::sleep(100ms); + }).then([client] 
{ + logger().info("client shutdown..."); + return client->shutdown(); + }).then([server] { + logger().info("server shutdown..."); + return server->shutdown(); + }).then([] { + logger().info("test_preemptive_shutdown() done!\n"); + }).handle_exception([server, client] (auto eptr) { + logger().error("test_preemptive_shutdown() failed: got exception {}", eptr); + throw; + }); +} + +using ceph::msgr::v2::Tag; +using crimson::net::bp_action_t; +using crimson::net::bp_type_t; +using crimson::net::Breakpoint; +using crimson::net::Connection; +using crimson::net::ConnectionRef; +using crimson::net::custom_bp_t; +using crimson::net::Dispatcher; +using crimson::net::Interceptor; +using crimson::net::Messenger; +using crimson::net::MessengerRef; +using crimson::net::SocketPolicy; +using crimson::net::tag_bp_t; +using namespace ceph::net::test; + +struct counter_t { unsigned counter = 0; }; + +enum class conn_state_t { + unknown = 0, + established, + closed, + replaced, +}; + +std::ostream& operator<<(std::ostream& out, const conn_state_t& state) { + switch(state) { + case conn_state_t::unknown: + return out << "unknown"; + case conn_state_t::established: + return out << "established"; + case conn_state_t::closed: + return out << "closed"; + case conn_state_t::replaced: + return out << "replaced"; + default: + ceph_abort(); + } +} + +} // anonymous namespace + +#if FMT_VERSION >= 90000 +template<> +struct fmt::formatter<conn_state_t> : fmt::ostream_formatter {}; +#endif + +namespace { + +struct ConnResult { + ConnectionRef conn; + unsigned index; + conn_state_t state = conn_state_t::unknown; + + unsigned connect_attempts = 0; + unsigned client_connect_attempts = 0; + unsigned client_reconnect_attempts = 0; + unsigned cnt_connect_dispatched = 0; + + unsigned accept_attempts = 0; + unsigned server_connect_attempts = 0; + unsigned server_reconnect_attempts = 0; + unsigned cnt_accept_dispatched = 0; + + unsigned cnt_reset_dispatched = 0; + unsigned cnt_remote_reset_dispatched = 0; 
+ + ConnResult(ConnectionRef conn, unsigned index) + : conn(conn), index(index) {} + + template <typename T> + void _assert_eq(const char* expr_actual, T actual, + const char* expr_expected, T expected) const { + if (actual != expected) { + throw std::runtime_error(fmt::format( + "[{}] {} '{}' is actually {}, not the expected '{}' {}", + index, *conn, expr_actual, actual, expr_expected, expected)); + } + } + +#define ASSERT_EQUAL(actual, expected) \ + _assert_eq(#actual, actual, #expected, expected) + + void assert_state_at(conn_state_t expected) const { + ASSERT_EQUAL(state, expected); + } + + void assert_connect(unsigned attempts, + unsigned connects, + unsigned reconnects, + unsigned dispatched) const { + ASSERT_EQUAL(connect_attempts, attempts); + ASSERT_EQUAL(client_connect_attempts, connects); + ASSERT_EQUAL(client_reconnect_attempts, reconnects); + ASSERT_EQUAL(cnt_connect_dispatched, dispatched); + } + + void assert_connect(unsigned attempts, + unsigned dispatched) const { + ASSERT_EQUAL(connect_attempts, attempts); + ASSERT_EQUAL(cnt_connect_dispatched, dispatched); + } + + void assert_accept(unsigned attempts, + unsigned accepts, + unsigned reaccepts, + unsigned dispatched) const { + ASSERT_EQUAL(accept_attempts, attempts); + ASSERT_EQUAL(server_connect_attempts, accepts); + ASSERT_EQUAL(server_reconnect_attempts, reaccepts); + ASSERT_EQUAL(cnt_accept_dispatched, dispatched); + } + + void assert_accept(unsigned attempts, + unsigned dispatched) const { + ASSERT_EQUAL(accept_attempts, attempts); + ASSERT_EQUAL(cnt_accept_dispatched, dispatched); + } + + void assert_reset(unsigned local, unsigned remote) const { + ASSERT_EQUAL(cnt_reset_dispatched, local); + ASSERT_EQUAL(cnt_remote_reset_dispatched, remote); + } + + void dump() const { + logger().info("\nResult({}):\n" + " conn: [{}] {}:\n" + " state: {}\n" + " connect_attempts: {}\n" + " client_connect_attempts: {}\n" + " client_reconnect_attempts: {}\n" + " cnt_connect_dispatched: {}\n" + " 
accept_attempts: {}\n" + " server_connect_attempts: {}\n" + " server_reconnect_attempts: {}\n" + " cnt_accept_dispatched: {}\n" + " cnt_reset_dispatched: {}\n" + " cnt_remote_reset_dispatched: {}\n", + static_cast<const void*>(this), + index, *conn, + state, + connect_attempts, + client_connect_attempts, + client_reconnect_attempts, + cnt_connect_dispatched, + accept_attempts, + server_connect_attempts, + server_reconnect_attempts, + cnt_accept_dispatched, + cnt_reset_dispatched, + cnt_remote_reset_dispatched); + } +}; +using ConnResults = std::vector<ConnResult>; + +struct TestInterceptor : public Interceptor { + std::map<Breakpoint, std::map<unsigned, bp_action_t>> breakpoints; + std::map<Breakpoint, counter_t> breakpoints_counter; + std::map<Connection*, unsigned> conns; + ConnResults results; + std::optional<seastar::abort_source> signal; + const seastar::shard_id primary_sid; + + TestInterceptor() : primary_sid{seastar::this_shard_id()} {} + + // only used for copy breakpoint configurations + TestInterceptor(const TestInterceptor& other) : primary_sid{other.primary_sid} { + assert(other.breakpoints_counter.empty()); + assert(other.conns.empty()); + assert(other.results.empty()); + breakpoints = other.breakpoints; + assert(!other.signal); + assert(seastar::this_shard_id() == primary_sid); + } + + void make_fault(Breakpoint bp, unsigned round = 1) { + assert(round >= 1); + breakpoints[bp][round] = bp_action_t::FAULT; + } + + void make_block(Breakpoint bp, unsigned round = 1) { + assert(round >= 1); + breakpoints[bp][round] = bp_action_t::BLOCK; + } + + void make_stall(Breakpoint bp, unsigned round = 1) { + assert(round >= 1); + breakpoints[bp][round] = bp_action_t::STALL; + } + + ConnResult* find_result(Connection *conn) { + assert(seastar::this_shard_id() == primary_sid); + auto it = conns.find(conn); + if (it == conns.end()) { + return nullptr; + } else { + return &results[it->second]; + } + } + + seastar::future<> wait() { + assert(seastar::this_shard_id() == 
primary_sid); + assert(!signal); + signal = seastar::abort_source(); + return seastar::sleep_abortable(10s, *signal).then([] { + throw std::runtime_error("Timeout (10s) in TestInterceptor::wait()"); + }).handle_exception_type([] (const seastar::sleep_aborted& e) { + // wait done! + }); + } + + void notify() { + assert(seastar::this_shard_id() == primary_sid); + if (signal) { + signal->request_abort(); + signal = std::nullopt; + } + } + + private: + void register_conn(ConnectionRef conn) override { + auto result = find_result(&*conn); + if (result != nullptr) { + logger().error("The connection [{}] {} already exists when register {}", + result->index, *result->conn, *conn); + ceph_abort(); + } + unsigned index = results.size(); + results.emplace_back(conn, index); + conns[&*conn] = index; + notify(); + logger().info("[{}] {} new connection registered", index, *conn); + } + + void register_conn_closed(ConnectionRef conn) override { + auto result = find_result(&*conn); + if (result == nullptr) { + logger().error("Untracked closed connection: {}", *conn); + ceph_abort(); + } + + if (result->state != conn_state_t::replaced) { + result->state = conn_state_t::closed; + } + notify(); + logger().info("[{}] {} closed({})", result->index, *conn, result->state); + } + + void register_conn_ready(ConnectionRef conn) override { + auto result = find_result(&*conn); + if (result == nullptr) { + logger().error("Untracked ready connection: {}", *conn); + ceph_abort(); + } + + ceph_assert(conn->is_protocol_ready()); + notify(); + logger().info("[{}] {} ready", result->index, *conn); + } + + void register_conn_replaced(ConnectionRef conn) override { + auto result = find_result(&*conn); + if (result == nullptr) { + logger().error("Untracked replaced connection: {}", *conn); + ceph_abort(); + } + + result->state = conn_state_t::replaced; + logger().info("[{}] {} {}", result->index, *conn, result->state); + } + + seastar::future<bp_action_t> + intercept(Connection &_conn, 
std::vector<Breakpoint> bps) override { + assert(bps.size() >= 1); + Connection *conn = &_conn; + + return seastar::smp::submit_to(primary_sid, [conn, bps, this] { + std::vector<bp_action_t> actions; + for (const Breakpoint &bp : bps) { + ++breakpoints_counter[bp].counter; + + auto result = find_result(&*conn); + if (result == nullptr) { + logger().error("Untracked intercepted connection: {}, at breakpoint {}({})", + *conn, bp, breakpoints_counter[bp].counter); + ceph_abort(); + } + + if (bp == custom_bp_t::SOCKET_CONNECTING) { + ++result->connect_attempts; + logger().info("[Test] connect_attempts={}", result->connect_attempts); + } else if (bp == tag_bp_t{Tag::CLIENT_IDENT, bp_type_t::WRITE}) { + ++result->client_connect_attempts; + logger().info("[Test] client_connect_attempts={}", result->client_connect_attempts); + } else if (bp == tag_bp_t{Tag::SESSION_RECONNECT, bp_type_t::WRITE}) { + ++result->client_reconnect_attempts; + logger().info("[Test] client_reconnect_attempts={}", result->client_reconnect_attempts); + } else if (bp == custom_bp_t::SOCKET_ACCEPTED) { + ++result->accept_attempts; + logger().info("[Test] accept_attempts={}", result->accept_attempts); + } else if (bp == tag_bp_t{Tag::CLIENT_IDENT, bp_type_t::READ}) { + ++result->server_connect_attempts; + logger().info("[Test] server_connect_attemps={}", result->server_connect_attempts); + } else if (bp == tag_bp_t{Tag::SESSION_RECONNECT, bp_type_t::READ}) { + ++result->server_reconnect_attempts; + logger().info("[Test] server_reconnect_attempts={}", result->server_reconnect_attempts); + } + + auto it_bp = breakpoints.find(bp); + if (it_bp != breakpoints.end()) { + auto it_cnt = it_bp->second.find(breakpoints_counter[bp].counter); + if (it_cnt != it_bp->second.end()) { + logger().info("[{}] {} intercepted {}({}) => {}", + result->index, *conn, bp, + breakpoints_counter[bp].counter, it_cnt->second); + actions.emplace_back(it_cnt->second); + continue; + } + } + logger().info("[{}] {} intercepted {}({})", 
+ result->index, *conn, bp, breakpoints_counter[bp].counter); + actions.emplace_back(bp_action_t::CONTINUE); + } + + bp_action_t action = bp_action_t::CONTINUE; + for (bp_action_t &a : actions) { + if (a != bp_action_t::CONTINUE) { + if (action == bp_action_t::CONTINUE) { + action = a; + } else { + ceph_abort("got multiple incompatible actions"); + } + } + } + return seastar::make_ready_future<bp_action_t>(action); + }); + } +}; + +SocketPolicy to_socket_policy(policy_t policy) { + switch (policy) { + case policy_t::stateful_server: + return SocketPolicy::stateful_server(0); + case policy_t::stateless_server: + return SocketPolicy::stateless_server(0); + case policy_t::lossless_peer: + return SocketPolicy::lossless_peer(0); + case policy_t::lossless_peer_reuse: + return SocketPolicy::lossless_peer_reuse(0); + case policy_t::lossy_client: + return SocketPolicy::lossy_client(0); + case policy_t::lossless_client: + return SocketPolicy::lossless_client(0); + default: + logger().error("unexpected policy type"); + ceph_abort(); + } +} + +class FailoverSuite : public Dispatcher { + crimson::auth::DummyAuthClientServer dummy_auth; + MessengerRef test_msgr; + const entity_addr_t test_peer_addr; + TestInterceptor interceptor; + + unsigned tracked_index = 0; + Connection *tracked_conn = nullptr; + unsigned pending_send = 0; + unsigned pending_peer_receive = 0; + unsigned pending_receive = 0; + + ShardedGates &gates; + const seastar::shard_id primary_sid; + + std::optional<seastar::future<>> ms_dispatch( + ConnectionRef conn_ref, MessageRef m) override { + ceph_assert(m->get_type() == CEPH_MSG_OSD_OP); + Connection *conn = &*conn_ref; + gates.dispatch_in_background("TestSuite_ms_dispatch", + [this, conn, conn_ref] { + return seastar::smp::submit_to(primary_sid, [this, conn] { + auto result = interceptor.find_result(&*conn); + if (result == nullptr) { + logger().error("Untracked ms dispatched connection: {}", *conn); + ceph_abort(); + } + + if (tracked_conn != &*conn) { + 
logger().warn("[{}] {} got op, but doesn't match tracked_conn [{}] {}", + result->index, *conn, tracked_index, *tracked_conn); + } else { + ceph_assert(result->index == tracked_index); + } + + ceph_assert(pending_receive > 0); + --pending_receive; + if (pending_receive == 0) { + interceptor.notify(); + } + logger().info("[Test] got op, left {} ops -- [{}] {}", + pending_receive, result->index, *conn); + }).then([conn_ref] {}); + }); + return {seastar::now()}; + } + + void ms_handle_accept( + ConnectionRef conn_ref, + seastar::shard_id prv_shard, + bool is_replace) override { + Connection *conn = &*conn_ref; + gates.dispatch_in_background("TestSuite_ms_dispatch", + [this, conn, conn_ref] { + return seastar::smp::submit_to(primary_sid, [this, conn] { + auto result = interceptor.find_result(&*conn); + if (result == nullptr) { + logger().error("Untracked accepted connection: {}", *conn); + ceph_abort(); + } + + if (tracked_conn && + !tracked_conn->is_protocol_closed() && + tracked_conn != &*conn) { + logger().error("[{}] {} got accepted, but there's already a valid traced_conn [{}] {}", + result->index, *conn, tracked_index, *tracked_conn); + ceph_abort(); + } + + tracked_index = result->index; + tracked_conn = &*conn; + ++result->cnt_accept_dispatched; + logger().info("[Test] got accept (cnt_accept_dispatched={}), track [{}] {}", + result->cnt_accept_dispatched, result->index, *conn); + return flush_pending_send(); + }).then([conn_ref] {}); + }); + } + + void ms_handle_connect( + ConnectionRef conn_ref, + seastar::shard_id prv_shard) override { + Connection *conn = &*conn_ref; + gates.dispatch_in_background("TestSuite_ms_dispatch", + [this, conn, conn_ref] { + return seastar::smp::submit_to(primary_sid, [this, conn] { + auto result = interceptor.find_result(&*conn); + if (result == nullptr) { + logger().error("Untracked connected connection: {}", *conn); + ceph_abort(); + } + + if (tracked_conn && + !tracked_conn->is_protocol_closed() && + tracked_conn != &*conn) { + 
logger().error("[{}] {} got connected, but there's already a avlid tracked_conn [{}] {}", + result->index, *conn, tracked_index, *tracked_conn); + ceph_abort(); + } + + if (tracked_conn == &*conn) { + ceph_assert(result->index == tracked_index); + } + + ++result->cnt_connect_dispatched; + logger().info("[Test] got connected (cnt_connect_dispatched={}) -- [{}] {}", + result->cnt_connect_dispatched, result->index, *conn); + }).then([conn_ref] {}); + }); + } + + void ms_handle_reset( + ConnectionRef conn_ref, + bool is_replace) override { + Connection *conn = &*conn_ref; + gates.dispatch_in_background("TestSuite_ms_dispatch", + [this, conn, conn_ref] { + return seastar::smp::submit_to(primary_sid, [this, conn] { + auto result = interceptor.find_result(&*conn); + if (result == nullptr) { + logger().error("Untracked reset connection: {}", *conn); + ceph_abort(); + } + + if (tracked_conn != &*conn) { + logger().warn("[{}] {} got reset, but doesn't match tracked_conn [{}] {}", + result->index, *conn, tracked_index, *tracked_conn); + } else { + ceph_assert(result->index == tracked_index); + tracked_index = 0; + tracked_conn = nullptr; + } + + ++result->cnt_reset_dispatched; + logger().info("[Test] got reset (cnt_reset_dispatched={}), untrack [{}] {}", + result->cnt_reset_dispatched, result->index, *conn); + }).then([conn_ref] {}); + }); + } + + void ms_handle_remote_reset( + ConnectionRef conn_ref) override { + Connection *conn = &*conn_ref; + gates.dispatch_in_background("TestSuite_ms_dispatch", + [this, conn, conn_ref] { + return seastar::smp::submit_to(primary_sid, [this, conn] { + auto result = interceptor.find_result(&*conn); + if (result == nullptr) { + logger().error("Untracked remotely reset connection: {}", *conn); + ceph_abort(); + } + + if (tracked_conn != &*conn) { + logger().warn("[{}] {} got remotely reset, but doesn't match tracked_conn [{}] {}", + result->index, *conn, tracked_index, *tracked_conn); + } else { + ceph_assert(result->index == tracked_index); 
+ } + + ++result->cnt_remote_reset_dispatched; + logger().info("[Test] got remote reset (cnt_remote_reset_dispatched={}) -- [{}] {}", + result->cnt_remote_reset_dispatched, result->index, *conn); + }).then([conn_ref] {}); + }); + } + + private: + seastar::future<> init(entity_addr_t test_addr, SocketPolicy policy) { + test_msgr->set_default_policy(policy); + test_msgr->set_auth_client(&dummy_auth); + test_msgr->set_auth_server(&dummy_auth); + test_msgr->set_interceptor(&interceptor); + return test_msgr->bind(entity_addrvec_t{test_addr}).safe_then([this] { + return test_msgr->start({this}); + }, Messenger::bind_ertr::all_same_way([test_addr] (const std::error_code& e) { + logger().error("FailoverSuite: " + "there is another instance running at {}", test_addr); + ceph_abort(); + })); + } + + seastar::future<> send_op(bool expect_reply=true) { + ceph_assert(tracked_conn); + ceph_assert(!tracked_conn->is_protocol_closed()); + if (expect_reply) { + ++pending_peer_receive; + } + pg_t pgid; + object_locator_t oloc; + hobject_t hobj(object_t(), oloc.key, CEPH_NOSNAP, pgid.ps(), + pgid.pool(), oloc.nspace); + spg_t spgid(pgid); + return tracked_conn->send(crimson::make_message<MOSDOp>(0, 0, hobj, spgid, 0, 0, 0)); + } + + seastar::future<> flush_pending_send() { + if (pending_send != 0) { + logger().info("[Test] flush sending {} ops", pending_send); + } + ceph_assert(tracked_conn); + ceph_assert(!tracked_conn->is_protocol_closed()); + return seastar::do_until( + [this] { return pending_send == 0; }, + [this] { + --pending_send; + return send_op(); + }); + } + + seastar::future<> wait_ready(unsigned num_ready_conns, + unsigned num_replaced, + bool wait_received) { + assert(seastar::this_shard_id() == primary_sid); + unsigned pending_conns = 0; + unsigned pending_establish = 0; + unsigned replaced_conns = 0; + for (auto& result : interceptor.results) { + if (result.conn->is_protocol_closed_clean()) { + if (result.state == conn_state_t::replaced) { + ++replaced_conns; + } + } 
else if (result.conn->is_protocol_ready()) { + if (pending_send == 0 && pending_peer_receive == 0 && pending_receive == 0) { + result.state = conn_state_t::established; + } else { + ++pending_establish; + } + } else { + ++pending_conns; + } + } + + bool do_wait = false; + if (num_ready_conns > 0) { + if (interceptor.results.size() > num_ready_conns) { + throw std::runtime_error(fmt::format( + "{} connections, more than expected: {}", + interceptor.results.size(), num_ready_conns)); + } else if (interceptor.results.size() < num_ready_conns || pending_conns > 0) { + logger().info("[Test] wait_ready(): wait for connections," + " currently {} out of {}, pending {} ready ...", + interceptor.results.size(), num_ready_conns, pending_conns); + do_wait = true; + } + } + if (wait_received) { + if (pending_send || pending_peer_receive || pending_receive) { + if (pending_conns || pending_establish) { + logger().info("[Test] wait_ready(): wait for pending_send={}," + " pending_peer_receive={}, pending_receive={}," + " pending {}/{} ready/establish connections ...", + pending_send, pending_peer_receive, pending_receive, + pending_conns, pending_establish); + do_wait = true; + } else { + // If there are pending messages, stop waiting if there are + // no longer pending connections. + } + } else { + // Stop waiting if there are no pending messages. Pending connections + // should not be important. 
+ } + } + if (num_replaced > 0) { + if (replaced_conns > num_replaced) { + throw std::runtime_error(fmt::format( + "{} replaced connections, more than expected: {}", + replaced_conns, num_replaced)); + } + if (replaced_conns < num_replaced) { + logger().info("[Test] wait_ready(): wait for {} replaced connections," + " currently {} ...", + num_replaced, replaced_conns); + do_wait = true; + } + } + + if (do_wait) { + return interceptor.wait( + ).then([this, num_ready_conns, num_replaced, wait_received] { + return wait_ready(num_ready_conns, num_replaced, wait_received); + }); + } else { + logger().info("[Test] wait_ready(): wait done!"); + return seastar::now(); + } + } + + // called by FailoverTest + public: + FailoverSuite(MessengerRef test_msgr, + entity_addr_t test_peer_addr, + const TestInterceptor& interceptor, + ShardedGates &gates) + : test_msgr(test_msgr), + test_peer_addr(test_peer_addr), + interceptor(interceptor), + gates{gates}, + primary_sid{seastar::this_shard_id()} { } + + entity_addr_t get_addr() const { + return test_msgr->get_myaddr(); + } + + seastar::future<> shutdown() { + test_msgr->stop(); + return test_msgr->shutdown(); + } + + void needs_receive() { + ++pending_receive; + } + + void notify_peer_reply() { + ceph_assert(pending_peer_receive > 0); + --pending_peer_receive; + logger().info("[Test] TestPeer said got op, left {} ops", + pending_peer_receive); + if (pending_peer_receive == 0) { + interceptor.notify(); + } + } + + void post_check() const { + // make sure all breakpoints were hit + for (auto& kv : interceptor.breakpoints) { + auto it = interceptor.breakpoints_counter.find(kv.first); + if (it == interceptor.breakpoints_counter.end()) { + throw std::runtime_error(fmt::format("{} was missed", kv.first)); + } + auto expected = kv.second.rbegin()->first; + if (expected > it->second.counter) { + throw std::runtime_error(fmt::format( + "{} only triggered {} times, not the expected {}", + kv.first, it->second.counter, expected)); + } + } + } 
+ + void dump_results() const { + for (auto& result : interceptor.results) { + result.dump(); + } + } + + static seastar::future<std::unique_ptr<FailoverSuite>> + create(entity_addr_t test_addr, + SocketPolicy test_policy, + entity_addr_t test_peer_addr, + const TestInterceptor& interceptor, + ShardedGates &gates) { + auto suite = std::make_unique<FailoverSuite>( + Messenger::create( + entity_name_t::OSD(TEST_OSD), + "Test", + TEST_NONCE, + false), + test_peer_addr, + interceptor, + gates); + return suite->init(test_addr, test_policy + ).then([suite = std::move(suite)] () mutable { + return std::move(suite); + }); + } + + // called by tests + public: + seastar::future<> connect_peer() { + logger().info("[Test] connect_peer({})", test_peer_addr); + assert(seastar::this_shard_id() == primary_sid); + auto conn = test_msgr->connect(test_peer_addr, entity_name_t::TYPE_OSD); + auto result = interceptor.find_result(&*conn); + ceph_assert(result != nullptr); + + if (tracked_conn) { + if (tracked_conn->is_protocol_closed()) { + logger().info("[Test] this is a new session" + " replacing an closed one"); + ceph_assert(tracked_conn != &*conn); + } else { + logger().info("[Test] this is not a new session"); + ceph_assert(tracked_index == result->index); + ceph_assert(tracked_conn == &*conn); + } + } else { + logger().info("[Test] this is a new session"); + } + tracked_index = result->index; + tracked_conn = &*conn; + + return flush_pending_send(); + } + + seastar::future<> send_peer() { + assert(seastar::this_shard_id() == primary_sid); + if (tracked_conn) { + logger().info("[Test] send_peer()"); + ceph_assert(!tracked_conn->is_protocol_closed()); + ceph_assert(!pending_send); + return send_op(); + } else { + ++pending_send; + logger().info("[Test] send_peer() (pending {})", pending_send); + return seastar::now(); + } + } + + seastar::future<> keepalive_peer() { + logger().info("[Test] keepalive_peer()"); + assert(seastar::this_shard_id() == primary_sid); + 
ceph_assert(tracked_conn); + ceph_assert(!tracked_conn->is_protocol_closed()); + return tracked_conn->send_keepalive(); + } + + seastar::future<> try_send_peer() { + logger().info("[Test] try_send_peer()"); + assert(seastar::this_shard_id() == primary_sid); + ceph_assert(tracked_conn); + ceph_assert(!tracked_conn->is_protocol_closed()); + return send_op(false); + } + + seastar::future<> markdown() { + logger().info("[Test] markdown() in 100ms ..."); + assert(seastar::this_shard_id() == primary_sid); + ceph_assert(tracked_conn); + // sleep to propagate potential remaining acks + return seastar::sleep(50ms + ).then([this] { + return seastar::smp::submit_to( + tracked_conn->get_shard_id(), [tracked_conn=tracked_conn] { + assert(tracked_conn->get_shard_id() == seastar::this_shard_id()); + tracked_conn->mark_down(); + }); + }).then([] { + // sleep to wait for markdown propagate to the primary sid + return seastar::sleep(100ms); + }); + } + + seastar::future<> wait_blocked() { + logger().info("[Test] wait_blocked() ..."); + assert(seastar::this_shard_id() == primary_sid); + return interceptor.blocker.wait_blocked(); + } + + void unblock() { + logger().info("[Test] unblock()"); + assert(seastar::this_shard_id() == primary_sid); + return interceptor.blocker.unblock(); + } + + seastar::future<> wait_replaced(unsigned count) { + logger().info("[Test] wait_replaced({}) ...", count); + return wait_ready(0, count, false); + } + + seastar::future<> wait_established() { + logger().info("[Test] wait_established() ..."); + return wait_ready(0, 0, true); + } + + seastar::future<std::reference_wrapper<ConnResults>> + wait_results(unsigned count) { + logger().info("[Test] wait_result({}) ...", count); + return wait_ready(count, 0, true).then([this] { + return std::reference_wrapper<ConnResults>(interceptor.results); + }); + } + + bool is_standby() { + assert(seastar::this_shard_id() == primary_sid); + ceph_assert(tracked_conn); + return tracked_conn->is_protocol_standby(); + } +}; + 
+// Command client that drives a test run.
+//
+// It talks to the remote command server over cmd_conn (ping, start/stop the
+// peer suite, ask the peer to connect/send/keepalive/markdown) and owns the
+// local FailoverSuite for the duration of one run_suite() call.
+class FailoverTest : public Dispatcher {
+  crimson::auth::DummyAuthClientServer dummy_auth;
+  MessengerRef cmd_msgr;
+  ConnectionRef cmd_conn;
+  const entity_addr_t test_addr;
+  const entity_addr_t test_peer_addr;
+
+  // set while a ping / a command is in flight, fulfilled by ms_dispatch()
+  std::optional<seastar::promise<>> recv_pong;
+  std::optional<seastar::promise<>> recv_cmdreply;
+
+  std::unique_ptr<FailoverSuite> test_suite;
+
+  // Handle messages from the command server: the pong and command replies
+  // resolve their pending promise; suite_recv_op is forwarded to the suite.
+  std::optional<seastar::future<>> ms_dispatch(ConnectionRef c, MessageRef m) override {
+    switch (m->get_type()) {
+     case CEPH_MSG_PING:
+      ceph_assert(recv_pong);
+      recv_pong->set_value();
+      recv_pong = std::nullopt;
+      break;
+     case MSG_COMMAND_REPLY:
+      ceph_assert(recv_cmdreply);
+      recv_cmdreply->set_value();
+      recv_cmdreply = std::nullopt;
+      break;
+     case MSG_COMMAND: {
+      auto m_cmd = boost::static_pointer_cast<MCommand>(m);
+      ceph_assert(static_cast<cmd_t>(m_cmd->cmd[0][0]) == cmd_t::suite_recv_op);
+      ceph_assert(test_suite);
+      test_suite->notify_peer_reply();
+      break;
+     }
+     default:
+      logger().error("{} got unexpected msg from cmd server: {}", *c, *m);
+      ceph_abort();
+    }
+    return {seastar::now()};
+  }
+
+ private:
+  // Send `cmd` (encoded as a one-character string, optionally extended by
+  // f_prepare) and resolve when the matching MSG_COMMAND_REPLY arrives.  Only
+  // one command may be outstanding at a time.
+  seastar::future<> prepare_cmd(
+      cmd_t cmd,
+      std::function<void(MCommand&)>
+        f_prepare = [] (auto& m) { return; }) {
+    assert(!recv_cmdreply);
+    recv_cmdreply = seastar::promise<>();
+    auto fut = recv_cmdreply->get_future();
+    auto m = crimson::make_message<MCommand>();
+    m->cmd.emplace_back(1, static_cast<char>(cmd));
+    f_prepare(*m);
+    return cmd_conn->send(std::move(m)).then([fut = std::move(fut)] () mutable {
+      return std::move(fut);
+    });
+  }
+
+  seastar::future<> start_peer(policy_t peer_policy) {
+    return prepare_cmd(cmd_t::suite_start,
+        [peer_policy] (auto& m) {
+      m.cmd.emplace_back(1, static_cast<char>(peer_policy));
+    });
+  }
+
+  seastar::future<> stop_peer() {
+    return prepare_cmd(cmd_t::suite_stop);
+  }
+
+  // Send a ping and resolve when the pong is dispatched.
+  seastar::future<> pingpong() {
+    assert(!recv_pong);
+    recv_pong = seastar::promise<>();
+    auto fut = recv_pong->get_future();
+    return cmd_conn->send(crimson::make_message<MPing>()
+    ).then([fut = std::move(fut)] () mutable {
+      return std::move(fut);
+    });
+  }
+
+  // Start the command messenger, connect to the command server and verify
+  // the link with one ping/pong round trip.
+  seastar::future<> init(entity_addr_t cmd_peer_addr) {
+    cmd_msgr->set_default_policy(SocketPolicy::lossy_client(0));
+    cmd_msgr->set_auth_client(&dummy_auth);
+    cmd_msgr->set_auth_server(&dummy_auth);
+    return cmd_msgr->start({this}).then([this, cmd_peer_addr] {
+      logger().info("CmdCli connect to CmdSrv({}) ...", cmd_peer_addr);
+      cmd_conn = cmd_msgr->connect(cmd_peer_addr, entity_name_t::TYPE_OSD);
+      return pingpong();
+    });
+  }
+
+ public:
+  FailoverTest(MessengerRef cmd_msgr,
+               entity_addr_t test_addr,
+               entity_addr_t test_peer_addr)
+    : cmd_msgr(cmd_msgr),
+      test_addr(test_addr),
+      test_peer_addr(test_peer_addr) { }
+
+  // Tell the command server to shut down (no reply is awaited), give it
+  // 200ms, then stop the local command messenger.
+  seastar::future<> shutdown() {
+    logger().info("CmdCli shutdown...");
+    assert(!recv_cmdreply);
+    auto m = crimson::make_message<MCommand>();
+    m->cmd.emplace_back(1, static_cast<char>(cmd_t::shutdown));
+    return cmd_conn->send(std::move(m)).then([] {
+      return seastar::sleep(200ms);
+    }).then([this] {
+      cmd_msgr->stop();
+      return cmd_msgr->shutdown();
+    });
+  }
+
+  static seastar::future<seastar::lw_shared_ptr<FailoverTest>>
+  create(entity_addr_t test_addr,
+         entity_addr_t cmd_peer_addr,
+         entity_addr_t test_peer_addr) {
+    auto test = seastar::make_lw_shared<FailoverTest>(
+      Messenger::create(
+        entity_name_t::OSD(CMD_CLI_OSD),
+        "CmdCli",
+        CMD_CLI_NONCE,
+        true),
+      test_addr, test_peer_addr);
+    return test->init(cmd_peer_addr).then([test] {
+      logger().info("CmdCli ready");
+      return test;
+    });
+  }
+
+ // called by tests
+ public:
+  // Run one named scenario: create the gates and a FailoverSuite, start the
+  // peer suite, run `f`, verify the breakpoints, then stop the peer, shut the
+  // suite down and close the gates.  On failure the results are dumped and
+  // the error is propagated to the caller; the cleanup steps are skipped.
+  seastar::future<> run_suite(
+      std::string name,
+      const TestInterceptor& interceptor,
+      policy_t test_policy,
+      policy_t peer_policy,
+      std::function<seastar::future<>(FailoverSuite&)>&& f) {
+    logger().info("\n\n[{}]", name);
+    ceph_assert(!test_suite);
+    SocketPolicy test_policy_ = to_socket_policy(test_policy);
+    return ShardedGates::create(
+    ).then([this, test_policy_, peer_policy, interceptor,
+            f=std::move(f)](auto *gates) mutable {
+      return FailoverSuite::create(
+        test_addr, test_policy_, test_peer_addr, interceptor, *gates
+      ).then([this, peer_policy, f = std::move(f)](auto suite) mutable {
+        ceph_assert(suite->get_addr() == test_addr);
+        test_suite.swap(suite);
+        return start_peer(peer_policy
+        ).then([this, f = std::move(f)] {
+          return f(*test_suite);
+        }).then([this] {
+          test_suite->post_check();
+          logger().info("\n[SUCCESS]");
+        }).handle_exception([this](auto eptr) {
+          logger().info("\n[FAIL: {}]", eptr);
+          test_suite->dump_results();
+          // Propagate the original failure.  A bare `throw;` is only valid
+          // while an exception is being handled; this callback merely
+          // receives the exception_ptr, so re-raise it as a failed future.
+          return seastar::make_exception_future<>(eptr);
+        }).then([this] {
+          return stop_peer();
+        }).then([this] {
+          return test_suite->shutdown(
+          ).then([this] {
+            test_suite.reset();
+          });
+        });
+      }).then([gates] {
+        return gates->close();
+      });
+    });
+  }
+
+  seastar::future<> peer_connect_me() {
+    logger().info("[Test] peer_connect_me({})", test_addr);
+    return prepare_cmd(cmd_t::suite_connect_me,
+        [this] (auto& m) {
+      m.cmd.emplace_back(fmt::format("{}", test_addr));
+    });
+  }
+
+  // Ask the peer to send one op and expect it to arrive at the suite.
+  seastar::future<> peer_send_me() {
+    logger().info("[Test] peer_send_me()");
+    ceph_assert(test_suite);
+    test_suite->needs_receive();
+    return prepare_cmd(cmd_t::suite_send_me);
+  }
+
+  // Ask the peer to send one op without expecting it to arrive.
+  seastar::future<> try_peer_send_me() {
+    logger().info("[Test] try_peer_send_me()");
+    ceph_assert(test_suite);
+    return prepare_cmd(cmd_t::suite_send_me);
+  }
+
+  seastar::future<> send_bidirectional() {
+    ceph_assert(test_suite);
+    return test_suite->send_peer().then([this] {
+      return peer_send_me();
+    });
+  }
+
+  seastar::future<> peer_keepalive_me() {
+    logger().info("[Test] peer_keepalive_me()");
+    ceph_assert(test_suite);
+    return prepare_cmd(cmd_t::suite_keepalive_me);
+  }
+
+  seastar::future<> markdown_peer() {
+    logger().info("[Test] markdown_peer() in 150ms ...");
+    // sleep to propagate potential remaining acks
+    return seastar::sleep(50ms
+    ).then([this] {
+      return prepare_cmd(cmd_t::suite_markdown);
+    }).then([] {
+      // sleep awhile for peer markdown propagated
+      return seastar::sleep(100ms);
+    });
+  }
+};
+
+// Dispatcher for the peer-side test messenger.  It tracks one connection to
+// the messenger under test, reports every received op through op_callback
+// and can queue sends until a connection is available.
+class FailoverSuitePeer : public Dispatcher {
+  using cb_t = std::function<seastar::future<>()>;
+  crimson::auth::DummyAuthClientServer dummy_auth;
+  MessengerRef peer_msgr;
+  cb_t op_callback;
+
+  ConnectionRef tracked_conn;
+  unsigned pending_send = 0;
+
+  std::optional<seastar::future<>> ms_dispatch(ConnectionRef conn, MessageRef m) override {
+    logger().info("[TestPeer] got op from Test");
+    ceph_assert(m->get_type() == CEPH_MSG_OSD_OP);
+    // the callback's future is deliberately not awaited
+    std::ignore = op_callback();
+    return {seastar::now()};
+  }
+
+  // Track the accepted connection (an existing open one is only reported,
+  // not fatal) and flush the queued sends without awaiting them.
+  void ms_handle_accept(
+      ConnectionRef conn,
+      seastar::shard_id prv_shard,
+      bool is_replace) override {
+    assert(prv_shard == seastar::this_shard_id());
+    logger().info("[TestPeer] got accept from Test");
+
+    if (tracked_conn &&
+        !tracked_conn->is_protocol_closed() &&
+        tracked_conn != conn) {
+      logger().error("[TestPeer] {} got accepted, but there's already a valid tracked_conn {}",
+                     *conn, *tracked_conn);
+    }
+    tracked_conn = conn;
+    std::ignore = flush_pending_send();
+  }
+
+  void ms_handle_reset(ConnectionRef conn, bool is_replace) override {
+    logger().info("[TestPeer] got reset from Test");
+  }
+
+ private:
+  // Configure, bind and start the peer messenger; any bind error is reported
+  // as an address conflict and aborts.
+  seastar::future<> init(entity_addr_t test_peer_addr, SocketPolicy policy) {
+    peer_msgr->set_default_policy(policy);
+    peer_msgr->set_auth_client(&dummy_auth);
+    peer_msgr->set_auth_server(&dummy_auth);
+    return peer_msgr->bind(entity_addrvec_t{test_peer_addr}).safe_then([this] {
+      return peer_msgr->start({this});
+    }, Messenger::bind_ertr::all_same_way([test_peer_addr] (const std::error_code& e) {
+      logger().error("FailoverSuitePeer: "
+                     "there is another instance running at {}", test_peer_addr);
+      ceph_abort();
+    }));
+  }
+
+  // Send one default-constructed MOSDOp; a closed connection is logged but
+  // the send is still attempted.
+  seastar::future<> send_op() {
+    ceph_assert(tracked_conn);
+    if (tracked_conn->is_protocol_closed()) {
+      logger().error("[TestPeer] send op but the connection is closed -- {}",
+                     *tracked_conn);
+    }
+
+    pg_t pgid;
+    object_locator_t oloc;
+    hobject_t hobj(object_t(), oloc.key, CEPH_NOSNAP, pgid.ps(),
+                   pgid.pool(), oloc.nspace);
+    spg_t spgid(pgid);
+    return tracked_conn->send(crimson::make_message<MOSDOp>(0, 0, hobj, spgid, 0, 0, 0));
+  }
+
+  seastar::future<> flush_pending_send() {
+    if (pending_send != 0) {
+      logger().info("[TestPeer] flush sending {} ops", pending_send);
+    }
+    ceph_assert(tracked_conn);
+    return seastar::do_until(
+        [this] { return pending_send == 0; },
+        [this] {
+      --pending_send;
+      return send_op();
+    });
+  }
+
+ public:
+  FailoverSuitePeer(MessengerRef peer_msgr, cb_t op_callback)
+    : peer_msgr(peer_msgr),
+      op_callback(op_callback) { }
+
+  seastar::future<> shutdown() {
+    peer_msgr->stop();
+    return peer_msgr->shutdown();
+  }
+
+  // Connect to the messenger under test, track the connection and flush the
+  // queued sends.
+  seastar::future<> connect_peer(entity_addr_t test_addr_decoded) {
+    logger().info("[TestPeer] connect_peer({})", test_addr_decoded);
+    auto conn = peer_msgr->connect(test_addr_decoded, entity_name_t::TYPE_OSD);
+
+    if (tracked_conn) {
+      if (tracked_conn->is_protocol_closed()) {
+        logger().info("[TestPeer] this is a new session"
+                      " replacing a closed one");
+        ceph_assert(tracked_conn != conn);
+      } else {
+        logger().info("[TestPeer] this is not a new session");
+        ceph_assert(tracked_conn == conn);
+      }
+    } else {
+      logger().info("[TestPeer] this is a new session");
+    }
+    tracked_conn = conn;
+
+    return flush_pending_send();
+  }
+
+  // Send one op now, or queue it until a connection gets tracked.
+  seastar::future<> send_peer() {
+    if (tracked_conn) {
+      logger().info("[TestPeer] send_peer()");
+      ceph_assert(!pending_send);
+      return send_op();
+    } else {
+      ++pending_send;
+      logger().info("[TestPeer] send_peer() (pending {})", pending_send);
+      return seastar::now();
+    }
+  }
+
+  seastar::future<> keepalive_peer() {
+    logger().info("[TestPeer] keepalive_peer()");
+    ceph_assert(tracked_conn);
+    return tracked_conn->send_keepalive();
+  }
+
+  seastar::future<> markdown() {
+    logger().info("[TestPeer] markdown()");
+    ceph_assert(tracked_conn);
+    tracked_conn->mark_down();
+    return seastar::now();
+  }
+
+  static seastar::future<std::unique_ptr<FailoverSuitePeer>>
+  create(entity_addr_t test_peer_addr, const SocketPolicy& policy, cb_t op_callback) {
+    auto suite = std::make_unique<FailoverSuitePeer>(
+      Messenger::create(
+        entity_name_t::OSD(TEST_PEER_OSD),
+        "TestPeer",
+        TEST_PEER_NONCE,
+        true),
+      op_callback
+    );
+    return suite->init(test_peer_addr, policy
+    ).then([suite = std::move(suite)] () mutable {
+      return std::move(suite);
+    });
+  }
+};
+
+// Command server on the peer side: answers pings, executes the suite_*
+// commands against its FailoverSuitePeer and replies with MCommandReply,
+// and shuts its messenger down on cmd_t::shutdown.
+class FailoverTestPeer : public Dispatcher {
+  crimson::auth::DummyAuthClientServer dummy_auth;
+  MessengerRef cmd_msgr;
+  ConnectionRef cmd_conn;
+  const entity_addr_t test_peer_addr;
+  std::unique_ptr<FailoverSuitePeer> test_suite;
+
+  std::optional<seastar::future<>> ms_dispatch(ConnectionRef c, MessageRef m) override {
+    ceph_assert(cmd_conn == c);
+    switch (m->get_type()) {
+     case CEPH_MSG_PING:
+      std::ignore = c->send(crimson::make_message<MPing>());
+      break;
+     case MSG_COMMAND: {
+      auto m_cmd = boost::static_pointer_cast<MCommand>(m);
+      auto cmd = static_cast<cmd_t>(m_cmd->cmd[0][0]);
+      if (cmd == cmd_t::shutdown) {
+        logger().info("CmdSrv shutdown...");
+        // forwarded to FailoverTestPeer::wait()
+        cmd_msgr->stop();
+        std::ignore = cmd_msgr->shutdown();
+      } else {
+        // reply only after the command has been carried out
+        std::ignore = handle_cmd(cmd, m_cmd).then([c] {
+          return c->send(crimson::make_message<MCommandReply>());
+        });
+      }
+      break;
+     }
+     default:
+      logger().error("{} got unexpected msg from cmd client: {}", *c, *m);
+      ceph_abort();
+    }
+    return {seastar::now()};
+  }
+
+  void ms_handle_accept(
+      ConnectionRef conn,
+      seastar::shard_id prv_shard,
+      bool is_replace) override {
+    assert(prv_shard == seastar::this_shard_id());
+    cmd_conn = conn;
+  }
+
+ private:
+  // Tell the command client that the peer suite received an op.
+  seastar::future<> notify_recv_op() {
+    ceph_assert(cmd_conn);
+    auto m = crimson::make_message<MCommand>();
+    m->cmd.emplace_back(1, static_cast<char>(cmd_t::suite_recv_op));
+    return cmd_conn->send(std::move(m));
+  }
+
+  // Execute one suite command.  suite_start reads the peer policy from
+  // cmd[1][0]; suite_connect_me parses the target address from cmd[1].
+  seastar::future<> handle_cmd(cmd_t cmd, MRef<MCommand> m_cmd) {
+    switch (cmd) {
+     case cmd_t::suite_start: {
+      ceph_assert(!test_suite);
+      auto policy = to_socket_policy(static_cast<policy_t>(m_cmd->cmd[1][0]));
+      return FailoverSuitePeer::create(
+        test_peer_addr, policy, [this] { return notify_recv_op(); }
+      ).then([this] (auto suite) {
+        test_suite.swap(suite);
+      });
+     }
+     case cmd_t::suite_stop:
+      ceph_assert(test_suite);
+      return test_suite->shutdown().then([this] {
+        test_suite.reset();
+      });
+     case cmd_t::suite_connect_me: {
+      ceph_assert(test_suite);
+      entity_addr_t test_addr_decoded = entity_addr_t();
+      test_addr_decoded.parse(m_cmd->cmd[1].c_str(), nullptr);
+      return test_suite->connect_peer(test_addr_decoded);
+     }
+     case cmd_t::suite_send_me:
+      ceph_assert(test_suite);
+      return test_suite->send_peer();
+     case cmd_t::suite_keepalive_me:
+      ceph_assert(test_suite);
+      return test_suite->keepalive_peer();
+     case cmd_t::suite_markdown:
+      ceph_assert(test_suite);
+      return test_suite->markdown();
+     default:
+      // report the offending command code rather than the message address
+      logger().error("TestPeer got unexpected command {} from Test",
+                     static_cast<int>(cmd));
+      ceph_abort();
+      return seastar::now();
+    }
+  }
+
+  seastar::future<> init(entity_addr_t cmd_peer_addr) {
+    cmd_msgr->set_default_policy(SocketPolicy::stateless_server(0));
+    cmd_msgr->set_auth_client(&dummy_auth);
+    cmd_msgr->set_auth_server(&dummy_auth);
+    return cmd_msgr->bind(entity_addrvec_t{cmd_peer_addr}).safe_then([this] {
+      return cmd_msgr->start({this});
+    }, Messenger::bind_ertr::all_same_way([cmd_peer_addr] (const std::error_code& e) {
+      logger().error("FailoverTestPeer: "
+                     "there is another instance running at {}", cmd_peer_addr);
+      ceph_abort();
+    }));
+  }
+
+ public:
+  FailoverTestPeer(MessengerRef cmd_msgr,
+                   entity_addr_t test_peer_addr)
+    : cmd_msgr(cmd_msgr),
+      test_peer_addr(test_peer_addr) { }
+
+  seastar::future<> wait() {
+    return cmd_msgr->wait();
+  }
+
+  static seastar::future<std::unique_ptr<FailoverTestPeer>>
+  create(entity_addr_t cmd_peer_addr, entity_addr_t test_peer_addr) {
+    auto test_peer = std::make_unique<FailoverTestPeer>(
+      Messenger::create(
+        entity_name_t::OSD(CMD_SRV_OSD),
+        "CmdSrv",
+        CMD_SRV_NONCE,
+        true),
+      test_peer_addr);
+    return test_peer->init(cmd_peer_addr
+    ).then([test_peer = std::move(test_peer)] () mutable {
+      logger().info("CmdSrv ready");
+      return std::move(test_peer);
+    });
+  }
+};
+
+// For each listed breakpoint, inject a single fault there on a lossy client
+// talking to a stateless server, then send + connect and check that exactly
+// one connection ends up established with the asserted counters.
+seastar::future<>
+test_v2_lossy_early_connect_fault(FailoverTest& test) {
+  return seastar::do_with(std::vector<Breakpoint>{
+      {custom_bp_t::SOCKET_CONNECTING},
+      {custom_bp_t::BANNER_WRITE},
+      {custom_bp_t::BANNER_READ},
+      {custom_bp_t::BANNER_PAYLOAD_READ},
+      {Tag::HELLO, bp_type_t::WRITE},
+      {Tag::HELLO, bp_type_t::READ},
+      {Tag::AUTH_REQUEST, bp_type_t::WRITE},
+      {Tag::AUTH_DONE, bp_type_t::READ},
+      {Tag::AUTH_SIGNATURE, bp_type_t::WRITE},
+      {Tag::AUTH_SIGNATURE, bp_type_t::READ},
+  }, [&test] (auto& failure_cases) {
+    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
+      TestInterceptor interceptor;
+      interceptor.make_fault(bp);
+      return test.run_suite(
+          fmt::format("test_v2_lossy_early_connect_fault -- {}", bp),
+          interceptor,
+          policy_t::lossy_client,
+          policy_t::stateless_server,
+          [] (FailoverSuite& suite) {
+        return seastar::futurize_invoke([&suite] {
+          return suite.send_peer();
+        }).then([&suite] {
+          return suite.connect_peer();
+        }).then([&suite] {
+          return suite.wait_results(1);
+        }).then([] (ConnResults& results) {
+          results[0].assert_state_at(conn_state_t::established);
+          results[0].assert_connect(2, 1, 0, 1);
+          results[0].assert_accept(0, 0, 0, 0);
+          results[0].assert_reset(0, 0);
+        });
+      });
+    });
+  });
+}
+
+seastar::future<>
+test_v2_lossy_connect_fault(FailoverTest& test) {
+  return seastar::do_with(std::vector<Breakpoint>{
+      {Tag::CLIENT_IDENT, bp_type_t::WRITE},
+      {Tag::SERVER_IDENT, bp_type_t::READ},
+  }, [&test] (auto& failure_cases) {
+    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
+      TestInterceptor interceptor;
+      interceptor.make_fault(bp);
+      return test.run_suite(
+          fmt::format("test_v2_lossy_connect_fault -- {}", bp),
+          interceptor,
+
policy_t::lossy_client, + policy_t::stateless_server, + [] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 2, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_lossy_connected_fault(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {Tag::MESSAGE, bp_type_t::WRITE}, + {Tag::MESSAGE, bp_type_t::READ}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossy_connected_fault -- {}", bp), + interceptor, + policy_t::lossy_client, + policy_t::stateless_server, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(1, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_lossy_early_accept_fault(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {custom_bp_t::BANNER_WRITE}, + {custom_bp_t::BANNER_READ}, + {custom_bp_t::BANNER_PAYLOAD_READ}, + {Tag::HELLO, bp_type_t::WRITE}, + {Tag::HELLO, bp_type_t::READ}, + {Tag::AUTH_REQUEST, bp_type_t::READ}, + {Tag::AUTH_DONE, bp_type_t::WRITE}, + {Tag::AUTH_SIGNATURE, bp_type_t::WRITE}, + {Tag::AUTH_SIGNATURE, bp_type_t::READ}, + }, [&test] (auto& failure_cases) { + return 
seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossy_early_accept_fault -- {}", bp), + interceptor, + policy_t::stateless_server, + policy_t::lossy_client, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 1); + results[1].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_lossy_accept_fault(FailoverTest& test) { + auto bp = Breakpoint{Tag::CLIENT_IDENT, bp_type_t::READ}; + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossy_accept_fault -- {}", bp), + interceptor, + policy_t::stateless_server, + policy_t::lossy_client, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 1); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_lossy_establishing_fault(FailoverTest& test) { + auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::WRITE}; + TestInterceptor 
interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossy_establishing_fault -- {}", bp), + interceptor, + policy_t::stateless_server, + policy_t::lossy_client, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(1, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 1); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_lossy_accepted_fault(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {Tag::MESSAGE, bp_type_t::WRITE}, + {Tag::MESSAGE, bp_type_t::READ}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossy_accepted_fault -- {}", bp), + interceptor, + policy_t::stateless_server, + policy_t::lossy_client, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.send_bidirectional(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(1, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_lossless_connect_fault(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {Tag::CLIENT_IDENT, bp_type_t::WRITE}, + {Tag::SERVER_IDENT, bp_type_t::READ}, + }, [&test] (auto& 
failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossless_connect_fault -- {}", bp), + interceptor, + policy_t::lossless_client, + policy_t::stateful_server, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 2, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_lossless_connected_fault(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {Tag::MESSAGE, bp_type_t::WRITE}, + {Tag::MESSAGE, bp_type_t::READ}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossless_connected_fault -- {}", bp), + interceptor, + policy_t::lossless_client, + policy_t::stateful_server, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 1, 1, 2); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_lossless_connected_fault2(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {Tag::ACK, bp_type_t::READ}, + {Tag::ACK, bp_type_t::WRITE}, + {Tag::KEEPALIVE2, bp_type_t::READ}, + {Tag::KEEPALIVE2, 
bp_type_t::WRITE}, + {Tag::KEEPALIVE2_ACK, bp_type_t::READ}, + {Tag::KEEPALIVE2_ACK, bp_type_t::WRITE}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossless_connected_fault2 -- {}", bp), + interceptor, + policy_t::lossless_client, + policy_t::stateful_server, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_established(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.keepalive_peer(); + }).then([&suite] { + return suite.wait_established(); + }).then([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_keepalive_me(); + }).then([&suite] { + return suite.wait_established(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.wait_established(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_established(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 1, 1, 2); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_lossless_reconnect_fault(FailoverTest& test) { + return seastar::do_with(std::vector<std::pair<Breakpoint, Breakpoint>>{ + {{Tag::MESSAGE, bp_type_t::WRITE}, + {Tag::SESSION_RECONNECT, bp_type_t::WRITE}}, + {{Tag::MESSAGE, bp_type_t::WRITE}, + {Tag::SESSION_RECONNECT_OK, bp_type_t::READ}}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp_pair) { + TestInterceptor interceptor; + interceptor.make_fault(bp_pair.first); + interceptor.make_fault(bp_pair.second); + return test.run_suite( + 
fmt::format("test_v2_lossless_reconnect_fault -- {}, {}", + bp_pair.first, bp_pair.second), + interceptor, + policy_t::lossless_client, + policy_t::stateful_server, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(3, 1, 2, 2); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_lossless_accept_fault(FailoverTest& test) { + auto bp = Breakpoint{Tag::CLIENT_IDENT, bp_type_t::READ}; + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossless_accept_fault -- {}", bp), + interceptor, + policy_t::stateful_server, + policy_t::lossless_client, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.send_bidirectional(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 1); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_lossless_establishing_fault(FailoverTest& test) { + auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::WRITE}; + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossless_establishing_fault -- {}", bp), + interceptor, + policy_t::stateful_server, + policy_t::lossless_client, + [&test] (FailoverSuite& suite) { + return 
seastar::futurize_invoke([&test] { + return test.send_bidirectional(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_lossless_accepted_fault(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {Tag::MESSAGE, bp_type_t::WRITE}, + {Tag::MESSAGE, bp_type_t::READ}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_lossless_accepted_fault -- {}", bp), + interceptor, + policy_t::stateful_server, + policy_t::lossless_client, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.send_bidirectional(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 0); + results[1].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_lossless_reaccept_fault(FailoverTest& test) { + return seastar::do_with(std::vector<std::pair<Breakpoint, Breakpoint>>{ + {{Tag::MESSAGE, bp_type_t::READ}, + {Tag::SESSION_RECONNECT, bp_type_t::READ}}, + {{Tag::MESSAGE, bp_type_t::READ}, + 
{Tag::SESSION_RECONNECT_OK, bp_type_t::WRITE}}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp_pair) { + TestInterceptor interceptor; + interceptor.make_fault(bp_pair.first); + interceptor.make_fault(bp_pair.second); + return test.run_suite( + fmt::format("test_v2_lossless_reaccept_fault -- {}, {}", + bp_pair.first, bp_pair.second), + interceptor, + policy_t::stateful_server, + policy_t::lossless_client, + [&test, bp = bp_pair.second] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.send_bidirectional(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_results(3); + }).then([bp] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + if (bp == Breakpoint{Tag::SESSION_RECONNECT, bp_type_t::READ}) { + results[0].assert_accept(1, 1, 0, 2); + } else { + results[0].assert_accept(1, 1, 0, 3); + } + results[0].assert_reset(0, 0); + if (bp == Breakpoint{Tag::SESSION_RECONNECT, bp_type_t::READ}) { + results[1].assert_state_at(conn_state_t::closed); + } else { + results[1].assert_state_at(conn_state_t::replaced); + } + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 0, 1, 0); + results[1].assert_reset(0, 0); + results[2].assert_state_at(conn_state_t::replaced); + results[2].assert_connect(0, 0, 0, 0); + results[2].assert_accept(1, 0, 1, 0); + results[2].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_peer_connect_fault(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {Tag::CLIENT_IDENT, bp_type_t::WRITE}, + {Tag::SERVER_IDENT, bp_type_t::READ}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_peer_connect_fault -- {}", bp), + interceptor, + 
policy_t::lossless_peer, + policy_t::lossless_peer, + [] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 2, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_peer_accept_fault(FailoverTest& test) { + auto bp = Breakpoint{Tag::CLIENT_IDENT, bp_type_t::READ}; + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_peer_accept_fault -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 1); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_peer_establishing_fault(FailoverTest& test) { + auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::WRITE}; + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_peer_establishing_fault -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return 
suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_peer_connected_fault_reconnect(FailoverTest& test) { + auto bp = Breakpoint{Tag::MESSAGE, bp_type_t::WRITE}; + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_peer_connected_fault_reconnect -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 1, 1, 2); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_peer_connected_fault_reaccept(FailoverTest& test) { + auto bp = Breakpoint{Tag::MESSAGE, bp_type_t::READ}; + TestInterceptor interceptor; + interceptor.make_fault(bp); + return test.run_suite( + fmt::format("test_v2_peer_connected_fault_reaccept -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 1); + results[0].assert_reset(0, 0); + 
results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 0, 1, 0); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<bool> +check_peer_wins(FailoverTest& test) { + return seastar::do_with(bool(), [&test] (auto& ret) { + return test.run_suite("check_peer_wins", + TestInterceptor(), + policy_t::lossy_client, + policy_t::stateless_server, + [&ret] (FailoverSuite& suite) { + return suite.connect_peer().then([&suite] { + return suite.wait_results(1); + }).then([&ret] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + ret = results[0].conn->peer_wins(); + logger().info("check_peer_wins: {}", ret); + }); + }).then([&ret] { + return ret; + }); + }); +} + +seastar::future<> +test_v2_racing_reconnect_acceptor_lose(FailoverTest& test) { + return seastar::do_with(std::vector<std::pair<unsigned, Breakpoint>>{ + {1, {Tag::SESSION_RECONNECT, bp_type_t::READ}}, + {2, {custom_bp_t::BANNER_WRITE}}, + {2, {custom_bp_t::BANNER_READ}}, + {2, {custom_bp_t::BANNER_PAYLOAD_READ}}, + {2, {Tag::HELLO, bp_type_t::WRITE}}, + {2, {Tag::HELLO, bp_type_t::READ}}, + {2, {Tag::AUTH_REQUEST, bp_type_t::READ}}, + {2, {Tag::AUTH_DONE, bp_type_t::WRITE}}, + {2, {Tag::AUTH_SIGNATURE, bp_type_t::WRITE}}, + {2, {Tag::AUTH_SIGNATURE, bp_type_t::READ}}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + // fault acceptor + interceptor.make_fault({Tag::MESSAGE, bp_type_t::READ}); + // block acceptor + interceptor.make_block(bp.second, bp.first); + return test.run_suite( + fmt::format("test_v2_racing_reconnect_acceptor_lose -- {}({})", + bp.second, bp.first), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + 
return suite.wait_blocked(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.wait_established(); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(1, 0, 1, 1); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::closed); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 0); + results[1].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_racing_reconnect_acceptor_win(FailoverTest& test) { + return seastar::do_with(std::vector<std::pair<unsigned, Breakpoint>>{ + {1, {Tag::SESSION_RECONNECT, bp_type_t::WRITE}}, + {2, {custom_bp_t::SOCKET_CONNECTING}}, + {2, {custom_bp_t::BANNER_WRITE}}, + {2, {custom_bp_t::BANNER_READ}}, + {2, {custom_bp_t::BANNER_PAYLOAD_READ}}, + {2, {Tag::HELLO, bp_type_t::WRITE}}, + {2, {Tag::HELLO, bp_type_t::READ}}, + {2, {Tag::AUTH_REQUEST, bp_type_t::WRITE}}, + {2, {Tag::AUTH_DONE, bp_type_t::READ}}, + {2, {Tag::AUTH_SIGNATURE, bp_type_t::WRITE}}, + {2, {Tag::AUTH_SIGNATURE, bp_type_t::READ}}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + // fault connector + interceptor.make_fault({Tag::MESSAGE, bp_type_t::WRITE}); + // block connector + interceptor.make_block(bp.second, bp.first); + return test.run_suite( + fmt::format("test_v2_racing_reconnect_acceptor_win -- {}({})", + bp.second, bp.first), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return 
suite.wait_replaced(1); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 1); + results[0].assert_accept(0, 0, 0, 1); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 0, 1, 0); + results[1].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_racing_connect_acceptor_lose(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {custom_bp_t::BANNER_WRITE}, + {custom_bp_t::BANNER_READ}, + {custom_bp_t::BANNER_PAYLOAD_READ}, + {Tag::HELLO, bp_type_t::WRITE}, + {Tag::HELLO, bp_type_t::READ}, + {Tag::AUTH_REQUEST, bp_type_t::READ}, + {Tag::AUTH_DONE, bp_type_t::WRITE}, + {Tag::AUTH_SIGNATURE, bp_type_t::WRITE}, + {Tag::AUTH_SIGNATURE, bp_type_t::READ}, + {Tag::CLIENT_IDENT, bp_type_t::READ}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + // block acceptor + interceptor.make_block(bp); + return test.run_suite( + fmt::format("test_v2_racing_connect_acceptor_lose -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_established(); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 0); + results[0].assert_reset(0, 0); + 
results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(1, 1, 0, 1); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_racing_connect_acceptor_win(FailoverTest& test) { + return seastar::do_with(std::vector<Breakpoint>{ + {custom_bp_t::SOCKET_CONNECTING}, + {custom_bp_t::BANNER_WRITE}, + {custom_bp_t::BANNER_READ}, + {custom_bp_t::BANNER_PAYLOAD_READ}, + {Tag::HELLO, bp_type_t::WRITE}, + {Tag::HELLO, bp_type_t::READ}, + {Tag::AUTH_REQUEST, bp_type_t::WRITE}, + {Tag::AUTH_DONE, bp_type_t::READ}, + {Tag::AUTH_SIGNATURE, bp_type_t::WRITE}, + {Tag::AUTH_SIGNATURE, bp_type_t::READ}, + {Tag::CLIENT_IDENT, bp_type_t::WRITE}, + }, [&test] (auto& failure_cases) { + return seastar::do_for_each(failure_cases, [&test] (auto bp) { + TestInterceptor interceptor; + // block connector + interceptor.make_block(bp); + return test.run_suite( + fmt::format("test_v2_racing_connect_acceptor_win -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_replaced(1); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(1, 0); + results[0].assert_accept(0, 0, 0, 1); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + }); + }); + }); + }); +} + +seastar::future<> +test_v2_racing_connect_reconnect_lose(FailoverTest& test) { + 
TestInterceptor interceptor; + interceptor.make_fault({Tag::SERVER_IDENT, bp_type_t::READ}); + interceptor.make_block({Tag::CLIENT_IDENT, bp_type_t::WRITE}, 2); + return test.run_suite("test_v2_racing_connect_reconnect_lose", + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_replaced(1); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 2, 0, 0); + results[0].assert_accept(0, 0, 0, 1); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 1, 0); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_racing_connect_reconnect_win(FailoverTest& test) { + TestInterceptor interceptor; + interceptor.make_fault({Tag::SERVER_IDENT, bp_type_t::READ}); + interceptor.make_block({Tag::SESSION_RECONNECT, bp_type_t::READ}); + return test.run_suite("test_v2_racing_connect_reconnect_win", + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.wait_established(); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 2, 0, 1); + 
results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::closed); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 0, 1, 0); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_stale_connect(FailoverTest& test) { + auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::READ}; + TestInterceptor interceptor; + interceptor.make_stall(bp); + return test.run_suite( + fmt::format("test_v2_stale_connect -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_replaced(1); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(1, 1, 0, 0); + results[0].assert_accept(0, 0, 0, 1); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 1, 0); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_stale_reconnect(FailoverTest& test) { + auto bp = Breakpoint{Tag::SESSION_RECONNECT_OK, bp_type_t::READ}; + TestInterceptor interceptor; + interceptor.make_fault({Tag::MESSAGE, bp_type_t::WRITE}); + interceptor.make_stall(bp); + return test.run_suite( + fmt::format("test_v2_stale_reconnect -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { 
+ return suite.wait_replaced(1); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(2, 1, 1, 1); + results[0].assert_accept(0, 0, 0, 1); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 0, 1, 0); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_stale_accept(FailoverTest& test) { + auto bp = Breakpoint{Tag::CLIENT_IDENT, bp_type_t::READ}; + TestInterceptor interceptor; + interceptor.make_stall(bp); + return test.run_suite( + fmt::format("test_v2_stale_accept -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_established(); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 1); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_stale_establishing(FailoverTest& test) { + auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::WRITE}; + TestInterceptor interceptor; + interceptor.make_stall(bp); + return test.run_suite( + fmt::format("test_v2_stale_establishing -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return 
test.peer_connect_me(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_replaced(1); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 0); + results[1].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_stale_reaccept(FailoverTest& test) { + auto bp = Breakpoint{Tag::SESSION_RECONNECT_OK, bp_type_t::WRITE}; + TestInterceptor interceptor; + interceptor.make_fault({Tag::MESSAGE, bp_type_t::READ}); + interceptor.make_stall(bp); + return test.run_suite( + fmt::format("test_v2_stale_reaccept -- {}", bp), + interceptor, + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + return test.peer_send_me(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&suite] { + return suite.wait_blocked(); + }).then([] { + logger().info("[Test] block the broken REPLACING for 210ms..."); + return seastar::sleep(210ms); + }).then([&suite] { + suite.unblock(); + return suite.wait_results(3); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 3); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 0, 1, 0); + results[1].assert_reset(0, 0); + results[2].assert_state_at(conn_state_t::replaced); + results[2].assert_connect(0, 0, 0, 0); + results[2].assert_accept(1, 0); + results[2].assert_reset(0, 0); + 
ceph_assert(results[2].server_reconnect_attempts >= 1); + }); + }); +} + +seastar::future<> +test_v2_lossy_client(FailoverTest& test) { + return test.run_suite( + "test_v2_lossy_client", + TestInterceptor(), + policy_t::lossy_client, + policy_t::stateless_server, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + logger().info("-- 0 --"); + logger().info("[Test] setup connection..."); + return suite.connect_peer(); + }).then([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }).then([&suite] { + logger().info("-- 1 --"); + logger().info("[Test] client markdown..."); + return suite.markdown(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(1, 1, 0, 1); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 2 --"); + logger().info("[Test] server markdown..."); + return test.markdown_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::closed); + results[1].assert_connect(1, 1, 0, 1); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(1, 0); + }).then([&suite] { + 
logger().info("-- 3 --"); + logger().info("[Test] client reconnect..."); + return suite.connect_peer(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.wait_results(3); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::closed); + results[1].assert_connect(1, 1, 0, 1); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(1, 0); + results[2].assert_state_at(conn_state_t::established); + results[2].assert_connect(1, 1, 0, 1); + results[2].assert_accept(0, 0, 0, 0); + results[2].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_stateless_server(FailoverTest& test) { + return test.run_suite( + "test_v2_stateless_server", + TestInterceptor(), + policy_t::stateless_server, + policy_t::lossy_client, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + logger().info("-- 0 --"); + logger().info("[Test] setup connection..."); + return test.peer_connect_me(); + }).then([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 1 --"); + logger().info("[Test] client markdown..."); + return test.markdown_peer(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(1, 0); + 
results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 1); + results[1].assert_reset(0, 0); + }).then([&suite] { + logger().info("-- 2 --"); + logger().info("[Test] server markdown..."); + return suite.markdown(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(1, 0); + results[1].assert_state_at(conn_state_t::closed); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 1); + results[1].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 3 --"); + logger().info("[Test] client reconnect..."); + return test.peer_connect_me(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_results(3); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(1, 0); + results[1].assert_state_at(conn_state_t::closed); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 1); + results[1].assert_reset(0, 0); + results[2].assert_state_at(conn_state_t::established); + results[2].assert_connect(0, 0, 0, 0); + results[2].assert_accept(1, 1, 0, 1); + results[2].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_lossless_client(FailoverTest& test) { + return test.run_suite( + "test_v2_lossless_client", + TestInterceptor(), + policy_t::lossless_client, + policy_t::stateful_server, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + logger().info("-- 0 --"); + logger().info("[Test] setup connection..."); + return suite.connect_peer(); + }).then([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.wait_results(1); + 
}).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }).then([&suite] { + logger().info("-- 1 --"); + logger().info("[Test] client markdown..."); + return suite.markdown(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(1, 1, 0, 1); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 2 --"); + logger().info("[Test] server markdown..."); + return test.markdown_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(2, 2, 1, 2); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(0, 1); + }).then([&suite] { + logger().info("-- 3 --"); + logger().info("[Test] client reconnect..."); + return suite.connect_peer(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(2, 2, 1, 2); + results[1].assert_accept(0, 0, 0, 0); + 
results[1].assert_reset(0, 1); + }); + }); +} + +seastar::future<> +test_v2_stateful_server(FailoverTest& test) { + return test.run_suite( + "test_v2_stateful_server", + TestInterceptor(), + policy_t::stateful_server, + policy_t::lossless_client, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + logger().info("-- 0 --"); + logger().info("[Test] setup connection..."); + return test.peer_connect_me(); + }).then([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 1 --"); + logger().info("[Test] client markdown..."); + return test.markdown_peer(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 1); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + }).then([&suite] { + logger().info("-- 2 --"); + logger().info("[Test] server markdown..."); + return suite.markdown(); + }).then([&suite] { + return suite.wait_results(3); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 1); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + 
results[2].assert_state_at(conn_state_t::established); + results[2].assert_connect(0, 0, 0, 0); + results[2].assert_accept(1, 1, 1, 1); + results[2].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 3 --"); + logger().info("[Test] client reconnect..."); + return test.peer_connect_me(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_results(3); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 1); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + results[2].assert_state_at(conn_state_t::established); + results[2].assert_connect(0, 0, 0, 0); + results[2].assert_accept(1, 1, 1, 1); + results[2].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_peer_reuse_connector(FailoverTest& test) { + return test.run_suite( + "test_v2_peer_reuse_connector", + TestInterceptor(), + policy_t::lossless_peer_reuse, + policy_t::lossless_peer_reuse, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + logger().info("-- 0 --"); + logger().info("[Test] setup connection..."); + return suite.connect_peer(); + }).then([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }).then([&suite] { + logger().info("-- 1 --"); + logger().info("[Test] connector markdown..."); + return suite.markdown(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + 
results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(1, 1, 0, 1); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 2 --"); + logger().info("[Test] acceptor markdown..."); + return test.markdown_peer(); + }).then([&suite] { + ceph_assert(suite.is_standby()); + logger().info("-- 3 --"); + logger().info("[Test] connector reconnect..."); + return suite.connect_peer(); + }).then([&suite] { + return suite.try_send_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(2, 2, 1, 2); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(0, 1); + }); + }); +} + +seastar::future<> +test_v2_peer_reuse_acceptor(FailoverTest& test) { + return test.run_suite( + "test_v2_peer_reuse_acceptor", + TestInterceptor(), + policy_t::lossless_peer_reuse, + policy_t::lossless_peer_reuse, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + logger().info("-- 0 --"); + logger().info("[Test] setup connection..."); + return test.peer_connect_me(); + }).then([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 1 --"); + logger().info("[Test] connector markdown..."); + return test.markdown_peer(); + }).then([&test] { + 
return test.peer_connect_me(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 1); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + }).then([&suite] { + logger().info("-- 2 --"); + logger().info("[Test] acceptor markdown..."); + return suite.markdown(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 1); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 3 --"); + logger().info("[Test] connector reconnect..."); + return test.peer_connect_me(); + }).then([&test] { + return test.try_peer_send_me(); + }).then([&suite] { + return suite.wait_results(3); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 1); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + results[2].assert_state_at(conn_state_t::established); + results[2].assert_connect(0, 0, 0, 0); + results[2].assert_accept(1, 1, 1, 1); + results[2].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_lossless_peer_connector(FailoverTest& test) { + return test.run_suite( + "test_v2_lossless_peer_connector", + 
TestInterceptor(), + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&suite] { + logger().info("-- 0 --"); + logger().info("[Test] setup connection..."); + return suite.connect_peer(); + }).then([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + }).then([&suite] { + logger().info("-- 1 --"); + logger().info("[Test] connector markdown..."); + return suite.markdown(); + }).then([&suite] { + return suite.connect_peer(); + }).then([&suite] { + return suite.send_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(1, 1, 0, 1); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 2 --"); + logger().info("[Test] acceptor markdown..."); + return test.markdown_peer(); + }).then([&suite] { + ceph_assert(suite.is_standby()); + logger().info("-- 3 --"); + logger().info("[Test] connector reconnect..."); + return suite.connect_peer(); + }).then([&suite] { + return suite.try_send_peer(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(1, 1, 0, 1); + results[0].assert_accept(0, 0, 0, 0); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::established); + results[1].assert_connect(2, 2, 1, 2); + results[1].assert_accept(0, 0, 0, 0); + results[1].assert_reset(0, 
1); + }); + }); +} + +seastar::future<> +test_v2_lossless_peer_acceptor(FailoverTest& test) { + return test.run_suite( + "test_v2_lossless_peer_acceptor", + TestInterceptor(), + policy_t::lossless_peer, + policy_t::lossless_peer, + [&test] (FailoverSuite& suite) { + return seastar::futurize_invoke([&test] { + logger().info("-- 0 --"); + logger().info("[Test] setup connection..."); + return test.peer_connect_me(); + }).then([&test] { + return test.send_bidirectional(); + }).then([&suite] { + return suite.wait_results(1); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 1); + results[0].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 1 --"); + logger().info("[Test] connector markdown..."); + return test.markdown_peer(); + }).then([&test] { + return test.peer_connect_me(); + }).then([&test] { + return test.peer_send_me(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::established); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + }).then([&suite] { + logger().info("-- 2 --"); + logger().info("[Test] acceptor markdown..."); + return suite.markdown(); + }).then([&suite] { + return suite.wait_results(2); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + }).then([&test] { + logger().info("-- 3 --"); + 
logger().info("[Test] connector reconnect..."); + return test.peer_connect_me(); + }).then([&test] { + return test.try_peer_send_me(); + }).then([&suite] { + return suite.wait_results(3); + }).then([] (ConnResults& results) { + results[0].assert_state_at(conn_state_t::closed); + results[0].assert_connect(0, 0, 0, 0); + results[0].assert_accept(1, 1, 0, 2); + results[0].assert_reset(0, 0); + results[1].assert_state_at(conn_state_t::replaced); + results[1].assert_connect(0, 0, 0, 0); + results[1].assert_accept(1, 1, 0, 0); + results[1].assert_reset(0, 0); + results[2].assert_state_at(conn_state_t::established); + results[2].assert_connect(0, 0, 0, 0); + results[2].assert_accept(1, 1, 1, 1); + results[2].assert_reset(0, 0); + }); + }); +} + +seastar::future<> +test_v2_protocol(entity_addr_t test_addr, + entity_addr_t cmd_peer_addr, + entity_addr_t test_peer_addr, + bool test_peer_islocal, + bool peer_wins) { + ceph_assert_always(test_addr.is_msgr2()); + ceph_assert_always(cmd_peer_addr.is_msgr2()); + ceph_assert_always(test_peer_addr.is_msgr2()); + + if (test_peer_islocal) { + // initiate crimson test peer locally + logger().info("test_v2_protocol: start local TestPeer at {}...", cmd_peer_addr); + return FailoverTestPeer::create(cmd_peer_addr, test_peer_addr + ).then([test_addr, cmd_peer_addr, test_peer_addr, peer_wins](auto peer) { + return test_v2_protocol( + test_addr, + cmd_peer_addr, + test_peer_addr, + false, + peer_wins + ).then([peer = std::move(peer)] () mutable { + return peer->wait().then([peer = std::move(peer)] {}); + }); + }).handle_exception([] (auto eptr) { + logger().error("FailoverTestPeer failed: got exception {}", eptr); + throw; + }); + } + + return FailoverTest::create(test_addr, cmd_peer_addr, test_peer_addr + ).then([peer_wins](auto test) { + return seastar::futurize_invoke([test] { + return test_v2_lossy_early_connect_fault(*test); + }).then([test] { + return test_v2_lossy_connect_fault(*test); + }).then([test] { + return 
test_v2_lossy_connected_fault(*test); + }).then([test] { + return test_v2_lossy_early_accept_fault(*test); + }).then([test] { + return test_v2_lossy_accept_fault(*test); + }).then([test] { + return test_v2_lossy_establishing_fault(*test); + }).then([test] { + return test_v2_lossy_accepted_fault(*test); + }).then([test] { + return test_v2_lossless_connect_fault(*test); + }).then([test] { + return test_v2_lossless_connected_fault(*test); + }).then([test] { + return test_v2_lossless_connected_fault2(*test); + }).then([test] { + return test_v2_lossless_reconnect_fault(*test); + }).then([test] { + return test_v2_lossless_accept_fault(*test); + }).then([test] { + return test_v2_lossless_establishing_fault(*test); + }).then([test] { + return test_v2_lossless_accepted_fault(*test); + }).then([test] { + return test_v2_lossless_reaccept_fault(*test); + }).then([test] { + return test_v2_peer_connect_fault(*test); + }).then([test] { + return test_v2_peer_accept_fault(*test); + }).then([test] { + return test_v2_peer_establishing_fault(*test); + }).then([test] { + return test_v2_peer_connected_fault_reconnect(*test); + }).then([test] { + return test_v2_peer_connected_fault_reaccept(*test); + }).then([test] { + return check_peer_wins(*test); + }).then([test, peer_wins](bool ret_peer_wins) { + ceph_assert(peer_wins == ret_peer_wins); + if (ret_peer_wins) { + return seastar::futurize_invoke([test] { + return test_v2_racing_connect_acceptor_win(*test); + }).then([test] { + return test_v2_racing_reconnect_acceptor_win(*test); + }); + } else { + return seastar::futurize_invoke([test] { + return test_v2_racing_connect_acceptor_lose(*test); + }).then([test] { + return test_v2_racing_reconnect_acceptor_lose(*test); + }); + } + }).then([test] { + return test_v2_racing_connect_reconnect_win(*test); + }).then([test] { + return test_v2_racing_connect_reconnect_lose(*test); + }).then([test] { + return test_v2_stale_connect(*test); + }).then([test] { + return test_v2_stale_reconnect(*test); + 
}).then([test] { + return test_v2_stale_accept(*test); + }).then([test] { + return test_v2_stale_establishing(*test); + }).then([test] { + return test_v2_stale_reaccept(*test); + }).then([test] { + return test_v2_lossy_client(*test); + }).then([test] { + return test_v2_stateless_server(*test); + }).then([test] { + return test_v2_lossless_client(*test); + }).then([test] { + return test_v2_stateful_server(*test); + }).then([test] { + return test_v2_peer_reuse_connector(*test); + }).then([test] { + return test_v2_peer_reuse_acceptor(*test); + }).then([test] { + return test_v2_lossless_peer_connector(*test); + }).then([test] { + return test_v2_lossless_peer_acceptor(*test); + }).then([test] { + return test->shutdown().then([test] {}); + }); + }).handle_exception([] (auto eptr) { + logger().error("FailoverTest failed: got exception {}", eptr); + throw; + }); +} + +} + +seastar::future<int> do_test(seastar::app_template& app) +{ + std::vector<const char*> args; + std::string cluster; + std::string conf_file_list; + auto init_params = ceph_argparse_early_args(args, + CEPH_ENTITY_TYPE_CLIENT, + &cluster, + &conf_file_list); + return crimson::common::sharded_conf().start( + init_params.name, cluster + ).then([] { + return local_conf().start(); + }).then([conf_file_list] { + return local_conf().parse_config_files(conf_file_list); + }).then([&app] { + auto&& config = app.configuration(); + verbose = config["verbose"].as<bool>(); + auto rounds = config["rounds"].as<unsigned>(); + auto keepalive_ratio = config["keepalive-ratio"].as<double>(); + auto testpeer_islocal = config["testpeer-islocal"].as<bool>(); + + entity_addr_t test_addr; + ceph_assert(test_addr.parse( + config["test-addr"].as<std::string>().c_str(), nullptr)); + test_addr.set_nonce(TEST_NONCE); + + entity_addr_t cmd_peer_addr; + ceph_assert(cmd_peer_addr.parse( + config["testpeer-addr"].as<std::string>().c_str(), nullptr)); + cmd_peer_addr.set_nonce(CMD_SRV_NONCE); + + entity_addr_t test_peer_addr = 
get_test_peer_addr(cmd_peer_addr); + bool peer_wins = (test_addr > test_peer_addr); + + logger().info("test configuration: verbose={}, rounds={}, keepalive_ratio={}, " + "test_addr={}, cmd_peer_addr={}, test_peer_addr={}, " + "testpeer_islocal={}, peer_wins={}, smp={}", + verbose, rounds, keepalive_ratio, + test_addr, cmd_peer_addr, test_peer_addr, + testpeer_islocal, peer_wins, + seastar::smp::count); + return test_echo(rounds, keepalive_ratio + ).then([] { + return test_preemptive_shutdown(); + }).then([test_addr, cmd_peer_addr, test_peer_addr, testpeer_islocal, peer_wins] { + return test_v2_protocol( + test_addr, + cmd_peer_addr, + test_peer_addr, + testpeer_islocal, + peer_wins); + }).then([] { + logger().info("All tests succeeded"); + // Seastar has bugs to have events undispatched during shutdown, + // which will result in memory leak and thus fail LeakSanitizer. + return seastar::sleep(100ms); + }); + }).then([] { + return crimson::common::sharded_conf().stop(); + }).then([] { + return 0; + }).handle_exception([] (auto eptr) { + logger().error("Test failed: got exception {}", eptr); + return 1; + }); +} + +int main(int argc, char** argv) +{ + seastar::app_template app; + app.add_options() + ("verbose,v", bpo::value<bool>()->default_value(false), + "chatty if true") + ("rounds", bpo::value<unsigned>()->default_value(512), + "number of pingpong rounds") + ("keepalive-ratio", bpo::value<double>()->default_value(0.1), + "ratio of keepalive in ping messages") + ("test-addr", bpo::value<std::string>()->default_value("v2:127.0.0.1:9014"), + "address of v2 failover tests") + ("testpeer-addr", bpo::value<std::string>()->default_value("v2:127.0.0.1:9012"), + "addresses of v2 failover testpeer" + " (This is CmdSrv address, and TestPeer address is at port+=1)") + ("testpeer-islocal", bpo::value<bool>()->default_value(true), + "create a local crimson testpeer, or connect to a remote testpeer"); + return app.run(argc, argv, [&app] { + // This test normally succeeds within 
60 seconds, so kill it after 300 + // seconds in case it is blocked forever due to unaddressed bugs. + return seastar::with_timeout(seastar::lowres_clock::now() + 300s, do_test(app)) + .handle_exception_type([](seastar::timed_out_error&) { + logger().error("test_messenger timeout after 300s, abort! " + "Consider to extend the period if the test is still running."); + // use the retcode of timeout(1) + return 124; + }); + }); +} diff --git a/src/test/crimson/test_messenger.h b/src/test/crimson/test_messenger.h new file mode 100644 index 000000000..635f7fae3 --- /dev/null +++ b/src/test/crimson/test_messenger.h @@ -0,0 +1,95 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "msg/msg_types.h" + +namespace ceph::net::test { + +constexpr uint64_t CMD_CLI_NONCE = 1; +constexpr int64_t CMD_CLI_OSD = 1; +constexpr uint64_t TEST_NONCE = 2; +constexpr int64_t TEST_OSD = 2; +constexpr uint64_t CMD_SRV_NONCE = 3; +constexpr int64_t CMD_SRV_OSD = 3; +constexpr uint64_t TEST_PEER_NONCE = 2; +constexpr int64_t TEST_PEER_OSD = 4; + +inline entity_addr_t get_test_peer_addr( + const entity_addr_t &cmd_peer_addr) { + entity_addr_t test_peer_addr = cmd_peer_addr; + test_peer_addr.set_port(cmd_peer_addr.get_port() + 1); + test_peer_addr.set_nonce(TEST_PEER_NONCE); + return test_peer_addr; +} + +enum class cmd_t : char { + none = '\0', + shutdown, + suite_start, + suite_stop, + suite_connect_me, + suite_send_me, + suite_keepalive_me, + suite_markdown, + suite_recv_op +}; + +enum class policy_t : char { + none = '\0', + stateful_server, + stateless_server, + lossless_peer, + lossless_peer_reuse, + lossy_client, + lossless_client +}; + +inline std::ostream& operator<<(std::ostream& out, const cmd_t& cmd) { + switch(cmd) { + case cmd_t::none: + return out << "none"; + case cmd_t::shutdown: + return out << "shutdown"; + case cmd_t::suite_start: + return out << "suite_start"; + case cmd_t::suite_stop: + return 
out << "suite_stop"; + case cmd_t::suite_connect_me: + return out << "suite_connect_me"; + case cmd_t::suite_send_me: + return out << "suite_send_me"; + case cmd_t::suite_keepalive_me: + return out << "suite_keepalive_me"; + case cmd_t::suite_markdown: + return out << "suite_markdown"; + case cmd_t::suite_recv_op: + return out << "suite_recv_op"; + default: + ceph_abort(); + } +} + +inline std::ostream& operator<<(std::ostream& out, const policy_t& policy) { + switch(policy) { + case policy_t::none: + return out << "none"; + case policy_t::stateful_server: + return out << "stateful_server"; + case policy_t::stateless_server: + return out << "stateless_server"; + case policy_t::lossless_peer: + return out << "lossless_peer"; + case policy_t::lossless_peer_reuse: + return out << "lossless_peer_reuse"; + case policy_t::lossy_client: + return out << "lossy_client"; + case policy_t::lossless_client: + return out << "lossless_client"; + default: + ceph_abort(); + } +} + +} // namespace ceph::net::test diff --git a/src/test/crimson/test_messenger_peer.cc b/src/test/crimson/test_messenger_peer.cc new file mode 100644 index 000000000..28d8a3d38 --- /dev/null +++ b/src/test/crimson/test_messenger_peer.cc @@ -0,0 +1,462 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- + +#include <boost/pointer_cast.hpp> +#include <boost/program_options/variables_map.hpp> +#include <boost/program_options/parsers.hpp> + +#include "auth/DummyAuth.h" +#include "common/dout.h" +#include "global/global_init.h" +#include "messages/MPing.h" +#include "messages/MCommand.h" +#include "messages/MCommandReply.h" +#include "messages/MOSDOp.h" +#include "msg/Dispatcher.h" +#include "msg/Messenger.h" + +#include "test_messenger.h" + +namespace { + +#define dout_subsys ceph_subsys_test + +using namespace ceph::net::test; +using SocketPolicy = Messenger::Policy; + +constexpr int CEPH_OSD_PROTOCOL = 10; + +class FailoverSuitePeer : public Dispatcher { + using cb_t = 
std::function<void()>; + DummyAuthClientServer dummy_auth; + std::unique_ptr<Messenger> peer_msgr; + cb_t op_callback; + + Connection* tracked_conn = nullptr; + unsigned pending_send = 0; + + bool ms_can_fast_dispatch_any() const override { return true; } + bool ms_can_fast_dispatch(const Message* m) const override { return true; } + void ms_fast_dispatch(Message* m) override { + auto conn = m->get_connection().get(); + if (tracked_conn == nullptr) { + ldout(cct, 0) << "[!TestPeer] got op from Test(conn " + << conn << "not tracked yet)" << dendl; + tracked_conn = conn; + } else if (tracked_conn != conn) { + lderr(cct) << "[TestPeer] got op from Test: conn(" << conn + << ") != tracked_conn(" << tracked_conn + << ")" << dendl; + ceph_abort(); + } else { + ldout(cct, 0) << "[TestPeer] got op from Test" << dendl; + } + op_callback(); + } + bool ms_dispatch(Message* m) override { ceph_abort(); } + void ms_handle_fast_connect(Connection* conn) override { + if (tracked_conn == conn) { + ldout(cct, 0) << "[TestPeer] connected: " << conn << dendl; + } else { + lderr(cct) << "[TestPeer] connected: conn(" << conn + << ") != tracked_conn(" << tracked_conn + << ")" << dendl; + ceph_abort(); + } + } + void ms_handle_fast_accept(Connection* conn) override { + if (tracked_conn == nullptr) { + ldout(cct, 0) << "[TestPeer] accepted: " << conn << dendl; + tracked_conn = conn; + } else if (tracked_conn != conn) { + lderr(cct) << "[TestPeer] accepted: conn(" << conn + << ") != tracked_conn(" << tracked_conn + << ")" << dendl; + ceph_abort(); + } else { + ldout(cct, 0) << "[!TestPeer] accepted(stale event): " << conn << dendl; + } + flush_pending_send(); + } + bool ms_handle_reset(Connection* conn) override { + if (tracked_conn == conn) { + ldout(cct, 0) << "[TestPeer] reset: " << conn << dendl; + tracked_conn = nullptr; + } else { + ldout(cct, 0) << "[!TestPeer] reset(invalid event): conn(" << conn + << ") != tracked_conn(" << tracked_conn + << ")" << dendl; + } + return true; + } + 
void ms_handle_remote_reset(Connection* conn) override { + if (tracked_conn == conn) { + ldout(cct, 0) << "[TestPeer] remote reset: " << conn << dendl; + } else { + ldout(cct, 0) << "[!TestPeer] reset(invalid event): conn(" << conn + << ") != tracked_conn(" << tracked_conn + << ")" << dendl; + } + } + bool ms_handle_refused(Connection* conn) override { + ldout(cct, 0) << "[!TestPeer] refused: " << conn << dendl; + return true; + } + + private: + void init(entity_addr_t test_peer_addr, SocketPolicy policy) { + peer_msgr.reset(Messenger::create( + cct, "async", + entity_name_t::OSD(TEST_PEER_OSD), + "TestPeer", + TEST_PEER_NONCE)); + dummy_auth.auth_registry.refresh_config(); + peer_msgr->set_cluster_protocol(CEPH_OSD_PROTOCOL); + peer_msgr->set_default_policy(policy); + peer_msgr->set_auth_client(&dummy_auth); + peer_msgr->set_auth_server(&dummy_auth); + peer_msgr->bind(test_peer_addr); + peer_msgr->add_dispatcher_head(this); + peer_msgr->start(); + } + + void send_op() { + ceph_assert(tracked_conn); + pg_t pgid; + object_locator_t oloc; + hobject_t hobj(object_t(), oloc.key, CEPH_NOSNAP, pgid.ps(), + pgid.pool(), oloc.nspace); + spg_t spgid(pgid); + tracked_conn->send_message2(make_message<MOSDOp>(0, 0, hobj, spgid, 0, 0, 0)); + } + + void flush_pending_send() { + if (pending_send != 0) { + ldout(cct, 0) << "[TestPeer] flush sending " + << pending_send << " ops" << dendl; + } + ceph_assert(tracked_conn); + while (pending_send) { + send_op(); + --pending_send; + } + } + + public: + FailoverSuitePeer(CephContext* cct, cb_t op_callback) + : Dispatcher(cct), dummy_auth(cct), op_callback(op_callback) { } + + void shutdown() { + peer_msgr->shutdown(); + peer_msgr->wait(); + } + + void connect_peer(entity_addr_t test_addr) { + ldout(cct, 0) << "[TestPeer] connect_peer(" << test_addr << ")" << dendl; + auto conn = peer_msgr->connect_to_osd(entity_addrvec_t{test_addr}); + if (tracked_conn) { + if (tracked_conn == conn.get()) { + ldout(cct, 0) << "[TestPeer] this is not a 
new session " << conn.get() << dendl; + } else { + ldout(cct, 0) << "[TestPeer] this is a new session " << conn.get() + << ", replacing old one " << tracked_conn << dendl; + } + } else { + ldout(cct, 0) << "[TestPeer] this is a new session " << conn.get() << dendl; + } + tracked_conn = conn.get(); + flush_pending_send(); + } + + void send_peer() { + if (tracked_conn) { + ldout(cct, 0) << "[TestPeer] send_peer()" << dendl; + send_op(); + } else { + ++pending_send; + ldout(cct, 0) << "[TestPeer] send_peer() (pending " << pending_send << ")" << dendl; + } + } + + void keepalive_peer() { + ldout(cct, 0) << "[TestPeer] keepalive_peer()" << dendl; + ceph_assert(tracked_conn); + tracked_conn->send_keepalive(); + } + + void markdown() { + ldout(cct, 0) << "[TestPeer] markdown()" << dendl; + ceph_assert(tracked_conn); + tracked_conn->mark_down(); + tracked_conn = nullptr; + } + + static std::unique_ptr<FailoverSuitePeer> + create(CephContext* cct, entity_addr_t test_peer_addr, + SocketPolicy policy, cb_t op_callback) { + auto suite = std::make_unique<FailoverSuitePeer>(cct, op_callback); + suite->init(test_peer_addr, policy); + return suite; + } +}; + +SocketPolicy to_socket_policy(CephContext* cct, policy_t policy) { + switch (policy) { + case policy_t::stateful_server: + return SocketPolicy::stateful_server(0); + case policy_t::stateless_server: + return SocketPolicy::stateless_server(0); + case policy_t::lossless_peer: + return SocketPolicy::lossless_peer(0); + case policy_t::lossless_peer_reuse: + return SocketPolicy::lossless_peer_reuse(0); + case policy_t::lossy_client: + return SocketPolicy::lossy_client(0); + case policy_t::lossless_client: + return SocketPolicy::lossless_client(0); + default: + lderr(cct) << "[CmdSrv] unexpected policy type" << dendl; + ceph_abort(); + } +} + +class FailoverTestPeer : public Dispatcher { + DummyAuthClientServer dummy_auth; + std::unique_ptr<Messenger> cmd_msgr; + Connection *cmd_conn = nullptr; + const entity_addr_t test_peer_addr; 
+ std::unique_ptr<FailoverSuitePeer> test_suite; + const bool nonstop; + + bool ms_can_fast_dispatch_any() const override { return false; } + bool ms_can_fast_dispatch(const Message* m) const override { return false; } + void ms_fast_dispatch(Message* m) override { ceph_abort(); } + bool ms_dispatch(Message* m) override { + auto conn = m->get_connection().get(); + if (cmd_conn == nullptr) { + ldout(cct, 0) << "[!CmdSrv] got msg from CmdCli(conn " + << conn << "not tracked yet)" << dendl; + cmd_conn = conn; + } else if (cmd_conn != conn) { + lderr(cct) << "[CmdSrv] got msg from CmdCli: conn(" << conn + << ") != cmd_conn(" << cmd_conn + << ")" << dendl; + ceph_abort(); + } else { + // good! + } + switch (m->get_type()) { + case CEPH_MSG_PING: { + ldout(cct, 0) << "[CmdSrv] got PING, sending PONG ..." << dendl; + cmd_conn->send_message2(make_message<MPing>()); + break; + } + case MSG_COMMAND: { + auto m_cmd = boost::static_pointer_cast<MCommand>(m); + auto cmd = static_cast<cmd_t>(m_cmd->cmd[0][0]); + if (cmd == cmd_t::shutdown) { + ldout(cct, 0) << "All tests succeeded" << dendl; + if (!nonstop) { + ldout(cct, 0) << "[CmdSrv] shutdown ..." << dendl; + cmd_msgr->shutdown(); + } else { + ldout(cct, 0) << "[CmdSrv] nonstop set ..." << dendl; + } + } else { + ldout(cct, 0) << "[CmdSrv] got cmd " << cmd << dendl; + handle_cmd(cmd, m_cmd); + ldout(cct, 0) << "[CmdSrv] done, send cmd reply ..." 
<< dendl; + cmd_conn->send_message2(make_message<MCommandReply>()); + } + break; + } + default: + lderr(cct) << "[CmdSrv] " << __func__ << " " << cmd_conn + << " got unexpected msg from CmdCli: " + << m << dendl; + ceph_abort(); + } + m->put(); + return true; + } + void ms_handle_fast_connect(Connection*) override { ceph_abort(); } + void ms_handle_fast_accept(Connection *conn) override { + if (cmd_conn == nullptr) { + ldout(cct, 0) << "[CmdSrv] accepted: " << conn << dendl; + cmd_conn = conn; + } else if (cmd_conn != conn) { + lderr(cct) << "[CmdSrv] accepted: conn(" << conn + << ") != cmd_conn(" << cmd_conn + << ")" << dendl; + ceph_abort(); + } else { + ldout(cct, 0) << "[!CmdSrv] accepted(stale event): " << conn << dendl; + } + } + bool ms_handle_reset(Connection* conn) override { + if (cmd_conn == conn) { + ldout(cct, 0) << "[CmdSrv] reset: " << conn << dendl; + cmd_conn = nullptr; + } else { + ldout(cct, 0) << "[!CmdSrv] reset(invalid event): conn(" << conn + << ") != cmd_conn(" << cmd_conn + << ")" << dendl; + } + return true; + } + void ms_handle_remote_reset(Connection*) override { ceph_abort(); } + bool ms_handle_refused(Connection*) override { ceph_abort(); } + + private: + void notify_recv_op() { + ceph_assert(cmd_conn); + auto m = make_message<MCommand>(); + m->cmd.emplace_back(1, static_cast<char>(cmd_t::suite_recv_op)); + cmd_conn->send_message2(m); + } + + void handle_cmd(cmd_t cmd, MRef<MCommand> m_cmd) { + switch (cmd) { + case cmd_t::suite_start: { + if (test_suite) { + test_suite->shutdown(); + test_suite.reset(); + ldout(cct, 0) << "-------- suite stopped (force) --------\n\n" << dendl; + } + auto p = static_cast<policy_t>(m_cmd->cmd[1][0]); + ldout(cct, 0) << "[CmdSrv] suite starting (" << p + <<", " << test_peer_addr << ") ..." 
<< dendl; + auto policy = to_socket_policy(cct, p); + auto suite = FailoverSuitePeer::create(cct, test_peer_addr, policy, + [this] { notify_recv_op(); }); + test_suite.swap(suite); + return; + } + case cmd_t::suite_stop: + ceph_assert(test_suite); + test_suite->shutdown(); + test_suite.reset(); + ldout(cct, 0) << "-------- suite stopped --------\n\n" << dendl; + return; + case cmd_t::suite_connect_me: { + ceph_assert(test_suite); + entity_addr_t test_addr = entity_addr_t(); + test_addr.parse(m_cmd->cmd[1].c_str(), nullptr); + test_suite->connect_peer(test_addr); + return; + } + case cmd_t::suite_send_me: + ceph_assert(test_suite); + test_suite->send_peer(); + return; + case cmd_t::suite_keepalive_me: + ceph_assert(test_suite); + test_suite->keepalive_peer(); + return; + case cmd_t::suite_markdown: + ceph_assert(test_suite); + test_suite->markdown(); + return; + default: + lderr(cct) << "[CmdSrv] got unexpected command " << m_cmd + << " from CmdCli" << dendl; + ceph_abort(); + } + } + + void init(entity_addr_t cmd_peer_addr) { + cmd_msgr.reset(Messenger::create( + cct, "async", + entity_name_t::OSD(CMD_SRV_OSD), + "CmdSrv", + CMD_SRV_NONCE)); + dummy_auth.auth_registry.refresh_config(); + cmd_msgr->set_cluster_protocol(CEPH_OSD_PROTOCOL); + cmd_msgr->set_default_policy(Messenger::Policy::stateless_server(0)); + cmd_msgr->set_auth_client(&dummy_auth); + cmd_msgr->set_auth_server(&dummy_auth); + cmd_msgr->bind(cmd_peer_addr); + cmd_msgr->add_dispatcher_head(this); + cmd_msgr->start(); + } + + public: + FailoverTestPeer(CephContext* cct, + entity_addr_t test_peer_addr, + bool nonstop) + : Dispatcher(cct), + dummy_auth(cct), + test_peer_addr(test_peer_addr), + nonstop(nonstop) { } + + void wait() { cmd_msgr->wait(); } + + static std::unique_ptr<FailoverTestPeer> + create(CephContext* cct, + entity_addr_t cmd_peer_addr, + entity_addr_t test_peer_addr, + bool nonstop) { + auto test_peer = std::make_unique<FailoverTestPeer>( + cct, test_peer_addr, nonstop); + 
test_peer->init(cmd_peer_addr); + ldout(cct, 0) << "[CmdSrv] ready" << dendl; + return test_peer; + } +}; + +} + +int main(int argc, char** argv) +{ + namespace po = boost::program_options; + po::options_description desc{"Allowed options"}; + desc.add_options() + ("help,h", "show help message") + ("addr", po::value<std::string>()->default_value("v2:127.0.0.1:9012"), + "This is CmdSrv address, and TestPeer address is at port+=1") + ("nonstop", po::value<bool>()->default_value(false), + "Do not shutdown TestPeer when all tests are successful"); + po::variables_map vm; + std::vector<std::string> unrecognized_options; + try { + auto parsed = po::command_line_parser(argc, argv) + .options(desc) + .allow_unregistered() + .run(); + po::store(parsed, vm); + if (vm.count("help")) { + std::cout << desc << std::endl; + return 0; + } + po::notify(vm); + unrecognized_options = po::collect_unrecognized(parsed.options, po::include_positional); + } catch(const po::error& e) { + std::cerr << "error: " << e.what() << std::endl; + return 1; + } + + std::vector<const char*> args(argv, argv + argc); + auto cct = global_init(nullptr, args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_MON_CONFIG); + common_init_finish(cct.get()); + + auto addr = vm["addr"].as<std::string>(); + entity_addr_t cmd_peer_addr; + cmd_peer_addr.parse(addr.c_str(), nullptr); + cmd_peer_addr.set_nonce(CMD_SRV_NONCE); + ceph_assert_always(cmd_peer_addr.is_msgr2()); + auto test_peer_addr = get_test_peer_addr(cmd_peer_addr); + auto nonstop = vm["nonstop"].as<bool>(); + ldout(cct, 0) << "test configuration: cmd_peer_addr=" << cmd_peer_addr + << ", test_peer_addr=" << test_peer_addr + << ", nonstop=" << nonstop + << dendl; + + auto test_peer = FailoverTestPeer::create( + cct.get(), + cmd_peer_addr, + test_peer_addr, + nonstop); + test_peer->wait(); +} diff --git a/src/test/crimson/test_messenger_thrash.cc b/src/test/crimson/test_messenger_thrash.cc new file mode 100644 index 
000000000..f2b1828f1 --- /dev/null +++ b/src/test/crimson/test_messenger_thrash.cc @@ -0,0 +1,672 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <map> +#include <random> +#include <fmt/format.h> +#include <fmt/ostream.h> +#include <seastar/core/app-template.hh> +#include <seastar/core/do_with.hh> +#include <seastar/core/future-util.hh> +#include <seastar/core/reactor.hh> +#include <seastar/core/sleep.hh> +#include <seastar/core/with_timeout.hh> + +#include "common/ceph_argparse.h" +#include "messages/MPing.h" +#include "messages/MCommand.h" +#include "crimson/auth/DummyAuth.h" +#include "crimson/common/log.h" +#include "crimson/net/Connection.h" +#include "crimson/net/Dispatcher.h" +#include "crimson/net/Messenger.h" + +using namespace std::chrono_literals; +namespace bpo = boost::program_options; +using crimson::common::local_conf; +using payload_seq_t = uint64_t; + +struct Payload { + enum Who : uint8_t { + PING = 0, + PONG = 1, + }; + uint8_t who = 0; + payload_seq_t seq = 0; + bufferlist data; + + Payload(Who who, uint64_t seq, const bufferlist& data) + : who(who), seq(seq), data(data) + {} + Payload() = default; + DENC(Payload, v, p) { + DENC_START(1, 1, p); + denc(v.who, p); + denc(v.seq, p); + denc(v.data, p); + DENC_FINISH(p); + } +}; +WRITE_CLASS_DENC(Payload) + +template<> +struct fmt::formatter<Payload> : fmt::formatter<std::string_view> { + template <typename FormatContext> + auto format(const Payload& pl, FormatContext& ctx) const { + return fmt::format_to(ctx.out(), "reply={} i={}", pl.who, pl.seq); + } +}; + +namespace { + +seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); +} + +std::random_device rd; +std::default_random_engine rng{rd()}; +std::uniform_int_distribution<> prob(0,99); +bool verbose = false; + +entity_addr_t get_server_addr() { + static int port = 16800; + ++port; + entity_addr_t saddr; + saddr.parse("127.0.0.1", nullptr); + 
saddr.set_port(port); + return saddr; +} + +uint64_t get_nonce() { + static uint64_t nonce = 1; + ++nonce; + return nonce; +} + +struct thrash_params_t { + std::size_t servers; + std::size_t clients; + std::size_t connections; + std::size_t random_op; +}; + +class SyntheticWorkload; + +class SyntheticDispatcher final + : public crimson::net::Dispatcher { + public: + std::map<crimson::net::Connection*, std::deque<payload_seq_t> > conn_sent; + std::map<payload_seq_t, bufferlist> sent; + unsigned index; + SyntheticWorkload *workload; + + SyntheticDispatcher(bool s, SyntheticWorkload *wl): + index(0), workload(wl) { + } + + std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef con, + MessageRef m) final { + if (verbose) { + logger().warn("{}: con = {}", __func__, *con); + } + // MSG_COMMAND is used to disorganize regular message flow + if (m->get_type() == MSG_COMMAND) { + return seastar::now(); + } + + Payload pl; + auto p = m->get_data().cbegin(); + decode(pl, p); + if (pl.who == Payload::PING) { + logger().info(" {} conn= {} {}", __func__, *con, pl); + return reply_message(m, con, pl); + } else { + ceph_assert(pl.who == Payload::PONG); + if (sent.count(pl.seq)) { + logger().info(" {} conn= {} {}", __func__, *con, pl); + ceph_assert(conn_sent[&*con].front() == pl.seq); + ceph_assert(pl.data.contents_equal(sent[pl.seq])); + conn_sent[&*con].pop_front(); + sent.erase(pl.seq); + } + + return seastar::now(); + } + } + + void ms_handle_accept( + crimson::net::ConnectionRef conn, + seastar::shard_id prv_shard, + bool is_replace) final { + logger().info("{} - Connection:{}", __func__, *conn); + assert(prv_shard == seastar::this_shard_id()); + } + + void ms_handle_connect( + crimson::net::ConnectionRef conn, + seastar::shard_id prv_shard) final { + logger().info("{} - Connection:{}", __func__, *conn); + assert(prv_shard == seastar::this_shard_id()); + } + + void ms_handle_reset(crimson::net::ConnectionRef con, bool is_replace) final; + + void 
ms_handle_remote_reset(crimson::net::ConnectionRef con) final { + clear_pending(con); + } + + std::optional<seastar::future<>> reply_message( + const MessageRef m, + crimson::net::ConnectionRef con, + Payload& pl) { + pl.who = Payload::PONG; + bufferlist bl; + encode(pl, bl); + auto rm = crimson::make_message<MPing>(); + rm->set_data(bl); + if (verbose) { + logger().info("{} conn= {} reply i= {}", + __func__, *con, pl.seq); + } + return con->send(std::move(rm)); + } + + seastar::future<> send_message_wrap(crimson::net::ConnectionRef con, + const bufferlist& data) { + auto m = crimson::make_message<MPing>(); + Payload pl{Payload::PING, index++, data}; + bufferlist bl; + encode(pl, bl); + m->set_data(bl); + sent[pl.seq] = pl.data; + conn_sent[&*con].push_back(pl.seq); + logger().info("{} conn= {} send i= {}", + __func__, *con, pl.seq); + + return con->send(std::move(m)); + } + + uint64_t get_num_pending_msgs() { + return sent.size(); + } + + void clear_pending(crimson::net::ConnectionRef con) { + for (std::deque<uint64_t>::iterator it = conn_sent[&*con].begin(); + it != conn_sent[&*con].end(); ++it) + sent.erase(*it); + conn_sent.erase(&*con); + } + + void print() { + for (auto && [connptr, list] : conn_sent) { + if (!list.empty()) { + logger().info("{} {} wait {}", __func__, + (void*)connptr, list.size()); + } + } + } +}; + +class SyntheticWorkload { + // messengers must be freed after its connections + std::set<crimson::net::MessengerRef> available_servers; + std::set<crimson::net::MessengerRef> available_clients; + + crimson::net::SocketPolicy server_policy; + crimson::net::SocketPolicy client_policy; + std::map<crimson::net::ConnectionRef, + std::pair<crimson::net::MessengerRef, + crimson::net::MessengerRef>> available_connections; + SyntheticDispatcher dispatcher; + std::vector<bufferlist> rand_data; + crimson::auth::DummyAuthClientServer dummy_auth; + + seastar::future<crimson::net::ConnectionRef> get_random_connection() { + return seastar::do_until( + [this] { 
return dispatcher.get_num_pending_msgs() <= max_in_flight; }, + [] { return seastar::sleep(100ms); } + ).then([this] { + boost::uniform_int<> choose(0, available_connections.size() - 1); + int index = choose(rng); + std::map<crimson::net::ConnectionRef, + std::pair<crimson::net::MessengerRef, crimson::net::MessengerRef>>::iterator i + = available_connections.begin(); + for (; index > 0; --index, ++i) ; + return seastar::make_ready_future<crimson::net::ConnectionRef>(i->first); + }); + } + + public: + const unsigned min_connections = 10; + const unsigned max_in_flight = 64; + const unsigned max_connections = 128; + const unsigned max_message_len = 1024 * 1024 * 4; + const uint64_t servers, clients; + + SyntheticWorkload(int servers, int clients, int random_num, + crimson::net::SocketPolicy srv_policy, + crimson::net::SocketPolicy cli_policy) + : server_policy(srv_policy), + client_policy(cli_policy), + dispatcher(false, this), + servers(servers), + clients(clients) { + + for (int i = 0; i < random_num; i++) { + bufferlist bl; + boost::uniform_int<> u(32, max_message_len); + uint64_t value_len = u(rng); + bufferptr bp(value_len); + bp.zero(); + for (uint64_t j = 0; j < value_len-sizeof(i); ) { + memcpy(bp.c_str()+j, &i, sizeof(i)); + j += 4096; + } + + bl.append(bp); + rand_data.push_back(bl); + } + } + + + bool can_create_connection() { + return available_connections.size() < max_connections; + } + + seastar::future<> maybe_generate_connection() { + if (!can_create_connection()) { + return seastar::now(); + } + crimson::net::MessengerRef server, client; + { + boost::uniform_int<> choose(0, available_servers.size() - 1); + int index = choose(rng); + std::set<crimson::net::MessengerRef>::iterator i + = available_servers.begin(); + for (; index > 0; --index, ++i) ; + server = *i; + } + { + boost::uniform_int<> choose(0, available_clients.size() - 1); + int index = choose(rng); + std::set<crimson::net::MessengerRef>::iterator i + = available_clients.begin(); + for (; 
index > 0; --index, ++i) ; + client = *i; + } + + + std::pair<crimson::net::MessengerRef, crimson::net::MessengerRef> + connected_pair; + { + crimson::net::ConnectionRef conn = client->connect( + server->get_myaddr(), + entity_name_t::TYPE_OSD); + connected_pair = std::make_pair(client, server); + available_connections[conn] = connected_pair; + } + return seastar::now(); + } + + seastar::future<> random_op (const uint64_t& iter) { + return seastar::do_with(iter, [this] (uint64_t& iter) { + return seastar::do_until( + [&] { return iter == 0; }, + [&, this] + { + if (!(iter % 10)) { + logger().info("{} Op {} : ", __func__ ,iter); + print_internal_state(); + } + --iter; + int val = prob(rng); + if(val > 90) { + return maybe_generate_connection(); + } else if (val > 80) { + return drop_connection(); + } else if (val > 10) { + return send_message(); + } else { + return seastar::sleep( + std::chrono::milliseconds(rand() % 1000 + 500)); + } + }); + }); + } + + seastar::future<> generate_connections (const uint64_t& iter) { + return seastar::do_with(iter, [this] (uint64_t& iter) { + return seastar::do_until( + [&] { return iter == 0; }, + [&, this] + { + --iter; + if (!(connections_count() % 10)) { + logger().info("seeding connection {}", + connections_count()); + } + return maybe_generate_connection(); + }); + }); + } + + seastar::future<> init_server(const entity_name_t& name, + const std::string& lname, + const uint64_t nonce, + const entity_addr_t& addr) { + crimson::net::MessengerRef msgr = + crimson::net::Messenger::create( + name, lname, nonce, true); + msgr->set_default_policy(server_policy); + msgr->set_auth_client(&dummy_auth); + msgr->set_auth_server(&dummy_auth); + available_servers.insert(msgr); + return msgr->bind(entity_addrvec_t{addr}).safe_then( + [this, msgr] { + return msgr->start({&dispatcher}); + }, crimson::net::Messenger::bind_ertr::all_same_way( + [addr] (const std::error_code& e) { + logger().error("{} test_messenger_thrash(): " + "there is another 
instance running at {}", + __func__, addr); + ceph_abort(); + })); + } + + seastar::future<> init_client(const entity_name_t& name, + const std::string& lname, + const uint64_t nonce) { + crimson::net::MessengerRef msgr = + crimson::net::Messenger::create( + name, lname, nonce, true); + msgr->set_default_policy(client_policy); + msgr->set_auth_client(&dummy_auth); + msgr->set_auth_server(&dummy_auth); + available_clients.insert(msgr); + return msgr->start({&dispatcher}); + } + + seastar::future<> send_message() { + return get_random_connection() + .then([this] (crimson::net::ConnectionRef conn) { + boost::uniform_int<> true_false(0, 99); + int val = true_false(rng); + if (val >= 95) { + uuid_d uuid; + uuid.generate_random(); + auto m = crimson::make_message<MCommand>(uuid); + std::vector<std::string> cmds; + cmds.push_back("command"); + m->cmd = cmds; + m->set_priority(200); + return conn->send(std::move(m)); + } else { + boost::uniform_int<> u(0, rand_data.size()-1); + return dispatcher.send_message_wrap(conn, rand_data[u(rng)]); + } + }); + } + + seastar::future<> drop_connection() { + if (available_connections.size() < min_connections) { + return seastar::now(); + } + + return get_random_connection() + .then([this] (crimson::net::ConnectionRef conn) { + dispatcher.clear_pending(conn); + conn->mark_down(); + if (!client_policy.server && + client_policy.standby) { + // it's a lossless policy, so we need to mark down each side + std::pair<crimson::net::MessengerRef, crimson::net::MessengerRef> &p = + available_connections[conn]; + if (!p.first->get_default_policy().server && + !p.second->get_default_policy().server) { + //verify that equal-to operator applies here + ceph_assert(p.first->owns_connection(*conn)); + crimson::net::ConnectionRef peer = p.second->connect( + p.first->get_myaddr(), p.first->get_mytype()); + peer->mark_down(); + dispatcher.clear_pending(peer); + available_connections.erase(peer); + } + } + ceph_assert(available_connections.erase(conn) == 
1U); + return seastar::now(); + }); + } + + void print_internal_state(bool detail=false) { + logger().info("available_connections: {} inflight messages: {}", + available_connections.size(), + dispatcher.get_num_pending_msgs()); + if (detail && !available_connections.empty()) { + dispatcher.print(); + } + } + + seastar::future<> wait_for_done() { + // Keep the poll counter inside the action closure: a local captured by reference would dangle once this function returns. + return seastar::do_until( + [this] { return !dispatcher.get_num_pending_msgs(); }, + [this, i = 0] () mutable + { + if (i++ % 50 == 0){ + print_internal_state(true); + } + return seastar::sleep(100ms); + }).then([this] { + return seastar::do_for_each(available_servers, [] (auto server) { + if (verbose) { + logger().info("server {} shutdown" , server->get_myaddrs()); + } + server->stop(); + return server->shutdown(); + }); + }).then([this] { + return seastar::do_for_each(available_clients, [] (auto client) { + if (verbose) { + logger().info("client {} shutdown" , client->get_myaddrs()); + } + client->stop(); + return client->shutdown(); + }); + }); + } + + void handle_reset(crimson::net::ConnectionRef con) { + available_connections.erase(con); + } + + uint64_t servers_count() { + return available_servers.size(); + } + + uint64_t clients_count() { + return available_clients.size(); + } + + uint64_t connections_count() { + return available_connections.size(); + } +}; + +void SyntheticDispatcher::ms_handle_reset(crimson::net::ConnectionRef con, + bool is_replace) { + workload->handle_reset(con); + clear_pending(con); +} + +seastar::future<> reset_conf() { + return seastar::when_all_succeed( + local_conf().set_val("ms_inject_socket_failures", "0"), + local_conf().set_val("ms_inject_internal_delays", "0"), + local_conf().set_val("ms_inject_delay_probability", "0"), + local_conf().set_val("ms_inject_delay_max", "0") + ).then_unpack([] { + return seastar::now(); + }); +} + +// Testing Crimson messenger (with msgr-v2 protocol) robustness against +// network delays and failures. 
The test includes stress tests and +// socket level delays/failures injection tests, letting time +// and randomness achieve the best test coverage. + +// Test Parameters (thrash_params_t values passed by do_test()): +// Servers: 8 (stateful) +// Clients: 32 (lossless) +// Connections: 50 (Generated between random clients/server) +// Random Operations: 120 (Generate/Drop Connection, Send Message, Sleep) +seastar::future<> test_stress(thrash_params_t tp) +{ + + logger().info("test_stress():"); + + SyntheticWorkload test_msg(tp.servers, tp.clients, 100, + crimson::net::SocketPolicy::stateful_server(0), + crimson::net::SocketPolicy::lossless_client(0)); + + return seastar::do_with(test_msg, [tp] + (SyntheticWorkload& test_msg) { + return seastar::do_until([&test_msg] { + return test_msg.servers_count() == test_msg.servers; }, + [&test_msg] { + entity_addr_t bind_addr = get_server_addr(); + bind_addr.set_type(entity_addr_t::TYPE_MSGR2); + uint64_t server_num = get_nonce(); + return test_msg.init_server(entity_name_t::OSD(server_num), + "server", server_num , bind_addr); + }).then([&test_msg] { + return seastar::do_until([&test_msg] { + return test_msg.clients_count() == test_msg.clients; }, + [&test_msg] { + return test_msg.init_client(entity_name_t::CLIENT(-1), + "client", get_nonce()); + }); + }).then([&test_msg, tp] { + return test_msg.generate_connections(tp.connections); + }).then([&test_msg, tp] { + return test_msg.random_op(tp.random_op); + }).then([&test_msg] { + return test_msg.wait_for_done(); + }).then([] { + logger().info("test_stress() DONE"); + }).handle_exception([] (auto eptr) { + logger().error( + "test_stress() failed: got exception {}", + eptr); + throw; + }); + }); +} + +// Test Parameters (thrash_params_t values passed by do_test()): +// Servers: 16 (stateful) +// Clients: 32 (lossless) +// Connections: 50 (Generated between random clients/server) +// Random Operations: 120 (Generate/Drop Connection, Send Message, Sleep) +seastar::future<> test_injection(thrash_params_t tp) +{ + + logger().info("test_injection():"); + + 
SyntheticWorkload test_msg(tp.servers, tp.clients, 100, + crimson::net::SocketPolicy::stateful_server(0), + crimson::net::SocketPolicy::lossless_client(0)); + + return seastar::do_with(test_msg, [tp] + (SyntheticWorkload& test_msg) { + return seastar::do_until([&test_msg] { + return test_msg.servers_count() == test_msg.servers; }, + [&test_msg] { + entity_addr_t bind_addr = get_server_addr(); + bind_addr.set_type(entity_addr_t::TYPE_MSGR2); + uint64_t server_num = get_nonce(); + return test_msg.init_server(entity_name_t::OSD(server_num), + "server", server_num , bind_addr); + }).then([&test_msg] { + return seastar::do_until([&test_msg] { + return test_msg.clients_count() == test_msg.clients; }, + [&test_msg] { + return test_msg.init_client(entity_name_t::CLIENT(-1), + "client", get_nonce()); + }); + }).then([] { + return seastar::when_all_succeed( + local_conf().set_val("ms_inject_socket_failures", "30"), + local_conf().set_val("ms_inject_internal_delays", "0.1"), + local_conf().set_val("ms_inject_delay_probability", "1"), + local_conf().set_val("ms_inject_delay_max", "5")); + }).then_unpack([] { + return seastar::now(); + }).then([&test_msg, tp] { + return test_msg.generate_connections(tp.connections); + }).then([&test_msg, tp] { + return test_msg.random_op(tp.random_op); + }).then([&test_msg] { + return test_msg.wait_for_done(); + }).then([] { + logger().info("test_injection() DONE"); + return seastar::now(); + }).then([] { + return reset_conf(); + }).handle_exception([] (auto eptr) { + logger().error( + "test_injection() failed: got exception {}", + eptr); + return seastar::make_exception_future<>(eptr); // propagate: a bare 'throw;' has no active exception here + }); + }); +} + +} + +seastar::future<int> do_test(seastar::app_template& app) +{ + std::vector<const char*> args; + std::string cluster; + std::string conf_file_list; + auto init_params = ceph_argparse_early_args(args, + CEPH_ENTITY_TYPE_CLIENT, + &cluster, + &conf_file_list); + return crimson::common::sharded_conf().start( + init_params.name, cluster + ).then([] { + return local_conf().start(); + 
}).then([conf_file_list] { + return local_conf().parse_config_files(conf_file_list); + }).then([&app] { + auto&& config = app.configuration(); + verbose = config["verbose"].as<bool>(); + return test_stress(thrash_params_t{8, 32, 50, 120}) + .then([] { + return test_injection(thrash_params_t{16, 32, 50, 120}); + }).then([] { + logger().info("All tests succeeded"); + // Seastar has bugs to have events undispatched during shutdown, + // which will result in memory leak and thus fail LeakSanitizer. + return seastar::sleep(100ms); + }); + }).then([] { + return crimson::common::sharded_conf().stop(); + }).then([] { + return 0; + }).handle_exception([] (auto eptr) { + logger().error("Test failed: got exception {}", eptr); + return 1; + }); +} + +int main(int argc, char** argv) +{ + seastar::app_template app; + app.add_options() + ("verbose,v", bpo::value<bool>()->default_value(false), + "chatty if true"); + return app.run(argc, argv, [&app] { + return do_test(app); + }); +} diff --git a/src/test/crimson/test_monc.cc b/src/test/crimson/test_monc.cc new file mode 100644 index 000000000..e60df4525 --- /dev/null +++ b/src/test/crimson/test_monc.cc @@ -0,0 +1,84 @@ +#include <seastar/core/app-template.hh> +#include "common/ceph_argparse.h" +#include "crimson/common/auth_handler.h" +#include "crimson/common/config_proxy.h" +#include "crimson/mon/MonClient.h" +#include "crimson/net/Connection.h" +#include "crimson/net/Messenger.h" + +using Config = crimson::common::ConfigProxy; +using MonClient = crimson::mon::Client; + +namespace { + +class DummyAuthHandler : public crimson::common::AuthHandler { +public: + void handle_authentication(const EntityName& name, + const AuthCapsInfo& caps) final + {} +}; + +DummyAuthHandler dummy_handler; + +} + +using namespace std::literals; + +static seastar::future<> test_monc() +{ + return crimson::common::sharded_conf().start(EntityName{}, "ceph"sv).then([] { + std::vector<const char*> args; + std::string cluster; + std::string conf_file_list; 
+ auto init_params = ceph_argparse_early_args(args, + CEPH_ENTITY_TYPE_CLIENT, + &cluster, + &conf_file_list); + auto& conf = crimson::common::local_conf(); + conf->name = init_params.name; + conf->cluster = cluster; + return conf.parse_config_files(conf_file_list); + }).then([] { + return crimson::common::sharded_perf_coll().start(); + }).then([]() mutable { + auto msgr = crimson::net::Messenger::create(entity_name_t::OSD(0), "monc", 0, true); + return seastar::do_with(MonClient{*msgr, dummy_handler}, + [msgr](auto& monc) mutable { + return msgr->start({&monc}).then([&monc] { + return seastar::with_timeout( + seastar::lowres_clock::now() + std::chrono::seconds{10}, + monc.start()); + }).then([&monc] { + return monc.stop(); + }); + }).finally([msgr] { + return msgr->shutdown(); + }); + }).finally([] { + return crimson::common::sharded_perf_coll().stop().then([] { + return crimson::common::sharded_conf().stop(); + }); + }); +} + +int main(int argc, char** argv) +{ + seastar::app_template app; + return app.run(argc, argv, [&] { + return test_monc().then([] { + std::cout << "All tests succeeded" << std::endl; + }).handle_exception([] (auto eptr) { + std::cout << "Test failure" << std::endl; + return seastar::make_exception_future<>(eptr); + }); + }); +} + + +/* + * Local Variables: + * compile-command: "make -j4 \ + * -C ../../../build \ + * unittest_seastar_monc" + * End: + */ diff --git a/src/test/crimson/test_perfcounters.cc b/src/test/crimson/test_perfcounters.cc new file mode 100644 index 000000000..8aecbf911 --- /dev/null +++ b/src/test/crimson/test_perfcounters.cc @@ -0,0 +1,62 @@ +#include <pthread.h> +#include <stdlib.h> +#include <iostream> +#include <fmt/format.h> + +#include "common/Formatter.h" +#include "common/perf_counters.h" +#include "crimson/common/perf_counters_collection.h" + +#include <seastar/core/app-template.hh> +#include <seastar/core/sharded.hh> + +enum { + PERFTEST_FIRST = 1000000, + PERFTEST_INDEX, + PERFTEST_LAST, +}; + +static constexpr 
uint64_t PERF_VAL = 42; + +static seastar::future<> test_perfcounters(){ + return crimson::common::sharded_perf_coll().start().then([] { + return crimson::common::sharded_perf_coll().invoke_on_all([] (auto& s){ + std::string name =fmt::format("seastar-osd::shard-{}",seastar::this_shard_id()); + PerfCountersBuilder plb(NULL, name, PERFTEST_FIRST,PERFTEST_LAST); + plb.add_u64_counter(PERFTEST_INDEX, "perftest_count", "count perftest"); + auto perf_logger = plb.create_perf_counters(); + perf_logger->inc(PERFTEST_INDEX,PERF_VAL); + s.get_perf_collection()->add(perf_logger); + }); + }).then([]{ + return crimson::common::sharded_perf_coll().invoke_on_all([] (auto& s){ + auto pcc = s.get_perf_collection(); + pcc->with_counters([](auto& by_path){ + for (auto& perf_counter : by_path) { + if (PERF_VAL != perf_counter.second.perf_counters->get(PERFTEST_INDEX)) { + throw std::runtime_error("perf counter does not match"); + } + } + }); + }); + }).finally([] { + return crimson::common::sharded_perf_coll().stop(); + }); + +} + +int main(int argc, char** argv) +{ + seastar::app_template app; + return app.run(argc, argv, [&] { + return test_perfcounters().then([] { + std::cout << "All tests succeeded" << std::endl; + }).handle_exception([] (auto eptr) { + std::cout << "Test failure" << std::endl; + return seastar::make_exception_future<>(eptr); + }); + }); + +} + + diff --git a/src/test/crimson/test_socket.cc b/src/test/crimson/test_socket.cc new file mode 100644 index 000000000..2b61196ea --- /dev/null +++ b/src/test/crimson/test_socket.cc @@ -0,0 +1,558 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "common/ceph_argparse.h" +#include <fmt/os.h> +#include <seastar/core/app-template.hh> +#include <seastar/core/gate.hh> +#include <seastar/core/sharded.hh> +#include <seastar/core/sleep.hh> +#include <seastar/core/when_all.hh> +#include <seastar/util/later.hh> + +#include "crimson/common/log.h" +#include 
"crimson/net/Errors.h" +#include "crimson/net/Fwd.h" +#include "crimson/net/Socket.h" + +using crimson::common::local_conf; + +namespace { + +using namespace std::chrono_literals; + +using seastar::engine; +using seastar::future; +using crimson::net::error; +using crimson::net::listen_ertr; +using crimson::net::ShardedServerSocket; +using crimson::net::Socket; +using crimson::net::SocketRef; +using crimson::net::stop_t; + +using SocketFRef = seastar::foreign_ptr<SocketRef>; + +seastar::logger &logger() { + return crimson::get_logger(ceph_subsys_test); +} + +entity_addr_t get_server_addr() { + entity_addr_t saddr; + saddr.parse("127.0.0.1", nullptr); + saddr.set_port(9020); + return saddr; +} + +future<SocketRef> socket_connect(const entity_addr_t& saddr) { + logger().debug("socket_connect() to {} ...", saddr); + return Socket::connect(saddr).then([](auto socket) { + logger().debug("socket_connect() connected"); + return socket; + }); +} + +future<> test_refused() { + logger().info("test_refused()..."); + auto saddr = get_server_addr(); + return socket_connect(saddr).discard_result().then([saddr] { + logger().error("test_refused(): connection to {} is not refused", saddr); + ceph_abort(); + }).handle_exception_type([](const std::system_error& e) { + if (e.code() != std::errc::connection_refused) { + logger().error("test_refused() got unexpeted error {}", e); + ceph_abort(); + } else { + logger().info("test_refused() ok\n"); + } + }).handle_exception([](auto eptr) { + logger().error("test_refused() got unexpeted exception {}", eptr); + ceph_abort(); + }); +} + +future<> test_bind_same(bool is_fixed_cpu) { + logger().info("test_bind_same()..."); + return ShardedServerSocket::create(is_fixed_cpu + ).then([is_fixed_cpu](auto pss1) { + auto saddr = get_server_addr(); + return pss1->listen(saddr).safe_then([saddr, is_fixed_cpu] { + // try to bind the same address + return ShardedServerSocket::create(is_fixed_cpu + ).then([saddr](auto pss2) { + return 
pss2->listen(saddr).safe_then([] { + logger().error("test_bind_same() should raise address_in_use"); + ceph_abort(); + }, listen_ertr::all_same_way( + [](const std::error_code& e) { + if (e == std::errc::address_in_use) { + // successful! + logger().info("test_bind_same() ok\n"); + } else { + logger().error("test_bind_same() got unexpected error {}", e); + ceph_abort(); + } + // Note: need to return a explicit ready future, or there will be a + // runtime error: member access within null pointer of type 'struct promise_base' + return seastar::now(); + })).then([pss2] { + return pss2->shutdown_destroy(); + }); + }); + }, listen_ertr::all_same_way( + [saddr](const std::error_code& e) { + logger().error("test_bind_same(): there is another instance running at {}", + saddr); + ceph_abort(); + })).then([pss1] { + return pss1->shutdown_destroy(); + }).handle_exception([](auto eptr) { + logger().error("test_bind_same() got unexpeted exception {}", eptr); + ceph_abort(); + }); + }); +} + +future<> test_accept(bool is_fixed_cpu) { + logger().info("test_accept()"); + return ShardedServerSocket::create(is_fixed_cpu + ).then([](auto pss) { + auto saddr = get_server_addr(); + return pss->listen(saddr + ).safe_then([pss] { + return pss->accept([](auto socket, auto paddr) { + logger().info("test_accept(): accepted at shard {}", seastar::this_shard_id()); + // simple accept + return seastar::sleep(100ms + ).then([socket = std::move(socket)]() mutable { + return socket->close( + ).finally([cleanup = std::move(socket)] {}); + }); + }); + }, listen_ertr::all_same_way( + [saddr](const std::error_code& e) { + logger().error("test_accept(): there is another instance running at {}", + saddr); + ceph_abort(); + })).then([saddr] { + return seastar::when_all( + socket_connect(saddr).then([](auto socket) { + return socket->close().finally([cleanup = std::move(socket)] {}); }), + socket_connect(saddr).then([](auto socket) { + return socket->close().finally([cleanup = std::move(socket)] {}); 
}), + socket_connect(saddr).then([](auto socket) { + return socket->close().finally([cleanup = std::move(socket)] {}); }) + ).discard_result(); + }).then([] { + // should be enough to be connected locally + return seastar::sleep(50ms); + }).then([] { + logger().info("test_accept() ok\n"); + }).then([pss] { + return pss->shutdown_destroy(); + }).handle_exception([](auto eptr) { + logger().error("test_accept() got unexpeted exception {}", eptr); + ceph_abort(); + }); + }); +} + +class SocketFactory { + static constexpr seastar::shard_id CLIENT_CPU = 0u; + SocketRef client_socket; + seastar::promise<> server_connected; + + static constexpr seastar::shard_id SERVER_CPU = 1u; + ShardedServerSocket *pss = nullptr; + + seastar::shard_id server_socket_CPU; + SocketFRef server_socket; + + public: + template <typename FuncC, typename FuncS> + static future<> dispatch_sockets( + bool is_fixed_cpu, + FuncC&& cb_client, + FuncS&& cb_server) { + ceph_assert_always(seastar::this_shard_id() == CLIENT_CPU); + auto owner = std::make_unique<SocketFactory>(); + auto psf = owner.get(); + auto saddr = get_server_addr(); + return seastar::smp::submit_to(SERVER_CPU, [psf, saddr, is_fixed_cpu] { + return ShardedServerSocket::create(is_fixed_cpu + ).then([psf, saddr](auto pss) { + psf->pss = pss; + return pss->listen(saddr + ).safe_then([] { + }, listen_ertr::all_same_way([saddr](const std::error_code& e) { + logger().error("dispatch_sockets(): there is another instance running at {}", + saddr); + ceph_abort(); + })); + }); + }).then([psf, saddr] { + return seastar::when_all_succeed( + seastar::smp::submit_to(CLIENT_CPU, [psf, saddr] { + return socket_connect(saddr).then([psf](auto socket) { + ceph_assert_always(seastar::this_shard_id() == CLIENT_CPU); + psf->client_socket = std::move(socket); + }); + }), + seastar::smp::submit_to(SERVER_CPU, [psf] { + return psf->pss->accept([psf](auto _socket, auto paddr) { + logger().info("dispatch_sockets(): accepted at shard {}", + 
seastar::this_shard_id()); + psf->server_socket_CPU = seastar::this_shard_id(); + if (psf->pss->is_fixed_shard_dispatching()) { + ceph_assert_always(SERVER_CPU == seastar::this_shard_id()); + } + SocketFRef socket = seastar::make_foreign(std::move(_socket)); + psf->server_socket = std::move(socket); + return seastar::smp::submit_to(CLIENT_CPU, [psf] { + psf->server_connected.set_value(); + }); + }); + }) + ); + }).then_unpack([] { + return seastar::now(); + }).then([psf] { + return psf->server_connected.get_future(); + }).then([psf] { + if (psf->pss) { + return seastar::smp::submit_to(SERVER_CPU, [psf] { + return psf->pss->shutdown_destroy(); + }); + } + return seastar::now(); + }).then([psf, + cb_client = std::move(cb_client), + cb_server = std::move(cb_server)]() mutable { + logger().debug("dispatch_sockets(): client/server socket are ready"); + return seastar::when_all_succeed( + seastar::smp::submit_to(CLIENT_CPU, + [socket = psf->client_socket.get(), cb_client = std::move(cb_client)] { + return cb_client(socket).then([socket] { + logger().debug("closing client socket..."); + return socket->close(); + }).handle_exception([](auto eptr) { + logger().error("dispatch_sockets():" + " cb_client() got unexpeted exception {}", eptr); + ceph_abort(); + }); + }), + seastar::smp::submit_to(psf->server_socket_CPU, + [socket = psf->server_socket.get(), cb_server = std::move(cb_server)] { + return cb_server(socket).then([socket] { + logger().debug("closing server socket..."); + return socket->close(); + }).handle_exception([](auto eptr) { + logger().error("dispatch_sockets():" + " cb_server() got unexpeted exception {}", eptr); + ceph_abort(); + }); + }) + ); + }).then_unpack([] { + return seastar::now(); + }).finally([cleanup = std::move(owner)] {}); + } +}; + +class Connection { + static const uint64_t DATA_TAIL = 5327; + static const unsigned DATA_SIZE = 4096; + std::array<uint64_t, DATA_SIZE> data = {0}; + + void verify_data_read(const uint64_t read_data[]) { + 
ceph_assert(read_data[0] == read_count); + ceph_assert(read_data[DATA_SIZE - 1] == DATA_TAIL); // compare the received tail marker (was an assignment to the member) + } + + Socket* socket = nullptr; + uint64_t write_count = 0; + uint64_t read_count = 0; + + Connection(Socket* socket) : socket{socket} { + assert(socket); + data[DATA_SIZE - 1] = DATA_TAIL; + } + + future<> dispatch_write(unsigned round = 0, bool force_shut = false) { + logger().debug("dispatch_write(round={}, force_shut={})...", round, force_shut); + return seastar::repeat([this, round, force_shut] { + if (round != 0 && round <= write_count) { + return seastar::futurize_invoke([this, force_shut] { + if (force_shut) { + logger().debug("dispatch_write() done, force shutdown output"); + socket->force_shutdown_out(); + } else { + logger().debug("dispatch_write() done"); + } + }).then([] { + return seastar::make_ready_future<stop_t>(stop_t::yes); + }); + } else { + data[0] = write_count; + bufferlist bl; + bl.append(buffer::copy( + reinterpret_cast<const char*>(&data), sizeof(data))); + return socket->write(bl + ).then([this] { + return socket->flush(); + }).then([this] { + write_count += 1; + return seastar::make_ready_future<stop_t>(stop_t::no); + }); + } + }); + } + + future<> dispatch_write_unbounded() { + return dispatch_write( + ).then([] { + ceph_abort(); + }).handle_exception_type([this](const std::system_error& e) { + if (e.code() != std::errc::broken_pipe && + e.code() != std::errc::connection_reset) { + logger().error("dispatch_write_unbounded(): " + "unexpected error {}", e); + throw; + } + // successful + logger().debug("dispatch_write_unbounded(): " + "expected error {}", e); + shutdown(); + }); + } + + future<> dispatch_read(unsigned round = 0, bool force_shut = false) { + logger().debug("dispatch_read(round={}, force_shut={})...", round, force_shut); + return seastar::repeat([this, round, force_shut] { + if (round != 0 && round <= read_count) { + return seastar::futurize_invoke([this, force_shut] { + if (force_shut) { + logger().debug("dispatch_read() done, force 
shutdown input"); + socket->force_shutdown_in(); + } else { + logger().debug("dispatch_read() done"); + } + }).then([] { + return seastar::make_ready_future<stop_t>(stop_t::yes); + }); + } else { + return seastar::futurize_invoke([this] { + // we want to test both Socket::read() and Socket::read_exactly() + if (read_count % 2) { + return socket->read(DATA_SIZE * sizeof(uint64_t) + ).then([this](ceph::bufferlist bl) { + uint64_t read_data[DATA_SIZE]; + auto p = bl.cbegin(); + ::ceph::decode_raw(read_data, p); + verify_data_read(read_data); + }); + } else { + return socket->read_exactly(DATA_SIZE * sizeof(uint64_t) + ).then([this](auto bptr) { + uint64_t read_data[DATA_SIZE]; + std::memcpy(read_data, bptr.c_str(), DATA_SIZE * sizeof(uint64_t)); + verify_data_read(read_data); + }); + } + }).then([this] { + ++read_count; + return seastar::make_ready_future<stop_t>(stop_t::no); + }); + } + }); + } + + future<> dispatch_read_unbounded() { + return dispatch_read( + ).then([] { + ceph_abort(); + }).handle_exception_type([this](const std::system_error& e) { + if (e.code() != error::read_eof + && e.code() != std::errc::connection_reset) { + logger().error("dispatch_read_unbounded(): " + "unexpected error {}", e); + throw; + } + // successful + logger().debug("dispatch_read_unbounded(): " + "expected error {}", e); + shutdown(); + }); + } + + void shutdown() { + socket->shutdown(); + } + + public: + static future<> dispatch_rw_bounded(Socket* socket, unsigned round, + bool force_shut = false) { + logger().debug("dispatch_rw_bounded(round={}, force_shut={})...", + round, force_shut); + return seastar::do_with(Connection{socket}, + [round, force_shut](auto& conn) { + ceph_assert(round != 0); + return seastar::when_all_succeed( + conn.dispatch_write(round, force_shut), + conn.dispatch_read(round, force_shut) + ).then_unpack([] { + return seastar::now(); + }); + }); + } + + static future<> dispatch_rw_unbounded(Socket* socket, bool preemptive_shut = false) { + 
logger().debug("dispatch_rw_unbounded(preemptive_shut={})...", preemptive_shut); + return seastar::do_with(Connection{socket}, [preemptive_shut](auto& conn) { + return seastar::when_all_succeed( + conn.dispatch_write_unbounded(), + conn.dispatch_read_unbounded(), + seastar::futurize_invoke([&conn, preemptive_shut] { + if (preemptive_shut) { + return seastar::sleep(100ms).then([&conn] { + logger().debug("dispatch_rw_unbounded() shutdown socket preemptively(100ms)"); + conn.shutdown(); + }); + } else { + return seastar::now(); + } + }) + ).then_unpack([] { + return seastar::now(); + }); + }); + } +}; + +future<> test_read_write(bool is_fixed_cpu) { + logger().info("test_read_write()..."); + return SocketFactory::dispatch_sockets( + is_fixed_cpu, + [](auto cs) { return Connection::dispatch_rw_bounded(cs, 128); }, + [](auto ss) { return Connection::dispatch_rw_bounded(ss, 128); } + ).then([] { + logger().info("test_read_write() ok\n"); + }).handle_exception([](auto eptr) { + logger().error("test_read_write() got unexpeted exception {}", eptr); + ceph_abort(); + }); +} + +future<> test_unexpected_down(bool is_fixed_cpu) { + logger().info("test_unexpected_down()..."); + return SocketFactory::dispatch_sockets( + is_fixed_cpu, + [](auto cs) { + return Connection::dispatch_rw_bounded(cs, 128, true + ).handle_exception_type([](const std::system_error& e) { + logger().debug("test_unexpected_down(): client get error {}", e); + ceph_assert(e.code() == error::read_eof); + }); + }, + [](auto ss) { return Connection::dispatch_rw_unbounded(ss); } + ).then([] { + logger().info("test_unexpected_down() ok\n"); + }).handle_exception([](auto eptr) { + logger().error("test_unexpected_down() got unexpeted exception {}", eptr); + ceph_abort(); + }); +} + +future<> test_shutdown_propagated(bool is_fixed_cpu) { + logger().info("test_shutdown_propagated()..."); + return SocketFactory::dispatch_sockets( + is_fixed_cpu, + [](auto cs) { + logger().debug("test_shutdown_propagated() shutdown 
client socket"); + cs->shutdown(); + return seastar::now(); + }, + [](auto ss) { return Connection::dispatch_rw_unbounded(ss); } + ).then([] { + logger().info("test_shutdown_propagated() ok\n"); + }).handle_exception([](auto eptr) { + logger().error("test_shutdown_propagated() got unexpeted exception {}", eptr); + ceph_abort(); + }); +} + +future<> test_preemptive_down(bool is_fixed_cpu) { + logger().info("test_preemptive_down()..."); + return SocketFactory::dispatch_sockets( + is_fixed_cpu, + [](auto cs) { return Connection::dispatch_rw_unbounded(cs, true); }, + [](auto ss) { return Connection::dispatch_rw_unbounded(ss); } + ).then([] { + logger().info("test_preemptive_down() ok\n"); + }).handle_exception([](auto eptr) { + logger().error("test_preemptive_down() got unexpeted exception {}", eptr); + ceph_abort(); + }); +} + +future<> do_test_with_type(bool is_fixed_cpu) { + return test_bind_same(is_fixed_cpu + ).then([is_fixed_cpu] { + return test_accept(is_fixed_cpu); + }).then([is_fixed_cpu] { + return test_read_write(is_fixed_cpu); + }).then([is_fixed_cpu] { + return test_unexpected_down(is_fixed_cpu); + }).then([is_fixed_cpu] { + return test_shutdown_propagated(is_fixed_cpu); + }).then([is_fixed_cpu] { + return test_preemptive_down(is_fixed_cpu); + }); +} + +} + +seastar::future<int> do_test(seastar::app_template& app) +{ + std::vector<const char*> args; + std::string cluster; + std::string conf_file_list; + auto init_params = ceph_argparse_early_args(args, + CEPH_ENTITY_TYPE_CLIENT, + &cluster, + &conf_file_list); + return crimson::common::sharded_conf().start( + init_params.name, cluster + ).then([] { + return local_conf().start(); + }).then([conf_file_list] { + return local_conf().parse_config_files(conf_file_list); + }).then([] { + return local_conf().set_val("ms_inject_internal_delays", "0"); + }).then([] { + return test_refused(); + }).then([] { + return do_test_with_type(true); + }).then([] { + return do_test_with_type(false); + }).then([] { + 
logger().info("All tests succeeded"); + // Seastar has bugs to have events undispatched during shutdown, + // which will result in memory leak and thus fail LeakSanitizer. + return seastar::sleep(100ms); + }).then([] { + return crimson::common::sharded_conf().stop(); + }).then([] { + return 0; + }).handle_exception([](auto eptr) { + logger().error("Test failed: got exception {}", eptr); + return 1; + }); +} + +int main(int argc, char** argv) +{ + seastar::app_template app; + return app.run(argc, argv, [&app] { + return do_test(app); + }); +} |