summaryrefslogtreecommitdiffstats
path: root/src/test/crimson
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/test/crimson
parentInitial commit. (diff)
downloadceph-upstream/18.2.2.tar.xz
ceph-upstream/18.2.2.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/test/crimson')
-rw-r--r--src/test/crimson/CMakeLists.txt105
-rw-r--r--src/test/crimson/cbt/radosbench_4K_read.yaml36
-rw-r--r--src/test/crimson/cbt/radosbench_4K_write.yaml34
-rwxr-xr-xsrc/test/crimson/cbt/t2c.py78
-rw-r--r--src/test/crimson/gtest_seastar.cc65
-rw-r--r--src/test/crimson/gtest_seastar.h35
-rw-r--r--src/test/crimson/seastar_runner.h102
-rw-r--r--src/test/crimson/seastore/CMakeLists.txt128
-rw-r--r--src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc105
-rw-r--r--src/test/crimson/seastore/onode_tree/CMakeLists.txt15
-rw-r--r--src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc330
-rw-r--r--src/test/crimson/seastore/onode_tree/test_staged_fltree.cc1792
-rw-r--r--src/test/crimson/seastore/onode_tree/test_value.h240
-rw-r--r--src/test/crimson/seastore/test_block.cc41
-rw-r--r--src/test/crimson/seastore/test_block.h154
-rw-r--r--src/test/crimson/seastore/test_btree_lba_manager.cc752
-rw-r--r--src/test/crimson/seastore/test_cbjournal.cc583
-rw-r--r--src/test/crimson/seastore/test_collection_manager.cc195
-rw-r--r--src/test/crimson/seastore/test_extent_allocator.cc181
-rw-r--r--src/test/crimson/seastore/test_object_data_handler.cc431
-rw-r--r--src/test/crimson/seastore/test_omap_manager.cc730
-rw-r--r--src/test/crimson/seastore/test_randomblock_manager.cc178
-rw-r--r--src/test/crimson/seastore/test_seastore.cc1268
-rw-r--r--src/test/crimson/seastore/test_seastore_cache.cc260
-rw-r--r--src/test/crimson/seastore/test_seastore_journal.cc343
-rw-r--r--src/test/crimson/seastore/test_transaction_manager.cc1995
-rw-r--r--src/test/crimson/seastore/transaction_manager_test_state.h450
-rw-r--r--src/test/crimson/test_alien_echo.cc294
-rw-r--r--src/test/crimson/test_alienstore_thread_pool.cc78
-rw-r--r--src/test/crimson/test_async_echo.cc234
-rw-r--r--src/test/crimson/test_backfill.cc501
-rw-r--r--src/test/crimson/test_buffer.cc50
-rw-r--r--src/test/crimson/test_config.cc109
-rw-r--r--src/test/crimson/test_denc.cc53
-rw-r--r--src/test/crimson/test_errorator.cc99
-rw-r--r--src/test/crimson/test_fixed_kv_node_layout.cc376
-rw-r--r--src/test/crimson/test_interruptible_future.cc301
-rw-r--r--src/test/crimson/test_lru.cc213
-rw-r--r--src/test/crimson/test_messenger.cc3874
-rw-r--r--src/test/crimson/test_messenger.h95
-rw-r--r--src/test/crimson/test_messenger_peer.cc462
-rw-r--r--src/test/crimson/test_messenger_thrash.cc672
-rw-r--r--src/test/crimson/test_monc.cc84
-rw-r--r--src/test/crimson/test_perfcounters.cc62
-rw-r--r--src/test/crimson/test_socket.cc558
45 files changed, 18741 insertions, 0 deletions
diff --git a/src/test/crimson/CMakeLists.txt b/src/test/crimson/CMakeLists.txt
new file mode 100644
index 000000000..b1851cca2
--- /dev/null
+++ b/src/test/crimson/CMakeLists.txt
@@ -0,0 +1,105 @@
+# the crimson's backfill doesn't need nor use seastar
+add_executable(unittest-crimson-backfill
+ test_backfill.cc
+ ${PROJECT_SOURCE_DIR}/src/auth/Crypto.cc
+ ${PROJECT_SOURCE_DIR}/src/crimson/osd/backfill_state.cc
+ ${PROJECT_SOURCE_DIR}/src/osd/recovery_types.cc)
+add_ceph_unittest(unittest-crimson-backfill
+ --memory 256M --smp 1)
+target_link_libraries(unittest-crimson-backfill crimson GTest::Main)
+
+add_executable(unittest-seastar-buffer
+ test_buffer.cc)
+add_ceph_unittest(unittest-seastar-buffer
+ --memory 256M --smp 1)
+target_link_libraries(unittest-seastar-buffer crimson)
+
+add_executable(unittest-seastar-denc
+ test_denc.cc)
+add_ceph_unittest(unittest-seastar-denc --memory 256M --smp 1)
+target_link_libraries(unittest-seastar-denc crimson GTest::Main)
+
+add_executable(unittest-seastar-socket test_socket.cc)
+add_ceph_unittest(unittest-seastar-socket
+ --memory 256M --smp 4)
+target_link_libraries(unittest-seastar-socket crimson)
+
+add_executable(unittest-seastar-messenger test_messenger.cc)
+add_ceph_unittest(unittest-seastar-messenger
+ --memory 256M --smp 4)
+target_link_libraries(unittest-seastar-messenger crimson)
+
+add_executable(test-seastar-messenger-peer test_messenger_peer.cc)
+target_link_libraries(test-seastar-messenger-peer ceph-common global ${ALLOC_LIBS})
+
+add_executable(test-seastar-echo
+ test_alien_echo.cc)
+target_link_libraries(test-seastar-echo crimson)
+
+add_executable(test-async-echo
+ test_async_echo.cc)
+target_link_libraries(test-async-echo ceph-common global)
+
+add_executable(unittest-seastar-alienstore-thread-pool
+ test_alienstore_thread_pool.cc
+ ${PROJECT_SOURCE_DIR}/src/crimson/osd/lsan_suppressions.cc)
+add_ceph_unittest(unittest-seastar-alienstore-thread-pool
+ --memory 256M --smp 1)
+target_link_libraries(unittest-seastar-alienstore-thread-pool
+ crimson-alienstore
+ crimson)
+
+add_executable(unittest-seastar-config
+ test_config.cc)
+add_ceph_unittest(unittest-seastar-config
+ --memory 256M --smp 4)
+target_link_libraries(unittest-seastar-config crimson)
+
+add_executable(unittest-seastar-monc
+ test_monc.cc)
+target_link_libraries(unittest-seastar-monc crimson)
+
+add_executable(unittest-seastar-perfcounters
+ test_perfcounters.cc)
+add_ceph_unittest(unittest-seastar-perfcounters
+ --memory 256M --smp 1)
+target_link_libraries(unittest-seastar-perfcounters crimson)
+
+add_executable(unittest-seastar-lru
+ test_lru.cc)
+add_ceph_unittest(unittest-seastar-lru
+ --memory 256M --smp 1)
+target_link_libraries(unittest-seastar-lru crimson GTest::Main)
+
+add_executable(unittest-fixed-kv-node-layout
+ test_fixed_kv_node_layout.cc)
+add_ceph_unittest(unittest-fixed-kv-node-layout)
+
+add_executable(unittest-interruptible-future
+ test_interruptible_future.cc
+ gtest_seastar.cc)
+add_ceph_unittest(unittest-interruptible-future
+ --memory 256M --smp 1)
+target_link_libraries(
+ unittest-interruptible-future
+ crimson-common)
+
+add_executable(unittest-seastar-messenger-thrash test_messenger_thrash.cc)
+add_ceph_unittest(unittest-seastar-messenger-thrash
+ --memory 256M --smp 1)
+target_link_libraries(unittest-seastar-messenger-thrash crimson)
+
+add_subdirectory(seastore)
+
+add_library(crimson-gtest STATIC
+ gtest_seastar.cc)
+target_link_libraries(crimson-gtest crimson-common GTest::GTest)
+add_library(crimson::gtest ALIAS crimson-gtest)
+
+add_executable(unittest-seastar-errorator
+ test_errorator.cc)
+target_link_libraries(
+ unittest-seastar-errorator
+ crimson::gtest)
+add_ceph_unittest(unittest-seastar-errorator
+ --memory 256M --smp 1)
diff --git a/src/test/crimson/cbt/radosbench_4K_read.yaml b/src/test/crimson/cbt/radosbench_4K_read.yaml
new file mode 100644
index 000000000..219ce643a
--- /dev/null
+++ b/src/test/crimson/cbt/radosbench_4K_read.yaml
@@ -0,0 +1,36 @@
+meta:
+- desc: |
+ Run radosbench benchmark using cbt.
+ 4K read workload.
+
+tasks:
+- cbt:
+ benchmarks:
+ radosbench:
+ concurrent_ops: 16
+ concurrent_procs: 2
+ op_size: [4096]
+ pool_profile: 'replicated'
+ read_time: 30
+ read_only: true
+ readmode: 'rand'
+ prefill_time: 3
+ acceptable:
+ bandwidth: '(or (greater) (near 0.05))'
+ iops_avg: '(or (greater) (near 0.05))'
+ iops_stddev: '(or (less) (near 2.00))'
+ latency_avg: '(or (less) (near 0.05))'
+ cpu_cycles_per_op: '(or (less) (near 0.05))'
+ monitoring_profiles:
+ perf:
+ nodes:
+ - osds
+ args: 'stat -p {pid} -o {perf_dir}/perf_stat.{pid}'
+ cluster:
+ osds_per_node: 3
+ iterations: 1
+ pool_profiles:
+ replicated:
+ pg_size: 128
+ pgp_size: 128
+ replication: 'replicated'
diff --git a/src/test/crimson/cbt/radosbench_4K_write.yaml b/src/test/crimson/cbt/radosbench_4K_write.yaml
new file mode 100644
index 000000000..526982b10
--- /dev/null
+++ b/src/test/crimson/cbt/radosbench_4K_write.yaml
@@ -0,0 +1,34 @@
+meta:
+- desc: |
+ Run radosbench benchmark using cbt.
+ 4K write workload.
+
+tasks:
+- cbt:
+ benchmarks:
+ radosbench:
+ concurrent_ops: 16
+ concurrent_procs: 2
+ op_size: [4096]
+ pool_profile: 'replicated'
+ write_time: 3
+ write_only: true
+ acceptable:
+ bandwidth: '(or (greater) (near 0.05))'
+ iops_avg: '(or (greater) (near 0.05))'
+ iops_stddev: '(or (less) (near 2.00))'
+ latency_avg: '(or (less) (near 0.05))'
+ cpu_cycles_per_op: '(or (less) (near 0.05))'
+ monitoring_profiles:
+ perf:
+ nodes:
+ - osds
+ args: 'stat -p {pid} -o {perf_dir}/perf_stat.{pid}'
+ cluster:
+ osds_per_node: 3
+ iterations: 1
+ pool_profiles:
+ replicated:
+ pg_size: 128
+ pgp_size: 128
+ replication: 'replicated'
diff --git a/src/test/crimson/cbt/t2c.py b/src/test/crimson/cbt/t2c.py
new file mode 100755
index 000000000..0d4ee49e5
--- /dev/null
+++ b/src/test/crimson/cbt/t2c.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+from __future__ import print_function
+import argparse
+import os
+import os.path
+import socket
+import sys
+import yaml
+
+
+class Translator(object):
+ def __init__(self, build_dir):
+ self.build_dir = build_dir
+
+ def translate(self, config):
+ cluster = config.get('cluster', {})
+ benchmarks = config.get('benchmarks', [])
+ monitoring_profiles = config.get('monitoring_profiles', {})
+ return dict(cluster=self._create_cluster_config(cluster),
+ benchmarks=benchmarks,
+ monitoring_profiles=monitoring_profiles)
+
+ def _create_cluster_config(self, cluster):
+ # prepare the "cluster" section consumed by CBT
+ localhost = socket.getfqdn()
+ num_osds = cluster.get('osds_per_node', 3)
+ items_to_copy = ['iterations', 'pool_profiles']
+ conf = dict((k, cluster[k]) for k in items_to_copy if k in cluster)
+ conf.update(dict(
+ head=localhost,
+ osds=[localhost],
+ osds_per_node=num_osds,
+ mons=[localhost],
+ clients=[localhost],
+ rebuild_every_test=False,
+ conf_file=os.path.join(self.build_dir, 'ceph.conf'),
+ ceph_cmd=os.path.join(self.build_dir, 'bin', 'ceph'),
+ rados_cmd=os.path.join(self.build_dir, 'bin', 'rados'),
+ pid_dir=os.path.join(self.build_dir, 'out')
+ ))
+ return conf
+
+def get_cbt_tasks(path):
+ with open(path) as input:
+ teuthology_config = yaml.load(input)
+ for task in teuthology_config['tasks']:
+ for name, conf in task.items():
+ if name == 'cbt':
+ yield conf
+
+def main():
+ parser = argparse.ArgumentParser(description='translate teuthology yaml to CBT yaml')
+ parser.add_argument('--build-dir',
+ default=os.getcwd(),
+ required=False,
+ help='Directory where CMakeCache.txt is located')
+ parser.add_argument('--input',
+ required=True,
+ help='The path to the input YAML file')
+ parser.add_argument('--output',
+ required=True,
+ help='The path to the output YAML file')
+ options = parser.parse_args(sys.argv[1:])
+ cbt_tasks = [task for task in get_cbt_tasks(options.input)]
+ if not cbt_tasks:
+ print('cbt not found in "tasks" section', file=sys.stderr)
+ return sys.exit(1)
+ elif len(cbt_tasks) > 1:
+ print('more than one cbt task found in "tasks" section', file=sys.stderr)
+ return sys.exit(1)
+ translator = Translator(options.build_dir)
+ cbt_config = translator.translate(cbt_tasks[0])
+ with open(options.output, 'w') as output:
+ yaml.dump(cbt_config, output)
+
+if __name__ == '__main__':
+ main()
diff --git a/src/test/crimson/gtest_seastar.cc b/src/test/crimson/gtest_seastar.cc
new file mode 100644
index 000000000..abb1f88f2
--- /dev/null
+++ b/src/test/crimson/gtest_seastar.cc
@@ -0,0 +1,65 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <cstdlib>
+#include <iostream>
+
+#include "include/ceph_assert.h"
+#include "gtest_seastar.h"
+
+#include "common/ceph_argparse.h"
+#include "crimson/common/config_proxy.h"
+#include "crimson/common/perf_counters_collection.h"
+
+SeastarRunner seastar_test_suite_t::seastar_env;
+
+int main(int argc, char **argv)
+{
+ // preprocess args
+ std::vector<const char*> args;
+ bool global_log_level_is_set = false;
+ const char* prefix_log_level = "--default-log-level";
+ for (int i = 0; i < argc; ++i) {
+ if (std::strncmp(argv[i], prefix_log_level,
+ std::strlen(prefix_log_level)) == 0) {
+ global_log_level_is_set = true;
+ }
+ args.push_back(argv[i]);
+ }
+ // HACK: differentiate between the `make check` bot and human user
+ // for the sake of log flooding
+ if (!global_log_level_is_set && !std::getenv("FOR_MAKE_CHECK")) {
+ std::cout << "WARNING: set default seastar log level to debug" << std::endl;
+ ++argc;
+ args.push_back("--default-log-level=debug");
+ }
+
+ auto app_argv = const_cast<char**>(args.data());
+ auto app_argc = static_cast<int>(args.size());
+ ::testing::InitGoogleTest(&app_argc, app_argv);
+
+ int ret = seastar_test_suite_t::seastar_env.init(app_argc, app_argv);
+ if (ret != 0) {
+ seastar_test_suite_t::seastar_env.stop();
+ return ret;
+ }
+
+ seastar_test_suite_t::seastar_env.run([] {
+ return crimson::common::sharded_conf().start(
+ EntityName{}, std::string_view{"ceph"}
+ ).then([] {
+ return crimson::common::sharded_perf_coll().start();
+ });
+ });
+
+ ret = RUN_ALL_TESTS();
+
+ seastar_test_suite_t::seastar_env.run([] {
+ return crimson::common::sharded_perf_coll().stop().then([] {
+ return crimson::common::sharded_conf().stop();
+ });
+ });
+
+ seastar_test_suite_t::seastar_env.stop();
+ return ret;
+}
diff --git a/src/test/crimson/gtest_seastar.h b/src/test/crimson/gtest_seastar.h
new file mode 100644
index 000000000..20709a3ee
--- /dev/null
+++ b/src/test/crimson/gtest_seastar.h
@@ -0,0 +1,35 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "gtest/gtest.h"
+
+#include "seastar_runner.h"
+
+struct seastar_test_suite_t : public ::testing::Test {
+ static SeastarRunner seastar_env;
+
+ template <typename Func>
+ void run(Func &&func) {
+ return seastar_env.run(std::forward<Func>(func));
+ }
+
+ template <typename Func>
+ void run_async(Func &&func) {
+ run(
+ [func=std::forward<Func>(func)]() mutable {
+ return seastar::async(std::forward<Func>(func));
+ });
+ }
+
+ virtual seastar::future<> set_up_fut() { return seastar::now(); }
+ void SetUp() final {
+ return run([this] { return set_up_fut(); });
+ }
+
+ virtual seastar::future<> tear_down_fut() { return seastar::now(); }
+ void TearDown() final {
+ return run([this] { return tear_down_fut(); });
+ }
+};
diff --git a/src/test/crimson/seastar_runner.h b/src/test/crimson/seastar_runner.h
new file mode 100644
index 000000000..58d3f8119
--- /dev/null
+++ b/src/test/crimson/seastar_runner.h
@@ -0,0 +1,102 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <stdio.h>
+#include <signal.h>
+#include <thread>
+
+#include <seastar/core/app-template.hh>
+#include <seastar/core/future-util.hh>
+#include <seastar/core/reactor.hh>
+#include <seastar/core/alien.hh>
+#include <seastar/core/thread.hh>
+
+struct SeastarRunner {
+ static constexpr eventfd_t APP_RUNNING = 1;
+ static constexpr eventfd_t APP_NOT_RUN = 2;
+
+ seastar::app_template app;
+ seastar::file_desc begin_fd;
+ std::unique_ptr<seastar::readable_eventfd> on_end;
+
+ std::thread thread;
+
+ bool begin_signaled = false;
+
+ SeastarRunner() :
+ begin_fd{seastar::file_desc::eventfd(0, 0)} {}
+
+ ~SeastarRunner() {}
+
+ bool is_running() const {
+ return !!on_end;
+ }
+
+ int init(int argc, char **argv)
+ {
+ thread = std::thread([argc, argv, this] { reactor(argc, argv); });
+ eventfd_t result;
+ if (int r = ::eventfd_read(begin_fd.get(), &result); r < 0) {
+ std::cerr << "unable to eventfd_read():" << errno << std::endl;
+ return r;
+ }
+ assert(begin_signaled == true);
+ if (result == APP_RUNNING) {
+ assert(is_running());
+ return 0;
+ } else {
+ assert(result == APP_NOT_RUN);
+ assert(!is_running());
+ return 1;
+ }
+ }
+
+ void stop()
+ {
+ if (is_running()) {
+ run([this] {
+ on_end->write_side().signal(1);
+ return seastar::now();
+ });
+ }
+ thread.join();
+ }
+
+ void reactor(int argc, char **argv)
+ {
+ auto ret = app.run(argc, argv, [this] {
+ on_end.reset(new seastar::readable_eventfd);
+ return seastar::now().then([this] {
+ begin_signaled = true;
+ [[maybe_unused]] auto r = ::eventfd_write(begin_fd.get(), APP_RUNNING);
+ assert(r == 0);
+ return seastar::now();
+ }).then([this] {
+ return on_end->wait().then([](size_t){});
+ }).handle_exception([](auto ep) {
+ std::cerr << "Error: " << ep << std::endl;
+ }).finally([this] {
+ on_end.reset();
+ });
+ });
+ if (ret != 0) {
+ std::cerr << "Seastar app returns " << ret << std::endl;
+ }
+ if (!begin_signaled) {
+ begin_signaled = true;
+ ::eventfd_write(begin_fd.get(), APP_NOT_RUN);
+ }
+ }
+
+ template <typename Func>
+ void run(Func &&func) {
+ assert(is_running());
+ auto fut = seastar::alien::submit_to(app.alien(), 0,
+ std::forward<Func>(func));
+ fut.get();
+ }
+};
+
+
diff --git a/src/test/crimson/seastore/CMakeLists.txt b/src/test/crimson/seastore/CMakeLists.txt
new file mode 100644
index 000000000..5c6c2771c
--- /dev/null
+++ b/src/test/crimson/seastore/CMakeLists.txt
@@ -0,0 +1,128 @@
+add_executable(unittest-transaction-manager
+ test_block.cc
+ test_transaction_manager.cc
+ ../gtest_seastar.cc)
+add_ceph_unittest(unittest-transaction-manager
+ --memory 256M --smp 1)
+target_link_libraries(
+ unittest-transaction-manager
+ ${CMAKE_DL_LIBS}
+ crimson-seastore)
+
+add_executable(unittest-btree-lba-manager
+ test_btree_lba_manager.cc
+ ../gtest_seastar.cc)
+add_ceph_unittest(unittest-btree-lba-manager
+ --memory 256M --smp 1)
+target_link_libraries(
+ unittest-btree-lba-manager
+ ${CMAKE_DL_LIBS}
+ crimson-seastore)
+
+add_executable(unittest-seastore-journal
+ test_seastore_journal.cc)
+add_ceph_test(unittest-seastore-journal
+ unittest-seastore-journal --memory 256M --smp 1)
+target_link_libraries(
+ unittest-seastore-journal
+ crimson::gtest
+ crimson-seastore)
+
+add_executable(unittest-seastore-cache
+ test_block.cc
+ test_seastore_cache.cc)
+add_ceph_test(unittest-seastore-cache
+ unittest-seastore-cache --memory 256M --smp 1)
+target_link_libraries(
+ unittest-seastore-cache
+ crimson::gtest
+ crimson-seastore)
+
+add_executable(unittest-object-data-handler
+ test_object_data_handler.cc
+ ../gtest_seastar.cc
+ ${PROJECT_SOURCE_DIR}/src/crimson/osd/lsan_suppressions.cc)
+add_ceph_unittest(unittest-object-data-handler
+ --memory 256M --smp 1)
+target_link_libraries(
+ unittest-object-data-handler
+ crimson::gtest
+ crimson-seastore
+ crimson-os
+ crimson-common)
+
+add_executable(unittest-collection-manager
+ test_collection_manager.cc
+ ../gtest_seastar.cc
+ ${PROJECT_SOURCE_DIR}/src/crimson/osd/lsan_suppressions.cc)
+add_ceph_test(unittest-collection-manager
+ unittest-collection-manager --memory 256M --smp 1)
+target_link_libraries(
+ unittest-collection-manager
+ crimson::gtest
+ crimson-seastore
+ crimson-os
+ crimson-common)
+
+add_executable(unittest-omap-manager
+ test_omap_manager.cc
+ ../gtest_seastar.cc)
+add_ceph_unittest(unittest-omap-manager
+ --memory 256M --smp 1)
+target_link_libraries(
+ unittest-omap-manager
+ ${CMAKE_DL_LIBS}
+ crimson-seastore)
+
+add_executable(unittest-seastore
+ test_seastore.cc
+ ../gtest_seastar.cc)
+add_ceph_unittest(unittest-seastore
+ --memory 256M --smp 1)
+target_link_libraries(
+ unittest-seastore
+ ${CMAKE_DL_LIBS}
+ crimson-seastore
+ crimson-common)
+
+add_executable(unittest-seastore-randomblock-manager
+ test_randomblock_manager.cc)
+add_ceph_test(unittest-seastore-randomblock-manager
+ unittest-seastore-randomblock-manager --memory 256M --smp 1)
+target_link_libraries(
+ unittest-seastore-randomblock-manager
+ crimson::gtest
+ ${CMAKE_DL_LIBS}
+ crimson-seastore)
+
+add_executable(unittest-seastore-nvmedevice
+ nvmedevice/test_nvmedevice.cc)
+add_ceph_test(unittest-seastore-nvmedevice
+ unittest-seastore-nvmedevice --memory 256M --smp 1)
+target_link_libraries(
+ unittest-seastore-nvmedevice
+ crimson::gtest
+ crimson-seastore
+ aio)
+
+add_executable(unittest-seastore-cbjournal
+ test_cbjournal.cc)
+add_ceph_test(unittest-seastore-cbjournal
+ unittest-seastore-cbjournal --memory 256M --smp 1)
+target_link_libraries(
+ unittest-seastore-cbjournal
+ crimson::gtest
+ crimson-seastore
+ aio)
+
+add_executable(unittest-seastore-extent-allocator
+ test_extent_allocator.cc)
+add_ceph_test(unittest-seastore-extent-allocator
+ unittest-seastore-extent-allocator --memory 256M --smp 1)
+target_link_libraries(
+ unittest-seastore-extent-allocator
+ crimson::gtest
+ crimson-seastore
+ aio)
+
+add_subdirectory(onode_tree)
diff --git a/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc
new file mode 100644
index 000000000..9c2f4c246
--- /dev/null
+++ b/src/test/crimson/seastore/nvmedevice/test_nvmedevice.cc
@@ -0,0 +1,105 @@
+//-*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "include/buffer.h"
+#include "crimson/os/seastore/random_block_manager/rbm_device.h"
+#include "crimson/os/seastore/random_block_manager/nvme_block_device.h"
+#include "test/crimson/gtest_seastar.h"
+#include "include/stringify.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+using namespace random_block_device;
+using namespace random_block_device::nvme;
+
+struct nvdev_test_t : seastar_test_suite_t {
+ std::unique_ptr<RBMDevice> device;
+ std::string dev_path;
+
+ static const uint64_t DEV_SIZE = 1024 * 1024 * 1024;
+
+ nvdev_test_t() :
+ device(nullptr),
+ dev_path("randomblock_manager.test_nvmedevice" + stringify(getpid())) {
+ int fd = ::open(dev_path.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644);
+ ceph_assert(fd >= 0);
+ ::ftruncate(fd, DEV_SIZE);
+ ::close(fd);
+ }
+ ~nvdev_test_t() {
+ ::unlink(dev_path.c_str());
+ }
+};
+
+static const uint64_t BUF_SIZE = 1024;
+static const uint64_t BLK_SIZE = 4096;
+
+struct nvdev_test_block_t {
+ uint8_t data[BUF_SIZE];
+
+ DENC(nvdev_test_block_t, v, p) {
+ DENC_START(1, 1, p);
+ for (uint64_t i = 0 ; i < BUF_SIZE; i++)
+ {
+ denc(v.data[i], p);
+ }
+ DENC_FINISH(p);
+ }
+};
+
+WRITE_CLASS_DENC_BOUNDED(
+ nvdev_test_block_t
+)
+
+using crimson::common::local_conf;
+TEST_F(nvdev_test_t, write_and_verify_test)
+{
+ run_async([this] {
+ device.reset(new random_block_device::nvme::NVMeBlockDevice(dev_path));
+ local_conf().set_val("seastore_cbjournal_size", "1048576").get();
+ device->start().get();
+ device->mkfs(
+ device_config_t{
+ true,
+ device_spec_t{
+ (magic_t)std::rand(),
+ device_type_t::RANDOM_BLOCK_SSD,
+ static_cast<device_id_t>(DEVICE_ID_RANDOM_BLOCK_MIN)},
+ seastore_meta_t{uuid_d()},
+ secondary_device_set_t()}
+ ).unsafe_get();
+ device->mount().unsafe_get();
+ nvdev_test_block_t original_data;
+ std::minstd_rand0 generator;
+ uint8_t value = generator();
+ memset(original_data.data, value, BUF_SIZE);
+ uint64_t bl_length = 0;
+ Device& d = device->get_sharded_device();
+ {
+ bufferlist bl;
+ encode(original_data, bl);
+ bl_length = bl.length();
+ auto write_buf = ceph::bufferptr(buffer::create_page_aligned(BLK_SIZE));
+ bl.begin().copy(bl_length, write_buf.c_str());
+ ((RBMDevice*)&d)->write(0, std::move(write_buf)).unsafe_get();
+ }
+
+ nvdev_test_block_t read_data;
+ {
+ auto read_buf = ceph::bufferptr(buffer::create_page_aligned(BLK_SIZE));
+ ((RBMDevice*)&d)->read(0, read_buf).unsafe_get();
+ bufferlist bl;
+ bl.push_back(read_buf);
+ auto bliter = bl.cbegin();
+ decode(read_data, bliter);
+ }
+
+ int ret = memcmp(original_data.data, read_data.data, BUF_SIZE);
+ ((RBMDevice*)&d)->close().unsafe_get();
+ device->stop().get();
+ ASSERT_TRUE(ret == 0);
+ device.reset(nullptr);
+ });
+}
+
diff --git a/src/test/crimson/seastore/onode_tree/CMakeLists.txt b/src/test/crimson/seastore/onode_tree/CMakeLists.txt
new file mode 100644
index 000000000..bea208601
--- /dev/null
+++ b/src/test/crimson/seastore/onode_tree/CMakeLists.txt
@@ -0,0 +1,15 @@
+add_executable(unittest-staged-fltree
+ test_staged_fltree.cc
+ ../../gtest_seastar.cc)
+add_ceph_unittest(unittest-staged-fltree
+ --memory 256M --smp 1)
+target_link_libraries(unittest-staged-fltree
+ crimson-seastore)
+
+add_executable(unittest-fltree-onode-manager
+ test_fltree_onode_manager.cc
+ ../../gtest_seastar.cc)
+add_ceph_unittest(unittest-fltree-onode-manager
+ --memory 256M --smp 1)
+target_link_libraries(unittest-fltree-onode-manager
+ crimson-seastore)
diff --git a/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc b/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc
new file mode 100644
index 000000000..1f661cdca
--- /dev/null
+++ b/src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc
@@ -0,0 +1,330 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/range/combine.hpp>
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "test/crimson/seastore/transaction_manager_test_state.h"
+
+#include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+using namespace crimson::os::seastore::onode;
+using CTransaction = ceph::os::Transaction;
+using namespace std;
+
+namespace {
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+struct onode_item_t {
+ uint32_t size;
+ uint64_t id;
+ uint64_t block_size;
+ uint32_t cnt_modify = 0;
+
+ void initialize(Transaction& t, Onode& value) const {
+ auto& layout = value.get_mutable_layout(t);
+ layout.size = size;
+ layout.omap_root.update(omap_root_t(id, cnt_modify,
+ value.get_metadata_hint(block_size)));
+ validate(value);
+ }
+
+ void validate(Onode& value) const {
+ auto& layout = value.get_layout();
+ ceph_assert(laddr_t(layout.size) == laddr_t{size});
+ ceph_assert(layout.omap_root.get(value.get_metadata_hint(block_size)).addr == id);
+ ceph_assert(layout.omap_root.get(value.get_metadata_hint(block_size)).depth == cnt_modify);
+ }
+
+ void modify(Transaction& t, Onode& value) {
+ validate(value);
+ ++cnt_modify;
+ initialize(t, value);
+ }
+
+ static onode_item_t create(std::size_t size, std::size_t id, uint64_t block_size) {
+ ceph_assert(size <= std::numeric_limits<uint32_t>::max());
+ return {(uint32_t)size, id, block_size};
+ }
+};
+
+struct fltree_onode_manager_test_t
+ : public seastar_test_suite_t, TMTestState {
+ using iterator_t = typename KVPool<onode_item_t>::iterator_t;
+
+ FLTreeOnodeManagerRef manager;
+
+ seastar::future<> set_up_fut() final {
+ return tm_setup();
+ }
+
+ seastar::future<> tear_down_fut() final {
+ return tm_teardown();
+ }
+
+ virtual seastar::future<> _init() final {
+ return TMTestState::_init().then([this] {
+ manager.reset(new FLTreeOnodeManager(*tm));
+ });
+ }
+
+ virtual seastar::future<> _destroy() final {
+ manager.reset();
+ return TMTestState::_destroy();
+ }
+
+ virtual FuturizedStore::mkfs_ertr::future<> _mkfs() final {
+ return TMTestState::_mkfs(
+ ).safe_then([this] {
+ return restart_fut();
+ }).safe_then([this] {
+ return repeat_eagain([this] {
+ return seastar::do_with(
+ create_mutate_transaction(),
+ [this](auto &ref_t)
+ {
+ return with_trans_intr(*ref_t, [&](auto &t) {
+ return manager->mkfs(t
+ ).si_then([this, &t] {
+ return submit_transaction_fut2(t);
+ });
+ });
+ });
+ });
+ }).handle_error(
+ crimson::ct_error::assert_all{"Invalid error in _mkfs"}
+ );
+ }
+
+ template <typename F>
+ void with_transaction(F&& f) {
+ auto t = create_mutate_transaction();
+ std::invoke(f, *t);
+ submit_transaction(std::move(t));
+ }
+
+ template <typename F>
+ void with_onode_write(iterator_t& it, F&& f) {
+ with_transaction([this, &it, f=std::move(f)] (auto& t) {
+ auto p_kv = *it;
+ auto onode = with_trans_intr(t, [&](auto &t) {
+ return manager->get_or_create_onode(t, p_kv->key);
+ }).unsafe_get0();
+ std::invoke(f, t, *onode, p_kv->value);
+ with_trans_intr(t, [&](auto &t) {
+ if (onode->is_alive()) {
+ return manager->write_dirty(t, {onode});
+ } else {
+ return OnodeManager::write_dirty_iertr::now();
+ }
+ }).unsafe_get0();
+ });
+ }
+
+ void validate_onode(iterator_t& it) {
+ with_transaction([this, &it] (auto& t) {
+ auto p_kv = *it;
+ auto onode = with_trans_intr(t, [&](auto &t) {
+ return manager->get_onode(t, p_kv->key);
+ }).unsafe_get0();
+ p_kv->value.validate(*onode);
+ });
+ }
+
+ void validate_erased(iterator_t& it) {
+ with_transaction([this, &it] (auto& t) {
+ auto p_kv = *it;
+ auto exist = with_trans_intr(t, [&](auto &t) {
+ return manager->contains_onode(t, p_kv->key);
+ }).unsafe_get0();
+ ceph_assert(exist == false);
+ });
+ }
+
+ template <typename F>
+ void with_onodes_process(
+ const iterator_t& start, const iterator_t& end, F&& f) {
+ std::vector<ghobject_t> oids;
+ std::vector<onode_item_t*> items;
+ auto it = start;
+ while(it != end) {
+ auto p_kv = *it;
+ oids.emplace_back(p_kv->key);
+ items.emplace_back(&p_kv->value);
+ ++it;
+ }
+ with_transaction([&oids, &items, f=std::move(f)] (auto& t) mutable {
+ std::invoke(f, t, oids, items);
+ });
+ }
+
+ template <typename F>
+ void with_onodes_write(
+ const iterator_t& start, const iterator_t& end, F&& f) {
+ with_onodes_process(start, end,
+ [this, f=std::move(f)] (auto& t, auto& oids, auto& items) {
+ auto onodes = with_trans_intr(t, [&](auto &t) {
+ return manager->get_or_create_onodes(t, oids);
+ }).unsafe_get0();
+ for (auto tup : boost::combine(onodes, items)) {
+ OnodeRef onode;
+ onode_item_t* p_item;
+ boost::tie(onode, p_item) = tup;
+ std::invoke(f, t, *onode, *p_item);
+ }
+ with_trans_intr(t, [&](auto &t) {
+ return manager->write_dirty(t, onodes);
+ }).unsafe_get0();
+ });
+ }
+
+ void validate_onodes(
+ const iterator_t& start, const iterator_t& end) {
+ with_onodes_process(start, end,
+ [this] (auto& t, auto& oids, auto& items) {
+ for (auto tup : boost::combine(oids, items)) {
+ ghobject_t oid;
+ onode_item_t* p_item;
+ boost::tie(oid, p_item) = tup;
+ auto onode = with_trans_intr(t, [&](auto &t) {
+ return manager->get_onode(t, oid);
+ }).unsafe_get0();
+ p_item->validate(*onode);
+ }
+ });
+ }
+
+ void validate_erased(
+ const iterator_t& start, const iterator_t& end) {
+ with_onodes_process(start, end,
+ [this] (auto& t, auto& oids, auto& items) {
+ for (auto& oid : oids) {
+ auto exist = with_trans_intr(t, [&](auto &t) {
+ return manager->contains_onode(t, oid);
+ }).unsafe_get0();
+ ceph_assert(exist == false);
+ }
+ });
+ }
+
+ static constexpr uint64_t LIST_LIMIT = 10;
+ void validate_list_onodes(KVPool<onode_item_t>& pool) {
+ with_onodes_process(pool.begin(), pool.end(),
+ [this] (auto& t, auto& oids, auto& items) {
+ std::vector<ghobject_t> listed_oids;
+ auto start = ghobject_t();
+ auto end = ghobject_t::get_max();
+ assert(start < end);
+ assert(start < oids[0]);
+ assert(oids[0] < end);
+ while (start != end) {
+ auto [list_ret, list_end] = with_trans_intr(t, [&](auto &t) {
+ return manager->list_onodes(t, start, end, LIST_LIMIT);
+ }).unsafe_get0();
+ listed_oids.insert(listed_oids.end(), list_ret.begin(), list_ret.end());
+ start = list_end;
+ }
+ ceph_assert(oids.size() == listed_oids.size());
+ });
+ }
+
+ fltree_onode_manager_test_t() {}
+};
+
+TEST_P(fltree_onode_manager_test_t, 1_single)
+{
+ run_async([this] {
+ uint64_t block_size = tm->get_block_size();
+ auto pool = KVPool<onode_item_t>::create_range({0, 1}, {128, 256}, block_size);
+ auto iter = pool.begin();
+ with_onode_write(iter, [](auto& t, auto& onode, auto& item) {
+ item.initialize(t, onode);
+ });
+ validate_onode(iter);
+
+ with_onode_write(iter, [](auto& t, auto& onode, auto& item) {
+ item.modify(t, onode);
+ });
+ validate_onode(iter);
+
+ validate_list_onodes(pool);
+
+ with_onode_write(iter, [this](auto& t, auto& onode, auto& item) {
+ OnodeRef onode_ref = &onode;
+ with_trans_intr(t, [&](auto &t) {
+ return manager->erase_onode(t, onode_ref);
+ }).unsafe_get0();
+ });
+ validate_erased(iter);
+ });
+}
+
+TEST_P(fltree_onode_manager_test_t, 2_synthetic)
+{
+ run_async([this] {
+ uint64_t block_size = tm->get_block_size();
+ auto pool = KVPool<onode_item_t>::create_range(
+ {0, 100}, {32, 64, 128, 256, 512}, block_size);
+ auto start = pool.begin();
+ auto end = pool.end();
+ with_onodes_write(start, end,
+ [](auto& t, auto& onode, auto& item) {
+ item.initialize(t, onode);
+ });
+ validate_onodes(start, end);
+
+ validate_list_onodes(pool);
+
+ auto rd_start = pool.random_begin();
+ auto rd_end = rd_start + 50;
+ with_onodes_write(rd_start, rd_end,
+ [](auto& t, auto& onode, auto& item) {
+ item.modify(t, onode);
+ });
+ validate_onodes(start, end);
+
+ pool.shuffle();
+ rd_start = pool.random_begin();
+ rd_end = rd_start + 50;
+ with_onodes_write(rd_start, rd_end,
+ [](auto& t, auto& onode, auto& item) {
+ item.modify(t, onode);
+ });
+ validate_onodes(start, end);
+
+ pool.shuffle();
+ rd_start = pool.random_begin();
+ rd_end = rd_start + 50;
+ with_onodes_write(rd_start, rd_end,
+ [this](auto& t, auto& onode, auto& item) {
+ OnodeRef onode_ref = &onode;
+ with_trans_intr(t, [&](auto &t) {
+ return manager->erase_onode(t, onode_ref);
+ }).unsafe_get0();
+ });
+ validate_erased(rd_start, rd_end);
+ pool.erase_from_random(rd_start, rd_end);
+ start = pool.begin();
+ end = pool.end();
+ validate_onodes(start, end);
+
+ validate_list_onodes(pool);
+ });
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ fltree_onode__manager_test,
+ fltree_onode_manager_test_t,
+ ::testing::Values (
+ "segmented",
+ "circularbounded"
+ )
+);
diff --git a/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc
new file mode 100644
index 000000000..7357b5ced
--- /dev/null
+++ b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc
@@ -0,0 +1,1792 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <array>
+#include <cstring>
+#include <memory>
+#include <set>
+#include <sstream>
+#include <vector>
+
+#include "crimson/common/log.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/node.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/node_layout.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/tree.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h"
+
+#include "test/crimson/gtest_seastar.h"
+#include "test/crimson/seastore/transaction_manager_test_state.h"
+#include "test_value.h"
+
+using namespace crimson::os::seastore::onode;
+
+#define INTR(fun, t) \
+ with_trans_intr( \
+ t, \
+ [&] (auto &tr) { \
+ return fun(tr); \
+ } \
+ )
+
+#define INTR_R(fun, t, args...) \
+ with_trans_intr( \
+ t, \
+ [&] (auto &tr) { \
+ return fun(tr, args); \
+ } \
+ )
+
+#define INTR_WITH_PARAM(fun, c, b, v) \
+ with_trans_intr( \
+ c.t, \
+ [=] (auto &t) { \
+ return fun(c, L_ADDR_MIN, b, v); \
+ } \
+ )
+
+namespace {
+ constexpr bool IS_DUMMY_SYNC = false;
+ using DummyManager = DummyNodeExtentManager<IS_DUMMY_SYNC>;
+
+ using UnboundedBtree = Btree<UnboundedValue>;
+
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+
+ ghobject_t make_ghobj(
+ shard_t shard, pool_t pool, crush_hash_t crush,
+ std::string ns, std::string oid, snap_t snap, gen_t gen) {
+ return ghobject_t{shard_id_t{shard}, pool, crush, ns, oid, snap, gen};
+ }
+
+ // return a key_view_t and its underlying memory buffer.
+ // the buffer needs to be freed manually.
+ std::pair<key_view_t, void*> build_key_view(const ghobject_t& hobj) {
+ key_hobj_t key_hobj(hobj);
+ size_t key_size = sizeof(shard_pool_crush_t) + sizeof(snap_gen_t) +
+ ns_oid_view_t::estimate_size(key_hobj);
+ void* p_mem = std::malloc(key_size);
+
+ key_view_t key_view;
+ char* p_fill = (char*)p_mem + key_size;
+
+ auto spc = shard_pool_crush_t::from_key(key_hobj);
+ p_fill -= sizeof(shard_pool_crush_t);
+ std::memcpy(p_fill, &spc, sizeof(shard_pool_crush_t));
+ key_view.set(*reinterpret_cast<const shard_pool_crush_t*>(p_fill));
+
+ auto p_ns_oid = p_fill;
+ ns_oid_view_t::test_append(key_hobj, p_fill);
+ ns_oid_view_t ns_oid_view(p_ns_oid);
+ key_view.set(ns_oid_view);
+
+ auto sg = snap_gen_t::from_key(key_hobj);
+ p_fill -= sizeof(snap_gen_t);
+ ceph_assert(p_fill == (char*)p_mem);
+ std::memcpy(p_fill, &sg, sizeof(snap_gen_t));
+ key_view.set(*reinterpret_cast<const snap_gen_t*>(p_fill));
+
+ return {key_view, p_mem};
+ }
+}
+
+struct a_basic_test_t : public seastar_test_suite_t {};
+
+TEST_F(a_basic_test_t, 1_basic_sizes)
+{
+ logger().info("\n"
+ "Bytes of struct:\n"
+ " node_header_t: {}\n"
+ " shard_pool_t: {}\n"
+ " shard_pool_crush_t: {}\n"
+ " crush_t: {}\n"
+ " snap_gen_t: {}\n"
+ " slot_0_t: {}\n"
+ " slot_1_t: {}\n"
+ " slot_3_t: {}\n"
+ " node_fields_0_t: {}\n"
+ " node_fields_1_t: {}\n"
+ " node_fields_2_t: {}\n"
+ " internal_fields_3_t: {}\n"
+ " leaf_fields_3_t: {}\n"
+ " internal_sub_item_t: {}",
+ sizeof(node_header_t), sizeof(shard_pool_t),
+ sizeof(shard_pool_crush_t), sizeof(crush_t), sizeof(snap_gen_t),
+ sizeof(slot_0_t), sizeof(slot_1_t), sizeof(slot_3_t),
+ sizeof(node_fields_0_t), sizeof(node_fields_1_t), sizeof(node_fields_2_t),
+ sizeof(internal_fields_3_t), sizeof(leaf_fields_3_t), sizeof(internal_sub_item_t)
+ );
+
+ auto hobj = make_ghobj(0, 0, 0, "n", "o", 0, 0);
+ key_hobj_t key(hobj);
+ auto [key_view, p_mem] = build_key_view(hobj);
+ value_config_t value;
+ value.payload_size = 8;
+#define _STAGE_T(NodeType) node_to_stage_t<typename NodeType::node_stage_t>
+#define NXT_T(StageType) staged<typename StageType::next_param_t>
+ laddr_t i_value{0};
+ logger().info("\n"
+ "Bytes of a key-value insertion (full-string):\n"
+ " s-p-c, 'n'-'o', s-g => value_payload(8): typically internal 43B, leaf 59B\n"
+ " InternalNode0: {} {} {}\n"
+ " InternalNode1: {} {} {}\n"
+ " InternalNode2: {} {}\n"
+ " InternalNode3: {}\n"
+ " LeafNode0: {} {} {}\n"
+ " LeafNode1: {} {} {}\n"
+ " LeafNode2: {} {}\n"
+ " LeafNode3: {}",
+ _STAGE_T(InternalNode0)::insert_size(key_view, i_value),
+ NXT_T(_STAGE_T(InternalNode0))::insert_size(key_view, i_value),
+ NXT_T(NXT_T(_STAGE_T(InternalNode0)))::insert_size(key_view, i_value),
+ _STAGE_T(InternalNode1)::insert_size(key_view, i_value),
+ NXT_T(_STAGE_T(InternalNode1))::insert_size(key_view, i_value),
+ NXT_T(NXT_T(_STAGE_T(InternalNode1)))::insert_size(key_view, i_value),
+ _STAGE_T(InternalNode2)::insert_size(key_view, i_value),
+ NXT_T(_STAGE_T(InternalNode2))::insert_size(key_view, i_value),
+ _STAGE_T(InternalNode3)::insert_size(key_view, i_value),
+ _STAGE_T(LeafNode0)::insert_size(key, value),
+ NXT_T(_STAGE_T(LeafNode0))::insert_size(key, value),
+ NXT_T(NXT_T(_STAGE_T(LeafNode0)))::insert_size(key, value),
+ _STAGE_T(LeafNode1)::insert_size(key, value),
+ NXT_T(_STAGE_T(LeafNode1))::insert_size(key, value),
+ NXT_T(NXT_T(_STAGE_T(LeafNode1)))::insert_size(key, value),
+ _STAGE_T(LeafNode2)::insert_size(key, value),
+ NXT_T(_STAGE_T(LeafNode2))::insert_size(key, value),
+ _STAGE_T(LeafNode3)::insert_size(key, value)
+ );
+ std::free(p_mem);
+}
+
+TEST_F(a_basic_test_t, 2_node_sizes)
+{
+ run_async([] {
+ auto nm = NodeExtentManager::create_dummy(IS_DUMMY_SYNC);
+ auto t = make_test_transaction();
+ ValueBuilderImpl<UnboundedValue> vb;
+ context_t c{*nm, vb, *t};
+ std::array<std::pair<NodeImplURef, NodeExtentMutable>, 16> nodes = {
+ INTR_WITH_PARAM(InternalNode0::allocate, c, false, 1u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(InternalNode1::allocate, c, false, 1u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(InternalNode2::allocate, c, false, 1u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(InternalNode3::allocate, c, false, 1u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(InternalNode0::allocate, c, true, 1u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(InternalNode1::allocate, c, true, 1u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(InternalNode2::allocate, c, true, 1u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(InternalNode3::allocate, c, true, 1u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(LeafNode0::allocate, c, false, 0u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(LeafNode1::allocate, c, false, 0u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(LeafNode2::allocate, c, false, 0u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(LeafNode3::allocate, c, false, 0u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(LeafNode0::allocate, c, true, 0u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(LeafNode1::allocate, c, true, 0u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(LeafNode2::allocate, c, true, 0u).unsafe_get0().make_pair(),
+ INTR_WITH_PARAM(LeafNode3::allocate, c, true, 0u).unsafe_get0().make_pair()
+ };
+ std::ostringstream oss;
+ oss << "\nallocated nodes:";
+ for (auto iter = nodes.begin(); iter != nodes.end(); ++iter) {
+ oss << "\n ";
+ auto& ref_node = iter->first;
+ ref_node->dump_brief(oss);
+ }
+ logger().info("{}", oss.str());
+ });
+}
+
+struct b_dummy_tree_test_t : public seastar_test_suite_t {
+ TransactionRef ref_t;
+ std::unique_ptr<UnboundedBtree> tree;
+
+ b_dummy_tree_test_t() = default;
+
+ seastar::future<> set_up_fut() override final {
+ ref_t = make_test_transaction();
+ tree.reset(
+ new UnboundedBtree(NodeExtentManager::create_dummy(IS_DUMMY_SYNC))
+ );
+ return INTR(tree->mkfs, *ref_t).handle_error(
+ crimson::ct_error::all_same_way([] {
+ ASSERT_FALSE("Unable to mkfs");
+ })
+ );
+ }
+
+ seastar::future<> tear_down_fut() final {
+ ref_t.reset();
+ tree.reset();
+ return seastar::now();
+ }
+};
+
+TEST_F(b_dummy_tree_test_t, 3_random_insert_erase_leaf_node)
+{
+ run_async([this] {
+ logger().info("\n---------------------------------------------"
+ "\nrandomized leaf node insert:\n");
+ auto key_s = ghobject_t();
+ auto key_e = ghobject_t::get_max();
+ ASSERT_TRUE(INTR_R(tree->find, *ref_t, key_s).unsafe_get0().is_end());
+ ASSERT_TRUE(INTR(tree->begin, *ref_t).unsafe_get0().is_end());
+ ASSERT_TRUE(INTR(tree->last, *ref_t).unsafe_get0().is_end());
+
+ std::map<ghobject_t,
+ std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history;
+
+ auto f_validate_insert_new = [this, &insert_history] (
+ const ghobject_t& key, const test_item_t& value) {
+ auto conf = UnboundedBtree::tree_value_config_t{value.get_payload_size()};
+ auto [cursor, success] = INTR_R(tree->insert,
+ *ref_t, key, conf).unsafe_get0();
+ initialize_cursor_from_item(*ref_t, key, value, cursor, success);
+ insert_history.emplace(key, std::make_tuple(value, cursor));
+ auto cursor_ = INTR_R(tree->find, *ref_t, key).unsafe_get0();
+ ceph_assert(cursor_ != tree->end());
+ ceph_assert(cursor_.value() == cursor.value());
+ validate_cursor_from_item(key, value, cursor_);
+ return cursor.value();
+ };
+
+ auto f_validate_erase = [this, &insert_history] (const ghobject_t& key) {
+ auto cursor_erase = INTR_R(tree->find, *ref_t, key).unsafe_get0();
+ auto cursor_next = INTR(cursor_erase.get_next, *ref_t).unsafe_get0();
+ auto cursor_ret = INTR_R(tree->erase, *ref_t, cursor_erase).unsafe_get0();
+ ceph_assert(cursor_erase.is_end());
+ ceph_assert(cursor_ret == cursor_next);
+ auto cursor_lb = INTR_R(tree->lower_bound, *ref_t, key).unsafe_get0();
+ ceph_assert(cursor_lb == cursor_next);
+ auto it = insert_history.find(key);
+ ceph_assert(std::get<1>(it->second).is_end());
+ insert_history.erase(it);
+ };
+
+ auto f_insert_erase_insert = [&f_validate_insert_new, &f_validate_erase] (
+ const ghobject_t& key, const test_item_t& value) {
+ f_validate_insert_new(key, value);
+ f_validate_erase(key);
+ return f_validate_insert_new(key, value);
+ };
+
+ auto values = Values<test_item_t>(15);
+
+ // insert key1, value1 at STAGE_LEFT
+ auto key1 = make_ghobj(3, 3, 3, "ns3", "oid3", 3, 3);
+ auto value1 = values.pick();
+ auto test_value1 = f_insert_erase_insert(key1, value1);
+
+ // validate lookup
+ {
+ auto cursor1_s = INTR_R(tree->lower_bound, *ref_t, key_s).unsafe_get0();
+ ASSERT_EQ(cursor1_s.get_ghobj(), key1);
+ ASSERT_EQ(cursor1_s.value(), test_value1);
+ auto cursor1_e = INTR_R(tree->lower_bound, *ref_t, key_e).unsafe_get0();
+ ASSERT_TRUE(cursor1_e.is_end());
+ }
+
+ // insert the same key1 with a different value
+ {
+ auto value1_dup = values.pick();
+ auto conf = UnboundedBtree::tree_value_config_t{value1_dup.get_payload_size()};
+ auto [cursor1_dup, ret1_dup] = INTR_R(tree->insert,
+ *ref_t, key1, conf).unsafe_get0();
+ ASSERT_FALSE(ret1_dup);
+ validate_cursor_from_item(key1, value1, cursor1_dup);
+ }
+
+ // insert key2, value2 to key1's left at STAGE_LEFT
+ // insert node front at STAGE_LEFT
+ auto key2 = make_ghobj(2, 2, 2, "ns3", "oid3", 3, 3);
+ auto value2 = values.pick();
+ f_insert_erase_insert(key2, value2);
+
+ // insert key3, value3 to key1's right at STAGE_LEFT
+ // insert node last at STAGE_LEFT
+ auto key3 = make_ghobj(4, 4, 4, "ns3", "oid3", 3, 3);
+ auto value3 = values.pick();
+ f_insert_erase_insert(key3, value3);
+
+ // insert key4, value4 to key1's left at STAGE_STRING (collision)
+ auto key4 = make_ghobj(3, 3, 3, "ns2", "oid2", 3, 3);
+ auto value4 = values.pick();
+ f_insert_erase_insert(key4, value4);
+
+ // insert key5, value5 to key1's right at STAGE_STRING (collision)
+ auto key5 = make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3);
+ auto value5 = values.pick();
+ f_insert_erase_insert(key5, value5);
+
+ // insert key6, value6 to key1's left at STAGE_RIGHT
+ auto key6 = make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2);
+ auto value6 = values.pick();
+ f_insert_erase_insert(key6, value6);
+
+ // insert key7, value7 to key1's right at STAGE_RIGHT
+ auto key7 = make_ghobj(3, 3, 3, "ns3", "oid3", 4, 4);
+ auto value7 = values.pick();
+ f_insert_erase_insert(key7, value7);
+
+ // insert node front at STAGE_RIGHT
+ auto key8 = make_ghobj(2, 2, 2, "ns3", "oid3", 2, 2);
+ auto value8 = values.pick();
+ f_insert_erase_insert(key8, value8);
+
+ // insert node front at STAGE_STRING (collision)
+ auto key9 = make_ghobj(2, 2, 2, "ns2", "oid2", 3, 3);
+ auto value9 = values.pick();
+ f_insert_erase_insert(key9, value9);
+
+ // insert node last at STAGE_RIGHT
+ auto key10 = make_ghobj(4, 4, 4, "ns3", "oid3", 4, 4);
+ auto value10 = values.pick();
+ f_insert_erase_insert(key10, value10);
+
+ // insert node last at STAGE_STRING (collision)
+ auto key11 = make_ghobj(4, 4, 4, "ns4", "oid4", 3, 3);
+ auto value11 = values.pick();
+ f_insert_erase_insert(key11, value11);
+
+ // insert key, value randomly until a perfect 3-ary tree is formed
+ std::vector<std::pair<ghobject_t, test_item_t>> kvs{
+ {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2), values.pick()},
+ {make_ghobj(2, 2, 2, "ns2", "oid2", 4, 4), values.pick()},
+ {make_ghobj(2, 2, 2, "ns3", "oid3", 4, 4), values.pick()},
+ {make_ghobj(2, 2, 2, "ns4", "oid4", 2, 2), values.pick()},
+ {make_ghobj(2, 2, 2, "ns4", "oid4", 3, 3), values.pick()},
+ {make_ghobj(2, 2, 2, "ns4", "oid4", 4, 4), values.pick()},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2), values.pick()},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 4, 4), values.pick()},
+ {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2), values.pick()},
+ {make_ghobj(3, 3, 3, "ns4", "oid4", 4, 4), values.pick()},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2), values.pick()},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 3, 3), values.pick()},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 4, 4), values.pick()},
+ {make_ghobj(4, 4, 4, "ns3", "oid3", 2, 2), values.pick()},
+ {make_ghobj(4, 4, 4, "ns4", "oid4", 2, 2), values.pick()},
+ {make_ghobj(4, 4, 4, "ns4", "oid4", 4, 4), values.pick()}};
+ auto [smallest_key, smallest_value] = kvs[0];
+ auto [largest_key, largest_value] = kvs[kvs.size() - 1];
+ std::shuffle(kvs.begin(), kvs.end(), std::default_random_engine{});
+ std::for_each(kvs.begin(), kvs.end(), [&f_insert_erase_insert] (auto& kv) {
+ f_insert_erase_insert(kv.first, kv.second);
+ });
+ ASSERT_EQ(INTR(tree->height, *ref_t).unsafe_get0(), 1);
+ ASSERT_FALSE(tree->test_is_clean());
+
+ for (auto& [k, val] : insert_history) {
+ auto& [v, c] = val;
+ // validate values in tree keep intact
+ auto cursor = with_trans_intr(*ref_t, [this, &k=k](auto& tr) {
+ return tree->find(tr, k);
+ }).unsafe_get0();
+ EXPECT_NE(cursor, tree->end());
+ validate_cursor_from_item(k, v, cursor);
+ // validate values in cursors keep intact
+ validate_cursor_from_item(k, v, c);
+ }
+ {
+ auto cursor = INTR_R(tree->lower_bound, *ref_t, key_s).unsafe_get0();
+ validate_cursor_from_item(smallest_key, smallest_value, cursor);
+ }
+ {
+ auto cursor = INTR(tree->begin, *ref_t).unsafe_get0();
+ validate_cursor_from_item(smallest_key, smallest_value, cursor);
+ }
+ {
+ auto cursor = INTR(tree->last, *ref_t).unsafe_get0();
+ validate_cursor_from_item(largest_key, largest_value, cursor);
+ }
+
+ // validate range query
+ {
+ kvs.clear();
+ for (auto& [k, val] : insert_history) {
+ auto& [v, c] = val;
+ kvs.emplace_back(k, v);
+ }
+ insert_history.clear();
+ std::sort(kvs.begin(), kvs.end(), [](auto& l, auto& r) {
+ return l.first < r.first;
+ });
+ auto cursor = INTR(tree->begin, *ref_t).unsafe_get0();
+ for (auto& [k, v] : kvs) {
+ ASSERT_FALSE(cursor.is_end());
+ validate_cursor_from_item(k, v, cursor);
+ cursor = INTR(cursor.get_next, *ref_t).unsafe_get0();
+ }
+ ASSERT_TRUE(cursor.is_end());
+ }
+
+ std::ostringstream oss;
+ tree->dump(*ref_t, oss);
+ logger().info("\n{}\n", oss.str());
+
+ // randomized erase until empty
+ std::shuffle(kvs.begin(), kvs.end(), std::default_random_engine{});
+ for (auto& [k, v] : kvs) {
+ auto e_size = with_trans_intr(*ref_t, [this, &k=k](auto& tr) {
+ return tree->erase(tr, k);
+ }).unsafe_get0();
+ ASSERT_EQ(e_size, 1);
+ }
+ auto cursor = INTR(tree->begin, *ref_t).unsafe_get0();
+ ASSERT_TRUE(cursor.is_end());
+ ASSERT_EQ(INTR(tree->height, *ref_t).unsafe_get0(), 1);
+ });
+}
+
+static std::set<ghobject_t> build_key_set(
+ std::pair<unsigned, unsigned> range_2,
+ std::pair<unsigned, unsigned> range_1,
+ std::pair<unsigned, unsigned> range_0,
+ std::string padding = "",
+ bool is_internal = false) {
+ ceph_assert(range_1.second <= 10);
+ std::set<ghobject_t> ret;
+ ghobject_t key;
+ for (unsigned i = range_2.first; i < range_2.second; ++i) {
+ for (unsigned j = range_1.first; j < range_1.second; ++j) {
+ for (unsigned k = range_0.first; k < range_0.second; ++k) {
+ std::ostringstream os_ns;
+ os_ns << "ns" << j;
+ std::ostringstream os_oid;
+ os_oid << "oid" << j << padding;
+ key = make_ghobj(i, i, i, os_ns.str(), os_oid.str(), k, k);
+ ret.insert(key);
+ }
+ }
+ }
+ if (is_internal) {
+ ret.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9));
+ }
+ return ret;
+}
+
+class TestTree {
+ public:
+ TestTree()
+ : moved_nm{NodeExtentManager::create_dummy(IS_DUMMY_SYNC)},
+ ref_t{make_test_transaction()},
+ t{*ref_t},
+ c{*moved_nm, vb, t},
+ tree{std::move(moved_nm)},
+ values{0} {}
+
+ seastar::future<> build_tree(
+ std::pair<unsigned, unsigned> range_2,
+ std::pair<unsigned, unsigned> range_1,
+ std::pair<unsigned, unsigned> range_0,
+ size_t value_size) {
+ return seastar::async([this, range_2, range_1, range_0, value_size] {
+ INTR(tree.mkfs, t).unsafe_get0();
+ //logger().info("\n---------------------------------------------"
+ // "\nbefore leaf node split:\n");
+ auto keys = build_key_set(range_2, range_1, range_0);
+ for (auto& key : keys) {
+ auto value = values.create(value_size);
+ insert_tree(key, value).get0();
+ }
+ ASSERT_EQ(INTR(tree.height, t).unsafe_get0(), 1);
+ ASSERT_FALSE(tree.test_is_clean());
+ //std::ostringstream oss;
+ //tree.dump(t, oss);
+ //logger().info("\n{}\n", oss.str());
+ });
+ }
+
+ seastar::future<> build_tree(
+ const std::vector<ghobject_t>& keys, const std::vector<test_item_t>& values) {
+ return seastar::async([this, keys, values] {
+ INTR(tree.mkfs, t).unsafe_get0();
+ //logger().info("\n---------------------------------------------"
+ // "\nbefore leaf node split:\n");
+ ASSERT_EQ(keys.size(), values.size());
+ auto key_iter = keys.begin();
+ auto value_iter = values.begin();
+ while (key_iter != keys.end()) {
+ insert_tree(*key_iter, *value_iter).get0();
+ ++key_iter;
+ ++value_iter;
+ }
+ ASSERT_EQ(INTR(tree.height, t).unsafe_get0(), 1);
+ ASSERT_FALSE(tree.test_is_clean());
+ //std::ostringstream oss;
+ //tree.dump(t, oss);
+ //logger().info("\n{}\n", oss.str());
+ });
+ }
+
+ seastar::future<> split_merge(
+ const ghobject_t& key,
+ const test_item_t& value,
+ const split_expectation_t& expected,
+ std::optional<ghobject_t> next_key) {
+ return seastar::async([this, key, value, expected, next_key] {
+ // clone
+ auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC);
+ auto p_dummy = static_cast<DummyManager*>(ref_dummy.get());
+ UnboundedBtree tree_clone(std::move(ref_dummy));
+ auto ref_t_clone = make_test_transaction();
+ Transaction& t_clone = *ref_t_clone;
+ INTR_R(tree_clone.test_clone_from, t_clone, t, tree).unsafe_get0();
+
+ // insert and split
+ logger().info("\n\nINSERT-SPLIT {}:", key_hobj_t(key));
+ auto conf = UnboundedBtree::tree_value_config_t{value.get_payload_size()};
+ auto [cursor, success] = INTR_R(tree_clone.insert,
+ t_clone, key, conf).unsafe_get0();
+ initialize_cursor_from_item(t, key, value, cursor, success);
+
+ {
+ std::ostringstream oss;
+ tree_clone.dump(t_clone, oss);
+ logger().info("dump new root:\n{}", oss.str());
+ }
+ EXPECT_EQ(INTR(tree_clone.height, t_clone).unsafe_get0(), 2);
+
+ for (auto& [k, val] : insert_history) {
+ auto& [v, c] = val;
+ auto result = with_trans_intr(t_clone, [&tree_clone, &k=k] (auto& tr) {
+ return tree_clone.find(tr, k);
+ }).unsafe_get0();
+ EXPECT_NE(result, tree_clone.end());
+ validate_cursor_from_item(k, v, result);
+ }
+ auto result = INTR_R(tree_clone.find, t_clone, key).unsafe_get0();
+ EXPECT_NE(result, tree_clone.end());
+ validate_cursor_from_item(key, value, result);
+ EXPECT_TRUE(last_split.match(expected));
+ EXPECT_EQ(p_dummy->size(), 3);
+
+ // erase and merge
+ logger().info("\n\nERASE-MERGE {}:", key_hobj_t(key));
+ auto nxt_cursor = with_trans_intr(t_clone, [&cursor=cursor](auto& tr) {
+ return cursor.erase<true>(tr);
+ }).unsafe_get0();
+
+ {
+ // track root again to dump
+ auto begin = INTR(tree_clone.begin, t_clone).unsafe_get0();
+ std::ignore = begin;
+ std::ostringstream oss;
+ tree_clone.dump(t_clone, oss);
+ logger().info("dump root:\n{}", oss.str());
+ }
+
+ if (next_key.has_value()) {
+ auto found = insert_history.find(*next_key);
+ ceph_assert(found != insert_history.end());
+ validate_cursor_from_item(
+ *next_key, std::get<0>(found->second), nxt_cursor);
+ } else {
+ EXPECT_TRUE(nxt_cursor.is_end());
+ }
+
+ for (auto& [k, val] : insert_history) {
+ auto& [v, c] = val;
+ auto result = with_trans_intr(t_clone, [&tree_clone, &k=k](auto& tr) {
+ return tree_clone.find(tr, k);
+ }).unsafe_get0();
+ EXPECT_NE(result, tree_clone.end());
+ validate_cursor_from_item(k, v, result);
+ }
+ EXPECT_EQ(INTR(tree_clone.height, t_clone).unsafe_get0(), 1);
+ EXPECT_EQ(p_dummy->size(), 1);
+ });
+ }
+
+ test_item_t create_value(size_t size) {
+ return values.create(size);
+ }
+
+ private:
+ seastar::future<> insert_tree(const ghobject_t& key, const test_item_t& value) {
+ return seastar::async([this, &key, &value] {
+ auto conf = UnboundedBtree::tree_value_config_t{value.get_payload_size()};
+ auto [cursor, success] = INTR_R(tree.insert,
+ t, key, conf).unsafe_get0();
+ initialize_cursor_from_item(t, key, value, cursor, success);
+ insert_history.emplace(key, std::make_tuple(value, cursor));
+ });
+ }
+
+ NodeExtentManagerURef moved_nm;
+ TransactionRef ref_t;
+ Transaction& t;
+ ValueBuilderImpl<UnboundedValue> vb;
+ context_t c;
+ UnboundedBtree tree;
+ Values<test_item_t> values;
+ std::map<ghobject_t,
+ std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history;
+};
+
+struct c_dummy_test_t : public seastar_test_suite_t {};
+
+TEST_F(c_dummy_test_t, 4_split_merge_leaf_node)
+{
+ run_async([] {
+ {
+ TestTree test;
+ test.build_tree({2, 5}, {2, 5}, {2, 5}, 120).get0();
+
+ auto value = test.create_value(1144);
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to left front at stage 2, 1, 0\n");
+ test.split_merge(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), value,
+ {2u, 2u, true, InsertType::BEGIN},
+ {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), value,
+ {2u, 1u, true, InsertType::BEGIN},
+ {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(2, 2, 2, "ns2", "oid2", 1, 1), value,
+ {2u, 0u, true, InsertType::BEGIN},
+ {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to left back at stage 0, 1, 2, 1, 0\n");
+ test.split_merge(make_ghobj(2, 2, 2, "ns4", "oid4", 5, 5), value,
+ {2u, 0u, true, InsertType::LAST},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), value,
+ {2u, 1u, true, InsertType::LAST},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(2, 3, 3, "ns3", "oid3", 3, 3), value,
+ {2u, 2u, true, InsertType::LAST},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), value,
+ {2u, 1u, true, InsertType::LAST},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 1, 1), value,
+ {2u, 0u, true, InsertType::LAST},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+
+ auto value0 = test.create_value(1416);
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to right front at stage 0, 1, 2, 1, 0\n");
+ test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), value0,
+ {2u, 0u, false, InsertType::BEGIN},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), value0,
+ {2u, 1u, false, InsertType::BEGIN},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 4, 4, "ns3", "oid3", 3, 3), value0,
+ {2u, 2u, false, InsertType::BEGIN},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), value0,
+ {2u, 1u, false, InsertType::BEGIN},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), value0,
+ {2u, 0u, false, InsertType::BEGIN},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to right back at stage 0, 1, 2\n");
+ test.split_merge(make_ghobj(4, 4, 4, "ns4", "oid4", 5, 5), value0,
+ {2u, 0u, false, InsertType::LAST},
+ std::nullopt).get0();
+ test.split_merge(make_ghobj(4, 4, 4, "ns5", "oid5", 3, 3), value0,
+ {2u, 1u, false, InsertType::LAST},
+ std::nullopt).get0();
+ test.split_merge(make_ghobj(5, 5, 5, "ns3", "oid3", 3, 3), value0,
+ {2u, 2u, false, InsertType::LAST},
+ std::nullopt).get0();
+
+ auto value1 = test.create_value(316);
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 1; insert to left middle at stage 0, 1, 2, 1, 0\n");
+ test.split_merge(make_ghobj(2, 2, 2, "ns4", "oid4", 5, 5), value1,
+ {1u, 0u, true, InsertType::MID},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), value1,
+ {1u, 1u, true, InsertType::MID},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(2, 2, 3, "ns3", "oid3", 3, 3), value1,
+ {1u, 2u, true, InsertType::MID},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), value1,
+ {1u, 1u, true, InsertType::MID},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 1, 1), value1,
+ {1u, 0u, true, InsertType::MID},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 2, 2)}).get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 1; insert to left back at stage 0, 1, 0\n");
+ test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 5, 5), value1,
+ {1u, 0u, true, InsertType::LAST},
+ {make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), value1,
+ {1u, 1u, true, InsertType::LAST},
+ {make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3", 1, 1), value1,
+ {1u, 0u, true, InsertType::LAST},
+ {make_ghobj(3, 3, 3, "ns3", "oid3", 2, 2)}).get0();
+
+ auto value2 = test.create_value(452);
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 1; insert to right front at stage 0, 1, 0\n");
+ test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3", 5, 5), value2,
+ {1u, 0u, false, InsertType::BEGIN},
+ {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid4", 3, 3), value2,
+ {1u, 1u, false, InsertType::BEGIN},
+ {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 1, 1), value2,
+ {1u, 0u, false, InsertType::BEGIN},
+ {make_ghobj(3, 3, 3, "ns4", "oid4", 2, 2)}).get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 1; insert to right middle at stage 0, 1, 2, 1, 0\n");
+ test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), value2,
+ {1u, 0u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), value2,
+ {1u, 1u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 4, "ns3", "oid3", 3, 3), value2,
+ {1u, 2u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), value2,
+ {1u, 1u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), value2,
+ {1u, 0u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+
+ auto value3 = test.create_value(834);
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 0; insert to right middle at stage 0, 1, 2, 1, 0\n");
+ test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 5, 5), value3,
+ {0u, 0u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), value3,
+ {0u, 1u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(3, 3, 4, "ns3", "oid3", 3, 3), value3,
+ {0u, 2u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), value3,
+ {0u, 1u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+ test.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2", 1, 1), value3,
+ {0u, 0u, false, InsertType::MID},
+ {make_ghobj(4, 4, 4, "ns2", "oid2", 2, 2)}).get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 0; insert to right front at stage 0\n");
+ test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 2, 3), value3,
+ {0u, 0u, false, InsertType::BEGIN},
+ {make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3)}).get0();
+
+ auto value4 = test.create_value(572);
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 0; insert to left back at stage 0\n");
+ test.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2", 3, 4), value4,
+ {0u, 0u, true, InsertType::LAST},
+ {make_ghobj(3, 3, 3, "ns2", "oid2", 4, 4)}).get0();
+ }
+
+ {
+ TestTree test;
+ test.build_tree({2, 4}, {2, 4}, {2, 4}, 232).get0();
+ auto value = test.create_value(1996);
+ logger().info("\n---------------------------------------------"
+ "\nsplit at [0, 0, 0]; insert to left front at stage 2, 1, 0\n");
+ test.split_merge(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), value,
+ {2u, 2u, true, InsertType::BEGIN},
+ {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0();
+ EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}}));
+ test.split_merge(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), value,
+ {2u, 1u, true, InsertType::BEGIN},
+ {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0();
+ EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}}));
+ test.split_merge(make_ghobj(2, 2, 2, "ns2", "oid2", 1, 1), value,
+ {2u, 0u, true, InsertType::BEGIN},
+ {make_ghobj(2, 2, 2, "ns2", "oid2", 2, 2)}).get0();
+ EXPECT_TRUE(last_split.match_split_pos({0, {0, {0}}}));
+ }
+
+ {
+ TestTree test;
+ std::vector<ghobject_t> keys = {
+ make_ghobj(2, 2, 2, "ns3", "oid3", 3, 3),
+ make_ghobj(3, 3, 3, "ns3", "oid3", 3, 3)};
+ std::vector<test_item_t> values = {
+ test.create_value(1360),
+ test.create_value(1632)};
+ test.build_tree(keys, values).get0();
+ auto value = test.create_value(1640);
+ logger().info("\n---------------------------------------------"
+ "\nsplit at [END, END, END]; insert to right at stage 0, 1, 2\n");
+ test.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3", 4, 4), value,
+ {0u, 0u, false, InsertType::BEGIN},
+ std::nullopt).get0();
+ EXPECT_TRUE(last_split.match_split_pos({1, {0, {1}}}));
+ test.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4", 3, 3), value,
+ {1u, 1u, false, InsertType::BEGIN},
+ std::nullopt).get0();
+ EXPECT_TRUE(last_split.match_split_pos({1, {1, {0}}}));
+ test.split_merge(make_ghobj(4, 4, 4, "ns3", "oid3", 3, 3), value,
+ {2u, 2u, false, InsertType::BEGIN},
+ std::nullopt).get0();
+ EXPECT_TRUE(last_split.match_split_pos({2, {0, {0}}}));
+ }
+ });
+}
+
+namespace crimson::os::seastore::onode {
+
+class DummyChildPool {
+ class DummyChildImpl final : public NodeImpl {
+ public:
+ using URef = std::unique_ptr<DummyChildImpl>;
+ DummyChildImpl(const std::set<ghobject_t>& keys, bool is_level_tail, laddr_t laddr)
+ : keys{keys}, _is_level_tail{is_level_tail}, _laddr{laddr} {
+ std::tie(key_view, p_mem_key_view) = build_key_view(*keys.crbegin());
+ build_name();
+ }
+ ~DummyChildImpl() override {
+ std::free(p_mem_key_view);
+ }
+
+ const std::set<ghobject_t>& get_keys() const { return keys; }
+
+ void reset(const std::set<ghobject_t>& _keys, bool level_tail) {
+ keys = _keys;
+ _is_level_tail = level_tail;
+ std::free(p_mem_key_view);
+ std::tie(key_view, p_mem_key_view) = build_key_view(*keys.crbegin());
+ build_name();
+ }
+
+ public:
+ laddr_t laddr() const override { return _laddr; }
+ bool is_level_tail() const override { return _is_level_tail; }
+ std::optional<key_view_t> get_pivot_index() const override { return {key_view}; }
+ bool is_extent_retired() const override { return _is_extent_retired; }
+ const std::string& get_name() const override { return name; }
+ search_position_t make_tail() override {
+ _is_level_tail = true;
+ build_name();
+ return search_position_t::end();
+ }
+ eagain_ifuture<> retire_extent(context_t) override {
+ assert(!_is_extent_retired);
+ _is_extent_retired = true;
+ return eagain_iertr::now();
+ }
+
+ protected:
+ node_type_t node_type() const override { return node_type_t::LEAF; }
+ field_type_t field_type() const override { return field_type_t::N0; }
+ const char* read() const override {
+ ceph_abort("impossible path"); }
+ extent_len_t get_node_size() const override {
+ ceph_abort("impossible path"); }
+ nextent_state_t get_extent_state() const override {
+ ceph_abort("impossible path"); }
+ level_t level() const override { return 0u; }
+ void prepare_mutate(context_t) override {
+ ceph_abort("impossible path"); }
+ void validate_non_empty() const override {
+ ceph_abort("impossible path"); }
+ bool is_keys_empty() const override {
+ ceph_abort("impossible path"); }
+ bool has_single_value() const override {
+ ceph_abort("impossible path"); }
+ node_offset_t free_size() const override {
+ ceph_abort("impossible path"); }
+ extent_len_t total_size() const override {
+ ceph_abort("impossible path"); }
+ bool is_size_underflow() const override {
+ ceph_abort("impossible path"); }
+ std::tuple<match_stage_t, search_position_t> erase(const search_position_t&) override {
+ ceph_abort("impossible path"); }
+ std::tuple<match_stage_t, std::size_t> evaluate_merge(NodeImpl&) override {
+ ceph_abort("impossible path"); }
+ search_position_t merge(NodeExtentMutable&, NodeImpl&, match_stage_t, extent_len_t) override {
+ ceph_abort("impossible path"); }
+ eagain_ifuture<NodeExtentMutable> rebuild_extent(context_t) override {
+ ceph_abort("impossible path"); }
+ node_stats_t get_stats() const override {
+ ceph_abort("impossible path"); }
+ std::ostream& dump(std::ostream&) const override {
+ ceph_abort("impossible path"); }
+ std::ostream& dump_brief(std::ostream&) const override {
+ ceph_abort("impossible path"); }
+ void validate_layout() const override {
+ ceph_abort("impossible path"); }
+ void test_copy_to(NodeExtentMutable&) const override {
+ ceph_abort("impossible path"); }
+ void test_set_tail(NodeExtentMutable&) override {
+ ceph_abort("impossible path"); }
+
+ private:
+ void build_name() {
+ std::ostringstream sos;
+ sos << "DummyNode"
+ << "@0x" << std::hex << laddr() << std::dec
+ << "Lv" << (unsigned)level()
+ << (is_level_tail() ? "$" : "")
+ << "(" << key_view << ")";
+ name = sos.str();
+ }
+
+ std::set<ghobject_t> keys;
+ bool _is_level_tail;
+ laddr_t _laddr;
+ std::string name;
+ bool _is_extent_retired = false;
+
+ key_view_t key_view;
+ void* p_mem_key_view;
+ };
+
+ class DummyChild final : public Node {
+ public:
+ ~DummyChild() override = default;
+
+ key_view_t get_pivot_key() const { return *impl->get_pivot_index(); }
+
+ eagain_ifuture<> populate_split(
+ context_t c, std::set<Ref<DummyChild>>& splitable_nodes) {
+ ceph_assert(can_split());
+ ceph_assert(splitable_nodes.find(this) != splitable_nodes.end());
+
+ size_t index;
+ const auto& keys = impl->get_keys();
+ if (keys.size() == 2) {
+ index = 1;
+ } else {
+ index = rd() % (keys.size() - 2) + 1;
+ }
+ auto iter = keys.begin();
+ std::advance(iter, index);
+
+ std::set<ghobject_t> left_keys(keys.begin(), iter);
+ std::set<ghobject_t> right_keys(iter, keys.end());
+ bool right_is_tail = impl->is_level_tail();
+ impl->reset(left_keys, false);
+ auto right_child = DummyChild::create_new(right_keys, right_is_tail, pool);
+ if (!can_split()) {
+ splitable_nodes.erase(this);
+ }
+ if (right_child->can_split()) {
+ splitable_nodes.insert(right_child);
+ }
+ Ref<Node> this_ref = this;
+ return apply_split_to_parent(
+ c, std::move(this_ref), std::move(right_child), false);
+ }
+
+ eagain_ifuture<> insert_and_split(
+ context_t c, const ghobject_t& insert_key,
+ std::set<Ref<DummyChild>>& splitable_nodes) {
+ const auto& keys = impl->get_keys();
+ ceph_assert(keys.size() == 1);
+ auto& key = *keys.begin();
+ ceph_assert(insert_key < key);
+
+ std::set<ghobject_t> new_keys;
+ new_keys.insert(insert_key);
+ new_keys.insert(key);
+ impl->reset(new_keys, impl->is_level_tail());
+
+ splitable_nodes.clear();
+ splitable_nodes.insert(this);
+ auto fut = populate_split(c, splitable_nodes);
+ ceph_assert(splitable_nodes.size() == 0);
+ return fut;
+ }
+
+ eagain_ifuture<> merge(context_t c, Ref<DummyChild>&& this_ref) {
+ return parent_info().ptr->get_child_peers(c, parent_info().position
+ ).si_then([c, this_ref = std::move(this_ref), this] (auto lr_nodes) mutable {
+ auto& [lnode, rnode] = lr_nodes;
+ if (rnode) {
+ lnode.reset();
+ Ref<DummyChild> r_dummy(static_cast<DummyChild*>(rnode.get()));
+ rnode.reset();
+ pool.untrack_node(r_dummy);
+ assert(r_dummy->use_count() == 1);
+ return do_merge(c, std::move(this_ref), std::move(r_dummy), true);
+ } else {
+ ceph_assert(lnode);
+ Ref<DummyChild> l_dummy(static_cast<DummyChild*>(lnode.get()));
+ pool.untrack_node(this_ref);
+ assert(this_ref->use_count() == 1);
+ return do_merge(c, std::move(l_dummy), std::move(this_ref), false);
+ }
+ });
+ }
+
+ eagain_ifuture<> fix_key(context_t c, const ghobject_t& new_key) {
+ const auto& keys = impl->get_keys();
+ ceph_assert(keys.size() == 1);
+ assert(impl->is_level_tail() == false);
+
+ std::set<ghobject_t> new_keys;
+ new_keys.insert(new_key);
+ impl->reset(new_keys, impl->is_level_tail());
+ Ref<Node> this_ref = this;
+ return fix_parent_index<true>(c, std::move(this_ref), false);
+ }
+
+ bool match_pos(const search_position_t& pos) const {
+ ceph_assert(!is_root());
+ return pos == parent_info().position;
+ }
+
+ static Ref<DummyChild> create(
+ const std::set<ghobject_t>& keys, bool is_level_tail,
+ laddr_t addr, DummyChildPool& pool) {
+ auto ref_impl = std::make_unique<DummyChildImpl>(keys, is_level_tail, addr);
+ return new DummyChild(ref_impl.get(), std::move(ref_impl), pool);
+ }
+
+ static Ref<DummyChild> create_new(
+ const std::set<ghobject_t>& keys, bool is_level_tail, DummyChildPool& pool) {
+ static laddr_t seed = 0;
+ return create(keys, is_level_tail, seed++, pool);
+ }
+
+ static eagain_ifuture<Ref<DummyChild>> create_initial(
+ context_t c, const std::set<ghobject_t>& keys,
+ DummyChildPool& pool, RootNodeTracker& root_tracker) {
+ auto initial = create_new(keys, true, pool);
+ return c.nm.get_super(c.t, root_tracker
+ ).handle_error_interruptible(
+ eagain_iertr::pass_further{},
+ crimson::ct_error::assert_all{"Invalid error during create_initial()"}
+ ).si_then([c, initial](auto super) {
+ initial->make_root_new(c, std::move(super));
+ return initial->upgrade_root(c, L_ADDR_MIN).si_then([initial] {
+ return initial;
+ });
+ });
+ }
+
+ protected:
+ eagain_ifuture<> test_clone_non_root(
+ context_t, Ref<InternalNode> new_parent) const override {
+ ceph_assert(!is_root());
+ auto p_pool_clone = pool.pool_clone_in_progress;
+ ceph_assert(p_pool_clone != nullptr);
+ auto clone = create(
+ impl->get_keys(), impl->is_level_tail(), impl->laddr(), *p_pool_clone);
+ clone->as_child(parent_info().position, new_parent);
+ return eagain_iertr::now();
+ }
+ eagain_ifuture<Ref<tree_cursor_t>> lookup_smallest(context_t) override {
+ ceph_abort("impossible path"); }
+ eagain_ifuture<Ref<tree_cursor_t>> lookup_largest(context_t) override {
+ ceph_abort("impossible path"); }
+ eagain_ifuture<> test_clone_root(context_t, RootNodeTracker&) const override {
+ ceph_abort("impossible path"); }
+ eagain_ifuture<search_result_t> lower_bound_tracked(
+ context_t, const key_hobj_t&, MatchHistory&) override {
+ ceph_abort("impossible path"); }
+ eagain_ifuture<> do_get_tree_stats(context_t, tree_stats_t&) override {
+ ceph_abort("impossible path"); }
+ bool is_tracking() const override { return false; }
+ void track_merge(Ref<Node>, match_stage_t, search_position_t&) override {
+ ceph_abort("impossible path"); }
+
+ private:
+ DummyChild(DummyChildImpl* impl, DummyChildImpl::URef&& ref, DummyChildPool& pool)
+ : Node(std::move(ref)), impl{impl}, pool{pool} {
+ pool.track_node(this);
+ }
+
+ bool can_split() const { return impl->get_keys().size() > 1; }
+
+ static eagain_ifuture<> do_merge(
+ context_t c, Ref<DummyChild>&& left, Ref<DummyChild>&& right, bool stole_key) {
+ assert(right->use_count() == 1);
+ assert(left->impl->get_keys().size() == 1);
+ assert(right->impl->get_keys().size() == 1);
+ bool left_is_tail = right->impl->is_level_tail();
+ const std::set<ghobject_t>* p_keys;
+ if (stole_key) {
+ p_keys = &right->impl->get_keys();
+ } else {
+ p_keys = &left->impl->get_keys();
+ }
+ left->impl->reset(*p_keys, left_is_tail);
+ auto left_addr = left->impl->laddr();
+ return left->parent_info().ptr->apply_children_merge<true>(
+ c, std::move(left), left_addr, std::move(right), !stole_key);
+ }
+
+ DummyChildImpl* impl;
+ DummyChildPool& pool;
+ mutable std::random_device rd;
+ };
+
+ public:
+ DummyChildPool() = default;
+ ~DummyChildPool() { reset(); }
+
+ auto build_tree(const std::set<ghobject_t>& keys) {
+ reset();
+ // create tree
+ auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC);
+ p_dummy = static_cast<DummyManager*>(ref_dummy.get());
+ p_btree.emplace(std::move(ref_dummy));
+ return with_trans_intr(get_context().t, [this, &keys] (auto &tr) {
+ return DummyChild::create_initial(get_context(), keys, *this, *p_btree->root_tracker
+ ).si_then([this](auto initial_child) {
+ // split
+ splitable_nodes.insert(initial_child);
+ return trans_intr::repeat([this] ()
+ -> eagain_ifuture<seastar::stop_iteration> {
+ if (splitable_nodes.empty()) {
+ return seastar::make_ready_future<seastar::stop_iteration>(
+ seastar::stop_iteration::yes);
+ }
+ auto index = rd() % splitable_nodes.size();
+ auto iter = splitable_nodes.begin();
+ std::advance(iter, index);
+ Ref<DummyChild> child = *iter;
+ return child->populate_split(get_context(), splitable_nodes
+ ).si_then([] {
+ return seastar::stop_iteration::no;
+ });
+ });
+ }).si_then([this] {
+ //std::ostringstream oss;
+ //p_btree->dump(t(), oss);
+ //logger().info("\n{}\n", oss.str());
+ return p_btree->height(t());
+ }).si_then([](auto height) {
+ ceph_assert(height == 2);
+ });
+ });
+ }
+
+ seastar::future<> split_merge(ghobject_t key, search_position_t pos,
+ const split_expectation_t& expected) {
+ return seastar::async([this, key, pos, expected] {
+ DummyChildPool pool_clone;
+ clone_to(pool_clone);
+
+ // insert and split
+ logger().info("\n\nINSERT-SPLIT {} at pos({}):", key_hobj_t(key), pos);
+ auto node_to_split = pool_clone.get_node_by_pos(pos);
+ with_trans_intr(pool_clone.get_context().t, [&] (auto &t) {
+ return node_to_split->insert_and_split(
+ pool_clone.get_context(), key, pool_clone.splitable_nodes);
+ }).unsafe_get0();
+ {
+ std::ostringstream oss;
+ pool_clone.p_btree->dump(pool_clone.t(), oss);
+ logger().info("dump new root:\n{}", oss.str());
+ }
+ auto &pt = pool_clone.t();
+ EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 3);
+ EXPECT_TRUE(last_split.match(expected));
+ EXPECT_EQ(pool_clone.p_dummy->size(), 3);
+
+ // erase and merge
+ [[maybe_unused]] auto pivot_key = node_to_split->get_pivot_key();
+ logger().info("\n\nERASE-MERGE {}:", node_to_split->get_name());
+ assert(pivot_key == key_hobj_t(key));
+ with_trans_intr(pool_clone.get_context().t, [&] (auto &t) {
+ return node_to_split->merge(
+ pool_clone.get_context(), std::move(node_to_split));
+ }).unsafe_get0();
+ auto &pt2 = pool_clone.t();
+ EXPECT_EQ(INTR(pool_clone.p_btree->height ,pt2).unsafe_get0(), 2);
+ EXPECT_EQ(pool_clone.p_dummy->size(), 1);
+ });
+ }
+
+ seastar::future<> fix_index(
+ ghobject_t new_key, search_position_t pos, bool expect_split) {
+ return seastar::async([this, new_key, pos, expect_split] {
+ DummyChildPool pool_clone;
+ clone_to(pool_clone);
+
+ // fix
+ auto node_to_fix = pool_clone.get_node_by_pos(pos);
+ auto old_key = node_to_fix->get_pivot_key().to_ghobj();
+ logger().info("\n\nFIX pos({}) from {} to {}, expect_split={}:",
+ pos, node_to_fix->get_name(), key_hobj_t(new_key), expect_split);
+ with_trans_intr(pool_clone.get_context().t, [&] (auto &t) {
+ return node_to_fix->fix_key(pool_clone.get_context(), new_key);
+ }).unsafe_get0();
+ if (expect_split) {
+ std::ostringstream oss;
+ pool_clone.p_btree->dump(pool_clone.t(), oss);
+ logger().info("dump new root:\n{}", oss.str());
+ auto &pt = pool_clone.t();
+ EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 3);
+ EXPECT_EQ(pool_clone.p_dummy->size(), 3);
+ } else {
+ auto &pt = pool_clone.t();
+ EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 2);
+ EXPECT_EQ(pool_clone.p_dummy->size(), 1);
+ }
+
+ // fix back
+ logger().info("\n\nFIX pos({}) from {} back to {}:",
+ pos, node_to_fix->get_name(), key_hobj_t(old_key));
+ with_trans_intr(pool_clone.get_context().t, [&] (auto &t) {
+ return node_to_fix->fix_key(pool_clone.get_context(), old_key);
+ }).unsafe_get0();
+ auto &pt = pool_clone.t();
+ EXPECT_EQ(INTR(pool_clone.p_btree->height, pt).unsafe_get0(), 2);
+ EXPECT_EQ(pool_clone.p_dummy->size(), 1);
+ });
+ }
+
+ private:
+ void clone_to(DummyChildPool& pool_clone) {
+ pool_clone_in_progress = &pool_clone;
+ auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC);
+ pool_clone.p_dummy = static_cast<DummyManager*>(ref_dummy.get());
+ pool_clone.p_btree.emplace(std::move(ref_dummy));
+ auto &pt = pool_clone.t();
+ [[maybe_unused]] auto &tr = t();
+ INTR_R(pool_clone.p_btree->test_clone_from,
+ pt, tr, *p_btree).unsafe_get0();
+ pool_clone_in_progress = nullptr;
+ }
+
+ void reset() {
+ ceph_assert(pool_clone_in_progress == nullptr);
+ if (tracked_children.size()) {
+ ceph_assert(!p_btree->test_is_clean());
+ tracked_children.clear();
+ ceph_assert(p_btree->test_is_clean());
+ p_dummy = nullptr;
+ p_btree.reset();
+ } else {
+ ceph_assert(!p_btree.has_value());
+ }
+ splitable_nodes.clear();
+ }
+
+ void track_node(Ref<DummyChild> node) {
+ ceph_assert(tracked_children.find(node) == tracked_children.end());
+ tracked_children.insert(node);
+ }
+
+ void untrack_node(Ref<DummyChild> node) {
+ auto ret = tracked_children.erase(node);
+ ceph_assert(ret == 1);
+ }
+
+ Ref<DummyChild> get_node_by_pos(const search_position_t& pos) const {
+ auto iter = std::find_if(
+ tracked_children.begin(), tracked_children.end(), [&pos](auto& child) {
+ return child->match_pos(pos);
+ });
+ ceph_assert(iter != tracked_children.end());
+ return *iter;
+ }
+
+ context_t get_context() {
+ ceph_assert(p_dummy != nullptr);
+ return {*p_dummy, vb, t()};
+ }
+
+ Transaction& t() const { return *ref_t; }
+
+ std::set<Ref<DummyChild>> tracked_children;
+ std::optional<UnboundedBtree> p_btree;
+ DummyManager* p_dummy = nullptr;
+ ValueBuilderImpl<UnboundedValue> vb;
+ TransactionRef ref_t = make_test_transaction();
+
+ std::random_device rd;
+ std::set<Ref<DummyChild>> splitable_nodes;
+
+ DummyChildPool* pool_clone_in_progress = nullptr;
+};
+
+}
+
+TEST_F(c_dummy_test_t, 5_split_merge_internal_node)
+{
+ run_async([] {
+ DummyChildPool pool;
+ {
+ logger().info("\n---------------------------------------------"
+ "\nbefore internal node insert:\n");
+ auto padding = std::string(250, '_');
+ auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true);
+ keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2));
+ keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3));
+ keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4));
+ keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 2, 2));
+ keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 3, 3));
+ keys.erase(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 4, 4));
+ auto padding_s = std::string(257, '_');
+ keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2));
+ keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3));
+ keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4));
+ auto padding_e = std::string(247, '_');
+ keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 2, 2));
+ keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 3, 3));
+ keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 4, 4));
+ pool.build_tree(keys).unsafe_get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to right front at stage 0, 1, 2, 1, 0\n");
+ pool.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 5, 5), {2, {0, {0}}},
+ {2u, 0u, false, InsertType::BEGIN}).get();
+ pool.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), {2, {0, {0}}},
+ {2u, 1u, false, InsertType::BEGIN}).get();
+ pool.split_merge(make_ghobj(3, 4, 4, "ns3", "oid3", 3, 3), {2, {0, {0}}},
+ {2u, 2u, false, InsertType::BEGIN}).get();
+ pool.split_merge(make_ghobj(4, 4, 4, "ns1", "oid1", 3, 3), {2, {0, {0}}},
+ {2u, 1u, false, InsertType::BEGIN}).get();
+ pool.split_merge(make_ghobj(4, 4, 4, "ns2", "oid2" + padding, 1, 1), {2, {0, {0}}},
+ {2u, 0u, false, InsertType::BEGIN}).get();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to right middle at stage 0, 1, 2, 1, 0\n");
+ pool.split_merge(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5), {3, {0, {0}}},
+ {2u, 0u, false, InsertType::MID}).get();
+ pool.split_merge(make_ghobj(4, 4, 4, "ns5", "oid5", 3, 3), {3, {0, {0}}},
+ {2u, 1u, false, InsertType::MID}).get();
+ pool.split_merge(make_ghobj(4, 4, 5, "ns3", "oid3", 3, 3), {3, {0, {0}}},
+ {2u, 2u, false, InsertType::MID}).get();
+ pool.split_merge(make_ghobj(5, 5, 5, "ns1", "oid1", 3, 3), {3, {0, {0}}},
+ {2u, 1u, false, InsertType::MID}).get();
+ pool.split_merge(make_ghobj(5, 5, 5, "ns2", "oid2" + padding, 1, 1), {3, {0, {0}}},
+ {2u, 0u, false, InsertType::MID}).get();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to right back at stage 0, 1, 2\n");
+ pool.split_merge(make_ghobj(5, 5, 5, "ns4", "oid4" + padding_e, 5, 5), search_position_t::end() ,
+ {2u, 0u, false, InsertType::LAST}).get();
+ pool.split_merge(make_ghobj(5, 5, 5, "ns5", "oid5", 3, 3), search_position_t::end(),
+ {2u, 1u, false, InsertType::LAST}).get();
+ pool.split_merge(make_ghobj(6, 6, 6, "ns3", "oid3", 3, 3), search_position_t::end(),
+ {2u, 2u, false, InsertType::LAST}).get();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 0; insert to left front at stage 2, 1, 0\n");
+ pool.split_merge(make_ghobj(1, 1, 1, "ns3", "oid3", 3, 3), {0, {0, {0}}},
+ {0u, 2u, true, InsertType::BEGIN}).get();
+ pool.split_merge(make_ghobj(2, 2, 2, "ns1", "oid1", 3, 3), {0, {0, {0}}},
+ {0u, 1u, true, InsertType::BEGIN}).get();
+ pool.split_merge(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 1, 1), {0, {0, {0}}},
+ {0u, 0u, true, InsertType::BEGIN}).get();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 0; insert to left middle at stage 0, 1, 2, 1, 0\n");
+ pool.split_merge(make_ghobj(2, 2, 2, "ns4", "oid4" + padding, 5, 5), {1, {0, {0}}},
+ {0u, 0u, true, InsertType::MID}).get();
+ pool.split_merge(make_ghobj(2, 2, 2, "ns5", "oid5", 3, 3), {1, {0, {0}}},
+ {0u, 1u, true, InsertType::MID}).get();
+ pool.split_merge(make_ghobj(2, 2, 3, "ns3", "oid3" + std::string(80, '_'), 3, 3), {1, {0, {0}}} ,
+ {0u, 2u, true, InsertType::MID}).get();
+ pool.split_merge(make_ghobj(3, 3, 3, "ns1", "oid1", 3, 3), {1, {0, {0}}},
+ {0u, 1u, true, InsertType::MID}).get();
+ pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 1, 1), {1, {0, {0}}},
+ {0u, 0u, true, InsertType::MID}).get();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 0; insert to left back at stage 0\n");
+ pool.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 3, 4), {1, {2, {2}}},
+ {0u, 0u, true, InsertType::LAST}).get();
+ }
+
+ {
+ logger().info("\n---------------------------------------------"
+ "\nbefore internal node insert (1):\n");
+ auto padding = std::string(244, '_');
+ auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true);
+ keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5));
+ keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6));
+ keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 7, 7));
+ pool.build_tree(keys).unsafe_get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to left back at stage 0, 1, 2, 1\n");
+ pool.split_merge(make_ghobj(3, 3, 3, "ns4", "oid4" + padding, 5, 5), {2, {0, {0}}},
+ {2u, 0u, true, InsertType::LAST}).get();
+ pool.split_merge(make_ghobj(3, 3, 3, "ns5", "oid5", 3, 3), {2, {0, {0}}},
+ {2u, 1u, true, InsertType::LAST}).get();
+ pool.split_merge(make_ghobj(3, 4, 4, "n", "o", 3, 3), {2, {0, {0}}},
+ {2u, 2u, true, InsertType::LAST}).get();
+ pool.split_merge(make_ghobj(4, 4, 4, "n", "o", 3, 3), {2, {0, {0}}},
+ {2u, 1u, true, InsertType::LAST}).get();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to left middle at stage 2\n");
+ pool.split_merge(make_ghobj(2, 3, 3, "n", "o", 3, 3), {1, {0, {0}}},
+ {2u, 2u, true, InsertType::MID}).get();
+ }
+
+ {
+ logger().info("\n---------------------------------------------"
+ "\nbefore internal node insert (2):\n");
+ auto padding = std::string(243, '_');
+ auto keys = build_key_set({2, 6}, {2, 5}, {2, 5}, padding, true);
+ keys.insert(make_ghobj(4, 4, 4, "n", "o", 3, 3));
+ keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 5, 5));
+ keys.insert(make_ghobj(5, 5, 5, "ns4", "oid4" + padding, 6, 6));
+ pool.build_tree(keys).unsafe_get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 2; insert to left back at stage (0, 1, 2, 1,) 0\n");
+ pool.split_merge(make_ghobj(4, 4, 4, "n", "o", 2, 2), {2, {0, {0}}},
+ {2u, 0u, true, InsertType::LAST}).get();
+ }
+
+ {
+ logger().info("\n---------------------------------------------"
+ "\nbefore internal node insert (3):\n");
+ auto padding = std::string(419, '_');
+ auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true);
+ keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 2, 2));
+ keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 3, 3));
+ keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 4, 4));
+ pool.build_tree(keys).unsafe_get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 1; insert to right front at stage 0, 1, 0\n");
+ pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 5, 5), {1, {1, {0}}},
+ {1u, 0u, false, InsertType::BEGIN}).get();
+ pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), {1, {1, {0}}},
+ {1u, 1u, false, InsertType::BEGIN}).get();
+ pool.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3" + padding, 1, 1), {1, {1, {0}}},
+ {1u, 0u, false, InsertType::BEGIN}).get();
+ }
+
+ {
+ logger().info("\n---------------------------------------------"
+ "\nbefore internal node insert (4):\n");
+ auto padding = std::string(361, '_');
+ auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding, true);
+ keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 2, 2));
+ keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 3, 3));
+ keys.erase(make_ghobj(2, 2, 2, "ns2", "oid2" + padding, 4, 4));
+ auto padding_s = std::string(386, '_');
+ keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 2, 2));
+ keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 3, 3));
+ keys.insert(make_ghobj(2, 2, 2, "ns2", "oid2" + padding_s, 4, 4));
+ pool.build_tree(keys).unsafe_get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 1; insert to left back at stage 0, 1\n");
+ pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid2" + padding, 5, 5), {1, {1, {0}}},
+ {1u, 0u, true, InsertType::LAST}).get();
+ pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3), {1, {1, {0}}},
+ {1u, 1u, true, InsertType::LAST}).get();
+
+ logger().info("\n---------------------------------------------"
+ "\nfix end index from stage 0 to 0, 1, 2\n");
+ auto padding1 = std::string(400, '_');
+ pool.fix_index(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5),
+ {2, {2, {2}}}, false).get();
+ pool.fix_index(make_ghobj(4, 4, 4, "ns5", "oid5" + padding1, 3, 3),
+ {2, {2, {2}}}, true).get();
+ pool.fix_index(make_ghobj(5, 5, 5, "ns3", "oid3" + padding1, 3, 3),
+ {2, {2, {2}}}, true).get();
+ }
+
+ {
+ logger().info("\n---------------------------------------------"
+ "\nbefore internal node insert (5):\n");
+ auto padding = std::string(412, '_');
+ auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding);
+ keys.insert(make_ghobj(3, 3, 3, "ns2", "oid3", 3, 3));
+ keys.insert(make_ghobj(4, 4, 4, "ns3", "oid3" + padding, 5, 5));
+ keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9));
+ keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 2, 2));
+ keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 3, 3));
+ keys.erase(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 4, 4));
+ pool.build_tree(keys).unsafe_get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 1; insert to left back at stage (0, 1,) 0\n");
+ pool.split_merge(make_ghobj(3, 3, 3, "ns2", "oid3", 2, 2), {1, {1, {0}}},
+ {1u, 0u, true, InsertType::LAST}).get();
+ }
+
+ {
+ logger().info("\n---------------------------------------------"
+ "\nbefore internal node insert (6):\n");
+ auto padding = std::string(328, '_');
+ auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding);
+ keys.insert(make_ghobj(5, 5, 5, "ns3", "oid3" + std::string(270, '_'), 3, 3));
+ keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9));
+ pool.build_tree(keys).unsafe_get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nsplit at stage 0; insert to right front at stage 0\n");
+ pool.split_merge(make_ghobj(3, 3, 3, "ns3", "oid3" + padding, 2, 3), {1, {1, {1}}},
+ {0u, 0u, false, InsertType::BEGIN}).get();
+
+ logger().info("\n---------------------------------------------"
+ "\nfix end index from stage 2 to 0, 1, 2\n");
+ auto padding1 = std::string(400, '_');
+ pool.fix_index(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5),
+ {3, {0, {0}}}, false).get();
+ pool.fix_index(make_ghobj(4, 4, 4, "ns5", "oid5" + padding1, 3, 3),
+ {3, {0, {0}}}, true).get();
+ pool.fix_index(make_ghobj(5, 5, 5, "ns4", "oid4" + padding1, 3, 3),
+ {3, {0, {0}}}, true).get();
+ }
+
+ {
+ logger().info("\n---------------------------------------------"
+ "\nbefore internal node insert (7):\n");
+ auto padding = std::string(323, '_');
+ auto keys = build_key_set({2, 5}, {2, 5}, {2, 5}, padding);
+ keys.insert(make_ghobj(4, 4, 4, "ns5", "oid5" + padding, 3, 3));
+ keys.insert(make_ghobj(9, 9, 9, "ns~last", "oid~last", 9, 9));
+ pool.build_tree(keys).unsafe_get0();
+
+ logger().info("\n---------------------------------------------"
+ "\nfix end index from stage 1 to 0, 1, 2\n");
+ auto padding1 = std::string(400, '_');
+ pool.fix_index(make_ghobj(4, 4, 4, "ns4", "oid4" + padding, 5, 5),
+ {2, {3, {0}}}, false).get();
+ pool.fix_index(make_ghobj(4, 4, 4, "ns6", "oid6" + padding1, 3, 3),
+ {2, {3, {0}}}, true).get();
+ pool.fix_index(make_ghobj(5, 5, 5, "ns3", "oid3" + padding1, 3, 3),
+ {2, {3, {0}}}, true).get();
+ }
+
+ // Impossible to split at {0, 0, 0}
+ // Impossible to split at [END, END, END]
+ });
+}
+
+struct d_seastore_tm_test_t :
+ public seastar_test_suite_t, TMTestState {
+ seastar::future<> set_up_fut() override final {
+ return tm_setup();
+ }
+ seastar::future<> tear_down_fut() override final {
+ return tm_teardown();
+ }
+};
+
+TEST_P(d_seastore_tm_test_t, 6_random_tree_insert_erase)
+{
+ run_async([this] {
+ constexpr bool TEST_SEASTORE = true;
+ constexpr bool TRACK_CURSORS = true;
+ auto kvs = KVPool<test_item_t>::create_raw_range(
+ {8, 11, 64, 256, 301, 320},
+ {8, 11, 64, 256, 301, 320},
+ {8, 16, 128, 512, 576, 640},
+ {0, 16}, {0, 10}, {0, 4});
+ auto moved_nm = (TEST_SEASTORE ? NodeExtentManager::create_seastore(*tm)
+ : NodeExtentManager::create_dummy(IS_DUMMY_SYNC));
+ auto p_nm = moved_nm.get();
+ auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, BoundedValue>>(
+ kvs, std::move(moved_nm));
+ {
+ auto t = create_mutate_transaction();
+ INTR(tree->bootstrap, *t).unsafe_get();
+ submit_transaction(std::move(t));
+ }
+
+ // test insert
+ {
+ auto t = create_mutate_transaction();
+ INTR(tree->insert, *t).unsafe_get();
+ submit_transaction(std::move(t));
+ }
+ {
+ auto t = create_read_transaction();
+ INTR(tree->get_stats, *t).unsafe_get();
+ }
+ if constexpr (TEST_SEASTORE) {
+ restart();
+ tree->reload(NodeExtentManager::create_seastore(*tm));
+ }
+ {
+ // Note: create_weak_transaction() can also work, but too slow.
+ auto t = create_read_transaction();
+ INTR(tree->validate, *t).unsafe_get();
+ }
+
+ // test erase 3/4
+ {
+ auto t = create_mutate_transaction();
+ auto size = kvs.size() / 4 * 3;
+ INTR_R(tree->erase, *t, size).unsafe_get();
+ submit_transaction(std::move(t));
+ }
+ {
+ auto t = create_read_transaction();
+ INTR(tree->get_stats, *t).unsafe_get();
+ }
+ if constexpr (TEST_SEASTORE) {
+ restart();
+ tree->reload(NodeExtentManager::create_seastore(*tm));
+ }
+ {
+ auto t = create_read_transaction();
+ INTR(tree->validate, *t).unsafe_get();
+ }
+
+ // test erase remaining
+ {
+ auto t = create_mutate_transaction();
+ auto size = kvs.size();
+ INTR_R(tree->erase, *t, size).unsafe_get();
+ submit_transaction(std::move(t));
+ }
+ {
+ auto t = create_read_transaction();
+ INTR(tree->get_stats, *t).unsafe_get();
+ }
+ if constexpr (TEST_SEASTORE) {
+ restart();
+ tree->reload(NodeExtentManager::create_seastore(*tm));
+ }
+ {
+ auto t = create_read_transaction();
+ INTR(tree->validate, *t).unsafe_get();
+ EXPECT_EQ(INTR(tree->height, *t).unsafe_get0(), 1);
+ }
+
+ if constexpr (!TEST_SEASTORE) {
+ auto p_dummy = static_cast<DummyManager*>(p_nm);
+ EXPECT_EQ(p_dummy->size(), 1);
+ }
+ tree.reset();
+ });
+}
+
+TEST_P(d_seastore_tm_test_t, 7_tree_insert_erase_eagain)
+{
+ run_async([this] {
+ constexpr double EAGAIN_PROBABILITY = 0.1;
+ constexpr bool TRACK_CURSORS = false;
+ auto kvs = KVPool<test_item_t>::create_raw_range(
+ {8, 11, 64, 128, 255, 256},
+ {8, 13, 64, 512, 2035, 2048},
+ {8, 16, 128, 576, 992, 1200},
+ {0, 8}, {0, 10}, {0, 4});
+ auto moved_nm = NodeExtentManager::create_seastore(
+ *tm, L_ADDR_MIN, EAGAIN_PROBABILITY);
+ auto p_nm = static_cast<SeastoreNodeExtentManager<true>*>(moved_nm.get());
+ auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, ExtendedValue>>(
+ kvs, std::move(moved_nm));
+ unsigned num_ops = 0;
+ unsigned num_ops_eagain = 0;
+
+ // bootstrap
+ ++num_ops;
+ repeat_eagain([this, &tree, &num_ops_eagain] {
+ ++num_ops_eagain;
+ return seastar::do_with(
+ create_mutate_transaction(),
+ [this, &tree](auto &t) {
+ return INTR(tree->bootstrap, *t
+ ).safe_then([this, &t] {
+ return submit_transaction_fut(*t);
+ });
+ });
+ }).unsafe_get0();
+ epm->run_background_work_until_halt().get0();
+
+ // insert
+ logger().warn("start inserting {} kvs ...", kvs.size());
+ {
+ auto iter = kvs.random_begin();
+ while (iter != kvs.random_end()) {
+ ++num_ops;
+ repeat_eagain([this, &tree, &num_ops_eagain, &iter] {
+ ++num_ops_eagain;
+ return seastar::do_with(
+ create_mutate_transaction(),
+ [this, &tree, &iter](auto &t) {
+ return INTR_R(tree->insert_one, *t, iter
+ ).safe_then([this, &t](auto cursor) {
+ cursor.invalidate();
+ return submit_transaction_fut(*t);
+ });
+ });
+ }).unsafe_get0();
+ epm->run_background_work_until_halt().get0();
+ ++iter;
+ }
+ }
+
+ {
+ p_nm->set_generate_eagain(false);
+ auto t = create_read_transaction();
+ INTR(tree->get_stats, *t).unsafe_get0();
+ p_nm->set_generate_eagain(true);
+ }
+
+ // lookup
+ logger().warn("start lookup {} kvs ...", kvs.size());
+ {
+ auto iter = kvs.begin();
+ while (iter != kvs.end()) {
+ ++num_ops;
+ repeat_eagain([this, &tree, &num_ops_eagain, &iter] {
+ ++num_ops_eagain;
+ auto t = create_read_transaction();
+ return INTR_R(tree->validate_one, *t, iter
+ ).safe_then([t=std::move(t)]{});
+ }).unsafe_get0();
+ ++iter;
+ }
+ }
+
+ // erase
+ logger().warn("start erase {} kvs ...", kvs.size());
+ {
+ kvs.shuffle();
+ auto iter = kvs.random_begin();
+ while (iter != kvs.random_end()) {
+ ++num_ops;
+ repeat_eagain([this, &tree, &num_ops_eagain, &iter] {
+ ++num_ops_eagain;
+ return seastar::do_with(
+ create_mutate_transaction(),
+ [this, &tree, &iter](auto &t) {
+ return INTR_R(tree->erase_one, *t, iter
+ ).safe_then([this, &t] () mutable {
+ return submit_transaction_fut(*t);
+ });
+ });
+ }).unsafe_get0();
+ epm->run_background_work_until_halt().get0();
+ ++iter;
+ }
+ kvs.erase_from_random(kvs.random_begin(), kvs.random_end());
+ }
+
+ {
+ p_nm->set_generate_eagain(false);
+ auto t = create_read_transaction();
+ INTR(tree->get_stats, *t).unsafe_get0();
+ INTR(tree->validate, *t).unsafe_get0();
+ EXPECT_EQ(INTR(tree->height,*t).unsafe_get0(), 1);
+ }
+
+ // we can adjust EAGAIN_PROBABILITY to get a proper eagain_rate
+ double eagain_rate = num_ops_eagain;
+ eagain_rate /= num_ops;
+ logger().info("eagain rate: {}", eagain_rate);
+
+ tree.reset();
+ });
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ d_seastore_tm_test,
+ d_seastore_tm_test_t,
+ ::testing::Values (
+ "segmented",
+ "circularbounded"
+ )
+);
diff --git a/src/test/crimson/seastore/onode_tree/test_value.h b/src/test/crimson/seastore/onode_tree/test_value.h
new file mode 100644
index 000000000..98249f8c9
--- /dev/null
+++ b/src/test/crimson/seastore/onode_tree/test_value.h
@@ -0,0 +1,240 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include "crimson/common/log.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/value.h"
+
+namespace crimson::os::seastore::onode {
+
+struct test_item_t {
+ using id_t = uint16_t;
+ using magic_t = uint32_t;
+
+ value_size_t size;
+ id_t id;
+ magic_t magic;
+
+ value_size_t get_payload_size() const {
+ assert(size > sizeof(value_header_t));
+ return static_cast<value_size_t>(size - sizeof(value_header_t));
+ }
+
+ static test_item_t create(std::size_t _size, std::size_t _id) {
+ ceph_assert(_size <= std::numeric_limits<value_size_t>::max());
+ ceph_assert(_size > sizeof(value_header_t));
+ value_size_t size = _size;
+
+ ceph_assert(_id <= std::numeric_limits<id_t>::max());
+ id_t id = _id;
+
+ return {size, id, (magic_t)id * 137};
+ }
+};
+inline std::ostream& operator<<(std::ostream& os, const test_item_t& item) {
+ return os << "TestItem(#" << item.id << ", " << item.size << "B)";
+}
+
+enum class delta_op_t : uint8_t {
+ UPDATE_ID,
+ UPDATE_TAIL_MAGIC,
+};
+
+inline std::ostream& operator<<(std::ostream& os, const delta_op_t op) {
+ switch (op) {
+ case delta_op_t::UPDATE_ID:
+ return os << "update_id";
+ case delta_op_t::UPDATE_TAIL_MAGIC:
+ return os << "update_tail_magic";
+ default:
+ return os << "unknown";
+ }
+}
+
+} // namespace crimson::os::seastore::onode
+
+#if FMT_VERSION >= 90000
+template<> struct fmt::formatter<crimson::os::seastore::onode::delta_op_t> : fmt::ostream_formatter {};
+#endif
+
+namespace crimson::os::seastore::onode {
+
+template <value_magic_t MAGIC,
+ string_size_t MAX_NS_SIZE,
+ string_size_t MAX_OID_SIZE,
+ value_size_t MAX_VALUE_PAYLOAD_SIZE,
+ extent_len_t INTERNAL_NODE_SIZE,
+ extent_len_t LEAF_NODE_SIZE,
+ bool DO_SPLIT_CHECK>
+class TestValue final : public Value {
+ public:
+ static constexpr tree_conf_t TREE_CONF = {
+ MAGIC,
+ MAX_NS_SIZE,
+ MAX_OID_SIZE,
+ MAX_VALUE_PAYLOAD_SIZE,
+ INTERNAL_NODE_SIZE,
+ LEAF_NODE_SIZE,
+ DO_SPLIT_CHECK
+ };
+
+ using id_t = test_item_t::id_t;
+ using magic_t = test_item_t::magic_t;
+ struct magic_packed_t {
+ magic_t value;
+ } __attribute__((packed));
+
+ private:
+ struct payload_t {
+ id_t id;
+ } __attribute__((packed));
+
+ struct Replayable {
+ static void set_id(NodeExtentMutable& payload_mut, id_t id) {
+ auto p_payload = get_write(payload_mut);
+ p_payload->id = id;
+ }
+
+ static void set_tail_magic(NodeExtentMutable& payload_mut, magic_t magic) {
+ auto length = payload_mut.get_length();
+ auto offset_magic = length - sizeof(magic_t);
+ payload_mut.copy_in_relative(offset_magic, magic);
+ }
+
+ private:
+ static payload_t* get_write(NodeExtentMutable& payload_mut) {
+ return reinterpret_cast<payload_t*>(payload_mut.get_write());
+ }
+ };
+
+ public:
+ class Recorder final : public ValueDeltaRecorder {
+
+ public:
+ Recorder(ceph::bufferlist& encoded)
+ : ValueDeltaRecorder(encoded) {}
+ ~Recorder() override = default;
+
+ void encode_set_id(NodeExtentMutable& payload_mut, id_t id) {
+ auto& encoded = get_encoded(payload_mut);
+ ceph::encode(delta_op_t::UPDATE_ID, encoded);
+ ceph::encode(id, encoded);
+ }
+
+ void encode_set_tail_magic(NodeExtentMutable& payload_mut, magic_t magic) {
+ auto& encoded = get_encoded(payload_mut);
+ ceph::encode(delta_op_t::UPDATE_TAIL_MAGIC, encoded);
+ ceph::encode(magic, encoded);
+ }
+
+ protected:
+ value_magic_t get_header_magic() const override {
+ return TREE_CONF.value_magic;
+ }
+
+ void apply_value_delta(ceph::bufferlist::const_iterator& delta,
+ NodeExtentMutable& payload_mut,
+ laddr_t value_addr) override {
+ delta_op_t op;
+ try {
+ ceph::decode(op, delta);
+ switch (op) {
+ case delta_op_t::UPDATE_ID: {
+ logger().debug("OTree::TestValue::Replay: decoding UPDATE_ID ...");
+ id_t id;
+ ceph::decode(id, delta);
+ logger().debug("OTree::TestValue::Replay: apply id={} ...", id);
+ Replayable::set_id(payload_mut, id);
+ break;
+ }
+ case delta_op_t::UPDATE_TAIL_MAGIC: {
+ logger().debug("OTree::TestValue::Replay: decoding UPDATE_TAIL_MAGIC ...");
+ magic_t magic;
+ ceph::decode(magic, delta);
+ logger().debug("OTree::TestValue::Replay: apply magic={} ...", magic);
+ Replayable::set_tail_magic(payload_mut, magic);
+ break;
+ }
+ default:
+ logger().error("OTree::TestValue::Replay: got unknown op {} when replay {:#x}+{:#x}",
+ op, value_addr, payload_mut.get_length());
+ ceph_abort();
+ }
+ } catch (buffer::error& e) {
+ logger().error("OTree::TestValue::Replay: got decode error {} when replay {:#x}+{:#x}",
+ e.what(), value_addr, payload_mut.get_length());
+ ceph_abort();
+ }
+ }
+
+ private:
+ seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+ };
+
+ TestValue(NodeExtentManager& nm, const ValueBuilder& vb, Ref<tree_cursor_t>& p_cursor)
+ : Value(nm, vb, p_cursor) {}
+ ~TestValue() override = default;
+
+ id_t get_id() const {
+ return read_payload<payload_t>()->id;
+ }
+ void set_id_replayable(Transaction& t, id_t id) {
+ auto value_mutable = prepare_mutate_payload<payload_t, Recorder>(t);
+ if (value_mutable.second) {
+ value_mutable.second->encode_set_id(value_mutable.first, id);
+ }
+ Replayable::set_id(value_mutable.first, id);
+ }
+
+ magic_t get_tail_magic() const {
+ auto p_payload = read_payload<payload_t>();
+ auto offset_magic = get_payload_size() - sizeof(magic_t);
+ auto p_magic = reinterpret_cast<const char*>(p_payload) + offset_magic;
+ return reinterpret_cast<const magic_packed_t*>(p_magic)->value;
+ }
+ void set_tail_magic_replayable(Transaction& t, magic_t magic) {
+ auto value_mutable = prepare_mutate_payload<payload_t, Recorder>(t);
+ if (value_mutable.second) {
+ value_mutable.second->encode_set_tail_magic(value_mutable.first, magic);
+ }
+ Replayable::set_tail_magic(value_mutable.first, magic);
+ }
+
+ /*
+ * tree_util.h related interfaces
+ */
+
+ using item_t = test_item_t;
+
+ void initialize(Transaction& t, const item_t& item) {
+ ceph_assert(get_payload_size() + sizeof(value_header_t) == item.size);
+ set_id_replayable(t, item.id);
+ set_tail_magic_replayable(t, item.magic);
+ }
+
+ void validate(const item_t& item) const {
+ ceph_assert(get_payload_size() + sizeof(value_header_t) == item.size);
+ ceph_assert(get_id() == item.id);
+ ceph_assert(get_tail_magic() == item.magic);
+ }
+};
+
+using UnboundedValue = TestValue<
+ value_magic_t::TEST_UNBOUND, 4096, 4096, 4096, 4096, 4096, false>;
+using BoundedValue = TestValue<
+ value_magic_t::TEST_BOUNDED, 320, 320, 640, 4096, 4096, true>;
+// should be the same configuration with FLTreeOnode
+using ExtendedValue = TestValue<
+ value_magic_t::TEST_EXTENDED, 256, 2048, 1200, 8192, 16384, true>;
+
+}
+
+#if FMT_VERSION >= 90000
+template<>
+struct fmt::formatter<crimson::os::seastore::onode::test_item_t> : fmt::ostream_formatter {};
+#endif
diff --git a/src/test/crimson/seastore/test_block.cc b/src/test/crimson/seastore/test_block.cc
new file mode 100644
index 000000000..f7a39b0ef
--- /dev/null
+++ b/src/test/crimson/seastore/test_block.cc
@@ -0,0 +1,41 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/seastore/test_block.h"
+
+namespace crimson::os::seastore {
+
+
+ceph::bufferlist TestBlock::get_delta() {
+ ceph::bufferlist bl;
+ encode(delta, bl);
+ return bl;
+}
+
+
+void TestBlock::apply_delta(const ceph::bufferlist &bl) {
+ auto biter = bl.begin();
+ decltype(delta) deltas;
+ decode(deltas, biter);
+ for (auto &&d : deltas) {
+ set_contents(d.val, d.offset, d.len);
+ }
+}
+
+ceph::bufferlist TestBlockPhysical::get_delta() {
+ ceph::bufferlist bl;
+ encode(delta, bl);
+ return bl;
+}
+
+void TestBlockPhysical::apply_delta_and_adjust_crc(
+ paddr_t, const ceph::bufferlist &bl) {
+ auto biter = bl.begin();
+ decltype(delta) deltas;
+ decode(deltas, biter);
+ for (auto &&d : deltas) {
+ set_contents(d.val, d.offset, d.len);
+ }
+}
+
+}
diff --git a/src/test/crimson/seastore/test_block.h b/src/test/crimson/seastore/test_block.h
new file mode 100644
index 000000000..ccdafb784
--- /dev/null
+++ b/src/test/crimson/seastore/test_block.h
@@ -0,0 +1,154 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <random>
+
+#include "crimson/os/seastore/transaction_manager.h"
+
+namespace crimson::os::seastore {
+
+struct test_extent_desc_t {
+ size_t len = 0;
+ unsigned checksum = 0;
+
+ bool operator==(const test_extent_desc_t &rhs) const {
+ return (len == rhs.len &&
+ checksum == rhs.checksum);
+ }
+ bool operator!=(const test_extent_desc_t &rhs) const {
+ return !(*this == rhs);
+ }
+};
+
+struct test_block_delta_t {
+ int8_t val = 0;
+ uint16_t offset = 0;
+ uint16_t len = 0;
+
+
+ DENC(test_block_delta_t, v, p) {
+ DENC_START(1, 1, p);
+ denc(v.val, p);
+ denc(v.offset, p);
+ denc(v.len, p);
+ DENC_FINISH(p);
+ }
+};
+
+inline std::ostream &operator<<(
+ std::ostream &lhs, const test_extent_desc_t &rhs) {
+ return lhs << "test_extent_desc_t(len=" << rhs.len
+ << ", checksum=" << rhs.checksum << ")";
+}
+
+struct TestBlock : crimson::os::seastore::LogicalCachedExtent {
+ constexpr static extent_len_t SIZE = 4<<10;
+ using Ref = TCachedExtentRef<TestBlock>;
+
+ std::vector<test_block_delta_t> delta = {};
+
+ TestBlock(ceph::bufferptr &&ptr)
+ : LogicalCachedExtent(std::move(ptr)) {}
+ TestBlock(const TestBlock &other)
+ : LogicalCachedExtent(other) {}
+
+ CachedExtentRef duplicate_for_write(Transaction&) final {
+ return CachedExtentRef(new TestBlock(*this));
+ };
+
+ static constexpr extent_types_t TYPE = extent_types_t::TEST_BLOCK;
+ extent_types_t get_type() const final {
+ return TYPE;
+ }
+
+ ceph::bufferlist get_delta() final;
+
+ void set_contents(char c, uint16_t offset, uint16_t len) {
+ ::memset(get_bptr().c_str() + offset, c, len);
+ delta.push_back({c, offset, len});
+ }
+
+ void set_contents(char c) {
+ set_contents(c, 0, get_length());
+ }
+
+ test_extent_desc_t get_desc() {
+ return { get_length(), get_crc32c() };
+ }
+
+ void apply_delta(const ceph::bufferlist &bl) final;
+};
+using TestBlockRef = TCachedExtentRef<TestBlock>;
+
+struct TestBlockPhysical : crimson::os::seastore::CachedExtent{
+ constexpr static extent_len_t SIZE = 4<<10;
+ using Ref = TCachedExtentRef<TestBlockPhysical>;
+
+ std::vector<test_block_delta_t> delta = {};
+
+ TestBlockPhysical(ceph::bufferptr &&ptr)
+ : CachedExtent(std::move(ptr)) {}
+ TestBlockPhysical(const TestBlockPhysical &other)
+ : CachedExtent(other) {}
+
+ CachedExtentRef duplicate_for_write(Transaction&) final {
+ return CachedExtentRef(new TestBlockPhysical(*this));
+ };
+
+ static constexpr extent_types_t TYPE = extent_types_t::TEST_BLOCK_PHYSICAL;
+ extent_types_t get_type() const final {
+ return TYPE;
+ }
+
+ void set_contents(char c, uint16_t offset, uint16_t len) {
+ ::memset(get_bptr().c_str() + offset, c, len);
+ delta.push_back({c, offset, len});
+ }
+
+ void set_contents(char c) {
+ set_contents(c, 0, get_length());
+ }
+
+ ceph::bufferlist get_delta() final;
+
+ void apply_delta_and_adjust_crc(paddr_t, const ceph::bufferlist &bl) final;
+};
+using TestBlockPhysicalRef = TCachedExtentRef<TestBlockPhysical>;
+
+struct test_block_mutator_t {
+ std::uniform_int_distribution<int8_t>
+ contents_distribution = std::uniform_int_distribution<int8_t>(
+ std::numeric_limits<int8_t>::min(),
+ std::numeric_limits<int8_t>::max());
+
+ std::uniform_int_distribution<uint16_t>
+ offset_distribution = std::uniform_int_distribution<uint16_t>(
+ 0, TestBlock::SIZE - 1);
+
+ std::uniform_int_distribution<uint16_t> length_distribution(uint16_t offset) {
+ return std::uniform_int_distribution<uint16_t>(
+ 0, TestBlock::SIZE - offset - 1);
+ }
+
+
+ template <typename generator_t>
+ void mutate(TestBlock &block, generator_t &gen) {
+ auto offset = offset_distribution(gen);
+ block.set_contents(
+ contents_distribution(gen),
+ offset,
+ length_distribution(offset)(gen));
+ }
+};
+
+}
+
+WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::test_block_delta_t)
+
+#if FMT_VERSION >= 90000
+template <> struct fmt::formatter<crimson::os::seastore::test_extent_desc_t> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::TestBlock> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::TestBlockPhysical> : fmt::ostream_formatter {};
+#endif
diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc
new file mode 100644
index 000000000..f18c3ac67
--- /dev/null
+++ b/src/test/crimson/seastore/test_btree_lba_manager.cc
@@ -0,0 +1,752 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "crimson/common/log.h"
+
+#include "crimson/os/seastore/journal.h"
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/segment_manager/ephemeral.h"
+#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
+
+#include "test/crimson/seastore/test_block.h"
+
+namespace {
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+using namespace crimson::os::seastore::lba_manager;
+using namespace crimson::os::seastore::lba_manager::btree;
+
+struct btree_test_base :
+ public seastar_test_suite_t, SegmentProvider, JournalTrimmer {
+
+ segment_manager::EphemeralSegmentManagerRef segment_manager;
+ SegmentManagerGroupRef sms;
+ JournalRef journal;
+ ExtentPlacementManagerRef epm;
+ CacheRef cache;
+
+ size_t block_size;
+
+ WritePipeline pipeline;
+
+ segment_id_t next;
+
+ std::map<segment_id_t, segment_seq_t> segment_seqs;
+ std::map<segment_id_t, segment_type_t> segment_types;
+
+ journal_seq_t dummy_tail;
+
+ mutable segment_info_t tmp_info;
+
+ btree_test_base() = default;
+
+ /*
+ * JournalTrimmer interfaces
+ */
+ journal_seq_t get_journal_head() const final { return dummy_tail; }
+
+ void set_journal_head(journal_seq_t) final {}
+
+ journal_seq_t get_dirty_tail() const final { return dummy_tail; }
+
+ journal_seq_t get_alloc_tail() const final { return dummy_tail; }
+
+ void update_journal_tails(journal_seq_t, journal_seq_t) final {}
+
+ bool try_reserve_inline_usage(std::size_t) final { return true; }
+
+ void release_inline_usage(std::size_t) final {}
+
+ std::size_t get_trim_size_per_cycle() const final {
+ return 0;
+ }
+
+ /*
+ * SegmentProvider interfaces
+ */
+ const segment_info_t& get_seg_info(segment_id_t id) const final {
+ tmp_info = {};
+ tmp_info.seq = segment_seqs.at(id);
+ tmp_info.type = segment_types.at(id);
+ return tmp_info;
+ }
+
+ segment_id_t allocate_segment(
+ segment_seq_t seq,
+ segment_type_t type,
+ data_category_t,
+ rewrite_gen_t
+ ) final {
+ auto ret = next;
+ next = segment_id_t{
+ segment_manager->get_device_id(),
+ next.device_segment_id() + 1};
+ segment_seqs[ret] = seq;
+ segment_types[ret] = type;
+ return ret;
+ }
+
+ void close_segment(segment_id_t) final {}
+
+ void update_segment_avail_bytes(segment_type_t, paddr_t) final {}
+
+ void update_modify_time(segment_id_t, sea_time_point, std::size_t) final {}
+
+ SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); }
+
+ virtual void complete_commit(Transaction &t) {}
+ seastar::future<> submit_transaction(TransactionRef t)
+ {
+ auto record = cache->prepare_record(*t, JOURNAL_SEQ_NULL, JOURNAL_SEQ_NULL);
+ return journal->submit_record(std::move(record), t->get_handle()).safe_then(
+ [this, t=std::move(t)](auto submit_result) mutable {
+ cache->complete_commit(
+ *t,
+ submit_result.record_block_base,
+ submit_result.write_result.start_seq);
+ complete_commit(*t);
+ }).handle_error(crimson::ct_error::assert_all{});
+ }
+
+ virtual LBAManager::mkfs_ret test_structure_setup(Transaction &t) = 0;
+ seastar::future<> set_up_fut() final {
+ segment_manager = segment_manager::create_test_ephemeral();
+ return segment_manager->init(
+ ).safe_then([this] {
+ return segment_manager->mkfs(
+ segment_manager::get_ephemeral_device_config(0, 1, 0));
+ }).safe_then([this] {
+ sms.reset(new SegmentManagerGroup());
+ journal = journal::make_segmented(*this, *this);
+ epm.reset(new ExtentPlacementManager());
+ cache.reset(new Cache(*epm));
+
+ block_size = segment_manager->get_block_size();
+ next = segment_id_t{segment_manager->get_device_id(), 0};
+ sms->add_segment_manager(segment_manager.get());
+ epm->test_init_no_background(segment_manager.get());
+ journal->set_write_pipeline(&pipeline);
+
+ return journal->open_for_mkfs().discard_result();
+ }).safe_then([this] {
+ dummy_tail = journal_seq_t{0,
+ paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0)};
+ return epm->open_for_write();
+ }).safe_then([this] {
+ return seastar::do_with(
+ cache->create_transaction(
+ Transaction::src_t::MUTATE, "test_set_up_fut", false),
+ [this](auto &ref_t) {
+ return with_trans_intr(*ref_t, [&](auto &t) {
+ cache->init();
+ return cache->mkfs(t
+ ).si_then([this, &t] {
+ return test_structure_setup(t);
+ });
+ }).safe_then([this, &ref_t] {
+ return submit_transaction(std::move(ref_t));
+ });
+ });
+ }).handle_error(
+ crimson::ct_error::all_same_way([] {
+ ceph_assert(0 == "error");
+ })
+ );
+ }
+
+ virtual void test_structure_reset() {}
+ seastar::future<> tear_down_fut() final {
+ return cache->close(
+ ).safe_then([this] {
+ return journal->close();
+ }).safe_then([this] {
+ return epm->close();
+ }).safe_then([this] {
+ test_structure_reset();
+ segment_manager.reset();
+ sms.reset();
+ journal.reset();
+ epm.reset();
+ cache.reset();
+ }).handle_error(
+ crimson::ct_error::all_same_way([] {
+ ASSERT_FALSE("Unable to close");
+ })
+ );
+ }
+};
+
+struct lba_btree_test : btree_test_base {
+ std::map<laddr_t, lba_map_val_t> check;
+
+ auto get_op_context(Transaction &t) {
+ return op_context_t<laddr_t>{*cache, t};
+ }
+
+ LBAManager::mkfs_ret test_structure_setup(Transaction &t) final {
+ return cache->get_root(
+ t
+ ).si_then([this, &t](RootBlockRef croot) {
+ auto mut_croot = cache->duplicate_for_write(
+ t, croot
+ )->cast<RootBlock>();
+ mut_croot->root.lba_root =
+ LBABtree::mkfs(mut_croot, get_op_context(t));
+ });
+ }
+
+ template <typename F>
+ auto lba_btree_update(F &&f) {
+ auto tref = cache->create_transaction(
+ Transaction::src_t::MUTATE, "test_btree_update", false);
+ auto &t = *tref;
+ with_trans_intr(
+ t,
+ [this, tref=std::move(tref), f=std::forward<F>(f)](auto &t) mutable {
+ return cache->get_root(
+ t
+ ).si_then([f=std::move(f), &t](RootBlockRef croot) {
+ return seastar::do_with(
+ LBABtree(croot),
+ [f=std::move(f), &t](auto &btree) mutable {
+ return std::invoke(
+ std::move(f), btree, t
+ );
+ });
+ }).si_then([this, tref=std::move(tref)]() mutable {
+ return submit_transaction(std::move(tref));
+ });
+ }).unsafe_get0();
+ }
+
+ template <typename F>
+ auto lba_btree_read(F &&f) {
+ auto t = cache->create_transaction(
+ Transaction::src_t::READ, "test_btree_read", false);
+ return with_trans_intr(
+ *t,
+ [this, f=std::forward<F>(f)](auto &t) mutable {
+ return cache->get_root(
+ t
+ ).si_then([f=std::move(f), &t](RootBlockRef croot) mutable {
+ return seastar::do_with(
+ LBABtree(croot),
+ [f=std::move(f), &t](auto &btree) mutable {
+ return std::invoke(
+ std::move(f), btree, t
+ );
+ });
+ });
+ }).unsafe_get0();
+ }
+
+ static auto get_map_val(extent_len_t len) {
+ return lba_map_val_t{0, (pladdr_t)P_ADDR_NULL, len, 0};
+ }
+
+ device_off_t next_off = 0;
+ paddr_t get_paddr() {
+ next_off += block_size;
+ return make_fake_paddr(next_off);
+ }
+
+ void insert(laddr_t addr, extent_len_t len) {
+ ceph_assert(check.count(addr) == 0);
+ check.emplace(addr, get_map_val(len));
+ lba_btree_update([=, this](auto &btree, auto &t) {
+ auto extent = cache->alloc_new_extent<TestBlock>(
+ t,
+ TestBlock::SIZE,
+ placement_hint_t::HOT,
+ 0,
+ get_paddr());
+ return btree.insert(
+ get_op_context(t), addr, get_map_val(len), extent.get()
+ ).si_then([addr, extent](auto p){
+ auto& [iter, inserted] = p;
+ assert(inserted);
+ extent->set_laddr(addr);
+ });
+ });
+ }
+
+ void remove(laddr_t addr) {
+ auto iter = check.find(addr);
+ ceph_assert(iter != check.end());
+ auto len = iter->second.len;
+ check.erase(iter++);
+ lba_btree_update([=, this](auto &btree, auto &t) {
+ return btree.lower_bound(
+ get_op_context(t), addr
+ ).si_then([this, len, addr, &btree, &t](auto iter) {
+ EXPECT_FALSE(iter.is_end());
+ EXPECT_TRUE(iter.get_key() == addr);
+ EXPECT_TRUE(iter.get_val().len == len);
+ return btree.remove(
+ get_op_context(t), iter
+ );
+ });
+ });
+ }
+
+ void check_lower_bound(laddr_t addr) {
+ auto iter = check.lower_bound(addr);
+ auto result = lba_btree_read([=, this](auto &btree, auto &t) {
+ return btree.lower_bound(
+ get_op_context(t), addr
+ ).si_then([](auto iter)
+ -> std::optional<std::pair<const laddr_t, const lba_map_val_t>> {
+ if (iter.is_end()) {
+ return std::nullopt;
+ } else {
+ return std::make_optional(
+ std::make_pair(iter.get_key(), iter.get_val()));
+ }
+ });
+ });
+ if (iter == check.end()) {
+ EXPECT_FALSE(result);
+ } else {
+ EXPECT_TRUE(result);
+ decltype(result) to_check = *iter;
+ EXPECT_EQ(to_check, *result);
+ }
+ }
+};
+
+TEST_F(lba_btree_test, basic)
+{
+ run_async([this] {
+ constexpr unsigned total = 16<<10;
+ for (unsigned i = 0; i < total; i += 16) {
+ insert(i, 8);
+ }
+
+ for (unsigned i = 0; i < total; i += 16) {
+ check_lower_bound(i);
+ check_lower_bound(i + 4);
+ check_lower_bound(i + 8);
+ check_lower_bound(i + 12);
+ }
+ });
+}
+
+struct btree_lba_manager_test : btree_test_base {
+ BtreeLBAManagerRef lba_manager;
+
+ btree_lba_manager_test() = default;
+
+ void complete_commit(Transaction &t) final {}
+
+ LBAManager::mkfs_ret test_structure_setup(Transaction &t) final {
+ lba_manager.reset(new BtreeLBAManager(*cache));
+ return lba_manager->mkfs(t);
+ }
+
+ void test_structure_reset() final {
+ lba_manager.reset();
+ }
+
+ struct test_extent_t {
+ paddr_t addr;
+ size_t len = 0;
+ unsigned refcount = 0;
+ };
+ using test_lba_mapping_t = std::map<laddr_t, test_extent_t>;
+ test_lba_mapping_t test_lba_mappings;
+ struct test_transaction_t {
+ TransactionRef t;
+ test_lba_mapping_t mappings;
+ };
+
+ auto create_transaction(bool create_fake_extent=true) {
+ auto t = test_transaction_t{
+ cache->create_transaction(
+ Transaction::src_t::MUTATE, "test_mutate_lba", false),
+ test_lba_mappings
+ };
+ if (create_fake_extent) {
+ cache->alloc_new_extent<TestBlockPhysical>(
+ *t.t,
+ TestBlockPhysical::SIZE,
+ placement_hint_t::HOT,
+ 0);
+ };
+ return t;
+ }
+
+ auto create_weak_transaction() {
+ auto t = test_transaction_t{
+ cache->create_transaction(
+ Transaction::src_t::READ, "test_read_weak", true),
+ test_lba_mappings
+ };
+ return t;
+ }
+
+ void submit_test_transaction(test_transaction_t t) {
+ submit_transaction(std::move(t.t)).get();
+ test_lba_mappings.swap(t.mappings);
+ }
+
+ auto get_overlap(test_transaction_t &t, laddr_t addr, size_t len) {
+ auto bottom = t.mappings.upper_bound(addr);
+ if (bottom != t.mappings.begin())
+ --bottom;
+ if (bottom != t.mappings.end() &&
+ bottom->first + bottom->second.len <= addr)
+ ++bottom;
+
+ auto top = t.mappings.lower_bound(addr + len);
+ return std::make_pair(
+ bottom,
+ top
+ );
+ }
+
+ device_off_t next_off = 0;
+ paddr_t get_paddr() {
+ next_off += block_size;
+ return make_fake_paddr(next_off);
+ }
+
+ auto alloc_mapping(
+ test_transaction_t &t,
+ laddr_t hint,
+ size_t len) {
+ auto ret = with_trans_intr(
+ *t.t,
+ [=, this](auto &t) {
+ auto extent = cache->alloc_new_extent<TestBlock>(
+ t,
+ TestBlock::SIZE,
+ placement_hint_t::HOT,
+ 0,
+ get_paddr());
+ return lba_manager->alloc_extent(
+ t, hint, len, extent->get_paddr(), *extent);
+ }).unsafe_get0();
+ logger().debug("alloc'd: {}", *ret);
+ EXPECT_EQ(len, ret->get_length());
+ auto [b, e] = get_overlap(t, ret->get_key(), len);
+ EXPECT_EQ(b, e);
+ t.mappings.emplace(
+ std::make_pair(
+ ret->get_key(),
+ test_extent_t{
+ ret->get_val(),
+ ret->get_length(),
+ 1
+ }
+ ));
+ return ret;
+ }
+
+ auto decref_mapping(
+ test_transaction_t &t,
+ laddr_t addr) {
+ return decref_mapping(t, t.mappings.find(addr));
+ }
+
+ void decref_mapping(
+ test_transaction_t &t,
+ test_lba_mapping_t::iterator target) {
+ ceph_assert(target != t.mappings.end());
+ ceph_assert(target->second.refcount > 0);
+ target->second.refcount--;
+
+ (void) with_trans_intr(
+ *t.t,
+ [=, this](auto &t) {
+ return lba_manager->decref_extent(
+ t,
+ target->first,
+ true
+ ).si_then([this, &t, target](auto result) {
+ EXPECT_EQ(result.refcount, target->second.refcount);
+ if (result.refcount == 0) {
+ return cache->retire_extent_addr(
+ t, result.addr.get_paddr(), result.length);
+ }
+ return Cache::retire_extent_iertr::now();
+ });
+ }).unsafe_get0();
+ if (target->second.refcount == 0) {
+ t.mappings.erase(target);
+ }
+ }
+
+ auto incref_mapping(
+ test_transaction_t &t,
+ laddr_t addr) {
+ return incref_mapping(t, t.mappings.find(addr));
+ }
+
+ void incref_mapping(
+ test_transaction_t &t,
+ test_lba_mapping_t::iterator target) {
+ ceph_assert(target->second.refcount > 0);
+ target->second.refcount++;
+ auto refcnt = with_trans_intr(
+ *t.t,
+ [=, this](auto &t) {
+ return lba_manager->incref_extent(
+ t,
+ target->first);
+ }).unsafe_get0().refcount;
+ EXPECT_EQ(refcnt, target->second.refcount);
+ }
+
+ std::vector<laddr_t> get_mapped_addresses() {
+ std::vector<laddr_t> addresses;
+ addresses.reserve(test_lba_mappings.size());
+ for (auto &i: test_lba_mappings) {
+ addresses.push_back(i.first);
+ }
+ return addresses;
+ }
+
+ std::vector<laddr_t> get_mapped_addresses(test_transaction_t &t) {
+ std::vector<laddr_t> addresses;
+ addresses.reserve(t.mappings.size());
+ for (auto &i: t.mappings) {
+ addresses.push_back(i.first);
+ }
+ return addresses;
+ }
+
+ void check_mappings() {
+ auto t = create_transaction();
+ check_mappings(t);
+ }
+
+ void check_mappings(test_transaction_t &t) {
+ (void)with_trans_intr(
+ *t.t,
+ [=, this](auto &t) {
+ return lba_manager->check_child_trackers(t);
+ }).unsafe_get0();
+ for (auto &&i: t.mappings) {
+ auto laddr = i.first;
+ auto len = i.second.len;
+
+ auto ret_list = with_trans_intr(
+ *t.t,
+ [=, this](auto &t) {
+ return lba_manager->get_mappings(
+ t, laddr, len);
+ }).unsafe_get0();
+ EXPECT_EQ(ret_list.size(), 1);
+ auto &ret = *ret_list.begin();
+ EXPECT_EQ(i.second.addr, ret->get_val());
+ EXPECT_EQ(laddr, ret->get_key());
+ EXPECT_EQ(len, ret->get_length());
+
+ auto ret_pin = with_trans_intr(
+ *t.t,
+ [=, this](auto &t) {
+ return lba_manager->get_mapping(
+ t, laddr);
+ }).unsafe_get0();
+ EXPECT_EQ(i.second.addr, ret_pin->get_val());
+ EXPECT_EQ(laddr, ret_pin->get_key());
+ EXPECT_EQ(len, ret_pin->get_length());
+ }
+ with_trans_intr(
+ *t.t,
+ [=, &t, this](auto &) {
+ return lba_manager->scan_mappings(
+ *t.t,
+ 0,
+ L_ADDR_MAX,
+ [iter=t.mappings.begin(), &t](auto l, auto p, auto len) mutable {
+ EXPECT_NE(iter, t.mappings.end());
+ EXPECT_EQ(l, iter->first);
+ EXPECT_EQ(p, iter->second.addr);
+ EXPECT_EQ(len, iter->second.len);
+ ++iter;
+ });
+ }).unsafe_get();
+ }
+};
+
+TEST_F(btree_lba_manager_test, basic)
+{
+ run_async([this] {
+ laddr_t laddr = 0x12345678 * block_size;
+ {
+ // write initial mapping
+ auto t = create_transaction();
+ check_mappings(t); // check in progress transaction sees mapping
+ check_mappings(); // check concurrent does not
+ auto ret = alloc_mapping(t, laddr, block_size);
+ submit_test_transaction(std::move(t));
+ }
+ check_mappings(); // check new transaction post commit sees it
+ });
+}
+
+TEST_F(btree_lba_manager_test, force_split)
+{
+ run_async([this] {
+ for (unsigned i = 0; i < 40; ++i) {
+ auto t = create_transaction();
+ logger().debug("opened transaction");
+ for (unsigned j = 0; j < 5; ++j) {
+ auto ret = alloc_mapping(t, 0, block_size);
+ if ((i % 10 == 0) && (j == 3)) {
+ check_mappings(t);
+ check_mappings();
+ }
+ }
+ logger().debug("submitting transaction");
+ submit_test_transaction(std::move(t));
+ check_mappings();
+ }
+ });
+}
+
+TEST_F(btree_lba_manager_test, force_split_merge)
+{
+ run_async([this] {
+ for (unsigned i = 0; i < 80; ++i) {
+ auto t = create_transaction();
+ logger().debug("opened transaction");
+ for (unsigned j = 0; j < 5; ++j) {
+ auto ret = alloc_mapping(t, 0, block_size);
+ // just to speed things up a bit
+ if ((i % 100 == 0) && (j == 3)) {
+ check_mappings(t);
+ check_mappings();
+ }
+ incref_mapping(t, ret->get_key());
+ decref_mapping(t, ret->get_key());
+ }
+ logger().debug("submitting transaction");
+ submit_test_transaction(std::move(t));
+ if (i % 50 == 0) {
+ check_mappings();
+ }
+ }
+ {
+ auto addresses = get_mapped_addresses();
+ auto t = create_transaction();
+ for (unsigned i = 0; i != addresses.size(); ++i) {
+ if (i % 2 == 0) {
+ incref_mapping(t, addresses[i]);
+ decref_mapping(t, addresses[i]);
+ decref_mapping(t, addresses[i]);
+ }
+ logger().debug("submitting transaction");
+ if (i % 7 == 0) {
+ submit_test_transaction(std::move(t));
+ t = create_transaction();
+ }
+ if (i % 13 == 0) {
+ check_mappings();
+ check_mappings(t);
+ }
+ }
+ submit_test_transaction(std::move(t));
+ }
+ {
+ auto addresses = get_mapped_addresses();
+ auto t = create_transaction();
+ for (unsigned i = 0; i != addresses.size(); ++i) {
+ incref_mapping(t, addresses[i]);
+ decref_mapping(t, addresses[i]);
+ decref_mapping(t, addresses[i]);
+ }
+ check_mappings(t);
+ submit_test_transaction(std::move(t));
+ check_mappings();
+ }
+ });
+}
+
+TEST_F(btree_lba_manager_test, single_transaction_split_merge)
+{
+ run_async([this] {
+ {
+ auto t = create_transaction();
+ for (unsigned i = 0; i < 400; ++i) {
+ alloc_mapping(t, 0, block_size);
+ }
+ check_mappings(t);
+ submit_test_transaction(std::move(t));
+ }
+ check_mappings();
+
+ {
+ auto addresses = get_mapped_addresses();
+ auto t = create_transaction();
+ for (unsigned i = 0; i != addresses.size(); ++i) {
+ if (i % 4 != 0) {
+ decref_mapping(t, addresses[i]);
+ }
+ }
+ check_mappings(t);
+ submit_test_transaction(std::move(t));
+ }
+ check_mappings();
+
+ {
+ auto t = create_transaction();
+ for (unsigned i = 0; i < 600; ++i) {
+ alloc_mapping(t, 0, block_size);
+ }
+ auto addresses = get_mapped_addresses(t);
+ for (unsigned i = 0; i != addresses.size(); ++i) {
+ decref_mapping(t, addresses[i]);
+ }
+ check_mappings(t);
+ submit_test_transaction(std::move(t));
+ }
+ check_mappings();
+ });
+}
+
+TEST_F(btree_lba_manager_test, split_merge_multi)
+{
+ run_async([this] {
+ auto iterate = [&](auto f) {
+ for (uint64_t i = 0; i < (1<<10); ++i) {
+ auto t = create_transaction(false);
+ logger().debug("opened transaction");
+ for (unsigned j = 0; j < 5; ++j) {
+ f(t, (i * 5) + j);
+ }
+ logger().debug("submitting transaction");
+ submit_test_transaction(std::move(t));
+ }
+ };
+ iterate([&](auto &t, auto idx) {
+ alloc_mapping(t, idx * block_size, block_size);
+ });
+ check_mappings();
+ iterate([&](auto &t, auto idx) {
+ if ((idx % 32) > 0) {
+ decref_mapping(t, idx * block_size);
+ }
+ });
+ check_mappings();
+ iterate([&](auto &t, auto idx) {
+ if ((idx % 32) > 0) {
+ alloc_mapping(t, idx * block_size, block_size);
+ }
+ });
+ check_mappings();
+ iterate([&](auto &t, auto idx) {
+ decref_mapping(t, idx * block_size);
+ });
+ check_mappings();
+ });
+}
diff --git a/src/test/crimson/seastore/test_cbjournal.cc b/src/test/crimson/seastore/test_cbjournal.cc
new file mode 100644
index 000000000..0bf2d4135
--- /dev/null
+++ b/src/test/crimson/seastore/test_cbjournal.cc
@@ -0,0 +1,583 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/gtest_seastar.h"
+
+#include <random>
+
+#include "crimson/common/log.h"
+#include "crimson/os/seastore/async_cleaner.h"
+#include "crimson/os/seastore/journal.h"
+#include "crimson/os/seastore/journal/circular_bounded_journal.h"
+#include "crimson/os/seastore/random_block_manager.h"
+#include "crimson/os/seastore/random_block_manager/rbm_device.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "test/crimson/seastore/transaction_manager_test_state.h"
+#include "crimson/os/seastore/random_block_manager/block_rb_manager.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+using namespace crimson::os::seastore::journal;
+
+namespace {
+  // Test-local logging helper; [[maybe_unused]] silences warnings when a
+  // given build does not reference it.
+  [[maybe_unused]] seastar::logger& logger() {
+    return crimson::get_logger(ceph_subsys_test);
+  }
+}
+
+// Decode a raw record-group buffer (header + metadata + extent data), as
+// read back from the journal, into a record_t holding its deltas and
+// extent payloads.  `bl` must contain a complete record group starting
+// with its record_group_header_t.
+std::optional<record_t> decode_record(
+  bufferlist& bl)
+{
+  record_t record;
+  record_group_header_t r_header;
+  auto bliter = bl.cbegin();
+  decode(r_header, bliter);
+  logger().debug(" decode_record mdlength {} records {}",
+    r_header.mdlength, r_header.records);
+  // Device id with only the MSB set -- presumably matching the test
+  // device's id space; TODO confirm against the ephemeral device setup.
+  device_id_t d_id = 1 << (std::numeric_limits<device_id_t>::digits - 1);
+
+  // Collect every delta from all records of the group.
+  auto del_infos = try_decode_deltas(r_header, bl,
+    paddr_t::make_blk_paddr(d_id, 0));
+  for (auto &iter : *del_infos) {
+    for (auto r : iter.deltas) {
+      record.deltas.push_back(r.second);
+    }
+  }
+  // Extent data starts right after the metadata area, i.e. mdlength
+  // bytes into the buffer.
+  auto ex_infos = try_decode_extent_infos(r_header, bl);
+  auto bliter_ex = bl.cbegin();
+  bliter_ex += r_header.mdlength;
+  for (auto &iter: *ex_infos) {
+    for (auto e : iter.extent_infos) {
+      extent_t ex;
+      auto bptr = bufferptr(ceph::buffer::create_page_aligned(e.len));
+      logger().debug(" exten len {} remaining {} ", e.len, bliter_ex.get_remaining());
+      bliter_ex.copy(e.len, bptr.c_str());
+      ex.bl.append(bptr);
+      record.extents.push_back(ex);
+    }
+  }
+  return record;
+}
+
+// Remembers a record as submitted to the journal so it can later be
+// compared, payload-by-payload, against what the journal reads back.
+struct entry_validator_t {
+  bufferlist bl;
+  int entries;          // number of journal entries covered by this record
+  record_t record;      // the originally submitted record
+  segment_nonce_t magic = 0;
+  journal_seq_t seq;    // sequence/address the record was written at
+
+  template <typename... T>
+  entry_validator_t(T&&... entry) : record(std::forward<T>(entry)...) {}
+
+  // Compare a decoded record against the remembered one: lengths and
+  // crc32c of every extent and delta must match, in order.
+  void validate(record_t read) {
+    auto iter = read.extents.begin();
+    for (auto &&block : record.extents) {
+      ASSERT_EQ(
+        iter->bl.length(),
+        block.bl.length());
+      ASSERT_EQ(
+        iter->bl.begin().crc32c(iter->bl.length(), 1),
+        block.bl.begin().crc32c(block.bl.length(), 1));
+      ++iter;
+    }
+    auto iter_delta = read.deltas.begin();
+    for (auto &&block : record.deltas) {
+      ASSERT_EQ(
+        iter_delta->bl.length(),
+        block.bl.length());
+      ASSERT_EQ(
+        iter_delta->bl.begin().crc32c(iter_delta->bl.length(), 1),
+        block.bl.begin().crc32c(block.bl.length(), 1));
+      ++iter_delta;
+    }
+  }
+  // Re-read this record directly from the journal device (starting at
+  // seq) and validate each of its `entries` records in turn.
+  void validate(CircularBoundedJournal &cbj) {
+    rbm_abs_addr offset = 0;
+    auto cursor = scan_valid_records_cursor(seq);
+    cbj.test_initialize_cursor(cursor);
+    for (int i = 0; i < entries; i++) {
+      paddr_t paddr = seq.offset.add_offset(offset);
+      cursor.seq.offset = paddr;
+      auto md = cbj.test_read_validate_record_metadata(
+        cursor, magic).unsafe_get0();
+      assert(md);
+      auto& [header, md_bl] = *md;
+      // Data payload follows the metadata area (mdlength bytes).
+      auto dbuf = cbj.read(
+        paddr.add_offset(header.mdlength),
+        header.dlength).unsafe_get0();
+
+      bufferlist bl;
+      bl.append(md_bl);
+      bl.append(dbuf);
+      auto record = decode_record(bl);
+      validate(*record);
+      offset += header.mdlength + header.dlength;
+      cursor.last_committed = header.committed_to;
+    }
+  }
+
+  // Absolute device address this record was written at.
+  rbm_abs_addr get_abs_addr() {
+    return convert_paddr_to_abs_addr(seq.offset);
+  }
+
+  // True if bl matches (by crc32c) any delta of the remembered record.
+  bool validate_delta(bufferlist bl) {
+    for (auto &&block : record.deltas) {
+      if (bl.begin().crc32c(bl.length(), 1) ==
+          block.bl.begin().crc32c(block.bl.length(), 1)) {
+        return true;
+      }
+    }
+    return false;
+  }
+};
+
+// Fixture driving a CircularBoundedJournal on an ephemeral RBM device.
+// It doubles as the journal's JournalTrimmer: all trimmer callbacks are
+// no-op stubs, and the tests move the tail explicitly via
+// update_journal_tail().
+struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer
+{
+  std::vector<entry_validator_t> entries;   // one validator per submitted record
+  std::unique_ptr<CircularBoundedJournal> cbj;
+  random_block_device::EphemeralRBMDeviceRef device;
+
+  std::default_random_engine generator;
+  uint64_t block_size;
+  WritePipeline pipeline;
+
+  cbjournal_test_t() = default;
+
+  /*
+   * JournalTrimmer interfaces
+   */
+  // 'final' added for consistency with every other trimmer override here.
+  journal_seq_t get_journal_head() const final {
+    return JOURNAL_SEQ_NULL;
+  }
+
+  journal_seq_t get_dirty_tail() const final {
+    return JOURNAL_SEQ_NULL;
+  }
+
+  journal_seq_t get_alloc_tail() const final {
+    return JOURNAL_SEQ_NULL;
+  }
+
+  void set_journal_head(journal_seq_t head) final {}
+
+  void update_journal_tails(
+    journal_seq_t dirty_tail,
+    journal_seq_t alloc_tail) final {}
+
+  bool try_reserve_inline_usage(std::size_t) final { return true; }
+
+  void release_inline_usage(std::size_t) final {}
+
+  std::size_t get_trim_size_per_cycle() const final {
+    return 0;
+  }
+
+  // Submit a record and remember a validator for it (copied before the
+  // move so the original payload is retained for later comparison).
+  // Returns the absolute device address the record was written to.
+  auto submit_record(record_t&& record) {
+    entries.push_back(record);
+    OrderingHandle handle = get_dummy_ordering_handle();
+    auto [addr, w_result] = cbj->submit_record(
+      std::move(record),
+      handle).unsafe_get0();
+    entries.back().seq = w_result.start_seq;
+    entries.back().entries = 1;
+    entries.back().magic = cbj->get_cjs().get_cbj_header().magic;
+    logger().debug("submit entry to addr {}", entries.back().seq);
+    return convert_paddr_to_abs_addr(entries.back().seq.offset);
+  }
+
+  seastar::future<> tear_down_fut() final {
+    return close();
+  }
+
+  // Build an extent of `blocks` blocks filled with one random byte.
+  extent_t generate_extent(size_t blocks) {
+    std::uniform_int_distribution<char> distribution(
+      std::numeric_limits<char>::min(),
+      std::numeric_limits<char>::max()
+    );
+    char contents = distribution(generator);
+    bufferlist bl;
+    bl.append(buffer::ptr(buffer::create(blocks * block_size, contents)));
+    return extent_t{extent_types_t::TEST_BLOCK, L_ADDR_NULL, bl};
+  }
+
+  // Build a delta of `bytes` bytes filled with one random byte.
+  delta_info_t generate_delta(size_t bytes) {
+    std::uniform_int_distribution<char> distribution(
+      std::numeric_limits<char>::min(),
+      std::numeric_limits<char>::max()
+    );
+    char contents = distribution(generator);
+    bufferlist bl;
+    bl.append(buffer::ptr(buffer::create(bytes, contents)));
+    return delta_info_t{
+      extent_types_t::TEST_BLOCK,
+      paddr_t{},
+      L_ADDR_NULL,
+      0, 0,
+      device->get_block_size(),
+      1,
+      0,
+      segment_type_t::JOURNAL,
+      bl
+    };
+  }
+
+  // Re-read every remembered record from the device and validate it.
+  auto replay_and_check() {
+    for (auto &i : entries) {
+      i.validate(*(cbj.get()));
+    }
+  }
+
+  // Replay the journal; every replayed delta must match a remembered
+  // record at the same address.
+  auto replay() {
+    return cbj->replay(
+      [this](const auto &offsets,
+             const auto &e,
+             auto &dirty_seq,
+             auto &alloc_seq,
+             auto last_modified) {
+      bool found = false;
+      for (auto &i : entries) {
+        paddr_t base = offsets.write_result.start_seq.offset;
+        rbm_abs_addr addr = convert_paddr_to_abs_addr(base);
+        if (addr == i.get_abs_addr()) {
+          logger().debug(" compare addr: {} and i.addr {} ", base, i.get_abs_addr());
+          found = i.validate_delta(e.bl);
+          break;
+        }
+      }
+      assert(found == true);
+      return Journal::replay_ertr::make_ready_future<bool>(true);
+      });
+  }
+
+  // Format device and journal, then close the journal again so tests
+  // start from a freshly-created but unopened state.
+  auto mkfs() {
+    device_config_t config = get_rbm_ephemeral_device_config(0, 1);
+    return device->mkfs(config
+    ).safe_then([this]() {
+      return device->mount(
+      ).safe_then([this]() {
+        return cbj->open_for_mkfs(
+        ).safe_then([](auto q) {
+          return seastar::now();
+        });
+      });
+    }).safe_then([this] {
+      return cbj->close();
+    });
+  }
+  auto open() {
+    return cbj->open_for_mount(
+    ).safe_then([](auto q) {
+      return seastar::now();
+    });
+  }
+  seastar::future<> close() {
+    return cbj->close().handle_error(crimson::ct_error::assert_all{});
+  }
+  // Thin accessors over the journal's space bookkeeping.
+  auto get_records_available_size() {
+    return cbj->get_cjs().get_records_available_size();
+  }
+  auto get_records_total_size() {
+    return cbj->get_cjs().get_records_total_size();
+  }
+  auto get_block_size() {
+    return device->get_block_size();
+  }
+  auto get_written_to_rbm_addr() {
+    return cbj->get_rbm_addr(cbj->get_cjs().get_written_to());
+  }
+  auto get_written_to() {
+    return cbj->get_cjs().get_written_to();
+  }
+  auto get_journal_tail() {
+    return cbj->get_dirty_tail();
+  }
+  auto get_records_used_size() {
+    return cbj->get_cjs().get_records_used_size();
+  }
+  bool is_available_size(uint64_t size) {
+    return cbj->get_cjs().is_available_size(size);
+  }
+  // Move both journal tails to addr + len (i.e. trim everything before).
+  void update_journal_tail(rbm_abs_addr addr, uint32_t len) {
+    paddr_t paddr =
+      convert_abs_addr_to_paddr(
+        addr + len,
+        cbj->get_device_id());
+    journal_seq_t seq = {0, paddr};
+    cbj->update_journal_tail(
+      seq,
+      seq
+    ).get0();
+  }
+  void set_written_to(journal_seq_t seq) {
+    cbj->set_written_to(seq);
+  }
+
+  // Create the ephemeral device, mkfs, then mount via replay/open/replay
+  // so every test starts with an opened, replayed journal.
+  seastar::future<> set_up_fut() final {
+    device = random_block_device::create_test_ephemeral(
+      random_block_device::DEFAULT_TEST_CBJOURNAL_SIZE, 0);
+    cbj.reset(new CircularBoundedJournal(*this, device.get(), std::string()));
+    block_size = device->get_block_size();
+    cbj->set_write_pipeline(&pipeline);
+    return mkfs(
+    ).safe_then([this] {
+      return replay(
+      ).safe_then([this] {
+        return open(
+        ).safe_then([this] {
+          return replay();
+        });
+      });
+    }).handle_error(crimson::ct_error::assert_all{});
+  }
+};
+
+TEST_F(cbjournal_test_t, submit_one_record)
+{
+  run_async([this] {
+    // A single record with two extents and two deltas round-trips intact.
+    record_t rec {
+      { generate_extent(1), generate_extent(2) },
+      { generate_delta(3), generate_delta(4) }
+    };
+    submit_record(std::move(rec));
+    replay_and_check();
+  });
+}
+
+TEST_F(cbjournal_test_t, submit_three_records)
+{
+  run_async([this] {
+    // Three records of differing extent/delta sizes:
+    // {extent1, extent2, delta1, delta2} block/byte counts.
+    const size_t shapes[3][4] = {
+      {1, 2, 3, 4},
+      {8, 9, 20, 21},
+      {5, 6, 200, 210},
+    };
+    for (const auto &s : shapes) {
+      submit_record(
+        record_t{
+          { generate_extent(s[0]), generate_extent(s[1]) },
+          { generate_delta(s[2]), generate_delta(s[3]) }
+        });
+    }
+    replay_and_check();
+  });
+}
+
+TEST_F(cbjournal_test_t, submit_full_records)
+{
+  run_async([this] {
+    auto make_record = [this] {
+      return record_t {
+        { generate_extent(1), generate_extent(2) },
+        { generate_delta(20), generate_delta(21) }
+      };
+    };
+    auto rec = make_record();
+    auto record_total_size =
+      record_group_size_t(rec.size, block_size).get_encoded_length();
+
+    // Fill the journal to capacity.
+    submit_record(std::move(rec));
+    while (is_available_size(record_total_size)) {
+      submit_record(make_record());
+    }
+
+    // Trim everything; the record area becomes fully available again.
+    update_journal_tail(entries.back().get_abs_addr(), record_total_size);
+    ASSERT_EQ(get_records_total_size(),
+              get_records_available_size());
+
+    // The next record wraps around to the beginning of the log...
+    submit_record(make_record());
+
+    // ...and we can fill the journal a second time.
+    while (is_available_size(record_total_size)) {
+      submit_record(make_record());
+    }
+    ASSERT_TRUE(record_total_size > get_records_available_size());
+  });
+}
+
+TEST_F(cbjournal_test_t, boudary_check_verify)
+{
+  run_async([this] {
+    auto make_record = [this] {
+      return record_t {
+        { generate_extent(1), generate_extent(2) },
+        { generate_delta(20), generate_delta(21) }
+      };
+    };
+    auto rec = make_record();
+    auto record_total_size =
+      record_group_size_t(rec.size, block_size).get_encoded_length();
+    submit_record(std::move(rec));
+    // Fill the journal completely.
+    while (is_available_size(record_total_size)) {
+      submit_record(make_record());
+    }
+
+    uint64_t avail = get_records_available_size();
+    // Advance the tail by two record sizes: one block is reserved
+    // between head and tail.
+    update_journal_tail(entries.front().get_abs_addr(), record_total_size * 2);
+    entries.erase(entries.begin());
+    entries.erase(entries.begin());
+    ASSERT_EQ(avail + (record_total_size * 2), get_records_available_size());
+    avail = get_records_available_size();
+    // This submission wraps to the beginning of the circular log.
+    submit_record(make_record());
+    ASSERT_TRUE(avail - record_total_size >= get_records_available_size());
+    replay_and_check();
+  });
+}
+
+TEST_F(cbjournal_test_t, update_header)
+{
+  run_async([this] {
+    // Snapshot the on-disk header before moving the tail.
+    auto [header, _buf] = *(cbj->get_cjs().read_header().unsafe_get0());
+    record_t rec {
+      { generate_extent(1), generate_extent(2) },
+      { generate_delta(20), generate_delta(21) }
+    };
+    auto r_size = record_group_size_t(rec.size, block_size);
+    auto record_total_size = r_size.get_encoded_length();
+    submit_record(std::move(rec));
+
+    update_journal_tail(entries.front().get_abs_addr(), record_total_size);
+    cbj->get_cjs().write_header().unsafe_get0();
+    auto [update_header, update_buf2] = *(cbj->get_cjs().read_header().unsafe_get0());
+    cbj->close().unsafe_get0();
+    replay().unsafe_get0();
+
+    // The rewritten header must reflect the moved tail, and replay must
+    // restore the same dirty tail in memory.  (The original assertion
+    // compared update_header.dirty_tail against itself and could never
+    // fail.)
+    ASSERT_NE(header.dirty_tail.offset, update_header.dirty_tail.offset);
+    ASSERT_EQ(update_header.dirty_tail.offset, get_journal_tail().offset);
+  });
+}
+
+TEST_F(cbjournal_test_t, replay)
+{
+  run_async([this] {
+    auto make_record = [this] {
+      return record_t {
+        { generate_extent(1), generate_extent(2) },
+        { generate_delta(20), generate_delta(21) }
+      };
+    };
+    auto rec = make_record();
+    auto record_total_size =
+      record_group_size_t(rec.size, block_size).get_encoded_length();
+    submit_record(std::move(rec));
+    // Fill the journal completely.
+    while (is_available_size(record_total_size)) {
+      submit_record(make_record());
+    }
+    // Trim the two oldest records so the next write wraps to the start.
+    uint64_t avail = get_records_available_size();
+    update_journal_tail(entries.front().get_abs_addr(), record_total_size * 2);
+    entries.erase(entries.begin());
+    entries.erase(entries.begin());
+    ASSERT_EQ(avail + (record_total_size * 2), get_records_available_size());
+    avail = get_records_available_size();
+    submit_record(make_record());
+    ASSERT_TRUE(avail - record_total_size >= get_records_available_size());
+    // Replay after close must still validate every surviving record.
+    cbj->close().unsafe_get0();
+    replay().unsafe_get0();
+  });
+}
+
+TEST_F(cbjournal_test_t, replay_after_reset)
+{
+  run_async([this] {
+    auto make_record = [this] {
+      return record_t {
+        { generate_extent(1), generate_extent(2) },
+        { generate_delta(20), generate_delta(21) }
+      };
+    };
+    auto rec = make_record();
+    auto record_total_size =
+      record_group_size_t(rec.size, block_size).get_encoded_length();
+    submit_record(std::move(rec));
+    while (is_available_size(record_total_size)) {
+      submit_record(make_record());
+    }
+    auto old_written_to = get_written_to();
+    auto old_used_size = get_records_used_size();
+    // Forcibly rewind written_to to the start of the record area...
+    set_written_to(
+      journal_seq_t{0,
+        convert_abs_addr_to_paddr(
+          cbj->get_records_start(),
+          cbj->get_device_id())});
+    cbj->close().unsafe_get0();
+    // ...then replay must rediscover the true head and used size.
+    replay().unsafe_get0();
+    ASSERT_EQ(old_written_to, get_written_to());
+    ASSERT_EQ(old_used_size,
+              get_records_used_size());
+  });
+}
+
+TEST_F(cbjournal_test_t, multiple_submit_at_end)
+{
+  run_async([this] {
+    record_t rec {
+      { generate_extent(1), generate_extent(2) },
+      { generate_delta(20), generate_delta(21) }
+    };
+    auto r_size = record_group_size_t(rec.size, block_size);
+    auto record_total_size = r_size.get_encoded_length();
+    submit_record(std::move(rec));
+    // Fill the journal completely.
+    while (is_available_size(record_total_size)) {
+      submit_record(
+        record_t {
+          { generate_extent(1), generate_extent(2) },
+          { generate_delta(20), generate_delta(21) }
+        });
+    }
+    // Trim eight records so the concurrent writers below race across
+    // the wrap-around point.
+    update_journal_tail(entries.front().get_abs_addr(), record_total_size * 8);
+    for (int i = 0; i < 8; i++) {
+      entries.erase(entries.begin());
+    }
+    // Four concurrent submitters, two small records each.
+    seastar::parallel_for_each(
+      boost::make_counting_iterator(0u),
+      boost::make_counting_iterator(4u),
+      [&](auto) {
+        return seastar::async([&] {
+          auto writes = 0;
+          while (writes < 2) {
+            record_t rec {
+              { generate_extent(1) },
+              { generate_delta(20) } };
+            submit_record(std::move(rec));
+            writes++;
+          }
+        });
+      }).get0();
+    auto old_written_to = get_written_to();
+    cbj->close().unsafe_get0();
+    cbj->replay(
+      [](const auto &offsets,
+         const auto &e,
+         auto &dirty_seq,
+         auto &alloc_seq,
+         auto last_modified) {
+      return Journal::replay_ertr::make_ready_future<bool>(true);
+      }).unsafe_get0();
+    // Use a gtest assertion (not plain assert, which compiles out under
+    // NDEBUG) so the check runs in every build and reports on failure,
+    // consistent with the rest of this file.
+    ASSERT_EQ(old_written_to, get_written_to());
+  });
+}
diff --git a/src/test/crimson/seastore/test_collection_manager.cc b/src/test/crimson/seastore/test_collection_manager.cc
new file mode 100644
index 000000000..cedcc5e8f
--- /dev/null
+++ b/src/test/crimson/seastore/test_collection_manager.cc
@@ -0,0 +1,195 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "os/ObjectStore.h"
+#include "test/crimson/gtest_seastar.h"
+#include "test/crimson/seastore/transaction_manager_test_state.h"
+
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/collection_manager.h"
+
+#include "test/crimson/seastore/test_block.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+
+namespace {
+  // Test-local logging helper; [[maybe_unused]] silences warnings when a
+  // given build does not reference it.
+  [[maybe_unused]] seastar::logger& logger() {
+    return crimson::get_logger(ceph_subsys_test);
+  }
+}
+
+
+// Forward METHOD to collection_manager inside a transaction-interruptor
+// context and block for the result, keeping the test bodies synchronous.
+// Expands into a member function of the fixture with the same name.
+#define TEST_COLL_FORWARD(METHOD) \
+  template <typename... Args> \
+  auto METHOD(coll_root_t &root, Transaction &t, Args&&... args) const { \
+    return with_trans_intr( \
+      t, \
+      [this](auto &t, auto &root, auto&&... args) { \
+        return collection_manager->METHOD( \
+          root, \
+          t, \
+          std::forward<decltype(args)>(args)...); \
+      }, \
+      root, \
+      std::forward<Args>(args)...).unsafe_get0(); \
+  }
+
+// Fixture pairing a transaction manager (TMTestState) with a
+// CollectionManager; keeps a local map of expected collections that the
+// tests compare against what the manager lists.
+struct collection_manager_test_t :
+  public seastar_test_suite_t,
+  TMTestState {
+
+  CollectionManagerRef collection_manager;
+
+  collection_manager_test_t() {}
+
+  seastar::future<> set_up_fut() final {
+    return tm_setup().then([this] {
+      collection_manager = collection_manager::create_coll_manager(*tm);
+      return seastar::now();
+    });
+  }
+
+  seastar::future<> tear_down_fut() final {
+    return tm_teardown().then([this] {
+      collection_manager.reset();
+      return seastar::now();
+    });
+  }
+
+  // Expected state: collections the tests believe exist on disk.
+  using test_collection_t = std::map<coll_t, coll_info_t>;
+  test_collection_t test_coll_mappings;
+
+  // Restart the transaction manager and rebuild the collection manager,
+  // simulating a crash/replay cycle.
+  void replay() {
+    restart();
+    collection_manager = collection_manager::create_coll_manager(*tm);
+  }
+
+  // mkfs a fresh collection root in its own transaction.
+  auto get_root() {
+    auto tref = create_mutate_transaction();
+    auto coll_root = with_trans_intr(
+      *tref,
+      [this](auto &t) {
+        return collection_manager->mkfs(t);
+      }).unsafe_get0();
+    submit_transaction(std::move(tref));
+    return coll_root;
+  }
+
+  // Synchronous wrappers around the manager's interruptible methods.
+  TEST_COLL_FORWARD(remove)
+  TEST_COLL_FORWARD(list)
+  TEST_COLL_FORWARD(create)
+  TEST_COLL_FORWARD(update)
+
+  // Assert the on-disk listing matches test_coll_mappings exactly.
+  void checking_mappings(coll_root_t &coll_root, Transaction &t) {
+    auto coll_list = list(coll_root, t);
+    EXPECT_EQ(test_coll_mappings.size(), coll_list.size());
+    for (std::pair<coll_t, coll_info_t> p : test_coll_mappings) {
+      EXPECT_NE(
+        std::find(coll_list.begin(), coll_list.end(), p),
+        coll_list.end());
+    }
+  }
+
+  // Same check under a fresh read transaction.
+  void checking_mappings(coll_root_t &coll_root) {
+    auto t = create_read_transaction();
+    checking_mappings(coll_root, *t);
+  }
+};
+
+TEST_P(collection_manager_test_t, basic)
+{
+  run_async([this] {
+    coll_root_t coll_root = get_root();
+    constexpr int NUM_COLLS = 20;
+    {
+      // Create NUM_COLLS collections in one transaction.
+      auto t = create_mutate_transaction();
+      for (int idx = 0; idx < NUM_COLLS; idx++) {
+        coll_t cid(spg_t(pg_t(idx + 1, idx + 2), shard_id_t::NO_SHARD));
+        create(coll_root, *t, cid, coll_info_t(idx));
+        test_coll_mappings.emplace(cid, coll_info_t(idx));
+      }
+      checking_mappings(coll_root, *t);
+      submit_transaction(std::move(t));
+      EXPECT_EQ(test_coll_mappings.size(), NUM_COLLS);
+    }
+
+    replay();
+    checking_mappings(coll_root);
+    {
+      // Remove every collection, mirroring each removal locally.
+      auto t = create_mutate_transaction();
+      for (auto iter = test_coll_mappings.begin();
+           iter != test_coll_mappings.end();) {
+        remove(coll_root, *t, iter->first);
+        iter = test_coll_mappings.erase(iter);
+      }
+      submit_transaction(std::move(t));
+    }
+    replay();
+    {
+      // Listing after replay must be empty, like the local map.
+      auto t = create_mutate_transaction();
+      auto list_ret = list(coll_root, *t);
+      submit_transaction(std::move(t));
+      EXPECT_EQ(list_ret.size(), test_coll_mappings.size());
+    }
+  });
+}
+
+TEST_P(collection_manager_test_t, overflow)
+{
+  run_async([this] {
+    coll_root_t coll_root = get_root();
+    auto old_location = coll_root.get_location();
+
+    // 412 entries are enough to relocate the root (checked below).
+    constexpr int NUM_COLLS = 412;
+    auto t = create_mutate_transaction();
+    for (int idx = 0; idx < NUM_COLLS; idx++) {
+      coll_t cid(spg_t(pg_t(idx + 1, idx + 2), shard_id_t::NO_SHARD));
+      create(coll_root, *t, cid, coll_info_t(idx));
+      test_coll_mappings.emplace(cid, coll_info_t(idx));
+    }
+    submit_transaction(std::move(t));
+    EXPECT_NE(old_location, coll_root.get_location());
+    checking_mappings(coll_root);
+
+    replay();
+    checking_mappings(coll_root);
+  });
+}
+
+TEST_P(collection_manager_test_t, update)
+{
+  run_async([this] {
+    coll_root_t coll_root = get_root();
+    {
+      // Two collections with distinct split_bits.
+      auto t = create_mutate_transaction();
+      for (int idx = 0; idx < 2; idx++) {
+        coll_t cid(spg_t(pg_t(1, idx + 1), shard_id_t::NO_SHARD));
+        create(coll_root, *t, cid, coll_info_t(idx));
+        test_coll_mappings.emplace(cid, coll_info_t(idx));
+      }
+      submit_transaction(std::move(t));
+    }
+    {
+      // Overwrite the first collection's info with the second's.
+      auto first = test_coll_mappings.begin();
+      auto second = std::next(first);
+      EXPECT_NE(first->second.split_bits, second->second.split_bits);
+      auto t = create_mutate_transaction();
+      update(coll_root, *t, first->first, second->second);
+      submit_transaction(std::move(t));
+      first->second.split_bits = second->second.split_bits;
+    }
+    replay();
+    checking_mappings(coll_root);
+  });
+}
+
+// Run every TEST_P above against both backend flavors.
+INSTANTIATE_TEST_SUITE_P(
+  collection_manager_test,
+  collection_manager_test_t,
+  ::testing::Values (
+    "segmented",
+    "circularbounded"
+  )
+);
diff --git a/src/test/crimson/seastore/test_extent_allocator.cc b/src/test/crimson/seastore/test_extent_allocator.cc
new file mode 100644
index 000000000..8217e5a66
--- /dev/null
+++ b/src/test/crimson/seastore/test_extent_allocator.cc
@@ -0,0 +1,181 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <random>
+
+#include <boost/iterator/counting_iterator.hpp>
+
+#include "test/crimson/gtest_seastar.h"
+#include "crimson/os/seastore/random_block_manager.h"
+#include "crimson/os/seastore/random_block_manager/extent_allocator.h"
+#include "crimson/os/seastore/random_block_manager/avlallocator.h"
+#include "include/interval_set.h"
+
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+
+namespace {
+  // Test-local logging helper; [[maybe_unused]] silences warnings when a
+  // given build does not reference it.
+  [[maybe_unused]] seastar::logger& logger() {
+    return crimson::get_logger(ceph_subsys_test);
+  }
+}
+
+// Fixture parameterized over the extent-allocator implementation name
+// ("avl" is the only one supported today).
+struct allocator_test_t :
+  public seastar_test_suite_t,
+  ::testing::WithParamInterface<const char*> {
+  std::random_device rd;
+  std::mt19937 gen;          // seeded once from rd in the constructor
+  ExtentAllocatorRef allocator;
+
+  allocator_test_t()
+    : gen(rd()) {}
+
+  seastar::future<> set_up_fut() final {
+    std::string a_type = GetParam();
+    if (a_type == "avl") {
+      allocator.reset(new AvlAllocator(false));
+      return seastar::now();
+    }
+    // Unknown parameter: abort.  NOTE(review): no return after the
+    // assert -- relies on ceph_assert never returning.
+    ceph_assert(0 == "no support");
+  }
+  seastar::future<> tear_down_fut() final {
+    if (allocator) {
+      allocator->close();
+    }
+    return seastar::now();
+  }
+  // Expose allocator range [0, total_size) with the given block size.
+  void init_alloc(uint64_t block_size, uint64_t total_size) {
+    assert(allocator);
+    allocator->init(0, total_size, block_size);
+  }
+  void close() {
+    assert(allocator);
+    allocator->close();
+  }
+  auto allocate(size_t size) {
+    return allocator->alloc_extent(size);
+  }
+  void free(uint64_t start, uint64_t length) {
+    allocator->free_extent(start, length);
+  }
+  // Uniformly random block-aligned address within [0, capacity).
+  rbm_abs_addr get_random_addr(size_t block_size, size_t capacity) {
+    return block_size *
+      std::uniform_int_distribution<>(0, (capacity / block_size) - 1)(gen);
+  }
+};
+
+TEST_P(allocator_test_t, test_alloc_init)
+{
+  constexpr uint64_t BS_4K = 4096;
+  constexpr uint64_t BS_8K = 8192;
+  // A fresh init exposes the whole capacity.
+  init_alloc(BS_4K, BS_4K * 64);
+  ASSERT_EQ((BS_4K * 64), allocator->get_available_size());
+  close();
+  // Allocating exactly one block reduces availability by one block.
+  init_alloc(BS_8K, BS_8K * 32);
+  allocate(BS_8K);
+  ASSERT_EQ(BS_8K * 32 - BS_8K, allocator->get_available_size());
+  close();
+  // An allocation spanning two blocks is accounted byte-accurately.
+  init_alloc(BS_4K, BS_4K * 128);
+  allocate(BS_8K);
+  ASSERT_EQ(BS_4K * 128 - BS_8K, allocator->get_available_size());
+}
+
+TEST_P(allocator_test_t, test_init_alloc_free)
+{
+  constexpr uint64_t block_size = 4096;
+  constexpr uint64_t capacity = 4 * 1024 * block_size;
+
+  init_alloc(block_size, capacity);
+
+  // Grab the largest possible extent, then give it back; availability
+  // must round-trip exactly.
+  const auto initial_avail = allocator->get_available_size();
+  const auto chunk = allocator->get_max_alloc_size();
+  allocate(chunk);
+  ASSERT_EQ(initial_avail - chunk, allocator->get_available_size());
+
+  free(0, chunk);
+  ASSERT_EQ(initial_avail, allocator->get_available_size());
+}
+
+TEST_P(allocator_test_t, test_alloc_failure)
+{
+  constexpr uint64_t block_size = 8192;
+  constexpr uint64_t capacity = 1024 * block_size;
+
+  init_alloc(block_size, capacity);
+  // Occupy the first and third quarters, leaving two fragmented gaps of
+  // 256 blocks each.
+  allocator->mark_extent_used(0, block_size * 256);
+  allocator->mark_extent_used(block_size * 512, block_size * 256);
+
+  // No contiguous 512-block extent exists.
+  auto result = allocate(block_size * 512);
+  ASSERT_FALSE(result.has_value());
+
+  // Rearranging the used ranges still leaves no 512-block hole.
+  free(0, block_size * 256);
+  allocator->mark_extent_used(0, block_size * 512);
+
+  result = allocate(block_size * 512);
+  ASSERT_FALSE(result.has_value());
+}
+
+TEST_P(allocator_test_t, test_random_alloc_verify)
+{
+  uint64_t block_size = 4096;
+  uint64_t capacity = 64 * 1024 * block_size;
+  uint64_t avail = capacity;
+  interval_set<rbm_abs_addr> alloc_map;
+  init_alloc(block_size, capacity);
+
+  {
+    // Mark random non-overlapping extents used, mirroring them locally.
+    for (int i = 0; i < 256; i++) {
+      auto addr = get_random_addr(block_size, capacity);
+      auto size = get_random_addr(block_size, capacity) % (4 << 20);
+      if (addr + size > capacity || size == 0 ||
+          alloc_map.intersects(addr, size) ) continue;
+      allocator->mark_extent_used(addr, size);
+      alloc_map.insert(addr, size);
+      avail -= size;
+    }
+    ASSERT_EQ(avail, allocator->get_available_size());
+
+    // Free everything back.  Drain from begin(): the original range-for
+    // erased from alloc_map while iterating it, invalidating the loop
+    // iterator (undefined behavior).
+    while (!alloc_map.empty()) {
+      auto addr = (*alloc_map.begin()).first;
+      auto size = (*alloc_map.begin()).second;
+      free(addr, size);
+      avail += size;
+      alloc_map.erase(addr, size);
+      ASSERT_EQ(avail, allocator->get_available_size());
+    }
+    ASSERT_EQ(capacity, allocator->get_available_size());
+
+    // Another round of random reservations.
+    for (int i = 0; i < 100; i++) {
+      auto addr = get_random_addr(block_size, capacity);
+      auto size = get_random_addr(block_size, capacity) % (4 << 20);
+      if (addr + size > capacity || size == 0 ||
+          alloc_map.intersects(addr, size) ) continue;
+      allocator->mark_extent_used(addr, size);
+      alloc_map.insert(addr, size);
+      avail -= size;
+    }
+
+    // Interleave frees of the oldest extent with fresh reservations.
+    for (int i = 0; i < 50; i++) {
+      free((*alloc_map.begin()).first, (*alloc_map.begin()).second);
+      avail += (*alloc_map.begin()).second;
+      alloc_map.erase((*alloc_map.begin()).first, (*alloc_map.begin()).second);
+      ASSERT_EQ(avail, allocator->get_available_size());
+
+      auto addr = get_random_addr(block_size, capacity);
+      auto size = get_random_addr(block_size, capacity) % (4 << 20);
+      if (addr + size > capacity || size == 0 ||
+          alloc_map.intersects(addr, size) ) continue;
+      allocator->mark_extent_used(addr, size);
+      alloc_map.insert(addr, size);
+      avail -= size;
+    }
+    ASSERT_EQ(avail, allocator->get_available_size());
+  }
+}
+
+// Only the AVL-backed allocator is exercised today.
+INSTANTIATE_TEST_SUITE_P(
+  allocator_test,
+  allocator_test_t,
+  ::testing::Values("avl"));
diff --git a/src/test/crimson/seastore/test_object_data_handler.cc b/src/test/crimson/seastore/test_object_data_handler.cc
new file mode 100644
index 000000000..6510cb5d9
--- /dev/null
+++ b/src/test/crimson/seastore/test_object_data_handler.cc
@@ -0,0 +1,431 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/gtest_seastar.h"
+#include "test/crimson/seastore/transaction_manager_test_state.h"
+
+#include "crimson/os/seastore/onode.h"
+#include "crimson/os/seastore/object_data_handler.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+
+#define MAX_OBJECT_SIZE (16<<20)
+#define DEFAULT_OBJECT_DATA_RESERVATION (16<<20)
+#define DEFAULT_OBJECT_METADATA_RESERVATION (16<<20)
+
+namespace {
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+// Minimal in-memory Onode used to exercise ObjectDataHandler without a
+// real onode tree: it only tracks a layout and whether it was mutated.
+class TestOnode final : public Onode {
+  onode_layout_t layout;
+  bool dirty = false;
+
+public:
+  // ddr/dmr: default object data / metadata reservations forwarded to Onode.
+  TestOnode(uint32_t ddr, uint32_t dmr) : Onode(ddr, dmr) {}
+  const onode_layout_t &get_layout() const final {
+    return layout;
+  }
+  // Any request for a mutable layout marks this onode dirty; the
+  // Transaction is unused since nothing is persisted here.
+  onode_layout_t &get_mutable_layout(Transaction &t) final {
+    dirty = true;
+    return layout;
+  }
+  // NOTE(review): unlike the accessors above this is not marked final --
+  // confirm whether Onode declares is_alive() virtual.
+  bool is_alive() const {
+    return true;
+  }
+  // Test-side helper: true once get_mutable_layout() has been called.
+  bool is_dirty() const { return dirty; }
+  laddr_t get_hint() const final {return L_ADDR_MIN; }
+  ~TestOnode() final = default;
+};
+
+// Fixture driving ObjectDataHandler against a TestOnode.  Every mutation
+// is mirrored into an in-memory shadow buffer (known_contents) so reads
+// can be verified against the expected object content.
+struct object_data_handler_test_t:
+  public seastar_test_suite_t,
+  TMTestState {
+  OnodeRef onode;
+
+  // Shadow copy of the object's expected contents (4MB, zero-filled in
+  // set_up_fut, so unwritten ranges are expected to read back as zeros).
+  bufferptr known_contents;
+  // Highest offset written so far (logical object size).
+  extent_len_t size = 0;
+
+  object_data_handler_test_t() {}
+
+  // Write `len` bytes of `fill` at `offset` through ObjectDataHandler in
+  // transaction `t`, mirroring the same bytes into known_contents.
+  void write(Transaction &t, objaddr_t offset, extent_len_t len, char fill) {
+    ceph_assert(offset + len <= known_contents.length());
+    size = std::max<extent_len_t>(size, offset + len);
+    memset(
+      known_contents.c_str() + offset,
+      fill,
+      len);
+    bufferlist bl;
+    bl.append(
+      bufferptr(
+        known_contents,
+        offset,
+        len));
+    with_trans_intr(t, [&](auto &t) {
+      return ObjectDataHandler(MAX_OBJECT_SIZE).write(
+        ObjectDataHandler::context_t{
+          *tm,
+          t,
+          *onode,
+        },
+        offset,
+        bl);
+    }).unsafe_get0();
+  }
+  // Convenience overload: runs the write in its own transaction and
+  // submits it.
+  void write(objaddr_t offset, extent_len_t len, char fill) {
+    auto t = create_mutate_transaction();
+    write(*t, offset, len, fill);
+    return submit_transaction(std::move(t));
+  }
+
+  // Truncate the object to `offset`.  The shadow copy past `offset` is
+  // zeroed; the handler is only invoked when the object actually shrinks.
+  void truncate(Transaction &t, objaddr_t offset) {
+    if (size > offset) {
+      memset(
+        known_contents.c_str() + offset,
+        0,
+        size - offset);
+      with_trans_intr(t, [&](auto &t) {
+        return ObjectDataHandler(MAX_OBJECT_SIZE).truncate(
+          ObjectDataHandler::context_t{
+            *tm,
+            t,
+            *onode
+          },
+          offset);
+      }).unsafe_get0();
+    }
+    size = offset;
+  }
+  void truncate(objaddr_t offset) {
+    auto t = create_mutate_transaction();
+    truncate(*t, offset);
+    return submit_transaction(std::move(t));
+  }
+
+  // Read [offset, offset+len) via the handler and EXPECT it to match the
+  // shadow copy byte for byte.
+  void read(Transaction &t, objaddr_t offset, extent_len_t len) {
+    bufferlist bl = with_trans_intr(t, [&](auto &t) {
+      return ObjectDataHandler(MAX_OBJECT_SIZE).read(
+        ObjectDataHandler::context_t{
+          *tm,
+          t,
+          *onode
+        },
+        offset,
+        len);
+    }).unsafe_get0();
+    bufferlist known;
+    known.append(
+      bufferptr(
+        known_contents,
+        offset,
+        len));
+    EXPECT_EQ(bl.length(), known.length());
+    EXPECT_EQ(bl, known);
+  }
+  void read(objaddr_t offset, extent_len_t len) {
+    auto t = create_read_transaction();
+    read(*t, offset, len);
+  }
+  // Read the range plus all combinations of the ends shifted by
+  // -fuzz/0/+fuzz bytes, to exercise behavior around extent boundaries.
+  void read_near(objaddr_t offset, extent_len_t len, extent_len_t fuzz) {
+    auto fuzzes = std::vector<int32_t>{-1 * (int32_t)fuzz, 0, (int32_t)fuzz};
+    for (auto left_fuzz : fuzzes) {
+      for (auto right_fuzz : fuzzes) {
+        read(offset + left_fuzz, len - left_fuzz + right_fuzz);
+      }
+    }
+  }
+  // Return the LBA pins covering [offset, offset+length) -- used by the
+  // split tests to check how writes carved up the extents.
+  std::list<LBAMappingRef> get_mappings(objaddr_t offset, extent_len_t length) {
+    auto t = create_mutate_transaction();
+    auto ret = with_trans_intr(*t, [&](auto &t) {
+      return tm->get_pins(t, offset, length);
+    }).unsafe_get0();
+    return ret;
+  }
+
+  // Fresh onode + zeroed 4MB shadow buffer per test, then bring up the
+  // transaction manager.
+  seastar::future<> set_up_fut() final {
+    onode = new TestOnode(
+      DEFAULT_OBJECT_DATA_RESERVATION,
+      DEFAULT_OBJECT_METADATA_RESERVATION);
+    known_contents = buffer::create(4<<20 /* 4MB */);
+    memset(known_contents.c_str(), 0, known_contents.length());
+    size = 0;
+    return tm_setup();
+  }
+
+  seastar::future<> tear_down_fut() final {
+    onode.reset();
+    size = 0;
+    return tm_teardown();
+  }
+};
+
+// One 8KB write at 1MB; verify reads at and around the extent edges.
+TEST_P(object_data_handler_test_t, single_write)
+{
+  run_async([this] {
+    write(1<<20, 8<<10, 'c');
+
+    read_near(1<<20, 8<<10, 1);
+    read_near(1<<20, 8<<10, 512);
+  });
+}
+
+// Three adjacent 4KB writes around 1MB; verify both the middle extent
+// and the whole 12KB contiguous span.
+TEST_P(object_data_handler_test_t, multi_write)
+{
+  run_async([this] {
+    write((1<<20) - (4<<10), 4<<10, 'a');
+    write(1<<20, 4<<10, 'b');
+    write((1<<20) + (4<<10), 4<<10, 'c');
+
+    read_near(1<<20, 4<<10, 1);
+    read_near(1<<20, 4<<10, 512);
+
+    read_near((1<<20)-(4<<10), 12<<10, 1);
+    read_near((1<<20)-(4<<10), 12<<10, 512);
+  });
+}
+
+// Two 4KB writes with a 4KB hole between them; the hole must read back
+// as zeros (matching the zero-filled shadow buffer).
+TEST_P(object_data_handler_test_t, write_hole)
+{
+  run_async([this] {
+    write((1<<20) - (4<<10), 4<<10, 'a');
+    // hole at 1<<20
+    write((1<<20) + (4<<10), 4<<10, 'c');
+
+    read_near(1<<20, 4<<10, 1);
+    read_near(1<<20, 4<<10, 512);
+
+    read_near((1<<20)-(4<<10), 12<<10, 1);
+    read_near((1<<20)-(4<<10), 12<<10, 512);
+  });
+}
+
+// Full overwrite of a single 4KB extent.
+TEST_P(object_data_handler_test_t, overwrite_single)
+{
+  run_async([this] {
+    write((1<<20), 4<<10, 'a');
+    write((1<<20), 4<<10, 'c');
+
+    read_near(1<<20, 4<<10, 1);
+    read_near(1<<20, 4<<10, 512);
+  });
+}
+
+// One 8KB write fully covering two previously-written 4KB extents.
+TEST_P(object_data_handler_test_t, overwrite_double)
+{
+  run_async([this] {
+    write((1<<20), 4<<10, 'a');
+    write((1<<20)+(4<<10), 4<<10, 'c');
+    write((1<<20), 8<<10, 'b');
+
+    read_near(1<<20, 8<<10, 1);
+    read_near(1<<20, 8<<10, 512);
+
+    read_near(1<<20, 4<<10, 1);
+    read_near(1<<20, 4<<10, 512);
+
+    read_near((1<<20) + (4<<10), 4<<10, 1);
+    read_near((1<<20) + (4<<10), 4<<10, 512);
+  });
+}
+
+// Overwrite a 12KB extent piecewise (tail, middle, head) and verify the
+// content after each step.
+TEST_P(object_data_handler_test_t, overwrite_partial)
+{
+  run_async([this] {
+    write((1<<20), 12<<10, 'a');
+    read_near(1<<20, 12<<10, 1);
+
+    write((1<<20)+(8<<10), 4<<10, 'b');
+    read_near(1<<20, 12<<10, 1);
+
+    write((1<<20)+(4<<10), 4<<10, 'c');
+    read_near(1<<20, 12<<10, 1);
+
+    write((1<<20), 4<<10, 'd');
+
+    read_near(1<<20, 12<<10, 1);
+    read_near(1<<20, 12<<10, 512);
+
+    read_near(1<<20, 4<<10, 1);
+    read_near(1<<20, 4<<10, 512);
+
+    read_near((1<<20) + (4<<10), 4<<10, 1);
+    read_near((1<<20) + (4<<10), 4<<10, 512);
+  });
+}
+
+// Writes whose offset and/or length are not block aligned.
+TEST_P(object_data_handler_test_t, unaligned_write)
+{
+  run_async([this] {
+    objaddr_t base = 1<<20;
+    // aligned start, unaligned (5KB) length
+    write(base, (4<<10)+(1<<10), 'a');
+    read_near(base-(4<<10), 12<<10, 512);
+
+    base = (1<<20) + (64<<10);
+    // unaligned start and length
+    write(base+(1<<10), (4<<10)+(1<<10), 'b');
+    read_near(base-(4<<10), 12<<10, 512);
+
+    base = (1<<20) + (128<<10);
+    // NOTE(review): (2<<20) is 2MB, breaking the (1<<10) pattern of the
+    // writes above -- confirm whether 2<<10 was intended.  The 2MB write
+    // still fits inside the 4MB shadow buffer, so the test passes either way.
+    write(base-(1<<10), (4<<10)+(2<<20), 'c');
+    read_near(base-(4<<10), 12<<10, 512);
+  });
+}
+
+// Unaligned writes overwriting an existing 144KB extent of 'x'.
+TEST_P(object_data_handler_test_t, unaligned_overwrite)
+{
+  run_async([this] {
+    objaddr_t base = 1<<20;
+    write(base, (128<<10) + (16<<10), 'x');
+
+    write(base, (4<<10)+(1<<10), 'a');
+    read_near(base-(4<<10), 12<<10, 2<<10);
+
+    base = (1<<20) + (64<<10);
+    write(base+(1<<10), (4<<10)+(1<<10), 'b');
+    read_near(base-(4<<10), 12<<10, 2<<10);
+
+    base = (1<<20) + (128<<10);
+    // NOTE(review): (2<<20) breaks the (1<<10) pattern above -- confirm
+    // whether 2<<10 was intended (see unaligned_write).
+    write(base-(1<<10), (4<<10)+(2<<20), 'c');
+    read_near(base-(4<<10), 12<<10, 2<<10);
+
+    // NOTE(review): `base` was rebased to 1MB+128KB above, so this reads
+    // 144KB starting there, not the original 'x' region at 1MB -- the
+    // trailing bytes are unwritten and compare equal as zeros.  Confirm
+    // whether read(1<<20, ...) was intended.
+    read(base, (128<<10) + (16<<10));
+  });
+}
+
+// Progressively truncate across extent boundaries (past the data, on a
+// boundary, mid-extent, and before all the data) and verify reads.
+TEST_P(object_data_handler_test_t, truncate)
+{
+  run_async([this] {
+    objaddr_t base = 1<<20;
+    write(base, 8<<10, 'a');
+    write(base+(8<<10), 8<<10, 'b');
+    write(base+(16<<10), 8<<10, 'c');
+
+    truncate(base + (32<<10));
+    read(base, 64<<10);
+
+    truncate(base + (24<<10));
+    read(base, 64<<10);
+
+    truncate(base + (12<<10));
+    read(base, 64<<10);
+
+    truncate(base - (12<<10));
+    read(base, 64<<10);
+  });
+}
+
+// A full overwrite of an extent must not split it: still one mapping.
+TEST_P(object_data_handler_test_t, no_split) {
+  run_async([this] {
+    write(0, 8<<10, 'x');
+    write(0, 8<<10, 'a');
+
+    auto pins = get_mappings(0, 8<<10);
+    EXPECT_EQ(pins.size(), 1);
+
+    read(0, 8<<10);
+  });
+}
+
+// Overwrite the tail of a 128KB extent: expect a split into two
+// mappings at offsets 0 and 64KB (relative to the first pin's key).
+TEST_P(object_data_handler_test_t, split_left) {
+  run_async([this] {
+    write(0, 128<<10, 'x');
+
+    write(64<<10, 60<<10, 'a');
+
+    auto pins = get_mappings(0, 128<<10);
+    EXPECT_EQ(pins.size(), 2);
+
+    size_t res[2] = {0, 64<<10};
+    auto base = pins.front()->get_key();
+    int i = 0;
+    for (auto &pin : pins) {
+      EXPECT_EQ(pin->get_key() - base, res[i]);
+      i++;
+    }
+    read(0, 128<<10);
+  });
+}
+
+// Overwrite near the head of a 128KB extent: expect two mappings at
+// relative offsets 0 and 64KB.
+TEST_P(object_data_handler_test_t, split_right) {
+  run_async([this] {
+    write(0, 128<<10, 'x');
+    write(4<<10, 60<<10, 'a');
+
+    auto pins = get_mappings(0, 128<<10);
+    EXPECT_EQ(pins.size(), 2);
+
+    size_t res[2] = {0, 64<<10};
+    auto base = pins.front()->get_key();
+    int i = 0;
+    for (auto &pin : pins) {
+      EXPECT_EQ(pin->get_key() - base, res[i]);
+      i++;
+    }
+    read(0, 128<<10);
+  });
+}
+// Overwrite the middle of a 128KB extent: expect three mappings at
+// relative offsets 0, 48KB and 80KB.
+// NOTE(review): unlike its siblings this test does not finish with
+// read(0, 128<<10) -- confirm whether the content check was omitted
+// intentionally.
+TEST_P(object_data_handler_test_t, split_left_right) {
+  run_async([this] {
+    write(0, 128<<10, 'x');
+    write(48<<10, 32<<10, 'a');
+
+    auto pins = get_mappings(0, 128<<10);
+    EXPECT_EQ(pins.size(), 3);
+
+    size_t res[3] = {0, 48<<10, 80<<10};
+    auto base = pins.front()->get_key();
+    int i = 0;
+    for (auto &pin : pins) {
+      EXPECT_EQ(pin->get_key() - base, res[i]);
+      i++;
+    }
+  });
+}
+// Several overlapping overwrites inside one transaction, then verify the
+// resulting ten-way split layout and the final content.
+TEST_P(object_data_handler_test_t, multiple_split) {
+  run_async([this] {
+    write(0, 128<<10, 'x');
+
+    auto t = create_mutate_transaction();
+    // normal split
+    write(*t, 120<<10, 4<<10, 'a');
+    // not aligned right
+    write(*t, 4<<10, 5<<10, 'b');
+    // split right extent of last split result
+    write(*t, 32<<10, 4<<10, 'c');
+    // non aligned overwrite
+    write(*t, 13<<10, 4<<10, 'd');
+
+    write(*t, 64<<10, 32<<10, 'e');
+    // not split right
+    write(*t, 60<<10, 8<<10, 'f');
+
+    submit_transaction(std::move(t));
+
+    auto pins = get_mappings(0, 128<<10);
+    EXPECT_EQ(pins.size(), 10);
+
+    // expected mapping offsets relative to the first pin's key
+    size_t res[10] = {0, 4<<10, 12<<10, 20<<10, 32<<10,
+                      36<<10, 60<<10, 96<<10, 120<<10, 124<<10};
+    auto base = pins.front()->get_key();
+    int i = 0;
+    for (auto &pin : pins) {
+      EXPECT_EQ(pin->get_key() - base, res[i]);
+      i++;
+    }
+    read(0, 128<<10);
+  });
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ object_data_handler_test,
+ object_data_handler_test_t,
+ ::testing::Values (
+ "segmented",
+ "circularbounded"
+ )
+);
+
+
diff --git a/src/test/crimson/seastore/test_omap_manager.cc b/src/test/crimson/seastore/test_omap_manager.cc
new file mode 100644
index 000000000..ab2218565
--- /dev/null
+++ b/src/test/crimson/seastore/test_omap_manager.cc
@@ -0,0 +1,730 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "test/crimson/seastore/transaction_manager_test_state.h"
+
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/omap_manager.h"
+
+#include "test/crimson/seastore/test_block.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+using namespace std;
+
+namespace {
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+const int STR_LEN = 50;
+
+// Generate a pseudo-random key of length `len`.  Characters fall in the
+// ASCII range ['0', 'y'] (digits, letters and some punctuation).  rand()
+// is never seeded here, so key sequences repeat across test runs.
+std::string rand_name(const int len)
+{
+  std::string ret;
+  ret.reserve(len);
+  for (int i = 0; i < len; ++i) {
+    ret.append(1, (char)(rand() % ('z' - '0')) + '0');
+  }
+  return ret;
+}
+
+// Generate a bufferlist of `len` pseudo-random bytes (unseeded rand(),
+// so content repeats across runs).
+bufferlist rand_buffer(const int len) {
+  bufferptr ptr(len);
+  for (auto i = ptr.c_str(); i < ptr.c_str() + len; ++i) {
+    *i = (char)rand();
+  }
+  bufferlist bl;
+  bl.append(ptr);
+  return bl;
+}
+
+// Fixture for OMapManager: every mutation performed through the manager
+// is mirrored into a std::map (test_omap_mappings) which acts as the
+// reference model for subsequent gets/lists.
+struct omap_manager_test_t :
+  public seastar_test_suite_t,
+  TMTestState {
+
+  OMapManagerRef omap_manager;
+
+  omap_manager_test_t() {}
+
+  seastar::future<> set_up_fut() final {
+    return tm_setup().then([this] {
+      omap_manager = omap_manager::create_omap_manager(*tm);
+      return seastar::now();
+    });
+  }
+
+  seastar::future<> tear_down_fut() final {
+    return tm_teardown().then([this] {
+      omap_manager.reset();
+      return seastar::now();
+    });
+  }
+
+  // Reference model: expected key -> value content of the omap tree.
+  using test_omap_t = std::map<std::string, ceph::bufferlist>;
+  test_omap_t test_omap_mappings;
+
+  // Set a key through the manager and mirror it into the model.
+  void set_key(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const string &key,
+    const bufferlist &val) {
+    with_trans_intr(
+      t,
+      [&, this](auto &t) {
+        return omap_manager->omap_set_key(omap_root, t, key, val);
+      }).unsafe_get0();
+    test_omap_mappings[key] = val;
+  }
+
+  // String-value convenience overload.
+  void set_key(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const string &key,
+    const string &val) {
+    bufferlist bl;
+    bl.append(val);
+    set_key(omap_root, t, key, bl);
+  }
+
+  // Insert a random key/value pair; returns the key for later use.
+  std::string set_random_key(
+    omap_root_t &omap_root,
+    Transaction &t) {
+    auto key = rand_name(STR_LEN);
+    set_key(
+      omap_root,
+      t,
+      key,
+      rand_buffer(STR_LEN));
+    return key;
+  }
+
+  // Look up `key` and EXPECT presence/absence and value to match the model.
+  void get_value(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const string &key) {
+    auto ret = with_trans_intr(
+      t,
+      [&, this](auto &t) {
+        return omap_manager->omap_get_value(omap_root, t, key);
+      }).unsafe_get0();
+    auto iter = test_omap_mappings.find(key);
+    if (iter == test_omap_mappings.end()) {
+      EXPECT_FALSE(ret);
+    } else {
+      EXPECT_TRUE(ret);
+      if (ret) {
+        EXPECT_TRUE(*ret == iter->second);
+      }
+    }
+  }
+
+  // Remove `key` from both the tree and the model.  The key is expected
+  // to exist (erase(find(key)) on a missing key would be UB).
+  void rm_key(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const string &key) {
+    with_trans_intr(
+      t,
+      [&, this](auto &t) {
+        return omap_manager->omap_rm_key(omap_root, t, key);
+      }).unsafe_get0();
+    test_omap_mappings.erase(test_omap_mappings.find(key));
+  }
+
+  // Remove keys in [first, last) -- capped at 3000 per the config -- and
+  // replicate the same removal on the model, returning the removed keys.
+  std::vector<std::string> rm_key_range(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const std::string &first,
+    const std::string &last) {
+    logger().debug("rm keys in range {} ~ {}", first, last);
+    auto config = OMapManager::omap_list_config_t()
+      .with_max(3000)
+      .with_inclusive(true, false);
+
+    with_trans_intr(
+      t,
+      [&, this](auto &t) {
+        return omap_manager->omap_rm_key_range(
+          omap_root, t, first, last, config);
+      }).unsafe_get0();
+
+    std::vector<std::string> keys;
+    size_t count = 0;
+    for (auto iter = test_omap_mappings.begin();
+        iter != test_omap_mappings.end(); ) {
+      if (iter->first >= first && iter->first < last) {
+        keys.push_back(iter->first);
+        iter = test_omap_mappings.erase(iter);
+        count++;
+      } else {
+        iter++;
+      }
+      // mirror the manager's max_result_size cap
+      if (count == config.max_result_size) {
+        break;
+      }
+    }
+    return keys;
+  }
+
+  // List keys in [first, last] (bounds optional, inclusiveness per
+  // `inclusive`) and EXPECT the result to match the model's range.
+  void list(
+    const omap_root_t &omap_root,
+    Transaction &t,
+    const std::optional<std::string> &first,
+    const std::optional<std::string> &last,
+    size_t max = 128,
+    bool inclusive = false) {
+
+    if (first && last) {
+      logger().debug("list on {} ~ {}", *first, *last);
+    } else if (first) {
+      logger().debug("list on {} ~ end", *first);
+    } else if (last) {
+      logger().debug("list on start ~ {}", *last);
+    } else {
+      logger().debug("list on start ~ end");
+    }
+
+    auto config = OMapManager::omap_list_config_t()
+      .with_max(max)
+      .with_inclusive(inclusive, false);
+
+    auto [complete, results] = with_trans_intr(
+      t,
+      [&, this](auto &t) {
+        return omap_manager->omap_list(omap_root, t, first, last, config);
+      }).unsafe_get0();
+
+    // Compute the model's expected [it, lit) range for the same bounds.
+    test_omap_t::iterator it, lit;
+    if (first) {
+      it = config.first_inclusive ?
+        test_omap_mappings.lower_bound(*first) :
+        test_omap_mappings.upper_bound(*first);
+    } else {
+      it = test_omap_mappings.begin();
+    }
+    if (last) {
+      lit = config.last_inclusive ?
+        test_omap_mappings.upper_bound(*last) :
+        test_omap_mappings.lower_bound(*last);
+    } else {
+      lit = test_omap_mappings.end();
+    }
+
+    for (auto &&[k, v]: results) {
+      EXPECT_NE(it, test_omap_mappings.end());
+      if (it == test_omap_mappings.end()) {
+        return;
+      }
+      EXPECT_EQ(k, it->first);
+      EXPECT_EQ(v, it->second);
+      it++;
+    }
+    // Either the listing consumed the whole range, or it stopped at `max`.
+    if (it == lit) {
+      EXPECT_TRUE(complete);
+    } else {
+      EXPECT_EQ(results.size(), max);
+    }
+  }
+
+  // Clear the whole omap; the root must be released (L_ADDR_NULL).
+  void clear(
+    omap_root_t &omap_root,
+    Transaction &t) {
+    with_trans_intr(
+      t,
+      [&, this](auto &t) {
+        return omap_manager->omap_clear(omap_root, t);
+      }).unsafe_get0();
+    EXPECT_EQ(omap_root.get_location(), L_ADDR_NULL);
+  }
+
+  // Verify every key in the model is readable with the expected value.
+  void check_mappings(omap_root_t &omap_root, Transaction &t) {
+    for (const auto &i: test_omap_mappings){
+      get_value(omap_root, t, i.first);
+    }
+  }
+
+  void check_mappings(omap_root_t &omap_root) {
+    auto t = create_read_transaction();
+    check_mappings(omap_root, *t);
+  }
+
+  std::vector<std::string> get_mapped_keys() {
+    std::vector<std::string> mkeys;
+    mkeys.reserve(test_omap_mappings.size());
+    for (auto &k: test_omap_mappings) {
+      mkeys.push_back(k.first);
+    }
+    return mkeys;
+  }
+
+  // Restart the transaction manager (journal replay) and recreate the
+  // omap manager on top of it.
+  void replay() {
+    restart();
+    omap_manager = omap_manager::create_omap_manager(*tm);
+  }
+
+  // Create an empty omap tree and return its root.
+  auto initialize() {
+    auto t = create_mutate_transaction();
+    omap_root_t omap_root = with_trans_intr(
+      *t,
+      [this](auto &t) {
+        return omap_manager->initialize_omap(t, L_ADDR_MIN);
+      }).unsafe_get0();
+    submit_transaction(std::move(t));
+    return omap_root;
+  }
+};
+
+// Set / get / remove a single key across three transactions.
+TEST_P(omap_manager_test_t, basic)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    string key = "owner";
+    string val = "test";
+
+    {
+      auto t = create_mutate_transaction();
+      logger().debug("first transaction");
+      set_key(omap_root, *t, key, val);
+      get_value(omap_root, *t, key);
+      submit_transaction(std::move(t));
+    }
+    {
+      auto t = create_mutate_transaction();
+      logger().debug("second transaction");
+      get_value(omap_root, *t, key);
+      rm_key(omap_root, *t, key);
+      get_value(omap_root, *t, key);
+      submit_transaction(std::move(t));
+    }
+    {
+      auto t = create_mutate_transaction();
+      logger().debug("third transaction");
+      get_value(omap_root, *t, key);
+      submit_transaction(std::move(t));
+    }
+  });
+}
+
+// Insert 400 random keys (40 transactions x 10 keys) to force leaf-node
+// splits, verifying all mappings after each transaction.
+TEST_P(omap_manager_test_t, force_leafnode_split)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    for (unsigned i = 0; i < 40; i++) {
+      auto t = create_mutate_transaction();
+      logger().debug("opened transaction");
+      for (unsigned j = 0; j < 10; ++j) {
+        set_random_key(omap_root, *t);
+        if ((i % 20 == 0) && (j == 5)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("force split submit transaction i = {}", i);
+      submit_transaction(std::move(t));
+      check_mappings(omap_root);
+    }
+  });
+}
+
+// Force leaf splits via 400 inserts, then delete two thirds of the keys
+// in batches of 10 to trigger leaf merges, checking mappings along the way.
+TEST_P(omap_manager_test_t, force_leafnode_split_merge)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    for (unsigned i = 0; i < 80; i++) {
+      auto t = create_mutate_transaction();
+      logger().debug("opened split_merge transaction");
+      for (unsigned j = 0; j < 5; ++j) {
+        set_random_key(omap_root, *t);
+        if ((i % 10 == 0) && (j == 3)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("submitting transaction");
+      submit_transaction(std::move(t));
+      if (i % 50 == 0) {
+        check_mappings(omap_root);
+      }
+    }
+    auto mkeys = get_mapped_keys();
+    auto t = create_mutate_transaction();
+    for (unsigned i = 0; i < mkeys.size(); i++) {
+      // keep every third key; remove the rest
+      if (i % 3 != 0) {
+        rm_key(omap_root, *t, mkeys[i]);
+      }
+
+      if (i % 10 == 0) {
+        logger().debug("submitting transaction i= {}", i);
+        submit_transaction(std::move(t));
+        t = create_mutate_transaction();
+      }
+      if (i % 100 == 0) {
+        logger().debug("check_mappings i= {}", i);
+        check_mappings(omap_root, *t);
+        check_mappings(omap_root);
+      }
+    }
+    logger().debug("finally submitting transaction ");
+    submit_transaction(std::move(t));
+  });
+}
+
+// Force leaf splits via 250 inserts, then remove a contiguous index
+// window (31..99) to exercise the full/balanced merge paths.
+TEST_P(omap_manager_test_t, force_leafnode_split_merge_fullandbalanced)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    for (unsigned i = 0; i < 50; i++) {
+      auto t = create_mutate_transaction();
+      logger().debug("opened split_merge transaction");
+      for (unsigned j = 0; j < 5; ++j) {
+        set_random_key(omap_root, *t);
+        if ((i % 10 == 0) && (j == 3)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("submitting transaction");
+      submit_transaction(std::move(t));
+      if (i % 50 == 0) {
+        check_mappings(omap_root);
+      }
+    }
+    auto mkeys = get_mapped_keys();
+    auto t = create_mutate_transaction();
+    for (unsigned i = 0; i < mkeys.size(); i++) {
+      // only keys with indexes strictly inside (30, 100) are removed
+      if (30 < i && i < 100) {
+        rm_key(omap_root, *t, mkeys[i]);
+      }
+
+      if (i % 10 == 0) {
+        logger().debug("submitting transaction i= {}", i);
+        submit_transaction(std::move(t));
+        t = create_mutate_transaction();
+      }
+      if (i % 50 == 0) {
+        logger().debug("check_mappings i= {}", i);
+        check_mappings(omap_root, *t);
+        check_mappings(omap_root);
+      }
+      if (i == 100) {
+        break;
+      }
+    }
+    logger().debug("finally submitting transaction ");
+    submit_transaction(std::move(t));
+    check_mappings(omap_root);
+  });
+}
+
+// Fill the tree past a leaf split, then exercise omap_list with every
+// combination of bounds, remove the [first, last) range, and clear.
+TEST_P(omap_manager_test_t, force_split_listkeys_list_rmkey_range_clear)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    string first, last;
+    for (unsigned i = 0; i < 40; i++) {
+      auto t = create_mutate_transaction();
+      logger().debug("opened transaction");
+      for (unsigned j = 0; j < 10; ++j) {
+        auto key = set_random_key(omap_root, *t);
+        // NOTE(review): these conditions test `i`, not `j`, so first/last
+        // are rewritten for every key of transactions 10 and 30 and end up
+        // holding the last key of those batches -- confirm intent.
+        if (i == 10) {
+          first = key;
+        }
+        if (i == 30) {
+          last = key;
+          if (first > last) {
+            std::swap(first, last);
+          }
+        }
+        if ((i % 20 == 0) && (j == 5)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("force split submit transaction i = {}", i);
+      submit_transaction(std::move(t));
+      check_mappings(omap_root);
+    }
+
+    std::optional<std::string> first_temp;
+    std::optional<std::string> last_temp;
+    // unbounded listing
+    {
+      auto t = create_read_transaction();
+      first_temp = std::nullopt;
+      last_temp = std::nullopt;
+      list(omap_root, *t, first_temp, last_temp);
+    }
+
+    // lower-bounded, exclusive then inclusive
+    {
+      auto t = create_read_transaction();
+      first_temp = first;
+      last_temp = std::nullopt;
+      list(omap_root, *t, first_temp, last_temp, 100);
+    }
+
+    {
+      auto t = create_read_transaction();
+      first_temp = first;
+      last_temp = std::nullopt;
+      list(omap_root, *t, first_temp, last_temp, 100, true);
+    }
+
+    // upper-bounded
+    {
+      auto t = create_read_transaction();
+      first_temp = std::nullopt;
+      last_temp = last;
+      list(omap_root, *t, first_temp, last_temp, 10240);
+    }
+
+    // fully bounded, inclusive
+    {
+      auto t = create_read_transaction();
+      first_temp = first;
+      last_temp = last;
+      list(omap_root, *t, first_temp, last_temp, 10240, true);
+    }
+
+    {
+      auto t = create_read_transaction();
+      list(omap_root, *t, first, last, 10240, true);
+    }
+
+    // removed keys must no longer resolve
+    {
+      auto t = create_mutate_transaction();
+      auto keys = rm_key_range(omap_root, *t, first, last);
+      for (const auto& key : keys) {
+        get_value(omap_root, *t, key);
+      }
+      submit_transaction(std::move(t));
+    }
+
+    {
+      auto t = create_mutate_transaction();
+      clear(omap_root, *t);
+      submit_transaction(std::move(t));
+    }
+  });
+}
+
+// Grow the tree until it is at least three levels deep, then exercise
+// bounded listings, ranged removal, and clear against the deep tree.
+TEST_P(omap_manager_test_t, force_inner_node_split_list_rmkey_range)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    string first = "";
+    string last;
+    while (cache->get_omap_tree_depth() < 3) {
+      for (unsigned i = 0; i < 40; i++) {
+        auto t = create_mutate_transaction();
+        logger().debug("opened transaction");
+        for (unsigned j = 0; j < 10; ++j) {
+          auto key = set_random_key(omap_root, *t);
+          // track the smallest key seen so far as `first`
+          if (key.compare(first) < 0 || !first.length()) {
+            first = key;
+          }
+          // NOTE(review): tests `i`, not `j` -- `last` ends up as the
+          // final key of every i==10 batch; confirm intent.
+          if (i == 10) {
+            last = key;
+          }
+        }
+        logger().debug("force split submit transaction i = {}", i);
+        submit_transaction(std::move(t));
+      }
+    }
+
+    std::optional<std::string> first_temp;
+    std::optional<std::string> last_temp;
+    {
+      auto t = create_read_transaction();
+      first_temp = first;
+      last_temp = std::nullopt;
+      list(omap_root, *t, first_temp, last_temp, 10240);
+    }
+
+    {
+      auto t = create_read_transaction();
+      first_temp = first;
+      last_temp = std::nullopt;
+      list(omap_root, *t, first_temp, last_temp, 10240, true);
+    }
+
+    {
+      auto t = create_read_transaction();
+      first_temp = std::nullopt;
+      last_temp = last;
+      list(omap_root, *t, first_temp, last_temp, 10240);
+    }
+
+    {
+      auto t = create_read_transaction();
+      first_temp = first;
+      last_temp = last;
+      list(omap_root, *t, first_temp, last_temp, 10240, true);
+    }
+
+    // removed keys must no longer resolve
+    {
+      auto t = create_mutate_transaction();
+      auto keys = rm_key_range(omap_root, *t, first, last);
+      for (const auto& key : keys) {
+        get_value(omap_root, *t, key);
+      }
+      submit_transaction(std::move(t));
+    }
+
+    {
+      auto t = create_mutate_transaction();
+      clear(omap_root, *t);
+      submit_transaction(std::move(t));
+    }
+  });
+}
+
+
+// Insert 800 keys (10 x 80) to force splits of internal nodes, then
+// verify every mapping.
+TEST_P(omap_manager_test_t, internal_force_split)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    for (unsigned i = 0; i < 10; i++) {
+      logger().debug("opened split transaction");
+      auto t = create_mutate_transaction();
+
+      for (unsigned j = 0; j < 80; ++j) {
+        set_random_key(omap_root, *t);
+        if ((i % 2 == 0) && (j % 50 == 0)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("submitting transaction i = {}", i);
+      submit_transaction(std::move(t));
+    }
+    check_mappings(omap_root);
+  });
+}
+
+// Insert 640 keys to build internal nodes, then remove every key in
+// batches of 10 to drive internal-node merge/balance paths.
+TEST_P(omap_manager_test_t, internal_force_merge_fullandbalanced)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    for (unsigned i = 0; i < 8; i++) {
+      logger().debug("opened split transaction");
+      auto t = create_mutate_transaction();
+
+      for (unsigned j = 0; j < 80; ++j) {
+        set_random_key(omap_root, *t);
+        if ((i % 2 == 0) && (j % 50 == 0)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("submitting transaction");
+      submit_transaction(std::move(t));
+    }
+    auto mkeys = get_mapped_keys();
+    auto t = create_mutate_transaction();
+    for (unsigned i = 0; i < mkeys.size(); i++) {
+      rm_key(omap_root, *t, mkeys[i]);
+
+      if (i % 10 == 0) {
+        logger().debug("submitting transaction i= {}", i);
+        submit_transaction(std::move(t));
+        t = create_mutate_transaction();
+      }
+      if (i % 50 == 0) {
+        logger().debug("check_mappings i= {}", i);
+        check_mappings(omap_root, *t);
+        check_mappings(omap_root);
+      }
+    }
+    logger().debug("finally submitting transaction ");
+    submit_transaction(std::move(t));
+    check_mappings(omap_root);
+  });
+}
+
+// Same insert/remove pattern as the merge test, but with journal replay
+// (replay()) interleaved to verify the tree survives restarts.
+TEST_P(omap_manager_test_t, replay)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    for (unsigned i = 0; i < 8; i++) {
+      logger().debug("opened split transaction");
+      auto t = create_mutate_transaction();
+
+      for (unsigned j = 0; j < 80; ++j) {
+        set_random_key(omap_root, *t);
+        if ((i % 2 == 0) && (j % 50 == 0)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("submitting transaction i = {}", i);
+      submit_transaction(std::move(t));
+    }
+    replay();
+    check_mappings(omap_root);
+
+    auto mkeys = get_mapped_keys();
+    auto t = create_mutate_transaction();
+    for (unsigned i = 0; i < mkeys.size(); i++) {
+      rm_key(omap_root, *t, mkeys[i]);
+
+      if (i % 10 == 0) {
+        logger().debug("submitting transaction i= {}", i);
+        submit_transaction(std::move(t));
+        // replay after every batch to exercise recovery mid-workload
+        replay();
+        t = create_mutate_transaction();
+      }
+      if (i % 50 == 0) {
+        logger().debug("check_mappings i= {}", i);
+        check_mappings(omap_root, *t);
+        check_mappings(omap_root);
+      }
+    }
+    logger().debug("finally submitting transaction ");
+    submit_transaction(std::move(t));
+    replay();
+    check_mappings(omap_root);
+  });
+}
+
+
+// Keep inserting (424 + 800 keys) until splits propagate to the root,
+// then verify all mappings.
+TEST_P(omap_manager_test_t, internal_force_split_to_root)
+{
+  run_async([this] {
+    omap_root_t omap_root = initialize();
+
+    logger().debug("set big keys");
+    for (unsigned i = 0; i < 53; i++) {
+      auto t = create_mutate_transaction();
+
+      for (unsigned j = 0; j < 8; ++j) {
+        set_random_key(omap_root, *t);
+      }
+      logger().debug("submitting transaction i = {}", i);
+      submit_transaction(std::move(t));
+    }
+    logger().debug("set small keys");
+    for (unsigned i = 0; i < 100; i++) {
+      auto t = create_mutate_transaction();
+      for (unsigned j = 0; j < 8; ++j) {
+        set_random_key(omap_root, *t);
+      }
+      logger().debug("submitting transaction last");
+      submit_transaction(std::move(t));
+    }
+    check_mappings(omap_root);
+  });
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ omap_manager_test,
+ omap_manager_test_t,
+ ::testing::Values (
+ "segmented",
+ "circularbounded"
+ )
+);
diff --git a/src/test/crimson/seastore/test_randomblock_manager.cc b/src/test/crimson/seastore/test_randomblock_manager.cc
new file mode 100644
index 000000000..9ddb7f9ad
--- /dev/null
+++ b/src/test/crimson/seastore/test_randomblock_manager.cc
@@ -0,0 +1,178 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/gtest_seastar.h"
+
+#include <random>
+
+#include "crimson/common/log.h"
+#include "crimson/os/seastore/random_block_manager/block_rb_manager.h"
+#include "crimson/os/seastore/random_block_manager/rbm_device.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+
+namespace {
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+constexpr uint64_t DEFAULT_TEST_SIZE = 1 << 20;
+
+// Fixture for BlockRBManager on an ephemeral test RBMDevice: mkfs +
+// mount + open in set_up_fut, with raw read/write helpers addressed by
+// absolute device offset.
+struct rbm_test_t :
+  public seastar_test_suite_t {
+  std::unique_ptr<BlockRBManager> rbm_manager;
+  std::unique_ptr<random_block_device::RBMDevice> device;
+
+  // Minimal stand-in for a transaction's allocation bookkeeping.
+  struct rbm_transaction {
+    void add_rbm_allocated_blocks(alloc_delta_t &d) {
+      allocated_blocks.push_back(d);
+    }
+    void clear_rbm_allocated_blocks() {
+      if (!allocated_blocks.empty()) {
+        allocated_blocks.clear();
+      }
+    }
+    const auto &get_rbm_allocated_blocks() {
+      return allocated_blocks;
+    }
+    std::vector<alloc_delta_t> allocated_blocks;
+  };
+
+  std::default_random_engine generator;
+
+  // Cached device geometry, filled in set_up_fut.
+  uint64_t block_size = 0;
+  uint64_t size = 0;
+
+  device_config_t config;
+
+  rbm_test_t() = default;
+
+  seastar::future<> set_up_fut() final {
+    device = random_block_device::create_test_ephemeral(
+      random_block_device::DEFAULT_TEST_CBJOURNAL_SIZE, DEFAULT_TEST_SIZE);
+    block_size = device->get_block_size();
+    size = device->get_available_size();
+    rbm_manager.reset(new BlockRBManager(device.get(), std::string(), false));
+    config = get_rbm_ephemeral_device_config(0, 1);
+    return device->mkfs(config).handle_error(crimson::ct_error::assert_all{}
+    ).then([this] {
+      return device->mount().handle_error(crimson::ct_error::assert_all{}
+      ).then([this] {
+        return rbm_manager->open().handle_error(crimson::ct_error::assert_all{});
+      });
+    });
+  }
+
+  seastar::future<> tear_down_fut() final {
+    rbm_manager->close().unsafe_get0();
+    device->close().unsafe_get0();
+    rbm_manager.reset();
+    device.reset();
+    return seastar::now();
+  }
+
+  // Re-run mkfs with the current (possibly modified) config.
+  auto mkfs() {
+    return device->mkfs(config).unsafe_get0();
+  }
+
+  auto read_rbm_header() {
+    return device->read_rbm_header(RBM_START_ADDRESS).unsafe_get0();
+  }
+
+  // Remount the device and reopen the manager (pairs with close()).
+  auto open() {
+    device->mount().unsafe_get0();
+    return rbm_manager->open().unsafe_get0();
+  }
+
+  // Write `ptr` at absolute device address `addr`.
+  auto write(uint64_t addr, bufferptr &ptr) {
+    paddr_t paddr = convert_abs_addr_to_paddr(
+      addr,
+      rbm_manager->get_device_id());
+    return rbm_manager->write(paddr, ptr).unsafe_get0();
+  }
+
+  // Read into `ptr` from absolute device address `addr`.
+  auto read(uint64_t addr, bufferptr &ptr) {
+    paddr_t paddr = convert_abs_addr_to_paddr(
+      addr,
+      rbm_manager->get_device_id());
+    return rbm_manager->read(paddr, ptr).unsafe_get0();
+  }
+
+  // Buffer of `blocks` blocks filled with one random byte value.
+  bufferptr generate_extent(size_t blocks) {
+    std::uniform_int_distribution<char> distribution(
+      std::numeric_limits<char>::min(),
+      std::numeric_limits<char>::max()
+    );
+    char contents = distribution(generator);
+    return buffer::ptr(buffer::create(blocks * block_size, contents));
+  }
+
+  void close() {
+    rbm_manager->close().unsafe_get0();
+    return;
+  }
+
+};
+
+// Verify the on-disk header after the fixture's mkfs, then re-mkfs with
+// a modified device id and confirm the header reflects it.
+TEST_F(rbm_test_t, mkfs_test)
+{
+  run_async([this] {
+    auto super = read_rbm_header();
+    ASSERT_TRUE(
+      super.block_size == block_size &&
+      super.size == size
+    );
+    config.spec.id = DEVICE_ID_NULL;
+    mkfs();
+    super = read_rbm_header();
+    ASSERT_TRUE(
+      super.config.spec.id == DEVICE_ID_NULL &&
+      super.size == size
+    );
+  });
+}
+
+// Write one block, read it back (crc32c compare), then close/reopen the
+// manager and verify the data persisted.
+TEST_F(rbm_test_t, open_read_write_test)
+{
+  run_async([this] {
+    auto content = generate_extent(1);
+    {
+      write(
+        block_size,
+        content
+      );
+      auto bp = bufferptr(ceph::buffer::create_page_aligned(block_size));
+      read(
+        block_size,
+        bp
+      );
+      bufferlist bl;
+      bufferlist block;
+      bl.append(bp);
+      block.append(content);
+      ASSERT_EQ(
+        bl.begin().crc32c(bl.length(), 1),
+        block.begin().crc32c(block.length(), 1));
+    }
+    close();
+    open();
+    {
+      auto bp = bufferptr(ceph::buffer::create_page_aligned(block_size));
+      read(
+        block_size,
+        bp
+      );
+      bufferlist bl;
+      bufferlist block;
+      bl.append(bp);
+      block.append(content);
+      ASSERT_EQ(
+        bl.begin().crc32c(bl.length(), 1),
+        block.begin().crc32c(block.length(), 1));
+    }
+  });
+}
+
diff --git a/src/test/crimson/seastore/test_seastore.cc b/src/test/crimson/seastore/test_seastore.cc
new file mode 100644
index 000000000..63bf4c51f
--- /dev/null
+++ b/src/test/crimson/seastore/test_seastore.cc
@@ -0,0 +1,1268 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string>
+#include <iostream>
+#include <sstream>
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "test/crimson/seastore/transaction_manager_test_state.h"
+
+#include "crimson/os/futurized_collection.h"
+#include "crimson/os/seastore/seastore.h"
+#include "crimson/os/seastore/onode.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+using SeaStoreShard = FuturizedStore::Shard;
+using CTransaction = ceph::os::Transaction;
+using namespace std;
+
+namespace {
+ // File-local shortcut for the crimson test-subsystem logger.
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+// Build a deterministic non-temp test object id "object_<i>" in namespace
+// "asdf", pool 0. The bitwise key is spaced by i * 100 so generated oids
+// sort in creation order under the store's hash-ordered listing.
+ghobject_t make_oid(int i) {
+ stringstream ss;
+ ss << "object_" << i;
+ auto ret = ghobject_t(
+ hobject_t(
+ sobject_t(ss.str(), CEPH_NOSNAP)));
+ ret.set_shard(shard_id_t(shard_id_t::NO_SHARD));
+ ret.hobj.nspace = "asdf";
+ ret.hobj.pool = 0;
+ uint32_t reverse_hash = hobject_t::_reverse_bits(0);
+ ret.hobj.set_bitwise_key_u32(reverse_hash + i * 100);
+ return ret;
+}
+
+// Build a deterministic temp test object id "temp_object_<i>". The negative
+// pool (-2) appears to be what places these before normal objects in listing
+// order (i.e. the temp range get_objs_range() separates) -- see the bound_t
+// list tests below which rely on temp oids sorting first.
+ghobject_t make_temp_oid(int i) {
+ stringstream ss;
+ ss << "temp_object_" << i;
+ auto ret = ghobject_t(
+ hobject_t(
+ sobject_t(ss.str(), CEPH_NOSNAP)));
+ ret.set_shard(shard_id_t(shard_id_t::NO_SHARD));
+ ret.hobj.nspace = "hjkl";
+ ret.hobj.pool = -2ll;
+ uint32_t reverse_hash = hobject_t::_reverse_bits(0);
+ ret.hobj.set_bitwise_key_u32(reverse_hash + i * 100);
+ return ret;
+}
+
+struct seastore_test_t :
+ public seastar_test_suite_t,
+ SeaStoreTestState {
+
+ coll_t coll_name{spg_t{pg_t{0, 0}}};
+ CollectionRef coll;
+
+ seastore_test_t() {}
+
+ // Per-test setup: bring up the transaction manager, then create the test
+ // collection both in-memory (create_new_collection) and durably via a
+ // create_collection transaction.
+ seastar::future<> set_up_fut() final {
+ return tm_setup(
+ ).then([this] {
+ return sharded_seastore->create_new_collection(coll_name);
+ }).then([this](auto coll_ref) {
+ coll = coll_ref;
+ CTransaction t;
+ t.create_collection(coll_name, 0);
+ return sharded_seastore->do_transaction(
+ coll,
+ std::move(t));
+ });
+ }
+
+ // Per-test teardown: drop our collection reference before tearing down
+ // the transaction manager so no handle outlives the store.
+ seastar::future<> tear_down_fut() final {
+ coll.reset();
+ return tm_teardown();
+ }
+
+ // Apply `t` to the test collection and block until it commits.
+ // (Returning the void result of get0() from a void function is legal C++
+ // and keeps the call a single expression.)
+ void do_transaction(CTransaction &&t) {
+ return sharded_seastore->do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ // Blocking wrapper over SeaStore::write_meta (store-wide key/value
+ // metadata, distinct from per-object omap/attrs).
+ void set_meta(
+ const std::string& key,
+ const std::string& value) {
+ return seastore->write_meta(key, value).get0();
+ }
+
+ // Blocking wrapper over SeaStore::read_meta; returns {retcode, value}.
+ std::tuple<int, std::string> get_meta(
+ const std::string& key) {
+ return seastore->read_meta(key).get();
+ }
+
+ struct object_state_t {
+ const coll_t cid;
+ const CollectionRef coll;
+ const ghobject_t oid;
+
+ std::map<string, bufferlist> omap;
+ bufferlist contents;
+
+ std::map<snapid_t, bufferlist> clone_contents;
+
+ void touch(
+ CTransaction &t) {
+ t.touch(cid, oid);
+ }
+
+ void touch(
+ SeaStoreShard &sharded_seastore) {
+ CTransaction t;
+ touch(t);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ void truncate(
+ CTransaction &t,
+ uint64_t off) {
+ t.truncate(cid, oid, off);
+ }
+
+ void truncate(
+ SeaStoreShard &sharded_seastore,
+ uint64_t off) {
+ CTransaction t;
+ truncate(t, off);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ std::map<uint64_t, uint64_t> fiemap(
+ SeaStoreShard &sharded_seastore,
+ uint64_t off,
+ uint64_t len) {
+ return sharded_seastore.fiemap(coll, oid, off, len).unsafe_get0();
+ }
+
+ bufferlist readv(
+ SeaStoreShard &sharded_seastore,
+ interval_set<uint64_t>&m) {
+ return sharded_seastore.readv(coll, oid, m).unsafe_get0();
+ }
+
+ void remove(
+ CTransaction &t) {
+ t.remove(cid, oid);
+ t.remove_collection(cid);
+ }
+
+ void remove(
+ SeaStoreShard &sharded_seastore) {
+ CTransaction t;
+ remove(t);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ void set_omap(
+ CTransaction &t,
+ const string &key,
+ const bufferlist &val) {
+ omap[key] = val;
+ std::map<string, bufferlist> arg;
+ arg[key] = val;
+ t.omap_setkeys(
+ cid,
+ oid,
+ arg);
+ }
+
+ void set_omap(
+ SeaStoreShard &sharded_seastore,
+ const string &key,
+ const bufferlist &val) {
+ CTransaction t;
+ set_omap(t, key, val);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ // Queue a write of `bl` at `offset` into transaction `t` and update the
+ // in-memory `contents` mirror to the expected post-write object data:
+ // [head of old contents][zero-fill up to offset][bl][tail of old contents].
+ // The mirror is what read()/check_size() later compare against.
+ // NOTE: `sharded_seastore` is unused in this overload; it is kept for
+ // signature symmetry with the committing overload below.
+ void write(
+ SeaStoreShard &sharded_seastore,
+ CTransaction &t,
+ uint64_t offset,
+ bufferlist bl) {
+ bufferlist new_contents;
+ // Keep the prefix of the old contents that precedes the write.
+ if (offset > 0 && contents.length()) {
+ new_contents.substr_of(
+ contents,
+ 0,
+ std::min<size_t>(offset, contents.length())
+ );
+ }
+ // Zero-fill any hole between old EOF and the write offset.
+ new_contents.append_zero(offset - new_contents.length());
+ new_contents.append(bl);
+
+ // Re-append whatever old data extends past the end of this write.
+ auto tail_offset = offset + bl.length();
+ if (contents.length() > tail_offset) {
+ bufferlist tail;
+ tail.substr_of(
+ contents,
+ tail_offset,
+ contents.length() - tail_offset);
+ new_contents.append(tail);
+ }
+ contents.swap(new_contents);
+
+ t.write(
+ cid,
+ oid,
+ offset,
+ bl.length(),
+ bl);
+ }
+
+ void write(
+ SeaStoreShard &sharded_seastore,
+ uint64_t offset,
+ bufferlist bl) {
+ CTransaction t;
+ write(sharded_seastore, t, offset, bl);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ void clone(
+ SeaStoreShard &sharded_seastore,
+ snapid_t snap) {
+ ghobject_t coid = oid;
+ coid.hobj.snap = snap;
+ CTransaction t;
+ t.clone(cid, oid, coid);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ clone_contents[snap].reserve(contents.length());
+ auto it = contents.begin();
+ it.copy_all(clone_contents[snap]);
+ }
+
+ object_state_t get_clone(snapid_t snap) {
+ auto coid = oid;
+ coid.hobj.snap = snap;
+ auto clone_obj = object_state_t{cid, coll, coid};
+ clone_obj.contents.reserve(clone_contents[snap].length());
+ auto it = clone_contents[snap].begin();
+ it.copy_all(clone_obj.contents);
+ return clone_obj;
+ }
+
+ void write(
+ SeaStoreShard &sharded_seastore,
+ uint64_t offset,
+ size_t len,
+ char fill) {
+ auto buffer = bufferptr(buffer::create(len));
+ ::memset(buffer.c_str(), fill, len);
+ bufferlist bl;
+ bl.append(buffer);
+ write(sharded_seastore, offset, bl);
+ }
+
+ void zero(
+ SeaStoreShard &sharded_seastore,
+ CTransaction &t,
+ uint64_t offset,
+ size_t len) {
+ ceph::buffer::list bl;
+ bl.append_zero(len);
+ bufferlist new_contents;
+ if (offset > 0 && contents.length()) {
+ new_contents.substr_of(
+ contents,
+ 0,
+ std::min<size_t>(offset, contents.length())
+ );
+ }
+ new_contents.append_zero(offset - new_contents.length());
+ new_contents.append(bl);
+
+ auto tail_offset = offset + bl.length();
+ if (contents.length() > tail_offset) {
+ bufferlist tail;
+ tail.substr_of(
+ contents,
+ tail_offset,
+ contents.length() - tail_offset);
+ new_contents.append(tail);
+ }
+ contents.swap(new_contents);
+
+ t.zero(
+ cid,
+ oid,
+ offset,
+ len);
+ }
+
+ void zero(
+ SeaStoreShard &sharded_seastore,
+ uint64_t offset,
+ size_t len) {
+ CTransaction t;
+ zero(sharded_seastore, t, offset, len);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ void read(
+ SeaStoreShard &sharded_seastore,
+ uint64_t offset,
+ uint64_t len) {
+ bufferlist to_check;
+ if (contents.length() >= offset) {
+ to_check.substr_of(
+ contents,
+ offset,
+ std::min(len, (uint64_t)contents.length()));
+ }
+ auto ret = sharded_seastore.read(
+ coll,
+ oid,
+ offset,
+ len).unsafe_get0();
+ EXPECT_EQ(ret.length(), to_check.length());
+ EXPECT_EQ(ret, to_check);
+ }
+
+ void check_size(SeaStoreShard &sharded_seastore) {
+ auto st = sharded_seastore.stat(
+ coll,
+ oid).get0();
+ EXPECT_EQ(contents.length(), st.st_size);
+ }
+
+ void set_attr(
+ SeaStoreShard &sharded_seastore,
+ std::string key,
+ bufferlist& val) {
+ CTransaction t;
+ t.setattr(cid, oid, key, val);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ void rm_attr(
+ SeaStoreShard &sharded_seastore,
+ std::string key) {
+ CTransaction t;
+ t.rmattr(cid, oid, key);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ void rm_attrs(
+ SeaStoreShard &sharded_seastore) {
+ CTransaction t;
+ t.rmattrs(cid, oid);
+ sharded_seastore.do_transaction(
+ coll,
+ std::move(t)).get0();
+ }
+
+ SeaStoreShard::attrs_t get_attrs(
+ SeaStoreShard &sharded_seastore) {
+ return sharded_seastore.get_attrs(coll, oid)
+ .handle_error(SeaStoreShard::get_attrs_ertr::discard_all{})
+ .get();
+ }
+
+ ceph::bufferlist get_attr(
+ SeaStoreShard& sharded_seastore,
+ std::string_view name) {
+ return sharded_seastore.get_attr(coll, oid, name)
+ .handle_error(
+ SeaStoreShard::get_attr_errorator::discard_all{})
+ .get();
+ }
+
+ void check_omap_key(
+ SeaStoreShard &sharded_seastore,
+ const string &key) {
+ std::set<string> to_check;
+ to_check.insert(key);
+ auto result = sharded_seastore.omap_get_values(
+ coll,
+ oid,
+ to_check).unsafe_get0();
+ if (result.empty()) {
+ EXPECT_EQ(omap.find(key), omap.end());
+ } else {
+ auto iter = omap.find(key);
+ EXPECT_NE(iter, omap.end());
+ if (iter != omap.end()) {
+ EXPECT_EQ(result.size(), 1);
+ EXPECT_EQ(iter->second, result.begin()->second);
+ }
+ }
+ }
+
+ // Compare the store's complete omap listing (paged via `start`) against
+ // the locally tracked `omap` map, reporting any missing or extra keys and
+ // mismatched values.
+ // Fixes vs. the original:
+ // * The "missing" branch tested `refiter->first < iter->first` before
+ // checking `refiter == omap.end()`, dereferencing a possible end()
+ // iterator (UB) whenever the store returned keys beyond the tracked
+ // set. The "extra" case is now checked first.
+ // * GTEST_FAIL() is fatal and returns immediately, which made the
+ // ++refiter / ++iter recovery unreachable; ADD_FAILURE() records the
+ // failure and lets the scan continue to report every discrepancy.
+ void check_omap(SeaStoreShard &sharded_seastore) {
+ auto refiter = omap.begin();
+ std::optional<std::string> start;
+ while(true) {
+ auto [done, kvs] = sharded_seastore.omap_get_values(
+ coll,
+ oid,
+ start).unsafe_get0();
+ auto iter = kvs.begin();
+ while (true) {
+ if ((done && iter == kvs.end()) && refiter == omap.end()) {
+ return; // finished
+ } else if (!done && iter == kvs.end()) {
+ break; // reload kvs
+ }
+ // Invariant: if refiter == omap.end() here, iter != kvs.end()
+ // (the return/break cases above cover the rest).
+ if (refiter == omap.end() ||
+ (iter != kvs.end() && iter->first < refiter->first)) {
+ logger().debug(
+ "check_omap: extra omap key {}",
+ iter->first);
+ ADD_FAILURE() << "extra omap key " << iter->first;
+ ++iter;
+ } else if (iter == kvs.end() || refiter->first < iter->first) {
+ logger().debug(
+ "check_omap: missing omap key {}",
+ refiter->first);
+ ADD_FAILURE() << "missing omap key " << refiter->first;
+ ++refiter;
+ } else {
+ EXPECT_EQ(iter->second, refiter->second);
+ ++iter;
+ ++refiter;
+ }
+ }
+ if (!done) {
+ start = kvs.rbegin()->first;
+ }
+ }
+ }
+ };
+
+ map<ghobject_t, object_state_t> test_objects;
+ // Return the tracked state for `oid`, creating a fresh entry (bound to
+ // the test collection) on first use. emplace is a no-op if already
+ // present, so repeated calls return the same object_state_t.
+ object_state_t &get_object(
+ const ghobject_t &oid) {
+ return test_objects.emplace(
+ std::make_pair(
+ oid,
+ object_state_t{coll_name, coll, oid})).first->second;
+ }
+
+ // Remove the object from the store and drop it from the tracked set;
+ // asserts the object was actually being tracked.
+ void remove_object(
+ object_state_t &sobj) {
+
+ sobj.remove(*sharded_seastore);
+ auto erased = test_objects.erase(sobj.oid);
+ ceph_assert(erased == 1);
+ }
+
+ // List every object in the collection and verify it matches the tracked
+ // set exactly. test_objects is a std::map ordered by ghobject_t, so the
+ // expected vector is already in the store's listing order.
+ void validate_objects() const {
+ std::vector<ghobject_t> oids;
+ for (auto& [oid, obj] : test_objects) {
+ oids.emplace_back(oid);
+ }
+ auto ret = sharded_seastore->list_objects(
+ coll,
+ ghobject_t(),
+ ghobject_t::get_max(),
+ std::numeric_limits<uint64_t>::max()).get0();
+ // An unlimited listing must be exhaustive: next == max sentinel.
+ EXPECT_EQ(std::get<1>(ret), ghobject_t::get_max());
+ EXPECT_EQ(std::get<0>(ret), oids);
+ }
+
+ // create temp objects
+ // Symbolic listing bound used by test_list(): resolved to a concrete
+ // ghobject_t lazily via get_oid() because the temp/normal range split
+ // (TEMP_END / NORMAL_BEGIN) must be queried from the live store.
+ struct bound_t {
+ enum class type_t {
+ MIN, // absolute minimum oid
+ MAX, // absolute maximum oid
+ TEMP, // make_temp_oid(index)
+ TEMP_END, // end of the store's temp range
+ NORMAL_BEGIN, // start of the store's normal range
+ NORMAL, // make_oid(index)
+ } type = type_t::MIN;
+ unsigned index = 0;
+
+ static bound_t get_temp(unsigned index) {
+ return bound_t{type_t::TEMP, index};
+ }
+ static bound_t get_normal(unsigned index) {
+ return bound_t{type_t::NORMAL, index};
+ }
+ static bound_t get_min() { return bound_t{type_t::MIN}; }
+ static bound_t get_max() { return bound_t{type_t::MAX}; }
+ static bound_t get_temp_end() { return bound_t{type_t::TEMP_END}; }
+ static bound_t get_normal_begin() {
+ return bound_t{type_t::NORMAL_BEGIN};
+ }
+
+ // Resolve this bound against the store; TEMP_END / NORMAL_BEGIN consult
+ // seastore's temp/normal split for the collection.
+ ghobject_t get_oid(SeaStore &seastore, CollectionRef &coll) const {
+ switch (type) {
+ case type_t::MIN:
+ return ghobject_t();
+ case type_t::MAX:
+ return ghobject_t::get_max();
+ case type_t::TEMP:
+ return make_temp_oid(index);
+ case type_t::TEMP_END:
+ return seastore.get_objs_range(coll, 0).temp_end;
+ case type_t::NORMAL_BEGIN:
+ return seastore.get_objs_range(coll, 0).obj_begin;
+ case type_t::NORMAL:
+ return make_oid(index);
+ default:
+ // 0 == "impossible" is always false; the string documents the assert.
+ assert(0 == "impossible");
+ return ghobject_t();
+ }
+ }
+ };
+ struct list_test_case_t {
+ bound_t left;
+ bound_t right;
+ unsigned limit;
+ };
+ // list_test_cases_t :: [<limit, left_bound, right_bound>]
+ using list_test_cases_t = std::list<std::tuple<unsigned, bound_t, bound_t>>;
+
+ void test_list(
+ unsigned temp_to_create, /// create temp 0..temp_to_create-1
+ unsigned normal_to_create, /// create normal 0..normal_to_create-1
+ list_test_cases_t cases /// cases to test
+ ) {
+ std::vector<ghobject_t> objs;
+
+ // setup
+ auto create = [this, &objs](ghobject_t hoid) {
+ objs.emplace_back(std::move(hoid));
+ auto &obj = get_object(objs.back());
+ obj.touch(*sharded_seastore);
+ obj.check_size(*sharded_seastore);
+ };
+ for (unsigned i = 0; i < temp_to_create; ++i) {
+ create(make_temp_oid(i));
+ }
+ for (unsigned i = 0; i < normal_to_create; ++i) {
+ create(make_oid(i));
+ }
+
+ // list and validate each case
+ for (auto [limit, in_left_bound, in_right_bound] : cases) {
+ auto left_bound = in_left_bound.get_oid(*seastore, coll);
+ auto right_bound = in_right_bound.get_oid(*seastore, coll);
+
+ // get results from seastore
+ auto [listed, next] = sharded_seastore->list_objects(
+ coll, left_bound, right_bound, limit).get0();
+
+ // compute correct answer
+ auto correct_begin = std::find_if(
+ objs.begin(), objs.end(),
+ [&left_bound](const auto &in) {
+ return in >= left_bound;
+ });
+ unsigned count = 0;
+ auto correct_end = correct_begin;
+ for (; count < limit &&
+ correct_end != objs.end() &&
+ *correct_end < right_bound;
+ ++correct_end, ++count);
+
+ // validate return -- [correct_begin, correct_end) should match listed
+ decltype(objs) correct_listed(correct_begin, correct_end);
+ EXPECT_EQ(listed, correct_listed);
+
+ if (count < limit) {
+ if (correct_end == objs.end()) {
+ // if listed extends to end of range, next should be >= right_bound
+ EXPECT_GE(next, right_bound);
+ } else {
+ // next <= *correct_end since *correct_end is the next object to list
+ EXPECT_LE(next, *correct_end);
+ // next > *(correct_end - 1) since we already listed it
+ EXPECT_GT(next, *(correct_end - 1));
+ }
+ } else {
+ // we listed exactly limit objects
+ EXPECT_EQ(limit, listed.size());
+
+ EXPECT_GE(next, left_bound);
+ if (limit == 0) {
+ if (correct_end != objs.end()) {
+ // next <= *correct_end since *correct_end is the next object to list
+ EXPECT_LE(next, *correct_end);
+ }
+ } else {
+ // next > *(correct_end - 1) since we already listed it
+ EXPECT_GT(next, *(correct_end - 1));
+ }
+ }
+ }
+
+ // teardown
+ for (auto &&hoid : objs) { get_object(hoid).remove(*sharded_seastore); }
+ }
+};
+
+// True if container `t` holds an element equal to `v` (linear scan).
+template <typename T, typename V>
+auto contains(const T &t, const V &v) {
+ return std::find(
+ t.begin(),
+ t.end(),
+ v) != t.end();
+}
+
+TEST_P(seastore_test_t, collection_create_list_remove)
+{
+ run_async([this] {
+ coll_t test_coll{spg_t{pg_t{1, 0}}};
+ {
+ sharded_seastore->create_new_collection(test_coll).get0();
+ {
+ CTransaction t;
+ t.create_collection(test_coll, 4);
+ do_transaction(std::move(t));
+ }
+ auto colls_cores = seastore->list_collections().get0();
+ std::vector<coll_t> colls;
+ colls.resize(colls_cores.size());
+ std::transform(
+ colls_cores.begin(), colls_cores.end(), colls.begin(),
+ [](auto p) { return p.first; });
+ EXPECT_EQ(colls.size(), 2);
+ EXPECT_TRUE(contains(colls, coll_name));
+ EXPECT_TRUE(contains(colls, test_coll));
+ }
+
+ {
+ {
+ CTransaction t;
+ t.remove_collection(test_coll);
+ do_transaction(std::move(t));
+ }
+ auto colls_cores = seastore->list_collections().get0();
+ std::vector<coll_t> colls;
+ colls.resize(colls_cores.size());
+ std::transform(
+ colls_cores.begin(), colls_cores.end(), colls.begin(),
+ [](auto p) { return p.first; });
+ EXPECT_EQ(colls.size(), 1);
+ EXPECT_TRUE(contains(colls, coll_name));
+ }
+ });
+}
+
+TEST_P(seastore_test_t, meta) {
+ run_async([this] {
+ set_meta("key1", "value1");
+ set_meta("key2", "value2");
+
+ const auto [ret1, value1] = get_meta("key1");
+ const auto [ret2, value2] = get_meta("key2");
+ EXPECT_EQ(ret1, 0);
+ EXPECT_EQ(ret2, 0);
+ EXPECT_EQ(value1, "value1");
+ EXPECT_EQ(value2, "value2");
+ });
+}
+
+TEST_P(seastore_test_t, touch_stat_list_remove)
+{
+ run_async([this] {
+ auto &test_obj = get_object(make_oid(0));
+ test_obj.touch(*sharded_seastore);
+ test_obj.check_size(*sharded_seastore);
+ validate_objects();
+
+ remove_object(test_obj);
+ validate_objects();
+ });
+}
+
+using bound_t = seastore_test_t::bound_t;
+constexpr unsigned MAX_LIMIT = std::numeric_limits<unsigned>::max();
+static const seastore_test_t::list_test_cases_t temp_list_cases{
+ // list all temp, maybe overlap to normal on right
+ {MAX_LIMIT, bound_t::get_min() , bound_t::get_max() },
+ { 5, bound_t::get_min() , bound_t::get_temp_end()},
+ { 6, bound_t::get_min() , bound_t::get_temp_end()},
+ { 6, bound_t::get_min() , bound_t::get_max() },
+
+ // list temp starting at min up to but not past boundary
+ { 3, bound_t::get_min() , bound_t::get_temp(3) },
+ { 3, bound_t::get_min() , bound_t::get_temp(4) },
+ { 3, bound_t::get_min() , bound_t::get_temp(2) },
+
+ // list temp starting > min up to or past boundary
+ { 3, bound_t::get_temp(2) , bound_t::get_temp_end()},
+ { 3, bound_t::get_temp(2) , bound_t::get_max() },
+ { 3, bound_t::get_temp(3) , bound_t::get_max() },
+ { 3, bound_t::get_temp(1) , bound_t::get_max() },
+
+ // 0 limit
+ { 0, bound_t::get_min() , bound_t::get_max() },
+ { 0, bound_t::get_temp(1) , bound_t::get_max() },
+ { 0, bound_t::get_temp_end(), bound_t::get_max() },
+};
+
+TEST_P(seastore_test_t, list_objects_temp_only)
+{
+ run_async([this] { test_list(5, 0, temp_list_cases); });
+}
+
+TEST_P(seastore_test_t, list_objects_temp_overlap)
+{
+ run_async([this] { test_list(5, 5, temp_list_cases); });
+}
+
+static const seastore_test_t::list_test_cases_t normal_list_cases{
+ // list all normal, maybe overlap to temp on left
+ {MAX_LIMIT, bound_t::get_min() , bound_t::get_max() },
+ { 5, bound_t::get_normal_begin(), bound_t::get_max() },
+ { 6, bound_t::get_normal_begin(), bound_t::get_max() },
+ { 6, bound_t::get_temp(4) , bound_t::get_max() },
+
+ // list normal starting <= normal_begin < end
+ { 3, bound_t::get_normal_begin(), bound_t::get_normal(3)},
+ { 3, bound_t::get_normal_begin(), bound_t::get_normal(4)},
+ { 3, bound_t::get_normal_begin(), bound_t::get_normal(2)},
+ { 3, bound_t::get_temp(5) , bound_t::get_normal(2)},
+ { 3, bound_t::get_temp(4) , bound_t::get_normal(2)},
+
+ // list normal starting > min up to end
+ { 3, bound_t::get_normal(2) , bound_t::get_max() },
+ { 3, bound_t::get_normal(2) , bound_t::get_max() },
+ { 3, bound_t::get_normal(3) , bound_t::get_max() },
+ { 3, bound_t::get_normal(1) , bound_t::get_max() },
+
+ // 0 limit
+ { 0, bound_t::get_min() , bound_t::get_max() },
+ { 0, bound_t::get_normal(1) , bound_t::get_max() },
+ { 0, bound_t::get_normal_begin(), bound_t::get_max() },
+};
+
+// Exercise the normal-range listing cases against a collection containing
+// only normal objects.
+// Fix: the original passed (5, 0) -- 5 temp objects and 0 normal objects --
+// which contradicts the "normal_only" name and left every normal-range case
+// listing an empty span; (0, 5) mirrors list_objects_temp_only above.
+TEST_P(seastore_test_t, list_objects_normal_only)
+{
+ run_async([this] { test_list(0, 5, normal_list_cases); });
+}
+
+TEST_P(seastore_test_t, list_objects_normal_overlap)
+{
+ run_async([this] { test_list(5, 5, normal_list_cases); });
+}
+
+// Return a bufferlist of `len` bytes. NOTE(review): bufferptr(len) leaves
+// the bytes uninitialized -- presumably fine here since callers only compare
+// the same bufferlist value against itself (omap set/get round-trips), but
+// confirm before using it where byte contents matter.
+bufferlist make_bufferlist(size_t len) {
+ bufferptr ptr(len);
+ bufferlist bl;
+ bl.append(ptr);
+ return bl;
+}
+
+TEST_P(seastore_test_t, omap_test_simple)
+{
+ run_async([this] {
+ auto &test_obj = get_object(make_oid(0));
+ test_obj.touch(*sharded_seastore);
+ test_obj.set_omap(
+ *sharded_seastore,
+ "asdf",
+ make_bufferlist(128));
+ test_obj.check_omap_key(
+ *sharded_seastore,
+ "asdf");
+ });
+}
+
+TEST_P(seastore_test_t, clone_aligned_extents)
+{
+ run_async([this] {
+ auto &test_obj = get_object(make_oid(0));
+ test_obj.write(*sharded_seastore, 0, 4096, 'a');
+
+ test_obj.clone(*sharded_seastore, 10);
+ std::cout << "reading origin after clone10" << std::endl;
+ test_obj.read(*sharded_seastore, 0, 4096);
+ test_obj.write(*sharded_seastore, 0, 4096, 'b');
+ test_obj.write(*sharded_seastore, 4096, 4096, 'c');
+ std::cout << "reading origin after clone10 and write" << std::endl;
+ test_obj.read(*sharded_seastore, 0, 8192);
+ auto clone_obj10 = test_obj.get_clone(10);
+ std::cout << "reading clone after clone10 and write" << std::endl;
+ clone_obj10.read(*sharded_seastore, 0, 8192);
+
+ test_obj.clone(*sharded_seastore, 20);
+ std::cout << "reading origin after clone20" << std::endl;
+ test_obj.read(*sharded_seastore, 0, 4096);
+ test_obj.write(*sharded_seastore, 0, 4096, 'd');
+ test_obj.write(*sharded_seastore, 4096, 4096, 'e');
+ test_obj.write(*sharded_seastore, 8192, 4096, 'f');
+ std::cout << "reading origin after clone20 and write" << std::endl;
+ test_obj.read(*sharded_seastore, 0, 12288);
+ auto clone_obj20 = test_obj.get_clone(20);
+ std::cout << "reading clone after clone20 and write" << std::endl;
+ clone_obj10.read(*sharded_seastore, 0, 12288);
+ clone_obj20.read(*sharded_seastore, 0, 12288);
+ });
+}
+
+TEST_P(seastore_test_t, clone_unaligned_extents)
+{
+ run_async([this] {
+ auto &test_obj = get_object(make_oid(0));
+ test_obj.write(*sharded_seastore, 0, 8192, 'a');
+ test_obj.write(*sharded_seastore, 8192, 8192, 'b');
+ test_obj.write(*sharded_seastore, 16384, 8192, 'c');
+
+ test_obj.clone(*sharded_seastore, 10);
+ test_obj.write(*sharded_seastore, 4096, 12288, 'd');
+ std::cout << "reading origin after clone10 and write" << std::endl;
+ test_obj.read(*sharded_seastore, 0, 24576);
+
+ auto clone_obj10 = test_obj.get_clone(10);
+ std::cout << "reading clone after clone10 and write" << std::endl;
+ clone_obj10.read(*sharded_seastore, 0, 24576);
+
+ test_obj.clone(*sharded_seastore, 20);
+ test_obj.write(*sharded_seastore, 8192, 12288, 'e');
+ std::cout << "reading origin after clone20 and write" << std::endl;
+ test_obj.read(*sharded_seastore, 0, 24576);
+
+ auto clone_obj20 = test_obj.get_clone(20);
+ std::cout << "reading clone after clone20 and write" << std::endl;
+ clone_obj10.read(*sharded_seastore, 0, 24576);
+ clone_obj20.read(*sharded_seastore, 0, 24576);
+
+ test_obj.write(*sharded_seastore, 0, 24576, 'f');
+ test_obj.clone(*sharded_seastore, 30);
+ test_obj.write(*sharded_seastore, 8192, 4096, 'g');
+ std::cout << "reading origin after clone30 and write" << std::endl;
+ test_obj.read(*sharded_seastore, 0, 24576);
+
+ auto clone_obj30 = test_obj.get_clone(30);
+ std::cout << "reading clone after clone30 and write" << std::endl;
+ clone_obj10.read(*sharded_seastore, 0, 24576);
+ clone_obj20.read(*sharded_seastore, 0, 24576);
+ clone_obj30.read(*sharded_seastore, 0, 24576);
+ });
+}
+
+TEST_P(seastore_test_t, attr)
+{
+ run_async([this] {
+ auto& test_obj = get_object(make_oid(0));
+ test_obj.touch(*sharded_seastore);
+ {
+ std::string oi("asdfasdfasdf");
+ bufferlist bl;
+ encode(oi, bl);
+ test_obj.set_attr(*sharded_seastore, OI_ATTR, bl);
+
+ std::string ss("fdsfdsfs");
+ bl.clear();
+ encode(ss, bl);
+ test_obj.set_attr(*sharded_seastore, SS_ATTR, bl);
+
+ std::string test_val("ssssssssssss");
+ bl.clear();
+ encode(test_val, bl);
+ test_obj.set_attr(*sharded_seastore, "test_key", bl);
+
+ auto attrs = test_obj.get_attrs(*sharded_seastore);
+ std::string oi2;
+ bufferlist bl2 = attrs[OI_ATTR];
+ decode(oi2, bl2);
+ bl2.clear();
+ bl2 = attrs[SS_ATTR];
+ std::string ss2;
+ decode(ss2, bl2);
+ std::string test_val2;
+ bl2.clear();
+ bl2 = attrs["test_key"];
+ decode(test_val2, bl2);
+ EXPECT_EQ(ss, ss2);
+ EXPECT_EQ(oi, oi2);
+ EXPECT_EQ(test_val, test_val2);
+
+ bl2.clear();
+ bl2 = test_obj.get_attr(*sharded_seastore, "test_key");
+ test_val2.clear();
+ decode(test_val2, bl2);
+ EXPECT_EQ(test_val, test_val2);
+ //test rm_attrs
+ test_obj.rm_attrs(*sharded_seastore);
+ attrs = test_obj.get_attrs(*sharded_seastore);
+ EXPECT_EQ(attrs.find(OI_ATTR), attrs.end());
+ EXPECT_EQ(attrs.find(SS_ATTR), attrs.end());
+ EXPECT_EQ(attrs.find("test_key"), attrs.end());
+
+ std::cout << "test_key passed" << std::endl;
+ //create OI_ATTR with len > onode_layout_t::MAX_OI_LENGTH, rm OI_ATTR
+ //create SS_ATTR with len > onode_layout_t::MAX_SS_LENGTH, rm SS_ATTR
+ char oi_array[onode_layout_t::MAX_OI_LENGTH + 1] = {'a'};
+ std::string oi_str(&oi_array[0], sizeof(oi_array));
+ bl.clear();
+ encode(oi_str, bl);
+ test_obj.set_attr(*sharded_seastore, OI_ATTR, bl);
+
+ char ss_array[onode_layout_t::MAX_SS_LENGTH + 1] = {'b'};
+ std::string ss_str(&ss_array[0], sizeof(ss_array));
+ bl.clear();
+ encode(ss_str, bl);
+ test_obj.set_attr(*sharded_seastore, SS_ATTR, bl);
+
+ attrs = test_obj.get_attrs(*sharded_seastore);
+ bl2.clear();
+ bl2 = attrs[OI_ATTR];
+ std::string oi_str2;
+ decode(oi_str2, bl2);
+ EXPECT_EQ(oi_str, oi_str2);
+
+ bl2.clear();
+ bl2 = attrs[SS_ATTR];
+ std::string ss_str2;
+ decode(ss_str2, bl2);
+ EXPECT_EQ(ss_str, ss_str2);
+
+ bl2.clear();
+ ss_str2.clear();
+ bl2 = test_obj.get_attr(*sharded_seastore, SS_ATTR);
+ decode(ss_str2, bl2);
+ EXPECT_EQ(ss_str, ss_str2);
+
+ bl2.clear();
+ oi_str2.clear();
+ bl2 = test_obj.get_attr(*sharded_seastore, OI_ATTR);
+ decode(oi_str2, bl2);
+ EXPECT_EQ(oi_str, oi_str2);
+
+ test_obj.rm_attr(*sharded_seastore, OI_ATTR);
+ test_obj.rm_attr(*sharded_seastore, SS_ATTR);
+
+ attrs = test_obj.get_attrs(*sharded_seastore);
+ EXPECT_EQ(attrs.find(OI_ATTR), attrs.end());
+ EXPECT_EQ(attrs.find(SS_ATTR), attrs.end());
+ }
+ {
+ //create OI_ATTR with len <= onode_layout_t::MAX_OI_LENGTH, rm OI_ATTR
+ //create SS_ATTR with len <= onode_layout_t::MAX_SS_LENGTH, rm SS_ATTR
+ std::string oi("asdfasdfasdf");
+ bufferlist bl;
+ encode(oi, bl);
+ test_obj.set_attr(*sharded_seastore, OI_ATTR, bl);
+
+ std::string ss("f");
+ bl.clear();
+ encode(ss, bl);
+ test_obj.set_attr(*sharded_seastore, SS_ATTR, bl);
+
+ std::string test_val("ssssssssssss");
+ bl.clear();
+ encode(test_val, bl);
+ test_obj.set_attr(*sharded_seastore, "test_key", bl);
+
+ auto attrs = test_obj.get_attrs(*sharded_seastore);
+ std::string oi2;
+ bufferlist bl2 = attrs[OI_ATTR];
+ decode(oi2, bl2);
+ bl2.clear();
+ bl2 = attrs[SS_ATTR];
+ std::string ss2;
+ decode(ss2, bl2);
+ std::string test_val2;
+ bl2.clear();
+ bl2 = attrs["test_key"];
+ decode(test_val2, bl2);
+ EXPECT_EQ(ss, ss2);
+ EXPECT_EQ(oi, oi2);
+ EXPECT_EQ(test_val, test_val2);
+
+ test_obj.rm_attr(*sharded_seastore, OI_ATTR);
+ test_obj.rm_attr(*sharded_seastore, SS_ATTR);
+ test_obj.rm_attr(*sharded_seastore, "test_key");
+
+ attrs = test_obj.get_attrs(*sharded_seastore);
+ EXPECT_EQ(attrs.find(OI_ATTR), attrs.end());
+ EXPECT_EQ(attrs.find(SS_ATTR), attrs.end());
+ EXPECT_EQ(attrs.find("test_key"), attrs.end());
+ }
+ {
+ // create OI_ATTR with len > onode_layout_t::MAX_OI_LENGTH, then
+ // overwrite it with another OI_ATTR len of which < onode_layout_t::MAX_OI_LENGTH
+ // create SS_ATTR with len > onode_layout_t::MAX_SS_LENGTH, then
+ // overwrite it with another SS_ATTR len of which < onode_layout_t::MAX_SS_LENGTH
+ char oi_array[onode_layout_t::MAX_OI_LENGTH + 1] = {'a'};
+ std::string oi(&oi_array[0], sizeof(oi_array));
+ bufferlist bl;
+ encode(oi, bl);
+ test_obj.set_attr(*sharded_seastore, OI_ATTR, bl);
+
+ oi = "asdfasdfasdf";
+ bl.clear();
+ encode(oi, bl);
+ test_obj.set_attr(*sharded_seastore, OI_ATTR, bl);
+
+ char ss_array[onode_layout_t::MAX_SS_LENGTH + 1] = {'b'};
+ std::string ss(&ss_array[0], sizeof(ss_array));
+ bl.clear();
+ encode(ss, bl);
+ test_obj.set_attr(*sharded_seastore, SS_ATTR, bl);
+
+ ss = "f";
+ bl.clear();
+ encode(ss, bl);
+ test_obj.set_attr(*sharded_seastore, SS_ATTR, bl);
+
+ auto attrs = test_obj.get_attrs(*sharded_seastore);
+ std::string oi2, ss2;
+ bufferlist bl2 = attrs[OI_ATTR];
+ decode(oi2, bl2);
+ bl2.clear();
+ bl2 = attrs[SS_ATTR];
+ decode(ss2, bl2);
+ EXPECT_EQ(oi, oi2);
+ EXPECT_EQ(ss, ss2);
+ }
+ });
+}
+
+TEST_P(seastore_test_t, omap_test_iterator)
+{
+ run_async([this] {
+ auto make_key = [](unsigned i) {
+ std::stringstream ss;
+ ss << "key" << i;
+ return ss.str();
+ };
+ auto &test_obj = get_object(make_oid(0));
+ test_obj.touch(*sharded_seastore);
+ for (unsigned i = 0; i < 20; ++i) {
+ test_obj.set_omap(
+ *sharded_seastore,
+ make_key(i),
+ make_bufferlist(128));
+ }
+ test_obj.check_omap(*sharded_seastore);
+ });
+}
+
+TEST_P(seastore_test_t, object_data_omap_remove)
+{
+ run_async([this] {
+ auto make_key = [](unsigned i) {
+ std::stringstream ss;
+ ss << "key" << i;
+ return ss.str();
+ };
+ auto &test_obj = get_object(make_oid(0));
+ test_obj.touch(*sharded_seastore);
+ for (unsigned i = 0; i < 1024; ++i) {
+ test_obj.set_omap(
+ *sharded_seastore,
+ make_key(i),
+ make_bufferlist(128));
+ }
+ test_obj.check_omap(*sharded_seastore);
+
+ for (uint64_t i = 0; i < 16; i++) {
+ test_obj.write(
+ *sharded_seastore,
+ 4096 * i,
+ 4096,
+ 'a');
+ }
+ test_obj.remove(*sharded_seastore);
+ });
+}
+
+
+TEST_P(seastore_test_t, simple_extent_test)
+{
+ run_async([this] {
+ auto &test_obj = get_object(make_oid(0));
+ test_obj.write(
+ *sharded_seastore,
+ 1024,
+ 1024,
+ 'a');
+ test_obj.read(
+ *sharded_seastore,
+ 1024,
+ 1024);
+ test_obj.check_size(*sharded_seastore);
+ });
+}
+
+TEST_P(seastore_test_t, fiemap_empty)
+{
+ run_async([this] {
+ auto &test_obj = get_object(make_oid(0));
+ test_obj.touch(*sharded_seastore);
+ test_obj.truncate(*sharded_seastore, 100000);
+
+ std::map<uint64_t, uint64_t> m;
+ m = test_obj.fiemap(*sharded_seastore, 0, 100000);
+ EXPECT_TRUE(m.empty());
+
+ test_obj.remove(*sharded_seastore);
+ });
+}
+
+TEST_P(seastore_test_t, fiemap_holes)
+{
+ run_async([this] {
+ const uint64_t MAX_EXTENTS = 100;
+
+ // large enough to ensure that seastore will allocate each write seperately
+ const uint64_t SKIP_STEP = 16 << 10;
+ auto &test_obj = get_object(make_oid(0));
+ bufferlist bl;
+ bl.append("foo");
+
+ test_obj.touch(*sharded_seastore);
+ for (uint64_t i = 0; i < MAX_EXTENTS; i++) {
+ test_obj.write(*sharded_seastore, SKIP_STEP * i, bl);
+ }
+
+ { // fiemap test from 0 to SKIP_STEP * (MAX_EXTENTS - 1) + 3
+ auto m = test_obj.fiemap(
+ *sharded_seastore, 0, SKIP_STEP * (MAX_EXTENTS - 1) + 3);
+ ASSERT_EQ(m.size(), MAX_EXTENTS);
+ for (uint64_t i = 0; i < MAX_EXTENTS; i++) {
+ ASSERT_TRUE(m.count(SKIP_STEP * i));
+ ASSERT_GE(m[SKIP_STEP * i], bl.length());
+ }
+ }
+
+ { // fiemap test from SKIP_STEP to SKIP_STEP * (MAX_EXTENTS - 2) + 3
+ auto m = test_obj.fiemap(
+ *sharded_seastore, SKIP_STEP, SKIP_STEP * (MAX_EXTENTS - 3) + 3);
+ ASSERT_EQ(m.size(), MAX_EXTENTS - 2);
+ for (uint64_t i = 1; i < MAX_EXTENTS - 1; i++) {
+ ASSERT_TRUE(m.count(SKIP_STEP * i));
+ ASSERT_GE(m[SKIP_STEP * i], bl.length());
+ }
+ }
+
+ { // fiemap test SKIP_STEP + 1 to 2 * SKIP_STEP + 1 (partial overlap)
+ auto m = test_obj.fiemap(
+ *sharded_seastore, SKIP_STEP + 1, SKIP_STEP + 1);
+ ASSERT_EQ(m.size(), 2);
+ ASSERT_EQ(m.begin()->first, SKIP_STEP + 1);
+ ASSERT_GE(m.begin()->second, bl.length());
+ ASSERT_LE(m.rbegin()->first, (2 * SKIP_STEP) + 1);
+ ASSERT_EQ(m.rbegin()->first + m.rbegin()->second, 2 * SKIP_STEP + 2);
+ }
+
+ test_obj.remove(*sharded_seastore);
+ });
+}
+
+TEST_P(seastore_test_t, sparse_read)
+{
+ run_async([this] {
+ const uint64_t MAX_EXTENTS = 100;
+ const uint64_t SKIP_STEP = 16 << 10;
+ auto &test_obj = get_object(make_oid(0));
+ bufferlist wbl;
+ wbl.append("foo");
+
+ test_obj.touch(*sharded_seastore);
+ for (uint64_t i = 0; i < MAX_EXTENTS; i++) {
+ test_obj.write(*sharded_seastore, SKIP_STEP * i, wbl);
+ }
+ interval_set<uint64_t> m;
+ m = interval_set<uint64_t>(
+ test_obj.fiemap(*sharded_seastore, 0, SKIP_STEP * (MAX_EXTENTS - 1) + 3));
+ ASSERT_TRUE(!m.empty());
+ uint64_t off = 0;
+ auto rbl = test_obj.readv(*sharded_seastore, m);
+
+ for (auto &&miter : m) {
+ bufferlist subl;
+ subl.substr_of(rbl, off, std::min(miter.second, uint64_t(wbl.length())));
+ ASSERT_TRUE(subl.contents_equal(wbl));
+ off += miter.second;
+ }
+ test_obj.remove(*sharded_seastore);
+ });
+}
+
+// Exercise Transaction::zero against the object_state_t contents mirror for
+// several write layouts and zero ranges (within a block, block-aligned and
+// unaligned on either side).
+TEST_P(seastore_test_t, zero)
+{
+ run_async([this] {
+ auto test_zero = [this](
+ // [(off, len, repeat)]
+ std::vector<std::tuple<uint64_t, uint64_t, uint64_t>> writes,
+ uint64_t zero_off, uint64_t zero_len) {
+
+ // Test zero within a block
+ auto &test_obj = get_object(make_oid(0));
+ uint64_t size = 0;
+ for (auto &[off, len, repeat]: writes) {
+ for (decltype(repeat) i = 0; i < repeat; ++i) {
+ // NOTE(review): the offset uses `len * repeat`, not `len * i`, so
+ // every iteration rewrites the same final slot and the earlier
+ // slots stay holes. The `size` below is consistent with that, so
+ // the test is self-coherent, but `len * i` (a contiguous run of
+ // writes) looks like the original intent -- confirm upstream.
+ test_obj.write(*sharded_seastore, off + (len * repeat), len, 'a');
+ }
+ size = off + (len * (repeat + 1));
+ }
+ test_obj.read(
+ *sharded_seastore,
+ 0,
+ size);
+ test_obj.check_size(*sharded_seastore);
+ test_obj.zero(*sharded_seastore, zero_off, zero_len);
+ test_obj.read(
+ *sharded_seastore,
+ 0,
+ size);
+ test_obj.check_size(*sharded_seastore);
+ remove_object(test_obj);
+ };
+
+ const uint64_t BS = 4<<10;
+
+ // Test zero within a block
+ test_zero(
+ {{1<<10, 1<<10, 1}},
+ 1124, 200);
+
+ // Multiple writes, partial on left, partial on right.
+ test_zero(
+ {{BS, BS, 10}},
+ BS + 128,
+ BS * 4);
+
+ // Single large write, block boundary on right, partial on left.
+ test_zero(
+ {{BS, BS * 10, 1}},
+ BS + 128,
+ (BS * 4) - 128);
+
+ // Multiple writes, block boundary on left, partial on right.
+ test_zero(
+ {{BS, BS, 10}},
+ BS,
+ (BS * 4) + 128);
+ });
+}
+INSTANTIATE_TEST_SUITE_P(
+ seastore_test,
+ seastore_test_t,
+ ::testing::Values (
+ "segmented",
+ "circularbounded"
+ )
+);
diff --git a/src/test/crimson/seastore/test_seastore_cache.cc b/src/test/crimson/seastore/test_seastore_cache.cc
new file mode 100644
index 000000000..b249d27e4
--- /dev/null
+++ b/src/test/crimson/seastore/test_seastore_cache.cc
@@ -0,0 +1,260 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "crimson/common/log.h"
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/segment_manager/ephemeral.h"
+
+#include "test/crimson/seastore/test_block.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+
+namespace {
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+// Fixture exercising Cache directly on top of an ephemeral segment
+// manager.  Records prepared by the Cache are written to the backing
+// segments by hand in submit_transaction() below -- no Journal involved.
+struct cache_test_t : public seastar_test_suite_t {
+  segment_manager::EphemeralSegmentManagerRef segment_manager;
+  ExtentPlacementManagerRef epm;
+  CacheRef cache;
+  // Next physical address at which submit_transaction() will write.
+  paddr_t current;
+  // Sequence handed to complete_commit(); never advanced here (see TODO below).
+  journal_seq_t seq = JOURNAL_SEQ_MIN;
+
+  cache_test_t() = default;
+
+  // Serializes the record for `t` and writes it contiguously at
+  // `current`, rolling over to the next segment when it would not fit;
+  // then completes the commit against the cache and returns the
+  // record's base paddr.
+  seastar::future<paddr_t> submit_transaction(
+    TransactionRef t) {
+    auto record = cache->prepare_record(*t, JOURNAL_SEQ_NULL, JOURNAL_SEQ_NULL);
+
+    bufferlist bl;
+    for (auto &&block : record.extents) {
+      bl.append(block.bl);
+    }
+
+    // The whole record must fit inside one segment.
+    ceph_assert((segment_off_t)bl.length() <
+		segment_manager->get_segment_size());
+    if (current.as_seg_paddr().get_segment_off() + (segment_off_t)bl.length() >
+	segment_manager->get_segment_size())
+      current = paddr_t::make_seg_paddr(
+	segment_id_t(
+	  current.as_seg_paddr().get_segment_id().device_id(),
+	  current.as_seg_paddr().get_segment_id().device_segment_id() + 1),
+	0);
+
+    auto prev = current;
+    current.as_seg_paddr().set_segment_off(
+      current.as_seg_paddr().get_segment_off()
+      + bl.length());
+    return segment_manager->segment_write(
+      prev,
+      std::move(bl),
+      true
+    ).safe_then(
+      [this, prev, t=std::move(t)]() mutable {
+	cache->complete_commit(*t, prev, seq /* TODO */);
+	return prev;
+      },
+      crimson::ct_error::all_same_way([](auto e) {
+	ASSERT_FALSE("failed to submit");
+      })
+    );
+  }
+
+  // Fresh MUTATE transaction for a single test step.
+  auto get_transaction() {
+    return cache->create_transaction(
+      Transaction::src_t::MUTATE, "test_cache", false);
+  }
+
+  // Reads an extent of type T through the cache inside transaction t,
+  // wrapping the interruptible call for test use.
+  template <typename T, typename... Args>
+  auto get_extent(Transaction &t, Args&&... args) {
+    return with_trans_intr(
+      t,
+      [this](auto &&... args) {
+	return cache->get_extent<T>(args...);
+      },
+      std::forward<Args>(args)...);
+  }
+
+  // Creates + mkfs's the ephemeral device, wires epm and cache, then
+  // runs the cache's own mkfs inside an initial transaction.
+  seastar::future<> set_up_fut() final {
+    segment_manager = segment_manager::create_test_ephemeral();
+    return segment_manager->init(
+    ).safe_then([this] {
+      return segment_manager->mkfs(
+        segment_manager::get_ephemeral_device_config(0, 1, 0));
+    }).safe_then([this] {
+      epm.reset(new ExtentPlacementManager());
+      cache.reset(new Cache(*epm));
+      current = paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0);
+      epm->test_init_no_background(segment_manager.get());
+      return seastar::do_with(
+          get_transaction(),
+          [this](auto &ref_t) {
+        cache->init();
+        return with_trans_intr(*ref_t, [&](auto &t) {
+          return cache->mkfs(t);
+        }).safe_then([this, &ref_t] {
+          return submit_transaction(std::move(ref_t)
+          ).then([](auto p) {});
+        });
+      });
+    }).handle_error(
+      crimson::ct_error::all_same_way([](auto e) {
+	ASSERT_FALSE("failed to submit");
+      })
+    );
+  }
+
+  // Closes the cache and drops all fixture state.
+  seastar::future<> tear_down_fut() final {
+    return cache->close(
+    ).safe_then([this] {
+      segment_manager.reset();
+      epm.reset();
+      cache.reset();
+    }).handle_error(
+      Cache::close_ertr::assert_all{}
+    );
+  }
+};
+
+// A freshly allocated extent carries a relative paddr until commit;
+// after commit it must be readable at its final absolute address with
+// unchanged contents (crc32c match).
+TEST_F(cache_test_t, test_addr_fixup)
+{
+  run_async([this] {
+    paddr_t addr;
+    int csum = 0;
+    {
+      // Allocate, fill, and commit one physical test block.
+      auto t = get_transaction();
+      auto extent = cache->alloc_new_extent<TestBlockPhysical>(
+	*t,
+	TestBlockPhysical::SIZE,
+	placement_hint_t::HOT,
+	0);
+      extent->set_contents('c');
+      csum = extent->get_crc32c();
+      submit_transaction(std::move(t)).get0();
+      // Commit rewrote the paddr to its final absolute location.
+      addr = extent->get_paddr();
+    }
+    {
+      // Re-read by the post-commit paddr and verify the checksum.
+      auto t = get_transaction();
+      auto extent = get_extent<TestBlockPhysical>(
+	*t,
+	addr,
+	TestBlockPhysical::SIZE).unsafe_get0();
+      ASSERT_EQ(extent->get_paddr(), addr);
+      ASSERT_EQ(extent->get_crc32c(), csum);
+    }
+  });
+}
+
+// Verifies transaction isolation of a mutated (dirty) extent: the
+// mutating transaction sees version 1 with new contents, a concurrent
+// fresh transaction still sees the clean version 0, and after commit
+// all new transactions see the dirty version.
+TEST_F(cache_test_t, test_dirty_extent)
+{
+  run_async([this] {
+    paddr_t addr;
+    int csum = 0;   // checksum of the initial (clean) contents
+    int csum2 = 0;  // checksum after the mutation
+    {
+      // write out initial test block
+      auto t = get_transaction();
+      auto extent = cache->alloc_new_extent<TestBlockPhysical>(
+	*t,
+	TestBlockPhysical::SIZE,
+	placement_hint_t::HOT,
+	0);
+      extent->set_contents('c');
+      csum = extent->get_crc32c();
+      auto reladdr = extent->get_paddr();
+      ASSERT_TRUE(reladdr.is_relative());
+      {
+	// test that read with same transaction sees new block though
+	// uncommitted
+	auto extent = get_extent<TestBlockPhysical>(
+	  *t,
+	  reladdr,
+	  TestBlockPhysical::SIZE).unsafe_get0();
+	ASSERT_TRUE(extent->is_clean());
+	ASSERT_TRUE(extent->is_pending());
+	ASSERT_TRUE(extent->get_paddr().is_relative());
+	ASSERT_EQ(extent->get_version(), 0);
+	ASSERT_EQ(csum, extent->get_crc32c());
+      }
+      submit_transaction(std::move(t)).get0();
+      addr = extent->get_paddr();
+    }
+    {
+      // test that consecutive reads on the same extent get the same ref
+      auto t = get_transaction();
+      auto extent = get_extent<TestBlockPhysical>(
+	*t,
+	addr,
+	TestBlockPhysical::SIZE).unsafe_get0();
+      auto t2 = get_transaction();
+      auto extent2 = get_extent<TestBlockPhysical>(
+	*t2,
+	addr,
+	TestBlockPhysical::SIZE).unsafe_get0();
+      ASSERT_EQ(&*extent, &*extent2);
+    }
+    {
+      // read back test block
+      auto t = get_transaction();
+      auto extent = get_extent<TestBlockPhysical>(
+	*t,
+	addr,
+	TestBlockPhysical::SIZE).unsafe_get0();
+      // duplicate and reset contents
+      extent = cache->duplicate_for_write(*t, extent)->cast<TestBlockPhysical>();
+      extent->set_contents('c');
+      csum2 = extent->get_crc32c();
+      ASSERT_EQ(extent->get_paddr(), addr);
+      {
+	// test that concurrent read with fresh transaction sees old
+	// block
+	auto t2 = get_transaction();
+	auto extent = get_extent<TestBlockPhysical>(
+	  *t2,
+	  addr,
+	  TestBlockPhysical::SIZE).unsafe_get0();
+	ASSERT_TRUE(extent->is_clean());
+	ASSERT_FALSE(extent->is_pending());
+	ASSERT_EQ(addr, extent->get_paddr());
+	ASSERT_EQ(extent->get_version(), 0);
+	ASSERT_EQ(csum, extent->get_crc32c());
+      }
+      {
+	// test that read with same transaction sees new block
+	auto extent = get_extent<TestBlockPhysical>(
+	  *t,
+	  addr,
+	  TestBlockPhysical::SIZE).unsafe_get0();
+	ASSERT_TRUE(extent->is_dirty());
+	ASSERT_TRUE(extent->is_pending());
+	ASSERT_EQ(addr, extent->get_paddr());
+	ASSERT_EQ(extent->get_version(), 1);
+	ASSERT_EQ(csum2, extent->get_crc32c());
+      }
+      // submit transaction
+      submit_transaction(std::move(t)).get0();
+      ASSERT_TRUE(extent->is_dirty());
+      ASSERT_EQ(addr, extent->get_paddr());
+      ASSERT_EQ(extent->get_version(), 1);
+      ASSERT_EQ(extent->get_crc32c(), csum2);
+    }
+    {
+      // test that fresh transaction now sees newly dirty block
+      auto t = get_transaction();
+      auto extent = get_extent<TestBlockPhysical>(
+	*t,
+	addr,
+	TestBlockPhysical::SIZE).unsafe_get0();
+      ASSERT_TRUE(extent->is_dirty());
+      ASSERT_EQ(addr, extent->get_paddr());
+      ASSERT_EQ(extent->get_version(), 1);
+      ASSERT_EQ(csum2, extent->get_crc32c());
+    }
+  });
+}
diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc
new file mode 100644
index 000000000..46ec723a3
--- /dev/null
+++ b/src/test/crimson/seastore/test_seastore_journal.cc
@@ -0,0 +1,343 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/gtest_seastar.h"
+
+#include <random>
+
+#include "crimson/common/log.h"
+#include "crimson/os/seastore/async_cleaner.h"
+#include "crimson/os/seastore/journal.h"
+#include "crimson/os/seastore/segment_manager/ephemeral.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+
+namespace {
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+// Captures a record submitted to the journal together with where it
+// finally landed (record_final_offset), so both its extents and its
+// deltas can be re-validated after a journal replay.
+struct record_validator_t {
+  record_t record;
+  paddr_t record_final_offset;
+
+  template <typename... T>
+  record_validator_t(T&&... record) : record(std::forward<T>(record)...) {}
+
+  // Re-reads each extent from the backing segment manager at its final
+  // (record-relative) offset and compares length + crc32c with what was
+  // submitted.
+  void validate(SegmentManager &manager) {
+    paddr_t addr = make_record_relative_paddr(0);
+    for (auto &&block : record.extents) {
+      auto test = manager.read(
+	record_final_offset.add_relative(addr),
+	block.bl.length()).unsafe_get0();
+      addr = addr.add_offset(block.bl.length());
+      bufferlist bl;
+      bl.push_back(test);
+      ASSERT_EQ(
+	bl.length(),
+	block.bl.length());
+      ASSERT_EQ(
+	bl.begin().crc32c(bl.length(), 1),
+	block.bl.begin().crc32c(block.bl.length(), 1));
+    }
+  }
+
+  // Returns a callable that checks replayed deltas in submission order
+  // against this record's deltas; it returns false once the last delta
+  // has been consumed.  nullopt when the record carried no deltas.
+  auto get_replay_handler() {
+    auto checker = [this, iter=record.deltas.begin()] (
+      paddr_t base,
+      const delta_info_t &di) mutable {
+      EXPECT_EQ(base, record_final_offset);
+      ceph_assert(iter != record.deltas.end());
+      EXPECT_EQ(di, *iter++);
+      EXPECT_EQ(base, record_final_offset);
+      return iter != record.deltas.end();
+    };
+    if (record.deltas.size()) {
+      return std::make_optional(std::move(checker));
+    } else {
+      return std::optional<decltype(checker)>();
+    }
+  }
+};
+
+// Fixture driving SegmentedJournal over an ephemeral segment manager.
+// The fixture itself provides stub JournalTrimmer/SegmentProvider
+// implementations: trimming is a no-op and segments are handed out
+// sequentially from `next`.
+struct journal_test_t : seastar_test_suite_t, SegmentProvider, JournalTrimmer {
+  segment_manager::EphemeralSegmentManagerRef segment_manager;
+  WritePipeline pipeline;
+  JournalRef journal;
+
+  // Every submitted record, kept for post-replay validation.
+  std::vector<record_validator_t> records;
+
+  std::default_random_engine generator;
+
+  extent_len_t block_size;
+
+  SegmentManagerGroupRef sms;
+
+  // Next segment id allocate_segment() will hand out.
+  segment_id_t next;
+
+  std::map<segment_id_t, segment_seq_t> segment_seqs;
+  std::map<segment_id_t, segment_type_t> segment_types;
+
+  // Fixed head/tail reported by all the trimmer stubs below.
+  journal_seq_t dummy_tail;
+
+  // Scratch storage returned by get_seg_info() (hence mutable).
+  mutable segment_info_t tmp_info;
+
+  journal_test_t() = default;
+
+  /*
+   * JournalTrimmer interfaces
+   */
+  journal_seq_t get_journal_head() const final { return dummy_tail; }
+
+  void set_journal_head(journal_seq_t) final {}
+
+  journal_seq_t get_dirty_tail() const final { return dummy_tail; }
+
+  journal_seq_t get_alloc_tail() const final { return dummy_tail; }
+
+  void update_journal_tails(journal_seq_t, journal_seq_t) final {}
+
+  bool try_reserve_inline_usage(std::size_t) final { return true; }
+
+  void release_inline_usage(std::size_t) final {}
+
+  std::size_t get_trim_size_per_cycle() const final {
+    return 0;
+  }
+
+  /*
+   * SegmentProvider interfaces
+   */
+  const segment_info_t& get_seg_info(segment_id_t id) const final {
+    tmp_info = {};
+    tmp_info.seq = segment_seqs.at(id);
+    tmp_info.type = segment_types.at(id);
+    return tmp_info;
+  }
+
+  // Hands out consecutive segment ids and records their seq/type for
+  // later get_seg_info() lookups.
+  segment_id_t allocate_segment(
+    segment_seq_t seq,
+    segment_type_t type,
+    data_category_t,
+    rewrite_gen_t
+  ) final {
+    auto ret = next;
+    next = segment_id_t{
+      segment_manager->get_device_id(),
+      next.device_segment_id() + 1};
+    segment_seqs[ret] = seq;
+    segment_types[ret] = type;
+    return ret;
+  }
+
+  void close_segment(segment_id_t) final {}
+
+  void update_segment_avail_bytes(segment_type_t, paddr_t) final {}
+
+  void update_modify_time(segment_id_t, sea_time_point, std::size_t) final {}
+
+  SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); }
+
+  // Creates + mkfs's the ephemeral device, builds the segmented journal
+  // and opens it for mkfs.
+  seastar::future<> set_up_fut() final {
+    segment_manager = segment_manager::create_test_ephemeral();
+    return segment_manager->init(
+    ).safe_then([this] {
+      return segment_manager->mkfs(
+        segment_manager::get_ephemeral_device_config(0, 1, 0));
+    }).safe_then([this] {
+      block_size = segment_manager->get_block_size();
+      sms.reset(new SegmentManagerGroup());
+      next = segment_id_t(segment_manager->get_device_id(), 0);
+      journal = journal::make_segmented(*this, *this);
+      journal->set_write_pipeline(&pipeline);
+      sms->add_segment_manager(segment_manager.get());
+      return journal->open_for_mkfs();
+    }).safe_then([this](auto) {
+      dummy_tail = journal_seq_t{0,
+        paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0)};
+    }, crimson::ct_error::all_same_way([] {
+      ASSERT_FALSE("Unable to mount");
+    }));
+  }
+
+  seastar::future<> tear_down_fut() final {
+    return journal->close(
+    ).safe_then([this] {
+      segment_manager.reset();
+      sms.reset();
+      journal.reset();
+    }).handle_error(
+      crimson::ct_error::all_same_way([](auto e) {
+	ASSERT_FALSE("Unable to close");
+      })
+    );
+  }
+
+  // Closes the current journal, constructs a fresh one over the same
+  // device, replays deltas through `f`, then opens it for mount.
+  template <typename T>
+  auto replay(T &&f) {
+    return journal->close(
+    ).safe_then([this, f=std::move(f)]() mutable {
+      journal = journal::make_segmented(*this, *this);
+      journal->set_write_pipeline(&pipeline);
+      // NOTE(review): std::forward<T>(std::move(f)) is redundant -- the
+      // inner std::move already yields an rvalue; plain std::move(f)
+      // would express the same thing.
+      return journal->replay(std::forward<T>(std::move(f)));
+    }).safe_then([this] {
+      return journal->open_for_mount();
+    });
+  }
+
+  // Replays the whole journal, asserting that deltas come back in
+  // submission order, then re-validates every record's extents against
+  // the raw device contents.
+  auto replay_and_check() {
+    auto record_iter = records.begin();
+    decltype(record_iter->get_replay_handler()) delta_checker = std::nullopt;
+    // Advance to the next record that actually has deltas to check.
+    auto advance = [this, &record_iter, &delta_checker] {
+      ceph_assert(!delta_checker);
+      while (record_iter != records.end()) {
+	auto checker = record_iter->get_replay_handler();
+	record_iter++;
+	if (checker) {
+	  delta_checker.emplace(std::move(*checker));
+	  break;
+	}
+      }
+    };
+    advance();
+    replay(
+      [&advance,
+       &delta_checker]
+      (const auto &offsets,
+       const auto &di,
+       const journal_seq_t &,
+       const journal_seq_t &,
+       auto t) mutable {
+	if (!delta_checker) {
+	  EXPECT_FALSE("No Deltas Left");
+	}
+	if (!(*delta_checker)(offsets.record_block_base, di)) {
+	  delta_checker = std::nullopt;
+	  advance();
+	}
+	return Journal::replay_ertr::make_ready_future<bool>(true);
+      }).unsafe_get0();
+    ASSERT_EQ(record_iter, records.end());
+    for (auto &i : records) {
+      i.validate(*segment_manager);
+    }
+  }
+
+  // Submits a record, remembers a validator copy of it, and stores the
+  // final on-disk offset returned by the journal.  Returns that offset.
+  template <typename... T>
+  auto submit_record(T&&... _record) {
+    auto record{std::forward<T>(_record)...};
+    records.push_back(record);
+    OrderingHandle handle = get_dummy_ordering_handle();
+    auto [addr, _] = journal->submit_record(
+      std::move(record),
+      handle).unsafe_get0();
+    records.back().record_final_offset = addr;
+    return addr;
+  }
+
+  // Builds a TEST_BLOCK extent of `blocks` device blocks filled with one
+  // random byte value.
+  extent_t generate_extent(size_t blocks) {
+    // NOTE(review): std::uniform_int_distribution<char> is formally UB
+    // per the C++ standard (char is not an allowed IntType); works on
+    // common implementations but <short>/<int> + cast would be portable.
+    std::uniform_int_distribution<char> distribution(
+      std::numeric_limits<char>::min(),
+      std::numeric_limits<char>::max()
+    );
+    char contents = distribution(generator);
+    bufferlist bl;
+    bl.append(buffer::ptr(buffer::create(blocks * block_size, contents)));
+    return extent_t{
+      extent_types_t::TEST_BLOCK,
+      L_ADDR_NULL,
+      bl};
+  }
+
+  // Builds a TEST_BLOCK delta of `bytes` bytes of one random byte value.
+  delta_info_t generate_delta(size_t bytes) {
+    std::uniform_int_distribution<char> distribution(
+      std::numeric_limits<char>::min(),
+      std::numeric_limits<char>::max()
+    );
+    char contents = distribution(generator);
+    bufferlist bl;
+    bl.append(buffer::ptr(buffer::create(bytes, contents)));
+    return delta_info_t{
+      extent_types_t::TEST_BLOCK,
+      paddr_t{},
+      L_ADDR_NULL,
+      0, 0,
+      block_size,
+      1,
+      MAX_SEG_SEQ,
+      segment_type_t::NULL_SEG,
+      bl
+    };
+  }
+};
+
+// One record (two extents, two deltas) must survive a close + replay.
+TEST_F(journal_test_t, replay_one_journal_segment)
+{
+  run_async([this] {
+    submit_record(record_t{
+      { generate_extent(1), generate_extent(2) },
+      { generate_delta(23), generate_delta(30) }
+      });
+    replay_and_check();
+  });
+}
+
+// Two consecutive records must replay in submission order.
+TEST_F(journal_test_t, replay_two_records)
+{
+  run_async([this] {
+    submit_record(record_t{
+      { generate_extent(1), generate_extent(2) },
+      { generate_delta(23), generate_delta(30) }
+      });
+    submit_record(record_t{
+      { generate_extent(4), generate_extent(1) },
+      { generate_delta(23), generate_delta(400) }
+      });
+    replay_and_check();
+  });
+}
+
+// Replaying, appending another record, and replaying again must keep
+// all previously written records intact.
+TEST_F(journal_test_t, replay_twice)
+{
+  run_async([this] {
+    submit_record(record_t{
+      { generate_extent(1), generate_extent(2) },
+      { generate_delta(23), generate_delta(30) }
+      });
+    submit_record(record_t{
+      { generate_extent(4), generate_extent(1) },
+      { generate_delta(23), generate_delta(400) }
+      });
+    replay_and_check();
+    submit_record(record_t{
+      { generate_extent(2), generate_extent(5) },
+      { generate_delta(230), generate_delta(40) }
+      });
+    replay_and_check();
+  });
+}
+
+// Keeps submitting large records until the journal rolls into the next
+// segment, then replays across the segment boundary.  The so_far guard
+// bounds the loop in case the roll never happens.
+TEST_F(journal_test_t, roll_journal_and_replay)
+{
+  run_async([this] {
+    paddr_t current = submit_record(
+      record_t{
+        { generate_extent(1), generate_extent(2) },
+        { generate_delta(23), generate_delta(30) }
+      });
+    auto starting_segment = current.as_seg_paddr().get_segment_id();
+    unsigned so_far = 0;
+    while (current.as_seg_paddr().get_segment_id() == starting_segment) {
+      current = submit_record(record_t{
+        { generate_extent(512), generate_extent(512) },
+        { generate_delta(23), generate_delta(400) }
+      });
+      ++so_far;
+      ASSERT_FALSE(so_far > 10);
+    }
+    replay_and_check();
+  });
+}
diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc
new file mode 100644
index 000000000..1148884a0
--- /dev/null
+++ b/src/test/crimson/seastore/test_transaction_manager.cc
@@ -0,0 +1,1995 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <random>
+
+#include <boost/iterator/counting_iterator.hpp>
+
+#include "test/crimson/gtest_seastar.h"
+#include "test/crimson/seastore/transaction_manager_test_state.h"
+
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/segment_manager/ephemeral.h"
+#include "crimson/os/seastore/segment_manager.h"
+
+#include "test/crimson/seastore/test_block.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+
+namespace {
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+// Expected state of one logical extent in the reference model:
+// its descriptor (laddr/len/seed) plus the expected refcount.
+// Comparisons are intentionally against the descriptor only.
+struct test_extent_record_t {
+  test_extent_desc_t desc;
+  unsigned refcount = 0;
+  test_extent_record_t() = default;
+  test_extent_record_t(
+    const test_extent_desc_t &desc,
+    unsigned refcount) : desc(desc), refcount(refcount) {}
+
+  // Replace the descriptor; refcount is unchanged.
+  void update(const test_extent_desc_t &to) {
+    desc = to;
+  }
+
+  bool operator==(const test_extent_desc_t &rhs) const {
+    return desc == rhs;
+  }
+  bool operator!=(const test_extent_desc_t &rhs) const {
+    return desc != rhs;
+  }
+};
+
+// fmt support so test_extent_record_t can appear in log/debug output.
+template<>
+struct fmt::formatter<test_extent_record_t> : fmt::formatter<std::string_view> {
+  template <typename FormatContext>
+  auto format(const test_extent_record_t& r, FormatContext& ctx) const {
+    return fmt::format_to(ctx.out(), "test_extent_record_t({}, refcount={})",
+                          r.desc, r.refcount);
+  }
+};
+
+struct transaction_manager_test_t :
+ public seastar_test_suite_t,
+ TMTestState {
+
+  // RNG seeded once per fixture; drives random contents/addresses below.
+  std::random_device rd;
+  std::mt19937 gen;
+
+  transaction_manager_test_t(std::size_t num_main_devices, std::size_t num_cold_devices)
+    : TMTestState(num_main_devices, num_cold_devices), gen(rd()) {
+  }
+
+  // Random block-aligned laddr in [0, limit).
+  laddr_t get_random_laddr(size_t block_size, laddr_t limit) {
+    return block_size *
+      std::uniform_int_distribution<>(0, (limit / block_size) - 1)(gen);
+  }
+
+  // Random byte value used as extent fill contents.
+  char get_random_contents() {
+    return static_cast<char>(std::uniform_int_distribution<>(0, 255)(gen));
+  }
+
+  // Fixture lifecycle delegates to TMTestState's tm setup/teardown.
+  seastar::future<> set_up_fut() final {
+    return tm_setup();
+  }
+
+  seastar::future<> tear_down_fut() final {
+    return tm_teardown();
+  }
+
+  // Reference model of the expected laddr -> extent mapping.  The base
+  // map reflects committed state; per-transaction changes are kept in a
+  // delta_t (nullopt value = erased) and merged on top via
+  // delta_overlay_t until the transaction commits.
+  struct test_extents_t : std::map<laddr_t, test_extent_record_t> {
+    using delta_t = std::map<laddr_t, std::optional<test_extent_record_t>>;
+    // Highest write_seq applied per laddr; used by consume() to avoid
+    // applying stale deltas out of order.
+    std::map<laddr_t, uint64_t> laddr_write_seq;
+
+    // Read-only merged view of the base map plus one transaction delta.
+    struct delta_overlay_t {
+      const test_extents_t &extents;
+      const delta_t &delta;
+
+      delta_overlay_t(
+	const test_extents_t &extents,
+	const delta_t &delta)
+	: extents(extents), delta(delta) {}
+
+
+      // Merge iterator over (base, delta): delta entries shadow base
+      // entries with the same key; nullopt delta entries hide them.
+      class iterator {
+	friend class test_extents_t;
+
+	const delta_overlay_t &parent;
+	test_extents_t::const_iterator biter;
+	delta_t::const_iterator oiter;
+	// Cached current element (materialized by adjust()).
+	std::optional<std::pair<laddr_t, test_extent_record_t>> cur;
+
+	iterator(
+	  const delta_overlay_t &parent,
+	  test_extents_t::const_iterator biter,
+	  delta_t::const_iterator oiter)
+	  : parent(parent), biter(biter), oiter(oiter) {}
+
+	laddr_t get_bkey() {
+	  return biter == parent.extents.end() ? L_ADDR_MAX : biter->first;
+	}
+
+	laddr_t get_okey() {
+	  return oiter == parent.delta.end() ? L_ADDR_MAX : oiter->first;
+	}
+
+	bool is_end() {
+	  return oiter == parent.delta.end() && biter == parent.extents.end();
+	}
+
+	// Valid when not pointing at an erased delta entry nor at a base
+	// entry shadowed by the delta.
+	bool is_valid() {
+	  return is_end() ||
+	    ((get_okey() < get_bkey()) && (oiter->second)) ||
+	    (get_okey() > get_bkey());
+	}
+
+	auto get_pair() {
+	  assert(is_valid());
+	  assert(!is_end());
+	  auto okey = get_okey();
+	  auto bkey = get_bkey();
+	  return (
+	    bkey < okey ?
+	    std::pair<laddr_t, test_extent_record_t>(*biter) :
+	    std::make_pair(okey, *(oiter->second)));
+	}
+
+	// Skip erased/shadowed entries, then refresh the cached element.
+	void adjust() {
+	  while (!is_valid()) {
+	    if (get_okey() < get_bkey()) {
+	      assert(!oiter->second);
+	      ++oiter;
+	    } else {
+	      assert(get_okey() == get_bkey());
+	      ++biter;
+	    }
+	  }
+	  assert(is_valid());
+	  if (!is_end()) {
+	    cur = get_pair();
+	  } else {
+	    cur = std::nullopt;
+	  }
+	}
+
+      public:
+	iterator(const iterator &) = default;
+	iterator(iterator &&) = default;
+
+	iterator &operator++() {
+	  assert(is_valid());
+	  assert(!is_end());
+	  if (get_bkey() < get_okey()) {
+	    ++biter;
+	  } else {
+	    ++oiter;
+	  }
+	  adjust();
+	  return *this;
+	}
+
+	bool operator==(const iterator &o) const {
+	  return o.biter == biter && o.oiter == oiter;
+	}
+	bool operator!=(const iterator &o) const {
+	  return !(*this == o);
+	}
+
+	auto operator*() {
+	  assert(!is_end());
+	  return *cur;
+	}
+	auto operator->() {
+	  assert(!is_end());
+	  return &*cur;
+	}
+      };
+
+      iterator begin() {
+	auto ret = iterator{*this, extents.begin(), delta.begin()};
+	ret.adjust();
+	return ret;
+      }
+
+      iterator end() {
+	auto ret = iterator{*this, extents.end(), delta.end()};
+	// adjust unnecessary
+	return ret;
+      }
+
+      iterator lower_bound(laddr_t l) {
+	auto ret = iterator{*this, extents.lower_bound(l), delta.lower_bound(l)};
+	ret.adjust();
+	return ret;
+      }
+
+      iterator upper_bound(laddr_t l) {
+	auto ret = iterator{*this, extents.upper_bound(l), delta.upper_bound(l)};
+	ret.adjust();
+	return ret;
+      }
+
+      iterator find(laddr_t l) {
+	auto ret = lower_bound(l);
+	if (ret == end() || ret->first != l) {
+	  return end();
+	} else {
+	  return ret;
+	}
+      }
+    };
+  private:
+    // Expect that [addr, addr+len) does not overlap any mapped extent
+    // in the overlay view.
+    void check_available(
+      laddr_t addr, extent_len_t len, const delta_t &delta
+    ) const {
+      delta_overlay_t overlay(*this, delta);
+      for (const auto &i: overlay) {
+	if (i.first < addr) {
+	  EXPECT_FALSE(i.first + i.second.desc.len > addr);
+	} else {
+	  EXPECT_FALSE(addr + len > i.first);
+	}
+      }
+    }
+
+    // Expect that the allocator placed `addr` "near" `hint`: every gap
+    // between hint and addr must be smaller than len (i.e. could not
+    // have fit the allocation).
+    void check_hint(
+      laddr_t hint,
+      laddr_t addr,
+      extent_len_t len,
+      delta_t &delta) const {
+      delta_overlay_t overlay(*this, delta);
+      auto iter = overlay.lower_bound(hint);
+      laddr_t last = hint;
+      while (true) {
+	if (iter == overlay.end() || iter->first > addr) {
+	  EXPECT_EQ(addr, last);
+	  break;
+	}
+	EXPECT_FALSE(iter->first - last > len);
+	last = iter->first + iter->second.desc.len;
+	++iter;
+      }
+    }
+
+    // Ensures `delta` has an entry for addr (copying from the base map,
+    // or creating a fresh record from *desc when absent) and returns it.
+    std::optional<test_extent_record_t> &populate_delta(
+      laddr_t addr, delta_t &delta, const test_extent_desc_t *desc) const {
+      auto diter = delta.find(addr);
+      if (diter != delta.end())
+	return diter->second;
+
+      auto iter = find(addr);
+      if (iter == end()) {
+	assert(desc);
+	auto ret = delta.emplace(
+	  std::make_pair(addr, test_extent_record_t{*desc, 0}));
+	assert(ret.second);
+	return ret.first->second;
+      } else {
+	auto ret = delta.emplace(*iter);
+	assert(ret.second);
+	return ret.first->second;
+      }
+    }
+  public:
+    delta_overlay_t get_overlay(const delta_t &delta) const {
+      return delta_overlay_t{*this, delta};
+    }
+
+    // Record a newly allocated extent (refcount 1) in the delta after
+    // checking its range was free.
+    void insert(TestBlock &extent, delta_t &delta) const {
+      check_available(extent.get_laddr(), extent.get_length(), delta);
+      delta[extent.get_laddr()] =
+	test_extent_record_t{extent.get_desc(), 1};
+    }
+
+    // insert() plus a check that the allocation honored the laddr hint.
+    void alloced(laddr_t hint, TestBlock &extent, delta_t &delta) const {
+      check_hint(hint, extent.get_laddr(), extent.get_length(), delta);
+      insert(extent, delta);
+    }
+
+    bool contains(laddr_t addr, const delta_t &delta) const {
+      delta_overlay_t overlay(*this, delta);
+      return overlay.find(addr) != overlay.end();
+    }
+
+    test_extent_record_t get(laddr_t addr, const delta_t &delta) const {
+      delta_overlay_t overlay(*this, delta);
+      auto iter = overlay.find(addr);
+      assert(iter != overlay.end());
+      return iter->second;
+    }
+
+    // Replace the descriptor for addr within the delta.
+    void update(
+      laddr_t addr,
+      const test_extent_desc_t &desc,
+      delta_t &delta) const {
+      auto &rec = populate_delta(addr, delta, &desc);
+      assert(rec);
+      rec->desc = desc;
+    }
+
+    // Bump refcount in the delta; returns the new count.
+    int inc_ref(
+      laddr_t addr,
+      delta_t &delta) const {
+      auto &rec = populate_delta(addr, delta, nullptr);
+      assert(rec);
+      return ++rec->refcount;
+    }
+
+    // Drop refcount in the delta; a count of 0 marks the mapping erased.
+    int dec_ref(
+      laddr_t addr,
+      delta_t &delta) const {
+      auto &rec = populate_delta(addr, delta, nullptr);
+      assert(rec);
+      assert(rec->refcount > 0);
+      rec->refcount--;
+      if (rec->refcount == 0) {
+	delta[addr] = std::nullopt;
+	return 0;
+      } else {
+	return rec->refcount;
+      }
+    }
+
+    // Apply a committed transaction's delta to the base map, skipping
+    // entries already superseded by a later write_seq.
+    void consume(const delta_t &delta, const uint64_t write_seq = 0) {
+      for (const auto &i : delta) {
+	if (i.second) {
+	  if (laddr_write_seq.find(i.first) == laddr_write_seq.end() ||
+	      laddr_write_seq[i.first] <= write_seq) {
+	    (*this)[i.first] = *i.second;
+	    laddr_write_seq[i.first] = write_seq;
+	  }
+	} else {
+	  erase(i.first);
+	}
+      }
+    }
+
+  } test_mappings;
+
+  // A TM transaction paired with the model delta that tracks its
+  // expected effect on test_mappings.
+  struct test_transaction_t {
+    TransactionRef t;
+    test_extents_t::delta_t mapping_delta;
+  };
+
+  test_transaction_t create_transaction() {
+    return { create_mutate_transaction(), {} };
+  }
+
+  test_transaction_t create_read_test_transaction() {
+    return {create_read_transaction(), {} };
+  }
+
+  test_transaction_t create_weak_test_transaction() {
+    return { create_weak_transaction(), {} };
+  }
+
+  // Allocates a TestBlock near `hint`, fills it with `contents`, and
+  // records the allocation in the transaction's model delta.
+  TestBlockRef alloc_extent(
+    test_transaction_t &t,
+    laddr_t hint,
+    extent_len_t len,
+    char contents) {
+    auto extent = with_trans_intr(*(t.t), [&](auto& trans) {
+      return tm->alloc_extent<TestBlock>(trans, hint, len);
+    }).unsafe_get0();
+    extent->set_contents(contents);
+    EXPECT_FALSE(test_mappings.contains(extent->get_laddr(), t.mapping_delta));
+    EXPECT_EQ(len, extent->get_length());
+    test_mappings.alloced(hint, *extent, t.mapping_delta);
+    return extent;
+  }
+
+  // Convenience overload with random fill contents.
+  TestBlockRef alloc_extent(
+    test_transaction_t &t,
+    laddr_t hint,
+    extent_len_t len) {
+    return alloc_extent(
+      t,
+      hint,
+      len,
+      get_random_contents());
+  }
+
+  // Delegates space-accounting verification to the EPM.
+  bool check_usage() {
+    return epm->check_usage();
+  }
+
+  // Verifies usage, then restarts the stack (forcing journal replay).
+  void replay() {
+    EXPECT_TRUE(check_usage());
+    restart();
+  }
+
+  void check() {
+    check_mappings();
+    check_usage();
+  }
+
+  // Validates all mappings under a fresh weak transaction.
+  void check_mappings() {
+    auto t = create_weak_test_transaction();
+    check_mappings(t);
+  }
+
+  // Reads an extent expected to exist in the model; asserts on the
+  // returned laddr.  Errors (including conflict) abort the test.
+  TestBlockRef get_extent(
+    test_transaction_t &t,
+    laddr_t addr,
+    extent_len_t len) {
+    ceph_assert(test_mappings.contains(addr, t.mapping_delta));
+    ceph_assert(test_mappings.get(addr, t.mapping_delta).desc.len == len);
+
+    auto ext = with_trans_intr(*(t.t), [&](auto& trans) {
+      return tm->read_extent<TestBlock>(trans, addr, len);
+    }).unsafe_get0();
+    EXPECT_EQ(addr, ext->get_laddr());
+    return ext;
+  }
+
+  // Like get_extent but tolerates a transaction conflict: returns
+  // nullptr on eagain instead of failing.
+  TestBlockRef try_get_extent(
+    test_transaction_t &t,
+    laddr_t addr) {
+    ceph_assert(test_mappings.contains(addr, t.mapping_delta));
+
+    using ertr = with_trans_ertr<TransactionManager::read_extent_iertr>;
+    using ret = ertr::future<TestBlockRef>;
+    auto ext = with_trans_intr(*(t.t), [&](auto& trans) {
+      return tm->read_extent<TestBlock>(trans, addr);
+    }).safe_then([](auto ext) -> ret {
+      return ertr::make_ready_future<TestBlockRef>(ext);
+    }).handle_error(
+      [](const crimson::ct_error::eagain &e) {
+	return seastar::make_ready_future<TestBlockRef>();
+      },
+      crimson::ct_error::assert_all{
+	"get_extent got invalid error"
+      }
+    ).get0();
+    if (ext) {
+      EXPECT_EQ(addr, ext->get_laddr());
+    }
+    return ext;
+  }
+
+  // try_get_extent with an additional expected-length check.
+  TestBlockRef try_get_extent(
+    test_transaction_t &t,
+    laddr_t addr,
+    extent_len_t len) {
+    ceph_assert(test_mappings.contains(addr, t.mapping_delta));
+    ceph_assert(test_mappings.get(addr, t.mapping_delta).desc.len == len);
+
+    using ertr = with_trans_ertr<TransactionManager::read_extent_iertr>;
+    using ret = ertr::future<TestBlockRef>;
+    auto ext = with_trans_intr(*(t.t), [&](auto& trans) {
+      return tm->read_extent<TestBlock>(trans, addr, len);
+    }).safe_then([](auto ext) -> ret {
+      return ertr::make_ready_future<TestBlockRef>(ext);
+    }).handle_error(
+      [](const crimson::ct_error::eagain &e) {
+	return seastar::make_ready_future<TestBlockRef>();
+      },
+      crimson::ct_error::assert_all{
+	"get_extent got invalid error"
+      }
+    ).get0();
+    if (ext) {
+      EXPECT_EQ(addr, ext->get_laddr());
+    }
+    return ext;
+  }
+
+  // Reads through an LBA pin; returns nullptr on eagain or when the
+  // transaction was marked conflicted.
+  TestBlockRef try_read_pin(
+    test_transaction_t &t,
+    LBAMappingRef &&pin) {
+    using ertr = with_trans_ertr<TransactionManager::base_iertr>;
+    using ret = ertr::future<TestBlockRef>;
+    auto addr = pin->get_key();
+    auto ext = with_trans_intr(*(t.t), [&](auto& trans) {
+      return tm->read_pin<TestBlock>(trans, std::move(pin));
+    }).safe_then([](auto ext) -> ret {
+      return ertr::make_ready_future<TestBlockRef>(ext);
+    }).handle_error(
+      [](const crimson::ct_error::eagain &e) {
+	return seastar::make_ready_future<TestBlockRef>();
+      },
+      crimson::ct_error::assert_all{
+	"read_pin got invalid error"
+      }
+    ).get0();
+    if (ext) {
+      EXPECT_EQ(addr, ext->get_laddr());
+    }
+    if (t.t->is_conflicted()) {
+      return nullptr;
+    }
+    return ext;
+  }
+
+  // Applies randomized mutations to extents (see test_block.h).
+  test_block_mutator_t mutator;
+  // Makes `ref` mutable in `t`, applies a random mutation, and mirrors
+  // the new descriptor into the model delta.
+  TestBlockRef mutate_extent(
+    test_transaction_t &t,
+    TestBlockRef ref) {
+    ceph_assert(test_mappings.contains(ref->get_laddr(), t.mapping_delta));
+    ceph_assert(
+      test_mappings.get(ref->get_laddr(), t.mapping_delta).desc.len ==
+      ref->get_length());
+
+    auto ext = tm->get_mutable_extent(*t.t, ref)->cast<TestBlock>();
+    EXPECT_EQ(ext->get_laddr(), ref->get_laddr());
+    EXPECT_EQ(ext->get_desc(), ref->get_desc());
+    mutator.mutate(*ext, gen);
+
+    test_mappings.update(ext->get_laddr(), ext->get_desc(), t.mapping_delta);
+    return ext;
+  }
+
+  // Read-then-mutate convenience wrapper.
+  TestBlockRef mutate_addr(
+    test_transaction_t &t,
+    laddr_t offset,
+    size_t length) {
+    auto ext = get_extent(t, offset, length);
+    mutate_extent(t, ext);
+    return ext;
+  }
+
+  // Fetches the LBA pin for a mapping expected to exist in the model.
+  LBAMappingRef get_pin(
+    test_transaction_t &t,
+    laddr_t offset) {
+    ceph_assert(test_mappings.contains(offset, t.mapping_delta));
+    auto pin = with_trans_intr(*(t.t), [&](auto& trans) {
+      return tm->get_pin(trans, offset);
+    }).unsafe_get0();
+    EXPECT_EQ(offset, pin->get_key());
+    return pin;
+  }
+
+  // get_pin tolerating a conflict: returns nullptr on eagain.
+  LBAMappingRef try_get_pin(
+    test_transaction_t &t,
+    laddr_t offset) {
+    ceph_assert(test_mappings.contains(offset, t.mapping_delta));
+    using ertr = with_trans_ertr<TransactionManager::get_pin_iertr>;
+    using ret = ertr::future<LBAMappingRef>;
+    auto pin = with_trans_intr(*(t.t), [&](auto& trans) {
+      return tm->get_pin(trans, offset);
+    }).safe_then([](auto pin) -> ret {
+      return ertr::make_ready_future<LBAMappingRef>(std::move(pin));
+    }).handle_error(
+      [](const crimson::ct_error::eagain &e) {
+	return seastar::make_ready_future<LBAMappingRef>();
+      },
+      crimson::ct_error::assert_all{
+	"get_extent got invalid error"
+      }
+    ).get0();
+    if (pin) {
+      EXPECT_EQ(offset, pin->get_key());
+    }
+    return pin;
+  }
+
+  // Increments the TM refcount for offset and cross-checks the result
+  // against the model's expectation.
+  void inc_ref(test_transaction_t &t, laddr_t offset) {
+    ceph_assert(test_mappings.contains(offset, t.mapping_delta));
+    ceph_assert(test_mappings.get(offset, t.mapping_delta).refcount > 0);
+
+    auto refcnt = with_trans_intr(*(t.t), [&](auto& trans) {
+      return tm->inc_ref(trans, offset);
+    }).unsafe_get0();
+    auto check_refcnt = test_mappings.inc_ref(offset, t.mapping_delta);
+    EXPECT_EQ(refcnt, check_refcnt);
+  }
+
+  // Decrements the TM refcount for offset; the model mirrors the drop
+  // (and the mapping's removal at refcount 0).
+  void dec_ref(test_transaction_t &t, laddr_t offset) {
+    ceph_assert(test_mappings.contains(offset, t.mapping_delta));
+    ceph_assert(test_mappings.get(offset, t.mapping_delta).refcount > 0);
+
+    auto refcnt = with_trans_intr(*(t.t), [&](auto& trans) {
+      return tm->dec_ref(trans, offset);
+    }).unsafe_get0();
+    auto check_refcnt = test_mappings.dec_ref(offset, t.mapping_delta);
+    EXPECT_EQ(refcnt, check_refcnt);
+    if (refcnt == 0)
+      logger().debug("dec_ref: {} at refcount 0", offset);
+  }
+
+  // Full consistency pass for transaction `t`: every modeled mapping is
+  // readable with matching descriptor, the LBA tree scan agrees with the
+  // model's ordering, and the child trackers are consistent.
+  void check_mappings(test_transaction_t &t) {
+    auto overlay = test_mappings.get_overlay(t.mapping_delta);
+    for (const auto &i: overlay) {
+      logger().debug("check_mappings: {}->{}", i.first, i.second);
+      auto ext = get_extent(t, i.first, i.second.desc.len);
+      EXPECT_EQ(i.second, ext->get_desc());
+    }
+    with_trans_intr(
+      *t.t,
+      [this, &overlay](auto &t) {
+	return lba_manager->scan_mappings(
+	  t,
+	  0,
+	  L_ADDR_MAX,
+	  [iter=overlay.begin(), &overlay](auto l, auto p, auto len) mutable {
+	    EXPECT_NE(iter, overlay.end());
+	    logger().debug(
+	      "check_mappings: scan {}",
+	      l);
+	    EXPECT_EQ(l, iter->first);
+	    ++iter;
+	  });
+      }).unsafe_get0();
+    (void)with_trans_intr(
+      *t.t,
+      [=, this](auto &t) {
+	return lba_manager->check_child_trackers(t);
+      }).unsafe_get0();
+  }
+
+ // Submit `t`, treating an eagain (conflict) as a clean failure rather
+ // than an error. On success the model consumes the transaction's mapping
+ // delta tagged with the write sequence. Background work is always run to
+ // completion before returning. Returns true iff the submit succeeded.
+ bool try_submit_transaction(test_transaction_t t) {
+ using ertr = with_trans_ertr<TransactionManager::submit_transaction_iertr>;
+ using ret = ertr::future<bool>;
+ uint64_t write_seq = 0;
+ bool success = submit_transaction_fut_with_seq(*t.t
+ ).safe_then([&write_seq](auto seq) -> ret {
+ write_seq = seq;
+ return ertr::make_ready_future<bool>(true);
+ }).handle_error(
+ // eagain == transaction conflict: report failure, not an error.
+ [](const crimson::ct_error::eagain &e) {
+ return seastar::make_ready_future<bool>(false);
+ },
+ crimson::ct_error::assert_all{
+ "try_submit_transaction hit invalid error"
+ }
+ ).then([this](auto ret) {
+ return epm->run_background_work_until_halt(
+ ).then([ret] { return ret; });
+ }).get0();
+
+ if (success) {
+ test_mappings.consume(t.mapping_delta, write_seq);
+ }
+
+ return success;
+ }
+
+ // Submit `t` and require success (no conflict expected).
+ void submit_transaction(test_transaction_t &&t) {
+ bool success = try_submit_transaction(std::move(t));
+ EXPECT_TRUE(success);
+ }
+
+ // Submit `t` and require that it fails due to a conflict.
+ void submit_transaction_expect_conflict(test_transaction_t &&t) {
+ bool success = try_submit_transaction(std::move(t));
+ EXPECT_FALSE(success);
+ }
+
+ // Allocate `num` TestBlock extents of `size` bytes in a single
+ // transaction (retried on eagain), filling each with random contents and
+ // recording it in the model. If `run_clean` is set, run background work
+ // to completion afterwards; otherwise only trim the journal so cleaning
+ // behavior can be exercised separately (see test_evict).
+ auto allocate_sequentially(const size_t size, const int num, bool run_clean = true) {
+ return repeat_eagain([this, size, num] {
+ return seastar::do_with(
+ create_transaction(),
+ [this, size, num](auto &t) {
+ return with_trans_intr(
+ *t.t,
+ [&t, this, size, num](auto &) {
+ return trans_intr::do_for_each(
+ boost::make_counting_iterator(0),
+ boost::make_counting_iterator(num),
+ [&t, this, size](auto) {
+ // L_ADDR_MIN hint: let the TM pick the next laddr.
+ return tm->alloc_extent<TestBlock>(
+ *(t.t), L_ADDR_MIN, size
+ ).si_then([&t, this, size](auto extent) {
+ extent->set_contents(get_random_contents());
+ EXPECT_FALSE(
+ test_mappings.contains(extent->get_laddr(), t.mapping_delta));
+ EXPECT_EQ(size, extent->get_length());
+ test_mappings.alloced(extent->get_laddr(), *extent, t.mapping_delta);
+ return seastar::now();
+ });
+ }).si_then([&t, this] {
+ return tm->submit_transaction(*t.t);
+ });
+ }).safe_then([&t, this] {
+ test_mappings.consume(t.mapping_delta);
+ });
+ });
+ }).safe_then([this, run_clean]() {
+ if (run_clean) {
+ return epm->run_background_work_until_halt();
+ } else {
+ return epm->background_process.trimmer->trim();
+ }
+ }).handle_error(
+ // NOTE(review): message mentions SeaStore::list_collections — looks
+ // copy-pasted from elsewhere; confirm before relying on it in logs.
+ crimson::ct_error::assert_all{
+ "Invalid error in SeaStore::list_collections"
+ }
+ );
+ }
+
+ // Write BLOCKS sequential extents one transaction at a time, then read
+ // them all back concurrently within a single read transaction via
+ // trans_intr::parallel_for_each to exercise parallel extent reads.
+ void test_parallel_extent_read() {
+ constexpr size_t TOTAL = 4<<20;
+ constexpr size_t BSIZE = 4<<10;
+ constexpr size_t BLOCKS = TOTAL / BSIZE;
+ run_async([this] {
+ for (unsigned i = 0; i < BLOCKS; ++i) {
+ auto t = create_transaction();
+ auto extent = alloc_extent(
+ t,
+ i * BSIZE,
+ BSIZE);
+ ASSERT_EQ(i * BSIZE, extent->get_laddr());
+ submit_transaction(std::move(t));
+ }
+
+ seastar::do_with(
+ create_read_test_transaction(),
+ [this](auto &t) {
+ return with_trans_intr(*(t.t), [this](auto &t) {
+ return trans_intr::parallel_for_each(
+ boost::make_counting_iterator(0lu),
+ boost::make_counting_iterator(BLOCKS),
+ [this, &t](auto i) {
+ return tm->read_extent<TestBlock>(t, i * BSIZE, BSIZE
+ ).si_then([](auto) {
+ return seastar::now();
+ });
+ });
+ });
+ }).unsafe_get0();
+ });
+ }
+
+ // Populate BLOCKS extents (interleaved across WRITE_STREAMS logical
+ // streams, retrying each allocation until it commits), then run
+ // WRITE_STREAMS concurrent mutator fibers that each repeatedly pick a
+ // random extent, mutate it, and try to commit, counting successes and
+ // conflicts. Finally replay the journal and re-check consistency.
+ void test_random_writes_concurrent() {
+ constexpr unsigned WRITE_STREAMS = 256;
+
+ constexpr size_t TOTAL = 4<<20;
+ constexpr size_t BSIZE = 4<<10;
+ constexpr size_t BLOCKS = TOTAL / BSIZE;
+ run_async([this] {
+ std::for_each(
+ boost::make_counting_iterator(0u),
+ boost::make_counting_iterator(WRITE_STREAMS),
+ [&](auto idx) {
+ for (unsigned i = idx; i < BLOCKS; i += WRITE_STREAMS) {
+ while (true) {
+ auto t = create_transaction();
+ auto extent = alloc_extent(
+ t,
+ i * BSIZE,
+ BSIZE);
+ ASSERT_EQ(i * BSIZE, extent->get_laddr());
+ if (try_submit_transaction(std::move(t)))
+ break;
+ }
+ }
+ });
+
+ // Shared across fibers; single-reactor execution keeps this safe.
+ int writes = 0;
+ unsigned failures = 0;
+ seastar::parallel_for_each(
+ boost::make_counting_iterator(0u),
+ boost::make_counting_iterator(WRITE_STREAMS),
+ [&](auto) {
+ return seastar::async([&] {
+ while (writes < 300) {
+ auto t = create_transaction();
+ auto ext = try_get_extent(
+ t,
+ get_random_laddr(BSIZE, TOTAL),
+ BSIZE);
+ if (!ext){
+ failures++;
+ continue;
+ }
+ auto mut = mutate_extent(t, ext);
+ auto success = try_submit_transaction(std::move(t));
+ writes += success;
+ failures += !success;
+ }
+ });
+ }).get0();
+ replay();
+ logger().info("random_writes_concurrent: checking");
+ check();
+ // NOTE(review): "suceeded" typo in the log string below; left as-is
+ // since it is runtime output recorded by this patch.
+ logger().info(
+ "random_writes_concurrent: {} suceeded, {} failed",
+ writes,
+ failures
+ );
+ });
+ }
+
+ // Exercise the EPM's multi-tier eviction state machine. Fills the main
+ // (segmented) tier to successive thresholds (ratio A..D) and verifies
+ // at each point that adjust_generation() maps target generations the
+ // way the eviction mode (stop/default/fast) dictates, and that data
+ // actually migrates to the cold tier once fast mode is reached.
+ void test_evict() {
+ // only support segmented backend currently
+ ASSERT_EQ(epm->get_main_backend_type(), backend_type_t::SEGMENTED);
+ ASSERT_TRUE(epm->background_process.has_cold_tier());
+ constexpr size_t device_size =
+ segment_manager::DEFAULT_TEST_EPHEMERAL.size;
+ constexpr size_t block_size =
+ segment_manager::DEFAULT_TEST_EPHEMERAL.block_size;
+ constexpr size_t segment_size =
+ segment_manager::DEFAULT_TEST_EPHEMERAL.segment_size;
+ ASSERT_GE(segment_size, block_size * 20);
+
+ run_async([this] {
+ // indicates there is no available segments to reclaim
+ double stop_ratio = (double)segment_size / (double)device_size / 2;
+ // 1 segment
+ double default_ratio = stop_ratio * 2;
+ // 1.25 segment
+ double fast_ratio = stop_ratio * 2.5;
+
+ epm->background_process
+ .eviction_state
+ .init(stop_ratio, default_ratio, fast_ratio);
+
+ // these variables are described in
+ // EPM::BackgroundProcess::eviction_state_t::maybe_update_eviction_mode
+ size_t ratio_A_size = segment_size / 2 - block_size * 10;
+ size_t ratio_B_size = segment_size / 2 + block_size * 10;
+ size_t ratio_C_size = segment_size + block_size;
+ size_t ratio_D_size = segment_size * 1.25 + block_size;
+
+ // Grow the main cleaner's stored data to at least `size` bytes by
+ // allocating block-sized extents (without running the cleaner).
+ auto run_until = [this](size_t size) -> seastar::future<> {
+ return seastar::repeat([this, size] {
+ size_t current_size = epm->background_process
+ .main_cleaner->get_stat().data_stored;
+ if (current_size >= size) {
+ return seastar::futurize_invoke([] {
+ return seastar::stop_iteration::yes;
+ });
+ } else {
+ int num = (size - current_size) / block_size;
+ return seastar::do_for_each(
+ boost::make_counting_iterator(0),
+ boost::make_counting_iterator(num),
+ [this](auto) {
+ // don't start background process to test the behavior
+ // of generation changes during alloc new extents
+ return allocate_sequentially(block_size, 1, false);
+ }).then([] {
+ return seastar::stop_iteration::no;
+ });
+ }
+ });
+ };
+
+ std::vector<extent_types_t> all_extent_types{
+ extent_types_t::ROOT,
+ extent_types_t::LADDR_INTERNAL,
+ extent_types_t::LADDR_LEAF,
+ extent_types_t::OMAP_INNER,
+ extent_types_t::OMAP_LEAF,
+ extent_types_t::ONODE_BLOCK_STAGED,
+ extent_types_t::COLL_BLOCK,
+ extent_types_t::OBJECT_DATA_BLOCK,
+ extent_types_t::RETIRED_PLACEHOLDER,
+ extent_types_t::ALLOC_INFO,
+ extent_types_t::JOURNAL_TAIL,
+ extent_types_t::TEST_BLOCK,
+ extent_types_t::TEST_BLOCK_PHYSICAL,
+ extent_types_t::BACKREF_INTERNAL,
+ extent_types_t::BACKREF_LEAF
+ };
+
+ std::vector<rewrite_gen_t> all_generations;
+ for (auto i = INIT_GENERATION; i < REWRITE_GENERATIONS; i++) {
+ all_generations.push_back(i);
+ }
+
+ // input target-generation -> expected generation after the adjustment
+ using generation_mapping_t = std::map<rewrite_gen_t, rewrite_gen_t>;
+ std::map<extent_types_t, generation_mapping_t> expected_generations;
+
+ // this loop should be consistent with EPM::adjust_generation
+ for (auto t : all_extent_types) {
+ expected_generations[t] = {};
+ if (!is_logical_type(t)) {
+ for (auto gen : all_generations) {
+ expected_generations[t][gen] = INLINE_GENERATION;
+ }
+ } else {
+ if (get_extent_category(t) == data_category_t::METADATA) {
+ expected_generations[t][INIT_GENERATION] = INLINE_GENERATION;
+ } else {
+ expected_generations[t][INIT_GENERATION] = OOL_GENERATION;
+ }
+
+ for (auto i = INIT_GENERATION + 1; i < REWRITE_GENERATIONS; i++) {
+ expected_generations[t][i] = i;
+ }
+ }
+ }
+
+ // Rewrite the expected mapping for logical (data) types via `func`,
+ // then refresh the EPM's eviction mode to match the current usage.
+ auto update_data_gen_mapping = [&](std::function<rewrite_gen_t(rewrite_gen_t)> func) {
+ for (auto t : all_extent_types) {
+ if (!is_logical_type(t)) {
+ continue;
+ }
+ for (auto i = INIT_GENERATION + 1; i < REWRITE_GENERATIONS; i++) {
+ expected_generations[t][i] = func(i);
+ }
+ }
+ // since background process didn't start in allocate_sequentially
+ // we update eviction mode manually.
+ epm->background_process.maybe_update_eviction_mode();
+ };
+
+ // Compare EPM::adjust_generation against the expected table for every
+ // (extent type, generation) pair; `caller` labels failures in the log.
+ auto test_gen = [&](const char *caller) {
+ for (auto t : all_extent_types) {
+ for (auto gen : all_generations) {
+ auto epm_gen = epm->adjust_generation(
+ get_extent_category(t),
+ t,
+ placement_hint_t::HOT,
+ gen);
+ if (expected_generations[t][gen] != epm_gen) {
+ logger().error("caller: {}, extent type: {}, input generation: {}, "
+ "expected generation : {}, adjust result from EPM: {}",
+ caller, t, gen, expected_generations[t][gen], epm_gen);
+ }
+ EXPECT_EQ(expected_generations[t][gen], epm_gen);
+ }
+ }
+ };
+
+ // verify that no data should go to the cold tier
+ update_data_gen_mapping([](rewrite_gen_t gen) -> rewrite_gen_t {
+ if (gen == MIN_COLD_GENERATION) {
+ return MIN_COLD_GENERATION - 1;
+ } else {
+ return gen;
+ }
+ });
+ test_gen("init");
+
+ run_until(ratio_A_size).get();
+ EXPECT_TRUE(epm->background_process.eviction_state.is_stop_mode());
+ test_gen("exceed ratio A");
+ epm->run_background_work_until_halt().get();
+
+ run_until(ratio_B_size).get();
+ EXPECT_TRUE(epm->background_process.eviction_state.is_stop_mode());
+ test_gen("exceed ratio B");
+ epm->run_background_work_until_halt().get();
+
+ // verify that data may go to the cold tier
+ run_until(ratio_C_size).get();
+ update_data_gen_mapping([](rewrite_gen_t gen) { return gen; });
+ EXPECT_TRUE(epm->background_process.eviction_state.is_default_mode());
+ test_gen("exceed ratio C");
+ epm->run_background_work_until_halt().get();
+
+ // verify that data must go to the cold tier
+ run_until(ratio_D_size).get();
+ update_data_gen_mapping([](rewrite_gen_t gen) {
+ if (gen >= MIN_REWRITE_GENERATION && gen < MIN_COLD_GENERATION) {
+ return MIN_COLD_GENERATION;
+ } else {
+ return gen;
+ }
+ });
+ EXPECT_TRUE(epm->background_process.eviction_state.is_fast_mode());
+ test_gen("exceed ratio D");
+
+ // Running background work in fast mode must shrink the main tier and
+ // move a non-zero amount of data into the cold tier.
+ auto main_size = epm->background_process.main_cleaner->get_stat().data_stored;
+ auto cold_size = epm->background_process.cold_cleaner->get_stat().data_stored;
+ EXPECT_EQ(cold_size, 0);
+ epm->run_background_work_until_halt().get();
+ auto new_main_size = epm->background_process.main_cleaner->get_stat().data_stored;
+ auto new_cold_size = epm->background_process.cold_cleaner->get_stat().data_stored;
+ EXPECT_GE(main_size, new_main_size);
+ EXPECT_NE(new_cold_size, 0);
+
+ update_data_gen_mapping([](rewrite_gen_t gen) { return gen; });
+ EXPECT_TRUE(epm->background_process.eviction_state.is_default_mode());
+ test_gen("finish evict");
+ });
+ }
+
+ using remap_entry = TransactionManager::remap_entry;
+ // Remap `opin` to the sub-range [new_offset, new_offset + new_len),
+ // updating the model accordingly. Returns the new pin, or nullptr when
+ // the transaction conflicted (eagain) at any step.
+ LBAMappingRef remap_pin(
+ test_transaction_t &t,
+ LBAMappingRef &&opin,
+ extent_len_t new_offset,
+ extent_len_t new_len) {
+ if (t.t->is_conflicted()) {
+ return nullptr;
+ }
+ auto o_laddr = opin->get_key();
+ auto pin = with_trans_intr(*(t.t), [&](auto& trans) {
+ return tm->remap_pin<TestBlock>(
+ trans, std::move(opin), std::array{
+ remap_entry(new_offset, new_len)}
+ ).si_then([](auto ret) {
+ return std::move(ret[0]);
+ });
+ }).handle_error(crimson::ct_error::eagain::handle([] {
+ // eagain: surface the conflict as a null pin.
+ LBAMappingRef t = nullptr;
+ return t;
+ }), crimson::ct_error::pass_further_all{}).unsafe_get0();
+ if (t.t->is_conflicted()) {
+ return nullptr;
+ }
+ // The old mapping is gone; record the remapped one in the model.
+ test_mappings.dec_ref(o_laddr, t.mapping_delta);
+ EXPECT_FALSE(test_mappings.contains(o_laddr, t.mapping_delta));
+ EXPECT_TRUE(pin);
+ EXPECT_EQ(pin->get_length(), new_len);
+ EXPECT_EQ(pin->get_key(), o_laddr + new_offset);
+
+ auto extent = try_read_pin(t, pin->duplicate());
+ if (extent) {
+ test_mappings.alloced(pin->get_key(), *extent, t.mapping_delta);
+ EXPECT_TRUE(extent->is_exist_clean());
+ } else {
+ ceph_assert(t.t->is_conflicted());
+ return nullptr;
+ }
+ return pin;
+ }
+
+ using _overwrite_pin_iertr = TransactionManager::get_pin_iertr;
+ using _overwrite_pin_ret = _overwrite_pin_iertr::future<
+ std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>;
+ // Overwrite [new_offset, new_offset + new_len) within `opin`: remap the
+ // surviving left/right remainders, allocate a fresh extent for the
+ // overwritten middle filled from `bl`, and return (left pin, new
+ // extent, right pin) — a side pin is nullptr when the overwrite is
+ // flush with that end. Overwriting the whole pin is not supported.
+ _overwrite_pin_ret _overwrite_pin(
+ Transaction &t,
+ LBAMappingRef &&opin,
+ extent_len_t new_offset,
+ extent_len_t new_len,
+ ceph::bufferlist &bl) {
+ auto o_laddr = opin->get_key();
+ auto o_len = opin->get_length();
+ if (new_offset != 0 && o_len != new_offset + new_len) {
+ // Middle overwrite: both a left and a right remainder survive.
+ return tm->remap_pin<TestBlock, 2>(
+ t,
+ std::move(opin),
+ std::array{
+ remap_entry(
+ 0,
+ new_offset),
+ remap_entry(
+ new_offset + new_len,
+ o_len - new_offset - new_len)
+ }
+ ).si_then([this, new_offset, new_len, o_laddr, &t, &bl](auto ret) {
+ return tm->alloc_extent<TestBlock>(t, o_laddr + new_offset, new_len
+ ).si_then([this, ret = std::move(ret), new_len,
+ new_offset, o_laddr, &t, &bl](auto ext) mutable {
+ ceph_assert(ret.size() == 2);
+ auto iter = bl.cbegin();
+ iter.copy(new_len, ext->get_bptr().c_str());
+ auto r_laddr = o_laddr + new_offset + new_len;
+ // old pins expired after alloc new extent, need to get it.
+ return tm->get_pin(t, o_laddr
+ ).si_then([this, &t, ext = std::move(ext), r_laddr](auto lpin) mutable {
+ return tm->get_pin(t, r_laddr
+ ).si_then([lpin = std::move(lpin), ext = std::move(ext)]
+ (auto rpin) mutable {
+ return _overwrite_pin_iertr::make_ready_future<
+ std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>(
+ std::make_tuple(
+ std::move(lpin), std::move(ext), std::move(rpin)));
+ });
+ });
+ });
+ });
+ } else if (new_offset == 0 && o_len != new_offset + new_len) {
+ // Overwrite at the front: only a right remainder survives.
+ return tm->remap_pin<TestBlock, 1>(
+ t,
+ std::move(opin),
+ std::array{
+ remap_entry(
+ new_offset + new_len,
+ o_len - new_offset - new_len)
+ }
+ ).si_then([this, new_offset, new_len, o_laddr, &t, &bl](auto ret) {
+ return tm->alloc_extent<TestBlock>(t, o_laddr + new_offset, new_len
+ ).si_then([this, ret = std::move(ret), new_offset, new_len,
+ o_laddr, &t, &bl](auto ext) mutable {
+ ceph_assert(ret.size() == 1);
+ auto iter = bl.cbegin();
+ iter.copy(new_len, ext->get_bptr().c_str());
+ auto r_laddr = o_laddr + new_offset + new_len;
+ return tm->get_pin(t, r_laddr
+ ).si_then([ext = std::move(ext)](auto rpin) mutable {
+ return _overwrite_pin_iertr::make_ready_future<
+ std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>(
+ std::make_tuple(
+ nullptr, std::move(ext), std::move(rpin)));
+ });
+ });
+ });
+ } else if (new_offset != 0 && o_len == new_offset + new_len) {
+ // Overwrite at the tail: only a left remainder survives.
+ return tm->remap_pin<TestBlock, 1>(
+ t,
+ std::move(opin),
+ std::array{
+ remap_entry(
+ 0,
+ new_offset)
+ }
+ ).si_then([this, new_offset, new_len, o_laddr, &t, &bl](auto ret) {
+ return tm->alloc_extent<TestBlock>(t, o_laddr + new_offset, new_len
+ ).si_then([this, ret = std::move(ret), new_len, o_laddr, &t, &bl]
+ (auto ext) mutable {
+ ceph_assert(ret.size() == 1);
+ auto iter = bl.cbegin();
+ iter.copy(new_len, ext->get_bptr().c_str());
+ return tm->get_pin(t, o_laddr
+ ).si_then([ext = std::move(ext)](auto lpin) mutable {
+ return _overwrite_pin_iertr::make_ready_future<
+ std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>(
+ std::make_tuple(
+ std::move(lpin), std::move(ext), nullptr));
+ });
+ });
+ });
+ } else {
+ // Full overwrite (new_offset == 0 && new_len == o_len): callers must
+ // not request this; it would be a plain alloc, not a remap.
+ ceph_abort("impossible");
+ return _overwrite_pin_iertr::make_ready_future<
+ std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>>(
+ std::make_tuple(nullptr, nullptr, nullptr));
+ }
+ }
+
+ using overwrite_pin_ret = std::tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>;
+ // Wrapper around _overwrite_pin that runs it interruptibly, maps eagain
+ // to an all-null tuple, updates the model for the removed/created
+ // mappings, and sanity-checks the returned pins' keys/paddrs/lengths.
+ // Returns (nullptr, nullptr, nullptr) on conflict.
+ overwrite_pin_ret overwrite_pin(
+ test_transaction_t &t,
+ LBAMappingRef &&opin,
+ extent_len_t new_offset,
+ extent_len_t new_len,
+ ceph::bufferlist &bl) {
+ if (t.t->is_conflicted()) {
+ return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>(
+ nullptr, nullptr, nullptr);
+ }
+ auto o_laddr = opin->get_key();
+ auto o_paddr = opin->get_val();
+ auto o_len = opin->get_length();
+ auto res = with_trans_intr(*(t.t), [&](auto& trans) {
+ return _overwrite_pin(
+ trans, std::move(opin), new_offset, new_len, bl);
+ }).handle_error(crimson::ct_error::eagain::handle([] {
+ return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>(
+ nullptr, nullptr, nullptr);
+ }), crimson::ct_error::pass_further_all{}).unsafe_get0();
+ if (t.t->is_conflicted()) {
+ return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>(
+ nullptr, nullptr, nullptr);
+ }
+ // Original mapping replaced: drop it from the model.
+ test_mappings.dec_ref(o_laddr, t.mapping_delta);
+ EXPECT_FALSE(test_mappings.contains(o_laddr, t.mapping_delta));
+ auto &[lpin, ext, rpin] = res;
+
+ EXPECT_TRUE(ext);
+ EXPECT_TRUE(lpin || rpin);
+ EXPECT_TRUE(o_len > ext->get_length());
+ if (lpin) {
+ // Left remainder keeps the original key and physical address.
+ EXPECT_EQ(lpin->get_key(), o_laddr);
+ EXPECT_EQ(lpin->get_val(), o_paddr);
+ EXPECT_EQ(lpin->get_length(), new_offset);
+ auto lext = try_read_pin(t, lpin->duplicate());
+ if (lext) {
+ test_mappings.alloced(lpin->get_key(), *lext, t.mapping_delta);
+ EXPECT_TRUE(lext->is_exist_clean());
+ } else {
+ ceph_assert(t.t->is_conflicted());
+ return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>(
+ nullptr, nullptr, nullptr);
+ }
+ }
+ EXPECT_EQ(ext->get_laddr(), o_laddr + new_offset);
+ EXPECT_EQ(ext->get_length(), new_len);
+ test_mappings.alloced(ext->get_laddr(), *ext, t.mapping_delta);
+ if (rpin) {
+ // Right remainder is shifted past the overwritten range.
+ EXPECT_EQ(rpin->get_key(), o_laddr + new_offset + new_len);
+ EXPECT_EQ(rpin->get_val(), o_paddr.add_offset(new_offset)
+ .add_offset(new_len));
+ EXPECT_EQ(rpin->get_length(), o_len - new_offset - new_len);
+ auto rext = try_read_pin(t, rpin->duplicate());
+ if (rext) {
+ test_mappings.alloced(rpin->get_key(), *rext, t.mapping_delta);
+ EXPECT_TRUE(rext->is_exist_clean());
+ } else {
+ ceph_assert(t.t->is_conflicted());
+ return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>(
+ nullptr, nullptr, nullptr);
+ }
+ }
+ return std::make_tuple<LBAMappingRef, TestBlockRef, LBAMappingRef>(
+ std::move(lpin), std::move(ext), std::move(rpin));
+ }
+
+ // Single-transaction remap test: allocate two extents, then repeatedly
+ // split the left one keeping its head and the right one keeping its
+ // tail, verify the retained contents, mutate both, commit, and confirm
+ // consistency survives a journal replay.
+ void test_remap_pin() {
+ run_async([this] {
+ constexpr size_t l_offset = 32 << 10;
+ constexpr size_t l_len = 32 << 10;
+ constexpr size_t r_offset = 64 << 10;
+ constexpr size_t r_len = 32 << 10;
+ {
+ auto t = create_transaction();
+ auto lext = alloc_extent(t, l_offset, l_len);
+ lext->set_contents('l', 0, 16 << 10);
+ auto rext = alloc_extent(t, r_offset, r_len);
+ rext->set_contents('r', 16 << 10, 16 << 10);
+ submit_transaction(std::move(t));
+ }
+ {
+ auto t = create_transaction();
+ auto lpin = get_pin(t, l_offset);
+ auto rpin = get_pin(t, r_offset);
+ //split left
+ auto pin1 = remap_pin(t, std::move(lpin), 0, 16 << 10);
+ ASSERT_TRUE(pin1);
+ auto pin2 = remap_pin(t, std::move(pin1), 0, 8 << 10);
+ ASSERT_TRUE(pin2);
+ auto pin3 = remap_pin(t, std::move(pin2), 0, 4 << 10);
+ ASSERT_TRUE(pin3);
+ auto lext = get_extent(t, pin3->get_key(), pin3->get_length());
+ EXPECT_EQ('l', lext->get_bptr().c_str()[0]);
+ auto mlext = mutate_extent(t, lext);
+ ASSERT_TRUE(mlext->is_exist_mutation_pending());
+ ASSERT_TRUE(mlext.get() == lext.get());
+
+ //split right
+ auto pin4 = remap_pin(t, std::move(rpin), 16 << 10, 16 << 10);
+ ASSERT_TRUE(pin4);
+ auto pin5 = remap_pin(t, std::move(pin4), 8 << 10, 8 << 10);
+ ASSERT_TRUE(pin5);
+ auto pin6 = remap_pin(t, std::move(pin5), 4 << 10, 4 << 10);
+ ASSERT_TRUE(pin6);
+ auto rext = get_extent(t, pin6->get_key(), pin6->get_length());
+ EXPECT_EQ('r', rext->get_bptr().c_str()[0]);
+ auto mrext = mutate_extent(t, rext);
+ ASSERT_TRUE(mrext->is_exist_mutation_pending());
+ ASSERT_TRUE(mrext.get() == rext.get());
+
+ submit_transaction(std::move(t));
+ check();
+ }
+ replay();
+ check();
+ });
+ }
+
+ // Single-transaction overwrite test covering all three overwrite
+ // shapes: middle (left + right remainders), front-aligned (right only)
+ // and tail-aligned (left only). Marker bytes written before the
+ // overwrites verify that the remainders keep the original data.
+ void test_overwrite_pin() {
+ run_async([this] {
+ constexpr size_t m_offset = 8 << 10;
+ constexpr size_t m_len = 56 << 10;
+ constexpr size_t l_offset = 64 << 10;
+ constexpr size_t l_len = 64 << 10;
+ constexpr size_t r_offset = 128 << 10;
+ constexpr size_t r_len = 64 << 10;
+ {
+ auto t = create_transaction();
+ auto m_ext = alloc_extent(t, m_offset, m_len);
+ // Markers land at the start of each expected remainder.
+ m_ext->set_contents('a', 0 << 10, 8 << 10);
+ m_ext->set_contents('b', 16 << 10, 4 << 10);
+ m_ext->set_contents('c', 36 << 10, 4 << 10);
+ m_ext->set_contents('d', 52 << 10, 4 << 10);
+
+ auto l_ext = alloc_extent(t, l_offset, l_len);
+ auto r_ext = alloc_extent(t, r_offset, r_len);
+ submit_transaction(std::move(t));
+ }
+ {
+ auto t = create_transaction();
+ auto mpin = get_pin(t, m_offset);
+ auto lpin = get_pin(t, l_offset);
+ auto rpin = get_pin(t, r_offset);
+
+ bufferlist mbl1, mbl2, mbl3;
+ mbl1.append(ceph::bufferptr(ceph::buffer::create(8 << 10, 0)));
+ mbl2.append(ceph::bufferptr(ceph::buffer::create(16 << 10, 0)));
+ mbl3.append(ceph::bufferptr(ceph::buffer::create(12 << 10, 0)));
+ // Chain of middle overwrites, each applied to the previous right
+ // remainder.
+ auto [mlp1, mext1, mrp1] = overwrite_pin(
+ t, std::move(mpin), 8 << 10 , 8 << 10, mbl1);
+ auto [mlp2, mext2, mrp2] = overwrite_pin(
+ t, std::move(mrp1), 4 << 10 , 16 << 10, mbl2);
+ auto [mlpin3, me3, mrpin3] = overwrite_pin(
+ t, std::move(mrp2), 4 << 10 , 12 << 10, mbl3);
+ auto mlext1 = get_extent(t, mlp1->get_key(), mlp1->get_length());
+ auto mlext2 = get_extent(t, mlp2->get_key(), mlp2->get_length());
+ auto mlext3 = get_extent(t, mlpin3->get_key(), mlpin3->get_length());
+ auto mrext3 = get_extent(t, mrpin3->get_key(), mrpin3->get_length());
+ EXPECT_EQ('a', mlext1->get_bptr().c_str()[0]);
+ EXPECT_EQ('b', mlext2->get_bptr().c_str()[0]);
+ EXPECT_EQ('c', mlext3->get_bptr().c_str()[0]);
+ EXPECT_EQ('d', mrext3->get_bptr().c_str()[0]);
+ auto mutate_mlext1 = mutate_extent(t, mlext1);
+ auto mutate_mlext2 = mutate_extent(t, mlext2);
+ auto mutate_mlext3 = mutate_extent(t, mlext3);
+ auto mutate_mrext3 = mutate_extent(t, mrext3);
+ ASSERT_TRUE(mutate_mlext1->is_exist_mutation_pending());
+ ASSERT_TRUE(mutate_mlext2->is_exist_mutation_pending());
+ ASSERT_TRUE(mutate_mlext3->is_exist_mutation_pending());
+ ASSERT_TRUE(mutate_mrext3->is_exist_mutation_pending());
+ ASSERT_TRUE(mutate_mlext1.get() == mlext1.get());
+ ASSERT_TRUE(mutate_mlext2.get() == mlext2.get());
+ ASSERT_TRUE(mutate_mlext3.get() == mlext3.get());
+ ASSERT_TRUE(mutate_mrext3.get() == mrext3.get());
+
+ bufferlist lbl1, rbl1;
+ lbl1.append(ceph::bufferptr(ceph::buffer::create(32 << 10, 0)));
+ // Front-aligned overwrite: no left remainder expected.
+ auto [llp1, lext1, lrp1] = overwrite_pin(
+ t, std::move(lpin), 0 , 32 << 10, lbl1);
+ EXPECT_FALSE(llp1);
+ EXPECT_TRUE(lrp1);
+ EXPECT_TRUE(lext1);
+
+ rbl1.append(ceph::bufferptr(ceph::buffer::create(32 << 10, 0)));
+ // Tail-aligned overwrite: no right remainder expected.
+ auto [rlp1, rext1, rrp1] = overwrite_pin(
+ t, std::move(rpin), 32 << 10 , 32 << 10, rbl1);
+ EXPECT_TRUE(rlp1);
+ EXPECT_TRUE(rext1);
+ EXPECT_FALSE(rrp1);
+
+ submit_transaction(std::move(t));
+ check();
+ }
+ replay();
+ check();
+ });
+ }
+
+ // Concurrency test: REMAP_NUM fibers all race to split the same extent
+ // at random 4K-aligned points. Because every fiber remaps the same pin,
+ // exactly one transaction can commit; the rest must conflict or exit
+ // early. Verified after journal replay.
+ void test_remap_pin_concurrent() {
+ run_async([this] {
+ constexpr unsigned REMAP_NUM = 32;
+ constexpr size_t offset = 0;
+ constexpr size_t length = 256 << 10;
+ {
+ auto t = create_transaction();
+ auto extent = alloc_extent(t, offset, length);
+ ASSERT_EQ(length, extent->get_length());
+ submit_transaction(std::move(t));
+ }
+ int success = 0;
+ int early_exit = 0;
+ int conflicted = 0;
+
+ seastar::parallel_for_each(
+ boost::make_counting_iterator(0u),
+ boost::make_counting_iterator(REMAP_NUM),
+ [&](auto) {
+ return seastar::async([&] {
+ // Pick random split points, rounded down to 4 (KiB units).
+ uint32_t pieces = std::uniform_int_distribution<>(6, 31)(gen);
+ std::set<uint32_t> split_points;
+ for (uint32_t i = 0; i < pieces; i++) {
+ auto p = std::uniform_int_distribution<>(1, 256)(gen);
+ split_points.insert(p - p % 4);
+ }
+
+ auto t = create_transaction();
+ auto pin0 = try_get_pin(t, offset);
+ if (!pin0 || pin0->get_length() != length) {
+ early_exit++;
+ return;
+ }
+
+ auto last_pin = pin0->duplicate();
+ ASSERT_TRUE(!split_points.empty());
+ for (auto off : split_points) {
+ if (off == 0 || off >= 255) {
+ continue;
+ }
+ auto new_off = (off << 10) - last_pin->get_key();
+ auto new_len = last_pin->get_length() - new_off;
+ //always remap right extent at new split_point
+ auto pin = remap_pin(t, std::move(last_pin), new_off, new_len);
+ if (!pin) {
+ conflicted++;
+ return;
+ }
+ last_pin = pin->duplicate();
+ }
+ auto last_ext = try_get_extent(t, last_pin->get_key());
+ if (last_ext) {
+ auto last_ext1 = mutate_extent(t, last_ext);
+ ASSERT_TRUE(last_ext1->is_exist_mutation_pending());
+ } else {
+ conflicted++;
+ return;
+ }
+
+ if (try_submit_transaction(std::move(t))) {
+ success++;
+ // NOTE(review): `t` was just moved-from, so t.t.get() here is
+ // presumably null — confirm whether this log is meaningful.
+ logger().info("transaction {} submit the transction",
+ static_cast<void*>(t.t.get()));
+ } else {
+ conflicted++;
+ }
+ });
+ }).handle_exception([](std::exception_ptr e) {
+ logger().info("{}", e);
+ }).get0();
+ logger().info("test_remap_pin_concurrent: "
+ "early_exit {} conflicted {} success {}",
+ early_exit, conflicted, success);
+ ASSERT_TRUE(success == 1);
+ ASSERT_EQ(success + conflicted + early_exit, REMAP_NUM);
+ replay();
+ check();
+ });
+ }
+
+ // Concurrency test: REMAP_NUM fibers race to overwrite random
+ // 4K-aligned sub-ranges of the same extent, pairing sorted random
+ // points into (start, end) overwrite windows. At most one transaction
+ // can commit; the rest conflict or exit early. Verified after replay.
+ void test_overwrite_pin_concurrent() {
+ run_async([this] {
+ constexpr unsigned REMAP_NUM = 32;
+ constexpr size_t offset = 0;
+ constexpr size_t length = 256 << 10;
+ {
+ auto t = create_transaction();
+ auto extent = alloc_extent(t, offset, length);
+ ASSERT_EQ(length, extent->get_length());
+ submit_transaction(std::move(t));
+ }
+ int success = 0;
+ int early_exit = 0;
+ int conflicted = 0;
+
+ seastar::parallel_for_each(
+ boost::make_counting_iterator(0u),
+ boost::make_counting_iterator(REMAP_NUM),
+ [&](auto) {
+ return seastar::async([&] {
+ // Even number of points so they pair into (start, end) windows.
+ uint32_t pieces = std::uniform_int_distribution<>(6, 31)(gen);
+ if (pieces % 2 == 1) {
+ pieces++;
+ }
+ std::list<uint32_t> split_points;
+ for (uint32_t i = 0; i < pieces; i++) {
+ auto p = std::uniform_int_distribution<>(1, 120)(gen);
+ split_points.push_back(p - p % 4);
+ }
+ split_points.sort();
+
+ auto t = create_transaction();
+ auto pin0 = try_get_pin(t, offset);
+ if (!pin0 || pin0->get_length() != length) {
+ early_exit++;
+ return;
+ }
+
+ auto empty_transaction = true;
+ auto last_rpin = pin0->duplicate();
+ ASSERT_TRUE(!split_points.empty());
+ while(!split_points.empty()) {
+ // new overwrite area: start_off ~ end_off
+ auto start_off = split_points.front();
+ split_points.pop_front();
+ auto end_off = split_points.front();
+ split_points.pop_front();
+ ASSERT_TRUE(start_off <= end_off);
+ // Skip degenerate windows (zero length or flush with the
+ // extent's end); bail out if nothing was ever overwritten.
+ if (((end_off << 10) == pin0->get_key() + pin0->get_length())
+ || (start_off == end_off)) {
+ if (split_points.empty() && empty_transaction) {
+ early_exit++;
+ return;
+ }
+ continue;
+ }
+ empty_transaction = false;
+ auto new_off = (start_off << 10) - last_rpin->get_key();
+ auto new_len = (end_off - start_off) << 10;
+ bufferlist bl;
+ bl.append(ceph::bufferptr(ceph::buffer::create(new_len, 0)));
+ auto [lpin, ext, rpin] = overwrite_pin(
+ t, last_rpin->duplicate(), new_off, new_len, bl);
+ if (!ext) {
+ conflicted++;
+ return;
+ }
+ // lpin is nullptr might not cause by confliction,
+ // it might just not exist.
+ if (lpin) {
+ auto lext = try_get_extent(t, lpin->get_key());
+ if (!lext) {
+ conflicted++;
+ return;
+ }
+ if (get_random_contents() % 2 == 0) {
+ auto lext1 = mutate_extent(t, lext);
+ ASSERT_TRUE(lext1->is_exist_mutation_pending());
+ }
+ }
+ ASSERT_TRUE(rpin);
+ last_rpin = rpin->duplicate();
+ }
+ auto last_rext = try_get_extent(t, last_rpin->get_key());
+ if (!last_rext) {
+ conflicted++;
+ return;
+ }
+ if (get_random_contents() % 2 == 0) {
+ auto last_rext1 = mutate_extent(t, last_rext);
+ ASSERT_TRUE(last_rext1->is_exist_mutation_pending());
+ }
+
+ if (try_submit_transaction(std::move(t))) {
+ success++;
+ // NOTE(review): `t` was just moved-from, so t.t.get() here is
+ // presumably null — confirm whether this log is meaningful.
+ logger().info("transaction {} submit the transction",
+ static_cast<void*>(t.t.get()));
+ } else {
+ conflicted++;
+ }
+ });
+ }).handle_exception([](std::exception_ptr e) {
+ logger().info("{}", e);
+ }).get0();
+ logger().info("test_overwrite_pin_concurrent: "
+ "early_exit {} conflicted {} success {}",
+ early_exit, conflicted, success);
+ ASSERT_TRUE(success == 1 || early_exit == REMAP_NUM);
+ ASSERT_EQ(success + conflicted + early_exit, REMAP_NUM);
+ replay();
+ check();
+ });
+ }
+};
+
+// Fixture: one primary device, no cold-tier devices.
+struct tm_single_device_test_t :
+ public transaction_manager_test_t {
+
+ tm_single_device_test_t() : transaction_manager_test_t(1, 0) {}
+};
+
+// Fixture: three primary devices, no cold-tier devices.
+struct tm_multi_device_test_t :
+ public transaction_manager_test_t {
+
+ tm_multi_device_test_t() : transaction_manager_test_t(3, 0) {}
+};
+
+// Fixture: one primary device plus two cold-tier devices (for eviction
+// tests such as test_evict).
+struct tm_multi_tier_device_test_t :
+ public transaction_manager_test_t {
+
+ tm_multi_tier_device_test_t() : transaction_manager_test_t(1, 2) {}
+};
+
+// Smoke test: allocate a single extent at a fixed laddr, verify mappings
+// before and after commit.
+TEST_P(tm_single_device_test_t, basic)
+{
+ constexpr laddr_t SIZE = 4096;
+ run_async([this] {
+ constexpr laddr_t ADDR = 0xFF * SIZE;
+ {
+ auto t = create_transaction();
+ auto extent = alloc_extent(
+ t,
+ ADDR,
+ SIZE,
+ 'a');
+ ASSERT_EQ(ADDR, extent->get_laddr());
+ check_mappings(t);
+ check();
+ submit_transaction(std::move(t));
+ check();
+ }
+ });
+}
+
+// Allocate an extent, replay the journal, mutate the extent in a second
+// transaction, and verify usage accounting and mappings after each step.
+TEST_P(tm_single_device_test_t, mutate)
+{
+ constexpr laddr_t SIZE = 4096;
+ run_async([this] {
+ constexpr laddr_t ADDR = 0xFF * SIZE;
+ {
+ auto t = create_transaction();
+ auto extent = alloc_extent(
+ t,
+ ADDR,
+ SIZE,
+ 'a');
+ ASSERT_EQ(ADDR, extent->get_laddr());
+ check_mappings(t);
+ check();
+ submit_transaction(std::move(t));
+ check();
+ }
+ ASSERT_TRUE(check_usage());
+ replay();
+ {
+ auto t = create_transaction();
+ auto ext = get_extent(
+ t,
+ ADDR,
+ SIZE);
+ auto mut = mutate_extent(t, ext);
+ check_mappings(t);
+ check();
+ submit_transaction(std::move(t));
+ check();
+ }
+ ASSERT_TRUE(check_usage());
+ replay();
+ check();
+ });
+}
+
+// Two transactions allocating different laddrs both touch the LBA root,
+// so after the first commits the second must conflict.
+TEST_P(tm_single_device_test_t, allocate_lba_conflict)
+{
+ constexpr laddr_t SIZE = 4096;
+ run_async([this] {
+ constexpr laddr_t ADDR = 0xFF * SIZE;
+ constexpr laddr_t ADDR2 = 0xFE * SIZE;
+ auto t = create_transaction();
+ auto t2 = create_transaction();
+
+ // These should conflict as they should both modify the lba root
+ auto extent = alloc_extent(
+ t,
+ ADDR,
+ SIZE,
+ 'a');
+ ASSERT_EQ(ADDR, extent->get_laddr());
+ check_mappings(t);
+ check();
+
+ auto extent2 = alloc_extent(
+ t2,
+ ADDR2,
+ SIZE,
+ 'a');
+ ASSERT_EQ(ADDR2, extent2->get_laddr());
+ check_mappings(t2);
+ extent2.reset();
+
+ submit_transaction(std::move(t2));
+ submit_transaction_expect_conflict(std::move(t));
+ });
+}
+
+// Populate 300 extents (enough to split the LBA tree), then mutate the
+// same laddr from two transactions: the second submit must conflict.
+// A follow-up mutation in a fresh transaction must succeed.
+TEST_P(tm_single_device_test_t, mutate_lba_conflict)
+{
+ constexpr laddr_t SIZE = 4096;
+ run_async([this] {
+ {
+ auto t = create_transaction();
+ for (unsigned i = 0; i < 300; ++i) {
+ auto extent = alloc_extent(
+ t,
+ laddr_t(i * SIZE),
+ SIZE);
+ }
+ check_mappings(t);
+ submit_transaction(std::move(t));
+ check();
+ }
+
+ constexpr laddr_t ADDR = 150 * SIZE;
+ {
+ auto t = create_transaction();
+ auto t2 = create_transaction();
+
+ mutate_addr(t, ADDR, SIZE);
+ mutate_addr(t2, ADDR, SIZE);
+
+ submit_transaction(std::move(t));
+ submit_transaction_expect_conflict(std::move(t2));
+ }
+ check();
+
+ {
+ auto t = create_transaction();
+ mutate_addr(t, ADDR, SIZE);
+ submit_transaction(std::move(t));
+ }
+ check();
+ });
+}
+
+// Mutating the first and last of 500 extents touches disjoint LBA leaf
+// nodes, so both transactions must commit without conflict.
+TEST_P(tm_single_device_test_t, concurrent_mutate_lba_no_conflict)
+{
+ constexpr laddr_t SIZE = 4096;
+ constexpr size_t NUM = 500;
+ constexpr laddr_t addr = 0;
+ constexpr laddr_t addr2 = SIZE * (NUM - 1);
+ run_async([this] {
+ {
+ auto t = create_transaction();
+ for (unsigned i = 0; i < NUM; ++i) {
+ auto extent = alloc_extent(
+ t,
+ laddr_t(i * SIZE),
+ SIZE);
+ }
+ submit_transaction(std::move(t));
+ }
+
+ {
+ auto t = create_transaction();
+ auto t2 = create_transaction();
+
+ mutate_addr(t, addr, SIZE);
+ mutate_addr(t2, addr2, SIZE);
+
+ submit_transaction(std::move(t));
+ submit_transaction(std::move(t2));
+ }
+ check();
+ });
+}
+
+// Allocate, release, and re-allocate the same laddr within a single
+// transaction; the final state must survive commit and journal replay.
+TEST_P(tm_single_device_test_t, create_remove_same_transaction)
+{
+ constexpr laddr_t SIZE = 4096;
+ run_async([this] {
+ constexpr laddr_t ADDR = 0xFF * SIZE;
+ {
+ auto t = create_transaction();
+ auto extent = alloc_extent(
+ t,
+ ADDR,
+ SIZE,
+ 'a');
+ ASSERT_EQ(ADDR, extent->get_laddr());
+ check_mappings(t);
+ dec_ref(t, ADDR);
+ check_mappings(t);
+
+ extent = alloc_extent(
+ t,
+ ADDR,
+ SIZE,
+ 'a');
+
+ submit_transaction(std::move(t));
+ check();
+ }
+ replay();
+ check();
+ });
+}
+
+// Allocate enough extents to grow the LBA tree, then remove most of
+// them in a single transaction so the tree both splits (first phase)
+// and merges (second phase), validating mappings at each step.
+TEST_P(tm_single_device_test_t, split_merge_read_same_transaction)
+{
+  constexpr laddr_t SIZE = 4096;
+  run_async([this] {
+    {
+      // Phase 1: 300 allocations — drives leaf splits.
+      auto t = create_transaction();
+      for (unsigned i = 0; i < 300; ++i) {
+	auto extent = alloc_extent(
+	  t,
+	  laddr_t(i * SIZE),
+	  SIZE);
+      }
+      check_mappings(t);
+      submit_transaction(std::move(t));
+      check();
+    }
+    {
+      // Phase 2: remove the first 240 — drives merges.
+      auto t = create_transaction();
+      for (unsigned i = 0; i < 240; ++i) {
+	dec_ref(
+	  t,
+	  laddr_t(i * SIZE));
+      }
+      check_mappings(t);
+      submit_transaction(std::move(t));
+      check();
+    }
+  });
+}
+
+// Exercise reference counting on a single extent: alloc (ref=1),
+// inc_ref (ref=2), dec_ref twice (back to 0), with journal replays
+// interleaved to check the refcount state is durable.
+TEST_P(tm_single_device_test_t, inc_dec_ref)
+{
+  constexpr laddr_t SIZE = 4096;
+  run_async([this] {
+    constexpr laddr_t ADDR = 0xFF * SIZE;
+    {
+      auto t = create_transaction();
+      auto extent = alloc_extent(
+	t,
+	ADDR,
+	SIZE,
+	'a');
+      ASSERT_EQ(ADDR, extent->get_laddr());
+      check_mappings(t);
+      check();
+      submit_transaction(std::move(t));
+      check();
+    }
+    replay();
+    {
+      // Bump the refcount to 2.
+      auto t = create_transaction();
+      inc_ref(t, ADDR);
+      check_mappings(t);
+      check();
+      submit_transaction(std::move(t));
+      check();
+    }
+    {
+      // First decrement: extent must still be mapped.
+      auto t = create_transaction();
+      dec_ref(t, ADDR);
+      check_mappings(t);
+      check();
+      submit_transaction(std::move(t));
+      check();
+    }
+    replay();
+    {
+      // Second decrement releases the extent.
+      auto t = create_transaction();
+      dec_ref(t, ADDR);
+      check_mappings(t);
+      check();
+      submit_transaction(std::move(t));
+      check();
+    }
+  });
+}
+
+// Submit 200 single-extent transactions back-to-back; the accumulated
+// mappings are intended to force LBA node splits, verified by check().
+TEST_P(tm_single_device_test_t, cause_lba_split)
+{
+  constexpr laddr_t SIZE = 4096;
+  run_async([this] {
+    for (unsigned i = 0; i < 200; ++i) {
+      auto t = create_transaction();
+      auto extent = alloc_extent(
+	t,
+	i * SIZE,
+	SIZE,
+	(char)(i & 0xFF));  // distinct fill byte per extent
+      ASSERT_EQ(i * SIZE, extent->get_laddr());
+      submit_transaction(std::move(t));
+    }
+    check();
+  });
+}
+
+// Fill a 4MiB region with 4KiB blocks, then repeatedly mutate random
+// blocks while padding each transaction with a large throw-away
+// allocation, replaying the journal between rounds.
+TEST_P(tm_single_device_test_t, random_writes)
+{
+  constexpr size_t TOTAL = 4<<20;
+  constexpr size_t BSIZE = 4<<10;
+  constexpr size_t PADDING_SIZE = 256<<10;
+  constexpr size_t BLOCKS = TOTAL / BSIZE;
+  run_async([this] {
+    // Initial fill: one block per transaction.
+    for (unsigned i = 0; i < BLOCKS; ++i) {
+      auto t = create_transaction();
+      auto extent = alloc_extent(
+	t,
+	i * BSIZE,
+	BSIZE);
+      ASSERT_EQ(i * BSIZE, extent->get_laddr());
+      submit_transaction(std::move(t));
+    }
+
+    // 4 rounds of 65 transactions, each mutating 2 random blocks.
+    for (unsigned i = 0; i < 4; ++i) {
+      for (unsigned j = 0; j < 65; ++j) {
+	auto t = create_transaction();
+	for (unsigned k = 0; k < 2; ++k) {
+	  auto ext = get_extent(
+	    t,
+	    get_random_laddr(BSIZE, TOTAL),
+	    BSIZE);
+	  auto mut = mutate_extent(t, ext);
+	  // pad out transaction
+	  auto padding = alloc_extent(
+	    t,
+	    TOTAL + (k * PADDING_SIZE),  // beyond the data region
+	    PADDING_SIZE);
+	  dec_ref(t, padding->get_laddr());  // immediately released
+	}
+	submit_transaction(std::move(t));
+      }
+      // Replay and re-verify after every round.
+      replay();
+      logger().info("random_writes: {} checking", i);
+      check();
+      logger().info("random_writes: {} done replaying/checking", i);
+    }
+  });
+}
+
+// Run `max` parallel sequential-allocation streams; historically a
+// regression test for an assert in the find-hole path under concurrency.
+TEST_P(tm_single_device_test_t, find_hole_assert_trigger)
+{
+  constexpr unsigned max = 10;
+  constexpr size_t BSIZE = 4<<10;
+  int num = 40;
+  run([&, this] {
+    return seastar::parallel_for_each(
+      boost::make_counting_iterator(0u),
+      boost::make_counting_iterator(max),
+      [&, this](auto idx) {
+	// idx is unused: each stream performs the same allocation work.
+	return allocate_sequentially(BSIZE, num);
+      });
+  });
+}
+
+// Remap and overwrite pins on an extent that has not been read since
+// the last replay (i.e. a "lazy" extent), replaying after each step to
+// confirm durability.
+TEST_P(tm_single_device_test_t, remap_lazy_read)
+{
+  constexpr laddr_t offset = 0;
+  constexpr size_t length = 256 << 10;
+  run_async([this, offset] {
+    {
+      // Write one 256KiB extent at offset 0.
+      auto t = create_transaction();
+      auto extent = alloc_extent(
+	t,
+	offset,
+	length,
+	'a');
+      ASSERT_EQ(offset, extent->get_laddr());
+      check_mappings(t);
+      submit_transaction(std::move(t));
+      check();
+    }
+    replay();
+    {
+      // Remap the (not-yet-read) pin down to its first 128KiB.
+      auto t = create_transaction();
+      auto pin = get_pin(t, offset);
+      auto rpin = remap_pin(t, std::move(pin), 0, 128 << 10);
+      check_mappings(t);
+      submit_transaction(std::move(t));
+      check();
+    }
+    replay();
+    {
+      // Overwrite 64KiB at offset 4KiB, splitting the pin into
+      // left remainder / new extent / right remainder.
+      auto t = create_transaction();
+      auto pin = get_pin(t, offset);
+      bufferlist bl;
+      bl.append(ceph::bufferptr(ceph::buffer::create(64 << 10, 0)));
+      auto [lpin, ext, rpin] = overwrite_pin(
+	t, std::move(pin), 4 << 10 , 64 << 10, bl);
+      check_mappings(t);
+      submit_transaction(std::move(t));
+      check();
+    }
+    replay();
+  });
+}
+
+// The following TEST_P bodies delegate to shared helpers defined on the
+// fixture, so the same scenario can run against single-device,
+// multi-device, and multi-tier configurations.
+TEST_P(tm_single_device_test_t, random_writes_concurrent)
+{
+  test_random_writes_concurrent();
+}
+
+TEST_P(tm_multi_device_test_t, random_writes_concurrent)
+{
+  test_random_writes_concurrent();
+}
+
+// Eviction between tiers only makes sense on the multi-tier fixture.
+TEST_P(tm_multi_tier_device_test_t, evict)
+{
+  test_evict();
+}
+
+TEST_P(tm_single_device_test_t, parallel_extent_read)
+{
+  test_parallel_extent_read();
+}
+
+TEST_P(tm_single_device_test_t, test_remap_pin)
+{
+  test_remap_pin();
+}
+
+TEST_P(tm_single_device_test_t, test_overwrite_pin)
+{
+  test_overwrite_pin();
+}
+
+TEST_P(tm_single_device_test_t, test_remap_pin_concurrent)
+{
+  test_remap_pin_concurrent();
+}
+
+TEST_P(tm_single_device_test_t, test_overwrite_pin_concurrent)
+{
+  test_overwrite_pin_concurrent();
+}
+
+// Parameterize each fixture by journal backend.  The single-device
+// suite runs against both the segmented and circular-bounded journals;
+// the multi-device and multi-tier suites only support "segmented"
+// (see the ceph_asserts in EphemeralTestState::tm_setup).
+INSTANTIATE_TEST_SUITE_P(
+  transaction_manager_test,
+  tm_single_device_test_t,
+  ::testing::Values (
+    "segmented",
+    "circularbounded"
+  )
+);
+
+INSTANTIATE_TEST_SUITE_P(
+  transaction_manager_test,
+  tm_multi_device_test_t,
+  ::testing::Values (
+    "segmented"
+  )
+);
+
+INSTANTIATE_TEST_SUITE_P(
+  transaction_manager_test,
+  tm_multi_tier_device_test_t,
+  ::testing::Values (
+    "segmented"
+  )
+);
diff --git a/src/test/crimson/seastore/transaction_manager_test_state.h b/src/test/crimson/seastore/transaction_manager_test_state.h
new file mode 100644
index 000000000..81200b1db
--- /dev/null
+++ b/src/test/crimson/seastore/transaction_manager_test_state.h
@@ -0,0 +1,450 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <random>
+#include <boost/iterator/counting_iterator.hpp>
+
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/extent_placement_manager.h"
+#include "crimson/os/seastore/logging.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/segment_manager/ephemeral.h"
+#include "crimson/os/seastore/seastore.h"
+#include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/collection_manager/flat_collection_manager.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
+#include "crimson/os/seastore/random_block_manager/rbm_device.h"
+#include "crimson/os/seastore/journal/circular_bounded_journal.h"
+#include "crimson/os/seastore/random_block_manager/block_rb_manager.h"
+#ifdef UNIT_TESTS_BUILT
+#include "test/crimson/gtest_seastar.h"
+#endif
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+
+// Abstract interface over the in-memory device set backing a test:
+// one primary device plus zero or more secondary devices.  Implemented
+// below for segmented (segment-manager) and random-block backends.
+class EphemeralDevices {
+public:
+  // Create and mkfs the device(s); must complete before first mount.
+  virtual seastar::future<> setup() = 0;
+  // Re-attach the devices between unmount/mount cycles.
+  virtual void remount() = 0;
+  virtual std::size_t get_num_devices() const = 0;
+  // Drop all devices (and their contents).
+  virtual void reset() = 0;
+  virtual std::vector<Device*> get_secondary_devices() = 0;
+  virtual ~EphemeralDevices() {}
+  virtual Device* get_primary_device() = 0;
+  // Transfer/restore ownership of the primary device (see the
+  // out-of-line definitions near the end of this header).
+  virtual DeviceRef get_primary_device_ref() = 0;
+  virtual void set_primary_device_ref(DeviceRef) = 0;
+};
+
+// Segmented-backend device set: one primary EphemeralSegmentManager and
+// (num_main + num_cold - 1) secondaries, all created and mkfs'd in
+// setup().  Device ids are assigned by position: primary gets 0,
+// secondaries get 1..N in list order.
+class EphemeralSegmentedDevices : public EphemeralDevices {
+  segment_manager::EphemeralSegmentManagerRef segment_manager;
+  std::list<segment_manager::EphemeralSegmentManagerRef> secondary_segment_managers;
+  std::size_t num_main_device_managers;
+  std::size_t num_cold_device_managers;
+
+public:
+  EphemeralSegmentedDevices(std::size_t num_main_devices,
+                            std::size_t num_cold_devices)
+    : num_main_device_managers(num_main_devices),
+      num_cold_device_managers(num_cold_devices)
+  {
+    auto num_device_managers = num_main_device_managers + num_cold_device_managers;
+    assert(num_device_managers > 0);
+    // One of the devices is the primary; the rest are secondaries.
+    secondary_segment_managers.resize(num_device_managers - 1);
+  }
+
+  seastar::future<> setup() final {
+    segment_manager = segment_manager::create_test_ephemeral();
+    for (auto &sec_sm : secondary_segment_managers) {
+      sec_sm = segment_manager::create_test_ephemeral();
+    }
+    // init() everything, then mkfs the primary (device id 0) and each
+    // secondary (ids 1..N, counted via cnt).
+    return segment_manager->init(
+    ).safe_then([this] {
+      return crimson::do_for_each(
+        secondary_segment_managers.begin(),
+        secondary_segment_managers.end(),
+        [](auto &sec_sm)
+      {
+        return sec_sm->init();
+      });
+    }).safe_then([this] {
+      return segment_manager->mkfs(
+        segment_manager::get_ephemeral_device_config(
+          0, num_main_device_managers, num_cold_device_managers));
+    }).safe_then([this] {
+      return seastar::do_with(std::size_t(0), [this](auto &cnt) {
+        return crimson::do_for_each(
+          secondary_segment_managers.begin(),
+          secondary_segment_managers.end(),
+          [this, &cnt](auto &sec_sm)
+        {
+          ++cnt;
+          return sec_sm->mkfs(
+            segment_manager::get_ephemeral_device_config(
+              cnt, num_main_device_managers, num_cold_device_managers));
+        });
+      });
+    }).handle_error(
+      crimson::ct_error::assert_all{}
+    );
+  }
+
+  void remount() final {
+    segment_manager->remount();
+    for (auto &sec_sm : secondary_segment_managers) {
+      sec_sm->remount();
+    }
+  }
+
+  std::size_t get_num_devices() const final {
+    return secondary_segment_managers.size() + 1;
+  }
+
+  void reset() final {
+    segment_manager.reset();
+    for (auto &sec_sm : secondary_segment_managers) {
+      sec_sm.reset();
+    }
+  }
+
+  std::vector<Device*> get_secondary_devices() final {
+    std::vector<Device*> sec_devices;
+    for (auto &sec_sm : secondary_segment_managers) {
+      sec_devices.emplace_back(sec_sm.get());
+    }
+    return sec_devices;
+  }
+
+  Device* get_primary_device() final {
+    return segment_manager.get();
+  }
+  // Defined out-of-line below (they move ownership in/out of this class).
+  DeviceRef get_primary_device_ref() final;
+  void set_primary_device_ref(DeviceRef) final;
+};
+
+// Random-block backend device set.  NOTE(review): setup() only creates
+// and mkfs's the primary device — secondary_rb_devices are sized but
+// never initialized here, consistent with the "TODO: multiple devices"
+// restriction enforced in EphemeralTestState::tm_setup.
+class EphemeralRandomBlockDevices : public EphemeralDevices {
+  random_block_device::RBMDeviceRef rb_device;
+  std::list<random_block_device::RBMDeviceRef> secondary_rb_devices;
+
+public:
+  EphemeralRandomBlockDevices(std::size_t num_device_managers) {
+    assert(num_device_managers > 0);
+    secondary_rb_devices.resize(num_device_managers - 1);
+  }
+
+  seastar::future<> setup() final {
+    rb_device = random_block_device::create_test_ephemeral();
+    device_config_t config = get_rbm_ephemeral_device_config(0, 1);
+    return rb_device->mkfs(config).handle_error(crimson::ct_error::assert_all{});
+  }
+
+  // Ephemeral RBM devices need no explicit remount step.
+  void remount() final {}
+
+  std::size_t get_num_devices() const final {
+    return secondary_rb_devices.size() + 1;
+  }
+
+  void reset() final {
+    rb_device.reset();
+    for (auto &sec_rb : secondary_rb_devices) {
+      sec_rb.reset();
+    }
+  }
+
+  std::vector<Device*> get_secondary_devices() final {
+    std::vector<Device*> sec_devices;
+    for (auto &sec_rb : secondary_rb_devices) {
+      sec_devices.emplace_back(sec_rb.get());
+    }
+    return sec_devices;
+  }
+
+  Device* get_primary_device() final {
+    return rb_device.get();
+  }
+  // Defined out-of-line below (they move ownership in/out of this class).
+  DeviceRef get_primary_device_ref() final;
+  void set_primary_device_ref(DeviceRef) final;
+};
+
+// Common lifecycle driver for seastore tests: owns the device set and
+// sequences mkfs / mount / unmount / replay via the _init/_destroy/
+// _teardown/_mkfs/_mount hooks implemented by TMTestState and
+// SeaStoreTestState below.  Under UNIT_TESTS_BUILT it is additionally a
+// gtest value-parameterized fixture whose parameter selects the journal
+// backend ("segmented" or "circularbounded"); the #ifdef supplies the
+// opening brace of the class in both configurations.
+class EphemeralTestState
+#ifdef UNIT_TESTS_BUILT
+  : public ::testing::WithParamInterface<const char*> {
+#else
+  {
+#endif
+protected:
+  journal_type_t journal_type;
+  size_t num_main_device_managers = 0;
+  size_t num_cold_device_managers = 0;
+  EphemeralDevicesRef devices;
+  // NOTE(review): never initialized in this header — presumably set by
+  // a subclass or left unused; confirm before relying on it.
+  bool secondary_is_cold;
+  EphemeralTestState(std::size_t num_main_device_managers,
+                     std::size_t num_cold_device_managers) :
+    num_main_device_managers(num_main_device_managers),
+    num_cold_device_managers(num_cold_device_managers) {}
+
+  // Construct the store object(s) over the current devices.
+  virtual seastar::future<> _init() = 0;
+
+  // Tear down the store object(s); _teardown() unmounts first.
+  virtual seastar::future<> _destroy() = 0;
+  virtual seastar::future<> _teardown() = 0;
+  seastar::future<> teardown() {
+    return _teardown().then([this] {
+      return _destroy();
+    });
+  }
+
+  virtual FuturizedStore::mkfs_ertr::future<> _mkfs() = 0;
+  virtual FuturizedStore::mount_ertr::future<> _mount() = 0;
+
+  // Full unmount/remount cycle: exercises journal replay on the same
+  // device contents.
+  seastar::future<> restart_fut() {
+    LOG_PREFIX(EphemeralTestState::restart_fut);
+    SUBINFO(test, "begin ...");
+    return teardown().then([this] {
+      devices->remount();
+      return _init().then([this] {
+        return _mount().handle_error(crimson::ct_error::assert_all{});
+      });
+    }).then([FNAME] {
+      SUBINFO(test, "finish");
+    });
+  }
+
+  // Synchronous wrapper; only valid inside a seastar::thread context.
+  void restart() {
+    restart_fut().get0();
+  }
+
+  // Create devices per the selected journal backend, mkfs, then mount
+  // via restart_fut().
+  seastar::future<> tm_setup() {
+    LOG_PREFIX(EphemeralTestState::tm_setup);
+#ifdef UNIT_TESTS_BUILT
+    std::string j_type = GetParam();
+#else
+    std::string j_type = "segmented";
+#endif
+    if (j_type == "circularbounded") {
+      //TODO: multiple devices
+      ceph_assert(num_main_device_managers == 1);
+      ceph_assert(num_cold_device_managers == 0);
+      devices.reset(new EphemeralRandomBlockDevices(1));
+    } else {
+      // segmented by default
+      devices.reset(new
+        EphemeralSegmentedDevices(
+          num_main_device_managers, num_cold_device_managers));
+    }
+    SUBINFO(test, "begin with {} devices ...", devices->get_num_devices());
+    return devices->setup(
+    ).then([this] {
+      return _init();
+    }).then([this, FNAME] {
+      return _mkfs(
+      ).safe_then([this] {
+        return restart_fut();
+      }).handle_error(
+        crimson::ct_error::assert_all{}
+      ).then([FNAME] {
+        SUBINFO(test, "finish");
+      });
+    });
+  }
+
+  seastar::future<> tm_teardown() {
+    LOG_PREFIX(EphemeralTestState::tm_teardown);
+    SUBINFO(test, "begin");
+    return teardown().then([this, FNAME] {
+      devices->reset();
+      SUBINFO(test, "finish");
+    });
+  }
+};
+
+// EphemeralTestState specialization that drives a bare
+// TransactionManager (no full SeaStore on top).  Caches raw pointers to
+// the TM's epm/lba_manager/cache for direct inspection by tests; these
+// are non-owning and are nulled in _destroy().
+class TMTestState : public EphemeralTestState {
+protected:
+  TransactionManagerRef tm;
+  LBAManager *lba_manager;
+  Cache* cache;
+  ExtentPlacementManager *epm;
+  // Monotonic counter handed out by submit_transaction_fut_with_seq.
+  uint64_t seq = 0;
+
+  TMTestState() : EphemeralTestState(1, 0) {}
+
+  TMTestState(std::size_t num_main_devices, std::size_t num_cold_devices)
+    : EphemeralTestState(num_main_devices, num_cold_devices) {}
+
+  virtual seastar::future<> _init() override {
+    auto sec_devices = devices->get_secondary_devices();
+    auto p_dev = devices->get_primary_device();
+    tm = make_transaction_manager(p_dev, sec_devices, true);
+    epm = tm->get_epm();
+    lba_manager = tm->get_lba_manager();
+    cache = tm->get_cache();
+    return seastar::now();
+  }
+
+  virtual seastar::future<> _destroy() override {
+    // Clear the borrowed pointers before releasing their owner.
+    epm = nullptr;
+    lba_manager = nullptr;
+    cache = nullptr;
+    tm.reset();
+    return seastar::now();
+  }
+
+  virtual seastar::future<> _teardown() {
+    return tm->close().handle_error(
+      crimson::ct_error::assert_all{"Error in teardown"}
+    );
+  }
+
+  virtual FuturizedStore::mount_ertr::future<> _mount() {
+    return tm->mount(
+    ).handle_error(
+      crimson::ct_error::assert_all{"Error in mount"}
+    ).then([this] {
+      // Run background work deterministically instead of concurrently
+      // with the test body.
+      return epm->stop_background();
+    }).then([this] {
+      return epm->run_background_work_until_halt();
+    });
+  }
+
+  virtual FuturizedStore::mkfs_ertr::future<> _mkfs() {
+    return tm->mkfs(
+    ).handle_error(
+      crimson::ct_error::assert_all{"Error in mkfs"}
+    );
+  }
+
+  auto create_mutate_transaction() {
+    return tm->create_transaction(
+      Transaction::src_t::MUTATE, "test_mutate");
+  }
+
+  auto create_read_transaction() {
+    return tm->create_transaction(
+      Transaction::src_t::READ, "test_read");
+  }
+
+  // Weak transactions observe without participating in conflict checks.
+  auto create_weak_transaction() {
+    return tm->create_transaction(
+      Transaction::src_t::READ, "test_read_weak", true);
+  }
+
+  // Raw (interruptible-context) submit; caller manages interruption.
+  auto submit_transaction_fut2(Transaction& t) {
+    return tm->submit_transaction(t);
+  }
+
+  // Submit wrapped in with_trans_intr for use outside an intr context.
+  auto submit_transaction_fut(Transaction &t) {
+    return with_trans_intr(
+      t,
+      [this](auto &t) {
+	return tm->submit_transaction(t);
+      });
+  }
+  // As above, but resolves to a unique sequence number on success.
+  auto submit_transaction_fut_with_seq(Transaction &t) {
+    using ertr = TransactionManager::base_iertr;
+    return with_trans_intr(
+      t,
+      [this](auto &t) {
+	return tm->submit_transaction(t
+	).si_then([this] {
+	  return ertr::make_ready_future<uint64_t>(seq++);
+	});
+      });
+  }
+
+  // Synchronous submit + drain background work; only valid inside a
+  // seastar::thread context.
+  void submit_transaction(TransactionRef t) {
+    submit_transaction_fut(*t).unsafe_get0();
+    epm->run_background_work_until_halt().get0();
+  }
+};
+
+
+// Out-of-line ownership transfer helpers.  get_primary_device_ref()
+// moves the owning ref out (leaving the member empty until
+// set_primary_device_ref() restores it); the setters downcast the
+// generic DeviceRef back to the concrete backend type.
+DeviceRef EphemeralSegmentedDevices::get_primary_device_ref() {
+  return std::move(segment_manager);
+}
+
+DeviceRef EphemeralRandomBlockDevices::get_primary_device_ref() {
+  return std::move(rb_device);
+}
+
+void EphemeralSegmentedDevices::set_primary_device_ref(DeviceRef dev) {
+  segment_manager =
+    segment_manager::EphemeralSegmentManagerRef(
+      static_cast<segment_manager::EphemeralSegmentManager*>(dev.release()));
+}
+
+void EphemeralRandomBlockDevices::set_primary_device_ref(DeviceRef dev) {
+  rb_device =
+    random_block_device::RBMDeviceRef(
+      static_cast<random_block_device::RBMDevice*>(dev.release()));
+}
+
+// EphemeralTestState specialization running a full SeaStore, with its
+// metadata store replaced by an in-memory std::map so no real MDStore
+// backend is needed.
+class SeaStoreTestState : public EphemeralTestState {
+  class TestMDStoreState {
+    // Backing storage shared by every Store handed out below.
+    std::map<std::string, std::string> md;
+  public:
+    // In-memory SeaStore::MDStore: reads/writes go straight to parent.md.
+    class Store final : public SeaStore::MDStore {
+      TestMDStoreState &parent;
+    public:
+      Store(TestMDStoreState &parent) : parent(parent) {}
+
+      write_meta_ret write_meta(
+	const std::string& key, const std::string& value) final {
+	parent.md[key] = value;
+	return seastar::now();
+      }
+
+      // Returns std::nullopt for unknown keys rather than erroring.
+      read_meta_ret read_meta(const std::string& key) final {
+	auto iter = parent.md.find(key);
+	if (iter != parent.md.end()) {
+	  return read_meta_ret(
+	    read_meta_ertr::ready_future_marker{},
+	    iter->second);
+	} else {
+	  return read_meta_ret(
+	    read_meta_ertr::ready_future_marker{},
+	    std::nullopt);
+	}
+      }
+    };
+    Store get_mdstore() {
+      return Store(*this);
+    }
+  } mdstore_state;
+
+protected:
+  std::unique_ptr<SeaStore> seastore;
+  // Borrowed from *seastore; valid only while seastore is alive.
+  FuturizedStore::Shard *sharded_seastore;
+
+  SeaStoreTestState() : EphemeralTestState(1, 0) {}
+
+  virtual seastar::future<> _init() final {
+    seastore = make_test_seastore(
+      std::make_unique<TestMDStoreState::Store>(mdstore_state.get_mdstore()));
+    // Ownership of the primary device moves into the SeaStore here ...
+    return seastore->test_start(devices->get_primary_device_ref()
+    ).then([this] {
+      sharded_seastore = &(seastore->get_sharded_store());
+    });
+  }
+
+  virtual seastar::future<> _destroy() final {
+    // ... and is handed back to the device set on destruction.
+    devices->set_primary_device_ref(seastore->get_primary_device_ref());
+    return seastore->stop().then([this] {
+      seastore.reset();
+    });
+  }
+
+  virtual seastar::future<> _teardown() final {
+    return seastore->umount();
+  }
+
+  virtual FuturizedStore::mount_ertr::future<> _mount() final {
+    return seastore->test_mount();
+  }
+
+  virtual FuturizedStore::mkfs_ertr::future<> _mkfs() final {
+    return seastore->test_mkfs(uuid_d{});
+  }
+};
diff --git a/src/test/crimson/test_alien_echo.cc b/src/test/crimson/test_alien_echo.cc
new file mode 100644
index 000000000..8bef5e651
--- /dev/null
+++ b/src/test/crimson/test_alien_echo.cc
@@ -0,0 +1,294 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#include "auth/Auth.h"
+#include "messages/MPing.h"
+#include "common/ceph_argparse.h"
+#include "crimson/auth/DummyAuth.h"
+#include "crimson/common/throttle.h"
+#include "crimson/net/Connection.h"
+#include "crimson/net/Dispatcher.h"
+#include "crimson/net/Messenger.h"
+
+#include <seastar/core/alien.hh>
+#include <seastar/core/app-template.hh>
+#include <seastar/core/future-util.hh>
+#include <seastar/core/internal/pollable_fd.hh>
+#include <seastar/core/posix.hh>
+#include <seastar/core/reactor.hh>
+
+using crimson::common::local_conf;
+
+// Which side of the ping/pong exchange this process plays.
+enum class echo_role {
+  as_server,
+  as_client,
+};
+
+namespace seastar_pingpong {
+// Authorizer stub for the test: claims CEPHX but accepts every reply
+// and every challenge unconditionally.
+struct DummyAuthAuthorizer : public AuthAuthorizer {
+  DummyAuthAuthorizer()
+    : AuthAuthorizer(CEPH_AUTH_CEPHX)
+  {}
+  bool verify_reply(bufferlist::const_iterator&,
+                    std::string *connection_secret) override {
+    return true;
+  }
+  bool add_challenge(CephContext*, const bufferlist&) override {
+    return true;
+  }
+};
+
+// Pong side: answers each incoming MPing with an MPing reply, counting
+// replies and signalling on_reply so the driver can wait for N echoes.
+struct Server {
+  crimson::common::Throttle byte_throttler;
+  crimson::net::MessengerRef msgr;
+  crimson::auth::DummyAuthClientServer dummy_auth;
+  struct ServerDispatcher final : crimson::net::Dispatcher {
+    unsigned count = 0;                    // pings answered so far
+    seastar::condition_variable on_reply;  // signalled after each reply
+    std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef c,
+                                                 MessageRef m) final
+    {
+      std::cout << "server got ping " << *m << std::endl;
+      // reply with a pong
+      return c->send(crimson::make_message<MPing>()).then([this] {
+        ++count;
+        on_reply.signal();
+        return seastar::now();
+      });
+    }
+  } dispatcher;
+  Server(crimson::net::MessengerRef msgr)
+    : byte_throttler(local_conf()->osd_client_message_size_cap),
+      msgr{msgr}
+  { }
+};
+
+// Ping side: counts pongs received and signals on_reply so the send
+// loop in seastar_echo() can pace one ping per pong.
+struct Client {
+  crimson::common::Throttle byte_throttler;
+  crimson::net::MessengerRef msgr;
+  crimson::auth::DummyAuthClientServer dummy_auth;
+  struct ClientDispatcher final : crimson::net::Dispatcher {
+    unsigned count = 0;                    // pongs received so far
+    seastar::condition_variable on_reply;  // signalled per pong
+    std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef c,
+                                                 MessageRef m) final
+    {
+      std::cout << "client got pong " << *m << std::endl;
+      ++count;
+      on_reply.signal();
+      return seastar::now();
+    }
+  } dispatcher;
+  Client(crimson::net::MessengerRef msgr)
+    : byte_throttler(local_conf()->osd_client_message_size_cap),
+      msgr{msgr}
+  { }
+};
+} // namespace seastar_pingpong
+
+// Handshake between the seastar reactor thread and an "alien" (plain
+// std::thread) worker, built on two eventfds:
+//   begin_fd — reactor signals when it is up (set_seastar_ready);
+//   on_end   — alien signals when its work is done so the reactor may exit.
+// NOTE(review): with_seastar() must be called before run(): run() moves
+// the on_end file_desc into the reactor, while with_seastar() copies
+// only its raw fd beforehand.  begin_fd is a raw int and is never
+// closed — acceptable for a test binary.
+class SeastarContext {
+  int begin_fd;
+  seastar::file_desc on_end;
+
+public:
+  SeastarContext()
+    : begin_fd{eventfd(0, 0)},
+      on_end{seastar::file_desc::eventfd(0, 0)}
+  {}
+
+  // Spawn the alien thread: wait for the reactor, run func, then signal
+  // on_end so the reactor can shut down.
+  template<class Func>
+  std::thread with_seastar(Func&& func) {
+    return std::thread{[this, on_end = on_end.get(),
+                        func = std::forward<Func>(func)] {
+      // alien: are you ready?
+      wait_for_seastar();
+      // alien: could you help me apply(func)?
+      func();
+      // alien: i've sent my request. have you replied it?
+      // wait_for_seastar();
+      // alien: you are free to go!
+      ::eventfd_write(on_end, 1);
+    }};
+  }
+
+  // Run the reactor: parse ceph config, announce readiness, then block
+  // until the alien thread writes on_end, and shut down cleanly.
+  void run(seastar::app_template& app, int argc, char** argv) {
+    app.run(argc, argv, [this] {
+      std::vector<const char*> args;
+      std::string cluster;
+      std::string conf_file_list;
+      auto init_params = ceph_argparse_early_args(args,
+                                                  CEPH_ENTITY_TYPE_CLIENT,
+                                                  &cluster,
+                                                  &conf_file_list);
+      return crimson::common::sharded_conf().start(init_params.name, cluster)
+        .then([conf_file_list] {
+          return local_conf().parse_config_files(conf_file_list);
+        }).then([this] {
+          return set_seastar_ready();
+        }).then([on_end = std::move(on_end)] () mutable {
+          // seastar: let me know once i am free to leave.
+          return seastar::do_with(seastar::pollable_fd(std::move(on_end)), []
+            (seastar::pollable_fd& on_end_fds) {
+            return on_end_fds.readable().then([&on_end_fds] {
+              eventfd_t result = 0;
+              on_end_fds.get_file_desc().read(&result, sizeof(result));
+              return seastar::make_ready_future<>();
+            });
+          });
+        }).then([]() {
+          return crimson::common::sharded_conf().stop();
+        }).handle_exception([](auto ep) {
+          std::cerr << "Error: " << ep << std::endl;
+        }).finally([] {
+          seastar::engine().exit(0);
+        });
+    });
+  }
+
+  seastar::future<> set_seastar_ready() {
+    // seastar: i am ready to serve!
+    ::eventfd_write(begin_fd, 1);
+    return seastar::now();
+  }
+
+private:
+  // Blocking read on begin_fd from the alien thread; errors are only
+  // logged since the test cannot proceed meaningfully anyway.
+  void wait_for_seastar() {
+    eventfd_t result = 0;
+    if (int r = ::eventfd_read(begin_fd, &result); r < 0) {
+      std::cerr << "unable to eventfd_read():" << errno << std::endl;
+    }
+  }
+};
+
+// Run the echo exchange on the reactor: as server, answer pings until
+// `count` replies have been sent; as client, send one ping per pong
+// until `count` pongs have arrived.  Either way the messenger is
+// stopped and shut down before the future resolves.
+static seastar::future<>
+seastar_echo(const entity_addr_t addr, echo_role role, unsigned count)
+{
+  std::cout << "seastar/";
+  if (role == echo_role::as_server) {
+    return seastar::do_with(
+      seastar_pingpong::Server{crimson::net::Messenger::create(
+        entity_name_t::OSD(0), "server", addr.get_nonce(), true)},
+      [addr, count](auto& server) mutable {
+        std::cout << "server listening at " << addr << std::endl;
+        // bind the server
+        server.msgr->set_default_policy(crimson::net::SocketPolicy::stateless_server(0));
+        server.msgr->set_policy_throttler(entity_name_t::TYPE_OSD,
+                                          &server.byte_throttler);
+        server.msgr->set_auth_client(&server.dummy_auth);
+        server.msgr->set_auth_server(&server.dummy_auth);
+        return server.msgr->bind(entity_addrvec_t{addr}
+        ).safe_then([&server] {
+          return server.msgr->start({&server.dispatcher});
+        }, crimson::net::Messenger::bind_ertr::all_same_way([](auto& e) {
+          ceph_abort_msg("bind failed");
+        })).then([&dispatcher=server.dispatcher, count] {
+          // Block until `count` pings have been answered.
+          return dispatcher.on_reply.wait([&dispatcher, count] {
+            return dispatcher.count >= count;
+          });
+        }).finally([&server] {
+          std::cout << "server shutting down" << std::endl;
+          server.msgr->stop();
+          return server.msgr->shutdown();
+        });
+      });
+  } else {
+    return seastar::do_with(
+      seastar_pingpong::Client{crimson::net::Messenger::create(
+        entity_name_t::OSD(1), "client", addr.get_nonce(), true)},
+      [addr, count](auto& client) {
+        std::cout << "client sending to " << addr << std::endl;
+        client.msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0));
+        client.msgr->set_policy_throttler(entity_name_t::TYPE_OSD,
+                                          &client.byte_throttler);
+        client.msgr->set_auth_client(&client.dummy_auth);
+        client.msgr->set_auth_server(&client.dummy_auth);
+        return client.msgr->start({&client.dispatcher}).then(
+          [addr, &client, &disp=client.dispatcher, count] {
+          auto conn = client.msgr->connect(addr, entity_name_t::TYPE_OSD);
+          // Ping, wait for the pong, repeat until `count` pongs seen.
+          return seastar::do_until(
+            [&disp,count] { return disp.count >= count; },
+            [&disp,conn] {
+              return conn->send(crimson::make_message<MPing>()).then([&] {
+                return disp.on_reply.wait();
+              });
+            }
+          );
+        }).finally([&client] {
+          std::cout << "client shutting down" << std::endl;
+          client.msgr->stop();
+          return client.msgr->shutdown();
+        });
+      });
+  }
+}
+
+// Entry point: parse role/port/nonce/count options, start an alien
+// thread that submits seastar_echo() onto the reactor, run the reactor
+// with any unrecognized options forwarded to seastar, and join.
+int main(int argc, char** argv)
+{
+  namespace po = boost::program_options;
+  po::options_description desc{"Allowed options"};
+  desc.add_options()
+    ("help,h", "show help message")
+    ("role", po::value<std::string>()->default_value("pong"),
+     "role to play (ping | pong)")
+    ("port", po::value<uint16_t>()->default_value(9010),
+     "port #")
+    ("nonce", po::value<uint32_t>()->default_value(42),
+     "a unique number to identify the pong server")
+    ("count", po::value<unsigned>()->default_value(10),
+     "stop after sending/echoing <count> MPing messages");
+  po::variables_map vm;
+  std::vector<std::string> unrecognized_options;
+  try {
+    auto parsed = po::command_line_parser(argc, argv)
+      .options(desc)
+      .allow_unregistered()
+      .run();
+    po::store(parsed, vm);
+    if (vm.count("help")) {
+      std::cout << desc << std::endl;
+      return 0;
+    }
+    po::notify(vm);
+    // Anything we did not register is forwarded to the seastar app.
+    unrecognized_options = po::collect_unrecognized(parsed.options, po::include_positional);
+  } catch(const po::error& e) {
+    std::cerr << "error: " << e.what() << std::endl;
+    return 1;
+  }
+
+  entity_addr_t addr;
+  addr.set_type(entity_addr_t::TYPE_MSGR2);
+  addr.set_family(AF_INET);
+  addr.set_port(vm["port"].as<std::uint16_t>());
+  addr.set_nonce(vm["nonce"].as<std::uint32_t>());
+
+  // "ping" plays client; anything else (default "pong") plays server.
+  echo_role role = echo_role::as_server;
+  if (vm["role"].as<std::string>() == "ping") {
+    role = echo_role::as_client;
+  }
+
+  auto count = vm["count"].as<unsigned>();
+  seastar::app_template app;
+  SeastarContext sc;
+  // Alien thread: submit the echo task to shard 0 and block on it.
+  auto job = sc.with_seastar([&] {
+    auto fut = seastar::alien::submit_to(app.alien(), 0, [addr, role, count] {
+      return seastar_echo(addr, role, count);
+    });
+    fut.wait();
+  });
+  std::vector<char*> av{argv[0]};
+  std::transform(begin(unrecognized_options),
+                 end(unrecognized_options),
+                 std::back_inserter(av),
+                 [](auto& s) {
+                   return const_cast<char*>(s.c_str());
+                 });
+  sc.run(app, av.size(), av.data());
+  job.join();
+}
+
+/*
+ * Local Variables:
+ * compile-command: "make -j4 \
+ * -C ../../../build \
+ * unittest_seastar_echo"
+ * End:
+ */
diff --git a/src/test/crimson/test_alienstore_thread_pool.cc b/src/test/crimson/test_alienstore_thread_pool.cc
new file mode 100644
index 000000000..dbeed26cd
--- /dev/null
+++ b/src/test/crimson/test_alienstore_thread_pool.cc
@@ -0,0 +1,78 @@
+#include <chrono>
+#include <iostream>
+#include <numeric>
+#include <seastar/core/app-template.hh>
+#include "common/ceph_argparse.h"
+#include "crimson/common/config_proxy.h"
+#include "crimson/os/alienstore/thread_pool.h"
+#include "include/msgr.h"
+
+using namespace std::chrono_literals;
+using ThreadPool = crimson::os::ThreadPool;
+using crimson::common::local_conf;
+
+// Submit N small jobs to random pool queues, each sleeping briefly and
+// returning i + M; map_reduce sums the results and the sum is checked
+// against the closed-form accumulate of [M, N+M).
+seastar::future<> test_accumulate(ThreadPool& tp) {
+  static constexpr auto N = 5;
+  static constexpr auto M = 1;
+  auto slow_plus = [&tp](int i) {
+    // ::rand() % 2 picks one of the pool's two queues.
+    return tp.submit(::rand() % 2, [=] {
+      std::this_thread::sleep_for(10ns);
+      return i + M;
+    });
+  };
+  return seastar::map_reduce(
+    boost::irange(0, N), slow_plus, 0, std::plus{}).then([] (int sum) {
+      auto r = boost::irange(0 + M, N + M);
+      if (sum != std::accumulate(r.begin(), r.end(), 0)) {
+        throw std::runtime_error("test_accumulate failed");
+      }
+    });
+}
+
+// Verify that a void-returning task submitted to the pool completes
+// and resolves the returned future.
+seastar::future<> test_void_return(ThreadPool& tp) {
+  return tp.submit(::rand() % 2, [=] {
+    std::this_thread::sleep_for(10ns);
+  });
+}
+
+// Entry point: bring up the ceph sharded config, run both thread-pool
+// tests against a 2-thread pool pinned to cpu 0, then tear everything
+// down; exits non-zero via engine().exit(1) on any exception.
+int main(int argc, char** argv)
+{
+  seastar::app_template app;
+  return app.run(argc, argv, [] {
+    std::vector<const char*> args;
+    std::string cluster;
+    std::string conf_file_list;
+    auto init_params = ceph_argparse_early_args(args,
+                                                CEPH_ENTITY_TYPE_CLIENT,
+                                                &cluster,
+                                                &conf_file_list);
+    return crimson::common::sharded_conf().start(init_params.name, cluster)
+      .then([conf_file_list] {
+        return local_conf().parse_config_files(conf_file_list);
+      }).then([] {
+        // 2 worker threads, queue depth 128, pinned to cpu 0.
+        return seastar::do_with(std::make_unique<crimson::os::ThreadPool>(2, 128, seastar::resource::cpuset{0}),
+          [](auto& tp) {
+            return tp->start().then([&tp] {
+              return test_accumulate(*tp);
+            }).then([&tp] {
+              return test_void_return(*tp);
+            }).finally([&tp] {
+              return tp->stop();
+            });
+          });
+      }).finally([] {
+        return crimson::common::sharded_conf().stop();
+      }).handle_exception([](auto e) {
+        std::cerr << "Error: " << e << std::endl;
+        seastar::engine().exit(1);
+      });
+  });
+}
+
+/*
+ * Local Variables:
+ * compile-command: "make -j4 \
+ * -C ../../../build \
+ * unittest_seastar_thread_pool"
+ * End:
+ */
diff --git a/src/test/crimson/test_async_echo.cc b/src/test/crimson/test_async_echo.cc
new file mode 100644
index 000000000..758bcf626
--- /dev/null
+++ b/src/test/crimson/test_async_echo.cc
@@ -0,0 +1,234 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#include <boost/program_options/variables_map.hpp>
+#include <boost/program_options/parsers.hpp>
+
+#include "auth/Auth.h"
+#include "global/global_init.h"
+#include "messages/MPing.h"
+#include "msg/Dispatcher.h"
+#include "msg/Messenger.h"
+
+#include "auth/DummyAuth.h"
+
+enum class echo_role {
+ as_server,
+ as_client,
+};
+
+namespace native_pingpong {
+
+constexpr int CEPH_OSD_PROTOCOL = 10;
+
+// Stand-alone pong server built on the classic (non-crimson) async
+// messenger: answers every CEPH_MSG_PING with an MPing and lets the
+// owner block until one round trip has completed.
+struct Server {
+  Server(CephContext* cct, const entity_inst_t& entity)
+    : dummy_auth(cct), dispatcher(cct)
+  {
+    msgr.reset(Messenger::create(cct, "async", entity.name, "pong", entity.addr.get_nonce()));
+    dummy_auth.auth_registry.refresh_config();
+    msgr->set_cluster_protocol(CEPH_OSD_PROTOCOL);
+    msgr->set_default_policy(Messenger::Policy::stateless_server(0));
+    msgr->set_auth_client(&dummy_auth);
+    msgr->set_auth_server(&dummy_auth);
+  }
+  DummyAuthClientServer dummy_auth;
+  std::unique_ptr<Messenger> msgr;
+  struct ServerDispatcher : Dispatcher {
+    std::mutex mutex;
+    std::condition_variable on_reply;
+    bool replied = false;  // guarded by mutex
+    ServerDispatcher(CephContext* cct)
+      : Dispatcher(cct)
+    {}
+    bool ms_can_fast_dispatch_any() const override {
+      return true;
+    }
+    bool ms_can_fast_dispatch(const Message* m) const override {
+      return m->get_type() == CEPH_MSG_PING;
+    }
+    // Runs on a messenger thread: answer the ping and wake echo().
+    void ms_fast_dispatch(Message* m) override {
+      m->get_connection()->send_message(new MPing);
+      m->put();
+      {
+        std::lock_guard lock{mutex};
+        replied = true;
+      }
+      on_reply.notify_one();
+    }
+    bool ms_dispatch(Message*) override {
+      ceph_abort();
+    }
+    bool ms_handle_reset(Connection*) override {
+      return true;
+    }
+    void ms_handle_remote_reset(Connection*) override {
+    }
+    bool ms_handle_refused(Connection*) override {
+      return true;
+    }
+    // Block until the next ping has been answered.
+    void echo() {
+      // Reset the flag while holding the mutex: ms_fast_dispatch() runs
+      // on another thread and writes `replied` under the same lock, so
+      // the previous unlocked write here was a data race.  Also dropped
+      // the `return` of a void expression.
+      std::unique_lock lock{mutex};
+      replied = false;
+      on_reply.wait(lock, [this] { return replied; });
+    }
+  } dispatcher;
+  void echo() {
+    dispatcher.echo();
+  }
+};
+
+// Stand-alone ping client for the classic async messenger: sends MPing
+// messages and waits (bounded, 500ms) for each pong.
+struct Client {
+  std::unique_ptr<Messenger> msgr;
+  Client(CephContext *cct)
+    : dummy_auth(cct), dispatcher(cct)
+  {
+    msgr.reset(Messenger::create(cct, "async", entity_name_t::CLIENT(-1), "ping", getpid()));
+    dummy_auth.auth_registry.refresh_config();
+    msgr->set_cluster_protocol(CEPH_OSD_PROTOCOL);
+    msgr->set_default_policy(Messenger::Policy::lossy_client(0));
+    msgr->set_auth_client(&dummy_auth);
+    msgr->set_auth_server(&dummy_auth);
+  }
+  DummyAuthClientServer dummy_auth;
+  struct ClientDispatcher : Dispatcher {
+    std::mutex mutex;
+    std::condition_variable on_reply;
+    bool replied = false;  // guarded by mutex
+
+    ClientDispatcher(CephContext* cct)
+      : Dispatcher(cct)
+    {}
+    bool ms_can_fast_dispatch_any() const override {
+      return true;
+    }
+    bool ms_can_fast_dispatch(const Message* m) const override {
+      return m->get_type() == CEPH_MSG_PING;
+    }
+    // Runs on a messenger thread: note the pong and wake ping().
+    void ms_fast_dispatch(Message* m) override {
+      m->put();
+      {
+        std::lock_guard lock{mutex};
+        replied = true;
+      }
+      on_reply.notify_one();
+    }
+    bool ms_dispatch(Message*) override {
+      ceph_abort();
+    }
+    bool ms_handle_reset(Connection *) override {
+      return true;
+    }
+    void ms_handle_remote_reset(Connection*) override {
+    }
+    bool ms_handle_refused(Connection*) override {
+      return true;
+    }
+    // Send one ping; returns false if no reply arrived within 500ms.
+    bool ping(Messenger* msgr, const entity_inst_t& peer) {
+      using namespace std::chrono_literals;
+      auto conn = msgr->connect_to(peer.name.type(),
+                                   entity_addrvec_t{peer.addr});
+      {
+        // Reset the flag under the mutex: ms_fast_dispatch() writes
+        // `replied` under the same lock on another thread (e.g. a late
+        // pong from a previous round), so the old unlocked write here
+        // was a data race.
+        std::lock_guard lock{mutex};
+        replied = false;
+      }
+      conn->send_message(new MPing);
+      std::unique_lock lock{mutex};
+      return on_reply.wait_for(lock, 500ms, [&] {
+        return replied;
+      });
+    }
+  } dispatcher;
+  // NOTE(review): the timeout result of dispatcher.ping() is discarded
+  // here; a lost pong is silently tolerated.
+  void ping(const entity_inst_t& peer) {
+    dispatcher.ping(msgr.get(), peer);
+  }
+};
+} // namespace native_pingpong
+
+// Drive one ping/pong session over the classic async messenger:
+// as server, serve `count` echo round trips; as client, send `count`
+// MPing messages, waiting for each reply.
+static void ceph_echo(CephContext* cct,
+                      entity_addr_t addr, echo_role role, unsigned count)
+{
+  std::cout << "ceph/";
+  entity_inst_t entity{entity_name_t::OSD(0), addr};
+  if (role == echo_role::as_server) {
+    std::cout << "server listening at " << addr << std::endl;
+    native_pingpong::Server server{cct, entity};
+    server.msgr->bind(addr);
+    server.msgr->add_dispatcher_head(&server.dispatcher);
+    server.msgr->start();
+    for (unsigned i = 0; i < count; i++) {
+      server.echo();
+    }
+    server.msgr->shutdown();
+    server.msgr->wait();
+  } else {
+    std::cout << "client sending to " << addr << std::endl;
+    native_pingpong::Client client{cct};
+    client.msgr->add_dispatcher_head(&client.dispatcher);
+    client.msgr->start();
+    // NOTE(review): `conn` is never used below — Client::ping() calls
+    // connect_to() itself (presumably returning the same connection);
+    // confirm whether this pre-connect is intentional warm-up.
+    auto conn = client.msgr->connect_to(entity.name.type(),
+                                        entity_addrvec_t{entity.addr});
+    for (unsigned i = 0; i < count; i++) {
+      std::cout << "seq=" << i << std::endl;
+      client.ping(entity);
+    }
+    client.msgr->shutdown();
+    client.msgr->wait();
+  }
+}
+
+// Command line front end: choose ping/pong role, address parameters and
+// message count, then run the echo session with a minimal ceph context.
+int main(int argc, char** argv)
+{
+  namespace po = boost::program_options;
+  po::options_description desc{"Allowed options"};
+  desc.add_options()
+    ("help,h", "show help message")
+    ("role", po::value<std::string>()->default_value("pong"),
+     "role to play (ping | pong)")
+    ("port", po::value<uint16_t>()->default_value(9010),
+     "port #")
+    ("nonce", po::value<uint32_t>()->default_value(42),
+     "a unique number to identify the pong server")
+    ("count", po::value<unsigned>()->default_value(10),
+     "stop after sending/echoing <count> MPing messages")
+    ("v2", po::value<bool>()->default_value(false),
+     "using msgr v2 protocol");
+  po::variables_map vm;
+  std::vector<std::string> unrecognized_options;
+  try {
+    // allow_unregistered(): unknown flags are collected, not fatal, so
+    // ceph-style arguments can pass through untouched.
+    auto parsed = po::command_line_parser(argc, argv)
+      .options(desc)
+      .allow_unregistered()
+      .run();
+    po::store(parsed, vm);
+    if (vm.count("help")) {
+      std::cout << desc << std::endl;
+      return 0;
+    }
+    po::notify(vm);
+    unrecognized_options = po::collect_unrecognized(parsed.options, po::include_positional);
+  } catch(const po::error& e) {
+    std::cerr << "error: " << e.what() << std::endl;
+    return 1;
+  }
+
+  // Build the listen/connect address from the parsed options.
+  entity_addr_t addr;
+  if (vm["v2"].as<bool>()) {
+    addr.set_type(entity_addr_t::TYPE_MSGR2);
+  } else {
+    addr.set_type(entity_addr_t::TYPE_LEGACY);
+  }
+  addr.set_family(AF_INET);
+  addr.set_port(vm["port"].as<std::uint16_t>());
+  addr.set_nonce(vm["nonce"].as<std::uint32_t>());
+
+  echo_role role = echo_role::as_server;
+  if (vm["role"].as<std::string>() == "ping") {
+    role = echo_role::as_client;
+  }
+
+  auto count = vm["count"].as<unsigned>();
+  // Minimal global ceph context without contacting a monitor.
+  std::vector<const char*> args(argv, argv + argc);
+  auto cct = global_init(nullptr, args,
+                         CEPH_ENTITY_TYPE_CLIENT,
+                         CODE_ENVIRONMENT_UTILITY,
+                         CINIT_FLAG_NO_MON_CONFIG);
+  common_init_finish(cct.get());
+  ceph_echo(cct.get(), addr, role, count);
+}
diff --git a/src/test/crimson/test_backfill.cc b/src/test/crimson/test_backfill.cc
new file mode 100644
index 000000000..6d7d62ce5
--- /dev/null
+++ b/src/test/crimson/test_backfill.cc
@@ -0,0 +1,501 @@
+#include <algorithm>
+#include <cstdlib>
+#include <deque>
+#include <functional>
+#include <initializer_list>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <set>
+#include <string>
+
+#include <boost/statechart/event_base.hpp>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "common/hobject.h"
+#include "crimson/osd/backfill_state.h"
+#include "osd/recovery_types.h"
+
+
+// The sole purpose is to convert from the string representation.
+// An alternative approach could use boost::range in FakeStore's
+// constructor.
+struct improved_hobject_t : hobject_t {
+  // Construct by parsing the canonical textual hobject form
+  // (e.g. "1:00058bcc:::rbd_data...:head").
+  improved_hobject_t(const char parsable_name[]) {
+    this->parse(parsable_name);
+  }
+  improved_hobject_t(const hobject_t& obj)
+    : hobject_t(obj) {
+  }
+  // Compare as plain hobject_t values.
+  bool operator==(const improved_hobject_t& rhs) const {
+    return static_cast<const hobject_t&>(*this) == \
+      static_cast<const hobject_t&>(rhs);
+  }
+};
+
+
+// In-memory stand-in for an object store: an ordered object -> version
+// map with push/drop/list primitives used by the backfill fixture.
+struct FakeStore {
+  using objs_t = std::map<improved_hobject_t, eversion_t>;
+
+  objs_t objs;
+
+  // Create-or-overwrite `obj` at `version`.
+  void push(const hobject_t& obj, eversion_t version) {
+    objs[obj] = version;
+  }
+
+  // Remove `obj`; it must exist at exactly `version`.
+  void drop(const hobject_t& obj, const eversion_t version) {
+    auto it = objs.find(obj);
+    ceph_assert(it != std::end(objs));
+    ceph_assert(it->second == version);
+    objs.erase(it);
+  }
+
+  // Visit all entries at or after `start` (the uint64-max cap makes the
+  // scan effectively unbounded) and return the first unvisited object,
+  // or hobject_t::get_max() when the scan reached the end.
+  template <class Func>
+  hobject_t list(const hobject_t& start, Func&& per_entry) const {
+    auto it = objs.lower_bound(start);
+    for (auto max = std::numeric_limits<std::uint64_t>::max();
+         it != std::end(objs) && max > 0;
+         ++it, --max) {
+      per_entry(*it);
+    }
+    return it != std::end(objs) ? static_cast<const hobject_t&>(it->first)
+                                : hobject_t::get_max();
+  }
+
+  bool operator==(const FakeStore& rhs) const {
+    return std::size(objs) == std::size(rhs.objs) && \
+      std::equal(std::begin(objs), std::end(objs), std::begin(rhs.objs));
+  }
+  bool operator!=(const FakeStore& rhs) const {
+    return !(*this == rhs);
+  }
+};
+
+
+// A backfill target: its object store plus the last_backfill watermark.
+struct FakeReplica {
+  FakeStore store;
+  hobject_t last_backfill;
+
+  FakeReplica(FakeStore&& store) : store(std::move(store)) {}
+};
+
+// The backfill source: its store plus the version markers BackfillState
+// reads through the peering facade.
+struct FakePrimary {
+  FakeStore store;
+  eversion_t last_update;
+  eversion_t projected_last_update;
+  eversion_t log_tail;
+
+  FakePrimary(FakeStore&& store) : store(std::move(store)) {}
+};
+
+// GMock-instrumented BackfillListener driving BackfillState against a
+// fake primary and fake replicas.  Every listener callback is turned
+// into a queued statechart event; tests pump the queue explicitly with
+// next_round()/next_till_done().
+class BackfillFixture : public crimson::osd::BackfillState::BackfillListener {
+  friend class BackfillFixtureBuilder;
+
+  FakePrimary backfill_source;
+  std::map<pg_shard_t, FakeReplica> backfill_targets;
+  // drops queued by enqueue_drop(), applied on maybe_flush()
+  std::map<pg_shard_t,
+           std::vector<std::pair<hobject_t, eversion_t>>> enqueued_drops;
+  std::deque<
+    boost::intrusive_ptr<
+      const boost::statechart::event_base>> events_to_dispatch;
+  crimson::osd::BackfillState backfill_state;
+
+  BackfillFixture(FakePrimary&& backfill_source,
+                  std::map<pg_shard_t, FakeReplica>&& backfill_targets);
+
+  template <class EventT>
+  void schedule_event(const EventT& event) {
+    events_to_dispatch.emplace_back(event.intrusive_from_this());
+  }
+
+  // BackfillListener {
+  void request_replica_scan(
+    const pg_shard_t& target,
+    const hobject_t& begin,
+    const hobject_t& end) override;
+
+  void request_primary_scan(
+    const hobject_t& begin) override;
+
+  void enqueue_push(
+    const hobject_t& obj,
+    const eversion_t& v) override;
+
+  void enqueue_drop(
+    const pg_shard_t& target,
+    const hobject_t& obj,
+    const eversion_t& v) override;
+
+  void maybe_flush() override;
+
+  void update_peers_last_backfill(
+    const hobject_t& new_last_backfill) override;
+
+  bool budget_available() const override;
+
+public:
+  // mocked so tests can EXPECT_CALL the completion notification
+  MOCK_METHOD(void, backfilled, (), (override));
+  // }
+
+  // Deliver exactly `how_many` queued events to the state machine.
+  void next_round(std::size_t how_many=1) {
+    ceph_assert(events_to_dispatch.size() >= how_many);
+    while (how_many-- > 0) {
+      backfill_state.process_event(std::move(events_to_dispatch.front()));
+      events_to_dispatch.pop_front();
+    }
+  }
+
+  // Pump until no events remain queued.
+  void next_till_done() {
+    while (!events_to_dispatch.empty()) {
+      next_round();
+    }
+  }
+
+  // True when the primary and every replica hold exactly `reference`.
+  bool all_stores_look_like(const FakeStore& reference) const {
+    const bool all_replica_match = std::all_of(
+      std::begin(backfill_targets), std::end(backfill_targets),
+      [&reference] (const auto kv) {
+        return kv.second.store == reference;
+      });
+    return backfill_source.store == reference && all_replica_match;
+  }
+
+  struct PeeringFacade;
+  struct PGFacade;
+};
+
+// Adapts the fake primary/replicas to BackfillState's PeeringFacade
+// interface.
+struct BackfillFixture::PeeringFacade
+  : public crimson::osd::BackfillState::PeeringFacade {
+  FakePrimary& backfill_source;
+  std::map<pg_shard_t, FakeReplica>& backfill_targets;
+  // sorry, this is duplicative but that's the interface
+  std::set<pg_shard_t> backfill_targets_as_set;
+
+  PeeringFacade(FakePrimary& backfill_source,
+                std::map<pg_shard_t, FakeReplica>& backfill_targets)
+    : backfill_source(backfill_source),
+      backfill_targets(backfill_targets) {
+    std::transform(
+      std::begin(backfill_targets), std::end(backfill_targets),
+      std::inserter(backfill_targets_as_set, std::end(backfill_targets_as_set)),
+      [](auto pair) {
+        return pair.first;
+      });
+  }
+
+  // The smallest last_backfill across all targets.
+  hobject_t earliest_backfill() const override {
+    hobject_t e = hobject_t::get_max();
+    for (const auto& kv : backfill_targets) {
+      e = std::min(kv.second.last_backfill, e);
+    }
+    return e;
+  }
+  const std::set<pg_shard_t>& get_backfill_targets() const override {
+    return backfill_targets_as_set;
+  }
+  const hobject_t& get_peer_last_backfill(pg_shard_t peer) const override {
+    return backfill_targets.at(peer).last_backfill;
+  }
+  const eversion_t& get_last_update() const override {
+    return backfill_source.last_update;
+  }
+  const eversion_t& get_log_tail() const override {
+    return backfill_source.log_tail;
+  }
+
+  // The fixture models no pg log entries, so there is nothing to scan.
+  void scan_log_after(eversion_t, scan_log_func_t) const override {
+    /* NOP */
+  }
+
+  bool is_backfill_target(pg_shard_t peer) const override {
+    return backfill_targets.count(peer) == 1;
+  }
+  void update_complete_backfill_object_stats(const hobject_t &hoid,
+                                             const pg_stat_t &stats) override {
+  }
+  bool is_backfilling() const override {
+    return true;
+  }
+};
+
+// Minimal PGFacade: only exposes the primary's projected_last_update.
+struct BackfillFixture::PGFacade : public crimson::osd::BackfillState::PGFacade {
+  FakePrimary& backfill_source;
+
+  PGFacade(FakePrimary& backfill_source)
+    : backfill_source(backfill_source) {
+  }
+
+  const eversion_t& get_projected_last_update() const override {
+    return backfill_source.projected_last_update;
+  }
+};
+
+BackfillFixture::BackfillFixture(
+  FakePrimary&& backfill_source,
+  std::map<pg_shard_t, FakeReplica>&& backfill_targets)
+  : backfill_source(std::move(backfill_source)),
+    backfill_targets(std::move(backfill_targets)),
+    backfill_state(*this,
+                   std::make_unique<PeeringFacade>(this->backfill_source,
+                                                   this->backfill_targets),
+                   std::make_unique<PGFacade>(this->backfill_source))
+{
+  // Kick the state machine off immediately; all subsequent events are
+  // queued via schedule_event() and delivered by next_round().
+  backfill_state.process_event(crimson::osd::BackfillState::Triggered{}.intrusive_from_this());
+}
+
+// Simulate a replica scan: synchronously list the target's store from
+// `begin` and queue a ReplicaScanned event carrying the interval.
+// NOTE(review): `end` is ignored — FakeStore::list() scans to the end
+// of the store regardless.
+void BackfillFixture::request_replica_scan(
+  const pg_shard_t& target,
+  const hobject_t& begin,
+  const hobject_t& end)
+{
+  BackfillInterval bi;
+  bi.end = backfill_targets.at(target).store.list(begin, [&bi](auto kv) {
+    bi.objects.insert(std::move(kv));
+  });
+  bi.begin = begin;
+  bi.version = backfill_source.last_update;
+
+  schedule_event(crimson::osd::BackfillState::ReplicaScanned{ target, std::move(bi) });
+}
+
+// Simulate the primary-side scan: list the source store from `begin`
+// and queue a PrimaryScanned event carrying the interval.
+void BackfillFixture::request_primary_scan(
+  const hobject_t& begin)
+{
+  BackfillInterval bi;
+  bi.end = backfill_source.store.list(begin, [&bi](auto kv) {
+    bi.objects.insert(std::move(kv));
+  });
+  bi.begin = begin;
+  bi.version = backfill_source.last_update;
+
+  schedule_event(crimson::osd::BackfillState::PrimaryScanned{ std::move(bi) });
+}
+
+// Model a push as an immediate, successful write on every target, then
+// queue the ObjectPushed notification.
+void BackfillFixture::enqueue_push(
+  const hobject_t& obj,
+  const eversion_t& v)
+{
+  for (auto& target : backfill_targets) {
+    target.second.store.push(obj, v);
+  }
+  schedule_event(crimson::osd::BackfillState::ObjectPushed{ obj });
+}
+
+// Drops are only recorded here; they take effect in maybe_flush().
+void BackfillFixture::enqueue_drop(
+  const pg_shard_t& target,
+  const hobject_t& obj,
+  const eversion_t& v)
+{
+  auto& pending = enqueued_drops[target];
+  pending.push_back(std::make_pair(obj, v));
+}
+
+// Apply every deferred drop to its target store, then forget them all.
+void BackfillFixture::maybe_flush()
+{
+  for (auto& [shard, drops] : enqueued_drops) {
+    for (auto& [obj, version] : drops) {
+      backfill_targets.at(shard).store.drop(obj, version);
+    }
+  }
+  enqueued_drops.clear();
+}
+
+// Intentionally a no-op: the fixture does not model peer acknowledgement
+// of last_backfill updates.
+void BackfillFixture::update_peers_last_backfill(
+  const hobject_t& new_last_backfill)
+{
+}
+
+// The fixture never throttles: budget is always available.
+bool BackfillFixture::budget_available() const
+{
+  return true;
+}
+
+// Fluent builder: add_source() followed by chained add_target() calls,
+// finished by get_result().  Target shard ids are simply 0, 1, ... in
+// call order.
+struct BackfillFixtureBuilder {
+  FakeStore backfill_source;
+  std::map<pg_shard_t, FakeReplica> backfill_targets;
+
+  static BackfillFixtureBuilder add_source(FakeStore::objs_t objs) {
+    BackfillFixtureBuilder bfb;
+    bfb.backfill_source = FakeStore{ std::move(objs) };
+    return bfb;
+  }
+
+  // rvalue-qualified so the builder can only be used as a chain
+  BackfillFixtureBuilder&& add_target(FakeStore::objs_t objs) && {
+    const auto new_osd_num = std::size(backfill_targets);
+    const auto [ _, inserted ] = backfill_targets.emplace(
+      new_osd_num, FakeReplica{ FakeStore{std::move(objs)} });
+    ceph_assert(inserted);
+    return std::move(*this);
+  }
+
+  BackfillFixture get_result() && {
+    return BackfillFixture{ std::move(backfill_source),
+                            std::move(backfill_targets) };
+  }
+};
+
+// The straightest case: single primary, single replica. All have the same
+// content in their object stores, so the entire backfill boils into just
+// `request_primary_scan()` and `request_replica_scan()`.
+TEST(backfill, same_primary_same_replica)
+{
+  const auto reference_store = FakeStore{ {
+    { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} },
+    { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} },
+    { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} },
+  }};
+  auto cluster_fixture = BackfillFixtureBuilder::add_source(
+    reference_store.objs
+  ).add_target(
+    reference_store.objs
+  ).get_result();
+
+  // Arm the expectation only after the first round so that backfilled()
+  // must fire during the second round, not the first.
+  cluster_fixture.next_round();
+  EXPECT_CALL(cluster_fixture, backfilled);
+  cluster_fixture.next_round();
+  EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
+}
+
+// One replica that starts empty: all three objects must be pushed.
+TEST(backfill, one_empty_replica)
+{
+  const auto reference_store = FakeStore{ {
+    { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} },
+    { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} },
+    { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} },
+  }};
+  auto cluster_fixture = BackfillFixtureBuilder::add_source(
+    reference_store.objs
+  ).add_target(
+    { /* nothing */ }
+  ).get_result();
+
+  // Pump a fixed number of rounds; backfilled() must fire only in the
+  // final one.
+  cluster_fixture.next_round();
+  cluster_fixture.next_round();
+  cluster_fixture.next_round(2);
+  EXPECT_CALL(cluster_fixture, backfilled);
+  cluster_fixture.next_round();
+  EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
+}
+
+// Two empty replicas; event order is not pinned here, so just run the
+// queue to exhaustion and compare the final stores.
+TEST(backfill, two_empty_replicas)
+{
+  const auto reference_store = FakeStore{ {
+    { "1:00058bcc:::rbd_data.1018ac3e755.00000000000000d5:head", {10, 234} },
+    { "1:00ed7f8e:::rbd_data.1018ac3e755.00000000000000af:head", {10, 196} },
+    { "1:01483aea:::rbd_data.1018ac3e755.0000000000000095:head", {10, 169} },
+  }};
+  auto cluster_fixture = BackfillFixtureBuilder::add_source(
+    reference_store.objs
+  ).add_target(
+    { /* nothing 1 */ }
+  ).add_target(
+    { /* nothing 2 */ }
+  ).get_result();
+
+  EXPECT_CALL(cluster_fixture, backfilled);
+  cluster_fixture.next_till_done();
+
+  EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
+}
+
+// Helpers producing pseudo-random (srand-seeded, hence repeatable)
+// FakeStore contents and mutations for the randomized backfill tests.
+namespace StoreRandomizer {
+  // FIXME: copied & pasted from test/test_snap_mapper.cc. We need to
+  // find a way to avoid code duplication in test. A static library?
+  std::string random_string(std::size_t size) {
+    std::string name;
+    for (size_t j = 0; j < size; ++j) {
+      name.push_back('a' + (std::rand() % 26));
+    }
+    return name;
+  }
+
+  hobject_t random_hobject() {
+    uint32_t mask{0};
+    uint32_t bits{0};
+    // Use the unsigned literal ~0u: left-shifting the negative int (~0)
+    // is undefined behaviour in C++17.  With mask == bits == 0 the
+    // resulting hash value is unchanged (rand() passes through).
+    return hobject_t(
+      random_string(1+(std::rand() % 16)),
+      random_string(1+(std::rand() % 16)),
+      snapid_t(std::rand() % 1000),
+      (std::rand() & ((~0u)<<bits)) | (mask & ~((~0u)<<bits)),
+      0, random_string(std::rand() % 16));
+  }
+
+  eversion_t random_eversion() {
+    return eversion_t{ std::rand() % 512U, std::rand() % 256UL };
+  }
+
+  // A store with up to 2047 random objects.
+  FakeStore create() {
+    FakeStore store;
+    for (std::size_t i = std::rand() % 2048; i > 0; --i) {
+      store.push(random_hobject(), random_eversion());
+    }
+    return store;
+  }
+
+  // Pick one of the passed callables at random and invoke it.
+  template <class... Args>
+  void execute_random(Args&&... args) {
+    std::array<std::function<void()>, sizeof...(Args)> funcs = {
+      std::forward<Args>(args)...
+    };
+    return std::move(funcs[std::rand() % std::size(funcs)])();
+  }
+
+  // Derive a store that differs from `source_store` in random ways:
+  // entries dropped, kept, re-versioned, renamed, or replaced by a
+  // handful of fresh random objects.
+  FakeStore mutate(const FakeStore& source_store) {
+    FakeStore mutated_store;
+    source_store.list(hobject_t{}, [&] (const auto& kv) {
+      const auto &oid = kv.first;
+      const auto &version = kv.second;
+      execute_random(
+        [] { /* just drop the entry */ },
+        [&] { mutated_store.push(oid, version); },
+        [&] { mutated_store.push(oid, random_eversion()); },
+        [&] { mutated_store.push(random_hobject(), version); },
+        [&] {
+          for (auto how_many = std::rand() % 8; how_many > 0; --how_many) {
+            mutated_store.push(random_hobject(), random_eversion());
+          }
+        }
+      );
+    });
+    return mutated_store;
+  }
+}
+
+// The name might suggest randomness is involved here. Well, that's true
+// but till we know the seed the test still is repeatable.
+TEST(backfill, one_pseudorandomized_replica)
+{
+  const auto reference_store = StoreRandomizer::create();
+  // The target starts as a random mutation of the source; backfill must
+  // converge it back to the reference content.
+  auto cluster_fixture = BackfillFixtureBuilder::add_source(
+    reference_store.objs
+  ).add_target(
+    StoreRandomizer::mutate(reference_store).objs
+  ).get_result();
+
+  EXPECT_CALL(cluster_fixture, backfilled);
+  cluster_fixture.next_till_done();
+
+  EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
+}
+
+// Same as above but with two independently mutated targets.
+TEST(backfill, two_pseudorandomized_replicas)
+{
+  const auto reference_store = StoreRandomizer::create();
+  auto cluster_fixture = BackfillFixtureBuilder::add_source(
+    reference_store.objs
+  ).add_target(
+    StoreRandomizer::mutate(reference_store).objs
+  ).add_target(
+    StoreRandomizer::mutate(reference_store).objs
+  ).get_result();
+
+  EXPECT_CALL(cluster_fixture, backfilled);
+  cluster_fixture.next_till_done();
+
+  EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));
+}
diff --git a/src/test/crimson/test_buffer.cc b/src/test/crimson/test_buffer.cc
new file mode 100644
index 000000000..64a815bd2
--- /dev/null
+++ b/src/test/crimson/test_buffer.cc
@@ -0,0 +1,50 @@
+#include <iostream>
+#include <seastar/core/app-template.hh>
+#include <seastar/core/future-util.hh>
+#include <seastar/core/reactor.hh>
+#include "include/buffer.h"
+
+// allocate a foreign buffer on each cpu, collect them all into a bufferlist,
+// and destruct it on this cpu
+seastar::future<> test_foreign_bufferlist()
+{
+  // Build a 4-byte bufferlist on the remote shard; returning it makes
+  // its memory foreign to this shard.
+  auto make_foreign_buffer = [] (unsigned cpu) {
+    return seastar::smp::submit_to(cpu, [=] {
+      bufferlist bl;
+      seastar::temporary_buffer<char> buf("abcd", 4);
+      bl.append(buffer::create(std::move(buf)));
+      return bl;
+    });
+  };
+  // Concatenate two partial results without copying their payloads.
+  auto reduce = [] (bufferlist&& lhs, bufferlist&& rhs) {
+    bufferlist bl;
+    bl.claim_append(lhs);
+    bl.claim_append(rhs);
+    return bl;
+  };
+  return seastar::map_reduce(seastar::smp::all_cpus(), make_foreign_buffer,
+                             bufferlist(), reduce).then(
+    [] (bufferlist&& bl) {
+      // every cpu contributed exactly 4 bytes
+      if (bl.length() != 4 * seastar::smp::count) {
+        auto e = std::make_exception_ptr(std::runtime_error("wrong buffer size"));
+        return seastar::make_exception_future<>(e);
+      }
+      // destruct the foreign buffers from this shard
+      bl.clear();
+      return seastar::make_ready_future<>();
+    });
+}
+
+int main(int argc, char** argv)
+{
+  seastar::app_template app;
+  return app.run(argc, argv, [] {
+    // seastar::now() starts the chain so further tests can be then()-ed
+    // on in the same style.
+    return seastar::now().then(
+      &test_foreign_bufferlist
+    ).then([] {
+      std::cout << "All tests succeeded" << std::endl;
+    }).handle_exception([] (auto eptr) {
+      std::cout << "Test failure" << std::endl;
+      return seastar::make_exception_future<>(eptr);
+    });
+  });
+}
diff --git a/src/test/crimson/test_config.cc b/src/test/crimson/test_config.cc
new file mode 100644
index 000000000..7541c0931
--- /dev/null
+++ b/src/test/crimson/test_config.cc
@@ -0,0 +1,109 @@
+#include <chrono>
+#include <string>
+#include <numeric>
+#include <seastar/core/app-template.hh>
+#include <seastar/core/sharded.hh>
+#include "common/ceph_argparse.h"
+#include "common/config_obs.h"
+#include "crimson/common/config_proxy.h"
+
+using namespace std::literals;
+using Config = crimson::common::ConfigProxy;
+const std::string test_uint_option = "osd_max_pgls";
+const uint64_t INVALID_VALUE = (uint64_t)(-1);
+const uint64_t EXPECTED_VALUE = 42;
+
+// Config observer that records the last value applied to
+// `test_uint_option` and how many change notifications it received.
+// Registers itself on construction; stop() must run before destruction.
+class ConfigObs : public ceph::md_config_obs_impl<Config> {
+  uint64_t last_change = INVALID_VALUE;  // last observed option value
+  uint64_t num_changes = 0;              // notifications seen so far
+
+  // nullptr-terminated key list; test_uint_option is a global, so its
+  // c_str() pointer stays valid for the program's lifetime.
+  const char** get_tracked_conf_keys() const override {
+    static const char* keys[] = {
+      test_uint_option.c_str(),
+      nullptr,
+    };
+    return keys;
+  }
+  void handle_conf_change(const Config& conf,
+                          const std::set <std::string> &changes) override{
+    if (changes.count(test_uint_option)) {
+      last_change = conf.get_val<uint64_t>(test_uint_option);
+      num_changes += 1;
+    }
+  }
+public:
+  ConfigObs() {
+    crimson::common::local_conf().add_observer(this);
+  }
+
+  uint64_t get_last_change() const { return last_change; }
+  uint64_t get_num_changes() const { return num_changes; }
+  // Deregister; returns a ready future so seastar::sharded can stop us.
+  seastar::future<> stop() {
+    crimson::common::local_conf().remove_observer(this);
+    return seastar::make_ready_future<>();
+  }
+};
+
+seastar::sharded<ConfigObs> sharded_cobs;
+
+// End-to-end check of the sharded ConfigProxy: start it, set
+// `osd_max_pgls`, and verify every shard and its observer saw exactly
+// one matching change.
+static seastar::future<> test_config()
+{
+  return crimson::common::sharded_conf().start(EntityName{}, "ceph"sv).then([] {
+    std::vector<const char*> args;
+    std::string cluster;
+    std::string conf_file_list;
+    auto init_params = ceph_argparse_early_args(args,
+                                                CEPH_ENTITY_TYPE_CLIENT,
+                                                &cluster,
+                                                &conf_file_list);
+    auto& conf = crimson::common::local_conf();
+    conf->name = init_params.name;
+    conf->cluster = cluster;
+    return conf.parse_config_files(conf_file_list);
+  }).then([] {
+    return crimson::common::sharded_conf().invoke_on(0, &Config::start);
+  }).then([] {
+    // one ConfigObs per shard, each registered with its local config
+    return sharded_cobs.start();
+  }).then([] {
+    auto& conf = crimson::common::local_conf();
+    return conf.set_val(test_uint_option, std::to_string(EXPECTED_VALUE));
+  }).then([] {
+    // every shard must see the new value, and its observer exactly once
+    return crimson::common::sharded_conf().invoke_on_all([](Config& config) {
+      if (config.get_val<uint64_t>(test_uint_option) != EXPECTED_VALUE) {
+        throw std::runtime_error("configurations don't match");
+      }
+      if (sharded_cobs.local().get_last_change() != EXPECTED_VALUE) {
+        throw std::runtime_error("last applied changes don't match the latest config");
+      }
+      if (sharded_cobs.local().get_num_changes() != 1) {
+        throw std::runtime_error("num changes don't match actual changes");
+      }
+    });
+  }).finally([] {
+    // observers must deregister before the config itself is stopped
+    return sharded_cobs.stop();
+  }).finally([] {
+    return crimson::common::sharded_conf().stop();
+  });
+}
+
+int main(int argc, char** argv)
+{
+  seastar::app_template app;
+  // Run the config test; any stored exception becomes a failed future
+  // (the lambda captures nothing, so the capture-default was dropped).
+  return app.run(argc, argv, [] {
+    return test_config().then([] {
+      std::cout << "All tests succeeded" << std::endl;
+    }).handle_exception([] (auto eptr) {
+      std::cout << "Test failure" << std::endl;
+      return seastar::make_exception_future<>(eptr);
+    });
+  });
+}
+
+
+/*
+ * Local Variables:
+ * compile-command: "make -j4 \
+ * -C ../../../build \
+ * unittest_seastar_config"
+ * End:
+ */
diff --git a/src/test/crimson/test_denc.cc b/src/test/crimson/test_denc.cc
new file mode 100644
index 000000000..10ebd6dce
--- /dev/null
+++ b/src/test/crimson/test_denc.cc
@@ -0,0 +1,53 @@
+#include <string>
+#include <seastar/core/temporary_buffer.hh>
+#include <gtest/gtest.h>
+#include "include/denc.h"
+#include "common/buffer_seastar.h"
+
+using temporary_buffer = seastar::temporary_buffer<char>;
+using buffer_iterator = seastar_buffer_iterator;
+using const_buffer_iterator = const_seastar_buffer_iterator;
+
+// Round-trip a value through denc with seastar buffer iterators:
+// estimate the size, encode, then decode and compare with the input.
+template<typename T>
+void test_denc(T v) {
+  // estimate
+  size_t s = 0;
+  denc(v, s);
+  ASSERT_NE(s, 0u);
+
+  // encode
+  temporary_buffer buf{s};
+  buffer_iterator enc{buf};
+  denc(v, enc);
+  size_t len = enc.get() - buf.begin();
+  ASSERT_LE(len, s);  // the estimate is an upper bound, not exact
+
+  // decode
+  T out;
+  temporary_buffer encoded = buf.share();  // shares the same storage
+  encoded.trim(len);
+  const_buffer_iterator dec{encoded};
+  denc(out, dec);
+  ASSERT_EQ(v, out);
+  // decoder must consume exactly the bytes the encoder produced
+  ASSERT_EQ(dec.get(), enc.get());
+}
+
+// Cover every fixed-width signed/unsigned integer specialization.
+TEST(denc, simple)
+{
+  test_denc((uint8_t)4);
+  test_denc((int8_t)-5);
+  test_denc((uint16_t)6);
+  test_denc((int16_t)-7);
+  test_denc((uint32_t)8);
+  test_denc((int32_t)-9);
+  test_denc((uint64_t)10);
+  test_denc((int64_t)-11);
+}
+
+// Strings: empty, short, and multi-line with embedded newlines.
+TEST(denc, string)
+{
+  std::string a, b("hi"), c("multi\nline\n");
+  test_denc(a);
+  test_denc(b);
+  test_denc(c);
+}
diff --git a/src/test/crimson/test_errorator.cc b/src/test/crimson/test_errorator.cc
new file mode 100644
index 000000000..939c6cde8
--- /dev/null
+++ b/src/test/crimson/test_errorator.cc
@@ -0,0 +1,99 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <boost/iterator/counting_iterator.hpp>
+#include <numeric>
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "crimson/common/errorator.h"
+#include "crimson/common/errorator-loop.h"
+#include "crimson/common/log.h"
+#include "seastar/core/sleep.hh"
+
+// Exercises crimson::errorator: repeat loops, parallel iteration,
+// move-only payloads, and implicit futurization of plain values.
+struct errorator_test_t : public seastar_test_suite_t {
+  using ertr = crimson::errorator<crimson::ct_error::invarg>;
+  // crimson::repeat must stop once the body yields stop_iteration::yes
+  // (here: after five iterations).
+  ertr::future<> test_do_until() {
+    return crimson::repeat([i=0]() mutable {
+      if (i < 5) {
+        ++i;
+        return ertr::make_ready_future<seastar::stop_iteration>(
+          seastar::stop_iteration::no);
+      } else {
+        return ertr::make_ready_future<seastar::stop_iteration>(
+          seastar::stop_iteration::yes);
+      }
+    });
+  }
+  static constexpr int SIZE = 42;
+  ertr::future<> test_parallel_for_each() {
+    // `sum` lives on the heap so the body lambda can keep a stable raw
+    // pointer while ownership moves into the final continuation.
+    auto sum = std::make_unique<int>(0);
+    return ertr::parallel_for_each(
+      boost::make_counting_iterator(0),
+      boost::make_counting_iterator(SIZE),
+      [sum=sum.get()](int i) {
+        *sum += i;
+      }).safe_then([sum=std::move(sum)] {
+      int expected = std::accumulate(boost::make_counting_iterator(0),
+                                     boost::make_counting_iterator(SIZE),
+                                     0);
+      ASSERT_EQ(*sum, expected);
+    });
+  }
+  // Move-only type proving safe_then() forwards without copying.
+  struct noncopyable_t {
+    constexpr noncopyable_t() = default;
+    ~noncopyable_t() = default;
+    noncopyable_t(noncopyable_t&&) = default;
+  private:
+    noncopyable_t(const noncopyable_t&) = delete;
+    noncopyable_t& operator=(const noncopyable_t&) = delete;
+  };
+  ertr::future<> test_non_copy_then() {
+    return create_noncopyable().safe_then([](auto t) {
+      return ertr::now();
+    });
+  }
+  ertr::future<int> test_futurization() {
+    // we don't want to be enforced to always do `make_ready_future(...)`.
+    // as in seastar::future, the futurization should take care about
+    // turning non-future types (e.g. int) into futurized ones (e.g.
+    // ertr::future<int>).
+    return ertr::now().safe_then([] {
+      return 42;
+    }).safe_then([](int life) {
+      return ertr::make_ready_future<int>(life);
+    });
+  }
+private:
+  ertr::future<noncopyable_t> create_noncopyable() {
+    return ertr::make_ready_future<noncopyable_t>();
+  }
+};
+
+// Each case runs its future to completion through the suite's
+// run_async() helper and rethrows any stored error via unsafe_get0().
+TEST_F(errorator_test_t, basic)
+{
+  run_async([this] {
+    test_do_until().unsafe_get0();
+  });
+}
+
+TEST_F(errorator_test_t, parallel_for_each)
+{
+  run_async([this] {
+    test_parallel_for_each().unsafe_get0();
+  });
+}
+
+TEST_F(errorator_test_t, non_copy_then)
+{
+  run_async([this] {
+    test_non_copy_then().unsafe_get0();
+  });
+}
+
+TEST_F(errorator_test_t, test_futurization)
+{
+  run_async([this] {
+    test_futurization().unsafe_get0();
+  });
+}
diff --git a/src/test/crimson/test_fixed_kv_node_layout.cc b/src/test/crimson/test_fixed_kv_node_layout.cc
new file mode 100644
index 000000000..e6377ec14
--- /dev/null
+++ b/src/test/crimson/test_fixed_kv_node_layout.cc
@@ -0,0 +1,376 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <stdio.h>
+#include <iostream>
+
+#include "gtest/gtest.h"
+
+#include "crimson/common/fixed_kv_node_layout.h"
+
+using namespace crimson;
+using namespace crimson::common;
+
+struct test_val_t {
+ uint32_t t1 = 0;
+ int32_t t2 = 0;
+
+ bool operator==(const test_val_t &rhs) const {
+ return rhs.t1 == t1 && rhs.t2 == t2;
+ }
+ bool operator!=(const test_val_t &rhs) const {
+ return !(*this == rhs);
+ }
+};
+
+struct test_val_le_t {
+ ceph_le32 t1{0};
+ ceph_les32 t2{0};
+
+ test_val_le_t() = default;
+ test_val_le_t(const test_val_le_t &) = default;
+ test_val_le_t(const test_val_t &nv)
+ : t1(nv.t1), t2(nv.t2) {}
+
+ operator test_val_t() const {
+ return test_val_t{t1, t2};
+ }
+};
+
+struct test_meta_t {
+ uint32_t t1 = 0;
+ uint32_t t2 = 0;
+
+ bool operator==(const test_meta_t &rhs) const {
+ return rhs.t1 == t1 && rhs.t2 == t2;
+ }
+ bool operator!=(const test_meta_t &rhs) const {
+ return !(*this == rhs);
+ }
+
+ std::pair<test_meta_t, test_meta_t> split_into(uint32_t pivot) const {
+ return std::make_pair(
+ test_meta_t{t1, pivot},
+ test_meta_t{pivot, t2});
+ }
+
+ static test_meta_t merge_from(const test_meta_t &lhs, const test_meta_t &rhs) {
+ return test_meta_t{lhs.t1, rhs.t2};
+ }
+
+ static std::pair<test_meta_t, test_meta_t>
+ rebalance(const test_meta_t &lhs, const test_meta_t &rhs, uint32_t pivot) {
+ return std::make_pair(
+ test_meta_t{lhs.t1, pivot},
+ test_meta_t{pivot, rhs.t2});
+ }
+};
+
+struct test_meta_le_t {
+ ceph_le32 t1{0};
+ ceph_le32 t2{0};
+
+ test_meta_le_t() = default;
+ test_meta_le_t(const test_meta_le_t &) = default;
+ test_meta_le_t(const test_meta_t &nv)
+ : t1(nv.t1), t2(nv.t2) {}
+
+ operator test_meta_t() const {
+ return test_meta_t{t1, t2};
+ }
+};
+
+constexpr size_t CAPACITY = 339;
+
+struct TestNode : FixedKVNodeLayout<
+ CAPACITY,
+ test_meta_t, test_meta_le_t,
+ uint32_t, ceph_le32,
+ test_val_t, test_val_le_t> {
+ char buf[4096];
+ TestNode() : FixedKVNodeLayout(buf) {
+ memset(buf, 0, sizeof(buf));
+ set_meta({0, std::numeric_limits<uint32_t>::max()});
+ }
+ TestNode(const TestNode &rhs)
+ : FixedKVNodeLayout(buf) {
+ ::memcpy(buf, rhs.buf, sizeof(buf));
+ }
+
+ TestNode &operator=(const TestNode &rhs) {
+ memcpy(buf, rhs.buf, sizeof(buf));
+ return *this;
+ }
+};
+
+TEST(FixedKVNodeTest, basic) {
+ auto node = TestNode();
+ ASSERT_EQ(node.get_size(), 0);
+
+ auto val = test_val_t{ 1, 1 };
+ node.journal_insert(node.begin(), 1, val, nullptr);
+ ASSERT_EQ(node.get_size(), 1);
+
+ auto iter = node.begin();
+ ASSERT_EQ(iter.get_key(), 1);
+ ASSERT_EQ(val, iter.get_val());
+
+ ASSERT_EQ(std::numeric_limits<uint32_t>::max(), iter.get_next_key_or_max());
+}
+
+TEST(FixedKVNodeTest, at_capacity) {
+ auto node = TestNode();
+ ASSERT_EQ(CAPACITY, node.get_capacity());
+
+ ASSERT_EQ(node.get_size(), 0);
+
+ unsigned short num = 0;
+ auto iter = node.begin();
+ while (num < CAPACITY) {
+ node.journal_insert(iter, num, test_val_t{num, num}, nullptr);
+ ++num;
+ ++iter;
+ }
+ ASSERT_EQ(node.get_size(), CAPACITY);
+
+ num = 0;
+ for (auto &i : node) {
+ ASSERT_EQ(i.get_key(), num);
+ ASSERT_EQ(i.get_val(), (test_val_t{num, num}));
+ if (num < (CAPACITY - 1)) {
+ ASSERT_EQ(i.get_next_key_or_max(), num + 1);
+ } else {
+ ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max());
+ }
+ ++num;
+ }
+}
+
+TEST(FixedKVNodeTest, split) {
+ auto node = TestNode();
+
+ ASSERT_EQ(node.get_size(), 0);
+
+ unsigned short num = 0;
+ auto iter = node.begin();
+ while (num < CAPACITY) {
+ node.journal_insert(iter, num, test_val_t{num, num}, nullptr);
+ ++num;
+ ++iter;
+ }
+ ASSERT_EQ(node.get_size(), CAPACITY);
+
+ auto split_left = TestNode();
+ auto split_right = TestNode();
+ node.split_into(split_left, split_right);
+
+ ASSERT_EQ(split_left.get_size() + split_right.get_size(), CAPACITY);
+ ASSERT_EQ(split_left.get_meta().t1, split_left.begin()->get_key());
+ ASSERT_EQ(split_left.get_meta().t2, split_right.get_meta().t1);
+ ASSERT_EQ(split_right.get_meta().t2, std::numeric_limits<uint32_t>::max());
+
+ num = 0;
+ for (auto &i : split_left) {
+ ASSERT_EQ(i.get_key(), num);
+ ASSERT_EQ(i.get_val(), (test_val_t{num, num}));
+ if (num < split_left.get_size() - 1) {
+ ASSERT_EQ(i.get_next_key_or_max(), num + 1);
+ } else {
+ ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max());
+ }
+ ++num;
+ }
+ for (auto &i : split_right) {
+ ASSERT_EQ(i.get_key(), num);
+ ASSERT_EQ(i.get_val(), (test_val_t{num, num}));
+ if (num < CAPACITY - 1) {
+ ASSERT_EQ(i.get_next_key_or_max(), num + 1);
+ } else {
+ ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max());
+ }
+ ++num;
+ }
+ ASSERT_EQ(num, CAPACITY);
+}
+
+TEST(FixedKVNodeTest, merge) {
+ auto node = TestNode();
+ auto node2 = TestNode();
+
+ ASSERT_EQ(node.get_size(), 0);
+ ASSERT_EQ(node2.get_size(), 0);
+
+ unsigned short num = 0;
+ auto iter = node.begin();
+ while (num < CAPACITY/2) {
+ node.journal_insert(iter, num, test_val_t{num, num}, nullptr);
+ ++num;
+ ++iter;
+ }
+ node.set_meta({0, num});
+ node2.set_meta({num, std::numeric_limits<uint32_t>::max()});
+ iter = node2.begin();
+ while (num < (2 * (CAPACITY / 2))) {
+ node2.journal_insert(iter, num, test_val_t{num, num}, nullptr);
+ ++num;
+ ++iter;
+ }
+
+ ASSERT_EQ(node.get_size(), CAPACITY / 2);
+ ASSERT_EQ(node2.get_size(), CAPACITY / 2);
+
+ auto total = node.get_size() + node2.get_size();
+
+ auto node_merged = TestNode();
+ node_merged.merge_from(node, node2);
+
+ ASSERT_EQ(
+ node_merged.get_meta(),
+ (test_meta_t{0, std::numeric_limits<uint32_t>::max()}));
+
+ ASSERT_EQ(node_merged.get_size(), total);
+ num = 0;
+ for (auto &i : node_merged) {
+ ASSERT_EQ(i.get_key(), num);
+ ASSERT_EQ(i.get_val(), (test_val_t{num, num}));
+ if (num < node_merged.get_size() - 1) {
+ ASSERT_EQ(i.get_next_key_or_max(), num + 1);
+ } else {
+ ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max());
+ }
+ ++num;
+ }
+ ASSERT_EQ(num, total);
+}
+
+void run_balance_test(unsigned left, unsigned right, bool prefer_left)
+{
+ auto node = TestNode();
+ auto node2 = TestNode();
+
+ ASSERT_EQ(node.get_size(), 0);
+ ASSERT_EQ(node2.get_size(), 0);
+
+ unsigned short num = 0;
+ auto iter = node.begin();
+ while (num < left) {
+ node.journal_insert(iter, num, test_val_t{num, num}, nullptr);
+ ++num;
+ ++iter;
+ }
+ node.set_meta({0, num});
+ node2.set_meta({num, std::numeric_limits<uint32_t>::max()});
+ iter = node2.begin();
+ while (num < (left + right)) {
+ node2.journal_insert(iter, num, test_val_t{num, num}, nullptr);
+ ++num;
+ ++iter;
+ }
+
+ ASSERT_EQ(node.get_size(), left);
+ ASSERT_EQ(node2.get_size(), right);
+
+ auto total = node.get_size() + node2.get_size();
+
+ auto node_balanced = TestNode();
+ auto node_balanced2 = TestNode();
+ auto pivot = TestNode::balance_into_new_nodes(
+ node,
+ node2,
+ prefer_left,
+ node_balanced,
+ node_balanced2);
+
+ ASSERT_EQ(total, node_balanced.get_size() + node_balanced2.get_size());
+
+ unsigned left_size, right_size;
+ if (total % 2) {
+ if (prefer_left) {
+ left_size = (total/2) + 1;
+ right_size = total/2;
+ } else {
+ left_size = total/2;
+ right_size = (total/2) + 1;
+ }
+ } else {
+ left_size = right_size = total/2;
+ }
+ ASSERT_EQ(pivot, left_size);
+ ASSERT_EQ(left_size, node_balanced.get_size());
+ ASSERT_EQ(right_size, node_balanced2.get_size());
+
+ ASSERT_EQ(
+ node_balanced.get_meta(),
+ (test_meta_t{0, left_size}));
+ ASSERT_EQ(
+ node_balanced2.get_meta(),
+ (test_meta_t{left_size, std::numeric_limits<uint32_t>::max()}));
+
+ num = 0;
+ for (auto &i: node_balanced) {
+ ASSERT_EQ(i.get_key(), num);
+ ASSERT_EQ(i.get_val(), (test_val_t{num, num}));
+ if (num < node_balanced.get_size() - 1) {
+ ASSERT_EQ(i.get_next_key_or_max(), num + 1);
+ } else {
+ ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max());
+ }
+ ++num;
+ }
+ for (auto &i: node_balanced2) {
+ ASSERT_EQ(i.get_key(), num);
+ ASSERT_EQ(i.get_val(), (test_val_t{num, num}));
+ if (num < total - 1) {
+ ASSERT_EQ(i.get_next_key_or_max(), num + 1);
+ } else {
+ ASSERT_EQ(std::numeric_limits<uint32_t>::max(), i.get_next_key_or_max());
+ }
+ ++num;
+ }
+}
+
+TEST(FixedKVNodeTest, balanced) {
+ run_balance_test(CAPACITY / 2, CAPACITY, true);
+ run_balance_test(CAPACITY / 2, CAPACITY, false);
+ run_balance_test(CAPACITY, CAPACITY / 2, true);
+ run_balance_test(CAPACITY, CAPACITY / 2, false);
+ run_balance_test(CAPACITY - 1, CAPACITY / 2, true);
+ run_balance_test(CAPACITY / 2, CAPACITY - 1, false);
+ run_balance_test(CAPACITY / 2, CAPACITY / 2, false);
+}
+
+void run_replay_test(
+ std::vector<std::function<void(TestNode&, TestNode::delta_buffer_t&)>> &&f
+) {
+ TestNode node;
+ for (unsigned i = 0; i < f.size(); ++i) {
+ TestNode::delta_buffer_t buf;
+ TestNode replayed = node;
+ f[i](node, buf);
+ buf.replay(replayed);
+ ASSERT_EQ(node.get_size(), replayed.get_size());
+ ASSERT_EQ(node, replayed);
+ }
+}
+
+TEST(FixedKVNodeTest, replay) {
+ run_replay_test({
+ [](auto &n, auto &b) {
+ n.journal_insert(n.lower_bound(1), 1, test_val_t{1, 1}, &b);
+ ASSERT_EQ(1, n.get_size());
+ },
+ [](auto &n, auto &b) {
+ n.journal_insert(n.lower_bound(3), 3, test_val_t{1, 2}, &b);
+ ASSERT_EQ(2, n.get_size());
+ },
+ [](auto &n, auto &b) {
+ n.journal_remove(n.find(3), &b);
+ ASSERT_EQ(1, n.get_size());
+ },
+ [](auto &n, auto &b) {
+ n.journal_insert(n.lower_bound(2), 2, test_val_t{5, 1}, &b);
+ ASSERT_EQ(2, n.get_size());
+ }
+ });
+
+}
diff --git a/src/test/crimson/test_interruptible_future.cc b/src/test/crimson/test_interruptible_future.cc
new file mode 100644
index 000000000..bb938de24
--- /dev/null
+++ b/src/test/crimson/test_interruptible_future.cc
@@ -0,0 +1,301 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <seastar/core/sleep.hh>
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "crimson/common/interruptible_future.h"
+#include "crimson/common/log.h"
+
+using namespace crimson;
+
+class test_interruption : public std::exception
+{};
+
+class TestInterruptCondition {
+public:
+ TestInterruptCondition(bool interrupt)
+ : interrupt(interrupt) {}
+
+ template <typename T>
+ std::optional<T> may_interrupt() {
+ if (interrupt) {
+ return seastar::futurize<T>::make_exception_future(test_interruption());
+ } else {
+ return std::optional<T>();
+ }
+ }
+
+ template <typename T>
+ static constexpr bool is_interruption_v = std::is_same_v<T, test_interruption>;
+
+ static bool is_interruption(std::exception_ptr& eptr) {
+ if (*eptr.__cxa_exception_type() == typeid(test_interruption))
+ return true;
+ return false;
+ }
+private:
+ bool interrupt = false;
+};
+
+namespace crimson::interruptible {
+template
+thread_local interrupt_cond_t<TestInterruptCondition>
+interrupt_cond<TestInterruptCondition>;
+}
+
+TEST_F(seastar_test_suite_t, basic)
+{
+ using interruptor =
+ interruptible::interruptor<TestInterruptCondition>;
+ run_async([] {
+ interruptor::with_interruption(
+ [] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return interruptor::make_interruptible(seastar::now())
+ .then_interruptible([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ }).then_interruptible([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return errorator<ct_error::enoent>::make_ready_future<>();
+ }).safe_then_interruptible([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return seastar::now();
+ }, errorator<ct_error::enoent>::all_same_way([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ })
+ );
+ }, [](std::exception_ptr) {}, false).get0();
+
+ interruptor::with_interruption(
+ [] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return interruptor::make_interruptible(seastar::now())
+ .then_interruptible([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ });
+ }, [](std::exception_ptr) {
+ ceph_assert(!interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return seastar::now();
+ }, true).get0();
+
+
+ });
+}
+
+TEST_F(seastar_test_suite_t, loops)
+{
+ using interruptor =
+ interruptible::interruptor<TestInterruptCondition>;
+ std::cout << "testing interruptible loops" << std::endl;
+ run_async([] {
+ std::cout << "beginning" << std::endl;
+ interruptor::with_interruption(
+ [] {
+ std::cout << "interruptiion enabled" << std::endl;
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return interruptor::make_interruptible(seastar::now())
+ .then_interruptible([] {
+ std::cout << "test seastar future do_for_each" << std::endl;
+ std::vector<int> vec = {1, 2};
+ return seastar::do_with(std::move(vec), [](auto& vec) {
+ return interruptor::do_for_each(std::begin(vec), std::end(vec), [](int) {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return seastar::now();
+ });
+ });
+ }).then_interruptible([] {
+ std::cout << "test interruptible seastar future do_for_each" << std::endl;
+ std::vector<int> vec = {1, 2};
+ return seastar::do_with(std::move(vec), [](auto& vec) {
+ return interruptor::do_for_each(std::begin(vec), std::end(vec), [](int) {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return interruptor::make_interruptible(seastar::now());
+ });
+ });
+ }).then_interruptible([] {
+ std::cout << "test seastar future repeat" << std::endl;
+ return interruptor::repeat([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return interruptor::make_interruptible(
+ seastar::make_ready_future<
+ seastar::stop_iteration>(
+ seastar::stop_iteration::yes));
+ });
+ }).then_interruptible([] {
+ std::cout << "test interruptible seastar future repeat" << std::endl;
+ return interruptor::repeat([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return seastar::make_ready_future<
+ seastar::stop_iteration>(
+ seastar::stop_iteration::yes);
+ });
+ }).then_interruptible([] {
+ std::cout << "test interruptible errorated future do_for_each" << std::endl;
+ std::vector<int> vec = {1, 2};
+ return seastar::do_with(std::move(vec), [](auto& vec) {
+ using namespace std::chrono_literals;
+ return interruptor::make_interruptible(seastar::now()).then_interruptible([&vec] {
+ return interruptor::do_for_each(std::begin(vec), std::end(vec), [](int) {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return interruptor::make_interruptible(
+ errorator<ct_error::enoent>::make_ready_future<>());
+ }).safe_then_interruptible([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return seastar::now();
+ }, errorator<ct_error::enoent>::all_same_way([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ }));
+ });
+ });
+ }).then_interruptible([] {
+ std::cout << "test errorated future do_for_each" << std::endl;
+ std::vector<int> vec;
+ // set a big enough iteration times to test if there is stack overflow in do_for_each
+ for (int i = 0; i < 1000000; i++) {
+ vec.push_back(i);
+ }
+ return seastar::do_with(std::move(vec), [](auto& vec) {
+ using namespace std::chrono_literals;
+ return interruptor::make_interruptible(seastar::now()).then_interruptible([&vec] {
+ return interruptor::do_for_each(std::begin(vec), std::end(vec), [](int) {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return errorator<ct_error::enoent>::make_ready_future<>();
+ }).safe_then_interruptible([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return seastar::now();
+ }, errorator<ct_error::enoent>::all_same_way([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ }));
+ });
+ });
+ }).then_interruptible([] {
+ ceph_assert(interruptible::interrupt_cond<TestInterruptCondition>.interrupt_cond);
+ return seastar::now();
+ });
+ }, [](std::exception_ptr) {}, false).get0();
+ });
+}
+
+using base_intr = interruptible::interruptor<TestInterruptCondition>;
+
+using base_ertr = errorator<ct_error::enoent, ct_error::eagain>;
+using base_iertr = interruptible::interruptible_errorator<
+ TestInterruptCondition,
+ base_ertr>;
+
+using base2_ertr = base_ertr::extend<ct_error::input_output_error>;
+using base2_iertr = interruptible::interruptible_errorator<
+ TestInterruptCondition,
+ base2_ertr>;
+
+template <typename F>
+auto with_intr(F &&f) {
+ return base_intr::with_interruption_to_error<ct_error::eagain>(
+ std::forward<F>(f),
+ TestInterruptCondition(false));
+}
+
+TEST_F(seastar_test_suite_t, errorated)
+{
+ run_async([] {
+ base_ertr::future<> ret = with_intr(
+ []() {
+ return base_iertr::now();
+ }
+ );
+ ret.unsafe_get0();
+ });
+}
+
+TEST_F(seastar_test_suite_t, errorated_value)
+{
+ run_async([] {
+ base_ertr::future<int> ret = with_intr(
+ []() {
+ return base_iertr::make_ready_future<int>(
+ 1
+ );
+ });
+ EXPECT_EQ(ret.unsafe_get0(), 1);
+ });
+}
+
+TEST_F(seastar_test_suite_t, expand_errorated_value)
+{
+ run_async([] {
+ base2_ertr::future<> ret = with_intr(
+ []() {
+ return base_iertr::make_ready_future<int>(
+ 1
+ ).si_then([](auto) {
+ return base2_iertr::make_ready_future<>();
+ });
+ });
+ ret.unsafe_get0();
+ });
+}
+
+TEST_F(seastar_test_suite_t, interruptible_async)
+{
+ using interruptor =
+ interruptible::interruptor<TestInterruptCondition>;
+
+ run_async([] {
+ interruptor::with_interruption([] {
+ auto fut = interruptor::async([] {
+ interruptor::make_interruptible(
+ seastar::sleep(std::chrono::milliseconds(10))).get();
+ ceph_assert(interruptible::interrupt_cond<
+ TestInterruptCondition>.interrupt_cond);
+ ceph_assert(interruptible::interrupt_cond<
+ TestInterruptCondition>.ref_count == 1);
+ });
+ ceph_assert(interruptible::interrupt_cond<
+ TestInterruptCondition>.interrupt_cond);
+ ceph_assert(interruptible::interrupt_cond<
+ TestInterruptCondition>.ref_count == 1);
+ return fut;
+ }, [](std::exception_ptr) {}, false).get0();
+ });
+}
+
+TEST_F(seastar_test_suite_t, DISABLED_nested_interruptors)
+{
+ run_async([] {
+ base_ertr::future<> ret = with_intr(
+ []() {
+ return base_iertr::now().safe_then_interruptible([]() {
+ return with_intr(
+ []() {
+ return base_iertr::now();
+ }
+ );
+ });
+ }
+ );
+ ret.unsafe_get0();
+ });
+}
+
+#if 0
+// This seems to cause a hang in the gcc-9 linker on bionic
+TEST_F(seastar_test_suite_t, handle_error)
+{
+ run_async([] {
+ base_ertr::future<> ret = with_intr(
+ []() {
+ return base2_iertr::make_ready_future<int>(
+ 1
+ ).handle_error_interruptible(
+ base_iertr::pass_further{},
+ ct_error::assert_all{"crash on eio"}
+ ).si_then([](auto) {
+ return base_iertr::now();
+ });
+ });
+ ret.unsafe_get0();
+ });
+}
+#endif
diff --git a/src/test/crimson/test_lru.cc b/src/test/crimson/test_lru.cc
new file mode 100644
index 000000000..40ab41539
--- /dev/null
+++ b/src/test/crimson/test_lru.cc
@@ -0,0 +1,213 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
+ *
+ * Author: Loic Dachary <loic@dachary.org>
+ * Cheng Cheng <ccheng.leo@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include "gtest/gtest.h"
+#include "crimson/common/shared_lru.h"
+
+class LRUTest : public SharedLRU<unsigned int, int> {
+public:
+ auto add(unsigned int key, int value, bool* existed = nullptr) {
+ auto pv = new int{value};
+ auto ptr = insert(key, std::unique_ptr<int>{pv});
+ if (existed) {
+ *existed = (ptr.get() != pv);
+ }
+ return ptr;
+ }
+};
+
+TEST(LRU, add) {
+ LRUTest cache;
+ unsigned int key = 1;
+ int value1 = 2;
+ bool existed = false;
+ {
+ auto ptr = cache.add(key, value1, &existed);
+ ASSERT_TRUE(ptr);
+ ASSERT_TRUE(ptr.get());
+ ASSERT_EQ(value1, *ptr);
+ ASSERT_FALSE(existed);
+ }
+ {
+ auto ptr = cache.add(key, 3, &existed);
+ ASSERT_EQ(value1, *ptr);
+ ASSERT_TRUE(existed);
+ }
+}
+
+TEST(LRU, empty) {
+ LRUTest cache;
+ unsigned int key = 1;
+ bool existed = false;
+
+ ASSERT_TRUE(cache.empty());
+ {
+ int value1 = 2;
+ auto ptr = cache.add(key, value1, &existed);
+ ASSERT_EQ(value1, *ptr);
+ ASSERT_FALSE(existed);
+ }
+ ASSERT_FALSE(cache.empty());
+
+ cache.clear();
+ ASSERT_TRUE(cache.empty());
+}
+
+TEST(LRU, lookup) {
+ LRUTest cache;
+ unsigned int key = 1;
+ {
+ int value = 2;
+ auto ptr = cache.add(key, value);
+ ASSERT_TRUE(ptr);
+ ASSERT_TRUE(ptr.get());
+ ASSERT_TRUE(cache.find(key).get());
+ ASSERT_EQ(value, *cache.find(key));
+ }
+ ASSERT_TRUE(cache.find(key).get());
+}
+
+TEST(LRU, lookup_or_create) {
+ LRUTest cache;
+ {
+ int value = 2;
+ unsigned int key = 1;
+ ASSERT_TRUE(cache.add(key, value).get());
+ ASSERT_TRUE(cache[key].get());
+ ASSERT_EQ(value, *cache.find(key));
+ }
+ {
+ unsigned int key = 2;
+ ASSERT_TRUE(cache[key].get());
+ ASSERT_EQ(0, *cache.find(key));
+ }
+ ASSERT_TRUE(cache.find(1).get());
+ ASSERT_TRUE(cache.find(2).get());
+}
+
+TEST(LRU, lower_bound) {
+ LRUTest cache;
+
+ {
+ unsigned int key = 1;
+ ASSERT_FALSE(cache.lower_bound(key));
+ int value = 2;
+
+ ASSERT_TRUE(cache.add(key, value).get());
+ ASSERT_TRUE(cache.lower_bound(key).get());
+ EXPECT_EQ(value, *cache.lower_bound(key));
+ }
+}
+
+TEST(LRU, get_next) {
+
+ {
+ LRUTest cache;
+ const unsigned int key = 0;
+ EXPECT_FALSE(cache.upper_bound(key));
+ }
+ {
+ LRUTest cache;
+ const unsigned int key1 = 111;
+ auto ptr1 = cache[key1];
+ const unsigned int key2 = 222;
+ auto ptr2 = cache[key2];
+
+ auto i = cache.upper_bound(0);
+ ASSERT_TRUE(i);
+ EXPECT_EQ(i->first, key1);
+ auto j = cache.upper_bound(i->first);
+ ASSERT_TRUE(j);
+ EXPECT_EQ(j->first, key2);
+ }
+}
+
+TEST(LRU, clear) {
+ LRUTest cache;
+ unsigned int key = 1;
+ int value = 2;
+ cache.add(key, value);
+ {
+ auto found = cache.find(key);
+ ASSERT_TRUE(found);
+ ASSERT_EQ(value, *found);
+ }
+ ASSERT_TRUE(cache.find(key).get());
+ cache.clear();
+ ASSERT_FALSE(cache.find(key));
+ ASSERT_TRUE(cache.empty());
+}
+
+TEST(LRU, eviction) {
+ LRUTest cache{5};
+ bool existed;
+ // add a bunch of elements, some of them will be evicted
+ for (size_t i = 0; i < 2 * cache.capacity(); ++i) {
+ cache.add(i, i, &existed);
+ ASSERT_FALSE(existed);
+ }
+ size_t i = 0;
+ for (; i < cache.capacity(); ++i) {
+ ASSERT_FALSE(cache.find(i));
+ }
+ for (; i < 2 * cache.capacity(); ++i) {
+ ASSERT_TRUE(cache.find(i));
+ }
+}
+
+TEST(LRU, track_weak) {
+ constexpr int SIZE = 5;
+ LRUTest cache{SIZE};
+
+ bool existed = false;
+ // strong reference to keep 0 alive
+ auto ptr = cache.add(0, 0, &existed);
+ ASSERT_FALSE(existed);
+
+ // add a bunch of elements to get 0 evicted
+ for (size_t i = 1; i < 2 * cache.capacity(); ++i) {
+ cache.add(i, i, &existed);
+ ASSERT_FALSE(existed);
+ }
+ // 0 is still reachable via the cache
+ ASSERT_TRUE(cache.find(0));
+ ASSERT_TRUE(cache.find(0).get());
+ ASSERT_EQ(0, *cache.find(0));
+
+ // [0..SIZE) are evicted when adding [SIZE..2*SIZE)
+ // [SIZE..SIZE * 2) were still in the cache before accessing 0,
+ // but SIZE got evicted when accessing 0
+ ASSERT_FALSE(cache.find(SIZE-1));
+ ASSERT_FALSE(cache.find(SIZE));
+ ASSERT_TRUE(cache.find(SIZE+1));
+ ASSERT_TRUE(cache.find(SIZE+1).get());
+ ASSERT_EQ((int)SIZE+1, *cache.find(SIZE+1));
+
+ ptr.reset();
+ // 0 is still reachable, as it is now put back into LRU cache
+ ASSERT_TRUE(cache.find(0));
+}
+
+// Local Variables:
+// compile-command: "cmake --build ../../../build -j 8 --target unittest_seastar_lru && ctest -R unittest_seastar_lru # --gtest_filter=*.* --log-to-stderr=true"
+// End:
diff --git a/src/test/crimson/test_messenger.cc b/src/test/crimson/test_messenger.cc
new file mode 100644
index 000000000..a42572246
--- /dev/null
+++ b/src/test/crimson/test_messenger.cc
@@ -0,0 +1,3874 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_argparse.h"
+#include "common/ceph_time.h"
+#include "messages/MPing.h"
+#include "messages/MCommand.h"
+#include "messages/MCommandReply.h"
+#include "messages/MOSDOp.h"
+#include "messages/MOSDOpReply.h"
+#include "crimson/auth/DummyAuth.h"
+#include "crimson/common/log.h"
+#include "crimson/net/Connection.h"
+#include "crimson/net/Dispatcher.h"
+#include "crimson/net/Messenger.h"
+#include "crimson/net/Interceptor.h"
+
+#include <map>
+#include <random>
+#include <boost/program_options.hpp>
+#include <fmt/format.h>
+#include <fmt/ostream.h>
+#include <seastar/core/app-template.hh>
+#include <seastar/core/do_with.hh>
+#include <seastar/core/future-util.hh>
+#include <seastar/core/gate.hh>
+#include <seastar/core/reactor.hh>
+#include <seastar/core/sleep.hh>
+#include <seastar/core/with_timeout.hh>
+
+#include "test_messenger.h"
+
+using namespace std::chrono_literals;
+namespace bpo = boost::program_options;
+using crimson::common::local_conf;
+
+namespace {
+
+seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+}
+
+static std::random_device rd;
+static std::default_random_engine rng{rd()};
+static bool verbose = false;
+
+static entity_addr_t get_server_addr() {
+ static int port = 9030;
+ ++port;
+ entity_addr_t saddr;
+ saddr.parse("127.0.0.1", nullptr);
+ saddr.set_port(port);
+ return saddr;
+}
+
+template <typename T, typename... Args>
+seastar::future<T*> create_sharded(Args... args) {
+ // we should only construct/stop shards on #0
+ return seastar::smp::submit_to(0, [=] {
+ auto sharded_obj = seastar::make_lw_shared<seastar::sharded<T>>();
+ return sharded_obj->start(args...
+ ).then([sharded_obj] {
+ seastar::engine().at_exit([sharded_obj] {
+ return sharded_obj->stop().then([sharded_obj] {});
+ });
+ return sharded_obj.get();
+ });
+ }).then([](seastar::sharded<T> *ptr_shard) {
+ return &ptr_shard->local();
+ });
+}
+
+class ShardedGates
+ : public seastar::peering_sharded_service<ShardedGates> {
+public:
+ ShardedGates() = default;
+ ~ShardedGates() {
+ assert(gate.is_closed());
+ }
+
+ template <typename Func>
+ void dispatch_in_background(const char *what, Func &&f) {
+ std::ignore = seastar::with_gate(
+ container().local().gate, std::forward<Func>(f)
+ ).handle_exception([what](std::exception_ptr eptr) {
+ try {
+ std::rethrow_exception(eptr);
+ } catch (std::exception &e) {
+ logger().error("ShardedGates::dispatch_in_background: "
+ "{} got exxception {}", what, e.what());
+ }
+ });
+ }
+
+ seastar::future<> close() {
+ return container().invoke_on_all([](auto &local) {
+ return local.gate.close();
+ });
+ }
+
+ static seastar::future<ShardedGates*> create() {
+ return create_sharded<ShardedGates>();
+ }
+
+  // seastar::future<> stop() is intentionally not implemented
+
+private:
+ seastar::gate gate;
+};
+
+static seastar::future<> test_echo(unsigned rounds,
+ double keepalive_ratio)
+{
+ struct test_state {
+ struct Server final
+ : public crimson::net::Dispatcher {
+ ShardedGates &gates;
+ crimson::net::MessengerRef msgr;
+ crimson::auth::DummyAuthClientServer dummy_auth;
+
+ Server(ShardedGates &gates) : gates{gates} {}
+
+ void ms_handle_accept(
+ crimson::net::ConnectionRef conn,
+ seastar::shard_id prv_shard,
+ bool is_replace) override {
+ logger().info("server accepted {}", *conn);
+ ceph_assert(prv_shard == seastar::this_shard_id());
+ ceph_assert(!is_replace);
+ }
+
+ std::optional<seastar::future<>> ms_dispatch(
+ crimson::net::ConnectionRef c, MessageRef m) override {
+ if (verbose) {
+ logger().info("server got {}", *m);
+ }
+ // reply with a pong
+ gates.dispatch_in_background("echo_send_pong", [c] {
+ return c->send(crimson::make_message<MPing>());
+ });
+ return {seastar::now()};
+ }
+
+ seastar::future<> init(const entity_name_t& name,
+ const std::string& lname,
+ const uint64_t nonce,
+ const entity_addr_t& addr) {
+ msgr = crimson::net::Messenger::create(
+ name, lname, nonce, false);
+ msgr->set_default_policy(crimson::net::SocketPolicy::stateless_server(0));
+ msgr->set_auth_client(&dummy_auth);
+ msgr->set_auth_server(&dummy_auth);
+ return msgr->bind(entity_addrvec_t{addr}).safe_then([this] {
+ return msgr->start({this});
+ }, crimson::net::Messenger::bind_ertr::all_same_way(
+ [addr] (const std::error_code& e) {
+ logger().error("test_echo(): "
+ "there is another instance running at {}", addr);
+ ceph_abort();
+ }));
+ }
+ seastar::future<> shutdown() {
+ ceph_assert(msgr);
+ msgr->stop();
+ return msgr->shutdown();
+ }
+ };
+
+ class Client final
+ : public crimson::net::Dispatcher,
+ public seastar::peering_sharded_service<Client> {
+ public:
+ Client(seastar::shard_id primary_sid,
+ unsigned rounds,
+ double keepalive_ratio,
+ ShardedGates *gates)
+ : primary_sid{primary_sid},
+ keepalive_dist(std::bernoulli_distribution{keepalive_ratio}),
+ rounds(rounds),
+ gates{*gates} {}
+
+ seastar::future<> init(const entity_name_t& name,
+ const std::string& lname,
+ const uint64_t nonce) {
+ assert(seastar::this_shard_id() == primary_sid);
+ msgr = crimson::net::Messenger::create(
+ name, lname, nonce, false);
+ msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0));
+ msgr->set_auth_client(&dummy_auth);
+ msgr->set_auth_server(&dummy_auth);
+ return msgr->start({this});
+ }
+
+ seastar::future<> shutdown() {
+ assert(seastar::this_shard_id() == primary_sid);
+ ceph_assert(msgr);
+ msgr->stop();
+ return msgr->shutdown();
+ }
+
+ seastar::future<> dispatch_pingpong(const entity_addr_t& peer_addr) {
+ assert(seastar::this_shard_id() == primary_sid);
+ mono_time start_time = mono_clock::now();
+ auto conn = msgr->connect(peer_addr, entity_name_t::TYPE_OSD);
+ return seastar::futurize_invoke([this, conn] {
+ return do_dispatch_pingpong(conn);
+ }).then([] {
+ // 500ms should be enough to establish the connection
+ return seastar::sleep(500ms);
+ }).then([this, conn, start_time] {
+ return container().invoke_on(
+ conn->get_shard_id(),
+ [pconn=&*conn, start_time](auto &local) {
+ assert(pconn->is_connected());
+ auto session = local.find_session(pconn);
+ std::chrono::duration<double> dur_handshake = session->connected_time - start_time;
+ std::chrono::duration<double> dur_pingpong = session->finish_time - session->connected_time;
+ logger().info("{}: handshake {}, pingpong {}",
+ *pconn, dur_handshake.count(), dur_pingpong.count());
+ }).then([conn] {});
+ });
+ }
+
+ static seastar::future<Client*> create(
+ unsigned rounds,
+ double keepalive_ratio,
+ ShardedGates *gates) {
+ return create_sharded<Client>(
+ seastar::this_shard_id(),
+ rounds,
+ keepalive_ratio,
+ gates);
+ }
+
+ private:
+ struct PingSession : public seastar::enable_shared_from_this<PingSession> {
+ unsigned count = 0u;
+ mono_time connected_time;
+ mono_time finish_time;
+ };
+ using PingSessionRef = seastar::shared_ptr<PingSession>;
+
+ void ms_handle_connect(
+ crimson::net::ConnectionRef conn,
+ seastar::shard_id prv_shard) override {
+ auto &local = container().local();
+ assert(prv_shard == seastar::this_shard_id());
+ auto session = seastar::make_shared<PingSession>();
+ auto [i, added] = local.sessions.emplace(&*conn, session);
+ std::ignore = i;
+ ceph_assert(added);
+ session->connected_time = mono_clock::now();
+ }
+
+  // Dispatcher hook: count an incoming pong. Once the expected number of
+  // rounds arrives, notify the primary shard in the background so that
+  // do_dispatch_pingpong() can resolve the connection's pending promise.
+  std::optional<seastar::future<>> ms_dispatch(
+      crimson::net::ConnectionRef c, MessageRef m) override {
+    auto &local = container().local();
+    auto session = local.find_session(&*c);
+    ++(session->count);
+    if (verbose) {
+      logger().info("client ms_dispatch {}", session->count);
+    }
+
+    if (session->count > rounds) {
+      // more pongs than pings sent: protocol error, fail loudly
+      logger().error("{}: got {} pongs, more than expected {}", *c, session->count, rounds);
+      ceph_abort();
+    } else if (session->count == rounds) {
+      logger().info("{}: finished receiving {} pongs", *c, session->count);
+      session->finish_time = mono_clock::now();
+      gates.dispatch_in_background("echo_notify_done", [c, this] {
+        return container().invoke_on(primary_sid, [pconn=&*c](auto &local) {
+          auto found = local.pending_conns.find(pconn);
+          ceph_assert(found != local.pending_conns.end());
+          found->second.set_value();
+        }).then([c] {});  // keep the connection alive across the shard hop
+      });
+    }
+    return {seastar::now()};
+  }
+
+  // Look up the PingSession tracked for a connection on this shard.
+  // The session must already exist (registered by ms_handle_connect);
+  // aborts otherwise.
+  PingSessionRef find_session(crimson::net::Connection *c) {
+    auto found = sessions.find(c);
+    // assert the condition directly instead of `if (...) ceph_assert(false)`,
+    // so the abort message shows the actual failed expression
+    ceph_assert(found != sessions.end());
+    return found->second;
+  }
+
+  // Drive one ping-pong exchange: keep sending MPing messages, randomly
+  // interleaved with keepalives, until `rounds` pings have gone out; then
+  // wait on a promise that ms_dispatch() resolves when all pongs are back.
+  seastar::future<> do_dispatch_pingpong(crimson::net::ConnectionRef conn) {
+    auto [i, added] = pending_conns.emplace(&*conn, seastar::promise<>());
+    std::ignore = i;
+    ceph_assert(added);  // only one outstanding exchange per connection
+    return seastar::do_with(0u, 0u,
+        [this, conn](auto &count_ping, auto &count_keepalive) {
+      return seastar::do_until(
+        [this, conn, &count_ping, &count_keepalive] {
+          bool stop = (count_ping == rounds);
+          if (stop) {
+            logger().info("{}: finished sending {} pings with {} keepalives",
+                          *conn, count_ping, count_keepalive);
+          }
+          return stop;
+        },
+        [this, conn, &count_ping, &count_keepalive] {
+          // repeat until a real ping goes out; keepalives don't count as rounds
+          return seastar::repeat([this, conn, &count_ping, &count_keepalive] {
+            if (keepalive_dist(rng)) {
+              return conn->send_keepalive(
+              ).then([&count_keepalive] {
+                count_keepalive += 1;
+                return seastar::make_ready_future<seastar::stop_iteration>(
+                    seastar::stop_iteration::no);
+              });
+            } else {
+              return conn->send(crimson::make_message<MPing>()
+              ).then([&count_ping] {
+                count_ping += 1;
+                return seastar::make_ready_future<seastar::stop_iteration>(
+                    seastar::stop_iteration::yes);
+              });
+            }
+          });
+        }).then([this, conn] {
+          // all pings sent; block until ms_dispatch() reports all pongs
+          auto found = pending_conns.find(&*conn);
+          assert(found != pending_conns.end());
+          return found->second.get_future();
+        }
+      );
+    });
+  }
+
+ private:
+ // primary shard only
+ const seastar::shard_id primary_sid;
+ std::bernoulli_distribution keepalive_dist;
+ crimson::net::MessengerRef msgr;
+ std::map<crimson::net::Connection*, seastar::promise<>> pending_conns;
+ crimson::auth::DummyAuthClientServer dummy_auth;
+
+ // per shard
+ const unsigned rounds;
+ std::map<crimson::net::Connection*, PingSessionRef> sessions;
+ ShardedGates &gates;
+ };
+ };
+
+ logger().info("test_echo(rounds={}, keepalive_ratio={}):",
+ rounds, keepalive_ratio);
+ return ShardedGates::create(
+ ).then([rounds, keepalive_ratio](auto *gates) {
+ return seastar::when_all_succeed(
+ test_state::Client::create(rounds, keepalive_ratio, gates),
+ test_state::Client::create(rounds, keepalive_ratio, gates),
+ seastar::make_ready_future<ShardedGates*>(gates));
+ }).then_unpack([](auto *client1, auto *client2, auto *gates) {
+ auto server1 = seastar::make_shared<test_state::Server>(*gates);
+ auto server2 = seastar::make_shared<test_state::Server>(*gates);
+ // start servers and clients
+ auto addr1 = get_server_addr();
+ auto addr2 = get_server_addr();
+ addr1.set_type(entity_addr_t::TYPE_MSGR2);
+ addr2.set_type(entity_addr_t::TYPE_MSGR2);
+ return seastar::when_all_succeed(
+ server1->init(entity_name_t::OSD(0), "server1", 1, addr1),
+ server2->init(entity_name_t::OSD(1), "server2", 2, addr2),
+ client1->init(entity_name_t::OSD(2), "client1", 3),
+ client2->init(entity_name_t::OSD(3), "client2", 4)
+ // dispatch pingpoing
+ ).then_unpack([client1, client2, server1, server2] {
+ return seastar::when_all_succeed(
+ // test connecting in parallel, accepting in parallel
+ client1->dispatch_pingpong(server1->msgr->get_myaddr()),
+ client1->dispatch_pingpong(server2->msgr->get_myaddr()),
+ client2->dispatch_pingpong(server1->msgr->get_myaddr()),
+ client2->dispatch_pingpong(server2->msgr->get_myaddr()));
+ // shutdown
+ }).then_unpack([client1] {
+ logger().info("client1 shutdown...");
+ return client1->shutdown();
+ }).then([client2] {
+ logger().info("client2 shutdown...");
+ return client2->shutdown();
+ }).then([server1] {
+ logger().info("server1 shutdown...");
+ return server1->shutdown();
+ }).then([server2] {
+ logger().info("server2 shutdown...");
+ return server2->shutdown();
+ }).then([] {
+ logger().info("test_echo() done!\n");
+ }).handle_exception([](auto eptr) {
+ logger().error("test_echo() failed: got exception {}", eptr);
+ throw;
+ }).finally([gates, server1, server2] {
+ return gates->close();
+ });
+ });
+}
+
+// Verify that messengers can be shut down while traffic is still in flight:
+// the client floods the server with pings and is shut down preemptively,
+// then the server is shut down.
+seastar::future<> test_preemptive_shutdown() {
+  struct test_state {
+    // Echo server: replies to every incoming message with an MPing.
+    class Server final
+      : public crimson::net::Dispatcher {
+      crimson::net::MessengerRef msgr;
+      crimson::auth::DummyAuthClientServer dummy_auth;
+
+      std::optional<seastar::future<>> ms_dispatch(
+          crimson::net::ConnectionRef c, MessageRef m) override {
+        // fire-and-forget reply; send failures during shutdown are expected
+        std::ignore = c->send(crimson::make_message<MPing>());
+        return {seastar::now()};
+      }
+
+     public:
+      seastar::future<> init(const entity_name_t& name,
+                             const std::string& lname,
+                             const uint64_t nonce,
+                             const entity_addr_t& addr) {
+        msgr = crimson::net::Messenger::create(
+            name, lname, nonce, true);
+        msgr->set_default_policy(crimson::net::SocketPolicy::stateless_server(0));
+        msgr->set_auth_client(&dummy_auth);
+        msgr->set_auth_server(&dummy_auth);
+        return msgr->bind(entity_addrvec_t{addr}).safe_then([this] {
+          return msgr->start({this});
+        }, crimson::net::Messenger::bind_ertr::all_same_way(
+            [addr] (const std::error_code& e) {
+          logger().error("test_preemptive_shutdown(): "
+                         "there is another instance running at {}", addr);
+          ceph_abort();
+        }));
+      }
+      entity_addr_t get_addr() const {
+        return msgr->get_myaddr();
+      }
+      seastar::future<> shutdown() {
+        msgr->stop();
+        return msgr->shutdown();
+      }
+    };
+
+    // Client: sends pings in a background loop until told to stop.
+    class Client final
+      : public crimson::net::Dispatcher {
+      crimson::net::MessengerRef msgr;
+      crimson::auth::DummyAuthClientServer dummy_auth;
+
+      bool stop_send = false;
+      seastar::promise<> stopped_send_promise;
+
+      std::optional<seastar::future<>> ms_dispatch(
+          crimson::net::ConnectionRef, MessageRef m) override {
+        // replies from the server are ignored
+        return {seastar::now()};
+      }
+
+     public:
+      seastar::future<> init(const entity_name_t& name,
+                             const std::string& lname,
+                             const uint64_t nonce) {
+        msgr = crimson::net::Messenger::create(
+            name, lname, nonce, true);
+        msgr->set_default_policy(crimson::net::SocketPolicy::lossy_client(0));
+        msgr->set_auth_client(&dummy_auth);
+        msgr->set_auth_server(&dummy_auth);
+        return msgr->start({this});
+      }
+      // Start an endless ping loop in the background; its completion (or
+      // failure) is forwarded to stopped_send_promise, which shutdown()
+      // waits on.
+      void send_pings(const entity_addr_t& addr) {
+        auto conn = msgr->connect(addr, entity_name_t::TYPE_OSD);
+        // forwarded to stopped_send_promise
+        (void) seastar::do_until(
+          [this] { return stop_send; },
+          [conn] {
+            return conn->send(crimson::make_message<MPing>()).then([] {
+              return seastar::sleep(0ms);
+            });
+          }
+        ).then_wrapped([this, conn] (auto fut) {
+          fut.forward_to(std::move(stopped_send_promise));
+        });
+      }
+      seastar::future<> shutdown() {
+        msgr->stop();
+        return msgr->shutdown().then([this] {
+          stop_send = true;
+          return stopped_send_promise.get_future();
+        });
+      }
+    };
+  };
+
+  logger().info("test_preemptive_shutdown():");
+  auto server = seastar::make_shared<test_state::Server>();
+  auto client = seastar::make_shared<test_state::Client>();
+  auto addr = get_server_addr();
+  addr.set_type(entity_addr_t::TYPE_MSGR2);
+  addr.set_family(AF_INET);
+  return seastar::when_all_succeed(
+    server->init(entity_name_t::OSD(6), "server4", 7, addr),
+    client->init(entity_name_t::OSD(7), "client4", 8)
+  ).then_unpack([server, client] {
+    client->send_pings(server->get_addr());
+    return seastar::sleep(100ms);
+  }).then([client] {
+    logger().info("client shutdown...");
+    return client->shutdown();
+  }).then([server] {
+    logger().info("server shutdown...");
+    return server->shutdown();
+  }).then([] {
+    logger().info("test_preemptive_shutdown() done!\n");
+  }).handle_exception([server, client] (auto eptr) {
+    logger().error("test_preemptive_shutdown() failed: got exception {}", eptr);
+    // handle_exception() invokes this continuation with an exception_ptr
+    // argument, NOT inside a catch block; a bare `throw;` here has no active
+    // exception and would call std::terminate(). Rethrow explicitly instead.
+    std::rethrow_exception(eptr);
+  });
+}
+
+using ceph::msgr::v2::Tag;
+using crimson::net::bp_action_t;
+using crimson::net::bp_type_t;
+using crimson::net::Breakpoint;
+using crimson::net::Connection;
+using crimson::net::ConnectionRef;
+using crimson::net::custom_bp_t;
+using crimson::net::Dispatcher;
+using crimson::net::Interceptor;
+using crimson::net::Messenger;
+using crimson::net::MessengerRef;
+using crimson::net::SocketPolicy;
+using crimson::net::tag_bp_t;
+using namespace ceph::net::test;
+
+// Per-breakpoint hit counter (value-initialized to zero).
+struct counter_t { unsigned counter = 0; };
+
+// Lifecycle state of a connection tracked by the failover tests.
+enum class conn_state_t {
+  unknown = 0,   // registered but not yet established/closed
+  established,   // protocol ready with no pending traffic
+  closed,        // closed without having been replaced
+  replaced,      // superseded by a reconnecting session
+};
+
+// Stream a conn_state_t as its symbolic name; aborts on an invalid value.
+std::ostream& operator<<(std::ostream& out, const conn_state_t& state) {
+  const char *name = nullptr;
+  switch (state) {
+  case conn_state_t::unknown:     name = "unknown";     break;
+  case conn_state_t::established: name = "established"; break;
+  case conn_state_t::closed:      name = "closed";      break;
+  case conn_state_t::replaced:    name = "replaced";    break;
+  default:
+    ceph_abort();
+  }
+  return out << name;
+}
+
+} // anonymous namespace
+
+#if FMT_VERSION >= 90000
+// fmt v9+ no longer falls back to operator<< implicitly; opt in explicitly.
+template<>
+struct fmt::formatter<conn_state_t> : fmt::ostream_formatter {};
+#endif
+
+namespace {
+
+// Per-connection observations accumulated by TestInterceptor, keyed by
+// registration order (index). Counters are bumped at breakpoints and
+// dispatcher callbacks; the assert_*() helpers throw std::runtime_error
+// with a descriptive message when an expectation is not met.
+struct ConnResult {
+  ConnectionRef conn;
+  unsigned index;
+  conn_state_t state = conn_state_t::unknown;
+
+  // client-side counters
+  unsigned connect_attempts = 0;
+  unsigned client_connect_attempts = 0;
+  unsigned client_reconnect_attempts = 0;
+  unsigned cnt_connect_dispatched = 0;
+
+  // server-side counters
+  unsigned accept_attempts = 0;
+  unsigned server_connect_attempts = 0;
+  unsigned server_reconnect_attempts = 0;
+  unsigned cnt_accept_dispatched = 0;
+
+  // reset notifications
+  unsigned cnt_reset_dispatched = 0;
+  unsigned cnt_remote_reset_dispatched = 0;
+
+  ConnResult(ConnectionRef conn, unsigned index)
+    : conn(conn), index(index) {}
+
+  // Throw (rather than abort) so the failing expectation is reported with
+  // both expression texts and the actual/expected values.
+  template <typename T>
+  void _assert_eq(const char* expr_actual, T actual,
+                  const char* expr_expected, T expected) const {
+    if (actual != expected) {
+      throw std::runtime_error(fmt::format(
+          "[{}] {} '{}' is actually {}, not the expected '{}' {}",
+          index, *conn, expr_actual, actual, expr_expected, expected));
+    }
+  }
+
+// stringizes both sides so failures show the original expressions
+#define ASSERT_EQUAL(actual, expected) \
+  _assert_eq(#actual, actual, #expected, expected)
+
+  void assert_state_at(conn_state_t expected) const {
+    ASSERT_EQUAL(state, expected);
+  }
+
+  // full client-side check
+  void assert_connect(unsigned attempts,
+                      unsigned connects,
+                      unsigned reconnects,
+                      unsigned dispatched) const {
+    ASSERT_EQUAL(connect_attempts, attempts);
+    ASSERT_EQUAL(client_connect_attempts, connects);
+    ASSERT_EQUAL(client_reconnect_attempts, reconnects);
+    ASSERT_EQUAL(cnt_connect_dispatched, dispatched);
+  }
+
+  // reduced client-side check (ignores connect/reconnect breakdown)
+  void assert_connect(unsigned attempts,
+                      unsigned dispatched) const {
+    ASSERT_EQUAL(connect_attempts, attempts);
+    ASSERT_EQUAL(cnt_connect_dispatched, dispatched);
+  }
+
+  // full server-side check
+  void assert_accept(unsigned attempts,
+                     unsigned accepts,
+                     unsigned reaccepts,
+                     unsigned dispatched) const {
+    ASSERT_EQUAL(accept_attempts, attempts);
+    ASSERT_EQUAL(server_connect_attempts, accepts);
+    ASSERT_EQUAL(server_reconnect_attempts, reaccepts);
+    ASSERT_EQUAL(cnt_accept_dispatched, dispatched);
+  }
+
+  // reduced server-side check (ignores accept/reaccept breakdown)
+  void assert_accept(unsigned attempts,
+                     unsigned dispatched) const {
+    ASSERT_EQUAL(accept_attempts, attempts);
+    ASSERT_EQUAL(cnt_accept_dispatched, dispatched);
+  }
+
+  void assert_reset(unsigned local, unsigned remote) const {
+    ASSERT_EQUAL(cnt_reset_dispatched, local);
+    ASSERT_EQUAL(cnt_remote_reset_dispatched, remote);
+  }
+
+  // Dump all counters for diagnostics when an expectation fails.
+  void dump() const {
+    logger().info("\nResult({}):\n"
+                  " conn: [{}] {}:\n"
+                  " state: {}\n"
+                  " connect_attempts: {}\n"
+                  " client_connect_attempts: {}\n"
+                  " client_reconnect_attempts: {}\n"
+                  " cnt_connect_dispatched: {}\n"
+                  " accept_attempts: {}\n"
+                  " server_connect_attempts: {}\n"
+                  " server_reconnect_attempts: {}\n"
+                  " cnt_accept_dispatched: {}\n"
+                  " cnt_reset_dispatched: {}\n"
+                  " cnt_remote_reset_dispatched: {}\n",
+                  static_cast<const void*>(this),
+                  index, *conn,
+                  state,
+                  connect_attempts,
+                  client_connect_attempts,
+                  client_reconnect_attempts,
+                  cnt_connect_dispatched,
+                  accept_attempts,
+                  server_connect_attempts,
+                  server_reconnect_attempts,
+                  cnt_accept_dispatched,
+                  cnt_reset_dispatched,
+                  cnt_remote_reset_dispatched);
+  }
+};
+using ConnResults = std::vector<ConnResult>;
+
+// Interceptor that records per-connection events into ConnResults and lets
+// tests inject FAULT/BLOCK/STALL actions at configured breakpoints.
+// All mutable state lives on the primary shard; intercept() hops there.
+struct TestInterceptor : public Interceptor {
+  // breakpoint -> (hit count -> action to take at that hit)
+  std::map<Breakpoint, std::map<unsigned, bp_action_t>> breakpoints;
+  std::map<Breakpoint, counter_t> breakpoints_counter;
+  std::map<Connection*, unsigned> conns;  // connection -> results index
+  ConnResults results;
+  std::optional<seastar::abort_source> signal;  // wakes a pending wait()
+  const seastar::shard_id primary_sid;
+
+  TestInterceptor() : primary_sid{seastar::this_shard_id()} {}
+
+  // only used for copy breakpoint configurations
+  TestInterceptor(const TestInterceptor& other) : primary_sid{other.primary_sid} {
+    assert(other.breakpoints_counter.empty());
+    assert(other.conns.empty());
+    assert(other.results.empty());
+    breakpoints = other.breakpoints;
+    assert(!other.signal);
+    assert(seastar::this_shard_id() == primary_sid);
+  }
+
+  // Inject a fault at the round-th hit of bp (1-based).
+  void make_fault(Breakpoint bp, unsigned round = 1) {
+    assert(round >= 1);
+    breakpoints[bp][round] = bp_action_t::FAULT;
+  }
+
+  // Block at the round-th hit of bp until unblock() is called.
+  void make_block(Breakpoint bp, unsigned round = 1) {
+    assert(round >= 1);
+    breakpoints[bp][round] = bp_action_t::BLOCK;
+  }
+
+  // Stall the protocol at the round-th hit of bp.
+  void make_stall(Breakpoint bp, unsigned round = 1) {
+    assert(round >= 1);
+    breakpoints[bp][round] = bp_action_t::STALL;
+  }
+
+  // Return the tracked result for conn, or nullptr if unregistered.
+  ConnResult* find_result(Connection *conn) {
+    assert(seastar::this_shard_id() == primary_sid);
+    auto it = conns.find(conn);
+    if (it == conns.end()) {
+      return nullptr;
+    } else {
+      return &results[it->second];
+    }
+  }
+
+  // Sleep until notify() fires; throws after a 10s timeout.
+  seastar::future<> wait() {
+    assert(seastar::this_shard_id() == primary_sid);
+    assert(!signal);
+    signal = seastar::abort_source();
+    return seastar::sleep_abortable(10s, *signal).then([] {
+      throw std::runtime_error("Timeout (10s) in TestInterceptor::wait()");
+    }).handle_exception_type([] (const seastar::sleep_aborted& e) {
+      // wait done!
+    });
+  }
+
+  // Wake a pending wait(), if any.
+  void notify() {
+    assert(seastar::this_shard_id() == primary_sid);
+    if (signal) {
+      signal->request_abort();
+      signal = std::nullopt;
+    }
+  }
+
+ private:
+  void register_conn(ConnectionRef conn) override {
+    auto result = find_result(&*conn);
+    if (result != nullptr) {
+      logger().error("The connection [{}] {} already exists when register {}",
+                     result->index, *result->conn, *conn);
+      ceph_abort();
+    }
+    unsigned index = results.size();
+    results.emplace_back(conn, index);
+    conns[&*conn] = index;
+    notify();
+    logger().info("[{}] {} new connection registered", index, *conn);
+  }
+
+  void register_conn_closed(ConnectionRef conn) override {
+    auto result = find_result(&*conn);
+    if (result == nullptr) {
+      logger().error("Untracked closed connection: {}", *conn);
+      ceph_abort();
+    }
+
+    // a replaced connection keeps its `replaced` state on close
+    if (result->state != conn_state_t::replaced) {
+      result->state = conn_state_t::closed;
+    }
+    notify();
+    logger().info("[{}] {} closed({})", result->index, *conn, result->state);
+  }
+
+  void register_conn_ready(ConnectionRef conn) override {
+    auto result = find_result(&*conn);
+    if (result == nullptr) {
+      logger().error("Untracked ready connection: {}", *conn);
+      ceph_abort();
+    }
+
+    ceph_assert(conn->is_protocol_ready());
+    notify();
+    logger().info("[{}] {} ready", result->index, *conn);
+  }
+
+  void register_conn_replaced(ConnectionRef conn) override {
+    auto result = find_result(&*conn);
+    if (result == nullptr) {
+      logger().error("Untracked replaced connection: {}", *conn);
+      ceph_abort();
+    }
+
+    result->state = conn_state_t::replaced;
+    logger().info("[{}] {} {}", result->index, *conn, result->state);
+  }
+
+  // Called from the connection's shard; hops to the primary shard to update
+  // counters, then resolves the single action to take. Multiple non-CONTINUE
+  // actions at the same point are incompatible and abort.
+  seastar::future<bp_action_t>
+  intercept(Connection &_conn, std::vector<Breakpoint> bps) override {
+    assert(bps.size() >= 1);
+    Connection *conn = &_conn;
+
+    return seastar::smp::submit_to(primary_sid, [conn, bps, this] {
+      std::vector<bp_action_t> actions;
+      for (const Breakpoint &bp : bps) {
+        ++breakpoints_counter[bp].counter;
+
+        auto result = find_result(&*conn);
+        if (result == nullptr) {
+          logger().error("Untracked intercepted connection: {}, at breakpoint {}({})",
+                         *conn, bp, breakpoints_counter[bp].counter);
+          ceph_abort();
+        }
+
+        if (bp == custom_bp_t::SOCKET_CONNECTING) {
+          ++result->connect_attempts;
+          logger().info("[Test] connect_attempts={}", result->connect_attempts);
+        } else if (bp == tag_bp_t{Tag::CLIENT_IDENT, bp_type_t::WRITE}) {
+          ++result->client_connect_attempts;
+          logger().info("[Test] client_connect_attempts={}", result->client_connect_attempts);
+        } else if (bp == tag_bp_t{Tag::SESSION_RECONNECT, bp_type_t::WRITE}) {
+          ++result->client_reconnect_attempts;
+          logger().info("[Test] client_reconnect_attempts={}", result->client_reconnect_attempts);
+        } else if (bp == custom_bp_t::SOCKET_ACCEPTED) {
+          ++result->accept_attempts;
+          logger().info("[Test] accept_attempts={}", result->accept_attempts);
+        } else if (bp == tag_bp_t{Tag::CLIENT_IDENT, bp_type_t::READ}) {
+          ++result->server_connect_attempts;
+          // fix log typo: was "server_connect_attemps"
+          logger().info("[Test] server_connect_attempts={}", result->server_connect_attempts);
+        } else if (bp == tag_bp_t{Tag::SESSION_RECONNECT, bp_type_t::READ}) {
+          ++result->server_reconnect_attempts;
+          logger().info("[Test] server_reconnect_attempts={}", result->server_reconnect_attempts);
+        }
+
+        auto it_bp = breakpoints.find(bp);
+        if (it_bp != breakpoints.end()) {
+          auto it_cnt = it_bp->second.find(breakpoints_counter[bp].counter);
+          if (it_cnt != it_bp->second.end()) {
+            logger().info("[{}] {} intercepted {}({}) => {}",
+                          result->index, *conn, bp,
+                          breakpoints_counter[bp].counter, it_cnt->second);
+            actions.emplace_back(it_cnt->second);
+            continue;
+          }
+        }
+        logger().info("[{}] {} intercepted {}({})",
+                      result->index, *conn, bp, breakpoints_counter[bp].counter);
+        actions.emplace_back(bp_action_t::CONTINUE);
+      }
+
+      bp_action_t action = bp_action_t::CONTINUE;
+      for (bp_action_t &a : actions) {
+        if (a != bp_action_t::CONTINUE) {
+          if (action == bp_action_t::CONTINUE) {
+            action = a;
+          } else {
+            ceph_abort("got multiple incompatible actions");
+          }
+        }
+      }
+      return seastar::make_ready_future<bp_action_t>(action);
+    });
+  }
+};
+
+// Translate the test policy enum into the corresponding messenger
+// SocketPolicy (all with lossy/standby parameter 0); aborts on an
+// unrecognized value.
+SocketPolicy to_socket_policy(policy_t policy) {
+  if (policy == policy_t::stateful_server) {
+    return SocketPolicy::stateful_server(0);
+  }
+  if (policy == policy_t::stateless_server) {
+    return SocketPolicy::stateless_server(0);
+  }
+  if (policy == policy_t::lossless_peer) {
+    return SocketPolicy::lossless_peer(0);
+  }
+  if (policy == policy_t::lossless_peer_reuse) {
+    return SocketPolicy::lossless_peer_reuse(0);
+  }
+  if (policy == policy_t::lossy_client) {
+    return SocketPolicy::lossy_client(0);
+  }
+  if (policy == policy_t::lossless_client) {
+    return SocketPolicy::lossless_client(0);
+  }
+  logger().error("unexpected policy type");
+  ceph_abort();
+}
+
+class FailoverSuite : public Dispatcher {
+ crimson::auth::DummyAuthClientServer dummy_auth;
+ MessengerRef test_msgr;
+ const entity_addr_t test_peer_addr;
+ TestInterceptor interceptor;
+
+ unsigned tracked_index = 0;
+ Connection *tracked_conn = nullptr;
+ unsigned pending_send = 0;
+ unsigned pending_peer_receive = 0;
+ unsigned pending_receive = 0;
+
+ ShardedGates &gates;
+ const seastar::shard_id primary_sid;
+
+ std::optional<seastar::future<>> ms_dispatch(
+ ConnectionRef conn_ref, MessageRef m) override {
+ ceph_assert(m->get_type() == CEPH_MSG_OSD_OP);
+ Connection *conn = &*conn_ref;
+ gates.dispatch_in_background("TestSuite_ms_dispatch",
+ [this, conn, conn_ref] {
+ return seastar::smp::submit_to(primary_sid, [this, conn] {
+ auto result = interceptor.find_result(&*conn);
+ if (result == nullptr) {
+ logger().error("Untracked ms dispatched connection: {}", *conn);
+ ceph_abort();
+ }
+
+ if (tracked_conn != &*conn) {
+ logger().warn("[{}] {} got op, but doesn't match tracked_conn [{}] {}",
+ result->index, *conn, tracked_index, *tracked_conn);
+ } else {
+ ceph_assert(result->index == tracked_index);
+ }
+
+ ceph_assert(pending_receive > 0);
+ --pending_receive;
+ if (pending_receive == 0) {
+ interceptor.notify();
+ }
+ logger().info("[Test] got op, left {} ops -- [{}] {}",
+ pending_receive, result->index, *conn);
+ }).then([conn_ref] {});
+ });
+ return {seastar::now()};
+ }
+
+ void ms_handle_accept(
+ ConnectionRef conn_ref,
+ seastar::shard_id prv_shard,
+ bool is_replace) override {
+ Connection *conn = &*conn_ref;
+ gates.dispatch_in_background("TestSuite_ms_dispatch",
+ [this, conn, conn_ref] {
+ return seastar::smp::submit_to(primary_sid, [this, conn] {
+ auto result = interceptor.find_result(&*conn);
+ if (result == nullptr) {
+ logger().error("Untracked accepted connection: {}", *conn);
+ ceph_abort();
+ }
+
+ if (tracked_conn &&
+ !tracked_conn->is_protocol_closed() &&
+ tracked_conn != &*conn) {
+ logger().error("[{}] {} got accepted, but there's already a valid traced_conn [{}] {}",
+ result->index, *conn, tracked_index, *tracked_conn);
+ ceph_abort();
+ }
+
+ tracked_index = result->index;
+ tracked_conn = &*conn;
+ ++result->cnt_accept_dispatched;
+ logger().info("[Test] got accept (cnt_accept_dispatched={}), track [{}] {}",
+ result->cnt_accept_dispatched, result->index, *conn);
+ return flush_pending_send();
+ }).then([conn_ref] {});
+ });
+ }
+
+ void ms_handle_connect(
+ ConnectionRef conn_ref,
+ seastar::shard_id prv_shard) override {
+ Connection *conn = &*conn_ref;
+ gates.dispatch_in_background("TestSuite_ms_dispatch",
+ [this, conn, conn_ref] {
+ return seastar::smp::submit_to(primary_sid, [this, conn] {
+ auto result = interceptor.find_result(&*conn);
+ if (result == nullptr) {
+ logger().error("Untracked connected connection: {}", *conn);
+ ceph_abort();
+ }
+
+ if (tracked_conn &&
+ !tracked_conn->is_protocol_closed() &&
+ tracked_conn != &*conn) {
+ logger().error("[{}] {} got connected, but there's already a avlid tracked_conn [{}] {}",
+ result->index, *conn, tracked_index, *tracked_conn);
+ ceph_abort();
+ }
+
+ if (tracked_conn == &*conn) {
+ ceph_assert(result->index == tracked_index);
+ }
+
+ ++result->cnt_connect_dispatched;
+ logger().info("[Test] got connected (cnt_connect_dispatched={}) -- [{}] {}",
+ result->cnt_connect_dispatched, result->index, *conn);
+ }).then([conn_ref] {});
+ });
+ }
+
+ void ms_handle_reset(
+ ConnectionRef conn_ref,
+ bool is_replace) override {
+ Connection *conn = &*conn_ref;
+ gates.dispatch_in_background("TestSuite_ms_dispatch",
+ [this, conn, conn_ref] {
+ return seastar::smp::submit_to(primary_sid, [this, conn] {
+ auto result = interceptor.find_result(&*conn);
+ if (result == nullptr) {
+ logger().error("Untracked reset connection: {}", *conn);
+ ceph_abort();
+ }
+
+ if (tracked_conn != &*conn) {
+ logger().warn("[{}] {} got reset, but doesn't match tracked_conn [{}] {}",
+ result->index, *conn, tracked_index, *tracked_conn);
+ } else {
+ ceph_assert(result->index == tracked_index);
+ tracked_index = 0;
+ tracked_conn = nullptr;
+ }
+
+ ++result->cnt_reset_dispatched;
+ logger().info("[Test] got reset (cnt_reset_dispatched={}), untrack [{}] {}",
+ result->cnt_reset_dispatched, result->index, *conn);
+ }).then([conn_ref] {});
+ });
+ }
+
+ void ms_handle_remote_reset(
+ ConnectionRef conn_ref) override {
+ Connection *conn = &*conn_ref;
+ gates.dispatch_in_background("TestSuite_ms_dispatch",
+ [this, conn, conn_ref] {
+ return seastar::smp::submit_to(primary_sid, [this, conn] {
+ auto result = interceptor.find_result(&*conn);
+ if (result == nullptr) {
+ logger().error("Untracked remotely reset connection: {}", *conn);
+ ceph_abort();
+ }
+
+ if (tracked_conn != &*conn) {
+ logger().warn("[{}] {} got remotely reset, but doesn't match tracked_conn [{}] {}",
+ result->index, *conn, tracked_index, *tracked_conn);
+ } else {
+ ceph_assert(result->index == tracked_index);
+ }
+
+ ++result->cnt_remote_reset_dispatched;
+ logger().info("[Test] got remote reset (cnt_remote_reset_dispatched={}) -- [{}] {}",
+ result->cnt_remote_reset_dispatched, result->index, *conn);
+ }).then([conn_ref] {});
+ });
+ }
+
+ private:
+ seastar::future<> init(entity_addr_t test_addr, SocketPolicy policy) {
+ test_msgr->set_default_policy(policy);
+ test_msgr->set_auth_client(&dummy_auth);
+ test_msgr->set_auth_server(&dummy_auth);
+ test_msgr->set_interceptor(&interceptor);
+ return test_msgr->bind(entity_addrvec_t{test_addr}).safe_then([this] {
+ return test_msgr->start({this});
+ }, Messenger::bind_ertr::all_same_way([test_addr] (const std::error_code& e) {
+ logger().error("FailoverSuite: "
+ "there is another instance running at {}", test_addr);
+ ceph_abort();
+ }));
+ }
+
+ seastar::future<> send_op(bool expect_reply=true) {
+ ceph_assert(tracked_conn);
+ ceph_assert(!tracked_conn->is_protocol_closed());
+ if (expect_reply) {
+ ++pending_peer_receive;
+ }
+ pg_t pgid;
+ object_locator_t oloc;
+ hobject_t hobj(object_t(), oloc.key, CEPH_NOSNAP, pgid.ps(),
+ pgid.pool(), oloc.nspace);
+ spg_t spgid(pgid);
+ return tracked_conn->send(crimson::make_message<MOSDOp>(0, 0, hobj, spgid, 0, 0, 0));
+ }
+
+ seastar::future<> flush_pending_send() {
+ if (pending_send != 0) {
+ logger().info("[Test] flush sending {} ops", pending_send);
+ }
+ ceph_assert(tracked_conn);
+ ceph_assert(!tracked_conn->is_protocol_closed());
+ return seastar::do_until(
+ [this] { return pending_send == 0; },
+ [this] {
+ --pending_send;
+ return send_op();
+ });
+ }
+
+ seastar::future<> wait_ready(unsigned num_ready_conns,
+ unsigned num_replaced,
+ bool wait_received) {
+ assert(seastar::this_shard_id() == primary_sid);
+ unsigned pending_conns = 0;
+ unsigned pending_establish = 0;
+ unsigned replaced_conns = 0;
+ for (auto& result : interceptor.results) {
+ if (result.conn->is_protocol_closed_clean()) {
+ if (result.state == conn_state_t::replaced) {
+ ++replaced_conns;
+ }
+ } else if (result.conn->is_protocol_ready()) {
+ if (pending_send == 0 && pending_peer_receive == 0 && pending_receive == 0) {
+ result.state = conn_state_t::established;
+ } else {
+ ++pending_establish;
+ }
+ } else {
+ ++pending_conns;
+ }
+ }
+
+ bool do_wait = false;
+ if (num_ready_conns > 0) {
+ if (interceptor.results.size() > num_ready_conns) {
+ throw std::runtime_error(fmt::format(
+ "{} connections, more than expected: {}",
+ interceptor.results.size(), num_ready_conns));
+ } else if (interceptor.results.size() < num_ready_conns || pending_conns > 0) {
+ logger().info("[Test] wait_ready(): wait for connections,"
+ " currently {} out of {}, pending {} ready ...",
+ interceptor.results.size(), num_ready_conns, pending_conns);
+ do_wait = true;
+ }
+ }
+ if (wait_received) {
+ if (pending_send || pending_peer_receive || pending_receive) {
+ if (pending_conns || pending_establish) {
+ logger().info("[Test] wait_ready(): wait for pending_send={},"
+ " pending_peer_receive={}, pending_receive={},"
+ " pending {}/{} ready/establish connections ...",
+ pending_send, pending_peer_receive, pending_receive,
+ pending_conns, pending_establish);
+ do_wait = true;
+ } else {
+ // If there are pending messages, stop waiting if there are
+ // no longer pending connections.
+ }
+ } else {
+ // Stop waiting if there are no pending messages. Pending connections
+ // should not be important.
+ }
+ }
+ if (num_replaced > 0) {
+ if (replaced_conns > num_replaced) {
+ throw std::runtime_error(fmt::format(
+ "{} replaced connections, more than expected: {}",
+ replaced_conns, num_replaced));
+ }
+ if (replaced_conns < num_replaced) {
+ logger().info("[Test] wait_ready(): wait for {} replaced connections,"
+ " currently {} ...",
+ num_replaced, replaced_conns);
+ do_wait = true;
+ }
+ }
+
+ if (do_wait) {
+ return interceptor.wait(
+ ).then([this, num_ready_conns, num_replaced, wait_received] {
+ return wait_ready(num_ready_conns, num_replaced, wait_received);
+ });
+ } else {
+ logger().info("[Test] wait_ready(): wait done!");
+ return seastar::now();
+ }
+ }
+
+ // called by FailoverTest
+ public:
+ FailoverSuite(MessengerRef test_msgr,
+ entity_addr_t test_peer_addr,
+ const TestInterceptor& interceptor,
+ ShardedGates &gates)
+ : test_msgr(test_msgr),
+ test_peer_addr(test_peer_addr),
+ interceptor(interceptor),
+ gates{gates},
+ primary_sid{seastar::this_shard_id()} { }
+
+ entity_addr_t get_addr() const {
+ return test_msgr->get_myaddr();
+ }
+
+ seastar::future<> shutdown() {
+ test_msgr->stop();
+ return test_msgr->shutdown();
+ }
+
+ void needs_receive() {
+ ++pending_receive;
+ }
+
+ void notify_peer_reply() {
+ ceph_assert(pending_peer_receive > 0);
+ --pending_peer_receive;
+ logger().info("[Test] TestPeer said got op, left {} ops",
+ pending_peer_receive);
+ if (pending_peer_receive == 0) {
+ interceptor.notify();
+ }
+ }
+
+ void post_check() const {
+ // make sure all breakpoints were hit
+ for (auto& kv : interceptor.breakpoints) {
+ auto it = interceptor.breakpoints_counter.find(kv.first);
+ if (it == interceptor.breakpoints_counter.end()) {
+ throw std::runtime_error(fmt::format("{} was missed", kv.first));
+ }
+ auto expected = kv.second.rbegin()->first;
+ if (expected > it->second.counter) {
+ throw std::runtime_error(fmt::format(
+ "{} only triggered {} times, not the expected {}",
+ kv.first, it->second.counter, expected));
+ }
+ }
+ }
+
+ void dump_results() const {
+ for (auto& result : interceptor.results) {
+ result.dump();
+ }
+ }
+
+ static seastar::future<std::unique_ptr<FailoverSuite>>
+ create(entity_addr_t test_addr,
+ SocketPolicy test_policy,
+ entity_addr_t test_peer_addr,
+ const TestInterceptor& interceptor,
+ ShardedGates &gates) {
+ auto suite = std::make_unique<FailoverSuite>(
+ Messenger::create(
+ entity_name_t::OSD(TEST_OSD),
+ "Test",
+ TEST_NONCE,
+ false),
+ test_peer_addr,
+ interceptor,
+ gates);
+ return suite->init(test_addr, test_policy
+ ).then([suite = std::move(suite)] () mutable {
+ return std::move(suite);
+ });
+ }
+
+ // called by tests
+ public:
+ seastar::future<> connect_peer() {
+ logger().info("[Test] connect_peer({})", test_peer_addr);
+ assert(seastar::this_shard_id() == primary_sid);
+ auto conn = test_msgr->connect(test_peer_addr, entity_name_t::TYPE_OSD);
+ auto result = interceptor.find_result(&*conn);
+ ceph_assert(result != nullptr);
+
+ if (tracked_conn) {
+ if (tracked_conn->is_protocol_closed()) {
+ logger().info("[Test] this is a new session"
+ " replacing an closed one");
+ ceph_assert(tracked_conn != &*conn);
+ } else {
+ logger().info("[Test] this is not a new session");
+ ceph_assert(tracked_index == result->index);
+ ceph_assert(tracked_conn == &*conn);
+ }
+ } else {
+ logger().info("[Test] this is a new session");
+ }
+ tracked_index = result->index;
+ tracked_conn = &*conn;
+
+ return flush_pending_send();
+ }
+
+ seastar::future<> send_peer() {
+ assert(seastar::this_shard_id() == primary_sid);
+ if (tracked_conn) {
+ logger().info("[Test] send_peer()");
+ ceph_assert(!tracked_conn->is_protocol_closed());
+ ceph_assert(!pending_send);
+ return send_op();
+ } else {
+ ++pending_send;
+ logger().info("[Test] send_peer() (pending {})", pending_send);
+ return seastar::now();
+ }
+ }
+
+ seastar::future<> keepalive_peer() {
+ logger().info("[Test] keepalive_peer()");
+ assert(seastar::this_shard_id() == primary_sid);
+ ceph_assert(tracked_conn);
+ ceph_assert(!tracked_conn->is_protocol_closed());
+ return tracked_conn->send_keepalive();
+ }
+
+ seastar::future<> try_send_peer() {
+ logger().info("[Test] try_send_peer()");
+ assert(seastar::this_shard_id() == primary_sid);
+ ceph_assert(tracked_conn);
+ ceph_assert(!tracked_conn->is_protocol_closed());
+ return send_op(false);
+ }
+
+ seastar::future<> markdown() {
+ logger().info("[Test] markdown() in 100ms ...");
+ assert(seastar::this_shard_id() == primary_sid);
+ ceph_assert(tracked_conn);
+ // sleep to propagate potential remaining acks
+ return seastar::sleep(50ms
+ ).then([this] {
+ return seastar::smp::submit_to(
+ tracked_conn->get_shard_id(), [tracked_conn=tracked_conn] {
+ assert(tracked_conn->get_shard_id() == seastar::this_shard_id());
+ tracked_conn->mark_down();
+ });
+ }).then([] {
+ // sleep to wait for markdown propagate to the primary sid
+ return seastar::sleep(100ms);
+ });
+ }
+
  // Resolve once the interceptor's blocker has actually parked a
  // connection at a blocking breakpoint.
  seastar::future<> wait_blocked() {
    logger().info("[Test] wait_blocked() ...");
    assert(seastar::this_shard_id() == primary_sid);
    return interceptor.blocker.wait_blocked();
  }
+
+ void unblock() {
+ logger().info("[Test] unblock()");
+ assert(seastar::this_shard_id() == primary_sid);
+ return interceptor.blocker.unblock();
+ }
+
  // Wait until `count` connections have reached the replaced state
  // (no established requirement; see wait_ready()).
  seastar::future<> wait_replaced(unsigned count) {
    logger().info("[Test] wait_replaced({}) ...", count);
    return wait_ready(0, count, false);
  }
+
  // Wait until the current session is established (no result/replaced
  // count requirements; see wait_ready()).
  seastar::future<> wait_established() {
    logger().info("[Test] wait_established() ...");
    return wait_ready(0, 0, true);
  }
+
+ seastar::future<std::reference_wrapper<ConnResults>>
+ wait_results(unsigned count) {
+ logger().info("[Test] wait_result({}) ...", count);
+ return wait_ready(count, 0, true).then([this] {
+ return std::reference_wrapper<ConnResults>(interceptor.results);
+ });
+ }
+
  // True if the tracked connection's protocol is in standby state.
  bool is_standby() {
    assert(seastar::this_shard_id() == primary_sid);
    ceph_assert(tracked_conn);
    return tracked_conn->is_protocol_standby();
  }
+};
+
// Test-side driver: owns the command channel (CmdCli) to the remote
// CmdSrv (FailoverTestPeer), and creates one FailoverSuite around the
// intercepted test messenger per test case via run_suite().
class FailoverTest : public Dispatcher {
  crimson::auth::DummyAuthClientServer dummy_auth;
  MessengerRef cmd_msgr;
  ConnectionRef cmd_conn;
  // address of the local intercepted messenger / of the remote peer one
  const entity_addr_t test_addr;
  const entity_addr_t test_peer_addr;

  // one-shot rendezvous promises: armed before a request is sent,
  // resolved by ms_dispatch() when the matching reply arrives
  std::optional<seastar::promise<>> recv_pong;
  std::optional<seastar::promise<>> recv_cmdreply;

  std::unique_ptr<FailoverSuite> test_suite;

  // Handle replies from CmdSrv: pong, command reply, or notification
  // that the peer suite received an op.
  std::optional<seastar::future<>> ms_dispatch(ConnectionRef c, MessageRef m) override {
    switch (m->get_type()) {
    case CEPH_MSG_PING:
      ceph_assert(recv_pong);
      recv_pong->set_value();
      recv_pong = std::nullopt;
      break;
    case MSG_COMMAND_REPLY:
      ceph_assert(recv_cmdreply);
      recv_cmdreply->set_value();
      recv_cmdreply = std::nullopt;
      break;
    case MSG_COMMAND: {
      // the only unsolicited command from the peer: it received an op
      auto m_cmd = boost::static_pointer_cast<MCommand>(m);
      ceph_assert(static_cast<cmd_t>(m_cmd->cmd[0][0]) == cmd_t::suite_recv_op);
      ceph_assert(test_suite);
      test_suite->notify_peer_reply();
      break;
    }
    default:
      logger().error("{} got unexpected msg from cmd server: {}", *c, *m);
      ceph_abort();
    }
    return {seastar::now()};
  }

 private:
  // Send a command to CmdSrv and resolve when its MCommandReply arrives.
  // f_prepare may append extra command payload before sending.
  seastar::future<> prepare_cmd(
    cmd_t cmd,
    std::function<void(MCommand&)>
      f_prepare = [] (auto& m) { return; }) {
    assert(!recv_cmdreply);
    recv_cmdreply  = seastar::promise<>();
    auto fut = recv_cmdreply->get_future();
    auto m = crimson::make_message<MCommand>();
    m->cmd.emplace_back(1, static_cast<char>(cmd));
    f_prepare(*m);
    return cmd_conn->send(std::move(m)).then([fut = std::move(fut)] () mutable {
      return std::move(fut);
    });
  }

  // Ask the peer to spin up its FailoverSuitePeer with the given policy.
  seastar::future<> start_peer(policy_t peer_policy) {
    return prepare_cmd(cmd_t::suite_start,
        [peer_policy] (auto& m) {
      m.cmd.emplace_back(1, static_cast<char>(peer_policy));
    });
  }

  // Ask the peer to tear down its FailoverSuitePeer.
  seastar::future<> stop_peer() {
    return prepare_cmd(cmd_t::suite_stop);
  }

  // Round-trip a ping over the command channel (used as a readiness check).
  seastar::future<> pingpong() {
    assert(!recv_pong);
    recv_pong = seastar::promise<>();
    auto fut = recv_pong->get_future();
    return cmd_conn->send(crimson::make_message<MPing>()
    ).then([fut = std::move(fut)] () mutable {
      return std::move(fut);
    });
  }

  // Start the command messenger and connect to CmdSrv.
  seastar::future<> init(entity_addr_t cmd_peer_addr) {
    cmd_msgr->set_default_policy(SocketPolicy::lossy_client(0));
    cmd_msgr->set_auth_client(&dummy_auth);
    cmd_msgr->set_auth_server(&dummy_auth);
    return cmd_msgr->start({this}).then([this, cmd_peer_addr] {
      logger().info("CmdCli connect to CmdSrv({}) ...", cmd_peer_addr);
      cmd_conn = cmd_msgr->connect(cmd_peer_addr, entity_name_t::TYPE_OSD);
      return pingpong();
    });
  }

 public:
  FailoverTest(MessengerRef cmd_msgr,
               entity_addr_t test_addr,
               entity_addr_t test_peer_addr)
    : cmd_msgr(cmd_msgr),
      test_addr(test_addr),
      test_peer_addr(test_peer_addr) { }

  // Tell the peer to shut down, give it 200ms, then stop the cmd messenger.
  seastar::future<> shutdown() {
    logger().info("CmdCli shutdown...");
    assert(!recv_cmdreply);
    auto m = crimson::make_message<MCommand>();
    m->cmd.emplace_back(1, static_cast<char>(cmd_t::shutdown));
    return cmd_conn->send(std::move(m)).then([] {
      return seastar::sleep(200ms);
    }).then([this] {
      cmd_msgr->stop();
      return cmd_msgr->shutdown();
    });
  }

  // Factory: build the CmdCli messenger, connect to CmdSrv, return the
  // ready FailoverTest.
  static seastar::future<seastar::lw_shared_ptr<FailoverTest>>
  create(entity_addr_t test_addr,
         entity_addr_t cmd_peer_addr,
         entity_addr_t test_peer_addr) {
    auto test = seastar::make_lw_shared<FailoverTest>(
      Messenger::create(
        entity_name_t::OSD(CMD_CLI_OSD),
        "CmdCli",
        CMD_CLI_NONCE,
        true),
      test_addr, test_peer_addr);
    return test->init(cmd_peer_addr).then([test] {
      logger().info("CmdCli ready");
      return test;
    });
  }

 // called by tests
 public:
  // Run one named test case: create the FailoverSuite with the given
  // interceptor/policy, start the peer suite, run the test body `f`,
  // dump results on failure, and always stop peer + suite afterwards.
  seastar::future<> run_suite(
      std::string name,
      const TestInterceptor& interceptor,
      policy_t test_policy,
      policy_t peer_policy,
      std::function<seastar::future<>(FailoverSuite&)>&& f) {
    logger().info("\n\n[{}]", name);
    ceph_assert(!test_suite);
    SocketPolicy test_policy_ = to_socket_policy(test_policy);
    return ShardedGates::create(
    ).then([this, test_policy_, peer_policy, interceptor,
            f=std::move(f)](auto *gates) mutable {
      return FailoverSuite::create(
        test_addr, test_policy_, test_peer_addr, interceptor, *gates
      ).then([this, peer_policy, f = std::move(f)](auto suite) mutable {
        ceph_assert(suite->get_addr() == test_addr);
        test_suite.swap(suite);
        return start_peer(peer_policy
        ).then([this, f = std::move(f)] {
          return f(*test_suite);
        }).then([this] {
          test_suite->post_check();
          logger().info("\n[SUCCESS]");
        }).handle_exception([this](auto eptr) {
          logger().info("\n[FAIL: {}]", eptr);
          test_suite->dump_results();
          throw;
        }).then([this] {
          return stop_peer();
        }).then([this] {
          return test_suite->shutdown(
          ).then([this] {
            test_suite.reset();
          });
        });
      }).then([gates] {
        return gates->close();
      });
    });
  }

  // Ask the peer suite to connect back to our test address.
  seastar::future<> peer_connect_me() {
    logger().info("[Test] peer_connect_me({})", test_addr);
    return prepare_cmd(cmd_t::suite_connect_me,
        [this] (auto& m) {
      m.cmd.emplace_back(fmt::format("{}", test_addr));
    });
  }

  // Ask the peer to send us an op; the local suite expects to receive it.
  seastar::future<> peer_send_me() {
    logger().info("[Test] peer_send_me()");
    ceph_assert(test_suite);
    test_suite->needs_receive();
    return prepare_cmd(cmd_t::suite_send_me);
  }

  // Like peer_send_me(), but without registering an expected receive.
  seastar::future<> try_peer_send_me() {
    logger().info("[Test] try_peer_send_me()");
    ceph_assert(test_suite);
    return prepare_cmd(cmd_t::suite_send_me);
  }

  // Send an op in both directions at once.
  seastar::future<> send_bidirectional() {
    ceph_assert(test_suite);
    return test_suite->send_peer().then([this] {
      return peer_send_me();
    });
  }

  // Ask the peer to send us a keepalive.
  seastar::future<> peer_keepalive_me() {
    logger().info("[Test] peer_keepalive_me()");
    ceph_assert(test_suite);
    return prepare_cmd(cmd_t::suite_keepalive_me);
  }

  // Mark the peer's connection down; ~150ms total (50ms ack drain +
  // 100ms for the markdown to propagate), mirroring FailoverSuite::markdown().
  seastar::future<> markdown_peer() {
    logger().info("[Test] markdown_peer() in 150ms ...");
    // sleep to propagate potential remaining acks
    return seastar::sleep(50ms
    ).then([this] {
      return prepare_cmd(cmd_t::suite_markdown);
    }).then([] {
      // sleep awhile for peer markdown propagated
      return seastar::sleep(100ms);
    });
  }
};
+
+class FailoverSuitePeer : public Dispatcher {
+ using cb_t = std::function<seastar::future<>()>;
+ crimson::auth::DummyAuthClientServer dummy_auth;
+ MessengerRef peer_msgr;
+ cb_t op_callback;
+
+ ConnectionRef tracked_conn;
+ unsigned pending_send = 0;
+
+ std::optional<seastar::future<>> ms_dispatch(ConnectionRef conn, MessageRef m) override {
+ logger().info("[TestPeer] got op from Test");
+ ceph_assert(m->get_type() == CEPH_MSG_OSD_OP);
+ std::ignore = op_callback();
+ return {seastar::now()};
+ }
+
+ void ms_handle_accept(
+ ConnectionRef conn,
+ seastar::shard_id prv_shard,
+ bool is_replace) override {
+ assert(prv_shard == seastar::this_shard_id());
+ logger().info("[TestPeer] got accept from Test");
+
+ if (tracked_conn &&
+ !tracked_conn->is_protocol_closed() &&
+ tracked_conn != conn) {
+ logger().error("[TestPeer] {} got accepted, but there's already a valid traced_conn {}",
+ *conn, *tracked_conn);
+ }
+ tracked_conn = conn;
+ std::ignore = flush_pending_send();
+ }
+
+ void ms_handle_reset(ConnectionRef conn, bool is_replace) override {
+ logger().info("[TestPeer] got reset from Test");
+ }
+
+ private:
+ seastar::future<> init(entity_addr_t test_peer_addr, SocketPolicy policy) {
+ peer_msgr->set_default_policy(policy);
+ peer_msgr->set_auth_client(&dummy_auth);
+ peer_msgr->set_auth_server(&dummy_auth);
+ return peer_msgr->bind(entity_addrvec_t{test_peer_addr}).safe_then([this] {
+ return peer_msgr->start({this});
+ }, Messenger::bind_ertr::all_same_way([test_peer_addr] (const std::error_code& e) {
+ logger().error("FailoverSuitePeer: "
+ "there is another instance running at {}", test_peer_addr);
+ ceph_abort();
+ }));
+ }
+
+ seastar::future<> send_op() {
+ ceph_assert(tracked_conn);
+ if (tracked_conn->is_protocol_closed()) {
+ logger().error("[TestPeer] send op but the connection is closed -- {}",
+ *tracked_conn);
+ }
+
+ pg_t pgid;
+ object_locator_t oloc;
+ hobject_t hobj(object_t(), oloc.key, CEPH_NOSNAP, pgid.ps(),
+ pgid.pool(), oloc.nspace);
+ spg_t spgid(pgid);
+ return tracked_conn->send(crimson::make_message<MOSDOp>(0, 0, hobj, spgid, 0, 0, 0));
+ }
+
+ seastar::future<> flush_pending_send() {
+ if (pending_send != 0) {
+ logger().info("[TestPeer] flush sending {} ops", pending_send);
+ }
+ ceph_assert(tracked_conn);
+ return seastar::do_until(
+ [this] { return pending_send == 0; },
+ [this] {
+ --pending_send;
+ return send_op();
+ });
+ }
+
+ public:
+ FailoverSuitePeer(MessengerRef peer_msgr, cb_t op_callback)
+ : peer_msgr(peer_msgr),
+ op_callback(op_callback) { }
+
+ seastar::future<> shutdown() {
+ peer_msgr->stop();
+ return peer_msgr->shutdown();
+ }
+
+ seastar::future<> connect_peer(entity_addr_t test_addr_decoded) {
+ logger().info("[TestPeer] connect_peer({})", test_addr_decoded);
+ auto conn = peer_msgr->connect(test_addr_decoded, entity_name_t::TYPE_OSD);
+
+ if (tracked_conn) {
+ if (tracked_conn->is_protocol_closed()) {
+ logger().info("[TestPeer] this is a new session"
+ " replacing an closed one");
+ ceph_assert(tracked_conn != conn);
+ } else {
+ logger().info("[TestPeer] this is not a new session");
+ ceph_assert(tracked_conn == conn);
+ }
+ } else {
+ logger().info("[TestPeer] this is a new session");
+ }
+ tracked_conn = conn;
+
+ return flush_pending_send();
+ }
+
+ seastar::future<> send_peer() {
+ if (tracked_conn) {
+ logger().info("[TestPeer] send_peer()");
+ ceph_assert(!pending_send);
+ return send_op();
+ } else {
+ ++pending_send;
+ logger().info("[TestPeer] send_peer() (pending {})", pending_send);
+ return seastar::now();
+ }
+ }
+
+ seastar::future<> keepalive_peer() {
+ logger().info("[TestPeer] keepalive_peer()");
+ ceph_assert(tracked_conn);
+ return tracked_conn->send_keepalive();
+ }
+
+ seastar::future<> markdown() {
+ logger().info("[TestPeer] markdown()");
+ ceph_assert(tracked_conn);
+ tracked_conn->mark_down();
+ return seastar::now();
+ }
+
+ static seastar::future<std::unique_ptr<FailoverSuitePeer>>
+ create(entity_addr_t test_peer_addr, const SocketPolicy& policy, cb_t op_callback) {
+ auto suite = std::make_unique<FailoverSuitePeer>(
+ Messenger::create(
+ entity_name_t::OSD(TEST_PEER_OSD),
+ "TestPeer",
+ TEST_PEER_NONCE,
+ true),
+ op_callback
+ );
+ return suite->init(test_peer_addr, policy
+ ).then([suite = std::move(suite)] () mutable {
+ return std::move(suite);
+ });
+ }
+};
+
// Command server (CmdSrv) on the peer side: receives cmd_t commands from
// FailoverTest over the command channel, manages the lifetime of the
// FailoverSuitePeer, and replies with MCommandReply when a command is done.
class FailoverTestPeer : public Dispatcher {
  crimson::auth::DummyAuthClientServer dummy_auth;
  MessengerRef cmd_msgr;
  ConnectionRef cmd_conn;
  const entity_addr_t test_peer_addr;
  std::unique_ptr<FailoverSuitePeer> test_suite;

  // Dispatch ping (echo) and command messages from CmdCli.
  std::optional<seastar::future<>> ms_dispatch(ConnectionRef c, MessageRef m) override {
    ceph_assert(cmd_conn == c);
    switch (m->get_type()) {
    case CEPH_MSG_PING:
      std::ignore = c->send(crimson::make_message<MPing>());
      break;
    case MSG_COMMAND: {
      auto m_cmd = boost::static_pointer_cast<MCommand>(m);
      auto cmd = static_cast<cmd_t>(m_cmd->cmd[0][0]);
      if (cmd == cmd_t::shutdown) {
        logger().info("CmdSrv shutdown...");
        // forwarded to FailoverTestPeer::wait()
        cmd_msgr->stop();
        std::ignore = cmd_msgr->shutdown();
      } else {
        // handle asynchronously; reply when the command completes
        std::ignore = handle_cmd(cmd, m_cmd).then([c] {
          return c->send(crimson::make_message<MCommandReply>());
        });
      }
      break;
    }
    default:
      logger().error("{} got unexpected msg from cmd client: {}", *c, *m);
      ceph_abort();
    }
    return {seastar::now()};
  }

  // Track the (single) command connection from CmdCli.
  void ms_handle_accept(
      ConnectionRef conn,
      seastar::shard_id prv_shard,
      bool is_replace) override {
    assert(prv_shard == seastar::this_shard_id());
    cmd_conn = conn;
  }

 private:
  // Tell the test side that the suite received an op (cmd_t::suite_recv_op).
  seastar::future<> notify_recv_op() {
    ceph_assert(cmd_conn);
    auto m = crimson::make_message<MCommand>();
    m->cmd.emplace_back(1, static_cast<char>(cmd_t::suite_recv_op));
    return cmd_conn->send(std::move(m));
  }

  // Execute one command from the test side against the local suite.
  seastar::future<> handle_cmd(cmd_t cmd, MRef<MCommand> m_cmd) {
    switch (cmd) {
    case cmd_t::suite_start: {
      ceph_assert(!test_suite);
      // second command token carries the requested socket policy
      auto policy = to_socket_policy(static_cast<policy_t>(m_cmd->cmd[1][0]));
      return FailoverSuitePeer::create(
        test_peer_addr, policy, [this] { return notify_recv_op(); }
      ).then([this] (auto suite) {
        test_suite.swap(suite);
      });
    }
    case cmd_t::suite_stop:
      ceph_assert(test_suite);
      return test_suite->shutdown().then([this] {
        test_suite.reset();
      });
    case cmd_t::suite_connect_me: {
      ceph_assert(test_suite);
      // second command token carries the test-side address to dial
      entity_addr_t test_addr_decoded = entity_addr_t();
      test_addr_decoded.parse(m_cmd->cmd[1].c_str(), nullptr);
      return test_suite->connect_peer(test_addr_decoded);
    }
    case cmd_t::suite_send_me:
      ceph_assert(test_suite);
      return test_suite->send_peer();
    case cmd_t::suite_keepalive_me:
      ceph_assert(test_suite);
      return test_suite->keepalive_peer();
    case cmd_t::suite_markdown:
      ceph_assert(test_suite);
      return test_suite->markdown();
    default:
      logger().error("TestPeer got unexpected command {} from Test",
                     fmt::ptr(m_cmd.get()));
      ceph_abort();
      return seastar::now();
    }
  }

  // Bind and start the command messenger; abort if the address is taken.
  seastar::future<> init(entity_addr_t cmd_peer_addr) {
    cmd_msgr->set_default_policy(SocketPolicy::stateless_server(0));
    cmd_msgr->set_auth_client(&dummy_auth);
    cmd_msgr->set_auth_server(&dummy_auth);
    return cmd_msgr->bind(entity_addrvec_t{cmd_peer_addr}).safe_then([this] {
      return cmd_msgr->start({this});
    }, Messenger::bind_ertr::all_same_way([cmd_peer_addr] (const std::error_code& e) {
      logger().error("FailoverTestPeer: "
                     "there is another instance running at {}", cmd_peer_addr);
      ceph_abort();
    }));
  }

 public:
  FailoverTestPeer(MessengerRef cmd_msgr,
                   entity_addr_t test_peer_addr)
    : cmd_msgr(cmd_msgr),
      test_peer_addr(test_peer_addr) { }

  // Block until the command messenger is shut down (by cmd_t::shutdown).
  seastar::future<> wait() {
    return cmd_msgr->wait();
  }

  // Factory: build the CmdSrv messenger, bind/start it, and return the
  // ready FailoverTestPeer.
  static seastar::future<std::unique_ptr<FailoverTestPeer>>
  create(entity_addr_t cmd_peer_addr, entity_addr_t test_peer_addr) {
    auto test_peer = std::make_unique<FailoverTestPeer>(
      Messenger::create(
        entity_name_t::OSD(CMD_SRV_OSD),
        "CmdSrv",
        CMD_SRV_NONCE,
        true),
      test_peer_addr);
    return test_peer->init(cmd_peer_addr
    ).then([test_peer = std::move(test_peer)] () mutable {
      logger().info("CmdSrv ready");
      return std::move(test_peer);
    });
  }
};
+
// Inject a fault at each early connecting-side breakpoint (socket,
// banner, HELLO, AUTH phases) on a lossy client; the connection must
// retry and still reach established with the counters asserted below.
seastar::future<>
test_v2_lossy_early_connect_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<Breakpoint>{
    {custom_bp_t::SOCKET_CONNECTING},
    {custom_bp_t::BANNER_WRITE},
    {custom_bp_t::BANNER_READ},
    {custom_bp_t::BANNER_PAYLOAD_READ},
    {Tag::HELLO, bp_type_t::WRITE},
    {Tag::HELLO, bp_type_t::READ},
    {Tag::AUTH_REQUEST, bp_type_t::WRITE},
    {Tag::AUTH_DONE, bp_type_t::READ},
    {Tag::AUTH_SIGNATURE, bp_type_t::WRITE},
    {Tag::AUTH_SIGNATURE, bp_type_t::READ},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp);
      return test.run_suite(
          fmt::format("test_v2_lossy_early_connect_fault -- {}", bp),
          interceptor,
          policy_t::lossy_client,
          policy_t::stateless_server,
          [] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&suite] {
          return suite.send_peer();
        }).then([&suite] {
          return suite.connect_peer();
        }).then([&suite] {
          return suite.wait_results(1);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::established);
          results[0].assert_connect(2, 1, 0, 1);
          results[0].assert_accept(0, 0, 0, 0);
          results[0].assert_reset(0, 0);
        });
      });
    });
  });
}
+
// Inject a fault during the identification exchange (CLIENT_IDENT write /
// SERVER_IDENT read) on a lossy client; the retry must still establish.
seastar::future<>
test_v2_lossy_connect_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<Breakpoint>{
    {Tag::CLIENT_IDENT, bp_type_t::WRITE},
    {Tag::SERVER_IDENT, bp_type_t::READ},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp);
      return test.run_suite(
          fmt::format("test_v2_lossy_connect_fault -- {}", bp),
          interceptor,
          policy_t::lossy_client,
          policy_t::stateless_server,
          [] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&suite] {
          return suite.send_peer();
        }).then([&suite] {
          return suite.connect_peer();
        }).then([&suite] {
          return suite.wait_results(1);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::established);
          results[0].assert_connect(2, 2, 0, 1);
          results[0].assert_accept(0, 0, 0, 0);
          results[0].assert_reset(0, 0);
        });
      });
    });
  });
}
+
// Inject a MESSAGE read/write fault after a lossy client has connected;
// lossy policy means no reconnect: the connection ends up closed and reset.
seastar::future<>
test_v2_lossy_connected_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<Breakpoint>{
    {Tag::MESSAGE, bp_type_t::WRITE},
    {Tag::MESSAGE, bp_type_t::READ},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp);
      return test.run_suite(
          fmt::format("test_v2_lossy_connected_fault -- {}", bp),
          interceptor,
          policy_t::lossy_client,
          policy_t::stateless_server,
          [&test] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&test] {
          return test.send_bidirectional();
        }).then([&suite] {
          return suite.connect_peer();
        }).then([&suite] {
          return suite.wait_results(1);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::closed);
          results[0].assert_connect(1, 1, 0, 1);
          results[0].assert_accept(0, 0, 0, 0);
          results[0].assert_reset(1, 0);
        });
      });
    });
  });
}
+
// Inject a fault at each early accepting-side breakpoint on a stateless
// server; the first accept fails (closed) and the peer's retry yields a
// second, established accepted connection.
seastar::future<>
test_v2_lossy_early_accept_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<Breakpoint>{
    {custom_bp_t::BANNER_WRITE},
    {custom_bp_t::BANNER_READ},
    {custom_bp_t::BANNER_PAYLOAD_READ},
    {Tag::HELLO, bp_type_t::WRITE},
    {Tag::HELLO, bp_type_t::READ},
    {Tag::AUTH_REQUEST, bp_type_t::READ},
    {Tag::AUTH_DONE, bp_type_t::WRITE},
    {Tag::AUTH_SIGNATURE, bp_type_t::WRITE},
    {Tag::AUTH_SIGNATURE, bp_type_t::READ},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp);
      return test.run_suite(
          fmt::format("test_v2_lossy_early_accept_fault -- {}", bp),
          interceptor,
          policy_t::stateless_server,
          policy_t::lossy_client,
          [&test] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&test] {
          return test.peer_send_me();
        }).then([&test] {
          return test.peer_connect_me();
        }).then([&suite] {
          return suite.wait_results(2);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::closed);
          results[0].assert_connect(0, 0, 0, 0);
          results[0].assert_accept(1, 0, 0, 0);
          results[0].assert_reset(0, 0);
          results[1].assert_state_at(conn_state_t::established);
          results[1].assert_connect(0, 0, 0, 0);
          results[1].assert_accept(1, 1, 0, 1);
          results[1].assert_reset(0, 0);
        });
      });
    });
  });
}
+
+seastar::future<>
+test_v2_lossy_accept_fault(FailoverTest& test) {
+ auto bp = Breakpoint{Tag::CLIENT_IDENT, bp_type_t::READ};
+ TestInterceptor interceptor;
+ interceptor.make_fault(bp);
+ return test.run_suite(
+ fmt::format("test_v2_lossy_accept_fault -- {}", bp),
+ interceptor,
+ policy_t::stateless_server,
+ policy_t::lossy_client,
+ [&test] (FailoverSuite& suite) {
+ return seastar::futurize_invoke([&test] {
+ return test.peer_send_me();
+ }).then([&test] {
+ return test.peer_connect_me();
+ }).then([&suite] {
+ return suite.wait_results(2);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::closed);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 0);
+ results[0].assert_reset(0, 0);
+ results[1].assert_state_at(conn_state_t::established);
+ results[1].assert_connect(0, 0, 0, 0);
+ results[1].assert_accept(1, 1, 0, 1);
+ results[1].assert_reset(0, 0);
+ });
+ });
+}
+
// Inject a fault while the server writes SERVER_IDENT: the first accepted
// connection closes with a reset, and the retry establishes.
seastar::future<>
test_v2_lossy_establishing_fault(FailoverTest& test) {
  auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::WRITE};
  TestInterceptor interceptor;
  interceptor.make_fault(bp);
  return test.run_suite(
    fmt::format("test_v2_lossy_establishing_fault -- {}", bp),
    interceptor,
    policy_t::stateless_server,
    policy_t::lossy_client,
    [&test] (FailoverSuite& suite) {
      return seastar::futurize_invoke([&test] {
        return test.peer_send_me();
      }).then([&test] {
        return test.peer_connect_me();
      }).then([&suite] {
        return suite.wait_results(2);
      }).then([] (ConnResults& results) {
        results[0].assert_state_at(conn_state_t::closed);
        results[0].assert_connect(0, 0, 0, 0);
        results[0].assert_accept(1, 1, 0, 1);
        results[0].assert_reset(1, 0);
        results[1].assert_state_at(conn_state_t::established);
        results[1].assert_connect(0, 0, 0, 0);
        results[1].assert_accept(1, 1, 0, 1);
        results[1].assert_reset(0, 0);
      });
    });
}
+
// Inject a MESSAGE fault after the stateless server has accepted; lossy
// semantics mean the accepted connection closes with a reset, no retry.
seastar::future<>
test_v2_lossy_accepted_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<Breakpoint>{
    {Tag::MESSAGE, bp_type_t::WRITE},
    {Tag::MESSAGE, bp_type_t::READ},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp);
      return test.run_suite(
          fmt::format("test_v2_lossy_accepted_fault -- {}", bp),
          interceptor,
          policy_t::stateless_server,
          policy_t::lossy_client,
          [&test] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&test] {
          return test.send_bidirectional();
        }).then([&test] {
          return test.peer_connect_me();
        }).then([&suite] {
          return suite.wait_results(1);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::closed);
          results[0].assert_connect(0, 0, 0, 0);
          results[0].assert_accept(1, 1, 0, 1);
          results[0].assert_reset(1, 0);
        });
      });
    });
  });
}
+
// Inject an identification-phase fault on a lossless client; the client
// must retry transparently and establish without a session reset.
seastar::future<>
test_v2_lossless_connect_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<Breakpoint>{
    {Tag::CLIENT_IDENT, bp_type_t::WRITE},
    {Tag::SERVER_IDENT, bp_type_t::READ},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp);
      return test.run_suite(
          fmt::format("test_v2_lossless_connect_fault -- {}", bp),
          interceptor,
          policy_t::lossless_client,
          policy_t::stateful_server,
          [&test] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&test] {
          return test.send_bidirectional();
        }).then([&suite] {
          return suite.connect_peer();
        }).then([&suite] {
          return suite.wait_results(1);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::established);
          results[0].assert_connect(2, 2, 0, 1);
          results[0].assert_accept(0, 0, 0, 0);
          results[0].assert_reset(0, 0);
        });
      });
    });
  });
}
+
// Inject a MESSAGE fault on an established lossless client connection;
// the session must reconnect in place and stay established.
seastar::future<>
test_v2_lossless_connected_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<Breakpoint>{
    {Tag::MESSAGE, bp_type_t::WRITE},
    {Tag::MESSAGE, bp_type_t::READ},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp);
      return test.run_suite(
          fmt::format("test_v2_lossless_connected_fault -- {}", bp),
          interceptor,
          policy_t::lossless_client,
          policy_t::stateful_server,
          [&test] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&test] {
          return test.send_bidirectional();
        }).then([&suite] {
          return suite.connect_peer();
        }).then([&suite] {
          return suite.wait_results(1);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::established);
          results[0].assert_connect(2, 1, 1, 2);
          results[0].assert_accept(0, 0, 0, 0);
          results[0].assert_reset(0, 0);
        });
      });
    });
  });
}
+
// Inject faults on post-establish control frames (ACK, KEEPALIVE2,
// KEEPALIVE2_ACK) while exercising bidirectional sends and keepalives;
// the lossless client must reconnect in place and remain established.
seastar::future<>
test_v2_lossless_connected_fault2(FailoverTest& test) {
  return seastar::do_with(std::vector<Breakpoint>{
    {Tag::ACK, bp_type_t::READ},
    {Tag::ACK, bp_type_t::WRITE},
    {Tag::KEEPALIVE2, bp_type_t::READ},
    {Tag::KEEPALIVE2, bp_type_t::WRITE},
    {Tag::KEEPALIVE2_ACK, bp_type_t::READ},
    {Tag::KEEPALIVE2_ACK, bp_type_t::WRITE},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp);
      return test.run_suite(
          fmt::format("test_v2_lossless_connected_fault2 -- {}", bp),
          interceptor,
          policy_t::lossless_client,
          policy_t::stateful_server,
          [&test] (FailoverSuite& suite) {
        // interleave sends/keepalives from both sides, re-checking the
        // session is (still/again) established after each step
        return seastar::futurize_invoke([&suite] {
          return suite.connect_peer();
        }).then([&suite] {
          return suite.wait_established();
        }).then([&suite] {
          return suite.send_peer();
        }).then([&suite] {
          return suite.keepalive_peer();
        }).then([&suite] {
          return suite.wait_established();
        }).then([&test] {
          return test.peer_send_me();
        }).then([&test] {
          return test.peer_keepalive_me();
        }).then([&suite] {
          return suite.wait_established();
        }).then([&suite] {
          return suite.send_peer();
        }).then([&suite] {
          return suite.wait_established();
        }).then([&test] {
          return test.peer_send_me();
        }).then([&suite] {
          return suite.wait_established();
        }).then([&suite] {
          return suite.wait_results(1);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::established);
          results[0].assert_connect(2, 1, 1, 2);
          results[0].assert_accept(0, 0, 0, 0);
          results[0].assert_reset(0, 0);
        });
      });
    });
  });
}
+
// Inject a MESSAGE fault followed by a second fault during the resulting
// SESSION_RECONNECT exchange; the lossless client must retry the
// reconnect and still end up established.
seastar::future<>
test_v2_lossless_reconnect_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<std::pair<Breakpoint, Breakpoint>>{
    {{Tag::MESSAGE, bp_type_t::WRITE},
     {Tag::SESSION_RECONNECT, bp_type_t::WRITE}},
    {{Tag::MESSAGE, bp_type_t::WRITE},
     {Tag::SESSION_RECONNECT_OK, bp_type_t::READ}},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp_pair) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp_pair.first);
      interceptor.make_fault(bp_pair.second);
      return test.run_suite(
          fmt::format("test_v2_lossless_reconnect_fault -- {}, {}",
                      bp_pair.first, bp_pair.second),
          interceptor,
          policy_t::lossless_client,
          policy_t::stateful_server,
          [&test] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&test] {
          return test.send_bidirectional();
        }).then([&suite] {
          return suite.connect_peer();
        }).then([&suite] {
          return suite.wait_results(1);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::established);
          results[0].assert_connect(3, 1, 2, 2);
          results[0].assert_accept(0, 0, 0, 0);
          results[0].assert_reset(0, 0);
        });
      });
    });
  });
}
+
// Inject a fault while the stateful server reads CLIENT_IDENT: the first
// accepted connection closes and the lossless peer's retry establishes a
// second accepted connection.
seastar::future<>
test_v2_lossless_accept_fault(FailoverTest& test) {
  auto bp = Breakpoint{Tag::CLIENT_IDENT, bp_type_t::READ};
  TestInterceptor interceptor;
  interceptor.make_fault(bp);
  return test.run_suite(
    fmt::format("test_v2_lossless_accept_fault -- {}", bp),
    interceptor,
    policy_t::stateful_server,
    policy_t::lossless_client,
    [&test] (FailoverSuite& suite) {
      return seastar::futurize_invoke([&test] {
        return test.send_bidirectional();
      }).then([&test] {
        return test.peer_connect_me();
      }).then([&suite] {
        return suite.wait_results(2);
      }).then([] (ConnResults& results) {
        results[0].assert_state_at(conn_state_t::closed);
        results[0].assert_connect(0, 0, 0, 0);
        results[0].assert_accept(1, 1, 0, 0);
        results[0].assert_reset(0, 0);
        results[1].assert_state_at(conn_state_t::established);
        results[1].assert_connect(0, 0, 0, 0);
        results[1].assert_accept(1, 1, 0, 1);
        results[1].assert_reset(0, 0);
      });
    });
}
+
+seastar::future<>
+test_v2_lossless_establishing_fault(FailoverTest& test) {
+ auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::WRITE};
+ TestInterceptor interceptor;
+ interceptor.make_fault(bp);
+ return test.run_suite(
+ fmt::format("test_v2_lossless_establishing_fault -- {}", bp),
+ interceptor,
+ policy_t::stateful_server,
+ policy_t::lossless_client,
+ [&test] (FailoverSuite& suite) {
+ return seastar::futurize_invoke([&test] {
+ return test.send_bidirectional();
+ }).then([&test] {
+ return test.peer_connect_me();
+ }).then([&suite] {
+ return suite.wait_results(2);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::established);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 2);
+ results[0].assert_reset(0, 0);
+ results[1].assert_state_at(conn_state_t::replaced);
+ results[1].assert_connect(0, 0, 0, 0);
+ results[1].assert_accept(1, 1, 0, 0);
+ results[1].assert_reset(0, 0);
+ });
+ });
+}
+
// Inject a MESSAGE fault on an accepted lossless connection; the peer
// reconnects, the new accept replaces the old one, and the original
// connection remains established.
seastar::future<>
test_v2_lossless_accepted_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<Breakpoint>{
    {Tag::MESSAGE, bp_type_t::WRITE},
    {Tag::MESSAGE, bp_type_t::READ},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp);
      return test.run_suite(
          fmt::format("test_v2_lossless_accepted_fault -- {}", bp),
          interceptor,
          policy_t::stateful_server,
          policy_t::lossless_client,
          [&test] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&test] {
          return test.send_bidirectional();
        }).then([&test] {
          return test.peer_connect_me();
        }).then([&suite] {
          return suite.wait_results(2);
        }).then([] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::established);
          results[0].assert_connect(0, 0, 0, 0);
          results[0].assert_accept(1, 1, 0, 2);
          results[0].assert_reset(0, 0);
          results[1].assert_state_at(conn_state_t::replaced);
          results[1].assert_connect(0, 0, 0, 0);
          // NOTE(review): 2-arg form — relies on assert_accept having
          // defaulted trailing counters; confirm against its declaration
          results[1].assert_accept(1, 0);
          results[1].assert_reset(0, 0);
        });
      });
    });
  });
}
+
// Inject a MESSAGE fault plus a second fault during the resulting
// SESSION_RECONNECT exchange on the accepting side; expectations depend
// on which reconnect frame was faulted (see the branches on `bp`).
seastar::future<>
test_v2_lossless_reaccept_fault(FailoverTest& test) {
  return seastar::do_with(std::vector<std::pair<Breakpoint, Breakpoint>>{
    {{Tag::MESSAGE, bp_type_t::READ},
     {Tag::SESSION_RECONNECT, bp_type_t::READ}},
    {{Tag::MESSAGE, bp_type_t::READ},
     {Tag::SESSION_RECONNECT_OK, bp_type_t::WRITE}},
  }, [&test] (auto& failure_cases) {
    return seastar::do_for_each(failure_cases, [&test] (auto bp_pair) {
      TestInterceptor interceptor;
      interceptor.make_fault(bp_pair.first);
      interceptor.make_fault(bp_pair.second);
      return test.run_suite(
          fmt::format("test_v2_lossless_reaccept_fault -- {}, {}",
                      bp_pair.first, bp_pair.second),
          interceptor,
          policy_t::stateful_server,
          policy_t::lossless_client,
          // capture the second breakpoint: the expected counters differ
          // between the RECONNECT-read and RECONNECT_OK-write cases
          [&test, bp = bp_pair.second] (FailoverSuite& suite) {
        return seastar::futurize_invoke([&test] {
          return test.send_bidirectional();
        }).then([&test] {
          return test.peer_connect_me();
        }).then([&suite] {
          return suite.wait_results(3);
        }).then([bp] (ConnResults& results) {
          results[0].assert_state_at(conn_state_t::established);
          results[0].assert_connect(0, 0, 0, 0);
          if (bp == Breakpoint{Tag::SESSION_RECONNECT, bp_type_t::READ}) {
            results[0].assert_accept(1, 1, 0, 2);
          } else {
            results[0].assert_accept(1, 1, 0, 3);
          }
          results[0].assert_reset(0, 0);
          if (bp == Breakpoint{Tag::SESSION_RECONNECT, bp_type_t::READ}) {
            results[1].assert_state_at(conn_state_t::closed);
          } else {
            results[1].assert_state_at(conn_state_t::replaced);
          }
          results[1].assert_connect(0, 0, 0, 0);
          results[1].assert_accept(1, 0, 1, 0);
          results[1].assert_reset(0, 0);
          results[2].assert_state_at(conn_state_t::replaced);
          results[2].assert_connect(0, 0, 0, 0);
          results[2].assert_accept(1, 0, 1, 0);
          results[2].assert_reset(0, 0);
        });
      });
    });
  });
}
+
+// For each breakpoint in the connect phase (CLIENT_IDENT write,
+// SERVER_IDENT read), fault the local connector of a lossless_peer <->
+// lossless_peer pair and verify it retries and ends up established.
+seastar::future<>
+test_v2_peer_connect_fault(FailoverTest& test) {
+  return seastar::do_with(std::vector<Breakpoint>{
+    {Tag::CLIENT_IDENT, bp_type_t::WRITE},
+    {Tag::SERVER_IDENT, bp_type_t::READ},
+  }, [&test] (auto& failure_cases) {
+    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
+      TestInterceptor interceptor;
+      interceptor.make_fault(bp);
+      return test.run_suite(
+          fmt::format("test_v2_peer_connect_fault -- {}", bp),
+          interceptor,
+          policy_t::lossless_peer,
+          policy_t::lossless_peer,
+          [] (FailoverSuite& suite) {
+        return seastar::futurize_invoke([&suite] {
+          return suite.send_peer();
+        }).then([&suite] {
+          return suite.connect_peer();
+        }).then([&suite] {
+          return suite.wait_results(1);
+        }).then([] (ConnResults& results) {
+          // A single connection: faulted once, reconnected, established.
+          results[0].assert_state_at(conn_state_t::established);
+          results[0].assert_connect(2, 2, 0, 1);
+          results[0].assert_accept(0, 0, 0, 0);
+          results[0].assert_reset(0, 0);
+        });
+      });
+    });
+  });
+}
+
+// Fault the accepting side at CLIENT_IDENT read and verify the peer's
+// reconnect produces a second, successfully established accepted
+// connection while the first accepted one closes.
+seastar::future<>
+test_v2_peer_accept_fault(FailoverTest& test) {
+  auto bp = Breakpoint{Tag::CLIENT_IDENT, bp_type_t::READ};
+  TestInterceptor interceptor;
+  interceptor.make_fault(bp);
+  return test.run_suite(
+      fmt::format("test_v2_peer_accept_fault -- {}", bp),
+      interceptor,
+      policy_t::lossless_peer,
+      policy_t::lossless_peer,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&test] {
+      return test.peer_send_me();
+    }).then([&test] {
+      return test.peer_connect_me();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::established);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 0, 1);
+      results[1].assert_reset(0, 0);
+    });
+  });
+}
+
+// Fault the accepting side while it is establishing (SERVER_IDENT write)
+// and verify the peer's retry replaces the second accepted connection and
+// the original one ends up established.
+seastar::future<>
+test_v2_peer_establishing_fault(FailoverTest& test) {
+  auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::WRITE};
+  TestInterceptor interceptor;
+  interceptor.make_fault(bp);
+  return test.run_suite(
+      fmt::format("test_v2_peer_establishing_fault -- {}", bp),
+      interceptor,
+      policy_t::lossless_peer,
+      policy_t::lossless_peer,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&test] {
+      return test.peer_send_me();
+    }).then([&test] {
+      return test.peer_connect_me();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 2);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 0, 0);
+      results[1].assert_reset(0, 0);
+    });
+  });
+}
+
+// Fault an already-connected peer connection on MESSAGE write and verify
+// the local side recovers by reconnecting (one reconnect recorded in the
+// connect counters) rather than opening a new connection.
+seastar::future<>
+test_v2_peer_connected_fault_reconnect(FailoverTest& test) {
+  auto bp = Breakpoint{Tag::MESSAGE, bp_type_t::WRITE};
+  TestInterceptor interceptor;
+  interceptor.make_fault(bp);
+  return test.run_suite(
+      fmt::format("test_v2_peer_connected_fault_reconnect -- {}", bp),
+      interceptor,
+      policy_t::lossless_peer,
+      policy_t::lossless_peer,
+      [] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&suite] {
+      return suite.send_peer();
+    }).then([&suite] {
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.wait_results(1);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(2, 1, 1, 2);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+    });
+  });
+}
+
+// Fault an already-connected peer connection on MESSAGE read and verify
+// recovery happens via the peer re-accepting: the original connection is
+// re-established and the incoming reconnect attempt shows up as a second,
+// replaced result.
+seastar::future<>
+test_v2_peer_connected_fault_reaccept(FailoverTest& test) {
+  auto bp = Breakpoint{Tag::MESSAGE, bp_type_t::READ};
+  TestInterceptor interceptor;
+  interceptor.make_fault(bp);
+  return test.run_suite(
+      fmt::format("test_v2_peer_connected_fault_reaccept -- {}", bp),
+      interceptor,
+      policy_t::lossless_peer,
+      policy_t::lossless_peer,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 1);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 0, 1, 0);
+      results[1].assert_reset(0, 0);
+    });
+  });
+}
+
+// Establish one plain connection (no interceptor) and report whether the
+// local side considers the peer the winner of connection races
+// (conn->peer_wins()).  The result is used by the caller to pick which
+// racing test variants apply to this address pair.
+seastar::future<bool>
+check_peer_wins(FailoverTest& test) {
+  return seastar::do_with(bool(), [&test] (auto& ret) {
+    return test.run_suite("check_peer_wins",
+                          TestInterceptor(),
+                          policy_t::lossy_client,
+                          policy_t::stateless_server,
+                          [&ret] (FailoverSuite& suite) {
+      return suite.connect_peer().then([&suite] {
+        return suite.wait_results(1);
+      }).then([&ret] (ConnResults& results) {
+        results[0].assert_state_at(conn_state_t::established);
+        ret = results[0].conn->peer_wins();
+        logger().info("check_peer_wins: {}", ret);
+      });
+    }).then([&ret] {
+      return ret;
+    });
+  });
+}
+
+// Race two simultaneous reconnects where the acceptor side should lose:
+// fault the accepted connection (MESSAGE read), then block the acceptor's
+// own reconnect attempt at various handshake breakpoints (the paired
+// unsigned is the occurrence count passed to make_block) while the local
+// reconnect wins and gets established; the blocked attempt ends closed.
+seastar::future<>
+test_v2_racing_reconnect_acceptor_lose(FailoverTest& test) {
+  return seastar::do_with(std::vector<std::pair<unsigned, Breakpoint>>{
+    {1, {Tag::SESSION_RECONNECT, bp_type_t::READ}},
+    {2, {custom_bp_t::BANNER_WRITE}},
+    {2, {custom_bp_t::BANNER_READ}},
+    {2, {custom_bp_t::BANNER_PAYLOAD_READ}},
+    {2, {Tag::HELLO, bp_type_t::WRITE}},
+    {2, {Tag::HELLO, bp_type_t::READ}},
+    {2, {Tag::AUTH_REQUEST, bp_type_t::READ}},
+    {2, {Tag::AUTH_DONE, bp_type_t::WRITE}},
+    {2, {Tag::AUTH_SIGNATURE, bp_type_t::WRITE}},
+    {2, {Tag::AUTH_SIGNATURE, bp_type_t::READ}},
+  }, [&test] (auto& failure_cases) {
+    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
+      TestInterceptor interceptor;
+      // fault acceptor
+      interceptor.make_fault({Tag::MESSAGE, bp_type_t::READ});
+      // block acceptor
+      interceptor.make_block(bp.second, bp.first);
+      return test.run_suite(
+          fmt::format("test_v2_racing_reconnect_acceptor_lose -- {}({})",
+                      bp.second, bp.first),
+          interceptor,
+          policy_t::lossless_peer,
+          policy_t::lossless_peer,
+          [&test] (FailoverSuite& suite) {
+        return seastar::futurize_invoke([&test] {
+          return test.peer_send_me();
+        }).then([&test] {
+          return test.peer_connect_me();
+        }).then([&suite] {
+          return suite.wait_blocked();
+        }).then([&suite] {
+          return suite.send_peer();
+        }).then([&suite] {
+          return suite.wait_established();
+        }).then([&suite] {
+          suite.unblock();
+          return suite.wait_results(2);
+        }).then([] (ConnResults& results) {
+          results[0].assert_state_at(conn_state_t::established);
+          results[0].assert_connect(1, 0, 1, 1);
+          results[0].assert_accept(1, 1, 0, 1);
+          results[0].assert_reset(0, 0);
+          results[1].assert_state_at(conn_state_t::closed);
+          results[1].assert_connect(0, 0, 0, 0);
+          results[1].assert_accept(1, 0);
+          results[1].assert_reset(0, 0);
+        });
+      });
+    });
+  });
+}
+
+// Mirror of the _lose case: fault the local connector (MESSAGE write) and
+// block its reconnect attempt at various handshake breakpoints so that the
+// peer's incoming reconnect wins; the incoming attempt shows up as a
+// replaced result and the original connection re-establishes.
+seastar::future<>
+test_v2_racing_reconnect_acceptor_win(FailoverTest& test) {
+  return seastar::do_with(std::vector<std::pair<unsigned, Breakpoint>>{
+    {1, {Tag::SESSION_RECONNECT, bp_type_t::WRITE}},
+    {2, {custom_bp_t::SOCKET_CONNECTING}},
+    {2, {custom_bp_t::BANNER_WRITE}},
+    {2, {custom_bp_t::BANNER_READ}},
+    {2, {custom_bp_t::BANNER_PAYLOAD_READ}},
+    {2, {Tag::HELLO, bp_type_t::WRITE}},
+    {2, {Tag::HELLO, bp_type_t::READ}},
+    {2, {Tag::AUTH_REQUEST, bp_type_t::WRITE}},
+    {2, {Tag::AUTH_DONE, bp_type_t::READ}},
+    {2, {Tag::AUTH_SIGNATURE, bp_type_t::WRITE}},
+    {2, {Tag::AUTH_SIGNATURE, bp_type_t::READ}},
+  }, [&test] (auto& failure_cases) {
+    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
+      TestInterceptor interceptor;
+      // fault connector
+      interceptor.make_fault({Tag::MESSAGE, bp_type_t::WRITE});
+      // block connector
+      interceptor.make_block(bp.second, bp.first);
+      return test.run_suite(
+          fmt::format("test_v2_racing_reconnect_acceptor_win -- {}({})",
+                      bp.second, bp.first),
+          interceptor,
+          policy_t::lossless_peer,
+          policy_t::lossless_peer,
+          [&test] (FailoverSuite& suite) {
+        return seastar::futurize_invoke([&suite] {
+          return suite.send_peer();
+        }).then([&suite] {
+          return suite.connect_peer();
+        }).then([&suite] {
+          return suite.wait_blocked();
+        }).then([&test] {
+          return test.peer_send_me();
+        }).then([&suite] {
+          return suite.wait_replaced(1);
+        }).then([&suite] {
+          suite.unblock();
+          return suite.wait_results(2);
+        }).then([] (ConnResults& results) {
+          results[0].assert_state_at(conn_state_t::established);
+          results[0].assert_connect(2, 1);
+          results[0].assert_accept(0, 0, 0, 1);
+          results[0].assert_reset(0, 0);
+          results[1].assert_state_at(conn_state_t::replaced);
+          results[1].assert_connect(0, 0, 0, 0);
+          results[1].assert_accept(1, 0, 1, 0);
+          results[1].assert_reset(0, 0);
+        });
+      });
+    });
+  });
+}
+
+// Race two simultaneous *initial* connects where the acceptor should lose:
+// block the incoming (accepted) handshake at each breakpoint, start a local
+// outgoing connect, and verify the local connect establishes while the
+// blocked accepted attempt ends closed once unblocked.
+seastar::future<>
+test_v2_racing_connect_acceptor_lose(FailoverTest& test) {
+  return seastar::do_with(std::vector<Breakpoint>{
+    {custom_bp_t::BANNER_WRITE},
+    {custom_bp_t::BANNER_READ},
+    {custom_bp_t::BANNER_PAYLOAD_READ},
+    {Tag::HELLO, bp_type_t::WRITE},
+    {Tag::HELLO, bp_type_t::READ},
+    {Tag::AUTH_REQUEST, bp_type_t::READ},
+    {Tag::AUTH_DONE, bp_type_t::WRITE},
+    {Tag::AUTH_SIGNATURE, bp_type_t::WRITE},
+    {Tag::AUTH_SIGNATURE, bp_type_t::READ},
+    {Tag::CLIENT_IDENT, bp_type_t::READ},
+  }, [&test] (auto& failure_cases) {
+    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
+      TestInterceptor interceptor;
+      // block acceptor
+      interceptor.make_block(bp);
+      return test.run_suite(
+          fmt::format("test_v2_racing_connect_acceptor_lose -- {}", bp),
+          interceptor,
+          policy_t::lossless_peer,
+          policy_t::lossless_peer,
+          [&test] (FailoverSuite& suite) {
+        return seastar::futurize_invoke([&test] {
+          return test.peer_send_me();
+        }).then([&test] {
+          return test.peer_connect_me();
+        }).then([&suite] {
+          return suite.wait_blocked();
+        }).then([&suite] {
+          return suite.send_peer();
+        }).then([&suite] {
+          return suite.connect_peer();
+        }).then([&suite] {
+          return suite.wait_established();
+        }).then([&suite] {
+          suite.unblock();
+          return suite.wait_results(2);
+        }).then([] (ConnResults& results) {
+          results[0].assert_state_at(conn_state_t::closed);
+          results[0].assert_connect(0, 0, 0, 0);
+          results[0].assert_accept(1, 0);
+          results[0].assert_reset(0, 0);
+          results[1].assert_state_at(conn_state_t::established);
+          results[1].assert_connect(1, 1, 0, 1);
+          results[1].assert_accept(0, 0, 0, 0);
+          results[1].assert_reset(0, 0);
+        });
+      });
+    });
+  });
+}
+
+// Race two simultaneous initial connects where the acceptor should win:
+// block the local *outgoing* handshake at each breakpoint, let the peer
+// connect in, and verify the incoming connection replaces the blocked
+// outgoing one, which still reports established after being replaced
+// (result[0]) while the accepted side is the replacement (result[1]).
+seastar::future<>
+test_v2_racing_connect_acceptor_win(FailoverTest& test) {
+  return seastar::do_with(std::vector<Breakpoint>{
+    {custom_bp_t::SOCKET_CONNECTING},
+    {custom_bp_t::BANNER_WRITE},
+    {custom_bp_t::BANNER_READ},
+    {custom_bp_t::BANNER_PAYLOAD_READ},
+    {Tag::HELLO, bp_type_t::WRITE},
+    {Tag::HELLO, bp_type_t::READ},
+    {Tag::AUTH_REQUEST, bp_type_t::WRITE},
+    {Tag::AUTH_DONE, bp_type_t::READ},
+    {Tag::AUTH_SIGNATURE, bp_type_t::WRITE},
+    {Tag::AUTH_SIGNATURE, bp_type_t::READ},
+    {Tag::CLIENT_IDENT, bp_type_t::WRITE},
+  }, [&test] (auto& failure_cases) {
+    return seastar::do_for_each(failure_cases, [&test] (auto bp) {
+      TestInterceptor interceptor;
+      // block connector
+      interceptor.make_block(bp);
+      return test.run_suite(
+          fmt::format("test_v2_racing_connect_acceptor_win -- {}", bp),
+          interceptor,
+          policy_t::lossless_peer,
+          policy_t::lossless_peer,
+          [&test] (FailoverSuite& suite) {
+        return seastar::futurize_invoke([&suite] {
+          return suite.send_peer();
+        }).then([&suite] {
+          return suite.connect_peer();
+        }).then([&suite] {
+          return suite.wait_blocked();
+        }).then([&test] {
+          return test.peer_send_me();
+        }).then([&test] {
+          return test.peer_connect_me();
+        }).then([&suite] {
+          return suite.wait_replaced(1);
+        }).then([&suite] {
+          suite.unblock();
+          return suite.wait_results(2);
+        }).then([] (ConnResults& results) {
+          results[0].assert_state_at(conn_state_t::established);
+          results[0].assert_connect(1, 0);
+          results[0].assert_accept(0, 0, 0, 1);
+          results[0].assert_reset(0, 0);
+          results[1].assert_state_at(conn_state_t::replaced);
+          results[1].assert_connect(0, 0, 0, 0);
+          results[1].assert_accept(1, 1, 0, 0);
+          results[1].assert_reset(0, 0);
+        });
+      });
+    });
+  });
+}
+
+// Race a local re-connect attempt against an incoming reconnect: fault the
+// local connect at SERVER_IDENT read, block its 2nd CLIENT_IDENT write, and
+// let the peer's reconnect win (it replaces the local attempt).
+seastar::future<>
+test_v2_racing_connect_reconnect_lose(FailoverTest& test) {
+  TestInterceptor interceptor;
+  interceptor.make_fault({Tag::SERVER_IDENT, bp_type_t::READ});
+  interceptor.make_block({Tag::CLIENT_IDENT, bp_type_t::WRITE}, 2);
+  return test.run_suite("test_v2_racing_connect_reconnect_lose",
+                        interceptor,
+                        policy_t::lossless_peer,
+                        policy_t::lossless_peer,
+                        [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&suite] {
+      return suite.send_peer();
+    }).then([&suite] {
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.wait_blocked();
+    }).then([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.wait_replaced(1);
+    }).then([&suite] {
+      suite.unblock();
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(2, 2, 0, 0);
+      results[0].assert_accept(0, 0, 0, 1);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 1, 0);
+      results[1].assert_reset(0, 0);
+    });
+  });
+}
+
+// Race where the local re-connect wins: fault the local connect at
+// SERVER_IDENT read and block the peer's incoming SESSION_RECONNECT read;
+// the local retry establishes and the blocked incoming attempt closes.
+seastar::future<>
+test_v2_racing_connect_reconnect_win(FailoverTest& test) {
+  TestInterceptor interceptor;
+  interceptor.make_fault({Tag::SERVER_IDENT, bp_type_t::READ});
+  interceptor.make_block({Tag::SESSION_RECONNECT, bp_type_t::READ});
+  return test.run_suite("test_v2_racing_connect_reconnect_win",
+                        interceptor,
+                        policy_t::lossless_peer,
+                        policy_t::lossless_peer,
+                        [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.wait_blocked();
+    }).then([&suite] {
+      return suite.send_peer();
+    }).then([&suite] {
+      return suite.wait_established();
+    }).then([&suite] {
+      suite.unblock();
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(2, 2, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::closed);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 0, 1, 0);
+      results[1].assert_reset(0, 0);
+    });
+  });
+}
+
+// Stall (rather than fault) the local connect at SERVER_IDENT read so the
+// attempt goes stale; verify an incoming reconnect from the peer replaces
+// it and the connection ends up established.
+seastar::future<>
+test_v2_stale_connect(FailoverTest& test) {
+  auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::READ};
+  TestInterceptor interceptor;
+  interceptor.make_stall(bp);
+  return test.run_suite(
+      fmt::format("test_v2_stale_connect -- {}", bp),
+      interceptor,
+      policy_t::lossless_peer,
+      policy_t::lossless_peer,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&suite] {
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.wait_blocked();
+    }).then([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.wait_replaced(1);
+    }).then([&suite] {
+      suite.unblock();
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(1, 1, 0, 0);
+      results[0].assert_accept(0, 0, 0, 1);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 1, 0);
+      results[1].assert_reset(0, 0);
+    });
+  });
+}
+
+// Fault an established connection (MESSAGE write) and then stall its
+// reconnect at SESSION_RECONNECT_OK read; verify the peer's own reconnect
+// replaces the stale attempt and re-establishes the connection.
+seastar::future<>
+test_v2_stale_reconnect(FailoverTest& test) {
+  auto bp = Breakpoint{Tag::SESSION_RECONNECT_OK, bp_type_t::READ};
+  TestInterceptor interceptor;
+  interceptor.make_fault({Tag::MESSAGE, bp_type_t::WRITE});
+  interceptor.make_stall(bp);
+  return test.run_suite(
+      fmt::format("test_v2_stale_reconnect -- {}", bp),
+      interceptor,
+      policy_t::lossless_peer,
+      policy_t::lossless_peer,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&suite] {
+      return suite.send_peer();
+    }).then([&suite] {
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.wait_blocked();
+    }).then([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.wait_replaced(1);
+    }).then([&suite] {
+      suite.unblock();
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(2, 1, 1, 1);
+      results[0].assert_accept(0, 0, 0, 1);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 0, 1, 0);
+      results[1].assert_reset(0, 0);
+    });
+  });
+}
+
+// Stall an incoming accept at CLIENT_IDENT read; verify the peer retries
+// with a new connection that establishes, and the stale accepted attempt
+// closes once unblocked.
+seastar::future<>
+test_v2_stale_accept(FailoverTest& test) {
+  auto bp = Breakpoint{Tag::CLIENT_IDENT, bp_type_t::READ};
+  TestInterceptor interceptor;
+  interceptor.make_stall(bp);
+  return test.run_suite(
+      fmt::format("test_v2_stale_accept -- {}", bp),
+      interceptor,
+      policy_t::lossless_peer,
+      policy_t::lossless_peer,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&test] {
+      return test.peer_connect_me();
+    }).then([&suite] {
+      return suite.wait_blocked();
+    }).then([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.wait_established();
+    }).then([&suite] {
+      suite.unblock();
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::established);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 0, 1);
+      results[1].assert_reset(0, 0);
+    });
+  });
+}
+
+// Stall an accepted connection mid-establishment (SERVER_IDENT write);
+// verify the peer's retry replaces the stalled attempt and the first
+// accepted connection ends up established.
+seastar::future<>
+test_v2_stale_establishing(FailoverTest& test) {
+  auto bp = Breakpoint{Tag::SERVER_IDENT, bp_type_t::WRITE};
+  TestInterceptor interceptor;
+  interceptor.make_stall(bp);
+  return test.run_suite(
+      fmt::format("test_v2_stale_establishing -- {}", bp),
+      interceptor,
+      policy_t::lossless_peer,
+      policy_t::lossless_peer,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&test] {
+      return test.peer_connect_me();
+    }).then([&suite] {
+      return suite.wait_blocked();
+    }).then([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.wait_replaced(1);
+    }).then([&suite] {
+      suite.unblock();
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 2);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 0);
+      results[1].assert_reset(0, 0);
+    });
+  });
+}
+
+// Fault an accepted connection (MESSAGE read) and stall its replacing
+// reconnect at SESSION_RECONNECT_OK write.  The stall is held for 210ms so
+// the peer retries again; three results are expected, the last of which
+// must have performed at least one server-side reconnect attempt.
+seastar::future<>
+test_v2_stale_reaccept(FailoverTest& test) {
+  auto bp = Breakpoint{Tag::SESSION_RECONNECT_OK, bp_type_t::WRITE};
+  TestInterceptor interceptor;
+  interceptor.make_fault({Tag::MESSAGE, bp_type_t::READ});
+  interceptor.make_stall(bp);
+  return test.run_suite(
+      fmt::format("test_v2_stale_reaccept -- {}", bp),
+      interceptor,
+      policy_t::lossless_peer,
+      policy_t::lossless_peer,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&test] {
+      return test.peer_send_me();
+    }).then([&test] {
+      return test.peer_connect_me();
+    }).then([&suite] {
+      return suite.wait_blocked();
+    }).then([] {
+      logger().info("[Test] block the broken REPLACING for 210ms...");
+      return seastar::sleep(210ms);
+    }).then([&suite] {
+      suite.unblock();
+      return suite.wait_results(3);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 3);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 0, 1, 0);
+      results[1].assert_reset(0, 0);
+      results[2].assert_state_at(conn_state_t::replaced);
+      results[2].assert_connect(0, 0, 0, 0);
+      results[2].assert_accept(1, 0);
+      results[2].assert_reset(0, 0);
+      // The held stall must have forced the peer into at least one retry.
+      ceph_assert(results[2].server_reconnect_attempts >= 1);
+    });
+  });
+}
+
+// End-to-end policy test for a lossy client against a stateless server,
+// in four phases: (0) initial setup, (1) local markdown + explicit
+// reconnect, (2) peer markdown (connection is reset, not re-established --
+// lossy policy), (3) explicit client reconnect creating a third connection.
+seastar::future<>
+test_v2_lossy_client(FailoverTest& test) {
+  return test.run_suite(
+      "test_v2_lossy_client",
+      TestInterceptor(),
+      policy_t::lossy_client,
+      policy_t::stateless_server,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&suite] {
+      logger().info("-- 0 --");
+      logger().info("[Test] setup connection...");
+      return suite.connect_peer();
+    }).then([&test] {
+      return test.send_bidirectional();
+    }).then([&suite] {
+      return suite.wait_results(1);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+    }).then([&suite] {
+      logger().info("-- 1 --");
+      logger().info("[Test] client markdown...");
+      return suite.markdown();
+    }).then([&suite] {
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.send_peer();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::established);
+      results[1].assert_connect(1, 1, 0, 1);
+      results[1].assert_accept(0, 0, 0, 0);
+      results[1].assert_reset(0, 0);
+    }).then([&test] {
+      logger().info("-- 2 --");
+      logger().info("[Test] server markdown...");
+      return test.markdown_peer();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::closed);
+      results[1].assert_connect(1, 1, 0, 1);
+      results[1].assert_accept(0, 0, 0, 0);
+      // Lossy client: the server markdown surfaces as a reset, no retry.
+      results[1].assert_reset(1, 0);
+    }).then([&suite] {
+      logger().info("-- 3 --");
+      logger().info("[Test] client reconnect...");
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.send_peer();
+    }).then([&suite] {
+      return suite.wait_results(3);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::closed);
+      results[1].assert_connect(1, 1, 0, 1);
+      results[1].assert_accept(0, 0, 0, 0);
+      results[1].assert_reset(1, 0);
+      results[2].assert_state_at(conn_state_t::established);
+      results[2].assert_connect(1, 1, 0, 1);
+      results[2].assert_accept(0, 0, 0, 0);
+      results[2].assert_reset(0, 0);
+    });
+  });
+}
+
+// Server-side counterpart of test_v2_lossy_client: a stateless server
+// accepting from a lossy client, in four phases: (0) setup, (1) client
+// markdown + client reconnect (old accepted conn reports a reset),
+// (2) server markdown (both conns closed), (3) client reconnect accepted
+// as a third connection.
+seastar::future<>
+test_v2_stateless_server(FailoverTest& test) {
+  return test.run_suite(
+      "test_v2_stateless_server",
+      TestInterceptor(),
+      policy_t::stateless_server,
+      policy_t::lossy_client,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&test] {
+      logger().info("-- 0 --");
+      logger().info("[Test] setup connection...");
+      return test.peer_connect_me();
+    }).then([&test] {
+      return test.send_bidirectional();
+    }).then([&suite] {
+      return suite.wait_results(1);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 1);
+      results[0].assert_reset(0, 0);
+    }).then([&test] {
+      logger().info("-- 1 --");
+      logger().info("[Test] client markdown...");
+      return test.markdown_peer();
+    }).then([&test] {
+      return test.peer_connect_me();
+    }).then([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 1);
+      results[0].assert_reset(1, 0);
+      results[1].assert_state_at(conn_state_t::established);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 0, 1);
+      results[1].assert_reset(0, 0);
+    }).then([&suite] {
+      logger().info("-- 2 --");
+      logger().info("[Test] server markdown...");
+      return suite.markdown();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 1);
+      results[0].assert_reset(1, 0);
+      results[1].assert_state_at(conn_state_t::closed);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 0, 1);
+      results[1].assert_reset(0, 0);
+    }).then([&test] {
+      logger().info("-- 3 --");
+      logger().info("[Test] client reconnect...");
+      return test.peer_connect_me();
+    }).then([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.wait_results(3);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 1);
+      results[0].assert_reset(1, 0);
+      results[1].assert_state_at(conn_state_t::closed);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 0, 1);
+      results[1].assert_reset(0, 0);
+      results[2].assert_state_at(conn_state_t::established);
+      results[2].assert_connect(0, 0, 0, 0);
+      results[2].assert_accept(1, 1, 0, 1);
+      results[2].assert_reset(0, 0);
+    });
+  });
+}
+
+// End-to-end policy test for a lossless client against a stateful server:
+// (0) setup, (1) local markdown + reconnect, (2) peer markdown -- unlike
+// the lossy case the same connection auto-reconnects (connect counters
+// grow to 2 and a remote reset is recorded), (3) a further connect/send
+// reuses that same established connection (still 2 results).
+seastar::future<>
+test_v2_lossless_client(FailoverTest& test) {
+  return test.run_suite(
+      "test_v2_lossless_client",
+      TestInterceptor(),
+      policy_t::lossless_client,
+      policy_t::stateful_server,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&suite] {
+      logger().info("-- 0 --");
+      logger().info("[Test] setup connection...");
+      return suite.connect_peer();
+    }).then([&test] {
+      return test.send_bidirectional();
+    }).then([&suite] {
+      return suite.wait_results(1);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+    }).then([&suite] {
+      logger().info("-- 1 --");
+      logger().info("[Test] client markdown...");
+      return suite.markdown();
+    }).then([&suite] {
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.send_peer();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::established);
+      results[1].assert_connect(1, 1, 0, 1);
+      results[1].assert_accept(0, 0, 0, 0);
+      results[1].assert_reset(0, 0);
+    }).then([&test] {
+      logger().info("-- 2 --");
+      logger().info("[Test] server markdown...");
+      return test.markdown_peer();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::established);
+      results[1].assert_connect(2, 2, 1, 2);
+      results[1].assert_accept(0, 0, 0, 0);
+      results[1].assert_reset(0, 1);
+    }).then([&suite] {
+      logger().info("-- 3 --");
+      logger().info("[Test] client reconnect...");
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.send_peer();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::established);
+      results[1].assert_connect(2, 2, 1, 2);
+      results[1].assert_accept(0, 0, 0, 0);
+      results[1].assert_reset(0, 1);
+    });
+  });
+}
+
+// Server-side counterpart of test_v2_lossless_client: a stateful server
+// accepting from a lossless client: (0) setup, (1) client markdown --
+// the first accepted connection re-accepts (session reset recorded) and
+// the fresh accept is replaced, (2) server markdown -- client reconnects
+// into a third, established connection, (3) further client activity
+// reuses that third connection (still 3 results).
+seastar::future<>
+test_v2_stateful_server(FailoverTest& test) {
+  return test.run_suite(
+      "test_v2_stateful_server",
+      TestInterceptor(),
+      policy_t::stateful_server,
+      policy_t::lossless_client,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&test] {
+      logger().info("-- 0 --");
+      logger().info("[Test] setup connection...");
+      return test.peer_connect_me();
+    }).then([&test] {
+      return test.send_bidirectional();
+    }).then([&suite] {
+      return suite.wait_results(1);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 1);
+      results[0].assert_reset(0, 0);
+    }).then([&test] {
+      logger().info("-- 1 --");
+      logger().info("[Test] client markdown...");
+      return test.markdown_peer();
+    }).then([&test] {
+      return test.peer_connect_me();
+    }).then([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 2);
+      results[0].assert_reset(0, 1);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 0, 0);
+      results[1].assert_reset(0, 0);
+    }).then([&suite] {
+      logger().info("-- 2 --");
+      logger().info("[Test] server markdown...");
+      return suite.markdown();
+    }).then([&suite] {
+      return suite.wait_results(3);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 2);
+      results[0].assert_reset(0, 1);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 0, 0);
+      results[1].assert_reset(0, 0);
+      results[2].assert_state_at(conn_state_t::established);
+      results[2].assert_connect(0, 0, 0, 0);
+      results[2].assert_accept(1, 1, 1, 1);
+      results[2].assert_reset(0, 0);
+    }).then([&test] {
+      logger().info("-- 3 --");
+      logger().info("[Test] client reconnect...");
+      return test.peer_connect_me();
+    }).then([&test] {
+      return test.peer_send_me();
+    }).then([&suite] {
+      return suite.wait_results(3);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(0, 0, 0, 0);
+      results[0].assert_accept(1, 1, 0, 2);
+      results[0].assert_reset(0, 1);
+      results[1].assert_state_at(conn_state_t::replaced);
+      results[1].assert_connect(0, 0, 0, 0);
+      results[1].assert_accept(1, 1, 0, 0);
+      results[1].assert_reset(0, 0);
+      results[2].assert_state_at(conn_state_t::established);
+      results[2].assert_connect(0, 0, 0, 0);
+      results[2].assert_accept(1, 1, 1, 1);
+      results[2].assert_reset(0, 0);
+    });
+  });
+}
+
+// Connector-side test of the lossless_peer_reuse policy: (0) setup,
+// (1) connector markdown + new connection, (2) acceptor markdown -- the
+// connector must drop to standby (checked via is_standby()), then
+// (3) reconnect using try_send_peer(); the surviving connection shows the
+// extra connect attempt and a remote reset.
+seastar::future<>
+test_v2_peer_reuse_connector(FailoverTest& test) {
+  return test.run_suite(
+      "test_v2_peer_reuse_connector",
+      TestInterceptor(),
+      policy_t::lossless_peer_reuse,
+      policy_t::lossless_peer_reuse,
+      [&test] (FailoverSuite& suite) {
+    return seastar::futurize_invoke([&suite] {
+      logger().info("-- 0 --");
+      logger().info("[Test] setup connection...");
+      return suite.connect_peer();
+    }).then([&test] {
+      return test.send_bidirectional();
+    }).then([&suite] {
+      return suite.wait_results(1);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::established);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+    }).then([&suite] {
+      logger().info("-- 1 --");
+      logger().info("[Test] connector markdown...");
+      return suite.markdown();
+    }).then([&suite] {
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.send_peer();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::established);
+      results[1].assert_connect(1, 1, 0, 1);
+      results[1].assert_accept(0, 0, 0, 0);
+      results[1].assert_reset(0, 0);
+    }).then([&test] {
+      logger().info("-- 2 --");
+      logger().info("[Test] acceptor markdown...");
+      return test.markdown_peer();
+    }).then([&suite] {
+      // Peer went away: the reuse policy must leave us in standby.
+      ceph_assert(suite.is_standby());
+      logger().info("-- 3 --");
+      logger().info("[Test] connector reconnect...");
+      return suite.connect_peer();
+    }).then([&suite] {
+      return suite.try_send_peer();
+    }).then([&suite] {
+      return suite.wait_results(2);
+    }).then([] (ConnResults& results) {
+      results[0].assert_state_at(conn_state_t::closed);
+      results[0].assert_connect(1, 1, 0, 1);
+      results[0].assert_accept(0, 0, 0, 0);
+      results[0].assert_reset(0, 0);
+      results[1].assert_state_at(conn_state_t::established);
+      results[1].assert_connect(2, 2, 1, 2);
+      results[1].assert_accept(0, 0, 0, 0);
+      results[1].assert_reset(0, 1);
+    });
+  });
+}
+
+seastar::future<>
+test_v2_peer_reuse_acceptor(FailoverTest& test) {
+ return test.run_suite(
+ "test_v2_peer_reuse_acceptor",
+ TestInterceptor(),
+ policy_t::lossless_peer_reuse,
+ policy_t::lossless_peer_reuse,
+ [&test] (FailoverSuite& suite) {
+ return seastar::futurize_invoke([&test] {
+ logger().info("-- 0 --");
+ logger().info("[Test] setup connection...");
+ return test.peer_connect_me();
+ }).then([&test] {
+ return test.send_bidirectional();
+ }).then([&suite] {
+ return suite.wait_results(1);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::established);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 1);
+ results[0].assert_reset(0, 0);
+ }).then([&test] {
+ logger().info("-- 1 --");
+ logger().info("[Test] connector markdown...");
+ return test.markdown_peer();
+ }).then([&test] {
+ return test.peer_connect_me();
+ }).then([&test] {
+ return test.peer_send_me();
+ }).then([&suite] {
+ return suite.wait_results(2);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::established);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 2);
+ results[0].assert_reset(0, 1);
+ results[1].assert_state_at(conn_state_t::replaced);
+ results[1].assert_connect(0, 0, 0, 0);
+ results[1].assert_accept(1, 1, 0, 0);
+ results[1].assert_reset(0, 0);
+ }).then([&suite] {
+ logger().info("-- 2 --");
+ logger().info("[Test] acceptor markdown...");
+ return suite.markdown();
+ }).then([&suite] {
+ return suite.wait_results(2);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::closed);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 2);
+ results[0].assert_reset(0, 1);
+ results[1].assert_state_at(conn_state_t::replaced);
+ results[1].assert_connect(0, 0, 0, 0);
+ results[1].assert_accept(1, 1, 0, 0);
+ results[1].assert_reset(0, 0);
+ }).then([&test] {
+ logger().info("-- 3 --");
+ logger().info("[Test] connector reconnect...");
+ return test.peer_connect_me();
+ }).then([&test] {
+ return test.try_peer_send_me();
+ }).then([&suite] {
+ return suite.wait_results(3);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::closed);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 2);
+ results[0].assert_reset(0, 1);
+ results[1].assert_state_at(conn_state_t::replaced);
+ results[1].assert_connect(0, 0, 0, 0);
+ results[1].assert_accept(1, 1, 0, 0);
+ results[1].assert_reset(0, 0);
+ results[2].assert_state_at(conn_state_t::established);
+ results[2].assert_connect(0, 0, 0, 0);
+ results[2].assert_accept(1, 1, 1, 1);
+ results[2].assert_reset(0, 0);
+ });
+ });
+}
+
+seastar::future<>
+test_v2_lossless_peer_connector(FailoverTest& test) {
+ return test.run_suite(
+ "test_v2_lossless_peer_connector",
+ TestInterceptor(),
+ policy_t::lossless_peer,
+ policy_t::lossless_peer,
+ [&test] (FailoverSuite& suite) {
+ return seastar::futurize_invoke([&suite] {
+ logger().info("-- 0 --");
+ logger().info("[Test] setup connection...");
+ return suite.connect_peer();
+ }).then([&test] {
+ return test.send_bidirectional();
+ }).then([&suite] {
+ return suite.wait_results(1);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::established);
+ results[0].assert_connect(1, 1, 0, 1);
+ results[0].assert_accept(0, 0, 0, 0);
+ results[0].assert_reset(0, 0);
+ }).then([&suite] {
+ logger().info("-- 1 --");
+ logger().info("[Test] connector markdown...");
+ return suite.markdown();
+ }).then([&suite] {
+ return suite.connect_peer();
+ }).then([&suite] {
+ return suite.send_peer();
+ }).then([&suite] {
+ return suite.wait_results(2);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::closed);
+ results[0].assert_connect(1, 1, 0, 1);
+ results[0].assert_accept(0, 0, 0, 0);
+ results[0].assert_reset(0, 0);
+ results[1].assert_state_at(conn_state_t::established);
+ results[1].assert_connect(1, 1, 0, 1);
+ results[1].assert_accept(0, 0, 0, 0);
+ results[1].assert_reset(0, 0);
+ }).then([&test] {
+ logger().info("-- 2 --");
+ logger().info("[Test] acceptor markdown...");
+ return test.markdown_peer();
+ }).then([&suite] {
+ ceph_assert(suite.is_standby());
+ logger().info("-- 3 --");
+ logger().info("[Test] connector reconnect...");
+ return suite.connect_peer();
+ }).then([&suite] {
+ return suite.try_send_peer();
+ }).then([&suite] {
+ return suite.wait_results(2);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::closed);
+ results[0].assert_connect(1, 1, 0, 1);
+ results[0].assert_accept(0, 0, 0, 0);
+ results[0].assert_reset(0, 0);
+ results[1].assert_state_at(conn_state_t::established);
+ results[1].assert_connect(2, 2, 1, 2);
+ results[1].assert_accept(0, 0, 0, 0);
+ results[1].assert_reset(0, 1);
+ });
+ });
+}
+
+seastar::future<>
+test_v2_lossless_peer_acceptor(FailoverTest& test) {
+ return test.run_suite(
+ "test_v2_lossless_peer_acceptor",
+ TestInterceptor(),
+ policy_t::lossless_peer,
+ policy_t::lossless_peer,
+ [&test] (FailoverSuite& suite) {
+ return seastar::futurize_invoke([&test] {
+ logger().info("-- 0 --");
+ logger().info("[Test] setup connection...");
+ return test.peer_connect_me();
+ }).then([&test] {
+ return test.send_bidirectional();
+ }).then([&suite] {
+ return suite.wait_results(1);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::established);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 1);
+ results[0].assert_reset(0, 0);
+ }).then([&test] {
+ logger().info("-- 1 --");
+ logger().info("[Test] connector markdown...");
+ return test.markdown_peer();
+ }).then([&test] {
+ return test.peer_connect_me();
+ }).then([&test] {
+ return test.peer_send_me();
+ }).then([&suite] {
+ return suite.wait_results(2);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::established);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 2);
+ results[0].assert_reset(0, 0);
+ results[1].assert_state_at(conn_state_t::replaced);
+ results[1].assert_connect(0, 0, 0, 0);
+ results[1].assert_accept(1, 1, 0, 0);
+ results[1].assert_reset(0, 0);
+ }).then([&suite] {
+ logger().info("-- 2 --");
+ logger().info("[Test] acceptor markdown...");
+ return suite.markdown();
+ }).then([&suite] {
+ return suite.wait_results(2);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::closed);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 2);
+ results[0].assert_reset(0, 0);
+ results[1].assert_state_at(conn_state_t::replaced);
+ results[1].assert_connect(0, 0, 0, 0);
+ results[1].assert_accept(1, 1, 0, 0);
+ results[1].assert_reset(0, 0);
+ }).then([&test] {
+ logger().info("-- 3 --");
+ logger().info("[Test] connector reconnect...");
+ return test.peer_connect_me();
+ }).then([&test] {
+ return test.try_peer_send_me();
+ }).then([&suite] {
+ return suite.wait_results(3);
+ }).then([] (ConnResults& results) {
+ results[0].assert_state_at(conn_state_t::closed);
+ results[0].assert_connect(0, 0, 0, 0);
+ results[0].assert_accept(1, 1, 0, 2);
+ results[0].assert_reset(0, 0);
+ results[1].assert_state_at(conn_state_t::replaced);
+ results[1].assert_connect(0, 0, 0, 0);
+ results[1].assert_accept(1, 1, 0, 0);
+ results[1].assert_reset(0, 0);
+ results[2].assert_state_at(conn_state_t::established);
+ results[2].assert_connect(0, 0, 0, 0);
+ results[2].assert_accept(1, 1, 1, 1);
+ results[2].assert_reset(0, 0);
+ });
+ });
+}
+
+seastar::future<>
+test_v2_protocol(entity_addr_t test_addr,
+ entity_addr_t cmd_peer_addr,
+ entity_addr_t test_peer_addr,
+ bool test_peer_islocal,
+ bool peer_wins) {
+ ceph_assert_always(test_addr.is_msgr2());
+ ceph_assert_always(cmd_peer_addr.is_msgr2());
+ ceph_assert_always(test_peer_addr.is_msgr2());
+
+ if (test_peer_islocal) {
+ // initiate crimson test peer locally
+ logger().info("test_v2_protocol: start local TestPeer at {}...", cmd_peer_addr);
+ return FailoverTestPeer::create(cmd_peer_addr, test_peer_addr
+ ).then([test_addr, cmd_peer_addr, test_peer_addr, peer_wins](auto peer) {
+ return test_v2_protocol(
+ test_addr,
+ cmd_peer_addr,
+ test_peer_addr,
+ false,
+ peer_wins
+ ).then([peer = std::move(peer)] () mutable {
+ return peer->wait().then([peer = std::move(peer)] {});
+ });
+ }).handle_exception([] (auto eptr) {
+ logger().error("FailoverTestPeer failed: got exception {}", eptr);
+ throw;
+ });
+ }
+
+ return FailoverTest::create(test_addr, cmd_peer_addr, test_peer_addr
+ ).then([peer_wins](auto test) {
+ return seastar::futurize_invoke([test] {
+ return test_v2_lossy_early_connect_fault(*test);
+ }).then([test] {
+ return test_v2_lossy_connect_fault(*test);
+ }).then([test] {
+ return test_v2_lossy_connected_fault(*test);
+ }).then([test] {
+ return test_v2_lossy_early_accept_fault(*test);
+ }).then([test] {
+ return test_v2_lossy_accept_fault(*test);
+ }).then([test] {
+ return test_v2_lossy_establishing_fault(*test);
+ }).then([test] {
+ return test_v2_lossy_accepted_fault(*test);
+ }).then([test] {
+ return test_v2_lossless_connect_fault(*test);
+ }).then([test] {
+ return test_v2_lossless_connected_fault(*test);
+ }).then([test] {
+ return test_v2_lossless_connected_fault2(*test);
+ }).then([test] {
+ return test_v2_lossless_reconnect_fault(*test);
+ }).then([test] {
+ return test_v2_lossless_accept_fault(*test);
+ }).then([test] {
+ return test_v2_lossless_establishing_fault(*test);
+ }).then([test] {
+ return test_v2_lossless_accepted_fault(*test);
+ }).then([test] {
+ return test_v2_lossless_reaccept_fault(*test);
+ }).then([test] {
+ return test_v2_peer_connect_fault(*test);
+ }).then([test] {
+ return test_v2_peer_accept_fault(*test);
+ }).then([test] {
+ return test_v2_peer_establishing_fault(*test);
+ }).then([test] {
+ return test_v2_peer_connected_fault_reconnect(*test);
+ }).then([test] {
+ return test_v2_peer_connected_fault_reaccept(*test);
+ }).then([test] {
+ return check_peer_wins(*test);
+ }).then([test, peer_wins](bool ret_peer_wins) {
+ ceph_assert(peer_wins == ret_peer_wins);
+ if (ret_peer_wins) {
+ return seastar::futurize_invoke([test] {
+ return test_v2_racing_connect_acceptor_win(*test);
+ }).then([test] {
+ return test_v2_racing_reconnect_acceptor_win(*test);
+ });
+ } else {
+ return seastar::futurize_invoke([test] {
+ return test_v2_racing_connect_acceptor_lose(*test);
+ }).then([test] {
+ return test_v2_racing_reconnect_acceptor_lose(*test);
+ });
+ }
+ }).then([test] {
+ return test_v2_racing_connect_reconnect_win(*test);
+ }).then([test] {
+ return test_v2_racing_connect_reconnect_lose(*test);
+ }).then([test] {
+ return test_v2_stale_connect(*test);
+ }).then([test] {
+ return test_v2_stale_reconnect(*test);
+ }).then([test] {
+ return test_v2_stale_accept(*test);
+ }).then([test] {
+ return test_v2_stale_establishing(*test);
+ }).then([test] {
+ return test_v2_stale_reaccept(*test);
+ }).then([test] {
+ return test_v2_lossy_client(*test);
+ }).then([test] {
+ return test_v2_stateless_server(*test);
+ }).then([test] {
+ return test_v2_lossless_client(*test);
+ }).then([test] {
+ return test_v2_stateful_server(*test);
+ }).then([test] {
+ return test_v2_peer_reuse_connector(*test);
+ }).then([test] {
+ return test_v2_peer_reuse_acceptor(*test);
+ }).then([test] {
+ return test_v2_lossless_peer_connector(*test);
+ }).then([test] {
+ return test_v2_lossless_peer_acceptor(*test);
+ }).then([test] {
+ return test->shutdown().then([test] {});
+ });
+ }).handle_exception([] (auto eptr) {
+ logger().error("FailoverTest failed: got exception {}", eptr);
+ throw;
+ });
+}
+
+}
+
+seastar::future<int> do_test(seastar::app_template& app)
+{
+ std::vector<const char*> args;
+ std::string cluster;
+ std::string conf_file_list;
+ auto init_params = ceph_argparse_early_args(args,
+ CEPH_ENTITY_TYPE_CLIENT,
+ &cluster,
+ &conf_file_list);
+ return crimson::common::sharded_conf().start(
+ init_params.name, cluster
+ ).then([] {
+ return local_conf().start();
+ }).then([conf_file_list] {
+ return local_conf().parse_config_files(conf_file_list);
+ }).then([&app] {
+ auto&& config = app.configuration();
+ verbose = config["verbose"].as<bool>();
+ auto rounds = config["rounds"].as<unsigned>();
+ auto keepalive_ratio = config["keepalive-ratio"].as<double>();
+ auto testpeer_islocal = config["testpeer-islocal"].as<bool>();
+
+ entity_addr_t test_addr;
+ ceph_assert(test_addr.parse(
+ config["test-addr"].as<std::string>().c_str(), nullptr));
+ test_addr.set_nonce(TEST_NONCE);
+
+ entity_addr_t cmd_peer_addr;
+ ceph_assert(cmd_peer_addr.parse(
+ config["testpeer-addr"].as<std::string>().c_str(), nullptr));
+ cmd_peer_addr.set_nonce(CMD_SRV_NONCE);
+
+ entity_addr_t test_peer_addr = get_test_peer_addr(cmd_peer_addr);
+ bool peer_wins = (test_addr > test_peer_addr);
+
+ logger().info("test configuration: verbose={}, rounds={}, keepalive_ratio={}, "
+ "test_addr={}, cmd_peer_addr={}, test_peer_addr={}, "
+ "testpeer_islocal={}, peer_wins={}, smp={}",
+ verbose, rounds, keepalive_ratio,
+ test_addr, cmd_peer_addr, test_peer_addr,
+ testpeer_islocal, peer_wins,
+ seastar::smp::count);
+ return test_echo(rounds, keepalive_ratio
+ ).then([] {
+ return test_preemptive_shutdown();
+ }).then([test_addr, cmd_peer_addr, test_peer_addr, testpeer_islocal, peer_wins] {
+ return test_v2_protocol(
+ test_addr,
+ cmd_peer_addr,
+ test_peer_addr,
+ testpeer_islocal,
+ peer_wins);
+ }).then([] {
+ logger().info("All tests succeeded");
+ // Seastar has bugs to have events undispatched during shutdown,
+ // which will result in memory leak and thus fail LeakSanitizer.
+ return seastar::sleep(100ms);
+ });
+ }).then([] {
+ return crimson::common::sharded_conf().stop();
+ }).then([] {
+ return 0;
+ }).handle_exception([] (auto eptr) {
+ logger().error("Test failed: got exception {}", eptr);
+ return 1;
+ });
+}
+
+int main(int argc, char** argv)
+{
+ seastar::app_template app;
+ app.add_options()
+ ("verbose,v", bpo::value<bool>()->default_value(false),
+ "chatty if true")
+ ("rounds", bpo::value<unsigned>()->default_value(512),
+ "number of pingpong rounds")
+ ("keepalive-ratio", bpo::value<double>()->default_value(0.1),
+ "ratio of keepalive in ping messages")
+ ("test-addr", bpo::value<std::string>()->default_value("v2:127.0.0.1:9014"),
+ "address of v2 failover tests")
+ ("testpeer-addr", bpo::value<std::string>()->default_value("v2:127.0.0.1:9012"),
+ "addresses of v2 failover testpeer"
+ " (This is CmdSrv address, and TestPeer address is at port+=1)")
+ ("testpeer-islocal", bpo::value<bool>()->default_value(true),
+ "create a local crimson testpeer, or connect to a remote testpeer");
+ return app.run(argc, argv, [&app] {
+ // This test normally succeeds within 60 seconds, so kill it after 300
+ // seconds in case it is blocked forever due to unaddressed bugs.
+ return seastar::with_timeout(seastar::lowres_clock::now() + 300s, do_test(app))
+ .handle_exception_type([](seastar::timed_out_error&) {
+ logger().error("test_messenger timeout after 300s, abort! "
+ "Consider to extend the period if the test is still running.");
+ // use the retcode of timeout(1)
+ return 124;
+ });
+ });
+}
diff --git a/src/test/crimson/test_messenger.h b/src/test/crimson/test_messenger.h
new file mode 100644
index 000000000..635f7fae3
--- /dev/null
+++ b/src/test/crimson/test_messenger.h
@@ -0,0 +1,95 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "msg/msg_types.h"
+
+namespace ceph::net::test {
+
+constexpr uint64_t CMD_CLI_NONCE = 1;
+constexpr int64_t CMD_CLI_OSD = 1;
+constexpr uint64_t TEST_NONCE = 2;
+constexpr int64_t TEST_OSD = 2;
+constexpr uint64_t CMD_SRV_NONCE = 3;
+constexpr int64_t CMD_SRV_OSD = 3;
+constexpr uint64_t TEST_PEER_NONCE = 2;
+constexpr int64_t TEST_PEER_OSD = 4;
+
+inline entity_addr_t get_test_peer_addr(
+ const entity_addr_t &cmd_peer_addr) {
+ entity_addr_t test_peer_addr = cmd_peer_addr;
+ test_peer_addr.set_port(cmd_peer_addr.get_port() + 1);
+ test_peer_addr.set_nonce(TEST_PEER_NONCE);
+ return test_peer_addr;
+}
+
+enum class cmd_t : char {
+ none = '\0',
+ shutdown,
+ suite_start,
+ suite_stop,
+ suite_connect_me,
+ suite_send_me,
+ suite_keepalive_me,
+ suite_markdown,
+ suite_recv_op
+};
+
+enum class policy_t : char {
+ none = '\0',
+ stateful_server,
+ stateless_server,
+ lossless_peer,
+ lossless_peer_reuse,
+ lossy_client,
+ lossless_client
+};
+
+inline std::ostream& operator<<(std::ostream& out, const cmd_t& cmd) {
+ switch(cmd) {
+ case cmd_t::none:
+ return out << "none";
+ case cmd_t::shutdown:
+ return out << "shutdown";
+ case cmd_t::suite_start:
+ return out << "suite_start";
+ case cmd_t::suite_stop:
+ return out << "suite_stop";
+ case cmd_t::suite_connect_me:
+ return out << "suite_connect_me";
+ case cmd_t::suite_send_me:
+ return out << "suite_send_me";
+ case cmd_t::suite_keepalive_me:
+ return out << "suite_keepalive_me";
+ case cmd_t::suite_markdown:
+ return out << "suite_markdown";
+ case cmd_t::suite_recv_op:
+ return out << "suite_recv_op";
+ default:
+ ceph_abort();
+ }
+}
+
+inline std::ostream& operator<<(std::ostream& out, const policy_t& policy) {
+ switch(policy) {
+ case policy_t::none:
+ return out << "none";
+ case policy_t::stateful_server:
+ return out << "stateful_server";
+ case policy_t::stateless_server:
+ return out << "stateless_server";
+ case policy_t::lossless_peer:
+ return out << "lossless_peer";
+ case policy_t::lossless_peer_reuse:
+ return out << "lossless_peer_reuse";
+ case policy_t::lossy_client:
+ return out << "lossy_client";
+ case policy_t::lossless_client:
+ return out << "lossless_client";
+ default:
+ ceph_abort();
+ }
+}
+
+} // namespace ceph::net::test
diff --git a/src/test/crimson/test_messenger_peer.cc b/src/test/crimson/test_messenger_peer.cc
new file mode 100644
index 000000000..28d8a3d38
--- /dev/null
+++ b/src/test/crimson/test_messenger_peer.cc
@@ -0,0 +1,462 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+
+#include <boost/pointer_cast.hpp>
+#include <boost/program_options/variables_map.hpp>
+#include <boost/program_options/parsers.hpp>
+
+#include "auth/DummyAuth.h"
+#include "common/dout.h"
+#include "global/global_init.h"
+#include "messages/MPing.h"
+#include "messages/MCommand.h"
+#include "messages/MCommandReply.h"
+#include "messages/MOSDOp.h"
+#include "msg/Dispatcher.h"
+#include "msg/Messenger.h"
+
+#include "test_messenger.h"
+
+namespace {
+
+#define dout_subsys ceph_subsys_test
+
+using namespace ceph::net::test;
+using SocketPolicy = Messenger::Policy;
+
+constexpr int CEPH_OSD_PROTOCOL = 10;
+
+class FailoverSuitePeer : public Dispatcher {
+ using cb_t = std::function<void()>;
+ DummyAuthClientServer dummy_auth;
+ std::unique_ptr<Messenger> peer_msgr;
+ cb_t op_callback;
+
+ Connection* tracked_conn = nullptr;
+ unsigned pending_send = 0;
+
+ bool ms_can_fast_dispatch_any() const override { return true; }
+ bool ms_can_fast_dispatch(const Message* m) const override { return true; }
+ void ms_fast_dispatch(Message* m) override {
+ auto conn = m->get_connection().get();
+ if (tracked_conn == nullptr) {
+ ldout(cct, 0) << "[!TestPeer] got op from Test(conn "
+ << conn << "not tracked yet)" << dendl;
+ tracked_conn = conn;
+ } else if (tracked_conn != conn) {
+ lderr(cct) << "[TestPeer] got op from Test: conn(" << conn
+ << ") != tracked_conn(" << tracked_conn
+ << ")" << dendl;
+ ceph_abort();
+ } else {
+ ldout(cct, 0) << "[TestPeer] got op from Test" << dendl;
+ }
+ op_callback();
+ }
+ bool ms_dispatch(Message* m) override { ceph_abort(); }
+ void ms_handle_fast_connect(Connection* conn) override {
+ if (tracked_conn == conn) {
+ ldout(cct, 0) << "[TestPeer] connected: " << conn << dendl;
+ } else {
+ lderr(cct) << "[TestPeer] connected: conn(" << conn
+ << ") != tracked_conn(" << tracked_conn
+ << ")" << dendl;
+ ceph_abort();
+ }
+ }
+ void ms_handle_fast_accept(Connection* conn) override {
+ if (tracked_conn == nullptr) {
+ ldout(cct, 0) << "[TestPeer] accepted: " << conn << dendl;
+ tracked_conn = conn;
+ } else if (tracked_conn != conn) {
+ lderr(cct) << "[TestPeer] accepted: conn(" << conn
+ << ") != tracked_conn(" << tracked_conn
+ << ")" << dendl;
+ ceph_abort();
+ } else {
+ ldout(cct, 0) << "[!TestPeer] accepted(stale event): " << conn << dendl;
+ }
+ flush_pending_send();
+ }
+ bool ms_handle_reset(Connection* conn) override {
+ if (tracked_conn == conn) {
+ ldout(cct, 0) << "[TestPeer] reset: " << conn << dendl;
+ tracked_conn = nullptr;
+ } else {
+ ldout(cct, 0) << "[!TestPeer] reset(invalid event): conn(" << conn
+ << ") != tracked_conn(" << tracked_conn
+ << ")" << dendl;
+ }
+ return true;
+ }
+ void ms_handle_remote_reset(Connection* conn) override {
+ if (tracked_conn == conn) {
+ ldout(cct, 0) << "[TestPeer] remote reset: " << conn << dendl;
+ } else {
+ ldout(cct, 0) << "[!TestPeer] reset(invalid event): conn(" << conn
+ << ") != tracked_conn(" << tracked_conn
+ << ")" << dendl;
+ }
+ }
+ bool ms_handle_refused(Connection* conn) override {
+ ldout(cct, 0) << "[!TestPeer] refused: " << conn << dendl;
+ return true;
+ }
+
+ private:
+ void init(entity_addr_t test_peer_addr, SocketPolicy policy) {
+ peer_msgr.reset(Messenger::create(
+ cct, "async",
+ entity_name_t::OSD(TEST_PEER_OSD),
+ "TestPeer",
+ TEST_PEER_NONCE));
+ dummy_auth.auth_registry.refresh_config();
+ peer_msgr->set_cluster_protocol(CEPH_OSD_PROTOCOL);
+ peer_msgr->set_default_policy(policy);
+ peer_msgr->set_auth_client(&dummy_auth);
+ peer_msgr->set_auth_server(&dummy_auth);
+ peer_msgr->bind(test_peer_addr);
+ peer_msgr->add_dispatcher_head(this);
+ peer_msgr->start();
+ }
+
+ void send_op() {
+ ceph_assert(tracked_conn);
+ pg_t pgid;
+ object_locator_t oloc;
+ hobject_t hobj(object_t(), oloc.key, CEPH_NOSNAP, pgid.ps(),
+ pgid.pool(), oloc.nspace);
+ spg_t spgid(pgid);
+ tracked_conn->send_message2(make_message<MOSDOp>(0, 0, hobj, spgid, 0, 0, 0));
+ }
+
+ void flush_pending_send() {
+ if (pending_send != 0) {
+ ldout(cct, 0) << "[TestPeer] flush sending "
+ << pending_send << " ops" << dendl;
+ }
+ ceph_assert(tracked_conn);
+ while (pending_send) {
+ send_op();
+ --pending_send;
+ }
+ }
+
+ public:
+ FailoverSuitePeer(CephContext* cct, cb_t op_callback)
+ : Dispatcher(cct), dummy_auth(cct), op_callback(op_callback) { }
+
+ void shutdown() {
+ peer_msgr->shutdown();
+ peer_msgr->wait();
+ }
+
+ void connect_peer(entity_addr_t test_addr) {
+ ldout(cct, 0) << "[TestPeer] connect_peer(" << test_addr << ")" << dendl;
+ auto conn = peer_msgr->connect_to_osd(entity_addrvec_t{test_addr});
+ if (tracked_conn) {
+ if (tracked_conn == conn.get()) {
+ ldout(cct, 0) << "[TestPeer] this is not a new session " << conn.get() << dendl;
+ } else {
+ ldout(cct, 0) << "[TestPeer] this is a new session " << conn.get()
+ << ", replacing old one " << tracked_conn << dendl;
+ }
+ } else {
+ ldout(cct, 0) << "[TestPeer] this is a new session " << conn.get() << dendl;
+ }
+ tracked_conn = conn.get();
+ flush_pending_send();
+ }
+
+ void send_peer() {
+ if (tracked_conn) {
+ ldout(cct, 0) << "[TestPeer] send_peer()" << dendl;
+ send_op();
+ } else {
+ ++pending_send;
+ ldout(cct, 0) << "[TestPeer] send_peer() (pending " << pending_send << ")" << dendl;
+ }
+ }
+
+ void keepalive_peer() {
+ ldout(cct, 0) << "[TestPeer] keepalive_peer()" << dendl;
+ ceph_assert(tracked_conn);
+ tracked_conn->send_keepalive();
+ }
+
+ void markdown() {
+ ldout(cct, 0) << "[TestPeer] markdown()" << dendl;
+ ceph_assert(tracked_conn);
+ tracked_conn->mark_down();
+ tracked_conn = nullptr;
+ }
+
+ static std::unique_ptr<FailoverSuitePeer>
+ create(CephContext* cct, entity_addr_t test_peer_addr,
+ SocketPolicy policy, cb_t op_callback) {
+ auto suite = std::make_unique<FailoverSuitePeer>(cct, op_callback);
+ suite->init(test_peer_addr, policy);
+ return suite;
+ }
+};
+
+SocketPolicy to_socket_policy(CephContext* cct, policy_t policy) {
+ switch (policy) {
+ case policy_t::stateful_server:
+ return SocketPolicy::stateful_server(0);
+ case policy_t::stateless_server:
+ return SocketPolicy::stateless_server(0);
+ case policy_t::lossless_peer:
+ return SocketPolicy::lossless_peer(0);
+ case policy_t::lossless_peer_reuse:
+ return SocketPolicy::lossless_peer_reuse(0);
+ case policy_t::lossy_client:
+ return SocketPolicy::lossy_client(0);
+ case policy_t::lossless_client:
+ return SocketPolicy::lossless_client(0);
+ default:
+ lderr(cct) << "[CmdSrv] unexpected policy type" << dendl;
+ ceph_abort();
+ }
+}
+
+class FailoverTestPeer : public Dispatcher {
+ DummyAuthClientServer dummy_auth;
+ std::unique_ptr<Messenger> cmd_msgr;
+ Connection *cmd_conn = nullptr;
+ const entity_addr_t test_peer_addr;
+ std::unique_ptr<FailoverSuitePeer> test_suite;
+ const bool nonstop;
+
+ bool ms_can_fast_dispatch_any() const override { return false; }
+ bool ms_can_fast_dispatch(const Message* m) const override { return false; }
+ void ms_fast_dispatch(Message* m) override { ceph_abort(); }
+ bool ms_dispatch(Message* m) override {
+ auto conn = m->get_connection().get();
+ if (cmd_conn == nullptr) {
+ ldout(cct, 0) << "[!CmdSrv] got msg from CmdCli(conn "
+ << conn << "not tracked yet)" << dendl;
+ cmd_conn = conn;
+ } else if (cmd_conn != conn) {
+ lderr(cct) << "[CmdSrv] got msg from CmdCli: conn(" << conn
+ << ") != cmd_conn(" << cmd_conn
+ << ")" << dendl;
+ ceph_abort();
+ } else {
+ // good!
+ }
+ switch (m->get_type()) {
+ case CEPH_MSG_PING: {
+ ldout(cct, 0) << "[CmdSrv] got PING, sending PONG ..." << dendl;
+ cmd_conn->send_message2(make_message<MPing>());
+ break;
+ }
+ case MSG_COMMAND: {
+ auto m_cmd = boost::static_pointer_cast<MCommand>(m);
+ auto cmd = static_cast<cmd_t>(m_cmd->cmd[0][0]);
+ if (cmd == cmd_t::shutdown) {
+ ldout(cct, 0) << "All tests succeeded" << dendl;
+ if (!nonstop) {
+ ldout(cct, 0) << "[CmdSrv] shutdown ..." << dendl;
+ cmd_msgr->shutdown();
+ } else {
+ ldout(cct, 0) << "[CmdSrv] nonstop set ..." << dendl;
+ }
+ } else {
+ ldout(cct, 0) << "[CmdSrv] got cmd " << cmd << dendl;
+ handle_cmd(cmd, m_cmd);
+ ldout(cct, 0) << "[CmdSrv] done, send cmd reply ..." << dendl;
+ cmd_conn->send_message2(make_message<MCommandReply>());
+ }
+ break;
+ }
+ default:
+ lderr(cct) << "[CmdSrv] " << __func__ << " " << cmd_conn
+ << " got unexpected msg from CmdCli: "
+ << m << dendl;
+ ceph_abort();
+ }
+ m->put();
+ return true;
+ }
+ void ms_handle_fast_connect(Connection*) override { ceph_abort(); }
+ void ms_handle_fast_accept(Connection *conn) override {
+ if (cmd_conn == nullptr) {
+ ldout(cct, 0) << "[CmdSrv] accepted: " << conn << dendl;
+ cmd_conn = conn;
+ } else if (cmd_conn != conn) {
+ lderr(cct) << "[CmdSrv] accepted: conn(" << conn
+ << ") != cmd_conn(" << cmd_conn
+ << ")" << dendl;
+ ceph_abort();
+ } else {
+ ldout(cct, 0) << "[!CmdSrv] accepted(stale event): " << conn << dendl;
+ }
+ }
+ bool ms_handle_reset(Connection* conn) override {
+ if (cmd_conn == conn) {
+ ldout(cct, 0) << "[CmdSrv] reset: " << conn << dendl;
+ cmd_conn = nullptr;
+ } else {
+ ldout(cct, 0) << "[!CmdSrv] reset(invalid event): conn(" << conn
+ << ") != cmd_conn(" << cmd_conn
+ << ")" << dendl;
+ }
+ return true;
+ }
+ void ms_handle_remote_reset(Connection*) override { ceph_abort(); }
+ bool ms_handle_refused(Connection*) override { ceph_abort(); }
+
+ private:
+ void notify_recv_op() {
+ ceph_assert(cmd_conn);
+ auto m = make_message<MCommand>();
+ m->cmd.emplace_back(1, static_cast<char>(cmd_t::suite_recv_op));
+ cmd_conn->send_message2(m);
+ }
+
+ void handle_cmd(cmd_t cmd, MRef<MCommand> m_cmd) {
+ switch (cmd) {
+ case cmd_t::suite_start: {
+ if (test_suite) {
+ test_suite->shutdown();
+ test_suite.reset();
+ ldout(cct, 0) << "-------- suite stopped (force) --------\n\n" << dendl;
+ }
+ auto p = static_cast<policy_t>(m_cmd->cmd[1][0]);
+ ldout(cct, 0) << "[CmdSrv] suite starting (" << p
+ <<", " << test_peer_addr << ") ..." << dendl;
+ auto policy = to_socket_policy(cct, p);
+ auto suite = FailoverSuitePeer::create(cct, test_peer_addr, policy,
+ [this] { notify_recv_op(); });
+ test_suite.swap(suite);
+ return;
+ }
+ case cmd_t::suite_stop:
+ ceph_assert(test_suite);
+ test_suite->shutdown();
+ test_suite.reset();
+ ldout(cct, 0) << "-------- suite stopped --------\n\n" << dendl;
+ return;
+ case cmd_t::suite_connect_me: {
+ ceph_assert(test_suite);
+ entity_addr_t test_addr = entity_addr_t();
+ test_addr.parse(m_cmd->cmd[1].c_str(), nullptr);
+ test_suite->connect_peer(test_addr);
+ return;
+ }
+ case cmd_t::suite_send_me:
+ ceph_assert(test_suite);
+ test_suite->send_peer();
+ return;
+ case cmd_t::suite_keepalive_me:
+ ceph_assert(test_suite);
+ test_suite->keepalive_peer();
+ return;
+ case cmd_t::suite_markdown:
+ ceph_assert(test_suite);
+ test_suite->markdown();
+ return;
+ default:
+ lderr(cct) << "[CmdSrv] got unexpected command " << m_cmd
+ << " from CmdCli" << dendl;
+ ceph_abort();
+ }
+ }
+
+ void init(entity_addr_t cmd_peer_addr) {
+ cmd_msgr.reset(Messenger::create(
+ cct, "async",
+ entity_name_t::OSD(CMD_SRV_OSD),
+ "CmdSrv",
+ CMD_SRV_NONCE));
+ dummy_auth.auth_registry.refresh_config();
+ cmd_msgr->set_cluster_protocol(CEPH_OSD_PROTOCOL);
+ cmd_msgr->set_default_policy(Messenger::Policy::stateless_server(0));
+ cmd_msgr->set_auth_client(&dummy_auth);
+ cmd_msgr->set_auth_server(&dummy_auth);
+ cmd_msgr->bind(cmd_peer_addr);
+ cmd_msgr->add_dispatcher_head(this);
+ cmd_msgr->start();
+ }
+
+ public:
+ FailoverTestPeer(CephContext* cct,
+ entity_addr_t test_peer_addr,
+ bool nonstop)
+ : Dispatcher(cct),
+ dummy_auth(cct),
+ test_peer_addr(test_peer_addr),
+ nonstop(nonstop) { }
+
+ void wait() { cmd_msgr->wait(); }
+
+ static std::unique_ptr<FailoverTestPeer>
+ create(CephContext* cct,
+ entity_addr_t cmd_peer_addr,
+ entity_addr_t test_peer_addr,
+ bool nonstop) {
+ auto test_peer = std::make_unique<FailoverTestPeer>(
+ cct, test_peer_addr, nonstop);
+ test_peer->init(cmd_peer_addr);
+ ldout(cct, 0) << "[CmdSrv] ready" << dendl;
+ return test_peer;
+ }
+};
+
+}
+
+int main(int argc, char** argv)
+{
+ namespace po = boost::program_options;
+ po::options_description desc{"Allowed options"};
+ desc.add_options()
+ ("help,h", "show help message")
+ ("addr", po::value<std::string>()->default_value("v2:127.0.0.1:9012"),
+ "This is CmdSrv address, and TestPeer address is at port+=1")
+ ("nonstop", po::value<bool>()->default_value(false),
+ "Do not shutdown TestPeer when all tests are successful");
+ po::variables_map vm;
+ std::vector<std::string> unrecognized_options;
+ try {
+ auto parsed = po::command_line_parser(argc, argv)
+ .options(desc)
+ .allow_unregistered()
+ .run();
+ po::store(parsed, vm);
+ if (vm.count("help")) {
+ std::cout << desc << std::endl;
+ return 0;
+ }
+ po::notify(vm);
+ unrecognized_options = po::collect_unrecognized(parsed.options, po::include_positional);
+ } catch(const po::error& e) {
+ std::cerr << "error: " << e.what() << std::endl;
+ return 1;
+ }
+
+ std::vector<const char*> args(argv, argv + argc);
+ auto cct = global_init(nullptr, args,
+ CEPH_ENTITY_TYPE_CLIENT,
+ CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_MON_CONFIG);
+ common_init_finish(cct.get());
+
+ auto addr = vm["addr"].as<std::string>();
+ entity_addr_t cmd_peer_addr;
+ cmd_peer_addr.parse(addr.c_str(), nullptr);
+ cmd_peer_addr.set_nonce(CMD_SRV_NONCE);
+ ceph_assert_always(cmd_peer_addr.is_msgr2());
+ auto test_peer_addr = get_test_peer_addr(cmd_peer_addr);
+ auto nonstop = vm["nonstop"].as<bool>();
+ ldout(cct, 0) << "test configuration: cmd_peer_addr=" << cmd_peer_addr
+ << ", test_peer_addr=" << test_peer_addr
+ << ", nonstop=" << nonstop
+ << dendl;
+
+ auto test_peer = FailoverTestPeer::create(
+ cct.get(),
+ cmd_peer_addr,
+ test_peer_addr,
+ nonstop);
+ test_peer->wait();
+}
diff --git a/src/test/crimson/test_messenger_thrash.cc b/src/test/crimson/test_messenger_thrash.cc
new file mode 100644
index 000000000..f2b1828f1
--- /dev/null
+++ b/src/test/crimson/test_messenger_thrash.cc
@@ -0,0 +1,672 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <map>
+#include <random>
+#include <fmt/format.h>
+#include <fmt/ostream.h>
+#include <seastar/core/app-template.hh>
+#include <seastar/core/do_with.hh>
+#include <seastar/core/future-util.hh>
+#include <seastar/core/reactor.hh>
+#include <seastar/core/sleep.hh>
+#include <seastar/core/with_timeout.hh>
+
+#include "common/ceph_argparse.h"
+#include "messages/MPing.h"
+#include "messages/MCommand.h"
+#include "crimson/auth/DummyAuth.h"
+#include "crimson/common/log.h"
+#include "crimson/net/Connection.h"
+#include "crimson/net/Dispatcher.h"
+#include "crimson/net/Messenger.h"
+
+using namespace std::chrono_literals;
+namespace bpo = boost::program_options;
+using crimson::common::local_conf;
+using payload_seq_t = uint64_t;
+
+struct Payload {
+ enum Who : uint8_t {
+ PING = 0,
+ PONG = 1,
+ };
+ uint8_t who = 0;
+ payload_seq_t seq = 0;
+ bufferlist data;
+
+ Payload(Who who, uint64_t seq, const bufferlist& data)
+ : who(who), seq(seq), data(data)
+ {}
+ Payload() = default;
+ DENC(Payload, v, p) {
+ DENC_START(1, 1, p);
+ denc(v.who, p);
+ denc(v.seq, p);
+ denc(v.data, p);
+ DENC_FINISH(p);
+ }
+};
+WRITE_CLASS_DENC(Payload)
+
+template<>
+struct fmt::formatter<Payload> : fmt::formatter<std::string_view> {
+ template <typename FormatContext>
+ auto format(const Payload& pl, FormatContext& ctx) const {
+ return fmt::format_to(ctx.out(), "reply={} i={}", pl.who, pl.seq);
+ }
+};
+
+namespace {
+
+seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+}
+
+std::random_device rd;
+std::default_random_engine rng{rd()};
+std::uniform_int_distribution<> prob(0,99);
+bool verbose = false;
+
+entity_addr_t get_server_addr() {
+ static int port = 16800;
+ ++port;
+ entity_addr_t saddr;
+ saddr.parse("127.0.0.1", nullptr);
+ saddr.set_port(port);
+ return saddr;
+}
+
+uint64_t get_nonce() {
+ static uint64_t nonce = 1;
+ ++nonce;
+ return nonce;
+}
+
+struct thrash_params_t {
+ std::size_t servers;
+ std::size_t clients;
+ std::size_t connections;
+ std::size_t random_op;
+};
+
+class SyntheticWorkload;
+
+class SyntheticDispatcher final
+ : public crimson::net::Dispatcher {
+ public:
+ std::map<crimson::net::Connection*, std::deque<payload_seq_t> > conn_sent;
+ std::map<payload_seq_t, bufferlist> sent;
+ unsigned index;
+ SyntheticWorkload *workload;
+
+ SyntheticDispatcher(bool s, SyntheticWorkload *wl):
+ index(0), workload(wl) {
+ }
+
+ std::optional<seastar::future<>> ms_dispatch(crimson::net::ConnectionRef con,
+ MessageRef m) final {
+ if (verbose) {
+ logger().warn("{}: con = {}", __func__, *con);
+ }
+    // MSG_COMMAND is used to disrupt the regular message flow
+ if (m->get_type() == MSG_COMMAND) {
+ return seastar::now();
+ }
+
+ Payload pl;
+ auto p = m->get_data().cbegin();
+ decode(pl, p);
+ if (pl.who == Payload::PING) {
+ logger().info(" {} conn= {} {}", __func__, *con, pl);
+ return reply_message(m, con, pl);
+ } else {
+ ceph_assert(pl.who == Payload::PONG);
+ if (sent.count(pl.seq)) {
+ logger().info(" {} conn= {} {}", __func__, *con, pl);
+ ceph_assert(conn_sent[&*con].front() == pl.seq);
+ ceph_assert(pl.data.contents_equal(sent[pl.seq]));
+ conn_sent[&*con].pop_front();
+ sent.erase(pl.seq);
+ }
+
+ return seastar::now();
+ }
+ }
+
+ void ms_handle_accept(
+ crimson::net::ConnectionRef conn,
+ seastar::shard_id prv_shard,
+ bool is_replace) final {
+ logger().info("{} - Connection:{}", __func__, *conn);
+ assert(prv_shard == seastar::this_shard_id());
+ }
+
+ void ms_handle_connect(
+ crimson::net::ConnectionRef conn,
+ seastar::shard_id prv_shard) final {
+ logger().info("{} - Connection:{}", __func__, *conn);
+ assert(prv_shard == seastar::this_shard_id());
+ }
+
+ void ms_handle_reset(crimson::net::ConnectionRef con, bool is_replace) final;
+
+ void ms_handle_remote_reset(crimson::net::ConnectionRef con) final {
+ clear_pending(con);
+ }
+
+ std::optional<seastar::future<>> reply_message(
+ const MessageRef m,
+ crimson::net::ConnectionRef con,
+ Payload& pl) {
+ pl.who = Payload::PONG;
+ bufferlist bl;
+ encode(pl, bl);
+ auto rm = crimson::make_message<MPing>();
+ rm->set_data(bl);
+ if (verbose) {
+ logger().info("{} conn= {} reply i= {}",
+ __func__, *con, pl.seq);
+ }
+ return con->send(std::move(rm));
+ }
+
+ seastar::future<> send_message_wrap(crimson::net::ConnectionRef con,
+ const bufferlist& data) {
+ auto m = crimson::make_message<MPing>();
+ Payload pl{Payload::PING, index++, data};
+ bufferlist bl;
+ encode(pl, bl);
+ m->set_data(bl);
+ sent[pl.seq] = pl.data;
+ conn_sent[&*con].push_back(pl.seq);
+ logger().info("{} conn= {} send i= {}",
+ __func__, *con, pl.seq);
+
+ return con->send(std::move(m));
+ }
+
+ uint64_t get_num_pending_msgs() {
+ return sent.size();
+ }
+
+ void clear_pending(crimson::net::ConnectionRef con) {
+ for (std::deque<uint64_t>::iterator it = conn_sent[&*con].begin();
+ it != conn_sent[&*con].end(); ++it)
+ sent.erase(*it);
+ conn_sent.erase(&*con);
+ }
+
+ void print() {
+ for (auto && [connptr, list] : conn_sent) {
+ if (!list.empty()) {
+ logger().info("{} {} wait {}", __func__,
+ (void*)connptr, list.size());
+ }
+ }
+ }
+};
+
+class SyntheticWorkload {
+  // messengers must be freed after their connections
+ std::set<crimson::net::MessengerRef> available_servers;
+ std::set<crimson::net::MessengerRef> available_clients;
+
+ crimson::net::SocketPolicy server_policy;
+ crimson::net::SocketPolicy client_policy;
+ std::map<crimson::net::ConnectionRef,
+ std::pair<crimson::net::MessengerRef,
+ crimson::net::MessengerRef>> available_connections;
+ SyntheticDispatcher dispatcher;
+ std::vector<bufferlist> rand_data;
+ crimson::auth::DummyAuthClientServer dummy_auth;
+
+ seastar::future<crimson::net::ConnectionRef> get_random_connection() {
+ return seastar::do_until(
+ [this] { return dispatcher.get_num_pending_msgs() <= max_in_flight; },
+ [] { return seastar::sleep(100ms); }
+ ).then([this] {
+ boost::uniform_int<> choose(0, available_connections.size() - 1);
+ int index = choose(rng);
+ std::map<crimson::net::ConnectionRef,
+ std::pair<crimson::net::MessengerRef, crimson::net::MessengerRef>>::iterator i
+ = available_connections.begin();
+ for (; index > 0; --index, ++i) ;
+ return seastar::make_ready_future<crimson::net::ConnectionRef>(i->first);
+ });
+ }
+
+ public:
+ const unsigned min_connections = 10;
+ const unsigned max_in_flight = 64;
+ const unsigned max_connections = 128;
+ const unsigned max_message_len = 1024 * 1024 * 4;
+ const uint64_t servers, clients;
+
+ SyntheticWorkload(int servers, int clients, int random_num,
+ crimson::net::SocketPolicy srv_policy,
+ crimson::net::SocketPolicy cli_policy)
+ : server_policy(srv_policy),
+ client_policy(cli_policy),
+ dispatcher(false, this),
+ servers(servers),
+ clients(clients) {
+
+ for (int i = 0; i < random_num; i++) {
+ bufferlist bl;
+ boost::uniform_int<> u(32, max_message_len);
+ uint64_t value_len = u(rng);
+ bufferptr bp(value_len);
+ bp.zero();
+ for (uint64_t j = 0; j < value_len-sizeof(i); ) {
+ memcpy(bp.c_str()+j, &i, sizeof(i));
+ j += 4096;
+ }
+
+ bl.append(bp);
+ rand_data.push_back(bl);
+ }
+ }
+
+
+ bool can_create_connection() {
+ return available_connections.size() < max_connections;
+ }
+
+ seastar::future<> maybe_generate_connection() {
+ if (!can_create_connection()) {
+ return seastar::now();
+ }
+ crimson::net::MessengerRef server, client;
+ {
+ boost::uniform_int<> choose(0, available_servers.size() - 1);
+ int index = choose(rng);
+ std::set<crimson::net::MessengerRef>::iterator i
+ = available_servers.begin();
+ for (; index > 0; --index, ++i) ;
+ server = *i;
+ }
+ {
+ boost::uniform_int<> choose(0, available_clients.size() - 1);
+ int index = choose(rng);
+ std::set<crimson::net::MessengerRef>::iterator i
+ = available_clients.begin();
+ for (; index > 0; --index, ++i) ;
+ client = *i;
+ }
+
+
+ std::pair<crimson::net::MessengerRef, crimson::net::MessengerRef>
+ connected_pair;
+ {
+ crimson::net::ConnectionRef conn = client->connect(
+ server->get_myaddr(),
+ entity_name_t::TYPE_OSD);
+ connected_pair = std::make_pair(client, server);
+ available_connections[conn] = connected_pair;
+ }
+ return seastar::now();
+ }
+
+ seastar::future<> random_op (const uint64_t& iter) {
+ return seastar::do_with(iter, [this] (uint64_t& iter) {
+ return seastar::do_until(
+ [&] { return iter == 0; },
+ [&, this]
+ {
+ if (!(iter % 10)) {
+ logger().info("{} Op {} : ", __func__ ,iter);
+ print_internal_state();
+ }
+ --iter;
+ int val = prob(rng);
+ if(val > 90) {
+ return maybe_generate_connection();
+ } else if (val > 80) {
+ return drop_connection();
+ } else if (val > 10) {
+ return send_message();
+ } else {
+ return seastar::sleep(
+ std::chrono::milliseconds(rand() % 1000 + 500));
+ }
+ });
+ });
+ }
+
+ seastar::future<> generate_connections (const uint64_t& iter) {
+ return seastar::do_with(iter, [this] (uint64_t& iter) {
+ return seastar::do_until(
+ [&] { return iter == 0; },
+ [&, this]
+ {
+ --iter;
+ if (!(connections_count() % 10)) {
+ logger().info("seeding connection {}",
+ connections_count());
+ }
+ return maybe_generate_connection();
+ });
+ });
+ }
+
+ seastar::future<> init_server(const entity_name_t& name,
+ const std::string& lname,
+ const uint64_t nonce,
+ const entity_addr_t& addr) {
+ crimson::net::MessengerRef msgr =
+ crimson::net::Messenger::create(
+ name, lname, nonce, true);
+ msgr->set_default_policy(server_policy);
+ msgr->set_auth_client(&dummy_auth);
+ msgr->set_auth_server(&dummy_auth);
+ available_servers.insert(msgr);
+ return msgr->bind(entity_addrvec_t{addr}).safe_then(
+ [this, msgr] {
+ return msgr->start({&dispatcher});
+ }, crimson::net::Messenger::bind_ertr::all_same_way(
+ [addr] (const std::error_code& e) {
+ logger().error("{} test_messenger_thrash(): "
+ "there is another instance running at {}",
+ __func__, addr);
+ ceph_abort();
+ }));
+ }
+
+ seastar::future<> init_client(const entity_name_t& name,
+ const std::string& lname,
+ const uint64_t nonce) {
+ crimson::net::MessengerRef msgr =
+ crimson::net::Messenger::create(
+ name, lname, nonce, true);
+ msgr->set_default_policy(client_policy);
+ msgr->set_auth_client(&dummy_auth);
+ msgr->set_auth_server(&dummy_auth);
+ available_clients.insert(msgr);
+ return msgr->start({&dispatcher});
+ }
+
+ seastar::future<> send_message() {
+ return get_random_connection()
+ .then([this] (crimson::net::ConnectionRef conn) {
+ boost::uniform_int<> true_false(0, 99);
+ int val = true_false(rng);
+ if (val >= 95) {
+ uuid_d uuid;
+ uuid.generate_random();
+ auto m = crimson::make_message<MCommand>(uuid);
+ std::vector<std::string> cmds;
+ cmds.push_back("command");
+ m->cmd = cmds;
+ m->set_priority(200);
+ return conn->send(std::move(m));
+ } else {
+ boost::uniform_int<> u(0, rand_data.size()-1);
+ return dispatcher.send_message_wrap(conn, rand_data[u(rng)]);
+ }
+ });
+ }
+
+ seastar::future<> drop_connection() {
+ if (available_connections.size() < min_connections) {
+ return seastar::now();
+ }
+
+ return get_random_connection()
+ .then([this] (crimson::net::ConnectionRef conn) {
+ dispatcher.clear_pending(conn);
+ conn->mark_down();
+ if (!client_policy.server &&
+ client_policy.standby) {
+ // it's a lossless policy, so we need to mark down each side
+ std::pair<crimson::net::MessengerRef, crimson::net::MessengerRef> &p =
+ available_connections[conn];
+ if (!p.first->get_default_policy().server &&
+ !p.second->get_default_policy().server) {
+ //verify that equal-to operator applies here
+ ceph_assert(p.first->owns_connection(*conn));
+ crimson::net::ConnectionRef peer = p.second->connect(
+ p.first->get_myaddr(), p.first->get_mytype());
+ peer->mark_down();
+ dispatcher.clear_pending(peer);
+ available_connections.erase(peer);
+ }
+ }
+ ceph_assert(available_connections.erase(conn) == 1U);
+ return seastar::now();
+ });
+ }
+
+ void print_internal_state(bool detail=false) {
+ logger().info("available_connections: {} inflight messages: {}",
+ available_connections.size(),
+ dispatcher.get_num_pending_msgs());
+ if (detail && !available_connections.empty()) {
+ dispatcher.print();
+ }
+ }
+
+ seastar::future<> wait_for_done() {
+    int i = 0;  // FIXME: captured by reference in the continuation below, but destroyed when wait_for_done() returns — dangling; hoist into seastar::do_with
+ return seastar::do_until(
+ [this] { return !dispatcher.get_num_pending_msgs(); },
+ [this, &i]
+ {
+ if (i++ % 50 == 0){
+ print_internal_state(true);
+ }
+ return seastar::sleep(100ms);
+ }).then([this] {
+ return seastar::do_for_each(available_servers, [] (auto server) {
+ if (verbose) {
+ logger().info("server {} shutdown" , server->get_myaddrs());
+ }
+ server->stop();
+ return server->shutdown();
+ });
+ }).then([this] {
+ return seastar::do_for_each(available_clients, [] (auto client) {
+ if (verbose) {
+ logger().info("client {} shutdown" , client->get_myaddrs());
+ }
+ client->stop();
+ return client->shutdown();
+ });
+ });
+ }
+
+ void handle_reset(crimson::net::ConnectionRef con) {
+ available_connections.erase(con);
+ }
+
+ uint64_t servers_count() {
+ return available_servers.size();
+ }
+
+ uint64_t clients_count() {
+ return available_clients.size();
+ }
+
+ uint64_t connections_count() {
+ return available_connections.size();
+ }
+};
+
+void SyntheticDispatcher::ms_handle_reset(crimson::net::ConnectionRef con,
+ bool is_replace) {
+ workload->handle_reset(con);
+ clear_pending(con);
+}
+
+seastar::future<> reset_conf() {
+ return seastar::when_all_succeed(
+ local_conf().set_val("ms_inject_socket_failures", "0"),
+ local_conf().set_val("ms_inject_internal_delays", "0"),
+ local_conf().set_val("ms_inject_delay_probability", "0"),
+ local_conf().set_val("ms_inject_delay_max", "0")
+ ).then_unpack([] {
+ return seastar::now();
+ });
+}
+
+// Testing Crimson messenger (with msgr-v2 protocol) robustness against
+// network delays and failures. The test includes stress tests and
+// socket-level delay/failure injection tests, letting time
+// and randomness achieve the best test coverage.
+
+// Test Parameters:
+// Clients: 8 (stateful)
+// Servers: 32 (lossless)
+// Connections: 100 (Generated between random clients/server)
+// Random Operations: 120 (Generate/Drop Connection, Send Message, Sleep)
+seastar::future<> test_stress(thrash_params_t tp)
+{
+
+ logger().info("test_stress():");
+
+ SyntheticWorkload test_msg(tp.servers, tp.clients, 100,
+ crimson::net::SocketPolicy::stateful_server(0),
+ crimson::net::SocketPolicy::lossless_client(0));
+
+ return seastar::do_with(test_msg, [tp]
+ (SyntheticWorkload& test_msg) {
+ return seastar::do_until([&test_msg] {
+ return test_msg.servers_count() == test_msg.servers; },
+ [&test_msg] {
+ entity_addr_t bind_addr = get_server_addr();
+ bind_addr.set_type(entity_addr_t::TYPE_MSGR2);
+ uint64_t server_num = get_nonce();
+ return test_msg.init_server(entity_name_t::OSD(server_num),
+ "server", server_num , bind_addr);
+ }).then([&test_msg] {
+ return seastar::do_until([&test_msg] {
+ return test_msg.clients_count() == test_msg.clients; },
+ [&test_msg] {
+ return test_msg.init_client(entity_name_t::CLIENT(-1),
+ "client", get_nonce());
+ });
+ }).then([&test_msg, tp] {
+ return test_msg.generate_connections(tp.connections);
+ }).then([&test_msg, tp] {
+ return test_msg.random_op(tp.random_op);
+ }).then([&test_msg] {
+ return test_msg.wait_for_done();
+ }).then([] {
+ logger().info("test_stress() DONE");
+ }).handle_exception([] (auto eptr) {
+ logger().error(
+ "test_stress() failed: got exception {}",
+ eptr);
+ throw;
+ });
+ });
+}
+
+// Test Parameters:
+// Clients: 8 (stateful)
+// Servers: 32 (lossless)
+// Connections: 100 (Generated between random clients/server)
+// Random Operations: 120 (Generate/Drop Connection, Send Message, Sleep)
+seastar::future<> test_injection(thrash_params_t tp)
+{
+
+ logger().info("test_injection():");
+
+ SyntheticWorkload test_msg(tp.servers, tp.clients, 100,
+ crimson::net::SocketPolicy::stateful_server(0),
+ crimson::net::SocketPolicy::lossless_client(0));
+
+ return seastar::do_with(test_msg, [tp]
+ (SyntheticWorkload& test_msg) {
+ return seastar::do_until([&test_msg] {
+ return test_msg.servers_count() == test_msg.servers; },
+ [&test_msg] {
+ entity_addr_t bind_addr = get_server_addr();
+ bind_addr.set_type(entity_addr_t::TYPE_MSGR2);
+ uint64_t server_num = get_nonce();
+ return test_msg.init_server(entity_name_t::OSD(server_num),
+ "server", server_num , bind_addr);
+ }).then([&test_msg] {
+ return seastar::do_until([&test_msg] {
+ return test_msg.clients_count() == test_msg.clients; },
+ [&test_msg] {
+ return test_msg.init_client(entity_name_t::CLIENT(-1),
+ "client", get_nonce());
+ });
+ }).then([] {
+ return seastar::when_all_succeed(
+ local_conf().set_val("ms_inject_socket_failures", "30"),
+ local_conf().set_val("ms_inject_internal_delays", "0.1"),
+ local_conf().set_val("ms_inject_delay_probability", "1"),
+ local_conf().set_val("ms_inject_delay_max", "5"));
+ }).then_unpack([] {
+ return seastar::now();
+ }).then([&test_msg, tp] {
+ return test_msg.generate_connections(tp.connections);
+ }).then([&test_msg, tp] {
+ return test_msg.random_op(tp.random_op);
+ }).then([&test_msg] {
+ return test_msg.wait_for_done();
+ }).then([] {
+ logger().info("test_inejction() DONE");
+ return seastar::now();
+ }).then([] {
+ return reset_conf();
+ }).handle_exception([] (auto eptr) {
+ logger().error(
+ "test_injection() failed: got exception {}",
+ eptr);
+ throw;
+ });
+ });
+}
+
+}
+
+seastar::future<int> do_test(seastar::app_template& app)
+{
+ std::vector<const char*> args;
+ std::string cluster;
+ std::string conf_file_list;
+ auto init_params = ceph_argparse_early_args(args,
+ CEPH_ENTITY_TYPE_CLIENT,
+ &cluster,
+ &conf_file_list);
+ return crimson::common::sharded_conf().start(
+ init_params.name, cluster
+ ).then([] {
+ return local_conf().start();
+ }).then([conf_file_list] {
+ return local_conf().parse_config_files(conf_file_list);
+ }).then([&app] {
+ auto&& config = app.configuration();
+ verbose = config["verbose"].as<bool>();
+ return test_stress(thrash_params_t{8, 32, 50, 120})
+ .then([] {
+ return test_injection(thrash_params_t{16, 32, 50, 120});
+ }).then([] {
+ logger().info("All tests succeeded");
+      // Seastar has a bug that can leave events undispatched during shutdown,
+      // which results in a memory leak and thus fails LeakSanitizer.
+ return seastar::sleep(100ms);
+ });
+ }).then([] {
+ return crimson::common::sharded_conf().stop();
+ }).then([] {
+ return 0;
+ }).handle_exception([] (auto eptr) {
+ logger().error("Test failed: got exception {}", eptr);
+ return 1;
+ });
+}
+
+int main(int argc, char** argv)
+{
+ seastar::app_template app;
+ app.add_options()
+ ("verbose,v", bpo::value<bool>()->default_value(false),
+ "chatty if true");
+ return app.run(argc, argv, [&app] {
+ return do_test(app);
+ });
+}
diff --git a/src/test/crimson/test_monc.cc b/src/test/crimson/test_monc.cc
new file mode 100644
index 000000000..e60df4525
--- /dev/null
+++ b/src/test/crimson/test_monc.cc
@@ -0,0 +1,84 @@
+#include <seastar/core/app-template.hh>
+#include "common/ceph_argparse.h"
+#include "crimson/common/auth_handler.h"
+#include "crimson/common/config_proxy.h"
+#include "crimson/mon/MonClient.h"
+#include "crimson/net/Connection.h"
+#include "crimson/net/Messenger.h"
+
+using Config = crimson::common::ConfigProxy;
+using MonClient = crimson::mon::Client;
+
+namespace {
+
+class DummyAuthHandler : public crimson::common::AuthHandler {
+public:
+ void handle_authentication(const EntityName& name,
+ const AuthCapsInfo& caps) final
+ {}
+};
+
+DummyAuthHandler dummy_handler;
+
+}
+
+using namespace std::literals;
+
+static seastar::future<> test_monc()
+{
+ return crimson::common::sharded_conf().start(EntityName{}, "ceph"sv).then([] {
+ std::vector<const char*> args;
+ std::string cluster;
+ std::string conf_file_list;
+ auto init_params = ceph_argparse_early_args(args,
+ CEPH_ENTITY_TYPE_CLIENT,
+ &cluster,
+ &conf_file_list);
+ auto& conf = crimson::common::local_conf();
+ conf->name = init_params.name;
+ conf->cluster = cluster;
+ return conf.parse_config_files(conf_file_list);
+ }).then([] {
+ return crimson::common::sharded_perf_coll().start();
+ }).then([]() mutable {
+ auto msgr = crimson::net::Messenger::create(entity_name_t::OSD(0), "monc", 0, true);
+ return seastar::do_with(MonClient{*msgr, dummy_handler},
+ [msgr](auto& monc) mutable {
+ return msgr->start({&monc}).then([&monc] {
+ return seastar::with_timeout(
+ seastar::lowres_clock::now() + std::chrono::seconds{10},
+ monc.start());
+ }).then([&monc] {
+ return monc.stop();
+ });
+ }).finally([msgr] {
+ return msgr->shutdown();
+ });
+ }).finally([] {
+ return crimson::common::sharded_perf_coll().stop().then([] {
+ return crimson::common::sharded_conf().stop();
+ });
+ });
+}
+
+int main(int argc, char** argv)
+{
+ seastar::app_template app;
+ return app.run(argc, argv, [&] {
+ return test_monc().then([] {
+ std::cout << "All tests succeeded" << std::endl;
+ }).handle_exception([] (auto eptr) {
+ std::cout << "Test failure" << std::endl;
+ return seastar::make_exception_future<>(eptr);
+ });
+ });
+}
+
+
+/*
+ * Local Variables:
+ * compile-command: "make -j4 \
+ * -C ../../../build \
+ * unittest_seastar_monc"
+ * End:
+ */
diff --git a/src/test/crimson/test_perfcounters.cc b/src/test/crimson/test_perfcounters.cc
new file mode 100644
index 000000000..8aecbf911
--- /dev/null
+++ b/src/test/crimson/test_perfcounters.cc
@@ -0,0 +1,62 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <iostream>
+#include <fmt/format.h>
+
+#include "common/Formatter.h"
+#include "common/perf_counters.h"
+#include "crimson/common/perf_counters_collection.h"
+
+#include <seastar/core/app-template.hh>
+#include <seastar/core/sharded.hh>
+
+enum {
+ PERFTEST_FIRST = 1000000,
+ PERFTEST_INDEX,
+ PERFTEST_LAST,
+};
+
+static constexpr uint64_t PERF_VAL = 42;
+
+static seastar::future<> test_perfcounters(){
+ return crimson::common::sharded_perf_coll().start().then([] {
+ return crimson::common::sharded_perf_coll().invoke_on_all([] (auto& s){
+ std::string name =fmt::format("seastar-osd::shard-{}",seastar::this_shard_id());
+ PerfCountersBuilder plb(NULL, name, PERFTEST_FIRST,PERFTEST_LAST);
+ plb.add_u64_counter(PERFTEST_INDEX, "perftest_count", "count perftest");
+ auto perf_logger = plb.create_perf_counters();
+ perf_logger->inc(PERFTEST_INDEX,PERF_VAL);
+ s.get_perf_collection()->add(perf_logger);
+ });
+ }).then([]{
+ return crimson::common::sharded_perf_coll().invoke_on_all([] (auto& s){
+ auto pcc = s.get_perf_collection();
+ pcc->with_counters([](auto& by_path){
+ for (auto& perf_counter : by_path) {
+ if (PERF_VAL != perf_counter.second.perf_counters->get(PERFTEST_INDEX)) {
+ throw std::runtime_error("perf counter does not match");
+ }
+ }
+ });
+ });
+ }).finally([] {
+ return crimson::common::sharded_perf_coll().stop();
+ });
+
+}
+
+int main(int argc, char** argv)
+{
+ seastar::app_template app;
+ return app.run(argc, argv, [&] {
+ return test_perfcounters().then([] {
+ std::cout << "All tests succeeded" << std::endl;
+ }).handle_exception([] (auto eptr) {
+ std::cout << "Test failure" << std::endl;
+ return seastar::make_exception_future<>(eptr);
+ });
+ });
+
+}
+
+
diff --git a/src/test/crimson/test_socket.cc b/src/test/crimson/test_socket.cc
new file mode 100644
index 000000000..2b61196ea
--- /dev/null
+++ b/src/test/crimson/test_socket.cc
@@ -0,0 +1,558 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "common/ceph_argparse.h"
+#include <fmt/os.h>
+#include <seastar/core/app-template.hh>
+#include <seastar/core/gate.hh>
+#include <seastar/core/sharded.hh>
+#include <seastar/core/sleep.hh>
+#include <seastar/core/when_all.hh>
+#include <seastar/util/later.hh>
+
+#include "crimson/common/log.h"
+#include "crimson/net/Errors.h"
+#include "crimson/net/Fwd.h"
+#include "crimson/net/Socket.h"
+
+using crimson::common::local_conf;
+
+namespace {
+
+using namespace std::chrono_literals;
+
+using seastar::engine;
+using seastar::future;
+using crimson::net::error;
+using crimson::net::listen_ertr;
+using crimson::net::ShardedServerSocket;
+using crimson::net::Socket;
+using crimson::net::SocketRef;
+using crimson::net::stop_t;
+
+using SocketFRef = seastar::foreign_ptr<SocketRef>;
+
+seastar::logger &logger() {
+ return crimson::get_logger(ceph_subsys_test);
+}
+
+entity_addr_t get_server_addr() {
+ entity_addr_t saddr;
+ saddr.parse("127.0.0.1", nullptr);
+ saddr.set_port(9020);
+ return saddr;
+}
+
+future<SocketRef> socket_connect(const entity_addr_t& saddr) {
+ logger().debug("socket_connect() to {} ...", saddr);
+ return Socket::connect(saddr).then([](auto socket) {
+ logger().debug("socket_connect() connected");
+ return socket;
+ });
+}
+
+future<> test_refused() {
+ logger().info("test_refused()...");
+ auto saddr = get_server_addr();
+ return socket_connect(saddr).discard_result().then([saddr] {
+ logger().error("test_refused(): connection to {} is not refused", saddr);
+ ceph_abort();
+ }).handle_exception_type([](const std::system_error& e) {
+ if (e.code() != std::errc::connection_refused) {
+ logger().error("test_refused() got unexpeted error {}", e);
+ ceph_abort();
+ } else {
+ logger().info("test_refused() ok\n");
+ }
+ }).handle_exception([](auto eptr) {
+ logger().error("test_refused() got unexpeted exception {}", eptr);
+ ceph_abort();
+ });
+}
+
+future<> test_bind_same(bool is_fixed_cpu) {
+ logger().info("test_bind_same()...");
+ return ShardedServerSocket::create(is_fixed_cpu
+ ).then([is_fixed_cpu](auto pss1) {
+ auto saddr = get_server_addr();
+ return pss1->listen(saddr).safe_then([saddr, is_fixed_cpu] {
+ // try to bind the same address
+ return ShardedServerSocket::create(is_fixed_cpu
+ ).then([saddr](auto pss2) {
+ return pss2->listen(saddr).safe_then([] {
+ logger().error("test_bind_same() should raise address_in_use");
+ ceph_abort();
+ }, listen_ertr::all_same_way(
+ [](const std::error_code& e) {
+ if (e == std::errc::address_in_use) {
+ // successful!
+ logger().info("test_bind_same() ok\n");
+ } else {
+ logger().error("test_bind_same() got unexpected error {}", e);
+ ceph_abort();
+ }
+          // Note: need to return an explicit ready future, or there will be a
+          // runtime error: member access within null pointer of type 'struct promise_base'
+ return seastar::now();
+ })).then([pss2] {
+ return pss2->shutdown_destroy();
+ });
+ });
+ }, listen_ertr::all_same_way(
+ [saddr](const std::error_code& e) {
+ logger().error("test_bind_same(): there is another instance running at {}",
+ saddr);
+ ceph_abort();
+ })).then([pss1] {
+ return pss1->shutdown_destroy();
+ }).handle_exception([](auto eptr) {
+ logger().error("test_bind_same() got unexpeted exception {}", eptr);
+ ceph_abort();
+ });
+ });
+}
+
+future<> test_accept(bool is_fixed_cpu) {
+ logger().info("test_accept()");
+ return ShardedServerSocket::create(is_fixed_cpu
+ ).then([](auto pss) {
+ auto saddr = get_server_addr();
+ return pss->listen(saddr
+ ).safe_then([pss] {
+ return pss->accept([](auto socket, auto paddr) {
+ logger().info("test_accept(): accepted at shard {}", seastar::this_shard_id());
+ // simple accept
+ return seastar::sleep(100ms
+ ).then([socket = std::move(socket)]() mutable {
+ return socket->close(
+ ).finally([cleanup = std::move(socket)] {});
+ });
+ });
+ }, listen_ertr::all_same_way(
+ [saddr](const std::error_code& e) {
+ logger().error("test_accept(): there is another instance running at {}",
+ saddr);
+ ceph_abort();
+ })).then([saddr] {
+ return seastar::when_all(
+ socket_connect(saddr).then([](auto socket) {
+ return socket->close().finally([cleanup = std::move(socket)] {}); }),
+ socket_connect(saddr).then([](auto socket) {
+ return socket->close().finally([cleanup = std::move(socket)] {}); }),
+ socket_connect(saddr).then([](auto socket) {
+ return socket->close().finally([cleanup = std::move(socket)] {}); })
+ ).discard_result();
+ }).then([] {
+ // should be enough to be connected locally
+ return seastar::sleep(50ms);
+ }).then([] {
+ logger().info("test_accept() ok\n");
+ }).then([pss] {
+ return pss->shutdown_destroy();
+ }).handle_exception([](auto eptr) {
+ logger().error("test_accept() got unexpeted exception {}", eptr);
+ ceph_abort();
+ });
+ });
+}
+
+class SocketFactory {
+ static constexpr seastar::shard_id CLIENT_CPU = 0u;
+ SocketRef client_socket;
+ seastar::promise<> server_connected;
+
+ static constexpr seastar::shard_id SERVER_CPU = 1u;
+ ShardedServerSocket *pss = nullptr;
+
+ seastar::shard_id server_socket_CPU;
+ SocketFRef server_socket;
+
+ public:
+ template <typename FuncC, typename FuncS>
+ static future<> dispatch_sockets(
+ bool is_fixed_cpu,
+ FuncC&& cb_client,
+ FuncS&& cb_server) {
+ ceph_assert_always(seastar::this_shard_id() == CLIENT_CPU);
+ auto owner = std::make_unique<SocketFactory>();
+ auto psf = owner.get();
+ auto saddr = get_server_addr();
+ return seastar::smp::submit_to(SERVER_CPU, [psf, saddr, is_fixed_cpu] {
+ return ShardedServerSocket::create(is_fixed_cpu
+ ).then([psf, saddr](auto pss) {
+ psf->pss = pss;
+ return pss->listen(saddr
+ ).safe_then([] {
+ }, listen_ertr::all_same_way([saddr](const std::error_code& e) {
+ logger().error("dispatch_sockets(): there is another instance running at {}",
+ saddr);
+ ceph_abort();
+ }));
+ });
+ }).then([psf, saddr] {
+ return seastar::when_all_succeed(
+ seastar::smp::submit_to(CLIENT_CPU, [psf, saddr] {
+ return socket_connect(saddr).then([psf](auto socket) {
+ ceph_assert_always(seastar::this_shard_id() == CLIENT_CPU);
+ psf->client_socket = std::move(socket);
+ });
+ }),
+ seastar::smp::submit_to(SERVER_CPU, [psf] {
+ return psf->pss->accept([psf](auto _socket, auto paddr) {
+ logger().info("dispatch_sockets(): accepted at shard {}",
+ seastar::this_shard_id());
+ psf->server_socket_CPU = seastar::this_shard_id();
+ if (psf->pss->is_fixed_shard_dispatching()) {
+ ceph_assert_always(SERVER_CPU == seastar::this_shard_id());
+ }
+ SocketFRef socket = seastar::make_foreign(std::move(_socket));
+ psf->server_socket = std::move(socket);
+ return seastar::smp::submit_to(CLIENT_CPU, [psf] {
+ psf->server_connected.set_value();
+ });
+ });
+ })
+ );
+ }).then_unpack([] {
+ return seastar::now();
+ }).then([psf] {
+ return psf->server_connected.get_future();
+ }).then([psf] {
+ if (psf->pss) {
+ return seastar::smp::submit_to(SERVER_CPU, [psf] {
+ return psf->pss->shutdown_destroy();
+ });
+ }
+ return seastar::now();
+ }).then([psf,
+ cb_client = std::move(cb_client),
+ cb_server = std::move(cb_server)]() mutable {
+ logger().debug("dispatch_sockets(): client/server socket are ready");
+ return seastar::when_all_succeed(
+ seastar::smp::submit_to(CLIENT_CPU,
+ [socket = psf->client_socket.get(), cb_client = std::move(cb_client)] {
+ return cb_client(socket).then([socket] {
+ logger().debug("closing client socket...");
+ return socket->close();
+ }).handle_exception([](auto eptr) {
+ logger().error("dispatch_sockets():"
+ " cb_client() got unexpeted exception {}", eptr);
+ ceph_abort();
+ });
+ }),
+ seastar::smp::submit_to(psf->server_socket_CPU,
+ [socket = psf->server_socket.get(), cb_server = std::move(cb_server)] {
+ return cb_server(socket).then([socket] {
+ logger().debug("closing server socket...");
+ return socket->close();
+ }).handle_exception([](auto eptr) {
+ logger().error("dispatch_sockets():"
+ " cb_server() got unexpeted exception {}", eptr);
+ ceph_abort();
+ });
+ })
+ );
+ }).then_unpack([] {
+ return seastar::now();
+ }).finally([cleanup = std::move(owner)] {});
+ }
+};
+
+class Connection {
+ static const uint64_t DATA_TAIL = 5327;
+ static const unsigned DATA_SIZE = 4096;
+ std::array<uint64_t, DATA_SIZE> data = {0};
+
+ void verify_data_read(const uint64_t read_data[]) {
+ ceph_assert(read_data[0] == read_count);
+    ceph_assert(read_data[DATA_SIZE - 1] == DATA_TAIL);
+ }
+
+ Socket* socket = nullptr;
+ uint64_t write_count = 0;
+ uint64_t read_count = 0;
+
+ Connection(Socket* socket) : socket{socket} {
+ assert(socket);
+ data[DATA_SIZE - 1] = DATA_TAIL;
+ }
+
+ future<> dispatch_write(unsigned round = 0, bool force_shut = false) {
+ logger().debug("dispatch_write(round={}, force_shut={})...", round, force_shut);
+ return seastar::repeat([this, round, force_shut] {
+ if (round != 0 && round <= write_count) {
+ return seastar::futurize_invoke([this, force_shut] {
+ if (force_shut) {
+ logger().debug("dispatch_write() done, force shutdown output");
+ socket->force_shutdown_out();
+ } else {
+ logger().debug("dispatch_write() done");
+ }
+ }).then([] {
+ return seastar::make_ready_future<stop_t>(stop_t::yes);
+ });
+ } else {
+ data[0] = write_count;
+ bufferlist bl;
+ bl.append(buffer::copy(
+ reinterpret_cast<const char*>(&data), sizeof(data)));
+ return socket->write(bl
+ ).then([this] {
+ return socket->flush();
+ }).then([this] {
+ write_count += 1;
+ return seastar::make_ready_future<stop_t>(stop_t::no);
+ });
+ }
+ });
+ }
+
+ future<> dispatch_write_unbounded() {
+ return dispatch_write(
+ ).then([] {
+ ceph_abort();
+ }).handle_exception_type([this](const std::system_error& e) {
+ if (e.code() != std::errc::broken_pipe &&
+ e.code() != std::errc::connection_reset) {
+ logger().error("dispatch_write_unbounded(): "
+ "unexpected error {}", e);
+ throw;
+ }
+ // successful
+ logger().debug("dispatch_write_unbounded(): "
+ "expected error {}", e);
+ shutdown();
+ });
+ }
+
+ future<> dispatch_read(unsigned round = 0, bool force_shut = false) {
+ logger().debug("dispatch_read(round={}, force_shut={})...", round, force_shut);
+ return seastar::repeat([this, round, force_shut] {
+ if (round != 0 && round <= read_count) {
+ return seastar::futurize_invoke([this, force_shut] {
+ if (force_shut) {
+ logger().debug("dispatch_read() done, force shutdown input");
+ socket->force_shutdown_in();
+ } else {
+ logger().debug("dispatch_read() done");
+ }
+ }).then([] {
+ return seastar::make_ready_future<stop_t>(stop_t::yes);
+ });
+ } else {
+ return seastar::futurize_invoke([this] {
+ // we want to test both Socket::read() and Socket::read_exactly()
+ if (read_count % 2) {
+ return socket->read(DATA_SIZE * sizeof(uint64_t)
+ ).then([this](ceph::bufferlist bl) {
+ uint64_t read_data[DATA_SIZE];
+ auto p = bl.cbegin();
+ ::ceph::decode_raw(read_data, p);
+ verify_data_read(read_data);
+ });
+ } else {
+ return socket->read_exactly(DATA_SIZE * sizeof(uint64_t)
+ ).then([this](auto bptr) {
+ uint64_t read_data[DATA_SIZE];
+ std::memcpy(read_data, bptr.c_str(), DATA_SIZE * sizeof(uint64_t));
+ verify_data_read(read_data);
+ });
+ }
+ }).then([this] {
+ ++read_count;
+ return seastar::make_ready_future<stop_t>(stop_t::no);
+ });
+ }
+ });
+ }
+
+ future<> dispatch_read_unbounded() {
+ return dispatch_read(
+ ).then([] {
+ ceph_abort();
+ }).handle_exception_type([this](const std::system_error& e) {
+ if (e.code() != error::read_eof
+ && e.code() != std::errc::connection_reset) {
+ logger().error("dispatch_read_unbounded(): "
+ "unexpected error {}", e);
+ throw;
+ }
+ // successful
+ logger().debug("dispatch_read_unbounded(): "
+ "expected error {}", e);
+ shutdown();
+ });
+ }
+
+ void shutdown() {
+ socket->shutdown();
+ }
+
+ public:
+ static future<> dispatch_rw_bounded(Socket* socket, unsigned round,
+ bool force_shut = false) {
+ logger().debug("dispatch_rw_bounded(round={}, force_shut={})...",
+ round, force_shut);
+ return seastar::do_with(Connection{socket},
+ [round, force_shut](auto& conn) {
+ ceph_assert(round != 0);
+ return seastar::when_all_succeed(
+ conn.dispatch_write(round, force_shut),
+ conn.dispatch_read(round, force_shut)
+ ).then_unpack([] {
+ return seastar::now();
+ });
+ });
+ }
+
+ static future<> dispatch_rw_unbounded(Socket* socket, bool preemptive_shut = false) {
+ logger().debug("dispatch_rw_unbounded(preemptive_shut={})...", preemptive_shut);
+ return seastar::do_with(Connection{socket}, [preemptive_shut](auto& conn) {
+ return seastar::when_all_succeed(
+ conn.dispatch_write_unbounded(),
+ conn.dispatch_read_unbounded(),
+ seastar::futurize_invoke([&conn, preemptive_shut] {
+ if (preemptive_shut) {
+ return seastar::sleep(100ms).then([&conn] {
+ logger().debug("dispatch_rw_unbounded() shutdown socket preemptively(100ms)");
+ conn.shutdown();
+ });
+ } else {
+ return seastar::now();
+ }
+ })
+ ).then_unpack([] {
+ return seastar::now();
+ });
+ });
+ }
+};
+
+future<> test_read_write(bool is_fixed_cpu) {
+ logger().info("test_read_write()...");
+ return SocketFactory::dispatch_sockets(
+ is_fixed_cpu,
+ [](auto cs) { return Connection::dispatch_rw_bounded(cs, 128); },
+ [](auto ss) { return Connection::dispatch_rw_bounded(ss, 128); }
+ ).then([] {
+ logger().info("test_read_write() ok\n");
+ }).handle_exception([](auto eptr) {
+ logger().error("test_read_write() got unexpeted exception {}", eptr);
+ ceph_abort();
+ });
+}
+
+future<> test_unexpected_down(bool is_fixed_cpu) {
+ logger().info("test_unexpected_down()...");
+ return SocketFactory::dispatch_sockets(
+ is_fixed_cpu,
+ [](auto cs) {
+ return Connection::dispatch_rw_bounded(cs, 128, true
+ ).handle_exception_type([](const std::system_error& e) {
+ logger().debug("test_unexpected_down(): client get error {}", e);
+ ceph_assert(e.code() == error::read_eof);
+ });
+ },
+ [](auto ss) { return Connection::dispatch_rw_unbounded(ss); }
+ ).then([] {
+ logger().info("test_unexpected_down() ok\n");
+ }).handle_exception([](auto eptr) {
+ logger().error("test_unexpected_down() got unexpeted exception {}", eptr);
+ ceph_abort();
+ });
+}
+
+future<> test_shutdown_propagated(bool is_fixed_cpu) {
+ logger().info("test_shutdown_propagated()...");
+ return SocketFactory::dispatch_sockets(
+ is_fixed_cpu,
+ [](auto cs) {
+ logger().debug("test_shutdown_propagated() shutdown client socket");
+ cs->shutdown();
+ return seastar::now();
+ },
+ [](auto ss) { return Connection::dispatch_rw_unbounded(ss); }
+ ).then([] {
+ logger().info("test_shutdown_propagated() ok\n");
+ }).handle_exception([](auto eptr) {
+ logger().error("test_shutdown_propagated() got unexpeted exception {}", eptr);
+ ceph_abort();
+ });
+}
+
+future<> test_preemptive_down(bool is_fixed_cpu) {
+ logger().info("test_preemptive_down()...");
+ return SocketFactory::dispatch_sockets(
+ is_fixed_cpu,
+ [](auto cs) { return Connection::dispatch_rw_unbounded(cs, true); },
+ [](auto ss) { return Connection::dispatch_rw_unbounded(ss); }
+ ).then([] {
+ logger().info("test_preemptive_down() ok\n");
+ }).handle_exception([](auto eptr) {
+ logger().error("test_preemptive_down() got unexpeted exception {}", eptr);
+ ceph_abort();
+ });
+}
+
+future<> do_test_with_type(bool is_fixed_cpu) {
+ return test_bind_same(is_fixed_cpu
+ ).then([is_fixed_cpu] {
+ return test_accept(is_fixed_cpu);
+ }).then([is_fixed_cpu] {
+ return test_read_write(is_fixed_cpu);
+ }).then([is_fixed_cpu] {
+ return test_unexpected_down(is_fixed_cpu);
+ }).then([is_fixed_cpu] {
+ return test_shutdown_propagated(is_fixed_cpu);
+ }).then([is_fixed_cpu] {
+ return test_preemptive_down(is_fixed_cpu);
+ });
+}
+
+}
+
// Top-level test driver: bring up the sharded config service, run every test
// group, then tear the service down. Resolves to the process exit status
// (0 on success, 1 on any test failure). The start/parse/.../stop ordering
// is required by the sharded service lifecycle, so the chain must stay
// strictly sequential.
seastar::future<int> do_test(seastar::app_template& app)
{
  // Parse the common ceph early arguments to obtain the entity name and
  // cluster; `args` is intentionally empty (no CLI options are forwarded).
  std::vector<const char*> args;
  std::string cluster;
  std::string conf_file_list;
  auto init_params = ceph_argparse_early_args(args,
                                              CEPH_ENTITY_TYPE_CLIENT,
                                              &cluster,
                                              &conf_file_list);
  return crimson::common::sharded_conf().start(
    init_params.name, cluster
  ).then([] {
    return local_conf().start();
  }).then([conf_file_list] {
    return local_conf().parse_config_files(conf_file_list);
  }).then([] {
    // Disable injected delays so timings in the tests stay deterministic.
    return local_conf().set_val("ms_inject_internal_delays", "0");
  }).then([] {
    return test_refused();
  }).then([] {
    // Run the full suite twice: with and without fixed server CPU placement.
    return do_test_with_type(true);
  }).then([] {
    return do_test_with_type(false);
  }).then([] {
    logger().info("All tests succeeded");
    // Seastar has bugs to have events undispatched during shutdown,
    // which will result in memory leak and thus fail LeakSanitizer.
    return seastar::sleep(100ms);
  }).then([] {
    return crimson::common::sharded_conf().stop();
  }).then([] {
    return 0;
  }).handle_exception([](auto eptr) {
    logger().error("Test failed: got exception {}", eptr);
    return 1;
  });
}
+
+int main(int argc, char** argv)
+{
+ seastar::app_template app;
+ return app.run(argc, argv, [&app] {
+ return do_test(app);
+ });
+}