author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 18:45:59 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 18:45:59 +0000
commit    19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree      42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/crimson/common
parent    Initial commit. (diff)
download  ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
          ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds. (tags: upstream/16.2.11+ds, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/crimson/common')
-rw-r--r--  src/crimson/common/assert.cc                      81
-rw-r--r--  src/crimson/common/auth_handler.h                 17
-rw-r--r--  src/crimson/common/buffer_io.cc                   57
-rw-r--r--  src/crimson/common/buffer_io.h                    21
-rw-r--r--  src/crimson/common/config_proxy.cc                93
-rw-r--r--  src/crimson/common/config_proxy.h                200
-rw-r--r--  src/crimson/common/errorator.h                  1140
-rw-r--r--  src/crimson/common/exception.h                    50
-rw-r--r--  src/crimson/common/fixed_kv_node_layout.h        700
-rw-r--r--  src/crimson/common/formatter.cc                   64
-rw-r--r--  src/crimson/common/formatter.h                    15
-rw-r--r--  src/crimson/common/gated.h                        51
-rw-r--r--  src/crimson/common/layout.h                      737
-rw-r--r--  src/crimson/common/log.cc                         21
-rw-r--r--  src/crimson/common/log.h                          24
-rw-r--r--  src/crimson/common/perf_counters_collection.cc    25
-rw-r--r--  src/crimson/common/perf_counters_collection.h     37
-rw-r--r--  src/crimson/common/shared_lru.h                  178
-rw-r--r--  src/crimson/common/simple_lru.h                  141
-rw-r--r--  src/crimson/common/throttle.cc                    59
-rw-r--r--  src/crimson/common/throttle.h                     39
-rw-r--r--  src/crimson/common/tri_mutex.cc                  225
-rw-r--r--  src/crimson/common/tri_mutex.h                   156
-rw-r--r--  src/crimson/common/type_helpers.h                  8
24 files changed, 4139 insertions, 0 deletions
diff --git a/src/crimson/common/assert.cc b/src/crimson/common/assert.cc
new file mode 100644
index 000000000..07610c33f
--- /dev/null
+++ b/src/crimson/common/assert.cc
@@ -0,0 +1,81 @@
+#include <cstdarg>
+#include <iostream>
+
+#include <seastar/util/backtrace.hh>
+#include <seastar/core/reactor.hh>
+
+#include "include/ceph_assert.h"
+
+#include "crimson/common/log.h"
+
+namespace ceph {
+ [[gnu::cold]] void __ceph_assert_fail(const ceph::assert_data &ctx)
+ {
+ __ceph_assert_fail(ctx.assertion, ctx.file, ctx.line, ctx.function);
+ }
+
+ [[gnu::cold]] void __ceph_assert_fail(const char* assertion,
+ const char* file, int line,
+ const char* func)
+ {
+ seastar::logger& logger = crimson::get_logger(0);
+    logger.error("{}:{} : In function '{}', ceph_assert({})\n"
+ "{}",
+ file, line, func, assertion,
+ seastar::current_backtrace());
+ std::cout << std::flush;
+ abort();
+ }
+ [[gnu::cold]] void __ceph_assertf_fail(const char *assertion,
+ const char *file, int line,
+ const char *func, const char* msg,
+ ...)
+ {
+ char buf[8096];
+ va_list args;
+ va_start(args, msg);
+ std::vsnprintf(buf, sizeof(buf), msg, args);
+ va_end(args);
+
+ seastar::logger& logger = crimson::get_logger(0);
+    logger.error("{}:{} : In function '{}', ceph_assert({})\n"
+ "{}\n{}\n",
+ file, line, func, assertion,
+ buf,
+ seastar::current_backtrace());
+ std::cout << std::flush;
+ abort();
+ }
+
+ [[gnu::cold]] void __ceph_abort(const char* file, int line,
+ const char* func, const std::string& msg)
+ {
+ seastar::logger& logger = crimson::get_logger(0);
+    logger.error("{}:{} : In function '{}', abort({})\n"
+ "{}",
+ file, line, func, msg,
+ seastar::current_backtrace());
+ std::cout << std::flush;
+ abort();
+ }
+
+ [[gnu::cold]] void __ceph_abortf(const char* file, int line,
+ const char* func, const char* fmt,
+ ...)
+ {
+ char buf[8096];
+ va_list args;
+ va_start(args, fmt);
+ std::vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ seastar::logger& logger = crimson::get_logger(0);
+ logger.error("{}:{} : In function '{}', abort()\n"
+ "{}\n{}\n",
+ file, line, func,
+ buf,
+ seastar::current_backtrace());
+ std::cout << std::flush;
+ abort();
+ }
+}
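These handlers back the standard ceph_assert()/ceph_assertf()/ceph_abort() macros from include/ceph_assert.h; under crimson they report through the seastar logger and attach a seastar backtrace instead of going through the classic dout machinery. A minimal caller sketch (apply_update is a hypothetical function):

  #include "include/ceph_assert.h"

  void apply_update(int length) {
    // a false condition expands to a call of ceph::__ceph_assert_fail(),
    // which logs file/line/function plus a backtrace and then abort()s
    ceph_assert(length >= 0);
    // the printf-style variant routes through ceph::__ceph_assertf_fail()
    ceph_assertf(length < (1 << 20), "unexpected length %d", length);
  }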
diff --git a/src/crimson/common/auth_handler.h b/src/crimson/common/auth_handler.h
new file mode 100644
index 000000000..d4140b6a2
--- /dev/null
+++ b/src/crimson/common/auth_handler.h
@@ -0,0 +1,17 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+class EntityName;
+class AuthCapsInfo;
+
+namespace crimson::common {
+class AuthHandler {
+public:
+ // the peer just got authorized
+ virtual void handle_authentication(const EntityName& name,
+ const AuthCapsInfo& caps) = 0;
+ virtual ~AuthHandler() = default;
+};
+}
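Implementing the interface only requires overriding handle_authentication(); since the header merely forward-declares EntityName and AuthCapsInfo, taking them by reference keeps implementers lightweight. A hypothetical no-op handler, e.g. for tests:

  #include "crimson/common/auth_handler.h"

  // hypothetical: accept every authenticated peer and ignore its caps
  class NoopAuthHandler final : public crimson::common::AuthHandler {
  public:
    void handle_authentication(const EntityName&,
                               const AuthCapsInfo&) override {
      // nothing to do
    }
  };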
diff --git a/src/crimson/common/buffer_io.cc b/src/crimson/common/buffer_io.cc
new file mode 100644
index 000000000..86edf7a6f
--- /dev/null
+++ b/src/crimson/common/buffer_io.cc
@@ -0,0 +1,57 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "buffer_io.h"
+
+#include <seastar/core/reactor.hh>
+#include <seastar/core/fstream.hh>
+#include <seastar/core/do_with.hh>
+
+#include "include/buffer.h"
+
+namespace crimson {
+
+seastar::future<> write_file(ceph::buffer::list&& bl,
+ seastar::sstring fn,
+ seastar::file_permissions permissions)
+{
+ const auto flags = (seastar::open_flags::wo |
+ seastar::open_flags::create |
+ seastar::open_flags::truncate);
+ seastar::file_open_options foo;
+ foo.create_permissions = permissions;
+ return seastar::open_file_dma(fn, flags, foo).then(
+ [bl=std::move(bl)](seastar::file f) {
+ return seastar::make_file_output_stream(f).then(
+ [bl=std::move(bl), f=std::move(f)](seastar::output_stream<char> out) {
+ return seastar::do_with(std::move(out),
+ std::move(f),
+ std::move(bl),
+ [](seastar::output_stream<char>& out,
+ seastar::file& f,
+ ceph::buffer::list& bl) {
+ return seastar::do_for_each(bl.buffers(), [&out](auto& buf) {
+ return out.write(buf.c_str(), buf.length());
+ }).then([&out] {
+ return out.close();
+ });
+ });
+ });
+ });
+}
+
+seastar::future<seastar::temporary_buffer<char>>
+read_file(const seastar::sstring fn)
+{
+ return seastar::open_file_dma(fn, seastar::open_flags::ro).then(
+ [] (seastar::file f) {
+ return f.size().then([f = std::move(f)](size_t s) {
+ return seastar::do_with(seastar::make_file_input_stream(f),
+ [s](seastar::input_stream<char>& in) {
+ return in.read_exactly(s);
+ });
+ });
+ });
+}
+
+}
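Both helpers must run on a started seastar reactor. A round-trip sketch under that assumption (the path is illustrative):

  #include <iostream>

  #include "include/buffer.h"
  #include "crimson/common/buffer_io.h"

  seastar::future<> roundtrip(ceph::buffer::list bl) {
    // write with the default 0644 permissions, then read the file back
    return crimson::write_file(std::move(bl), "/tmp/blob.bin").then([] {
      return crimson::read_file("/tmp/blob.bin");
    }).then([](seastar::temporary_buffer<char> buf) {
      std::cout << "read back " << buf.size() << " bytes\n";
    });
  }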
diff --git a/src/crimson/common/buffer_io.h b/src/crimson/common/buffer_io.h
new file mode 100644
index 000000000..c5ece4a6f
--- /dev/null
+++ b/src/crimson/common/buffer_io.h
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <seastar/core/future.hh>
+#include <seastar/core/file-types.hh>
+
+#include "include/buffer_fwd.h"
+
+namespace crimson {
+ seastar::future<> write_file(ceph::buffer::list&& bl,
+ seastar::sstring fn,
+                               seastar::file_permissions permissions = // 0644
+ (seastar::file_permissions::user_read |
+ seastar::file_permissions::user_write |
+ seastar::file_permissions::group_read |
+ seastar::file_permissions::others_read));
+ seastar::future<seastar::temporary_buffer<char>>
+ read_file(const seastar::sstring fn);
+}
diff --git a/src/crimson/common/config_proxy.cc b/src/crimson/common/config_proxy.cc
new file mode 100644
index 000000000..88d4679d5
--- /dev/null
+++ b/src/crimson/common/config_proxy.cc
@@ -0,0 +1,93 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "config_proxy.h"
+
+#include <filesystem>
+
+#include "crimson/common/buffer_io.h"
+
+namespace crimson::common {
+
+ConfigProxy::ConfigProxy(const EntityName& name, std::string_view cluster)
+{
+ if (seastar::this_shard_id() != 0) {
+ return;
+ }
+ // set the initial value on CPU#0
+ values.reset(seastar::make_lw_shared<ConfigValues>());
+ values.get()->name = name;
+ values.get()->cluster = cluster;
+ // and the only copy of md_config_impl<> is allocated on CPU#0
+ local_config.reset(new md_config_t{*values, obs_mgr, true});
+ if (name.is_mds()) {
+ local_config->set_val_default(*values, obs_mgr,
+ "keyring", "$mds_data/keyring");
+ } else if (name.is_osd()) {
+ local_config->set_val_default(*values, obs_mgr,
+ "keyring", "$osd_data/keyring");
+ }
+}
+
+seastar::future<> ConfigProxy::start()
+{
+ // populate values and config to all other shards
+ if (!values) {
+ return seastar::make_ready_future<>();
+ }
+ return container().invoke_on_others([this](auto& proxy) {
+ return values.copy().then([config=local_config.get(),
+ &proxy](auto foreign_values) {
+ proxy.values.reset();
+ proxy.values = std::move(foreign_values);
+ proxy.remote_config = config;
+ return seastar::make_ready_future<>();
+ });
+ });
+}
+
+void ConfigProxy::show_config(ceph::Formatter* f) const {
+ get_config().show_config(*values, f);
+}
+
+seastar::future<> ConfigProxy::parse_config_files(const std::string& conf_files)
+{
+ auto conffile_paths =
+ get_config().get_conffile_paths(*values,
+ conf_files.empty() ? nullptr : conf_files.c_str(),
+ &std::cerr,
+ CODE_ENVIRONMENT_DAEMON);
+ return seastar::do_with(std::move(conffile_paths), [this] (auto& paths) {
+ return seastar::repeat([path=paths.begin(), e=paths.end(), this]() mutable {
+ if (path == e) {
+          // tried all conffiles; none of them worked
+ return seastar::make_ready_future<seastar::stop_iteration>(
+ seastar::stop_iteration::yes);
+ }
+ return crimson::read_file(*path++).then([this](auto&& buf) {
+ return do_change([buf=std::move(buf), this](ConfigValues& values) {
+ if (get_config().parse_buffer(values, obs_mgr,
+ buf.get(), buf.size(),
+ &std::cerr) == 0) {
+ get_config().update_legacy_vals(values);
+ } else {
+ throw std::invalid_argument("parse error");
+ }
+ }).then([] {
+ // this one works!
+ return seastar::make_ready_future<seastar::stop_iteration>(
+ seastar::stop_iteration::yes);
+ });
+ }).handle_exception_type([] (const std::filesystem::filesystem_error&) {
+ return seastar::make_ready_future<seastar::stop_iteration>(
+ seastar::stop_iteration::no);
+ }).handle_exception_type([] (const std::invalid_argument&) {
+ return seastar::make_ready_future<seastar::stop_iteration>(
+ seastar::stop_iteration::no);
+ });
+ });
+ });
+}
+
+ConfigProxy::ShardedConfig ConfigProxy::sharded_conf;
+}
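Start-up is two-phase: constructing the sharded service populates the authoritative ConfigValues on shard 0 only, and ConfigProxy::start() then replicates it to the other shards, so config files can only be parsed after both steps. A bootstrap sketch under those assumptions (the cluster name and the empty conffile string, which falls back to the default paths, are illustrative):

  #include "crimson/common/config_proxy.h"

  using crimson::common::sharded_conf;
  using crimson::common::local_conf;

  seastar::future<> bootstrap_config(const EntityName& name) {
    return sharded_conf().start(name, "ceph").then([] {
      // copy the shard-0 ConfigValues to every other shard
      return local_conf().start();
    }).then([] {
      // "" means: probe the default conffile paths
      return local_conf().parse_config_files("");
    });
  }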
diff --git a/src/crimson/common/config_proxy.h b/src/crimson/common/config_proxy.h
new file mode 100644
index 000000000..f50a63431
--- /dev/null
+++ b/src/crimson/common/config_proxy.h
@@ -0,0 +1,200 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <seastar/core/reactor.hh>
+#include <seastar/core/sharded.hh>
+#include "common/config.h"
+#include "common/config_obs.h"
+#include "common/config_obs_mgr.h"
+#include "common/errno.h"
+
+namespace ceph {
+class Formatter;
+}
+
+namespace crimson::common {
+
+// A facade for managing config. Each shard has its own copy of ConfigProxy.
+//
+// In seastar-osd, there could be multiple instances of @c ConfigValues in a
+// single process, as we are using a variant of read-copy-update machinery to
+// update the settings at runtime.
+class ConfigProxy : public seastar::peering_sharded_service<ConfigProxy>
+{
+ using LocalConfigValues = seastar::lw_shared_ptr<ConfigValues>;
+ seastar::foreign_ptr<LocalConfigValues> values;
+
+ md_config_t* remote_config = nullptr;
+ std::unique_ptr<md_config_t> local_config;
+
+ using ConfigObserver = ceph::md_config_obs_impl<ConfigProxy>;
+ ObserverMgr<ConfigObserver> obs_mgr;
+
+  const md_config_t& get_config() const {
+    return remote_config ? *remote_config : *local_config;
+  }
+  md_config_t& get_config() {
+    return remote_config ? *remote_config : *local_config;
+  }
+
+ // apply changes to all shards
+ // @param func a functor which accepts @c "ConfigValues&"
+ template<typename Func>
+ seastar::future<> do_change(Func&& func) {
+ return container().invoke_on(values.get_owner_shard(),
+ [func = std::move(func)](ConfigProxy& owner) {
+ // apply the changes to a copy of the values
+ auto new_values = seastar::make_lw_shared(*owner.values);
+ new_values->changed.clear();
+ func(*new_values);
+
+ // always apply the new settings synchronously on the owner shard, to
+      // avoid races with other do_change() calls running in parallel.
+ ObserverMgr<ConfigObserver>::rev_obs_map rev_obs;
+ owner.values.reset(new_values);
+ owner.obs_mgr.for_each_change(owner.values->changed, owner,
+ [&rev_obs](ConfigObserver *obs,
+ const std::string &key) {
+ rev_obs[obs].insert(key);
+ }, nullptr);
+ for (auto& [obs, keys] : rev_obs) {
+ obs->handle_conf_change(owner, keys);
+ }
+
+ return seastar::parallel_for_each(boost::irange(1u, seastar::smp::count),
+ [&owner, new_values] (auto cpu) {
+ return owner.container().invoke_on(cpu,
+ [foreign_values = seastar::make_foreign(new_values)](ConfigProxy& proxy) mutable {
+ proxy.values.reset();
+ proxy.values = std::move(foreign_values);
+
+ ObserverMgr<ConfigObserver>::rev_obs_map rev_obs;
+ proxy.obs_mgr.for_each_change(proxy.values->changed, proxy,
+ [&rev_obs](ConfigObserver *obs, const std::string& key) {
+ rev_obs[obs].insert(key);
+ }, nullptr);
+ for (auto& obs_keys : rev_obs) {
+ obs_keys.first->handle_conf_change(proxy, obs_keys.second);
+ }
+ });
+ }).finally([new_values] {
+ new_values->changed.clear();
+ });
+ });
+ }
+public:
+ ConfigProxy(const EntityName& name, std::string_view cluster);
+ const ConfigValues* operator->() const noexcept {
+ return values.get();
+ }
+ const ConfigValues get_config_values() {
+ return *values.get();
+ }
+ ConfigValues* operator->() noexcept {
+ return values.get();
+ }
+
+ // required by sharded<>
+ seastar::future<> start();
+ seastar::future<> stop() {
+ return seastar::make_ready_future<>();
+ }
+ void add_observer(ConfigObserver* obs) {
+ obs_mgr.add_observer(obs);
+ }
+ void remove_observer(ConfigObserver* obs) {
+ obs_mgr.remove_observer(obs);
+ }
+ seastar::future<> rm_val(const std::string& key) {
+ return do_change([key, this](ConfigValues& values) {
+ auto ret = get_config().rm_val(values, key);
+ if (ret < 0) {
+ throw std::invalid_argument(cpp_strerror(ret));
+ }
+ });
+ }
+ seastar::future<> set_val(const std::string& key,
+ const std::string& val) {
+ return do_change([key, val, this](ConfigValues& values) {
+ std::stringstream err;
+ auto ret = get_config().set_val(values, obs_mgr, key, val, &err);
+ if (ret < 0) {
+ throw std::invalid_argument(err.str());
+ }
+ });
+ }
+ int get_val(const std::string &key, std::string *val) const {
+ return get_config().get_val(*values, key, val);
+ }
+ template<typename T>
+ const T get_val(const std::string& key) const {
+ return get_config().template get_val<T>(*values, key);
+ }
+
+ int get_all_sections(std::vector<std::string>& sections) const {
+ return get_config().get_all_sections(sections);
+ }
+
+ int get_val_from_conf_file(const std::vector<std::string>& sections,
+ const std::string& key, std::string& out,
+ bool expand_meta) const {
+ return get_config().get_val_from_conf_file(*values, sections, key,
+ out, expand_meta);
+ }
+
+ unsigned get_osd_pool_default_min_size(uint8_t size) const {
+ return get_config().get_osd_pool_default_min_size(*values, size);
+ }
+
+ seastar::future<>
+ set_mon_vals(const std::map<std::string,std::string,std::less<>>& kv) {
+ return do_change([kv, this](ConfigValues& values) {
+ get_config().set_mon_vals(nullptr, values, obs_mgr, kv, nullptr);
+ });
+ }
+
+ seastar::future<> inject_args(const std::string& s) {
+ return do_change([s, this](ConfigValues& values) {
+ std::stringstream err;
+ if (get_config().injectargs(values, obs_mgr, s, &err)) {
+ throw std::invalid_argument(err.str());
+ }
+ });
+ }
+ void show_config(ceph::Formatter* f) const;
+
+ seastar::future<> parse_argv(std::vector<const char*>& argv) {
+ // we could pass whatever is unparsed to seastar, but seastar::app_template
+ // is used for driving the seastar application, and
+ // crimson::common::ConfigProxy is not available until seastar engine is up
+ // and running, so we have to feed the command line args to app_template
+ // first, then pass them to ConfigProxy.
+ return do_change([&argv, this](ConfigValues& values) {
+ get_config().parse_argv(values,
+ obs_mgr,
+ argv,
+ CONF_CMDLINE);
+ });
+ }
+
+ seastar::future<> parse_config_files(const std::string& conf_files);
+
+ using ShardedConfig = seastar::sharded<ConfigProxy>;
+
+private:
+ static ShardedConfig sharded_conf;
+ friend ConfigProxy& local_conf();
+ friend ShardedConfig& sharded_conf();
+};
+
+inline ConfigProxy& local_conf() {
+ return ConfigProxy::sharded_conf.local();
+}
+
+inline ConfigProxy::ShardedConfig& sharded_conf() {
+ return ConfigProxy::sharded_conf;
+}
+
+}
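Reads are shard-local and synchronous, while every mutation funnels through do_change() and therefore returns a future. A hypothetical get/set pair (the option name and its type are just examples):

  #include <iostream>

  #include "crimson/common/config_proxy.h"

  void show_min_size() {
    // typed read from this shard's copy of ConfigValues
    auto min_size = crimson::common::local_conf()
      .get_val<uint64_t>("osd_pool_default_min_size");
    std::cout << min_size << "\n";
  }

  seastar::future<> set_min_size() {
    // hops to the owner shard, notifies observers, replicates to all shards
    return crimson::common::local_conf()
      .set_val("osd_pool_default_min_size", "2");
  }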
diff --git a/src/crimson/common/errorator.h b/src/crimson/common/errorator.h
new file mode 100644
index 000000000..af1e6ea45
--- /dev/null
+++ b/src/crimson/common/errorator.h
@@ -0,0 +1,1140 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <exception>
+#include <system_error>
+
+#include <seastar/core/future-util.hh>
+
+#include "include/ceph_assert.h"
+
+namespace crimson {
+
+template<typename Iterator, typename AsyncAction>
+inline auto do_for_each(Iterator begin, Iterator end, AsyncAction action) {
+ using futurator = \
+ ::seastar::futurize<std::invoke_result_t<AsyncAction, decltype(*begin)>>;
+
+ if (begin == end) {
+ return futurator::type::errorator_type::template make_ready_future<>();
+ }
+ while (true) {
+ auto f = futurator::invoke(action, *begin);
+ ++begin;
+ if (begin == end) {
+ return f;
+ }
+ if (!f.available() || seastar::need_preempt()) {
+ return std::move(f)._then(
+ [ action = std::move(action),
+ begin = std::move(begin),
+ end = std::move(end)
+ ] () mutable {
+ return ::crimson::do_for_each(std::move(begin),
+ std::move(end),
+ std::move(action));
+ });
+ }
+ if (f.failed()) {
+ return f;
+ }
+ }
+}
+template<typename Container, typename AsyncAction>
+inline auto do_for_each(Container& c, AsyncAction action) {
+ return ::crimson::do_for_each(std::begin(c), std::end(c), std::move(action));
+}
+
+template<typename AsyncAction>
+inline auto do_until(AsyncAction action) {
+ using errorator_t =
+ typename ::seastar::futurize_t<std::invoke_result_t<AsyncAction>>::errorator_type;
+
+ while (true) {
+ auto f = ::seastar::futurize_invoke(action);
+ if (f.failed()) {
+ return errorator_t::template make_exception_future2<>(
+ f.get_exception()
+ );
+ } else if (f.available()) {
+ if (auto done = f.get0()) {
+ return errorator_t::template make_ready_future<>();
+ }
+ } else {
+ return std::move(f)._then(
+ [action = std::move(action)] (auto &&done) mutable {
+ if (done) {
+ return errorator_t::template make_ready_future<>();
+ }
+ return ::crimson::do_until(
+ std::move(action));
+ });
+ }
+ }
+}
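+// usage sketch for the two helpers above: both preserve errorated futures,
+// so the action may return either a seastar::future or an
+// errorator<...>::future (next_chunk() below is hypothetical):
+//
+//   crimson::do_for_each(bl.buffers(), [&out](auto& buf) {
+//     return out.write(buf.c_str(), buf.length());
+//   });
+//   crimson::do_until([&] {
+//     return next_chunk().then([](auto&& chunk) {
+//       return chunk.empty();  // true terminates the loop
+//     });
+//   });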
+
+// define the interface between error types and errorator
+template <class ConcreteErrorT>
+class error_t {
+ static constexpr const std::type_info& get_exception_ptr_type_info() {
+ return ConcreteErrorT::exception_ptr_type_info();
+ }
+
+ std::exception_ptr to_exception_ptr() const {
+ const auto* concrete_error = static_cast<const ConcreteErrorT*>(this);
+ return concrete_error->to_exception_ptr();
+ }
+
+ decltype(auto) static from_exception_ptr(std::exception_ptr ep) {
+ return ConcreteErrorT::from_exception_ptr(std::move(ep));
+ }
+
+ template <class... AllowedErrorsT>
+ friend struct errorator;
+
+ template <class ErrorVisitorT, class FuturatorT>
+ friend class maybe_handle_error_t;
+
+public:
+ template <class Func>
+ static decltype(auto) handle(Func&& func) {
+ return ConcreteErrorT::handle(std::forward<Func>(func));
+ }
+};
+
+// unthrowable_wrapper ensures a compilation failure when somebody
+// would like to `throw make_error<...>()` instead of returning.
+// Returning allows for compile-time verification of the future's
+// AllowedErrors set and also avoids the burden of throwing.
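+//
+// usage sketch (enoent is one of the ct_error aliases defined below):
+//   return crimson::ct_error::enoent::make();   // returned, never thrown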
+template <class ErrorT, ErrorT ErrorV>
+struct unthrowable_wrapper : error_t<unthrowable_wrapper<ErrorT, ErrorV>> {
+ unthrowable_wrapper(const unthrowable_wrapper&) = delete;
+ [[nodiscard]] static const auto& make() {
+ static constexpr unthrowable_wrapper instance{};
+ return instance;
+ }
+
+ template<class Func>
+ static auto handle(Func&& func) {
+ return [
+ func = std::forward<Func>(func)
+ ] (const unthrowable_wrapper&) mutable -> decltype(auto) {
+ if constexpr (std::is_invocable_v<Func, ErrorT>) {
+ return std::invoke(std::forward<Func>(func), ErrorV);
+ } else {
+ return std::invoke(std::forward<Func>(func));
+ }
+ };
+ }
+
+ struct pass_further {
+ decltype(auto) operator()(const unthrowable_wrapper& e) {
+ return e;
+ }
+ };
+
+ struct discard {
+ decltype(auto) operator()(const unthrowable_wrapper&) {
+ }
+ };
+
+
+private:
+ // can be used only to initialize the `instance` member
+ explicit unthrowable_wrapper() = default;
+
+ // implement the errorable interface
+ struct throwable_carrier{};
+ static std::exception_ptr carrier_instance;
+
+ static constexpr const std::type_info& exception_ptr_type_info() {
+ return typeid(throwable_carrier);
+ }
+ auto to_exception_ptr() const {
+ // error codes don't need to instantiate `std::exception_ptr` each
+ // time as the code is actually a part of the type itself.
+ // `std::make_exception_ptr()` on modern enough GCCs is quite cheap
+ // (see the Gleb Natapov's patch eradicating throw/catch there),
+ // but using one instance per type boils down the overhead to just
+ // ref-counting.
+ return carrier_instance;
+ }
+ static const auto& from_exception_ptr(std::exception_ptr) {
+ return make();
+ }
+
+ friend class error_t<unthrowable_wrapper<ErrorT, ErrorV>>;
+};
+
+template <class ErrorT, ErrorT ErrorV>
+std::exception_ptr unthrowable_wrapper<ErrorT, ErrorV>::carrier_instance = \
+ std::make_exception_ptr<
+ unthrowable_wrapper<ErrorT, ErrorV>::throwable_carrier>({});
+
+
+template <class ErrorT>
+struct stateful_error_t : error_t<stateful_error_t<ErrorT>> {
+ template <class... Args>
+ explicit stateful_error_t(Args&&... args)
+ : ep(std::make_exception_ptr<ErrorT>(std::forward<Args>(args)...)) {
+ }
+
+ template<class Func>
+ static auto handle(Func&& func) {
+ static_assert(std::is_invocable_v<Func, ErrorT>);
+ return [
+ func = std::forward<Func>(func)
+ ] (stateful_error_t<ErrorT>&& e) mutable -> decltype(auto) {
+ try {
+ std::rethrow_exception(e.ep);
+ } catch (const ErrorT& obj) {
+ return std::invoke(std::forward<Func>(func), obj);
+ }
+ ceph_abort_msg("exception type mismatch – impossible!");
+ };
+ }
+
+private:
+ std::exception_ptr ep;
+
+ explicit stateful_error_t(std::exception_ptr ep) : ep(std::move(ep)) {}
+
+ static constexpr const std::type_info& exception_ptr_type_info() {
+ return typeid(ErrorT);
+ }
+ auto to_exception_ptr() const {
+ return ep;
+ }
+ static stateful_error_t<ErrorT> from_exception_ptr(std::exception_ptr ep) {
+ return stateful_error_t<ErrorT>(std::move(ep));
+ }
+
+ friend class error_t<stateful_error_t<ErrorT>>;
+};
+
+namespace _impl {
+ template <class T> struct always_false : std::false_type {};
+};
+
+template <class ErrorVisitorT, class FuturatorT>
+class maybe_handle_error_t {
+ const std::type_info& type_info;
+ typename FuturatorT::type result;
+ ErrorVisitorT errfunc;
+
+public:
+ maybe_handle_error_t(ErrorVisitorT&& errfunc, std::exception_ptr ep)
+ : type_info(*ep.__cxa_exception_type()),
+ result(FuturatorT::make_exception_future(std::move(ep))),
+ errfunc(std::forward<ErrorVisitorT>(errfunc)) {
+ }
+
+ template <class ErrorT>
+ void handle() {
+ static_assert(std::is_invocable<ErrorVisitorT, ErrorT>::value,
+ "provided Error Visitor is not exhaustive");
+ // In C++ throwing an exception isn't the sole way to signal
+ // error with it. This approach nicely fits cold, infrequent cases
+ // but when applied to a hot one, it will likely hurt performance.
+ //
+ // Alternative approach is to create `std::exception_ptr` on our
+ // own and place it in the future via `make_exception_future()`.
+ // When it comes to handling, the pointer can be interrogated for
+ // pointee's type with `__cxa_exception_type()` instead of costly
+ // re-throwing (via `std::rethrow_exception()`) and matching with
+ // `catch`. The limitation here is lack of support for hierarchies
+ // of exceptions. The code below checks for exact match only while
+    // `catch` would allow matching against a base class as well.
+ // However, this shouldn't be a big issue for `errorator` as Error
+ // Visitors are already checked for exhaustiveness at compile-time.
+ //
+ // NOTE: `__cxa_exception_type()` is an extension of the language.
+ // It should be available both in GCC and Clang but a fallback
+ // (based on `std::rethrow_exception()` and `catch`) can be made
+ // to handle other platforms if necessary.
+ if (type_info == ErrorT::error_t::get_exception_ptr_type_info()) {
+ // set `state::invalid` in internals of `seastar::future` to not
+ // call `report_failed_future()` during `operator=()`.
+ [[maybe_unused]] auto&& ep = std::move(result).get_exception();
+
+ using return_t = std::invoke_result_t<ErrorVisitorT, ErrorT>;
+ if constexpr (std::is_assignable_v<decltype(result), return_t>) {
+ result = std::invoke(std::forward<ErrorVisitorT>(errfunc),
+ ErrorT::error_t::from_exception_ptr(std::move(ep)));
+ } else if constexpr (std::is_same_v<return_t, void>) {
+        // void denotes explicit discarding; the handler is executed
+        // purely for its side effects. Typically this boils down to
+        // throwing an exception from the handler.
+ std::invoke(std::forward<ErrorVisitorT>(errfunc),
+ ErrorT::error_t::from_exception_ptr(std::move(ep)));
+ } else {
+ static_assert(_impl::always_false<return_t>::value,
+ "return of Error Visitor is not assignable to future");
+ // do nothing with `ep`.
+ }
+ }
+ }
+
+ auto get_result() && {
+ return std::move(result);
+ }
+};
+
+template <class FuncHead, class... FuncTail>
+static constexpr auto composer(FuncHead&& head, FuncTail&&... tail) {
+ return [
+ head = std::forward<FuncHead>(head),
+ // perfect forwarding in lambda's closure isn't available in C++17
+ // using tuple as workaround; see: https://stackoverflow.com/a/49902823
+ tail = std::make_tuple(std::forward<FuncTail>(tail)...)
+ ] (auto&&... args) mutable -> decltype(auto) {
+ if constexpr (std::is_invocable_v<FuncHead, decltype(args)...>) {
+ return std::invoke(std::forward<FuncHead>(head),
+ std::forward<decltype(args)>(args)...);
+ } else if constexpr (sizeof...(FuncTail) > 0) {
+ using next_composer_t = decltype(composer<FuncTail...>);
+ auto&& next = std::apply<next_composer_t>(composer<FuncTail...>,
+ std::move(tail));
+ return std::invoke(std::move(next),
+ std::forward<decltype(args)>(args)...);
+ } else {
+ static_assert(
+ std::is_invocable_v<FuncHead, decltype(args)...> ||
+ (sizeof...(FuncTail) > 0),
+ "composition is not exhaustive");
+ }
+ };
+}
+
+template <class ValueT>
+struct errorated_future_marker{};
+
+template <class... AllowedErrors>
+struct errorator {
+ template <class T>
+ static inline constexpr bool is_error_v = std::is_base_of_v<error_t<T>, T>;
+
+ static_assert((... && is_error_v<AllowedErrors>),
+ "errorator expects presence of ::is_error in all error types");
+
+ template <class ErrorT>
+ struct contains_once {
+ static constexpr bool value =
+ (0 + ... + std::is_same_v<ErrorT, AllowedErrors>) == 1;
+ };
+ template <class... Errors>
+ struct contains_once<errorator<Errors...>> {
+ static constexpr bool value = (... && contains_once<Errors>::value);
+ };
+ template <class T>
+ static constexpr bool contains_once_v = contains_once<T>::value;
+
+ static_assert((... && contains_once_v<AllowedErrors>),
+ "no error type in errorator can be duplicated");
+
+ struct ready_future_marker{};
+ struct exception_future_marker{};
+
+private:
+ // see the comment for `using future = _future` below.
+ template <class>
+ class _future {};
+ template <class ValueT>
+ class _future<::crimson::errorated_future_marker<ValueT>>
+ : private seastar::future<ValueT> {
+ using base_t = seastar::future<ValueT>;
+ // we need the friendship for the sake of `get_exception() &&` when
+ // `safe_then()` is going to return an errorated future as a result of
+    // chaining. In contrast to `seastar::future`, `errorator<T...>::future`
+ // has this member private.
+ template <class ErrorVisitor, class Futurator>
+ friend class maybe_handle_error_t;
+
+ // any `seastar::futurize` specialization must be able to access the base.
+ // see : `satisfy_with_result_of()` far below.
+ template <typename>
+ friend class seastar::futurize;
+
+ template <typename T1, typename T2, typename... More>
+ friend auto seastar::internal::do_with_impl(T1&& rv1, T2&& rv2, More&&... more);
+
+ template <class, class = std::void_t<>>
+ struct get_errorator {
+ // generic template for non-errorated things (plain types and
+ // vanilla seastar::future as well).
+ using type = errorator<>;
+ };
+ template <class FutureT>
+ struct get_errorator<FutureT,
+ std::void_t<typename FutureT::errorator_type>> {
+ using type = typename FutureT::errorator_type;
+ };
+ template <class T>
+ using get_errorator_t = typename get_errorator<T>::type;
+
+ template <class ValueFuncErroratorT, class... ErrorVisitorRetsT>
+ struct make_errorator {
+ // NOP. The generic template.
+ };
+ template <class... ValueFuncAllowedErrors,
+ class ErrorVisitorRetsHeadT,
+ class... ErrorVisitorRetsTailT>
+ struct make_errorator<errorator<ValueFuncAllowedErrors...>,
+ ErrorVisitorRetsHeadT,
+ ErrorVisitorRetsTailT...> {
+ private:
+ using step_errorator = errorator<ValueFuncAllowedErrors...>;
+ // add ErrorVisitorRetsHeadT only if 1) it's an error type and
+ // 2) isn't already included in the errorator's error set.
+ // It's enough to negate contains_once_v as any errorator<...>
+ // type is already guaranteed to be free of duplications.
+ using next_errorator = std::conditional_t<
+ is_error_v<ErrorVisitorRetsHeadT> &&
+ !step_errorator::template contains_once_v<ErrorVisitorRetsHeadT>,
+ typename step_errorator::template extend<ErrorVisitorRetsHeadT>,
+ step_errorator>;
+
+ public:
+ using type = typename make_errorator<next_errorator,
+ ErrorVisitorRetsTailT...>::type;
+ };
+ // finish the recursion
+ template <class... ValueFuncAllowedErrors>
+ struct make_errorator<errorator<ValueFuncAllowedErrors...>> {
+ using type = ::crimson::errorator<ValueFuncAllowedErrors...>;
+ };
+ template <class... Args>
+ using make_errorator_t = typename make_errorator<Args...>::type;
+
+ using base_t::base_t;
+
+ template <class Futurator, class Future, class ErrorVisitor>
+ [[gnu::noinline]]
+ static auto _safe_then_handle_errors(Future&& future,
+ ErrorVisitor&& errfunc) {
+ maybe_handle_error_t<ErrorVisitor, Futurator> maybe_handle_error(
+ std::forward<ErrorVisitor>(errfunc),
+ std::move(future).get_exception()
+ );
+ (maybe_handle_error.template handle<AllowedErrors>() , ...);
+ return std::move(maybe_handle_error).get_result();
+ }
+
+ public:
+ using errorator_type = ::crimson::errorator<AllowedErrors...>;
+ using promise_type = seastar::promise<ValueT>;
+
+ using base_t::available;
+ using base_t::failed;
+ // need this because of the legacy in PG::do_osd_ops().
+ using base_t::handle_exception_type;
+
+ [[gnu::always_inline]]
+ _future(base_t&& base)
+ : base_t(std::move(base)) {
+ }
+
+ template <class... A>
+ [[gnu::always_inline]]
+ _future(ready_future_marker, A&&... a)
+ : base_t(::seastar::make_ready_future<ValueT>(std::forward<A>(a)...)) {
+ }
+ [[gnu::always_inline]]
+ _future(exception_future_marker, ::seastar::future_state_base&& state) noexcept
+ : base_t(::seastar::futurize<base_t>::make_exception_future(std::move(state))) {
+ }
+ [[gnu::always_inline]]
+ _future(exception_future_marker, std::exception_ptr&& ep) noexcept
+ : base_t(::seastar::futurize<base_t>::make_exception_future(std::move(ep))) {
+ }
+
+ template <template <class...> class ErroratedFuture,
+ class = std::void_t<
+ typename ErroratedFuture<
+ ::crimson::errorated_future_marker<ValueT>>::errorator_type>>
+ operator ErroratedFuture<errorated_future_marker<ValueT>> () && {
+ using dest_errorator_t = \
+ typename ErroratedFuture<
+ ::crimson::errorated_future_marker<ValueT>>::errorator_type;
+    static_assert(dest_errorator_t::template contains_once_v<errorator_type>,
+                  "conversion only allowed to a future with an equal or wider error set!");
+ return static_cast<base_t&&>(*this);
+ }
+
+ // initialize future as failed without throwing. `make_exception_future()`
+ // internally uses `std::make_exception_ptr()`. cppreference.com shouldn't
+ // be misinterpreted when it says:
+ //
+ // "This is done as if executing the following code:
+ // try {
+ // throw e;
+ // } catch(...) {
+ // return std::current_exception();
+ // }",
+ //
+ // the "as if" is absolutely crucial because modern GCCs employ optimized
+ // path for it. See:
+ // * https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=cce8e59224e18858749a2324bce583bcfd160d6c,
+ // * https://gcc.gnu.org/ml/gcc-patches/2016-08/msg00373.html.
+ //
+ // This behavior, combined with `__cxa_exception_type()` for inspecting
+ // exception's type, allows for throw/catch-free handling of stateless
+ // exceptions (which is fine for error codes). Stateful jumbos would be
+ // actually a bit harder as `_M_get()` is private, and thus rethrowing is
+ // necessary to get to the state inside. However, it's not unthinkable to
+ // see another extension bringing operator*() to the exception pointer...
+ //
+ // TODO: we don't really need to `make_exception_ptr` each time. It still
+ // allocates memory underneath while can be replaced with single instance
+ // per type created on start-up.
+ template <class ErrorT,
+ class DecayedT = std::decay_t<ErrorT>,
+ bool IsError = is_error_v<DecayedT>,
+ class = std::enable_if_t<IsError>>
+ _future(ErrorT&& e)
+ : base_t(
+ seastar::make_exception_future<ValueT>(
+ errorator_type::make_exception_ptr(e))) {
+ static_assert(errorator_type::contains_once_v<DecayedT>,
+ "ErrorT is not enlisted in errorator");
+ }
+
+ template <class ValueFuncT, class ErrorVisitorT>
+ auto safe_then(ValueFuncT&& valfunc, ErrorVisitorT&& errfunc) {
+ static_assert((... && std::is_invocable_v<ErrorVisitorT,
+ AllowedErrors>),
+ "provided Error Visitor is not exhaustive");
+
+ using value_func_result_t =
+ typename std::conditional_t<std::is_void_v<ValueT>,
+ std::invoke_result<ValueFuncT>,
+ std::invoke_result<ValueFuncT, ValueT>>::type;
+ // recognize whether there can be any error coming from the Value
+ // Function.
+ using value_func_errorator_t = get_errorator_t<value_func_result_t>;
+ // mutate the Value Function's errorator to harvest errors coming
+ // from the Error Visitor. Yes, it's perfectly fine to fail error
+ // handling at one step and delegate even broader set of issues
+ // to next continuation.
+ using return_errorator_t = make_errorator_t<
+ value_func_errorator_t,
+ std::decay_t<std::invoke_result_t<ErrorVisitorT, AllowedErrors>>...>;
+ // OK, now we know about all errors next continuation must take
+ // care about. If Visitor handled everything and the Value Func
+ // doesn't return any, we'll finish with errorator<>::future
+    // which is just vanilla seastar::future – that is, the next
+    // continuation could finally use plain `.then()`!
+ using futurator_t = \
+ typename return_errorator_t::template futurize<value_func_result_t>;
+ // `seastar::futurize`, used internally by `then_wrapped()`, would
+ // wrap any non-`seastar::future` type coming from Value Func into
+ // `seastar::future`. As we really don't want to end with things
+ // like `seastar::future<errorator::future<...>>`, we need either:
+ // * convert the errorated future into plain in the lambda below
+ // and back here or
+ // * specialize the `seastar::futurize<T>` to get proper kind of
+ // future directly from `::then_wrapped()`.
+ // As C++17 doesn't guarantee copy elision when non-same types are
+ // involved while examination of assemblies from GCC 8.1 confirmed
+ // extra copying, switch to the second approach has been made.
+ return this->then_wrapped(
+ [ valfunc = std::forward<ValueFuncT>(valfunc),
+ errfunc = std::forward<ErrorVisitorT>(errfunc)
+ ] (auto&& future) mutable noexcept {
+ if (__builtin_expect(future.failed(), false)) {
+ return _safe_then_handle_errors<futurator_t>(
+ std::move(future), std::forward<ErrorVisitorT>(errfunc));
+ } else {
+ // NOTE: using `seastar::future::get()` here is a bit bloaty
+ // as the method rechecks availability of future's value and,
+ // if it's unavailable, does the `::do_wait()` path (yes, it
+ // targets `seastar::thread`). Actually this is dead code as
+ // `then_wrapped()` executes the lambda only when the future
+ // is available (which means: failed or ready). However, GCC
+ // hasn't optimized it out:
+ //
+ // if (__builtin_expect(future.failed(), false)) {
+ // ea25: 48 83 bd c8 fe ff ff cmpq $0x2,-0x138(%rbp)
+ // ea2c: 02
+ // ea2d: 0f 87 f0 05 00 00 ja f023 <ceph::osd::
+ // ...
+ // /// If get() is called in a \ref seastar::thread context,
+ // /// then it need not be available; instead, the thread will
+ // /// be paused until the future becomes available.
+ // [[gnu::always_inline]]
+ // std::tuple<T...> get() {
+ // if (!_state.available()) {
+ // ea3a: 0f 85 1b 05 00 00 jne ef5b <ceph::osd::
+ // }
+ // ...
+ //
+ // I don't perceive this as huge issue. Though, it cannot be
+ // claimed errorator has 0 overhead on hot path. The perfect
+ // solution here would be mark the `::get_available_state()`
+ // as `protected` and use dedicated `get_value()` exactly as
+ // `::then()` already does.
+ return futurator_t::invoke(std::forward<ValueFuncT>(valfunc),
+ std::move(future).get());
+ }
+ });
+ }
+
+  /**
+   * unsafe_get / unsafe_get0
+   *
+   * Only valid within a seastar thread. Ignores errorator protections
+   * and throws any contained exceptions.
+   *
+   * Should really only be used within test code
+   * (see test/crimson/gtest_seastar.h).
+   */
+ auto &&unsafe_get() {
+ return seastar::future<ValueT>::get();
+ }
+ auto unsafe_get0() {
+ return seastar::future<ValueT>::get0();
+ }
+
+ template <class FuncT>
+ _future finally(FuncT &&func) {
+ return this->then_wrapped(
+ [func = std::forward<FuncT>(func)](auto &&result) mutable noexcept {
+ if constexpr (seastar::is_future<std::invoke_result_t<FuncT>>::value) {
+ return ::seastar::futurize_invoke(std::forward<FuncT>(func)).then_wrapped(
+ [result = std::move(result)](auto&& f_res) mutable {
+ // TODO: f_res.failed()
+ (void)f_res.discard_result();
+ return std::move(result);
+ });
+ } else {
+ try {
+ func();
+ } catch (...) {
+ // TODO: rethrow
+ }
+ return std::move(result);
+ }
+ });
+ }
+
+  // taking ErrorFuncHead separately from ErrorFuncTail so that this
+  // overload can be told apart from the single-visitor one without SFINAE
+ template <class ValueFunc,
+ class ErrorFuncHead,
+ class... ErrorFuncTail>
+ auto safe_then(ValueFunc&& value_func,
+ ErrorFuncHead&& error_func_head,
+ ErrorFuncTail&&... error_func_tail) {
+ static_assert(sizeof...(ErrorFuncTail) > 0);
+ return safe_then(
+ std::forward<ValueFunc>(value_func),
+ composer(std::forward<ErrorFuncHead>(error_func_head),
+ std::forward<ErrorFuncTail>(error_func_tail)...));
+ }
+
+ template <class ValueFunc>
+ auto safe_then(ValueFunc&& value_func) {
+ return safe_then(std::forward<ValueFunc>(value_func),
+ errorator_type::pass_further{});
+ }
+
+ template <class Func>
+ void then(Func&&) = delete;
+
+ template <class ErrorVisitorT>
+ auto handle_error(ErrorVisitorT&& errfunc) {
+ static_assert((... && std::is_invocable_v<ErrorVisitorT,
+ AllowedErrors>),
+ "provided Error Visitor is not exhaustive");
+ using return_errorator_t = make_errorator_t<
+ errorator<>,
+ std::decay_t<std::invoke_result_t<ErrorVisitorT, AllowedErrors>>...>;
+ using futurator_t = \
+ typename return_errorator_t::template futurize<::seastar::future<ValueT>>;
+ return this->then_wrapped(
+ [ errfunc = std::forward<ErrorVisitorT>(errfunc)
+ ] (auto&& future) mutable noexcept {
+ if (__builtin_expect(future.failed(), false)) {
+ return _safe_then_handle_errors<futurator_t>(
+ std::move(future), std::forward<ErrorVisitorT>(errfunc));
+ } else {
+ return typename futurator_t::type{ std::move(future) };
+ }
+ });
+ }
+ template <class ErrorFuncHead,
+ class... ErrorFuncTail>
+ auto handle_error(ErrorFuncHead&& error_func_head,
+ ErrorFuncTail&&... error_func_tail) {
+ static_assert(sizeof...(ErrorFuncTail) > 0);
+ return this->handle_error(
+ composer(std::forward<ErrorFuncHead>(error_func_head),
+ std::forward<ErrorFuncTail>(error_func_tail)...));
+ }
+
+ private:
+ // for ::crimson::do_for_each
+ template <class Func>
+ auto _then(Func&& func) {
+ return base_t::then(std::forward<Func>(func));
+ }
+ template<typename Iterator, typename AsyncAction>
+ friend inline auto ::crimson::do_for_each(Iterator begin,
+ Iterator end,
+ AsyncAction action);
+
+ template<typename AsyncAction>
+ friend inline auto ::crimson::do_until(AsyncAction action);
+
+ template <typename Result>
+ friend class ::seastar::future;
+
+  // let seastar::do_with_impl up-cast us to seastar::future.
+ template<typename T, typename F>
+ friend inline auto ::seastar::internal::do_with_impl(T&& rvalue, F&& f);
+ template<typename T1, typename T2, typename T3_or_F, typename... More>
+ friend inline auto ::seastar::internal::do_with_impl(T1&& rv1, T2&& rv2, T3_or_F&& rv3, More&&... more);
+ };
+
+ class Enabler {};
+
+ template <typename T>
+ using EnableIf = typename std::enable_if<contains_once_v<std::decay_t<T>>, Enabler>::type;
+
+ template <typename ErrorFunc>
+ struct all_same_way_t {
+ ErrorFunc func;
+ all_same_way_t(ErrorFunc &&error_func)
+ : func(std::forward<ErrorFunc>(error_func)) {}
+
+ template <typename ErrorT, EnableIf<ErrorT>...>
+ decltype(auto) operator()(ErrorT&& e) {
+ using decayed_t = std::decay_t<decltype(e)>;
+ auto&& handler =
+ decayed_t::error_t::handle(std::forward<ErrorFunc>(func));
+ static_assert(std::is_invocable_v<decltype(handler), ErrorT>);
+ return std::invoke(std::move(handler), std::forward<ErrorT>(e));
+ }
+ };
+
+public:
+ // HACK: `errorated_future_marker` and `_future` is just a hack to
+ // specialize `seastar::futurize` for category of class templates:
+ // `future<...>` from distinct errorators. Such tricks are usually
+  // performed based on SFINAE and `std::void_t` to check the existence
+ // of a trait/member (`future<...>::errorator_type` in our case).
+ // Unfortunately, this technique can't be applied as the `futurize`
+ // lacks the optional parameter. The problem looks awfully similar
+ // to following SO item: https://stackoverflow.com/a/38860413.
+ template <class ValueT=void>
+ using future = _future<::crimson::errorated_future_marker<ValueT>>;
+
+ // the visitor that forwards handling of all errors to next continuation
+ struct pass_further {
+ template <class ErrorT, EnableIf<ErrorT>...>
+ decltype(auto) operator()(ErrorT&& e) {
+ static_assert(contains_once_v<std::decay_t<ErrorT>>,
+ "passing further disallowed ErrorT");
+ return std::forward<ErrorT>(e);
+ }
+ };
+
+ struct discard_all {
+ template <class ErrorT, EnableIf<ErrorT>...>
+ void operator()(ErrorT&&) {
+ static_assert(contains_once_v<std::decay_t<ErrorT>>,
+ "discarding disallowed ErrorT");
+ }
+ };
+
+  // e.g. assert_all{ "TODO" } aborts with the given message on any error
+ class assert_all {
+ const char* const msg = nullptr;
+ public:
+ template <std::size_t N>
+ assert_all(const char (&msg)[N])
+ : msg(msg) {
+ }
+ assert_all() = default;
+
+ template <class ErrorT, EnableIf<ErrorT>...>
+ void operator()(ErrorT&&) {
+ static_assert(contains_once_v<std::decay_t<ErrorT>>,
+ "discarding disallowed ErrorT");
+ if (msg) {
+ ceph_abort_msg(msg);
+ } else {
+ ceph_abort();
+ }
+ }
+ };
+
+ template <class ErrorFunc>
+ static decltype(auto) all_same_way(ErrorFunc&& error_func) {
+ return all_same_way_t<ErrorFunc>{std::forward<ErrorFunc>(error_func)};
+ };
+
+ // get a new errorator by extending current one with new error
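+  // e.g. errorator<enoent>::extend<invarg> == errorator<enoent, invarg>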
+ template <class... NewAllowedErrorsT>
+ using extend = errorator<AllowedErrors..., NewAllowedErrorsT...>;
+
+ // get a new errorator by summing and deduplicating error set of
+ // the errorator `unify<>` is applied on with another errorator
+ // provided as template parameter.
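+  // e.g. (sketch, using the ct_error aliases):
+  //   errorator<enoent, invarg>::unify<errorator<invarg, ebadf>>::type
+  //     == errorator<enoent, invarg, ebadf>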
+ template <class OtherErroratorT>
+ struct unify {
+ // 1st: generic NOP template
+ };
+ template <class OtherAllowedErrorsHead,
+ class... OtherAllowedErrorsTail>
+ struct unify<errorator<OtherAllowedErrorsHead,
+ OtherAllowedErrorsTail...>> {
+ private:
+ // 2nd: specialization for errorators with non-empty error set.
+ //
+ // split error set of other errorator, passed as template param,
+ // into head and tail. Mix error set of this errorator with head
+ // of the other one only if it isn't already present in the set.
+ using step_errorator = std::conditional_t<
+ contains_once_v<OtherAllowedErrorsHead> == false,
+ errorator<AllowedErrors..., OtherAllowedErrorsHead>,
+ errorator<AllowedErrors...>>;
+ using rest_errorator = errorator<OtherAllowedErrorsTail...>;
+
+ public:
+ using type = typename step_errorator::template unify<rest_errorator>::type;
+ };
+ template <class... EmptyPack>
+ struct unify<errorator<EmptyPack...>> {
+ // 3rd: recursion finisher
+ static_assert(sizeof...(EmptyPack) == 0);
+ using type = errorator<AllowedErrors...>;
+ };
+
+ template <typename T=void, typename... A>
+ static future<T> make_ready_future(A&&... value) {
+ return future<T>(ready_future_marker(), std::forward<A>(value)...);
+ }
+
+ template <typename T=void>
+ static
+ future<T> make_exception_future2(std::exception_ptr&& ex) noexcept {
+ return future<T>(exception_future_marker(), std::move(ex));
+ }
+ template <typename T=void>
+ static
+ future<T> make_exception_future2(seastar::future_state_base&& state) noexcept {
+ return future<T>(exception_future_marker(), std::move(state));
+ }
+ template <typename T=void, typename Exception>
+ static
+ future<T> make_exception_future2(Exception&& ex) noexcept {
+ return make_exception_future2<T>(std::make_exception_ptr(std::forward<Exception>(ex)));
+ }
+
+ static auto now() {
+ return make_ready_future<>();
+ }
+
+private:
+ template <class T, class = std::void_t<T>>
+ class futurize {
+ using vanilla_futurize = seastar::futurize<T>;
+
+ // explicit specializations for nested type is not allowed unless both
+ // the member template and the enclosing template are specialized. see
+ // section temp.expl.spec, N4659
+ template <class Stored, int Dummy = 0>
+ struct stored_to_future {
+ using type = future<Stored>;
+ };
+ template <int Dummy>
+ struct stored_to_future <seastar::internal::monostate, Dummy> {
+ using type = future<>;
+ };
+
+ public:
+ using type =
+ typename stored_to_future<typename vanilla_futurize::value_type>::type;
+
+ template <class Func, class... Args>
+ static type invoke(Func&& func, Args&&... args) {
+ try {
+ return vanilla_futurize::invoke(std::forward<Func>(func),
+ std::forward<Args>(args)...);
+ } catch (...) {
+ return make_exception_future(std::current_exception());
+ }
+ }
+
+ template <class Func>
+ static type invoke(Func&& func, seastar::internal::monostate) {
+ try {
+ return vanilla_futurize::invoke(std::forward<Func>(func));
+ } catch (...) {
+ return make_exception_future(std::current_exception());
+ }
+ }
+
+ template <typename Arg>
+ static type make_exception_future(Arg&& arg) {
+ return vanilla_futurize::make_exception_future(std::forward<Arg>(arg));
+ }
+ };
+ template <template <class...> class ErroratedFutureT,
+ class ValueT>
+ class futurize<ErroratedFutureT<::crimson::errorated_future_marker<ValueT>>,
+ std::void_t<
+ typename ErroratedFutureT<
+ ::crimson::errorated_future_marker<ValueT>>::errorator_type>> {
+ public:
+ using type = ::crimson::errorator<AllowedErrors...>::future<ValueT>;
+
+ template <class Func, class... Args>
+ static type apply(Func&& func, std::tuple<Args...>&& args) {
+ try {
+ return ::seastar::futurize_apply(std::forward<Func>(func),
+ std::forward<std::tuple<Args...>>(args));
+ } catch (...) {
+ return make_exception_future(std::current_exception());
+ }
+ }
+
+ template <class Func, class... Args>
+ static type invoke(Func&& func, Args&&... args) {
+ try {
+ return ::seastar::futurize_invoke(std::forward<Func>(func),
+ std::forward<Args>(args)...);
+ } catch (...) {
+ return make_exception_future(std::current_exception());
+ }
+ }
+
+ template <class Func>
+ static type invoke(Func&& func, seastar::internal::monostate) {
+ try {
+ return ::seastar::futurize_invoke(std::forward<Func>(func));
+ } catch (...) {
+ return make_exception_future(std::current_exception());
+ }
+ }
+
+ template <typename Arg>
+ static type make_exception_future(Arg&& arg) {
+ return ::crimson::errorator<AllowedErrors...>::make_exception_future2<ValueT>(std::forward<Arg>(arg));
+ }
+ };
+
+ template <class ErrorT>
+ static std::exception_ptr make_exception_ptr(ErrorT&& e) {
+ // calling via interface class due to encapsulation and friend relations.
+ return e.error_t<std::decay_t<ErrorT>>::to_exception_ptr();
+ }
+
+ // needed because of:
+ // * return_errorator_t::template futurize<...> in `safe_then()`,
+ // * conversion to `std::exception_ptr` in `future::future(ErrorT&&)`.
+ // the friendship with all errorators is an idea from Kefu to fix build
+ // issues on GCC 9. This version likely fixes some access violation bug
+ // we were exploiting before.
+ template <class...>
+ friend class errorator;
+}; // class errorator, generic template
+
+// no errors? errorator<>::future is plain seastar::future then!
+template <>
+class errorator<> {
+public:
+ template <class ValueT>
+ using future = ::seastar::future<ValueT>;
+
+ template <class T>
+ using futurize = ::seastar::futurize<T>;
+
+ // get a new errorator by extending current one with new error
+ template <class... NewAllowedErrors>
+ using extend = errorator<NewAllowedErrors...>;
+
+ // errorator with empty error set never contains any error
+ template <class T>
+ static constexpr bool contains_once_v = false;
+}; // class errorator, <> specialization
+
+
+template <class ErroratorOne,
+ class ErroratorTwo,
+ class... FurtherErrators>
+struct compound_errorator {
+private:
+ // generic template. Empty `FurtherErrators` are handled by
+ // the specialization below.
+ static_assert(sizeof...(FurtherErrators) > 0);
+ using step =
+ typename compound_errorator<ErroratorOne, ErroratorTwo>::type;
+
+public:
+ using type =
+ typename compound_errorator<step, FurtherErrators...>::type;
+};
+template <class ErroratorOne,
+ class ErroratorTwo>
+struct compound_errorator<ErroratorOne, ErroratorTwo> {
+ // specialization for empty `FurtherErrators` arg pack
+ using type =
+ typename ErroratorOne::template unify<ErroratorTwo>::type;
+};
+template <class... Args>
+using compound_errorator_t = typename compound_errorator<Args...>::type;
+
+// this is a conjunction of two nasty features: C++14's variable templates
+// and C++17's inline global variables. The latter is crucial to ensure
+// the variable will get the same address across all translation units.
+template <std::errc ErrorV>
+inline std::error_code ec = std::make_error_code(ErrorV);
+
+template <std::errc ErrorV>
+using ct_error_code = unthrowable_wrapper<const std::error_code&, ec<ErrorV>>;
+
+namespace ct_error {
+ using enoent = ct_error_code<std::errc::no_such_file_or_directory>;
+ using enodata = ct_error_code<std::errc::no_message_available>;
+ using invarg = ct_error_code<std::errc::invalid_argument>;
+ using input_output_error = ct_error_code<std::errc::io_error>;
+ using object_corrupted = ct_error_code<std::errc::illegal_byte_sequence>;
+ using permission_denied = ct_error_code<std::errc::permission_denied>;
+ using operation_not_supported =
+ ct_error_code<std::errc::operation_not_supported>;
+ using not_connected = ct_error_code<std::errc::not_connected>;
+ using timed_out = ct_error_code<std::errc::timed_out>;
+ using erange =
+ ct_error_code<std::errc::result_out_of_range>;
+ using ebadf =
+ ct_error_code<std::errc::bad_file_descriptor>;
+ using enospc =
+ ct_error_code<std::errc::no_space_on_device>;
+ using value_too_large = ct_error_code<std::errc::value_too_large>;
+ using eagain =
+ ct_error_code<std::errc::resource_unavailable_try_again>;
+ using file_too_large =
+ ct_error_code<std::errc::file_too_large>;
+ using address_in_use = ct_error_code<std::errc::address_in_use>;
+
+ struct pass_further_all {
+ template <class ErrorT>
+ decltype(auto) operator()(ErrorT&& e) {
+ return std::forward<ErrorT>(e);
+ }
+ };
+
+ struct discard_all {
+ template <class ErrorT>
+ void operator()(ErrorT&&) {
+ }
+ };
+
+ class assert_all {
+ const char* const msg = nullptr;
+ public:
+ template <std::size_t N>
+ assert_all(const char (&msg)[N])
+ : msg(msg) {
+ }
+ assert_all() = default;
+
+ template <class ErrorT>
+ void operator()(ErrorT&&) {
+ if (msg) {
+        ceph_abort_msg(msg);
+ } else {
+ ceph_abort();
+ }
+ }
+ };
+
+ template <class ErrorFunc>
+ static decltype(auto) all_same_way(ErrorFunc&& error_func) {
+ return [
+ error_func = std::forward<ErrorFunc>(error_func)
+ ] (auto&& e) mutable -> decltype(auto) {
+ using decayed_t = std::decay_t<decltype(e)>;
+ auto&& handler =
+ decayed_t::error_t::handle(std::forward<ErrorFunc>(error_func));
+ return std::invoke(std::move(handler), std::forward<decltype(e)>(e));
+ };
+ };
+}
+
+using stateful_errc = stateful_error_t<std::errc>;
+using stateful_errint = stateful_error_t<int>;
+using stateful_ec = stateful_error_t<std::error_code>;
+
+} // namespace crimson
+
+
+// open the `seastar` namespace to specialize `futurize`. This is not
+// pretty for sure. I just hope it's not worse than e.g. specializing
+// `hash` in the `std` namespace. The justification is copy avoidance
+// in `future<...>::safe_then()`. See the comments there for details.
+namespace seastar {
+
+// Container is a placeholder for errorator::_future<> template
+template <template <class> class Container,
+ class Value>
+struct futurize<Container<::crimson::errorated_future_marker<Value>>> {
+ using errorator_type = typename Container<
+ ::crimson::errorated_future_marker<Value>>::errorator_type;
+
+ using type = typename errorator_type::template future<Value>;
+ using value_type = seastar::internal::future_stored_type_t<Value>;
+
+ template<typename Func, typename... FuncArgs>
+ [[gnu::always_inline]]
+ static inline type invoke(Func&& func, FuncArgs&&... args) noexcept {
+ try {
+ return func(std::forward<FuncArgs>(args)...);
+ } catch (...) {
+ return make_exception_future(std::current_exception());
+ }
+ }
+
+ template <class Func>
+ [[gnu::always_inline]]
+ static type invoke(Func&& func, seastar::internal::monostate) noexcept {
+ try {
+ return func();
+ } catch (...) {
+ return make_exception_future(std::current_exception());
+ }
+ }
+
+ template <typename Arg>
+ [[gnu::always_inline]]
+ static type make_exception_future(Arg&& arg) {
+ return errorator_type::template make_exception_future2<Value>(std::forward<Arg>(arg));
+ }
+
+private:
+ template<typename PromiseT, typename Func>
+ static void satisfy_with_result_of(PromiseT&& pr, Func&& func) {
+ // this may use the protected variant of `seastar::future::forward_to()`
+ // because:
+    //   1. `seastar::future` established a friendship with all
+ // specializations of `seastar::futurize`, including this
+ // one (we're in the `seastar` namespace!) WHILE
+ // 2. any errorated future declares now the friendship with any
+ // `seastar::futurize<...>`.
+ func().forward_to(std::move(pr));
+ }
+ template <typename U>
+ friend class future;
+};
+
+template <template <class> class Container,
+ class Value>
+struct continuation_base_from_future<Container<::crimson::errorated_future_marker<Value>>> {
+ using type = continuation_base<Value>;
+};
+
+} // namespace seastar
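Tying the pieces together: an errorator alias pins the allowed error set, values flow through safe_then(), and each error must be handled, discarded, or passed further, all checked at compile time. A minimal sketch (lookup() and demo() are hypothetical):

  #include <iostream>

  #include "crimson/common/errorator.h"

  using ertr = crimson::errorator<crimson::ct_error::enoent,
                                  crimson::ct_error::invarg>;

  ertr::future<int> lookup(int key) {
    if (key < 0) {
      // errors are returned, never thrown; invarg must be in ertr's set
      return crimson::ct_error::invarg::make();
    }
    return ertr::make_ready_future<int>(key * 2);
  }

  seastar::future<> demo() {
    return lookup(21).safe_then(
      [](int v) { std::cout << "got " << v << "\n"; },
      ertr::all_same_way([](const std::error_code& ec) {
        // exhaustive: both enoent and invarg funnel through here
        std::cout << "failed: " << ec.message() << "\n";
        return seastar::now();
      }));
  }

Handling every error is what collapses the result to a plain seastar::future<>; leaving any unhandled keeps it an errorated future that the next continuation must deal with.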
diff --git a/src/crimson/common/exception.h b/src/crimson/common/exception.h
new file mode 100644
index 000000000..05caf5ebd
--- /dev/null
+++ b/src/crimson/common/exception.h
@@ -0,0 +1,50 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <exception>
+#include <seastar/core/future.hh>
+#include <seastar/core/future-util.hh>
+
+#include "crimson/common/log.h"
+
+namespace crimson::common {
+
+class system_shutdown_exception final : public std::exception {
+public:
+ const char* what() const noexcept final {
+ return "system shutting down";
+ }
+};
+
+class actingset_changed final : public std::exception {
+public:
+ actingset_changed(bool sp) : still_primary(sp) {}
+ const char* what() const noexcept final {
+ return "acting set changed";
+ }
+ bool is_primary() const {
+ return still_primary;
+ }
+private:
+ const bool still_primary;
+};
+
+template<typename Func, typename... Args>
+inline seastar::future<> handle_system_shutdown(Func&& func, Args&&... args)
+{
+ return seastar::futurize_invoke(std::forward<Func>(func),
+ std::forward<Args>(args)...)
+ .handle_exception([](std::exception_ptr eptr) {
+ if (*eptr.__cxa_exception_type() ==
+ typeid(crimson::common::system_shutdown_exception)) {
+ crimson::get_logger(ceph_subsys_osd).debug(
+ "operation skipped, system shutdown");
+ return seastar::now();
+ }
+ std::rethrow_exception(eptr);
+ });
+}
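+
+// A minimal usage sketch (do_work() is a hypothetical asynchronous
+// operation): a system_shutdown_exception escaping it is logged and
+// swallowed, while any other exception is rethrown to the caller.
+//
+//   seastar::future<> f = crimson::common::handle_system_shutdown(
+//     [] { return do_work(); });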
+
+} // namespace crimson::common
diff --git a/src/crimson/common/fixed_kv_node_layout.h b/src/crimson/common/fixed_kv_node_layout.h
new file mode 100644
index 000000000..4c7cc2e76
--- /dev/null
+++ b/src/crimson/common/fixed_kv_node_layout.h
@@ -0,0 +1,700 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <iostream>
+
+#include "include/byteorder.h"
+
+#include "crimson/common/layout.h"
+
+namespace crimson::common {
+
+template <typename T, bool is_const>
+struct maybe_const_t {
+};
+template<typename T>
+struct maybe_const_t<T, true> {
+ using type = const T*;
+};
+template<typename T>
+struct maybe_const_t<T, false> {
+ using type = T*;
+};
+
+
+/**
+ * FixedKVNodeLayout
+ *
+ * Reusable implementation of a fixed size block mapping
+ * K -> V with internal representations KINT and VINT.
+ *
+ * Uses absl::container_internal::Layout for the actual memory layout.
+ *
+ * The primary interface exposed is centered on the iterator
+ * and related methods.
+ *
+ * Also included are helpers for doing splits and merges as for a btree.
+ */
+template <
+ size_t CAPACITY,
+ typename Meta,
+ typename MetaInt,
+ typename K,
+ typename KINT,
+ typename V,
+ typename VINT,
+ bool VALIDATE_INVARIANTS=true>
+class FixedKVNodeLayout {
+ char *buf = nullptr;
+
+ using L = absl::container_internal::Layout<ceph_le32, MetaInt, KINT, VINT>;
+ static constexpr L layout{1, 1, CAPACITY, CAPACITY};
+
+public:
+ template <bool is_const>
+ struct iter_t {
+ friend class FixedKVNodeLayout;
+ using parent_t = typename maybe_const_t<FixedKVNodeLayout, is_const>::type;
+
+ parent_t node;
+ uint16_t offset;
+
+ iter_t(
+ parent_t parent,
+ uint16_t offset) : node(parent), offset(offset) {}
+
+ iter_t(const iter_t &) = default;
+ iter_t(iter_t &&) = default;
+ iter_t &operator=(const iter_t &) = default;
+ iter_t &operator=(iter_t &&) = default;
+
+ operator iter_t<!is_const>() const {
+ static_assert(!is_const);
+ return iter_t<!is_const>(node, offset);
+ }
+
+ // Work nicely with for loops without requiring a nested type.
+ iter_t &operator*() { return *this; }
+ iter_t *operator->() { return this; }
+
+ iter_t operator++(int) {
+ auto ret = *this;
+ ++offset;
+ return ret;
+ }
+
+ iter_t &operator++() {
+ ++offset;
+ return *this;
+ }
+
+ uint16_t operator-(const iter_t &rhs) const {
+ assert(rhs.node == node);
+ return offset - rhs.offset;
+ }
+
+ iter_t operator+(uint16_t off) const {
+ return iter_t(
+ node,
+ offset + off);
+ }
+ iter_t operator-(uint16_t off) const {
+ return iter_t(
+ node,
+ offset - off);
+ }
+
+ bool operator==(const iter_t &rhs) const {
+ assert(node == rhs.node);
+ return rhs.offset == offset;
+ }
+
+ bool operator!=(const iter_t &rhs) const {
+ return !(*this == rhs);
+ }
+
+ K get_key() const {
+ return K(node->get_key_ptr()[offset]);
+ }
+
+ K get_next_key_or_max() const {
+ auto next = *this + 1;
+ if (next == node->end())
+ return std::numeric_limits<K>::max();
+ else
+ return next->get_key();
+ }
+
+ void set_val(V val) const {
+ static_assert(!is_const);
+ node->get_val_ptr()[offset] = VINT(val);
+ }
+
+    V get_val() const {
+      return V(node->get_val_ptr()[offset]);
+    }
+
+ bool contains(K addr) const {
+ return (get_key() <= addr) && (get_next_key_or_max() > addr);
+ }
+
+ uint16_t get_offset() const {
+ return offset;
+ }
+
+ private:
+ void set_key(K _lb) const {
+ static_assert(!is_const);
+ KINT lb;
+ lb = _lb;
+ node->get_key_ptr()[offset] = lb;
+ }
+
+ typename maybe_const_t<char, is_const>::type get_key_ptr() const {
+ return reinterpret_cast<
+ typename maybe_const_t<char, is_const>::type>(
+ node->get_key_ptr() + offset);
+ }
+
+ typename maybe_const_t<char, is_const>::type get_val_ptr() const {
+ return reinterpret_cast<
+ typename maybe_const_t<char, is_const>::type>(
+ node->get_val_ptr() + offset);
+ }
+ };
+ using const_iterator = iter_t<true>;
+ using iterator = iter_t<false>;
+
+ struct delta_t {
+ enum class op_t : uint8_t {
+ INSERT,
+ REMOVE,
+ UPDATE,
+ } op;
+ KINT key;
+ VINT val;
+
+ void replay(FixedKVNodeLayout &l) {
+ switch (op) {
+ case op_t::INSERT: {
+ l.insert(l.lower_bound(key), key, val);
+ break;
+ }
+ case op_t::REMOVE: {
+ auto iter = l.find(key);
+ assert(iter != l.end());
+ l.remove(iter);
+ break;
+ }
+ case op_t::UPDATE: {
+ auto iter = l.find(key);
+ assert(iter != l.end());
+ l.update(iter, val);
+ break;
+ }
+ default:
+ assert(0 == "Impossible");
+ }
+ }
+
+ bool operator==(const delta_t &rhs) const {
+ return op == rhs.op &&
+ key == rhs.key &&
+ val == rhs.val;
+ }
+ };
+
+public:
+ class delta_buffer_t {
+ std::vector<delta_t> buffer;
+ public:
+ bool empty() const {
+ return buffer.empty();
+ }
+ void insert(
+ const K &key,
+ const V &val) {
+ KINT k;
+ k = key;
+ buffer.push_back(
+ delta_t{
+ delta_t::op_t::INSERT,
+ k,
+ VINT(val)
+ });
+ }
+ void update(
+ const K &key,
+ const V &val) {
+ KINT k;
+ k = key;
+ buffer.push_back(
+ delta_t{
+ delta_t::op_t::UPDATE,
+ k,
+ VINT(val)
+ });
+ }
+ void remove(const K &key) {
+ KINT k;
+ k = key;
+ buffer.push_back(
+ delta_t{
+ delta_t::op_t::REMOVE,
+ k,
+ VINT()
+ });
+ }
+ void replay(FixedKVNodeLayout &node) {
+ for (auto &i: buffer) {
+ i.replay(node);
+ }
+ }
+ size_t get_bytes() const {
+ return buffer.size() * sizeof(delta_t);
+ }
+ void copy_out(char *out, size_t len) {
+ assert(len == get_bytes());
+ ::memcpy(out, reinterpret_cast<const void *>(buffer.data()), get_bytes());
+ buffer.clear();
+ }
+    void copy_in(const char *in, size_t len) {
+      assert(empty());
+      assert(len % sizeof(delta_t) == 0);
+      buffer = std::vector(
+        reinterpret_cast<const delta_t*>(in),
+        reinterpret_cast<const delta_t*>(in + len));
+    }
+ bool operator==(const delta_buffer_t &rhs) const {
+ return buffer == rhs.buffer;
+ }
+ };
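+
+  // A minimal sketch of the intended journaling flow, using the
+  // journal_* helpers below (node, other_node, key and val are
+  // hypothetical): record deltas while mutating one node, serialize
+  // them, then replay against another copy.
+  //
+  //   delta_buffer_t recorder;
+  //   node.journal_insert(node.lower_bound(key), key, val, &recorder);
+  //   std::vector<char> bytes(recorder.get_bytes());
+  //   recorder.copy_out(bytes.data(), bytes.size());  // also clears it
+  //   delta_buffer_t replayer;
+  //   replayer.copy_in(bytes.data(), bytes.size());
+  //   replayer.replay(other_node);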
+
+ void journal_insert(
+ const_iterator _iter,
+ const K &key,
+ const V &val,
+ delta_buffer_t *recorder) {
+ auto iter = iterator(this, _iter.offset);
+ if (recorder) {
+ recorder->insert(
+ key,
+ val);
+ }
+ insert(iter, key, val);
+ }
+
+ void journal_update(
+ const_iterator _iter,
+ const V &val,
+ delta_buffer_t *recorder) {
+ auto iter = iterator(this, _iter.offset);
+ if (recorder) {
+ recorder->update(iter->get_key(), val);
+ }
+ update(iter, val);
+ }
+
+ void journal_replace(
+ const_iterator _iter,
+ const K &key,
+ const V &val,
+ delta_buffer_t *recorder) {
+ auto iter = iterator(this, _iter.offset);
+ if (recorder) {
+ recorder->remove(iter->get_key());
+ recorder->insert(key, val);
+ }
+ replace(iter, key, val);
+ }
+
+
+ void journal_remove(
+ const_iterator _iter,
+ delta_buffer_t *recorder) {
+ auto iter = iterator(this, _iter.offset);
+ if (recorder) {
+ recorder->remove(iter->get_key());
+ }
+ remove(iter);
+ }
+
+
+ FixedKVNodeLayout(char *buf) :
+ buf(buf) {}
+
+ virtual ~FixedKVNodeLayout() = default;
+
+ const_iterator begin() const {
+ return const_iterator(
+ this,
+ 0);
+ }
+
+ const_iterator end() const {
+ return const_iterator(
+ this,
+ get_size());
+ }
+
+ iterator begin() {
+ return iterator(
+ this,
+ 0);
+ }
+
+ iterator end() {
+ return iterator(
+ this,
+ get_size());
+ }
+
+ const_iterator iter_idx(uint16_t off) const {
+ return const_iterator(
+ this,
+ off);
+ }
+
+ const_iterator find(K l) const {
+ auto ret = begin();
+ for (; ret != end(); ++ret) {
+ if (ret->get_key() == l)
+ break;
+ }
+ return ret;
+ }
+ iterator find(K l) {
+ const auto &tref = *this;
+ return iterator(this, tref.find(l).offset);
+ }
+
+ const_iterator lower_bound(K l) const {
+ auto ret = begin();
+ for (; ret != end(); ++ret) {
+ if (ret->get_key() >= l)
+ break;
+ }
+ return ret;
+ }
+ iterator lower_bound(K l) {
+ const auto &tref = *this;
+ return iterator(this, tref.lower_bound(l).offset);
+ }
+
+ const_iterator upper_bound(K l) const {
+ auto ret = begin();
+ for (; ret != end(); ++ret) {
+ if (ret->get_key() > l)
+ break;
+ }
+ return ret;
+ }
+ iterator upper_bound(K l) {
+ const auto &tref = *this;
+ return iterator(this, tref.upper_bound(l).offset);
+ }
+
+ const_iterator get_split_pivot() const {
+ return iter_idx(get_size() / 2);
+ }
+
+ uint16_t get_size() const {
+ return *layout.template Pointer<0>(buf);
+ }
+
+ /**
+ * set_size
+ *
+ * Set size representation to match size
+ */
+ void set_size(uint16_t size) {
+ *layout.template Pointer<0>(buf) = size;
+ }
+
+ /**
+ * get_meta/set_meta
+ *
+ * Enables stashing a templated type within the layout.
+ * Cannot be modified after initial write as it is not represented
+ * in delta_t
+ */
+ Meta get_meta() const {
+ MetaInt &metaint = *layout.template Pointer<1>(buf);
+ return Meta(metaint);
+ }
+ void set_meta(const Meta &meta) {
+ *layout.template Pointer<1>(buf) = MetaInt(meta);
+ }
+
+ constexpr static size_t get_capacity() {
+ return CAPACITY;
+ }
+
+ bool operator==(const FixedKVNodeLayout &rhs) const {
+ if (get_size() != rhs.get_size()) {
+ return false;
+ }
+
+ auto iter = begin();
+ auto iter2 = rhs.begin();
+ while (iter != end()) {
+ if (iter->get_key() != iter2->get_key() ||
+ iter->get_val() != iter2->get_val()) {
+ return false;
+ }
+ iter++;
+ iter2++;
+ }
+ return true;
+ }
+
+ /**
+ * split_into
+ *
+ * Takes *this and splits its contents into left and right.
+ */
+ K split_into(
+ FixedKVNodeLayout &left,
+ FixedKVNodeLayout &right) const {
+ auto piviter = get_split_pivot();
+
+ left.copy_from_foreign(left.begin(), begin(), piviter);
+ left.set_size(piviter - begin());
+
+ right.copy_from_foreign(right.begin(), piviter, end());
+ right.set_size(end() - piviter);
+
+ auto [lmeta, rmeta] = get_meta().split_into(piviter->get_key());
+ left.set_meta(lmeta);
+ right.set_meta(rmeta);
+
+ return piviter->get_key();
+ }
+
+ /**
+ * merge_from
+ *
+ * Takes two nodes and copies their contents into *this.
+ *
+ * precondition: left.size() + right.size() < CAPACITY
+ */
+ void merge_from(
+ const FixedKVNodeLayout &left,
+ const FixedKVNodeLayout &right)
+ {
+ copy_from_foreign(
+ end(),
+ left.begin(),
+ left.end());
+ set_size(left.get_size());
+ copy_from_foreign(
+ end(),
+ right.begin(),
+ right.end());
+ set_size(left.get_size() + right.get_size());
+ set_meta(Meta::merge_from(left.get_meta(), right.get_meta()));
+ }
+
+ /**
+ * balance_into_new_nodes
+ *
+ * Takes the contents of left and right and copies them into
+ * replacement_left and replacement_right such that in the
+ * event that the number of elements is odd the extra goes to
+ * the left side iff prefer_left.
+ */
+ static K balance_into_new_nodes(
+ const FixedKVNodeLayout &left,
+ const FixedKVNodeLayout &right,
+ bool prefer_left,
+ FixedKVNodeLayout &replacement_left,
+ FixedKVNodeLayout &replacement_right)
+ {
+ auto total = left.get_size() + right.get_size();
+    auto pivot_idx = total / 2;
+ if (total % 2 && prefer_left) {
+ pivot_idx++;
+ }
+ auto replacement_pivot = pivot_idx >= left.get_size() ?
+ right.iter_idx(pivot_idx - left.get_size())->get_key() :
+ left.iter_idx(pivot_idx)->get_key();
+
+ if (pivot_idx < left.get_size()) {
+ replacement_left.copy_from_foreign(
+ replacement_left.end(),
+ left.begin(),
+ left.iter_idx(pivot_idx));
+ replacement_left.set_size(pivot_idx);
+
+ replacement_right.copy_from_foreign(
+ replacement_right.end(),
+ left.iter_idx(pivot_idx),
+ left.end());
+
+ replacement_right.set_size(left.get_size() - pivot_idx);
+ replacement_right.copy_from_foreign(
+ replacement_right.end(),
+ right.begin(),
+ right.end());
+ replacement_right.set_size(total - pivot_idx);
+ } else {
+ replacement_left.copy_from_foreign(
+ replacement_left.end(),
+ left.begin(),
+ left.end());
+ replacement_left.set_size(left.get_size());
+
+ replacement_left.copy_from_foreign(
+ replacement_left.end(),
+ right.begin(),
+ right.iter_idx(pivot_idx - left.get_size()));
+ replacement_left.set_size(pivot_idx);
+
+ replacement_right.copy_from_foreign(
+ replacement_right.end(),
+ right.iter_idx(pivot_idx - left.get_size()),
+ right.end());
+ replacement_right.set_size(total - pivot_idx);
+ }
+
+ auto [lmeta, rmeta] = Meta::rebalance(
+ left.get_meta(), right.get_meta(), replacement_pivot);
+ replacement_left.set_meta(lmeta);
+ replacement_right.set_meta(rmeta);
+ return replacement_pivot;
+ }
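+
+  // A rebalance sketch, assuming Node is a concrete instantiation of
+  // FixedKVNodeLayout and the replacement nodes start out empty:
+  //
+  //   auto pivot = Node::balance_into_new_nodes(
+  //     left, right, true /* prefer_left */, new_left, new_right);
+  //   // With prefer_left, new_left ends up with ceil(total / 2) entries;
+  //   // every entry with key >= pivot now lives in new_right.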
+
+private:
+ void insert(
+ iterator iter,
+ const K &key,
+ const V &val) {
+ if (VALIDATE_INVARIANTS) {
+ if (iter != begin()) {
+ assert((iter - 1)->get_key() < key);
+ }
+ if (iter != end()) {
+ assert(iter->get_key() > key);
+ }
+ assert(get_size() < CAPACITY);
+ }
+ copy_from_local(iter + 1, iter, end());
+ iter->set_key(key);
+ iter->set_val(val);
+ set_size(get_size() + 1);
+ }
+
+ void update(
+ iterator iter,
+ V val) {
+ assert(iter != end());
+ iter->set_val(val);
+ }
+
+ void replace(
+ iterator iter,
+ const K &key,
+ const V &val) {
+ assert(iter != end());
+ if (VALIDATE_INVARIANTS) {
+ if (iter != begin()) {
+ assert((iter - 1)->get_key() < key);
+ }
+ if ((iter + 1) != end()) {
+ assert((iter + 1)->get_key() > key);
+ }
+ }
+ iter->set_key(key);
+ iter->set_val(val);
+ }
+
+ void remove(iterator iter) {
+ assert(iter != end());
+ copy_from_local(iter, iter + 1, end());
+ set_size(get_size() - 1);
+ }
+
+ /**
+ * get_key_ptr
+ *
+ * Get pointer to start of key array
+ */
+ KINT *get_key_ptr() {
+ return layout.template Pointer<2>(buf);
+ }
+ const KINT *get_key_ptr() const {
+ return layout.template Pointer<2>(buf);
+ }
+
+ /**
+ * get_val_ptr
+ *
+ * Get pointer to start of val array
+ */
+ VINT *get_val_ptr() {
+ return layout.template Pointer<3>(buf);
+ }
+ const VINT *get_val_ptr() const {
+ return layout.template Pointer<3>(buf);
+ }
+
+ /**
+ * node_resolve/unresolve_vals
+ *
+ * If the representation for values depends in some way on the
+ * node in which they are located, users may implement
+ * resolve/unresolve to enable copy_from_foreign to handle that
+ * transition.
+ */
+ virtual void node_resolve_vals(iterator from, iterator to) const {}
+ virtual void node_unresolve_vals(iterator from, iterator to) const {}
+
+ /**
+ * copy_from_foreign
+ *
+ * Copies entries from [from_src, to_src) to tgt.
+ *
+ * tgt and from_src must be from different nodes.
+ * from_src and to_src must be from the same node.
+ */
+ static void copy_from_foreign(
+ iterator tgt,
+ const_iterator from_src,
+ const_iterator to_src) {
+ assert(tgt->node != from_src->node);
+ assert(to_src->node == from_src->node);
+ memcpy(
+ tgt->get_val_ptr(), from_src->get_val_ptr(),
+ to_src->get_val_ptr() - from_src->get_val_ptr());
+ memcpy(
+ tgt->get_key_ptr(), from_src->get_key_ptr(),
+ to_src->get_key_ptr() - from_src->get_key_ptr());
+ from_src->node->node_resolve_vals(tgt, tgt + (to_src - from_src));
+ tgt->node->node_unresolve_vals(tgt, tgt + (to_src - from_src));
+ }
+
+ /**
+ * copy_from_local
+ *
+ * Copies entries from [from_src, to_src) to tgt.
+ *
+ * tgt, from_src, and to_src must be from the same node.
+ */
+ static void copy_from_local(
+ iterator tgt,
+ iterator from_src,
+ iterator to_src) {
+ assert(tgt->node == from_src->node);
+ assert(to_src->node == from_src->node);
+ memmove(
+ tgt->get_val_ptr(), from_src->get_val_ptr(),
+ to_src->get_val_ptr() - from_src->get_val_ptr());
+ memmove(
+ tgt->get_key_ptr(), from_src->get_key_ptr(),
+ to_src->get_key_ptr() - from_src->get_key_ptr());
+ }
+};
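+
+// A split sketch, assuming Node is a concrete instantiation of
+// FixedKVNodeLayout and buf/lbuf/rbuf are appropriately sized,
+// initialized buffers:
+//
+//   Node node(buf), left(lbuf), right(rbuf);
+//   auto pivot = node.split_into(left, right);
+//   // entries with key < pivot are now in left, the rest in right;
+//   // each side's Meta was derived via get_meta().split_into(pivot).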
+
+}
diff --git a/src/crimson/common/formatter.cc b/src/crimson/common/formatter.cc
new file mode 100644
index 000000000..677216224
--- /dev/null
+++ b/src/crimson/common/formatter.cc
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "formatter.h"
+
+#include <fmt/format.h>
+#if FMT_VERSION >= 60000
+#include <fmt/chrono.h>
+#else
+#include <fmt/time.h>
+#endif
+
+
+template <>
+struct fmt::formatter<seastar::lowres_system_clock::time_point> {
+ // ignore the format string
+ template <typename ParseContext>
+ constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
+
+ template <typename FormatContext>
+ auto format(const seastar::lowres_system_clock::time_point& t,
+ FormatContext& ctx) {
+ std::time_t tt = std::chrono::duration_cast<std::chrono::seconds>(
+ t.time_since_epoch()).count();
+ auto milliseconds = (t.time_since_epoch() %
+ std::chrono::seconds(1)).count();
+ return fmt::format_to(ctx.out(), "{:%Y-%m-%d %H:%M:%S} {:03d}",
+ fmt::localtime(tt), milliseconds);
+ }
+};
+
+template <>
+struct fmt::formatter<ceph::coarse_real_clock::time_point> {
+ // ignore the format string
+ template <typename ParseContext>
+ constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
+
+ template <typename FormatContext>
+ auto format(const ceph::coarse_real_clock::time_point& t,
+ FormatContext& ctx) {
+ std::time_t tt = std::chrono::duration_cast<std::chrono::seconds>(
+ t.time_since_epoch()).count();
+ auto milliseconds = (t.time_since_epoch() %
+ std::chrono::seconds(1)).count();
+ return fmt::format_to(ctx.out(), "{:%Y-%m-%d %H:%M:%S} {:03d}",
+ fmt::localtime(tt), milliseconds);
+ }
+};
+
+namespace std {
+
+ostream& operator<<(ostream& out,
+ const seastar::lowres_system_clock::time_point& t)
+{
+ return out << fmt::format("{}", t);
+}
+
+ostream& operator<<(ostream& out,
+ const ceph::coarse_real_clock::time_point& t)
+{
+ return out << fmt::format("{}", t);
+}
+
+}
diff --git a/src/crimson/common/formatter.h b/src/crimson/common/formatter.h
new file mode 100644
index 000000000..1775b0954
--- /dev/null
+++ b/src/crimson/common/formatter.h
@@ -0,0 +1,15 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <seastar/core/lowres_clock.hh>
+
+#include "common/ceph_time.h"
+
+namespace std {
+
+ostream& operator<<(ostream& out,
+ const seastar::lowres_system_clock::time_point& t);
+ostream& operator<<(ostream& out,
+ const ceph::coarse_real_clock::time_point& t);
+
+}
diff --git a/src/crimson/common/gated.h b/src/crimson/common/gated.h
new file mode 100644
index 000000000..7d901b6b1
--- /dev/null
+++ b/src/crimson/common/gated.h
@@ -0,0 +1,51 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <seastar/core/gate.hh>
+#include <seastar/core/future.hh>
+#include <seastar/core/future-util.hh>
+
+#include "crimson/common/exception.h"
+#include "crimson/common/log.h"
+#include "include/ceph_assert.h"
+
+namespace crimson::common {
+
+class Gated {
+ public:
+ static seastar::logger& gated_logger() {
+ return crimson::get_logger(ceph_subsys_osd);
+ }
+ template <typename Func, typename T>
+ inline void dispatch_in_background(const char* what, T& who, Func&& func) {
+    (void) dispatch(what, who, std::forward<Func>(func));
+ }
+ template <typename Func, typename T>
+ inline seastar::future<> dispatch(const char* what, T& who, Func&& func) {
+ return seastar::with_gate(pending_dispatch, std::forward<Func>(func)
+ ).handle_exception([what, &who] (std::exception_ptr eptr) {
+ if (*eptr.__cxa_exception_type() == typeid(system_shutdown_exception)) {
+ gated_logger().debug(
+ "{}, {} skipped, system shutdown", who, what);
+ return;
+ }
+ gated_logger().error(
+ "{} dispatch() {} caught exception: {}", who, what, eptr);
+ assert(*eptr.__cxa_exception_type()
+ == typeid(seastar::gate_closed_exception));
+ });
+ }
+
+ seastar::future<> close() {
+ return pending_dispatch.close();
+ }
+ bool is_closed() const {
+ return pending_dispatch.is_closed();
+ }
+ private:
+ seastar::gate pending_dispatch;
+};
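+
+// A minimal usage sketch (conn is a hypothetical connection object that
+// is formattable with "{}"): background work runs under the gate, and
+// close() waits for all of it during shutdown.
+//
+//   crimson::common::Gated gate;
+//   gate.dispatch_in_background("send_reply", conn, [] {
+//     return seastar::make_ready_future<>();
+//   });
+//   // on shutdown:
+//   return gate.close();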
+
+}// namespace crimson::common
diff --git a/src/crimson/common/layout.h b/src/crimson/common/layout.h
new file mode 100644
index 000000000..9d54ecd1d
--- /dev/null
+++ b/src/crimson/common/layout.h
@@ -0,0 +1,737 @@
+// Copyright 2018 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// MOTIVATION AND TUTORIAL
+//
+// If you want to put in a single heap allocation N doubles followed by M ints,
+// it's easy if N and M are known at compile time.
+//
+// struct S {
+// double a[N];
+// int b[M];
+// };
+//
+// S* p = new S;
+//
+// But what if N and M are known only at run time? Class template Layout to
+// the rescue! It's a portable generalization of the technique known as the
+// struct hack.
+//
+// // This object will tell us everything we need to know about the memory
+// // layout of double[N] followed by int[M]. It's structurally identical to
+// // size_t[2] that stores N and M. It's very cheap to create.
+// const Layout<double, int> layout(N, M);
+//
+// // Allocate enough memory for both arrays. `AllocSize()` tells us how much
+// // memory is needed. We are free to use any allocation function we want as
+// // long as it returns aligned memory.
+// std::unique_ptr<unsigned char[]> p(new unsigned char[layout.AllocSize()]);
+//
+// // Obtain the pointer to the array of doubles.
+// // Equivalent to `reinterpret_cast<double*>(p.get())`.
+// //
+// // We could have written layout.Pointer<0>(p) instead. If all the types are
+// // unique you can use either form, but if some types are repeated you must
+// // use the index form.
+// double* a = layout.Pointer<double>(p.get());
+//
+// // Obtain the pointer to the array of ints.
+// // Equivalent to `reinterpret_cast<int*>(p.get() + N * 8)`.
+// int* b = layout.Pointer<int>(p);
+//
+// If we are unable to specify sizes of all fields, we can pass as many sizes as
+// we can to `Partial()`. In return, it'll allow us to access the fields whose
+// locations and sizes can be computed from the provided information.
+// `Partial()` comes in handy when the array sizes are embedded into the
+// allocation.
+//
+// // size_t[1] containing N, size_t[1] containing M, double[N], int[M].
+// using L = Layout<size_t, size_t, double, int>;
+//
+// unsigned char* Allocate(size_t n, size_t m) {
+// const L layout(1, 1, n, m);
+// unsigned char* p = new unsigned char[layout.AllocSize()];
+// *layout.Pointer<0>(p) = n;
+// *layout.Pointer<1>(p) = m;
+// return p;
+// }
+//
+// void Use(unsigned char* p) {
+// // First, extract N and M.
+// // Specify that the first array has only one element. Using `prefix` we
+// // can access the first two arrays but not more.
+// constexpr auto prefix = L::Partial(1);
+// size_t n = *prefix.Pointer<0>(p);
+// size_t m = *prefix.Pointer<1>(p);
+//
+// // Now we can get pointers to the payload.
+// const L layout(1, 1, n, m);
+// double* a = layout.Pointer<double>(p);
+// int* b = layout.Pointer<int>(p);
+// }
+//
+// The layout we used above combines fixed-size with dynamically-sized fields.
+// This is quite common. Layout is optimized for this use case and generates
+// optimal code. All computations that can be performed at compile time are
+// indeed performed at compile time.
+//
+// Efficiency tip: The order of fields matters. In `Layout<T1, ..., TN>` try to
+// ensure that `alignof(T1) >= ... >= alignof(TN)`. This way you'll have no
+// padding in between arrays.
+//
+// You can manually override the alignment of an array by wrapping the type in
+// `Aligned<T, N>`. `Layout<..., Aligned<T, N>, ...>` has exactly the same API
+// and behavior as `Layout<..., T, ...>` except that the first element of the
+// array of `T` is aligned to `N` (the rest of the elements follow without
+// padding). `N` cannot be less than `alignof(T)`.
+//
+// `AllocSize()` and `Pointer()` are the most basic methods for dealing with
+// memory layouts. Check out the reference or code below to discover more.
+//
+// EXAMPLE
+//
+// // Immutable move-only string with sizeof equal to sizeof(void*). The
+// // string size and the characters are kept in the same heap allocation.
+// class CompactString {
+// public:
+// CompactString(const char* s = "") {
+// const size_t size = strlen(s);
+// // size_t[1] followed by char[size + 1].
+// const L layout(1, size + 1);
+// p_.reset(new unsigned char[layout.AllocSize()]);
+// // If running under ASAN, mark the padding bytes, if any, to catch
+// // memory errors.
+// layout.PoisonPadding(p_.get());
+// // Store the size in the allocation.
+// *layout.Pointer<size_t>(p_.get()) = size;
+// // Store the characters in the allocation.
+// memcpy(layout.Pointer<char>(p_.get()), s, size + 1);
+// }
+//
+// size_t size() const {
+// // Equivalent to reinterpret_cast<size_t&>(*p).
+// return *L::Partial().Pointer<size_t>(p_.get());
+// }
+//
+// const char* c_str() const {
+// // Equivalent to reinterpret_cast<char*>(p.get() + sizeof(size_t)).
+// // The argument in Partial(1) specifies that we have size_t[1] in front
+// // of the characters.
+// return L::Partial(1).Pointer<char>(p_.get());
+// }
+//
+// private:
+// // Our heap allocation contains a size_t followed by an array of chars.
+// using L = Layout<size_t, char>;
+// std::unique_ptr<unsigned char[]> p_;
+// };
+//
+// int main() {
+// CompactString s = "hello";
+// assert(s.size() == 5);
+// assert(strcmp(s.c_str(), "hello") == 0);
+// }
+//
+// DOCUMENTATION
+//
+// The interface exported by this file consists of:
+// - class `Layout<>` and its public members.
+// - The public members of class `internal_layout::LayoutImpl<>`. That class
+// isn't intended to be used directly, and its name and template parameter
+// list are internal implementation details, but the class itself provides
+// most of the functionality in this file. See comments on its members for
+// detailed documentation.
+//
+// `Layout<T1,... Tn>::Partial(count1,..., countm)` (where `m` <= `n`) returns a
+// `LayoutImpl<>` object. `Layout<T1,..., Tn> layout(count1,..., countn)`
+// creates a `Layout` object, which exposes the same functionality by inheriting
+// from `LayoutImpl<>`.
+
+#ifndef ABSL_CONTAINER_INTERNAL_LAYOUT_H_
+#define ABSL_CONTAINER_INTERNAL_LAYOUT_H_
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <ostream>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <typeinfo>
+#include <utility>
+
+#ifdef ADDRESS_SANITIZER
+#include <sanitizer/asan_interface.h>
+#endif
+
+// boost::beast::span is used below as a stand-in for C++20 std::span
+#include <boost/beast/core/span.hpp>
+#include <fmt/format.h>
+
+#if defined(__GXX_RTTI)
+#define ABSL_INTERNAL_HAS_CXA_DEMANGLE
+#endif
+
+#ifdef ABSL_INTERNAL_HAS_CXA_DEMANGLE
+#include <cxxabi.h>
+#endif
+
+namespace absl {
+namespace container_internal {
+
+// A type wrapper that instructs `Layout` to use the specific alignment for the
+// array. `Layout<..., Aligned<T, N>, ...>` has exactly the same API
+// and behavior as `Layout<..., T, ...>` except that the first element of the
+// array of `T` is aligned to `N` (the rest of the elements follow without
+// padding).
+//
+// Requires: `N >= alignof(T)` and `N` is a power of 2.
+template <class T, size_t N>
+struct Aligned;
+
+namespace internal_layout {
+
+template <class T>
+struct NotAligned {};
+
+template <class T, size_t N>
+struct NotAligned<const Aligned<T, N>> {
+ static_assert(sizeof(T) == 0, "Aligned<T, N> cannot be const-qualified");
+};
+
+template <size_t>
+using IntToSize = size_t;
+
+template <class>
+using TypeToSize = size_t;
+
+template <class T>
+struct Type : NotAligned<T> {
+ using type = T;
+};
+
+template <class T, size_t N>
+struct Type<Aligned<T, N>> {
+ using type = T;
+};
+
+template <class T>
+struct SizeOf : NotAligned<T>, std::integral_constant<size_t, sizeof(T)> {};
+
+template <class T, size_t N>
+struct SizeOf<Aligned<T, N>> : std::integral_constant<size_t, sizeof(T)> {};
+
+// Note: workaround for https://gcc.gnu.org/PR88115
+template <class T>
+struct AlignOf : NotAligned<T> {
+ static constexpr size_t value = alignof(T);
+};
+
+template <class T, size_t N>
+struct AlignOf<Aligned<T, N>> {
+ static_assert(N % alignof(T) == 0,
+ "Custom alignment can't be lower than the type's alignment");
+ static constexpr size_t value = N;
+};
+
+// Does `Ts...` contain `T`?
+template <class T, class... Ts>
+using Contains = std::disjunction<std::is_same<T, Ts>...>;
+
+template <class From, class To>
+using CopyConst =
+ typename std::conditional_t<std::is_const_v<From>, const To, To>;
+
+// Note: We're not qualifying this with absl:: because it doesn't compile under
+// MSVC.
+template <class T>
+using SliceType = boost::beast::span<T>;
+
+// This namespace contains no types. It prevents functions defined in it from
+// being found by ADL.
+namespace adl_barrier {
+
+template <class Needle, class... Ts>
+constexpr size_t Find(Needle, Needle, Ts...) {
+ static_assert(!Contains<Needle, Ts...>(), "Duplicate element type");
+ return 0;
+}
+
+template <class Needle, class T, class... Ts>
+constexpr size_t Find(Needle, T, Ts...) {
+ return adl_barrier::Find(Needle(), Ts()...) + 1;
+}
+
+constexpr bool IsPow2(size_t n) { return !(n & (n - 1)); }
+
+// Returns `q * m` for the smallest `q` such that `q * m >= n`.
+// Requires: `m` is a power of two. It's enforced by IsLegalElementType below.
+constexpr size_t Align(size_t n, size_t m) { return (n + m - 1) & ~(m - 1); }
+
+constexpr size_t Min(size_t a, size_t b) { return b < a ? b : a; }
+
+constexpr size_t Max(size_t a) { return a; }
+
+template <class... Ts>
+constexpr size_t Max(size_t a, size_t b, Ts... rest) {
+ return adl_barrier::Max(b < a ? a : b, rest...);
+}
+
+template <class T>
+std::string TypeName() {
+ std::string out;
+ int status = 0;
+ char* demangled = nullptr;
+#ifdef ABSL_INTERNAL_HAS_CXA_DEMANGLE
+ demangled = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, &status);
+#endif
+ if (status == 0 && demangled != nullptr) { // Demangling succeeded.
+ out = fmt::format("<{}>", demangled);
+ free(demangled);
+ } else {
+#if defined(__GXX_RTTI) || defined(_CPPRTTI)
+ out = fmt::format("<{}>", typeid(T).name());
+#endif
+ }
+ return out;
+}
+
+} // namespace adl_barrier
+
+template <bool C>
+using EnableIf = typename std::enable_if_t<C, int>;
+
+// Can `T` be a template argument of `Layout`?
+template <class T>
+using IsLegalElementType = std::integral_constant<
+ bool, !std::is_reference_v<T> && !std::is_volatile_v<T> &&
+ !std::is_reference_v<typename Type<T>::type> &&
+ !std::is_volatile_v<typename Type<T>::type> &&
+ adl_barrier::IsPow2(AlignOf<T>::value)>;
+
+template <class Elements, class SizeSeq, class OffsetSeq>
+class LayoutImpl;
+
+// Public base class of `Layout` and the result type of `Layout::Partial()`.
+//
+// `Elements...` contains all template arguments of `Layout` that created this
+// instance.
+//
+// `SizeSeq...` is `[0, NumSizes)` where `NumSizes` is the number of arguments
+// passed to `Layout::Partial()` or `Layout::Layout()`.
+//
+// `OffsetSeq...` is `[0, NumOffsets)` where `NumOffsets` is
+// `Min(sizeof...(Elements), NumSizes + 1)` (the number of arrays for which we
+// can compute offsets).
+template <class... Elements, size_t... SizeSeq, size_t... OffsetSeq>
+class LayoutImpl<std::tuple<Elements...>, std::index_sequence<SizeSeq...>,
+ std::index_sequence<OffsetSeq...>> {
+ private:
+ static_assert(sizeof...(Elements) > 0, "At least one field is required");
+ static_assert(std::conjunction_v<IsLegalElementType<Elements>...>,
+ "Invalid element type (see IsLegalElementType)");
+
+ enum {
+ NumTypes = sizeof...(Elements),
+ NumSizes = sizeof...(SizeSeq),
+ NumOffsets = sizeof...(OffsetSeq),
+ };
+
+ // These are guaranteed by `Layout`.
+ static_assert(NumOffsets == adl_barrier::Min(NumTypes, NumSizes + 1),
+ "Internal error");
+ static_assert(NumTypes > 0, "Internal error");
+
+ // Returns the index of `T` in `Elements...`. Results in a compilation error
+ // if `Elements...` doesn't contain exactly one instance of `T`.
+ template <class T>
+ static constexpr size_t ElementIndex() {
+ static_assert(Contains<Type<T>, Type<typename Type<Elements>::type>...>(),
+ "Type not found");
+ return adl_barrier::Find(Type<T>(),
+ Type<typename Type<Elements>::type>()...);
+ }
+
+ template <size_t N>
+ using ElementAlignment =
+ AlignOf<typename std::tuple_element<N, std::tuple<Elements...>>::type>;
+
+ public:
+ // Element types of all arrays packed in a tuple.
+ using ElementTypes = std::tuple<typename Type<Elements>::type...>;
+
+ // Element type of the Nth array.
+ template <size_t N>
+ using ElementType = typename std::tuple_element<N, ElementTypes>::type;
+
+ constexpr explicit LayoutImpl(IntToSize<SizeSeq>... sizes)
+ : size_{sizes...} {}
+
+ // Alignment of the layout, equal to the strictest alignment of all elements.
+ // All pointers passed to the methods of layout must be aligned to this value.
+ static constexpr size_t Alignment() {
+ return adl_barrier::Max(AlignOf<Elements>::value...);
+ }
+
+ // Offset in bytes of the Nth array.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+  //   assert(x.Offset<0>() == 0); // The ints start from 0.
+  //   assert(x.Offset<1>() == 16); // The doubles start from 16.
+ //
+ // Requires: `N <= NumSizes && N < sizeof...(Ts)`.
+ template <size_t N, EnableIf<N == 0> = 0>
+ constexpr size_t Offset() const {
+ return 0;
+ }
+
+ template <size_t N, EnableIf<N != 0> = 0>
+ constexpr size_t Offset() const {
+ static_assert(N < NumOffsets, "Index out of bounds");
+ return adl_barrier::Align(
+ Offset<N - 1>() + SizeOf<ElementType<N - 1>>() * size_[N - 1],
+ ElementAlignment<N>::value);
+ }
+
+ // Offset in bytes of the array with the specified element type. There must
+ // be exactly one such array and its zero-based index must be at most
+ // `NumSizes`.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+  //   assert(x.Offset<int>() == 0); // The ints start from 0.
+  //   assert(x.Offset<double>() == 16); // The doubles start from 16.
+ template <class T>
+ constexpr size_t Offset() const {
+ return Offset<ElementIndex<T>()>();
+ }
+
+ // Offsets in bytes of all arrays for which the offsets are known.
+ constexpr std::array<size_t, NumOffsets> Offsets() const {
+ return {{Offset<OffsetSeq>()...}};
+ }
+
+ // The number of elements in the Nth array. This is the Nth argument of
+ // `Layout::Partial()` or `Layout::Layout()` (zero-based).
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+ // assert(x.Size<0>() == 3);
+ // assert(x.Size<1>() == 4);
+ //
+ // Requires: `N < NumSizes`.
+ template <size_t N>
+ constexpr size_t Size() const {
+ static_assert(N < NumSizes, "Index out of bounds");
+ return size_[N];
+ }
+
+ // The number of elements in the array with the specified element type.
+ // There must be exactly one such array and its zero-based index must be
+ // at most `NumSizes`.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+ // assert(x.Size<int>() == 3);
+ // assert(x.Size<double>() == 4);
+ template <class T>
+ constexpr size_t Size() const {
+ return Size<ElementIndex<T>()>();
+ }
+
+ // The number of elements of all arrays for which they are known.
+ constexpr std::array<size_t, NumSizes> Sizes() const {
+ return {{Size<SizeSeq>()...}};
+ }
+
+ // Pointer to the beginning of the Nth array.
+ //
+ // `Char` must be `[const] [signed|unsigned] char`.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+ // unsigned char* p = new unsigned char[x.AllocSize()];
+ // int* ints = x.Pointer<0>(p);
+ // double* doubles = x.Pointer<1>(p);
+ //
+ // Requires: `N <= NumSizes && N < sizeof...(Ts)`.
+ // Requires: `p` is aligned to `Alignment()`.
+ template <size_t N, class Char>
+ CopyConst<Char, ElementType<N>>* Pointer(Char* p) const {
+ using C = typename std::remove_const<Char>::type;
+ static_assert(
+ std::is_same<C, char>() || std::is_same<C, unsigned char>() ||
+ std::is_same<C, signed char>(),
+ "The argument must be a pointer to [const] [signed|unsigned] char");
+ constexpr size_t alignment = Alignment();
+ (void)alignment;
+ assert(reinterpret_cast<uintptr_t>(p) % alignment == 0);
+ return reinterpret_cast<CopyConst<Char, ElementType<N>>*>(p + Offset<N>());
+ }
+
+ // Pointer to the beginning of the array with the specified element type.
+ // There must be exactly one such array and its zero-based index must be at
+ // most `NumSizes`.
+ //
+ // `Char` must be `[const] [signed|unsigned] char`.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+ // unsigned char* p = new unsigned char[x.AllocSize()];
+ // int* ints = x.Pointer<int>(p);
+ // double* doubles = x.Pointer<double>(p);
+ //
+ // Requires: `p` is aligned to `Alignment()`.
+ template <class T, class Char>
+ CopyConst<Char, T>* Pointer(Char* p) const {
+ return Pointer<ElementIndex<T>()>(p);
+ }
+
+ // Pointers to all arrays for which pointers are known.
+ //
+ // `Char` must be `[const] [signed|unsigned] char`.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+ // unsigned char* p = new unsigned char[x.AllocSize()];
+ //
+ // int* ints;
+ // double* doubles;
+ // std::tie(ints, doubles) = x.Pointers(p);
+ //
+ // Requires: `p` is aligned to `Alignment()`.
+ //
+ // Note: We're not using ElementType alias here because it does not compile
+ // under MSVC.
+ template <class Char>
+ std::tuple<CopyConst<
+ Char, typename std::tuple_element<OffsetSeq, ElementTypes>::type>*...>
+ Pointers(Char* p) const {
+ return std::tuple<CopyConst<Char, ElementType<OffsetSeq>>*...>(
+ Pointer<OffsetSeq>(p)...);
+ }
+
+ // The Nth array.
+ //
+ // `Char` must be `[const] [signed|unsigned] char`.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+ // unsigned char* p = new unsigned char[x.AllocSize()];
+ // Span<int> ints = x.Slice<0>(p);
+ // Span<double> doubles = x.Slice<1>(p);
+ //
+ // Requires: `N < NumSizes`.
+ // Requires: `p` is aligned to `Alignment()`.
+ template <size_t N, class Char>
+ SliceType<CopyConst<Char, ElementType<N>>> Slice(Char* p) const {
+ return SliceType<CopyConst<Char, ElementType<N>>>(Pointer<N>(p), Size<N>());
+ }
+
+ // The array with the specified element type. There must be exactly one
+ // such array and its zero-based index must be less than `NumSizes`.
+ //
+ // `Char` must be `[const] [signed|unsigned] char`.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+ // unsigned char* p = new unsigned char[x.AllocSize()];
+ // Span<int> ints = x.Slice<int>(p);
+ // Span<double> doubles = x.Slice<double>(p);
+ //
+ // Requires: `p` is aligned to `Alignment()`.
+ template <class T, class Char>
+ SliceType<CopyConst<Char, T>> Slice(Char* p) const {
+ return Slice<ElementIndex<T>()>(p);
+ }
+
+ // All arrays with known sizes.
+ //
+ // `Char` must be `[const] [signed|unsigned] char`.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+ // unsigned char* p = new unsigned char[x.AllocSize()];
+ //
+ // Span<int> ints;
+ // Span<double> doubles;
+ // std::tie(ints, doubles) = x.Slices(p);
+ //
+ // Requires: `p` is aligned to `Alignment()`.
+ //
+ // Note: We're not using ElementType alias here because it does not compile
+ // under MSVC.
+ template <class Char>
+ std::tuple<SliceType<CopyConst<
+ Char, typename std::tuple_element<SizeSeq, ElementTypes>::type>>...>
+ Slices(Char* p) const {
+ // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63875 (fixed
+ // in 6.1).
+ (void)p;
+ return std::tuple<SliceType<CopyConst<Char, ElementType<SizeSeq>>>...>(
+ Slice<SizeSeq>(p)...);
+ }
+
+ // The size of the allocation that fits all arrays.
+ //
+ // // int[3], 4 bytes of padding, double[4].
+ // Layout<int, double> x(3, 4);
+ // unsigned char* p = new unsigned char[x.AllocSize()]; // 48 bytes
+ //
+ // Requires: `NumSizes == sizeof...(Ts)`.
+ constexpr size_t AllocSize() const {
+ static_assert(NumTypes == NumSizes, "You must specify sizes of all fields");
+ return Offset<NumTypes - 1>() +
+ SizeOf<ElementType<NumTypes - 1>>() * size_[NumTypes - 1];
+ }
+
+ // If built with --config=asan, poisons padding bytes (if any) in the
+ // allocation. The pointer must point to a memory block at least
+ // `AllocSize()` bytes in length.
+ //
+ // `Char` must be `[const] [signed|unsigned] char`.
+ //
+ // Requires: `p` is aligned to `Alignment()`.
+ template <class Char, size_t N = NumOffsets - 1, EnableIf<N == 0> = 0>
+ void PoisonPadding(const Char* p) const {
+ Pointer<0>(p); // verify the requirements on `Char` and `p`
+ }
+
+ template <class Char, size_t N = NumOffsets - 1, EnableIf<N != 0> = 0>
+ void PoisonPadding(const Char* p) const {
+ static_assert(N < NumOffsets, "Index out of bounds");
+ (void)p;
+#ifdef ADDRESS_SANITIZER
+ PoisonPadding<Char, N - 1>(p);
+ // The `if` is an optimization. It doesn't affect the observable behaviour.
+ if (ElementAlignment<N - 1>::value % ElementAlignment<N>::value) {
+ size_t start =
+ Offset<N - 1>() + SizeOf<ElementType<N - 1>>() * size_[N - 1];
+ ASAN_POISON_MEMORY_REGION(p + start, Offset<N>() - start);
+ }
+#endif
+ }
+
+ // Human-readable description of the memory layout. Useful for debugging.
+ // Slow.
+ //
+ // // char[5], 3 bytes of padding, int[3], 4 bytes of padding, followed
+ // // by an unknown number of doubles.
+ // auto x = Layout<char, int, double>::Partial(5, 3);
+ // assert(x.DebugString() ==
+ // "@0<char>(1)[5]; @8<int>(4)[3]; @24<double>(8)");
+ //
+ // Each field is in the following format: @offset<type>(sizeof)[size] (<type>
+ // may be missing depending on the target platform). For example,
+ // @8<int>(4)[3] means that at offset 8 we have an array of ints, where each
+ // int is 4 bytes, and we have 3 of those ints. The size of the last field may
+ // be missing (as in the example above). Only fields with known offsets are
+ // described. Type names may differ across platforms: one compiler might
+ // produce "unsigned*" where another produces "unsigned int *".
+ std::string DebugString() const {
+ const auto offsets = Offsets();
+ const size_t sizes[] = {SizeOf<ElementType<OffsetSeq>>()...};
+ const std::string types[] = {
+ adl_barrier::TypeName<ElementType<OffsetSeq>>()...};
+ std::string res = fmt::format("@0{}({})", types[0], sizes[0]);
+ for (size_t i = 0; i != NumOffsets - 1; ++i) {
+      res += fmt::format("[{}]; @{}{}({})", size_[i], offsets[i + 1], types[i + 1], sizes[i + 1]);
+ }
+ // NumSizes is a constant that may be zero. Some compilers cannot see that
+ // inside the if statement "size_[NumSizes - 1]" must be valid.
+ int last = static_cast<int>(NumSizes) - 1;
+ if (NumTypes == NumSizes && last >= 0) {
+ res += fmt::format("[{}]", size_[last]);
+ }
+ return res;
+ }
+
+ private:
+ // Arguments of `Layout::Partial()` or `Layout::Layout()`.
+ size_t size_[NumSizes > 0 ? NumSizes : 1];
+};
+
+template <size_t NumSizes, class... Ts>
+using LayoutType = LayoutImpl<
+ std::tuple<Ts...>, std::make_index_sequence<NumSizes>,
+ std::make_index_sequence<adl_barrier::Min(sizeof...(Ts), NumSizes + 1)>>;
+
+} // namespace internal_layout
+
+// Descriptor of arrays of various types and sizes laid out in memory one after
+// another. See the top of the file for documentation.
+//
+// Check out the public API of internal_layout::LayoutImpl above. The type is
+// internal to the library but its methods are public, and they are inherited
+// by `Layout`.
+template <class... Ts>
+class Layout : public internal_layout::LayoutType<sizeof...(Ts), Ts...> {
+ public:
+ static_assert(sizeof...(Ts) > 0, "At least one field is required");
+ static_assert(
+ std::conjunction_v<internal_layout::IsLegalElementType<Ts>...>,
+ "Invalid element type (see IsLegalElementType)");
+
+ // The result type of `Partial()` with `NumSizes` arguments.
+ template <size_t NumSizes>
+ using PartialType = internal_layout::LayoutType<NumSizes, Ts...>;
+
+ // `Layout` knows the element types of the arrays we want to lay out in
+ // memory but not the number of elements in each array.
+ // `Partial(size1, ..., sizeN)` allows us to specify the latter. The
+ // resulting immutable object can be used to obtain pointers to the
+ // individual arrays.
+ //
+ // It's allowed to pass fewer array sizes than the number of arrays. E.g.,
+  // if all you need is the offset of the second array, you only need to
+ // pass one argument -- the number of elements in the first array.
+ //
+ // // int[3] followed by 4 bytes of padding and an unknown number of
+ // // doubles.
+ // auto x = Layout<int, double>::Partial(3);
+ // // doubles start at byte 16.
+ // assert(x.Offset<1>() == 16);
+ //
+ // If you know the number of elements in all arrays, you can still call
+ // `Partial()` but it's more convenient to use the constructor of `Layout`.
+ //
+ // Layout<int, double> x(3, 5);
+ //
+ // Note: The sizes of the arrays must be specified in number of elements,
+ // not in bytes.
+ //
+ // Requires: `sizeof...(Sizes) <= sizeof...(Ts)`.
+ // Requires: all arguments are convertible to `size_t`.
+ template <class... Sizes>
+ static constexpr PartialType<sizeof...(Sizes)> Partial(Sizes&&... sizes) {
+ static_assert(sizeof...(Sizes) <= sizeof...(Ts));
+ return PartialType<sizeof...(Sizes)>(std::forward<Sizes>(sizes)...);
+ }
+
+ // Creates a layout with the sizes of all arrays specified. If you know
+ // only the sizes of the first N arrays (where N can be zero), you can use
+ // `Partial()` defined above. The constructor is essentially equivalent to
+ // calling `Partial()` and passing in all array sizes; the constructor is
+ // provided as a convenient abbreviation.
+ //
+ // Note: The sizes of the arrays must be specified in number of elements,
+ // not in bytes.
+ constexpr explicit Layout(internal_layout::TypeToSize<Ts>... sizes)
+ : internal_layout::LayoutType<sizeof...(Ts), Ts...>(sizes...) {}
+};
+
+} // namespace container_internal
+} // namespace absl
+
+#endif // ABSL_CONTAINER_INTERNAL_LAYOUT_H_
diff --git a/src/crimson/common/log.cc b/src/crimson/common/log.cc
new file mode 100644
index 000000000..cae9f6a7b
--- /dev/null
+++ b/src/crimson/common/log.cc
@@ -0,0 +1,21 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "log.h"
+
+static std::array<seastar::logger, ceph_subsys_get_num()> loggers{
+#define SUBSYS(name, log_level, gather_level) \
+ seastar::logger(#name),
+#define DEFAULT_SUBSYS(log_level, gather_level) \
+ seastar::logger("none"),
+ #include "common/subsys.h"
+#undef SUBSYS
+#undef DEFAULT_SUBSYS
+};
+
+namespace crimson {
+seastar::logger& get_logger(int subsys) {
+ assert(subsys < ceph_subsys_max);
+ return loggers[subsys];
+}
+}
diff --git a/src/crimson/common/log.h b/src/crimson/common/log.h
new file mode 100644
index 000000000..635349098
--- /dev/null
+++ b/src/crimson/common/log.h
@@ -0,0 +1,24 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <seastar/util/log.hh>
+#include "common/subsys_types.h"
+
+namespace crimson {
+seastar::logger& get_logger(int subsys);
+static inline seastar::log_level to_log_level(int level) {
+ if (level < 0) {
+ return seastar::log_level::error;
+ } else if (level < 1) {
+ return seastar::log_level::warn;
+ } else if (level < 5) {
+ return seastar::log_level::info;
+ } else if (level <= 20) {
+ return seastar::log_level::debug;
+ } else {
+ return seastar::log_level::trace;
+ }
+}
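+
+// A usage sketch: traditional ceph debug levels map onto the coarser
+// seastar levels, e.g. a "dout(20)"-style level 20 becomes
+// seastar::log_level::debug and anything above 20 becomes trace:
+//
+//   crimson::get_logger(ceph_subsys_osd).log(
+//     crimson::to_log_level(20), "mapped to seastar debug level");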
+}
diff --git a/src/crimson/common/perf_counters_collection.cc b/src/crimson/common/perf_counters_collection.cc
new file mode 100644
index 000000000..af80dbcc2
--- /dev/null
+++ b/src/crimson/common/perf_counters_collection.cc
@@ -0,0 +1,25 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "perf_counters_collection.h"
+
+namespace crimson::common {
+PerfCountersCollection::PerfCountersCollection()
+{
+ perf_collection = std::make_unique<PerfCountersCollectionImpl>();
+}
+PerfCountersCollection::~PerfCountersCollection()
+{
+ perf_collection->clear();
+}
+
+PerfCountersCollectionImpl* PerfCountersCollection::get_perf_collection()
+{
+ return perf_collection.get();
+}
+
+PerfCountersCollection::ShardedPerfCountersCollection PerfCountersCollection::sharded_perf_coll;
+
+}
+
+
diff --git a/src/crimson/common/perf_counters_collection.h b/src/crimson/common/perf_counters_collection.h
new file mode 100644
index 000000000..a19630247
--- /dev/null
+++ b/src/crimson/common/perf_counters_collection.h
@@ -0,0 +1,37 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "common/perf_counters.h"
+#include <seastar/core/sharded.hh>
+
+using crimson::common::PerfCountersCollectionImpl;
+namespace crimson::common {
+class PerfCountersCollection: public seastar::sharded<PerfCountersCollection>
+{
+ using ShardedPerfCountersCollection = seastar::sharded<PerfCountersCollection>;
+
+private:
+ std::unique_ptr<PerfCountersCollectionImpl> perf_collection;
+ static ShardedPerfCountersCollection sharded_perf_coll;
+ friend PerfCountersCollection& local_perf_coll();
+ friend ShardedPerfCountersCollection& sharded_perf_coll();
+
+public:
+ PerfCountersCollection();
+ ~PerfCountersCollection();
+ PerfCountersCollectionImpl* get_perf_collection();
+
+};
+
+inline PerfCountersCollection::ShardedPerfCountersCollection& sharded_perf_coll() {
+ return PerfCountersCollection::sharded_perf_coll;
+}
+
+inline PerfCountersCollection& local_perf_coll() {
+ return PerfCountersCollection::sharded_perf_coll.local();
+}
+
+}
+
diff --git a/src/crimson/common/shared_lru.h b/src/crimson/common/shared_lru.h
new file mode 100644
index 000000000..4c1da401e
--- /dev/null
+++ b/src/crimson/common/shared_lru.h
@@ -0,0 +1,178 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <boost/smart_ptr/local_shared_ptr.hpp>
+#include <boost/smart_ptr/weak_ptr.hpp>
+#include "simple_lru.h"
+
+/// SharedLRU does its best to cache objects. It not only tracks the objects
+/// in its LRU cache with strong references, it also tracks objects with
+/// weak_ptr even if the cache does not hold any strong references to them,
+/// so that it can return the objects after they are evicted, as long as
+/// they have ever been cached and have not been destroyed yet.
+template<class K, class V>
+class SharedLRU {
+ using shared_ptr_t = boost::local_shared_ptr<V>;
+ using weak_ptr_t = boost::weak_ptr<V>;
+ using value_type = std::pair<K, shared_ptr_t>;
+
+ // weak_refs is already ordered, and we don't use accessors like
+ // LRUCache::lower_bound(), so unordered LRUCache would suffice.
+ SimpleLRU<K, shared_ptr_t, false> cache;
+ std::map<K, std::pair<weak_ptr_t, V*>> weak_refs;
+
+ struct Deleter {
+ SharedLRU<K,V>* cache;
+ const K key;
+ void operator()(V* ptr) {
+ cache->_erase_weak(key);
+ delete ptr;
+ }
+ };
+ void _erase_weak(const K& key) {
+ weak_refs.erase(key);
+ }
+public:
+ SharedLRU(size_t max_size = 20)
+ : cache{max_size}
+ {}
+ ~SharedLRU() {
+ cache.clear();
+    // use plain assert() in utility classes to avoid dependencies on logging
+ assert(weak_refs.empty());
+ }
+  /**
+   * Returns a reference to the value for the given key, performing an
+   * insertion of a default-constructed value if no such key exists yet.
+   */
+ shared_ptr_t operator[](const K& key);
+ /**
+ * Returns true iff there are no live references left to anything that has been
+ * in the cache.
+ */
+ bool empty() const {
+ return weak_refs.empty();
+ }
+ size_t size() const {
+ return cache.size();
+ }
+ size_t capacity() const {
+ return cache.capacity();
+ }
+  /**
+   * Inserts the given value for the key unless a live value already
+   * exists for it, and returns a shared reference to the cached value.
+   * If the key is already present, the newly passed value is destroyed
+   * (its ownership was transferred via the unique_ptr) and the existing
+   * value is returned instead.
+   *
+   * @param key The key to insert
+   * @param value The value that goes with the key
+   * @return A shared reference to the cached value for the given key
+   */
+ shared_ptr_t insert(const K& key, std::unique_ptr<V> value);
+ // clear all strong reference from the lru.
+ void clear() {
+ cache.clear();
+ }
+ shared_ptr_t find(const K& key);
+ // return the last element that is not greater than key
+ shared_ptr_t lower_bound(const K& key);
+ // return the first element that is greater than key
+ std::optional<value_type> upper_bound(const K& key);
+
+ void erase(const K& key) {
+ cache.erase(key);
+ _erase_weak(key);
+ }
+};
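+
+// A minimal usage sketch: a value stays retrievable for as long as any
+// strong reference to it is alive, even after the LRU itself lets go.
+//
+//   SharedLRU<int, std::string> cache{2};
+//   auto a = cache.insert(1, std::make_unique<std::string>("one"));
+//   auto b = cache.find(1);       // shares ownership with `a`
+//   cache.clear();                // drops the LRU's strong references
+//   assert(cache.find(1) == b);   // still reachable through the weak ref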
+
+template<class K, class V>
+typename SharedLRU<K,V>::shared_ptr_t
+SharedLRU<K,V>::insert(const K& key, std::unique_ptr<V> value)
+{
+ shared_ptr_t val;
+ if (auto found = weak_refs.find(key); found != weak_refs.end()) {
+ val = found->second.first.lock();
+ }
+ if (!val) {
+ val.reset(value.release(), Deleter{this, key});
+ weak_refs.emplace(key, std::make_pair(val, val.get()));
+ }
+ cache.insert(key, val);
+ return val;
+}
+
+template<class K, class V>
+typename SharedLRU<K,V>::shared_ptr_t
+SharedLRU<K,V>::operator[](const K& key)
+{
+ if (auto found = cache.find(key); found) {
+ return *found;
+ }
+ shared_ptr_t val;
+ if (auto found = weak_refs.find(key); found != weak_refs.end()) {
+ val = found->second.first.lock();
+ }
+ if (!val) {
+ val.reset(new V{}, Deleter{this, key});
+ weak_refs.emplace(key, std::make_pair(val, val.get()));
+ }
+ cache.insert(key, val);
+ return val;
+}
+
+template<class K, class V>
+typename SharedLRU<K,V>::shared_ptr_t
+SharedLRU<K,V>::find(const K& key)
+{
+ if (auto found = cache.find(key); found) {
+ return *found;
+ }
+ shared_ptr_t val;
+ if (auto found = weak_refs.find(key); found != weak_refs.end()) {
+ val = found->second.first.lock();
+ }
+ if (val) {
+ cache.insert(key, val);
+ }
+ return val;
+}
+
+template<class K, class V>
+typename SharedLRU<K,V>::shared_ptr_t
+SharedLRU<K,V>::lower_bound(const K& key)
+{
+ if (weak_refs.empty()) {
+ return {};
+ }
+ auto found = weak_refs.lower_bound(key);
+ if (found == weak_refs.end()) {
+ --found;
+ }
+ if (auto val = found->second.first.lock(); val) {
+ cache.insert(key, val);
+ return val;
+ } else {
+ return {};
+ }
+}
+
+template<class K, class V>
+std::optional<typename SharedLRU<K,V>::value_type>
+SharedLRU<K,V>::upper_bound(const K& key)
+{
+ for (auto found = weak_refs.upper_bound(key);
+ found != weak_refs.end();
+ ++found) {
+ if (auto val = found->second.first.lock(); val) {
+ return std::make_pair(found->first, val);
+ }
+ }
+ return std::nullopt;
+}
diff --git a/src/crimson/common/simple_lru.h b/src/crimson/common/simple_lru.h
new file mode 100644
index 000000000..1419c4885
--- /dev/null
+++ b/src/crimson/common/simple_lru.h
@@ -0,0 +1,141 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <list>
+#include <map>
+#include <optional>
+#include <type_traits>
+#include <unordered_map>
+
+template <class Key, class Value, bool Ordered>
+class SimpleLRU {
+ static_assert(std::is_default_constructible_v<Value>);
+ using list_type = std::list<Key>;
+ template<class K, class V>
+ using map_t = std::conditional_t<Ordered,
+ std::map<K, V>,
+ std::unordered_map<K, V>>;
+ using map_type = map_t<Key, std::pair<Value, typename list_type::iterator>>;
+ list_type lru;
+ map_type cache;
+ const size_t max_size;
+
+public:
+ SimpleLRU(size_t size = 20)
+ : cache(size),
+ max_size(size)
+ {}
+ size_t size() const {
+ return cache.size();
+ }
+ size_t capacity() const {
+ return max_size;
+ }
+ using insert_return_type = std::pair<Value, bool>;
+ insert_return_type insert(const Key& key, Value value);
+ std::optional<Value> find(const Key& key);
+  std::optional<Value> lower_bound(const Key& key);  // only valid when Ordered
+ void erase(const Key& key);
+ void clear();
+private:
+ // bump the item to the front of the lru list
+ Value _lru_add(typename map_type::iterator found);
+ // evict the last element of most recently used list
+ void _evict();
+};
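+
+// A minimal usage sketch of the ordered variant:
+//
+//   SimpleLRU<int, std::string, true> lru{2};
+//   lru.insert(1, "one");
+//   lru.insert(2, "two");
+//   lru.insert(3, "three");  // at capacity: the least recently used
+//                            // entry (key 1) is evicted
+//   assert(!lru.find(1));
+//   assert(lru.find(2));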
+
+template <class Key, class Value, bool Ordered>
+typename SimpleLRU<Key,Value,Ordered>::insert_return_type
+SimpleLRU<Key,Value,Ordered>::insert(const Key& key, Value value)
+{
+ if constexpr(Ordered) {
+ auto found = cache.lower_bound(key);
+ if (found != cache.end() && found->first == key) {
+ // already exists
+ return {found->second.first, true};
+ } else {
+ if (size() >= capacity()) {
+ _evict();
+ }
+ lru.push_front(key);
+ // use lower_bound as hint to save the lookup
+ cache.emplace_hint(found, key, std::make_pair(value, lru.begin()));
+ return {std::move(value), false};
+ }
+ } else {
+ // cache is not ordered
+ auto found = cache.find(key);
+ if (found != cache.end()) {
+ // already exists
+ return {found->second.first, true};
+ } else {
+ if (size() >= capacity()) {
+ _evict();
+ }
+ lru.push_front(key);
+ cache.emplace(key, std::make_pair(value, lru.begin()));
+ return {std::move(value), false};
+ }
+ }
+}
+
+template <class Key, class Value, bool Ordered>
+std::optional<Value> SimpleLRU<Key,Value,Ordered>::find(const Key& key)
+{
+ if (auto found = cache.find(key); found != cache.end()){
+ return _lru_add(found);
+ } else {
+ return {};
+ }
+}
+
+template <class Key, class Value, bool Ordered>
+template <bool O>
+std::enable_if_t<O, std::optional<Value>>
+SimpleLRU<Key,Value,Ordered>::lower_bound(const Key& key)
+{
+ if (auto found = cache.lower_bound(key); found != cache.end()) {
+ return _lru_add(found);
+ } else {
+ return {};
+ }
+}
+
+template <class Key, class Value, bool Ordered>
+void SimpleLRU<Key,Value,Ordered>::clear()
+{
+ lru.clear();
+ cache.clear();
+}
+
+template <class Key, class Value, bool Ordered>
+void SimpleLRU<Key,Value,Ordered>::erase(const Key& key)
+{
+ if (auto found = cache.find(key); found != cache.end()) {
+ lru.erase(found->second.second);
+ cache.erase(found);
+ }
+}
+
+template <class Key, class Value, bool Ordered>
+Value SimpleLRU<Key,Value,Ordered>::_lru_add(
+ typename SimpleLRU<Key,Value,Ordered>::map_type::iterator found)
+{
+ auto& [value, in_lru] = found->second;
+  if (in_lru != lru.begin()) {
+    // move the item to the front
+    lru.splice(lru.begin(), lru, in_lru);
+  }
+  // otherwise it is already at the front
+ return value;
+}
+
+template <class Key, class Value, bool Ordered>
+void SimpleLRU<Key,Value,Ordered>::_evict()
+{
+  // the back of the lru list is the least recently used element
+ auto last = --lru.end();
+ cache.erase(*last);
+ lru.erase(last);
+}
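For contrast, a short sketch of SimpleLRU at capacity (illustrative only, not part of the patch): insert() reports an already-present key by returning the existing value with a true flag, and find() bumps a hit to the front of the lru list, so the element evicted at capacity is the least recently used one:

#include <cassert>
#include <string>
#include "crimson/common/simple_lru.h"

void simple_lru_demo()
{
  SimpleLRU<int, std::string, false> cache(2);  // capacity of two
  cache.insert(1, "one");
  cache.insert(2, "two");
  assert(cache.find(1));     // bumps key 1 to the front of the lru list
  cache.insert(3, "three");  // at capacity: evicts key 2, the LRU entry
  assert(!cache.find(2));
  assert(cache.size() == 2);
}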
diff --git a/src/crimson/common/throttle.cc b/src/crimson/common/throttle.cc
new file mode 100644
index 000000000..bd9195181
--- /dev/null
+++ b/src/crimson/common/throttle.cc
@@ -0,0 +1,59 @@
+#include "throttle.h"
+
+namespace crimson::common {
+
+int64_t Throttle::take(int64_t c)
+{
+ if (!max) {
+ return 0;
+ }
+ count += c;
+ return count;
+}
+
+int64_t Throttle::put(int64_t c)
+{
+ if (!max) {
+ return 0;
+ }
+ if (!c) {
+ return count;
+ }
+  count -= c;
+  // waiters re-check _should_wait() in a later reactor task, so they
+  // observe the decremented count
+  on_free_slots.signal();
+  return count;
+}
+
+seastar::future<> Throttle::get(size_t c)
+{
+ if (!max) {
+ return seastar::make_ready_future<>();
+ }
+ return on_free_slots.wait([this, c] {
+ return !_should_wait(c);
+ }).then([this, c] {
+ count += c;
+ return seastar::make_ready_future<>();
+ });
+}
+
+void Throttle::reset_max(size_t m) {
+ if (max == m) {
+ return;
+ }
+
+ if (m > max) {
+ on_free_slots.signal();
+ }
+ max = m;
+}
+
+bool Throttle::_should_wait(size_t c) const {
+ if (!max) {
+ return false;
+ }
+ return ((c <= max && count + c > max) || // normally stay under max
+ (c >= max && count > max)); // except for large c
+}
+
+} // namespace crimson::common
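To make _should_wait() concrete, here are a few worked values (illustrative only) with max = 10:

  count = 5,  c = 4   ->  5 + 4 <= 10                -> proceed
  count = 8,  c = 4   ->  8 + 4 >  10                -> wait
  count = 5,  c = 25  ->  c >= max but count <= max  -> proceed
                          (a request larger than max could otherwise
                           never be admitted)
  count = 11, c = 25  ->  count already over max     -> wait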
diff --git a/src/crimson/common/throttle.h b/src/crimson/common/throttle.h
new file mode 100644
index 000000000..fea471c8d
--- /dev/null
+++ b/src/crimson/common/throttle.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <seastar/core/condition-variable.hh>
+// pull in the seastar::timer<...>::timer definitions. FIXME: either seastar
+// should be fixed, or reactor.hh is obligatory and must be included everywhere
+#include <seastar/core/reactor.hh>
+
+#include "common/ThrottleInterface.h"
+
+namespace crimson::common {
+
+class Throttle final : public ThrottleInterface {
+ size_t max = 0;
+ size_t count = 0;
+ // we cannot change the "count" of seastar::semaphore after it is created,
+ // so use condition_variable instead.
+ seastar::condition_variable on_free_slots;
+public:
+ explicit Throttle(size_t m)
+ : max(m)
+ {}
+ int64_t take(int64_t c = 1) override;
+ int64_t put(int64_t c = 1) override;
+ seastar::future<> get(size_t c);
+ size_t get_current() const {
+ return count;
+ }
+ size_t get_max() const {
+ return max;
+ }
+ void reset_max(size_t m);
+private:
+ bool _should_wait(size_t c) const;
+};
+
+} // namespace crimson::common
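A hedged usage sketch of the Throttle: acquire cost units before an asynchronous operation and release them afterwards. do_io() is a hypothetical operation, not part of this patch:

#include <seastar/core/future.hh>
#include "crimson/common/throttle.h"

seastar::future<> do_io();  // hypothetical asynchronous operation

seastar::future<> submit(crimson::common::Throttle& throttle, size_t cost)
{
  // get() resolves once `cost` units fit under the configured maximum
  return throttle.get(cost).then([&throttle, cost] {
    return do_io().finally([&throttle, cost] {
      throttle.put(cost);  // wakes the next waiter, if any
    });
  });
}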
diff --git a/src/crimson/common/tri_mutex.cc b/src/crimson/common/tri_mutex.cc
new file mode 100644
index 000000000..c18aff1a0
--- /dev/null
+++ b/src/crimson/common/tri_mutex.cc
@@ -0,0 +1,225 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tri_mutex.h"
+
+seastar::future<> read_lock::lock()
+{
+ return static_cast<tri_mutex*>(this)->lock_for_read();
+}
+
+void read_lock::unlock()
+{
+ static_cast<tri_mutex*>(this)->unlock_for_read();
+}
+
+seastar::future<> write_lock::lock()
+{
+ return static_cast<tri_mutex*>(this)->lock_for_write(false);
+}
+
+void write_lock::unlock()
+{
+ static_cast<tri_mutex*>(this)->unlock_for_write();
+}
+
+seastar::future<> excl_lock::lock()
+{
+ return static_cast<tri_mutex*>(this)->lock_for_excl();
+}
+
+void excl_lock::unlock()
+{
+ static_cast<tri_mutex*>(this)->unlock_for_excl();
+}
+
+seastar::future<> excl_lock_from_read::lock()
+{
+ static_cast<tri_mutex*>(this)->promote_from_read();
+ return seastar::make_ready_future<>();
+}
+
+void excl_lock_from_read::unlock()
+{
+ static_cast<tri_mutex*>(this)->demote_to_read();
+}
+
+seastar::future<> excl_lock_from_write::lock()
+{
+ static_cast<tri_mutex*>(this)->promote_from_write();
+ return seastar::make_ready_future<>();
+}
+
+void excl_lock_from_write::unlock()
+{
+ static_cast<tri_mutex*>(this)->demote_to_write();
+}
+
+seastar::future<> excl_lock_from_excl::lock()
+{
+ return seastar::make_ready_future<>();
+}
+
+void excl_lock_from_excl::unlock()
+{
+}
+
+tri_mutex::~tri_mutex()
+{
+ assert(!is_acquired());
+}
+
+seastar::future<> tri_mutex::lock_for_read()
+{
+ if (try_lock_for_read()) {
+ return seastar::make_ready_future<>();
+ }
+ waiters.emplace_back(seastar::promise<>(), type_t::read);
+ return waiters.back().pr.get_future();
+}
+
+bool tri_mutex::try_lock_for_read() noexcept
+{
+ if (!writers && !exclusively_used && waiters.empty()) {
+ ++readers;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+void tri_mutex::unlock_for_read()
+{
+ assert(readers > 0);
+ if (--readers == 0) {
+ wake();
+ }
+}
+
+void tri_mutex::promote_from_read()
+{
+ assert(readers == 1);
+ --readers;
+ exclusively_used = true;
+}
+
+void tri_mutex::demote_to_read()
+{
+ assert(exclusively_used);
+ exclusively_used = false;
+ ++readers;
+}
+
+seastar::future<> tri_mutex::lock_for_write(bool greedy)
+{
+ if (try_lock_for_write(greedy)) {
+ return seastar::make_ready_future<>();
+ }
+ waiters.emplace_back(seastar::promise<>(), type_t::write);
+ return waiters.back().pr.get_future();
+}
+
+bool tri_mutex::try_lock_for_write(bool greedy) noexcept
+{
+ if (!readers && !exclusively_used) {
+ if (greedy || waiters.empty()) {
+ ++writers;
+ return true;
+ }
+ }
+ return false;
+}
+
+void tri_mutex::unlock_for_write()
+{
+ assert(writers > 0);
+ if (--writers == 0) {
+ wake();
+ }
+}
+
+void tri_mutex::promote_from_write()
+{
+ assert(writers == 1);
+ --writers;
+ exclusively_used = true;
+}
+
+void tri_mutex::demote_to_write()
+{
+ assert(exclusively_used);
+ exclusively_used = false;
+ ++writers;
+}
+
+// for exclusive users
+seastar::future<> tri_mutex::lock_for_excl()
+{
+ if (try_lock_for_excl()) {
+ return seastar::make_ready_future<>();
+ }
+ waiters.emplace_back(seastar::promise<>(), type_t::exclusive);
+ return waiters.back().pr.get_future();
+}
+
+bool tri_mutex::try_lock_for_excl() noexcept
+{
+ if (!readers && !writers && !exclusively_used) {
+ exclusively_used = true;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+void tri_mutex::unlock_for_excl()
+{
+ assert(exclusively_used);
+ exclusively_used = false;
+ wake();
+}
+
+bool tri_mutex::is_acquired() const
+{
+ if (readers) {
+ return true;
+ } else if (writers) {
+ return true;
+ } else if (exclusively_used) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+void tri_mutex::wake()
+{
+ assert(!readers && !writers && !exclusively_used);
+ type_t type = type_t::none;
+ while (!waiters.empty()) {
+ auto& waiter = waiters.front();
+ if (type == type_t::exclusive) {
+ break;
+    } else if (type == type_t::none) {
+ type = waiter.type;
+ } else if (type != waiter.type) {
+ // to be woken in the next batch
+ break;
+ }
+ switch (type) {
+ case type_t::read:
+ ++readers;
+ break;
+ case type_t::write:
+ ++writers;
+ break;
+ case type_t::exclusive:
+ exclusively_used = true;
+ break;
+ default:
+ assert(0);
+ }
+ waiter.pr.set_value();
+ waiters.pop_front();
+ }
+}
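The batching in wake() is easiest to see on an example queue (illustrative only). Waiters are admitted front-to-back, one lock type per batch, and an exclusive waiter always forms a batch of its own:

  suppose the queue is [R, R, W, X, R] when the last holder unlocks:
  - wake() admits the two leading readers (readers = 2) and stops at W,
    since a different type belongs to the next batch
  - when the second reader unlocks, wake() admits the writer
  - when the writer unlocks, wake() admits X and stops immediately,
    because type_t::exclusive never shares a batch
  - the trailing R is admitted only after the exclusive user unlocks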
diff --git a/src/crimson/common/tri_mutex.h b/src/crimson/common/tri_mutex.h
new file mode 100644
index 000000000..127573b3a
--- /dev/null
+++ b/src/crimson/common/tri_mutex.h
@@ -0,0 +1,156 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <seastar/core/future.hh>
+#include <seastar/core/circular_buffer.hh>
+
+class read_lock {
+public:
+ seastar::future<> lock();
+ void unlock();
+};
+
+class write_lock {
+public:
+ seastar::future<> lock();
+ void unlock();
+};
+
+class excl_lock {
+public:
+ seastar::future<> lock();
+ void unlock();
+};
+
+// promote from read to excl
+class excl_lock_from_read {
+public:
+ seastar::future<> lock();
+ void unlock();
+};
+
+// promote from write to excl
+class excl_lock_from_write {
+public:
+ seastar::future<> lock();
+ void unlock();
+};
+
+// "promote" from excl to excl: lock() and unlock() are no-ops
+class excl_lock_from_excl {
+public:
+ seastar::future<> lock();
+ void unlock();
+};
+
+/// shared/exclusive mutual exclusion
+///
+/// the way this lock uses "reader" and "writer" is entirely independent of
+/// the conventional reader/writer lock semantics. Here, what we mean is that
+/// we can pipeline reads, and we can pipeline writes, but we cannot allow a
+/// read while writes are in progress or a write while reads are in progress.
+/// Any rmw operation is therefore exclusive.
+///
+/// tri_mutex is based on seastar::shared_mutex, but instead of two kinds of
+/// waiters, tri_mutex keeps track of three kinds of lock users:
+/// - readers
+/// - writers
+/// - exclusive users
+class tri_mutex : private read_lock,
+ write_lock,
+ excl_lock,
+ excl_lock_from_read,
+ excl_lock_from_write,
+ excl_lock_from_excl
+{
+public:
+ tri_mutex() = default;
+ ~tri_mutex();
+
+ read_lock& for_read() {
+ return *this;
+ }
+ write_lock& for_write() {
+ return *this;
+ }
+ excl_lock& for_excl() {
+ return *this;
+ }
+ excl_lock_from_read& excl_from_read() {
+ return *this;
+ }
+ excl_lock_from_write& excl_from_write() {
+ return *this;
+ }
+  excl_lock_from_excl& excl_from_excl() {
+ return *this;
+ }
+
+ // for shared readers
+ seastar::future<> lock_for_read();
+ bool try_lock_for_read() noexcept;
+ void unlock_for_read();
+ void promote_from_read();
+ void demote_to_read();
+ unsigned get_readers() const {
+ return readers;
+ }
+
+ // for shared writers
+ seastar::future<> lock_for_write(bool greedy);
+ bool try_lock_for_write(bool greedy) noexcept;
+ void unlock_for_write();
+ void promote_from_write();
+ void demote_to_write();
+ unsigned get_writers() const {
+ return writers;
+ }
+
+ // for exclusive users
+ seastar::future<> lock_for_excl();
+ bool try_lock_for_excl() noexcept;
+ void unlock_for_excl();
+ bool is_excl_acquired() const {
+ return exclusively_used;
+ }
+
+ bool is_acquired() const;
+
+  /// pass the provided exception to all pending waiters
+ template<typename Exception>
+ void abort(Exception ex) {
+ while (!waiters.empty()) {
+ auto& waiter = waiters.front();
+ waiter.pr.set_exception(std::make_exception_ptr(ex));
+ waiters.pop_front();
+ }
+ }
+
+private:
+ void wake();
+ unsigned readers = 0;
+ unsigned writers = 0;
+ bool exclusively_used = false;
+ enum class type_t : uint8_t {
+ read,
+ write,
+ exclusive,
+ none,
+ };
+ struct waiter_t {
+ waiter_t(seastar::promise<>&& pr, type_t type)
+ : pr(std::move(pr)), type(type)
+ {}
+ seastar::promise<> pr;
+ type_t type;
+ };
+ seastar::circular_buffer<waiter_t> waiters;
+ friend class read_lock;
+ friend class write_lock;
+ friend class excl_lock;
+ friend class excl_lock_from_read;
+ friend class excl_lock_from_write;
+ friend class excl_lock_from_excl;
+};
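A minimal usage sketch of tri_mutex; do_read() is hypothetical. lock_for_read() pipelines with other readers, and the same pattern applies to lock_for_write() and lock_for_excl():

#include <seastar/core/future.hh>
#include "crimson/common/tri_mutex.h"

seastar::future<> do_read();  // hypothetical asynchronous read

seastar::future<> read_side(tri_mutex& mtx)
{
  return mtx.lock_for_read().then([&mtx] {
    return do_read().finally([&mtx] {
      mtx.unlock_for_read();  // may wake the next batch of waiters
    });
  });
}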
diff --git a/src/crimson/common/type_helpers.h b/src/crimson/common/type_helpers.h
new file mode 100644
index 000000000..4c606581f
--- /dev/null
+++ b/src/crimson/common/type_helpers.h
@@ -0,0 +1,8 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "boost/intrusive_ptr.hpp"
+
+template<typename T> using Ref = boost::intrusive_ptr<T>;
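boost::intrusive_ptr embeds the reference count in the pointee and finds its hooks by argument-dependent lookup. A minimal standalone sketch of a type usable with Ref<T> (in the Ceph tree this role is normally played by RefCountedObject; the type below is illustrative):

#include <atomic>
#include "crimson/common/type_helpers.h"

struct MyObject {
  std::atomic<unsigned> nref{0};
};

inline void intrusive_ptr_add_ref(MyObject* o) {
  o->nref.fetch_add(1, std::memory_order_relaxed);
}

inline void intrusive_ptr_release(MyObject* o) {
  if (o->nref.fetch_sub(1, std::memory_order_acq_rel) == 1) {
    delete o;
  }
}

Ref<MyObject> make_object() {
  return Ref<MyObject>{new MyObject{}};  // add_ref brings nref to 1
}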