Diffstat
24 files changed, 4139 insertions, 0 deletions
diff --git a/src/crimson/common/assert.cc b/src/crimson/common/assert.cc new file mode 100644 index 000000000..07610c33f --- /dev/null +++ b/src/crimson/common/assert.cc @@ -0,0 +1,81 @@ +#include <cstdarg> +#include <iostream> + +#include <seastar/util/backtrace.hh> +#include <seastar/core/reactor.hh> + +#include "include/ceph_assert.h" + +#include "crimson/common/log.h" + +namespace ceph { + [[gnu::cold]] void __ceph_assert_fail(const ceph::assert_data &ctx) + { + __ceph_assert_fail(ctx.assertion, ctx.file, ctx.line, ctx.function); + } + + [[gnu::cold]] void __ceph_assert_fail(const char* assertion, + const char* file, int line, + const char* func) + { + seastar::logger& logger = crimson::get_logger(0); + logger.error("{}:{} : In function '{}', ceph_assert({})\n" + "{}", + file, line, func, assertion, + seastar::current_backtrace()); + std::cout << std::flush; + abort(); + } + [[gnu::cold]] void __ceph_assertf_fail(const char *assertion, + const char *file, int line, + const char *func, const char* msg, + ...) + { + char buf[8096]; + va_list args; + va_start(args, msg); + std::vsnprintf(buf, sizeof(buf), msg, args); + va_end(args); + + seastar::logger& logger = crimson::get_logger(0); + logger.error("{}:{} : In function '{}', ceph_assert({})\n" + "{}\n{}\n", + file, line, func, assertion, + buf, + seastar::current_backtrace()); + std::cout << std::flush; + abort(); + } + + [[gnu::cold]] void __ceph_abort(const char* file, int line, + const char* func, const std::string& msg) + { + seastar::logger& logger = crimson::get_logger(0); + logger.error("{}:{} : In function '{}', abort({})\n" + "{}", + file, line, func, msg, + seastar::current_backtrace()); + std::cout << std::flush; + abort(); + } + + [[gnu::cold]] void __ceph_abortf(const char* file, int line, + const char* func, const char* fmt, + ...) 
+ { + char buf[8096]; + va_list args; + va_start(args, fmt); + std::vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + seastar::logger& logger = crimson::get_logger(0); + logger.error("{}:{} : In function '{}', abort()\n" + "{}\n{}\n", + file, line, func, + buf, + seastar::current_backtrace()); + std::cout << std::flush; + abort(); + } +} diff --git a/src/crimson/common/auth_handler.h b/src/crimson/common/auth_handler.h new file mode 100644 index 000000000..d4140b6a2 --- /dev/null +++ b/src/crimson/common/auth_handler.h @@ -0,0 +1,17 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +class EntityName; +class AuthCapsInfo; + +namespace crimson::common { +class AuthHandler { +public: + // the peer just got authorized + virtual void handle_authentication(const EntityName& name, + const AuthCapsInfo& caps) = 0; + virtual ~AuthHandler() = default; +}; +} diff --git a/src/crimson/common/buffer_io.cc b/src/crimson/common/buffer_io.cc new file mode 100644 index 000000000..86edf7a6f --- /dev/null +++ b/src/crimson/common/buffer_io.cc @@ -0,0 +1,57 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "buffer_io.h" + +#include <seastar/core/reactor.hh> +#include <seastar/core/fstream.hh> +#include <seastar/core/do_with.hh> + +#include "include/buffer.h" + +namespace crimson { + +seastar::future<> write_file(ceph::buffer::list&& bl, + seastar::sstring fn, + seastar::file_permissions permissions) +{ + const auto flags = (seastar::open_flags::wo | + seastar::open_flags::create | + seastar::open_flags::truncate); + seastar::file_open_options foo; + foo.create_permissions = permissions; + return seastar::open_file_dma(fn, flags, foo).then( + [bl=std::move(bl)](seastar::file f) { + return seastar::make_file_output_stream(f).then( + [bl=std::move(bl), f=std::move(f)](seastar::output_stream<char> out) { + return seastar::do_with(std::move(out), + std::move(f), + std::move(bl), + [](seastar::output_stream<char>& out, + seastar::file& f, + ceph::buffer::list& bl) { + return seastar::do_for_each(bl.buffers(), [&out](auto& buf) { + return out.write(buf.c_str(), buf.length()); + }).then([&out] { + return out.close(); + }); + }); + }); + }); +} + +seastar::future<seastar::temporary_buffer<char>> +read_file(const seastar::sstring fn) +{ + return seastar::open_file_dma(fn, seastar::open_flags::ro).then( + [] (seastar::file f) { + return f.size().then([f = std::move(f)](size_t s) { + return seastar::do_with(seastar::make_file_input_stream(f), + [s](seastar::input_stream<char>& in) { + return in.read_exactly(s); + }); + }); + }); +} + +} diff --git a/src/crimson/common/buffer_io.h b/src/crimson/common/buffer_io.h new file mode 100644 index 000000000..c5ece4a6f --- /dev/null +++ b/src/crimson/common/buffer_io.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <seastar/core/future.hh> +#include <seastar/core/file-types.hh> + +#include "include/buffer_fwd.h" + +namespace crimson { + seastar::future<> write_file(ceph::buffer::list&& bl, + seastar::sstring fn, + seastar::file_permissions= // 0644 + (seastar::file_permissions::user_read | + seastar::file_permissions::user_write | + seastar::file_permissions::group_read | + seastar::file_permissions::others_read)); + seastar::future<seastar::temporary_buffer<char>> + read_file(const seastar::sstring fn); +} diff --git 
a/src/crimson/common/config_proxy.cc b/src/crimson/common/config_proxy.cc new file mode 100644 index 000000000..88d4679d5 --- /dev/null +++ b/src/crimson/common/config_proxy.cc @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "config_proxy.h" + +#include <filesystem> + +#include "crimson/common/buffer_io.h" + +namespace crimson::common { + +ConfigProxy::ConfigProxy(const EntityName& name, std::string_view cluster) +{ + if (seastar::this_shard_id() != 0) { + return; + } + // set the initial value on CPU#0 + values.reset(seastar::make_lw_shared<ConfigValues>()); + values.get()->name = name; + values.get()->cluster = cluster; + // and the only copy of md_config_impl<> is allocated on CPU#0 + local_config.reset(new md_config_t{*values, obs_mgr, true}); + if (name.is_mds()) { + local_config->set_val_default(*values, obs_mgr, + "keyring", "$mds_data/keyring"); + } else if (name.is_osd()) { + local_config->set_val_default(*values, obs_mgr, + "keyring", "$osd_data/keyring"); + } +} + +seastar::future<> ConfigProxy::start() +{ + // populate values and config to all other shards + if (!values) { + return seastar::make_ready_future<>(); + } + return container().invoke_on_others([this](auto& proxy) { + return values.copy().then([config=local_config.get(), + &proxy](auto foreign_values) { + proxy.values.reset(); + proxy.values = std::move(foreign_values); + proxy.remote_config = config; + return seastar::make_ready_future<>(); + }); + }); +} + +void ConfigProxy::show_config(ceph::Formatter* f) const { + get_config().show_config(*values, f); +} + +seastar::future<> ConfigProxy::parse_config_files(const std::string& conf_files) +{ + auto conffile_paths = + get_config().get_conffile_paths(*values, + conf_files.empty() ? nullptr : conf_files.c_str(), + &std::cerr, + CODE_ENVIRONMENT_DAEMON); + return seastar::do_with(std::move(conffile_paths), [this] (auto& paths) { + return seastar::repeat([path=paths.begin(), e=paths.end(), this]() mutable { + if (path == e) { + // tried all conffile, none of them works + return seastar::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::yes); + } + return crimson::read_file(*path++).then([this](auto&& buf) { + return do_change([buf=std::move(buf), this](ConfigValues& values) { + if (get_config().parse_buffer(values, obs_mgr, + buf.get(), buf.size(), + &std::cerr) == 0) { + get_config().update_legacy_vals(values); + } else { + throw std::invalid_argument("parse error"); + } + }).then([] { + // this one works! 
+ return seastar::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::yes); + }); + }).handle_exception_type([] (const std::filesystem::filesystem_error&) { + return seastar::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::no); + }).handle_exception_type([] (const std::invalid_argument&) { + return seastar::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::no); + }); + }); + }); +} + +ConfigProxy::ShardedConfig ConfigProxy::sharded_conf; +} diff --git a/src/crimson/common/config_proxy.h b/src/crimson/common/config_proxy.h new file mode 100644 index 000000000..f50a63431 --- /dev/null +++ b/src/crimson/common/config_proxy.h @@ -0,0 +1,200 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <seastar/core/reactor.hh> +#include <seastar/core/sharded.hh> +#include "common/config.h" +#include "common/config_obs.h" +#include "common/config_obs_mgr.h" +#include "common/errno.h" + +namespace ceph { +class Formatter; +} + +namespace crimson::common { + +// a facade for managing config. each shard has its own copy of ConfigProxy. +// +// In seastar-osd, there could be multiple instances of @c ConfigValues in a +// single process, as we are using a variant of read-copy-update mechinary to +// update the settings at runtime. +class ConfigProxy : public seastar::peering_sharded_service<ConfigProxy> +{ + using LocalConfigValues = seastar::lw_shared_ptr<ConfigValues>; + seastar::foreign_ptr<LocalConfigValues> values; + + md_config_t* remote_config = nullptr; + std::unique_ptr<md_config_t> local_config; + + using ConfigObserver = ceph::md_config_obs_impl<ConfigProxy>; + ObserverMgr<ConfigObserver> obs_mgr; + + const md_config_t& get_config() const { + return remote_config ? *remote_config : * local_config; + } + md_config_t& get_config() { + return remote_config ? *remote_config : * local_config; + } + + // apply changes to all shards + // @param func a functor which accepts @c "ConfigValues&" + template<typename Func> + seastar::future<> do_change(Func&& func) { + return container().invoke_on(values.get_owner_shard(), + [func = std::move(func)](ConfigProxy& owner) { + // apply the changes to a copy of the values + auto new_values = seastar::make_lw_shared(*owner.values); + new_values->changed.clear(); + func(*new_values); + + // always apply the new settings synchronously on the owner shard, to + // avoid racings with other do_change() calls in parallel. 
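// Illustrative sketch (hypothetical caller, not part of this patch): every
// runtime update funnels through do_change(), e.g.
//
//   seastar::future<> tune(crimson::common::ConfigProxy& conf) {
//     return conf.set_val("osd_max_backfills", "3");  // assumed option name
//   }
//
// Publishing the copied ConfigValues synchronously on the owner shard below
// is what keeps concurrent tune() calls from interleaving their updates.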
+ ObserverMgr<ConfigObserver>::rev_obs_map rev_obs; + owner.values.reset(new_values); + owner.obs_mgr.for_each_change(owner.values->changed, owner, + [&rev_obs](ConfigObserver *obs, + const std::string &key) { + rev_obs[obs].insert(key); + }, nullptr); + for (auto& [obs, keys] : rev_obs) { + obs->handle_conf_change(owner, keys); + } + + return seastar::parallel_for_each(boost::irange(1u, seastar::smp::count), + [&owner, new_values] (auto cpu) { + return owner.container().invoke_on(cpu, + [foreign_values = seastar::make_foreign(new_values)](ConfigProxy& proxy) mutable { + proxy.values.reset(); + proxy.values = std::move(foreign_values); + + ObserverMgr<ConfigObserver>::rev_obs_map rev_obs; + proxy.obs_mgr.for_each_change(proxy.values->changed, proxy, + [&rev_obs](ConfigObserver *obs, const std::string& key) { + rev_obs[obs].insert(key); + }, nullptr); + for (auto& obs_keys : rev_obs) { + obs_keys.first->handle_conf_change(proxy, obs_keys.second); + } + }); + }).finally([new_values] { + new_values->changed.clear(); + }); + }); + } +public: + ConfigProxy(const EntityName& name, std::string_view cluster); + const ConfigValues* operator->() const noexcept { + return values.get(); + } + const ConfigValues get_config_values() { + return *values.get(); + } + ConfigValues* operator->() noexcept { + return values.get(); + } + + // required by sharded<> + seastar::future<> start(); + seastar::future<> stop() { + return seastar::make_ready_future<>(); + } + void add_observer(ConfigObserver* obs) { + obs_mgr.add_observer(obs); + } + void remove_observer(ConfigObserver* obs) { + obs_mgr.remove_observer(obs); + } + seastar::future<> rm_val(const std::string& key) { + return do_change([key, this](ConfigValues& values) { + auto ret = get_config().rm_val(values, key); + if (ret < 0) { + throw std::invalid_argument(cpp_strerror(ret)); + } + }); + } + seastar::future<> set_val(const std::string& key, + const std::string& val) { + return do_change([key, val, this](ConfigValues& values) { + std::stringstream err; + auto ret = get_config().set_val(values, obs_mgr, key, val, &err); + if (ret < 0) { + throw std::invalid_argument(err.str()); + } + }); + } + int get_val(const std::string &key, std::string *val) const { + return get_config().get_val(*values, key, val); + } + template<typename T> + const T get_val(const std::string& key) const { + return get_config().template get_val<T>(*values, key); + } + + int get_all_sections(std::vector<std::string>& sections) const { + return get_config().get_all_sections(sections); + } + + int get_val_from_conf_file(const std::vector<std::string>& sections, + const std::string& key, std::string& out, + bool expand_meta) const { + return get_config().get_val_from_conf_file(*values, sections, key, + out, expand_meta); + } + + unsigned get_osd_pool_default_min_size(uint8_t size) const { + return get_config().get_osd_pool_default_min_size(*values, size); + } + + seastar::future<> + set_mon_vals(const std::map<std::string,std::string,std::less<>>& kv) { + return do_change([kv, this](ConfigValues& values) { + get_config().set_mon_vals(nullptr, values, obs_mgr, kv, nullptr); + }); + } + + seastar::future<> inject_args(const std::string& s) { + return do_change([s, this](ConfigValues& values) { + std::stringstream err; + if (get_config().injectargs(values, obs_mgr, s, &err)) { + throw std::invalid_argument(err.str()); + } + }); + } + void show_config(ceph::Formatter* f) const; + + seastar::future<> parse_argv(std::vector<const char*>& argv) { + // we could pass whatever is unparsed to 
seastar, but seastar::app_template + // is used for driving the seastar application, and + // crimson::common::ConfigProxy is not available until seastar engine is up + // and running, so we have to feed the command line args to app_template + // first, then pass them to ConfigProxy. + return do_change([&argv, this](ConfigValues& values) { + get_config().parse_argv(values, + obs_mgr, + argv, + CONF_CMDLINE); + }); + } + + seastar::future<> parse_config_files(const std::string& conf_files); + + using ShardedConfig = seastar::sharded<ConfigProxy>; + +private: + static ShardedConfig sharded_conf; + friend ConfigProxy& local_conf(); + friend ShardedConfig& sharded_conf(); +}; + +inline ConfigProxy& local_conf() { + return ConfigProxy::sharded_conf.local(); +} + +inline ConfigProxy::ShardedConfig& sharded_conf() { + return ConfigProxy::sharded_conf; +} + +} diff --git a/src/crimson/common/errorator.h b/src/crimson/common/errorator.h new file mode 100644 index 000000000..af1e6ea45 --- /dev/null +++ b/src/crimson/common/errorator.h @@ -0,0 +1,1140 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <exception> +#include <system_error> + +#include <seastar/core/future-util.hh> + +#include "include/ceph_assert.h" + +namespace crimson { + +template<typename Iterator, typename AsyncAction> +inline auto do_for_each(Iterator begin, Iterator end, AsyncAction action) { + using futurator = \ + ::seastar::futurize<std::invoke_result_t<AsyncAction, decltype(*begin)>>; + + if (begin == end) { + return futurator::type::errorator_type::template make_ready_future<>(); + } + while (true) { + auto f = futurator::invoke(action, *begin); + ++begin; + if (begin == end) { + return f; + } + if (!f.available() || seastar::need_preempt()) { + return std::move(f)._then( + [ action = std::move(action), + begin = std::move(begin), + end = std::move(end) + ] () mutable { + return ::crimson::do_for_each(std::move(begin), + std::move(end), + std::move(action)); + }); + } + if (f.failed()) { + return f; + } + } +} +template<typename Container, typename AsyncAction> +inline auto do_for_each(Container& c, AsyncAction action) { + return ::crimson::do_for_each(std::begin(c), std::end(c), std::move(action)); +} + +template<typename AsyncAction> +inline auto do_until(AsyncAction action) { + using errorator_t = + typename ::seastar::futurize_t<std::invoke_result_t<AsyncAction>>::errorator_type; + + while (true) { + auto f = ::seastar::futurize_invoke(action); + if (f.failed()) { + return errorator_t::template make_exception_future2<>( + f.get_exception() + ); + } else if (f.available()) { + if (auto done = f.get0()) { + return errorator_t::template make_ready_future<>(); + } + } else { + return std::move(f)._then( + [action = std::move(action)] (auto &&done) mutable { + if (done) { + return errorator_t::template make_ready_future<>(); + } + return ::crimson::do_until( + std::move(action)); + }); + } + } +} + +// define the interface between error types and errorator +template <class ConcreteErrorT> +class error_t { + static constexpr const std::type_info& get_exception_ptr_type_info() { + return ConcreteErrorT::exception_ptr_type_info(); + } + + std::exception_ptr to_exception_ptr() const { + const auto* concrete_error = static_cast<const ConcreteErrorT*>(this); + return concrete_error->to_exception_ptr(); + } + + decltype(auto) static from_exception_ptr(std::exception_ptr ep) { + return ConcreteErrorT::from_exception_ptr(std::move(ep)); + } + + template 
<class... AllowedErrorsT> + friend struct errorator; + + template <class ErrorVisitorT, class FuturatorT> + friend class maybe_handle_error_t; + +public: + template <class Func> + static decltype(auto) handle(Func&& func) { + return ConcreteErrorT::handle(std::forward<Func>(func)); + } +}; + +// unthrowable_wrapper ensures compilation failure when somebody +// would like to `throw make_error<...>)()` instead of returning. +// returning allows for the compile-time verification of future's +// AllowedErrorsV and also avoid the burden of throwing. +template <class ErrorT, ErrorT ErrorV> +struct unthrowable_wrapper : error_t<unthrowable_wrapper<ErrorT, ErrorV>> { + unthrowable_wrapper(const unthrowable_wrapper&) = delete; + [[nodiscard]] static const auto& make() { + static constexpr unthrowable_wrapper instance{}; + return instance; + } + + template<class Func> + static auto handle(Func&& func) { + return [ + func = std::forward<Func>(func) + ] (const unthrowable_wrapper&) mutable -> decltype(auto) { + if constexpr (std::is_invocable_v<Func, ErrorT>) { + return std::invoke(std::forward<Func>(func), ErrorV); + } else { + return std::invoke(std::forward<Func>(func)); + } + }; + } + + struct pass_further { + decltype(auto) operator()(const unthrowable_wrapper& e) { + return e; + } + }; + + struct discard { + decltype(auto) operator()(const unthrowable_wrapper&) { + } + }; + + +private: + // can be used only to initialize the `instance` member + explicit unthrowable_wrapper() = default; + + // implement the errorable interface + struct throwable_carrier{}; + static std::exception_ptr carrier_instance; + + static constexpr const std::type_info& exception_ptr_type_info() { + return typeid(throwable_carrier); + } + auto to_exception_ptr() const { + // error codes don't need to instantiate `std::exception_ptr` each + // time as the code is actually a part of the type itself. + // `std::make_exception_ptr()` on modern enough GCCs is quite cheap + // (see the Gleb Natapov's patch eradicating throw/catch there), + // but using one instance per type boils down the overhead to just + // ref-counting. + return carrier_instance; + } + static const auto& from_exception_ptr(std::exception_ptr) { + return make(); + } + + friend class error_t<unthrowable_wrapper<ErrorT, ErrorV>>; +}; + +template <class ErrorT, ErrorT ErrorV> +std::exception_ptr unthrowable_wrapper<ErrorT, ErrorV>::carrier_instance = \ + std::make_exception_ptr< + unthrowable_wrapper<ErrorT, ErrorV>::throwable_carrier>({}); + + +template <class ErrorT> +struct stateful_error_t : error_t<stateful_error_t<ErrorT>> { + template <class... Args> + explicit stateful_error_t(Args&&... 
args) + : ep(std::make_exception_ptr<ErrorT>(std::forward<Args>(args)...)) { + } + + template<class Func> + static auto handle(Func&& func) { + static_assert(std::is_invocable_v<Func, ErrorT>); + return [ + func = std::forward<Func>(func) + ] (stateful_error_t<ErrorT>&& e) mutable -> decltype(auto) { + try { + std::rethrow_exception(e.ep); + } catch (const ErrorT& obj) { + return std::invoke(std::forward<Func>(func), obj); + } + ceph_abort_msg("exception type mismatch – impossible!"); + }; + } + +private: + std::exception_ptr ep; + + explicit stateful_error_t(std::exception_ptr ep) : ep(std::move(ep)) {} + + static constexpr const std::type_info& exception_ptr_type_info() { + return typeid(ErrorT); + } + auto to_exception_ptr() const { + return ep; + } + static stateful_error_t<ErrorT> from_exception_ptr(std::exception_ptr ep) { + return stateful_error_t<ErrorT>(std::move(ep)); + } + + friend class error_t<stateful_error_t<ErrorT>>; +}; + +namespace _impl { + template <class T> struct always_false : std::false_type {}; +}; + +template <class ErrorVisitorT, class FuturatorT> +class maybe_handle_error_t { + const std::type_info& type_info; + typename FuturatorT::type result; + ErrorVisitorT errfunc; + +public: + maybe_handle_error_t(ErrorVisitorT&& errfunc, std::exception_ptr ep) + : type_info(*ep.__cxa_exception_type()), + result(FuturatorT::make_exception_future(std::move(ep))), + errfunc(std::forward<ErrorVisitorT>(errfunc)) { + } + + template <class ErrorT> + void handle() { + static_assert(std::is_invocable<ErrorVisitorT, ErrorT>::value, + "provided Error Visitor is not exhaustive"); + // In C++ throwing an exception isn't the sole way to signal + // error with it. This approach nicely fits cold, infrequent cases + // but when applied to a hot one, it will likely hurt performance. + // + // Alternative approach is to create `std::exception_ptr` on our + // own and place it in the future via `make_exception_future()`. + // When it comes to handling, the pointer can be interrogated for + // pointee's type with `__cxa_exception_type()` instead of costly + // re-throwing (via `std::rethrow_exception()`) and matching with + // `catch`. The limitation here is lack of support for hierarchies + // of exceptions. The code below checks for exact match only while + // `catch` would allow to match against a base class as well. + // However, this shouldn't be a big issue for `errorator` as Error + // Visitors are already checked for exhaustiveness at compile-time. + // + // NOTE: `__cxa_exception_type()` is an extension of the language. + // It should be available both in GCC and Clang but a fallback + // (based on `std::rethrow_exception()` and `catch`) can be made + // to handle other platforms if necessary. + if (type_info == ErrorT::error_t::get_exception_ptr_type_info()) { + // set `state::invalid` in internals of `seastar::future` to not + // call `report_failed_future()` during `operator=()`. + [[maybe_unused]] auto&& ep = std::move(result).get_exception(); + + using return_t = std::invoke_result_t<ErrorVisitorT, ErrorT>; + if constexpr (std::is_assignable_v<decltype(result), return_t>) { + result = std::invoke(std::forward<ErrorVisitorT>(errfunc), + ErrorT::error_t::from_exception_ptr(std::move(ep))); + } else if constexpr (std::is_same_v<return_t, void>) { + // void denotes explicit discarding + // execute for the sake a side effects. Typically this boils down + // to throwing an exception by the handler. 
+ std::invoke(std::forward<ErrorVisitorT>(errfunc), + ErrorT::error_t::from_exception_ptr(std::move(ep))); + } else { + static_assert(_impl::always_false<return_t>::value, + "return of Error Visitor is not assignable to future"); + // do nothing with `ep`. + } + } + } + + auto get_result() && { + return std::move(result); + } +}; + +template <class FuncHead, class... FuncTail> +static constexpr auto composer(FuncHead&& head, FuncTail&&... tail) { + return [ + head = std::forward<FuncHead>(head), + // perfect forwarding in lambda's closure isn't available in C++17 + // using tuple as workaround; see: https://stackoverflow.com/a/49902823 + tail = std::make_tuple(std::forward<FuncTail>(tail)...) + ] (auto&&... args) mutable -> decltype(auto) { + if constexpr (std::is_invocable_v<FuncHead, decltype(args)...>) { + return std::invoke(std::forward<FuncHead>(head), + std::forward<decltype(args)>(args)...); + } else if constexpr (sizeof...(FuncTail) > 0) { + using next_composer_t = decltype(composer<FuncTail...>); + auto&& next = std::apply<next_composer_t>(composer<FuncTail...>, + std::move(tail)); + return std::invoke(std::move(next), + std::forward<decltype(args)>(args)...); + } else { + static_assert( + std::is_invocable_v<FuncHead, decltype(args)...> || + (sizeof...(FuncTail) > 0), + "composition is not exhaustive"); + } + }; +} + +template <class ValueT> +struct errorated_future_marker{}; + +template <class... AllowedErrors> +struct errorator { + template <class T> + static inline constexpr bool is_error_v = std::is_base_of_v<error_t<T>, T>; + + static_assert((... && is_error_v<AllowedErrors>), + "errorator expects presence of ::is_error in all error types"); + + template <class ErrorT> + struct contains_once { + static constexpr bool value = + (0 + ... + std::is_same_v<ErrorT, AllowedErrors>) == 1; + }; + template <class... Errors> + struct contains_once<errorator<Errors...>> { + static constexpr bool value = (... && contains_once<Errors>::value); + }; + template <class T> + static constexpr bool contains_once_v = contains_once<T>::value; + + static_assert((... && contains_once_v<AllowedErrors>), + "no error type in errorator can be duplicated"); + + struct ready_future_marker{}; + struct exception_future_marker{}; + +private: + // see the comment for `using future = _future` below. + template <class> + class _future {}; + template <class ValueT> + class _future<::crimson::errorated_future_marker<ValueT>> + : private seastar::future<ValueT> { + using base_t = seastar::future<ValueT>; + // we need the friendship for the sake of `get_exception() &&` when + // `safe_then()` is going to return an errorated future as a result of + // chaining. In contrast to `seastar::future`, errorator<T...>::future` + // has this member private. + template <class ErrorVisitor, class Futurator> + friend class maybe_handle_error_t; + + // any `seastar::futurize` specialization must be able to access the base. + // see : `satisfy_with_result_of()` far below. + template <typename> + friend class seastar::futurize; + + template <typename T1, typename T2, typename... More> + friend auto seastar::internal::do_with_impl(T1&& rv1, T2&& rv2, More&&... more); + + template <class, class = std::void_t<>> + struct get_errorator { + // generic template for non-errorated things (plain types and + // vanilla seastar::future as well). 
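// For illustration (assumed types): get_errorator_t<int> and
// get_errorator_t<seastar::future<int>> both resolve to errorator<>, while
// get_errorator_t<errorator<ct_error::enoent>::future<int>> resolves to
// errorator<ct_error::enoent> via the specialization that follows.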
+ using type = errorator<>; + }; + template <class FutureT> + struct get_errorator<FutureT, + std::void_t<typename FutureT::errorator_type>> { + using type = typename FutureT::errorator_type; + }; + template <class T> + using get_errorator_t = typename get_errorator<T>::type; + + template <class ValueFuncErroratorT, class... ErrorVisitorRetsT> + struct make_errorator { + // NOP. The generic template. + }; + template <class... ValueFuncAllowedErrors, + class ErrorVisitorRetsHeadT, + class... ErrorVisitorRetsTailT> + struct make_errorator<errorator<ValueFuncAllowedErrors...>, + ErrorVisitorRetsHeadT, + ErrorVisitorRetsTailT...> { + private: + using step_errorator = errorator<ValueFuncAllowedErrors...>; + // add ErrorVisitorRetsHeadT only if 1) it's an error type and + // 2) isn't already included in the errorator's error set. + // It's enough to negate contains_once_v as any errorator<...> + // type is already guaranteed to be free of duplications. + using next_errorator = std::conditional_t< + is_error_v<ErrorVisitorRetsHeadT> && + !step_errorator::template contains_once_v<ErrorVisitorRetsHeadT>, + typename step_errorator::template extend<ErrorVisitorRetsHeadT>, + step_errorator>; + + public: + using type = typename make_errorator<next_errorator, + ErrorVisitorRetsTailT...>::type; + }; + // finish the recursion + template <class... ValueFuncAllowedErrors> + struct make_errorator<errorator<ValueFuncAllowedErrors...>> { + using type = ::crimson::errorator<ValueFuncAllowedErrors...>; + }; + template <class... Args> + using make_errorator_t = typename make_errorator<Args...>::type; + + using base_t::base_t; + + template <class Futurator, class Future, class ErrorVisitor> + [[gnu::noinline]] + static auto _safe_then_handle_errors(Future&& future, + ErrorVisitor&& errfunc) { + maybe_handle_error_t<ErrorVisitor, Futurator> maybe_handle_error( + std::forward<ErrorVisitor>(errfunc), + std::move(future).get_exception() + ); + (maybe_handle_error.template handle<AllowedErrors>() , ...); + return std::move(maybe_handle_error).get_result(); + } + + public: + using errorator_type = ::crimson::errorator<AllowedErrors...>; + using promise_type = seastar::promise<ValueT>; + + using base_t::available; + using base_t::failed; + // need this because of the legacy in PG::do_osd_ops(). + using base_t::handle_exception_type; + + [[gnu::always_inline]] + _future(base_t&& base) + : base_t(std::move(base)) { + } + + template <class... A> + [[gnu::always_inline]] + _future(ready_future_marker, A&&... 
a) + : base_t(::seastar::make_ready_future<ValueT>(std::forward<A>(a)...)) { + } + [[gnu::always_inline]] + _future(exception_future_marker, ::seastar::future_state_base&& state) noexcept + : base_t(::seastar::futurize<base_t>::make_exception_future(std::move(state))) { + } + [[gnu::always_inline]] + _future(exception_future_marker, std::exception_ptr&& ep) noexcept + : base_t(::seastar::futurize<base_t>::make_exception_future(std::move(ep))) { + } + + template <template <class...> class ErroratedFuture, + class = std::void_t< + typename ErroratedFuture< + ::crimson::errorated_future_marker<ValueT>>::errorator_type>> + operator ErroratedFuture<errorated_future_marker<ValueT>> () && { + using dest_errorator_t = \ + typename ErroratedFuture< + ::crimson::errorated_future_marker<ValueT>>::errorator_type; + static_assert(dest_errorator_t::template contains_once_v<errorator_type>, + "conversion is possible to more-or-eq errorated future!"); + return static_cast<base_t&&>(*this); + } + + // initialize future as failed without throwing. `make_exception_future()` + // internally uses `std::make_exception_ptr()`. cppreference.com shouldn't + // be misinterpreted when it says: + // + // "This is done as if executing the following code: + // try { + // throw e; + // } catch(...) { + // return std::current_exception(); + // }", + // + // the "as if" is absolutely crucial because modern GCCs employ optimized + // path for it. See: + // * https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=cce8e59224e18858749a2324bce583bcfd160d6c, + // * https://gcc.gnu.org/ml/gcc-patches/2016-08/msg00373.html. + // + // This behavior, combined with `__cxa_exception_type()` for inspecting + // exception's type, allows for throw/catch-free handling of stateless + // exceptions (which is fine for error codes). Stateful jumbos would be + // actually a bit harder as `_M_get()` is private, and thus rethrowing is + // necessary to get to the state inside. However, it's not unthinkable to + // see another extension bringing operator*() to the exception pointer... + // + // TODO: we don't really need to `make_exception_ptr` each time. It still + // allocates memory underneath while can be replaced with single instance + // per type created on start-up. + template <class ErrorT, + class DecayedT = std::decay_t<ErrorT>, + bool IsError = is_error_v<DecayedT>, + class = std::enable_if_t<IsError>> + _future(ErrorT&& e) + : base_t( + seastar::make_exception_future<ValueT>( + errorator_type::make_exception_ptr(e))) { + static_assert(errorator_type::contains_once_v<DecayedT>, + "ErrorT is not enlisted in errorator"); + } + + template <class ValueFuncT, class ErrorVisitorT> + auto safe_then(ValueFuncT&& valfunc, ErrorVisitorT&& errfunc) { + static_assert((... && std::is_invocable_v<ErrorVisitorT, + AllowedErrors>), + "provided Error Visitor is not exhaustive"); + + using value_func_result_t = + typename std::conditional_t<std::is_void_v<ValueT>, + std::invoke_result<ValueFuncT>, + std::invoke_result<ValueFuncT, ValueT>>::type; + // recognize whether there can be any error coming from the Value + // Function. + using value_func_errorator_t = get_errorator_t<value_func_result_t>; + // mutate the Value Function's errorator to harvest errors coming + // from the Error Visitor. Yes, it's perfectly fine to fail error + // handling at one step and delegate even broader set of issues + // to next continuation. 
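// A sketch of what this allows (hypothetical types, for illustration):
//
//   using ertr = errorator<ct_error::enoent>;
//   ertr::future<int> f = ...;
//   f.safe_then(
//     [] (int v) { return seastar::make_ready_future<int>(v + 1); },
//     [] (const ct_error::enoent&) { return seastar::make_ready_future<int>(0); });
//
// handles enoent exhaustively, so the chained future degrades to a plain
// errorator<>::future<int>; returning ct_error::invarg::make() from the
// visitor instead would harvest invarg into the resulting error set.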
+ using return_errorator_t = make_errorator_t< + value_func_errorator_t, + std::decay_t<std::invoke_result_t<ErrorVisitorT, AllowedErrors>>...>; + // OK, now we know about all errors next continuation must take + // care about. If Visitor handled everything and the Value Func + // doesn't return any, we'll finish with errorator<>::future + // which is just vanilla seastar::future – that's it, next cont + // finally could use `.then()`! + using futurator_t = \ + typename return_errorator_t::template futurize<value_func_result_t>; + // `seastar::futurize`, used internally by `then_wrapped()`, would + // wrap any non-`seastar::future` type coming from Value Func into + // `seastar::future`. As we really don't want to end with things + // like `seastar::future<errorator::future<...>>`, we need either: + // * convert the errorated future into plain in the lambda below + // and back here or + // * specialize the `seastar::futurize<T>` to get proper kind of + // future directly from `::then_wrapped()`. + // As C++17 doesn't guarantee copy elision when non-same types are + // involved while examination of assemblies from GCC 8.1 confirmed + // extra copying, switch to the second approach has been made. + return this->then_wrapped( + [ valfunc = std::forward<ValueFuncT>(valfunc), + errfunc = std::forward<ErrorVisitorT>(errfunc) + ] (auto&& future) mutable noexcept { + if (__builtin_expect(future.failed(), false)) { + return _safe_then_handle_errors<futurator_t>( + std::move(future), std::forward<ErrorVisitorT>(errfunc)); + } else { + // NOTE: using `seastar::future::get()` here is a bit bloaty + // as the method rechecks availability of future's value and, + // if it's unavailable, does the `::do_wait()` path (yes, it + // targets `seastar::thread`). Actually this is dead code as + // `then_wrapped()` executes the lambda only when the future + // is available (which means: failed or ready). However, GCC + // hasn't optimized it out: + // + // if (__builtin_expect(future.failed(), false)) { + // ea25: 48 83 bd c8 fe ff ff cmpq $0x2,-0x138(%rbp) + // ea2c: 02 + // ea2d: 0f 87 f0 05 00 00 ja f023 <ceph::osd:: + // ... + // /// If get() is called in a \ref seastar::thread context, + // /// then it need not be available; instead, the thread will + // /// be paused until the future becomes available. + // [[gnu::always_inline]] + // std::tuple<T...> get() { + // if (!_state.available()) { + // ea3a: 0f 85 1b 05 00 00 jne ef5b <ceph::osd:: + // } + // ... + // + // I don't perceive this as huge issue. Though, it cannot be + // claimed errorator has 0 overhead on hot path. The perfect + // solution here would be mark the `::get_available_state()` + // as `protected` and use dedicated `get_value()` exactly as + // `::then()` already does. + return futurator_t::invoke(std::forward<ValueFuncT>(valfunc), + std::move(future).get()); + } + }); + } + + /** + * unsafe_thread_get + * + * Only valid within a seastar_thread. Ignores errorator protections + * and throws any contained exceptions. + * + * Should really only be used within test code + * (see test/crimson/gtest_seastar.h). 
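 *
 * Illustrative use (assumes a seastar::thread context; the future-returning
 * call is hypothetical):
 *
 *   seastar::async([] {
 *     auto v = make_errorated_future().unsafe_get0();
 *   });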
+ */ + auto &&unsafe_get() { + return seastar::future<ValueT>::get(); + } + auto unsafe_get0() { + return seastar::future<ValueT>::get0(); + } + + template <class FuncT> + _future finally(FuncT &&func) { + return this->then_wrapped( + [func = std::forward<FuncT>(func)](auto &&result) mutable noexcept { + if constexpr (seastar::is_future<std::invoke_result_t<FuncT>>::value) { + return ::seastar::futurize_invoke(std::forward<FuncT>(func)).then_wrapped( + [result = std::move(result)](auto&& f_res) mutable { + // TODO: f_res.failed() + (void)f_res.discard_result(); + return std::move(result); + }); + } else { + try { + func(); + } catch (...) { + // TODO: rethrow + } + return std::move(result); + } + }); + } + + // taking ErrorFuncOne and ErrorFuncTwo separately from ErrorFuncTail + // to avoid SFINAE + template <class ValueFunc, + class ErrorFuncHead, + class... ErrorFuncTail> + auto safe_then(ValueFunc&& value_func, + ErrorFuncHead&& error_func_head, + ErrorFuncTail&&... error_func_tail) { + static_assert(sizeof...(ErrorFuncTail) > 0); + return safe_then( + std::forward<ValueFunc>(value_func), + composer(std::forward<ErrorFuncHead>(error_func_head), + std::forward<ErrorFuncTail>(error_func_tail)...)); + } + + template <class ValueFunc> + auto safe_then(ValueFunc&& value_func) { + return safe_then(std::forward<ValueFunc>(value_func), + errorator_type::pass_further{}); + } + + template <class Func> + void then(Func&&) = delete; + + template <class ErrorVisitorT> + auto handle_error(ErrorVisitorT&& errfunc) { + static_assert((... && std::is_invocable_v<ErrorVisitorT, + AllowedErrors>), + "provided Error Visitor is not exhaustive"); + using return_errorator_t = make_errorator_t< + errorator<>, + std::decay_t<std::invoke_result_t<ErrorVisitorT, AllowedErrors>>...>; + using futurator_t = \ + typename return_errorator_t::template futurize<::seastar::future<ValueT>>; + return this->then_wrapped( + [ errfunc = std::forward<ErrorVisitorT>(errfunc) + ] (auto&& future) mutable noexcept { + if (__builtin_expect(future.failed(), false)) { + return _safe_then_handle_errors<futurator_t>( + std::move(future), std::forward<ErrorVisitorT>(errfunc)); + } else { + return typename futurator_t::type{ std::move(future) }; + } + }); + } + template <class ErrorFuncHead, + class... ErrorFuncTail> + auto handle_error(ErrorFuncHead&& error_func_head, + ErrorFuncTail&&... error_func_tail) { + static_assert(sizeof...(ErrorFuncTail) > 0); + return this->handle_error( + composer(std::forward<ErrorFuncHead>(error_func_head), + std::forward<ErrorFuncTail>(error_func_tail)...)); + } + + private: + // for ::crimson::do_for_each + template <class Func> + auto _then(Func&& func) { + return base_t::then(std::forward<Func>(func)); + } + template<typename Iterator, typename AsyncAction> + friend inline auto ::crimson::do_for_each(Iterator begin, + Iterator end, + AsyncAction action); + + template<typename AsyncAction> + friend inline auto ::crimson::do_until(AsyncAction action); + + template <typename Result> + friend class ::seastar::future; + + // let seastar::do_with_impl to up-cast us to seastar::future. + template<typename T, typename F> + friend inline auto ::seastar::internal::do_with_impl(T&& rvalue, F&& f); + template<typename T1, typename T2, typename T3_or_F, typename... More> + friend inline auto ::seastar::internal::do_with_impl(T1&& rv1, T2&& rv2, T3_or_F&& rv3, More&&... 
more); + }; + + class Enabler {}; + + template <typename T> + using EnableIf = typename std::enable_if<contains_once_v<std::decay_t<T>>, Enabler>::type; + + template <typename ErrorFunc> + struct all_same_way_t { + ErrorFunc func; + all_same_way_t(ErrorFunc &&error_func) + : func(std::forward<ErrorFunc>(error_func)) {} + + template <typename ErrorT, EnableIf<ErrorT>...> + decltype(auto) operator()(ErrorT&& e) { + using decayed_t = std::decay_t<decltype(e)>; + auto&& handler = + decayed_t::error_t::handle(std::forward<ErrorFunc>(func)); + static_assert(std::is_invocable_v<decltype(handler), ErrorT>); + return std::invoke(std::move(handler), std::forward<ErrorT>(e)); + } + }; + +public: + // HACK: `errorated_future_marker` and `_future` is just a hack to + // specialize `seastar::futurize` for category of class templates: + // `future<...>` from distinct errorators. Such tricks are usually + // performed basing on SFINAE and `std::void_t` to check existence + // of a trait/member (`future<...>::errorator_type` in our case). + // Unfortunately, this technique can't be applied as the `futurize` + // lacks the optional parameter. The problem looks awfully similar + // to following SO item: https://stackoverflow.com/a/38860413. + template <class ValueT=void> + using future = _future<::crimson::errorated_future_marker<ValueT>>; + + // the visitor that forwards handling of all errors to next continuation + struct pass_further { + template <class ErrorT, EnableIf<ErrorT>...> + decltype(auto) operator()(ErrorT&& e) { + static_assert(contains_once_v<std::decay_t<ErrorT>>, + "passing further disallowed ErrorT"); + return std::forward<ErrorT>(e); + } + }; + + struct discard_all { + template <class ErrorT, EnableIf<ErrorT>...> + void operator()(ErrorT&&) { + static_assert(contains_once_v<std::decay_t<ErrorT>>, + "discarding disallowed ErrorT"); + } + }; + + // assert_all{ "TODO" }; + class assert_all { + const char* const msg = nullptr; + public: + template <std::size_t N> + assert_all(const char (&msg)[N]) + : msg(msg) { + } + assert_all() = default; + + template <class ErrorT, EnableIf<ErrorT>...> + void operator()(ErrorT&&) { + static_assert(contains_once_v<std::decay_t<ErrorT>>, + "discarding disallowed ErrorT"); + if (msg) { + ceph_abort_msg(msg); + } else { + ceph_abort(); + } + } + }; + + template <class ErrorFunc> + static decltype(auto) all_same_way(ErrorFunc&& error_func) { + return all_same_way_t<ErrorFunc>{std::forward<ErrorFunc>(error_func)}; + }; + + // get a new errorator by extending current one with new error + template <class... NewAllowedErrorsT> + using extend = errorator<AllowedErrors..., NewAllowedErrorsT...>; + + // get a new errorator by summing and deduplicating error set of + // the errorator `unify<>` is applied on with another errorator + // provided as template parameter. + template <class OtherErroratorT> + struct unify { + // 1st: generic NOP template + }; + template <class OtherAllowedErrorsHead, + class... OtherAllowedErrorsTail> + struct unify<errorator<OtherAllowedErrorsHead, + OtherAllowedErrorsTail...>> { + private: + // 2nd: specialization for errorators with non-empty error set. + // + // split error set of other errorator, passed as template param, + // into head and tail. Mix error set of this errorator with head + // of the other one only if it isn't already present in the set. 
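// e.g. (illustrative):
//   errorator<ct_error::enoent, ct_error::invarg>
//       ::unify<errorator<ct_error::invarg, ct_error::eagain>>::type
// deduplicates invarg and yields
//   errorator<ct_error::enoent, ct_error::invarg, ct_error::eagain>.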
+ using step_errorator = std::conditional_t< + contains_once_v<OtherAllowedErrorsHead> == false, + errorator<AllowedErrors..., OtherAllowedErrorsHead>, + errorator<AllowedErrors...>>; + using rest_errorator = errorator<OtherAllowedErrorsTail...>; + + public: + using type = typename step_errorator::template unify<rest_errorator>::type; + }; + template <class... EmptyPack> + struct unify<errorator<EmptyPack...>> { + // 3rd: recursion finisher + static_assert(sizeof...(EmptyPack) == 0); + using type = errorator<AllowedErrors...>; + }; + + template <typename T=void, typename... A> + static future<T> make_ready_future(A&&... value) { + return future<T>(ready_future_marker(), std::forward<A>(value)...); + } + + template <typename T=void> + static + future<T> make_exception_future2(std::exception_ptr&& ex) noexcept { + return future<T>(exception_future_marker(), std::move(ex)); + } + template <typename T=void> + static + future<T> make_exception_future2(seastar::future_state_base&& state) noexcept { + return future<T>(exception_future_marker(), std::move(state)); + } + template <typename T=void, typename Exception> + static + future<T> make_exception_future2(Exception&& ex) noexcept { + return make_exception_future2<T>(std::make_exception_ptr(std::forward<Exception>(ex))); + } + + static auto now() { + return make_ready_future<>(); + } + +private: + template <class T, class = std::void_t<T>> + class futurize { + using vanilla_futurize = seastar::futurize<T>; + + // explicit specializations for nested type is not allowed unless both + // the member template and the enclosing template are specialized. see + // section temp.expl.spec, N4659 + template <class Stored, int Dummy = 0> + struct stored_to_future { + using type = future<Stored>; + }; + template <int Dummy> + struct stored_to_future <seastar::internal::monostate, Dummy> { + using type = future<>; + }; + + public: + using type = + typename stored_to_future<typename vanilla_futurize::value_type>::type; + + template <class Func, class... Args> + static type invoke(Func&& func, Args&&... args) { + try { + return vanilla_futurize::invoke(std::forward<Func>(func), + std::forward<Args>(args)...); + } catch (...) { + return make_exception_future(std::current_exception()); + } + } + + template <class Func> + static type invoke(Func&& func, seastar::internal::monostate) { + try { + return vanilla_futurize::invoke(std::forward<Func>(func)); + } catch (...) { + return make_exception_future(std::current_exception()); + } + } + + template <typename Arg> + static type make_exception_future(Arg&& arg) { + return vanilla_futurize::make_exception_future(std::forward<Arg>(arg)); + } + }; + template <template <class...> class ErroratedFutureT, + class ValueT> + class futurize<ErroratedFutureT<::crimson::errorated_future_marker<ValueT>>, + std::void_t< + typename ErroratedFutureT< + ::crimson::errorated_future_marker<ValueT>>::errorator_type>> { + public: + using type = ::crimson::errorator<AllowedErrors...>::future<ValueT>; + + template <class Func, class... Args> + static type apply(Func&& func, std::tuple<Args...>&& args) { + try { + return ::seastar::futurize_apply(std::forward<Func>(func), + std::forward<std::tuple<Args...>>(args)); + } catch (...) { + return make_exception_future(std::current_exception()); + } + } + + template <class Func, class... Args> + static type invoke(Func&& func, Args&&... args) { + try { + return ::seastar::futurize_invoke(std::forward<Func>(func), + std::forward<Args>(args)...); + } catch (...) 
{ + return make_exception_future(std::current_exception()); + } + } + + template <class Func> + static type invoke(Func&& func, seastar::internal::monostate) { + try { + return ::seastar::futurize_invoke(std::forward<Func>(func)); + } catch (...) { + return make_exception_future(std::current_exception()); + } + } + + template <typename Arg> + static type make_exception_future(Arg&& arg) { + return ::crimson::errorator<AllowedErrors...>::make_exception_future2<ValueT>(std::forward<Arg>(arg)); + } + }; + + template <class ErrorT> + static std::exception_ptr make_exception_ptr(ErrorT&& e) { + // calling via interface class due to encapsulation and friend relations. + return e.error_t<std::decay_t<ErrorT>>::to_exception_ptr(); + } + + // needed because of: + // * return_errorator_t::template futurize<...> in `safe_then()`, + // * conversion to `std::exception_ptr` in `future::future(ErrorT&&)`. + // the friendship with all errorators is an idea from Kefu to fix build + // issues on GCC 9. This version likely fixes some access violation bug + // we were exploiting before. + template <class...> + friend class errorator; +}; // class errorator, generic template + +// no errors? errorator<>::future is plain seastar::future then! +template <> +class errorator<> { +public: + template <class ValueT> + using future = ::seastar::future<ValueT>; + + template <class T> + using futurize = ::seastar::futurize<T>; + + // get a new errorator by extending current one with new error + template <class... NewAllowedErrors> + using extend = errorator<NewAllowedErrors...>; + + // errorator with empty error set never contains any error + template <class T> + static constexpr bool contains_once_v = false; +}; // class errorator, <> specialization + + +template <class ErroratorOne, + class ErroratorTwo, + class... FurtherErrators> +struct compound_errorator { +private: + // generic template. Empty `FurtherErrators` are handled by + // the specialization below. + static_assert(sizeof...(FurtherErrators) > 0); + using step = + typename compound_errorator<ErroratorOne, ErroratorTwo>::type; + +public: + using type = + typename compound_errorator<step, FurtherErrators...>::type; +}; +template <class ErroratorOne, + class ErroratorTwo> +struct compound_errorator<ErroratorOne, ErroratorTwo> { + // specialization for empty `FurtherErrators` arg pack + using type = + typename ErroratorOne::template unify<ErroratorTwo>::type; +}; +template <class... Args> +using compound_errorator_t = typename compound_errorator<Args...>::type; + +// this is conjunction of two nasty features: C++14's variable template +// and inline global variable of C++17. The latter is crucial to ensure +// the variable will get the same address across all translation units. 
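// For illustration (hypothetical function): these per-type singletons let an
// entire error path stay throw-free:
//
//   using ertr = crimson::errorator<crimson::ct_error::enoent>;
//   ertr::future<int> lookup(bool found) {
//     if (!found)
//       return crimson::ct_error::enoent::make();
//     return ertr::make_ready_future<int>(42);
//   }
//
// The inline variable below guarantees that every translation unit sees the
// same std::error_code instance behind ct_error::enoent.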
+template <std::errc ErrorV> +inline std::error_code ec = std::make_error_code(ErrorV); + +template <std::errc ErrorV> +using ct_error_code = unthrowable_wrapper<const std::error_code&, ec<ErrorV>>; + +namespace ct_error { + using enoent = ct_error_code<std::errc::no_such_file_or_directory>; + using enodata = ct_error_code<std::errc::no_message_available>; + using invarg = ct_error_code<std::errc::invalid_argument>; + using input_output_error = ct_error_code<std::errc::io_error>; + using object_corrupted = ct_error_code<std::errc::illegal_byte_sequence>; + using permission_denied = ct_error_code<std::errc::permission_denied>; + using operation_not_supported = + ct_error_code<std::errc::operation_not_supported>; + using not_connected = ct_error_code<std::errc::not_connected>; + using timed_out = ct_error_code<std::errc::timed_out>; + using erange = + ct_error_code<std::errc::result_out_of_range>; + using ebadf = + ct_error_code<std::errc::bad_file_descriptor>; + using enospc = + ct_error_code<std::errc::no_space_on_device>; + using value_too_large = ct_error_code<std::errc::value_too_large>; + using eagain = + ct_error_code<std::errc::resource_unavailable_try_again>; + using file_too_large = + ct_error_code<std::errc::file_too_large>; + using address_in_use = ct_error_code<std::errc::address_in_use>; + + struct pass_further_all { + template <class ErrorT> + decltype(auto) operator()(ErrorT&& e) { + return std::forward<ErrorT>(e); + } + }; + + struct discard_all { + template <class ErrorT> + void operator()(ErrorT&&) { + } + }; + + class assert_all { + const char* const msg = nullptr; + public: + template <std::size_t N> + assert_all(const char (&msg)[N]) + : msg(msg) { + } + assert_all() = default; + + template <class ErrorT> + void operator()(ErrorT&&) { + if (msg) { + ceph_abort(msg); + } else { + ceph_abort(); + } + } + }; + + template <class ErrorFunc> + static decltype(auto) all_same_way(ErrorFunc&& error_func) { + return [ + error_func = std::forward<ErrorFunc>(error_func) + ] (auto&& e) mutable -> decltype(auto) { + using decayed_t = std::decay_t<decltype(e)>; + auto&& handler = + decayed_t::error_t::handle(std::forward<ErrorFunc>(error_func)); + return std::invoke(std::move(handler), std::forward<decltype(e)>(e)); + }; + }; +} + +using stateful_errc = stateful_error_t<std::errc>; +using stateful_errint = stateful_error_t<int>; +using stateful_ec = stateful_error_t<std::error_code>; + +} // namespace crimson + + +// open the `seastar` namespace to specialize `futurize`. This is not +// pretty for sure. I just hope it's not worse than e.g. specializing +// `hash` in the `std` namespace. The justification is copy avoidance +// in `future<...>::safe_then()`. See the comments there for details. +namespace seastar { + +// Container is a placeholder for errorator::_future<> template +template <template <class> class Container, + class Value> +struct futurize<Container<::crimson::errorated_future_marker<Value>>> { + using errorator_type = typename Container< + ::crimson::errorated_future_marker<Value>>::errorator_type; + + using type = typename errorator_type::template future<Value>; + using value_type = seastar::internal::future_stored_type_t<Value>; + + template<typename Func, typename... FuncArgs> + [[gnu::always_inline]] + static inline type invoke(Func&& func, FuncArgs&&... args) noexcept { + try { + return func(std::forward<FuncArgs>(args)...); + } catch (...) 
{ + return make_exception_future(std::current_exception()); + } + } + + template <class Func> + [[gnu::always_inline]] + static type invoke(Func&& func, seastar::internal::monostate) noexcept { + try { + return func(); + } catch (...) { + return make_exception_future(std::current_exception()); + } + } + + template <typename Arg> + [[gnu::always_inline]] + static type make_exception_future(Arg&& arg) { + return errorator_type::template make_exception_future2<Value>(std::forward<Arg>(arg)); + } + +private: + template<typename PromiseT, typename Func> + static void satisfy_with_result_of(PromiseT&& pr, Func&& func) { + // this may use the protected variant of `seastar::future::forward_to()` + // because: + // 1. `seastar::future` established a friendship with with all + // specializations of `seastar::futurize`, including this + // one (we're in the `seastar` namespace!) WHILE + // 2. any errorated future declares now the friendship with any + // `seastar::futurize<...>`. + func().forward_to(std::move(pr)); + } + template <typename U> + friend class future; +}; + +template <template <class> class Container, + class Value> +struct continuation_base_from_future<Container<::crimson::errorated_future_marker<Value>>> { + using type = continuation_base<Value>; +}; + +} // namespace seastar diff --git a/src/crimson/common/exception.h b/src/crimson/common/exception.h new file mode 100644 index 000000000..05caf5ebd --- /dev/null +++ b/src/crimson/common/exception.h @@ -0,0 +1,50 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <exception> +#include <seastar/core/future.hh> +#include <seastar/core/future-util.hh> + +#include "crimson/common/log.h" + +namespace crimson::common { + +class system_shutdown_exception final : public std::exception{ +public: + const char* what() const noexcept final { + return "system shutting down"; + } +}; + +class actingset_changed final : public std::exception { +public: + actingset_changed(bool sp) : still_primary(sp) {} + const char* what() const noexcept final { + return "acting set changed"; + } + bool is_primary() const { + return still_primary; + } +private: + const bool still_primary; +}; + +template<typename Func, typename... Args> +inline seastar::future<> handle_system_shutdown(Func&& func, Args&&... args) +{ + return seastar::futurize_invoke(std::forward<Func>(func), + std::forward<Args>(args)...) 
+ .handle_exception([](std::exception_ptr eptr) { + if (*eptr.__cxa_exception_type() == + typeid(crimson::common::system_shutdown_exception)) { + crimson::get_logger(ceph_subsys_osd).debug( + "operation skipped, system shutdown"); + return seastar::now(); + } + std::rethrow_exception(eptr); + }); +} + +} diff --git a/src/crimson/common/fixed_kv_node_layout.h b/src/crimson/common/fixed_kv_node_layout.h new file mode 100644 index 000000000..4c7cc2e76 --- /dev/null +++ b/src/crimson/common/fixed_kv_node_layout.h @@ -0,0 +1,700 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <iostream> + +#include "include/byteorder.h" + +#include "crimson/common/layout.h" + +namespace crimson::common { + +template <typename T, bool is_const> +struct maybe_const_t { +}; +template<typename T> +struct maybe_const_t<T, true> { + using type = const T*; +}; +template<typename T> +struct maybe_const_t<T, false> { + using type = T*; +}; + + +/** + * FixedKVNodeLayout + * + * Reusable implementation of a fixed size block mapping + * K -> V with internal representations KINT and VINT. + * + * Uses absl::container_internal::Layout for the actual memory layout. + * + * The primary interface exposed is centered on the iterator + * and related methods. + * + * Also included are helpers for doing splits and merges as for a btree. + */ +template < + size_t CAPACITY, + typename Meta, + typename MetaInt, + typename K, + typename KINT, + typename V, + typename VINT, + bool VALIDATE_INVARIANTS=true> +class FixedKVNodeLayout { + char *buf = nullptr; + + using L = absl::container_internal::Layout<ceph_le32, MetaInt, KINT, VINT>; + static constexpr L layout{1, 1, CAPACITY, CAPACITY}; + +public: + template <bool is_const> + struct iter_t { + friend class FixedKVNodeLayout; + using parent_t = typename maybe_const_t<FixedKVNodeLayout, is_const>::type; + + parent_t node; + uint16_t offset; + + iter_t( + parent_t parent, + uint16_t offset) : node(parent), offset(offset) {} + + iter_t(const iter_t &) = default; + iter_t(iter_t &&) = default; + iter_t &operator=(const iter_t &) = default; + iter_t &operator=(iter_t &&) = default; + + operator iter_t<!is_const>() const { + static_assert(!is_const); + return iter_t<!is_const>(node, offset); + } + + // Work nicely with for loops without requiring a nested type. 
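// e.g. (illustrative):
//   for (auto i : node) {
//     [[maybe_unused]] auto key = i.get_key();
//   }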
+ iter_t &operator*() { return *this; } + iter_t *operator->() { return this; } + + iter_t operator++(int) { + auto ret = *this; + ++offset; + return ret; + } + + iter_t &operator++() { + ++offset; + return *this; + } + + uint16_t operator-(const iter_t &rhs) const { + assert(rhs.node == node); + return offset - rhs.offset; + } + + iter_t operator+(uint16_t off) const { + return iter_t( + node, + offset + off); + } + iter_t operator-(uint16_t off) const { + return iter_t( + node, + offset - off); + } + + bool operator==(const iter_t &rhs) const { + assert(node == rhs.node); + return rhs.offset == offset; + } + + bool operator!=(const iter_t &rhs) const { + return !(*this == rhs); + } + + K get_key() const { + return K(node->get_key_ptr()[offset]); + } + + K get_next_key_or_max() const { + auto next = *this + 1; + if (next == node->end()) + return std::numeric_limits<K>::max(); + else + return next->get_key(); + } + + void set_val(V val) const { + static_assert(!is_const); + node->get_val_ptr()[offset] = VINT(val); + } + + V get_val() const { + return V(node->get_val_ptr()[offset]); + }; + + bool contains(K addr) const { + return (get_key() <= addr) && (get_next_key_or_max() > addr); + } + + uint16_t get_offset() const { + return offset; + } + + private: + void set_key(K _lb) const { + static_assert(!is_const); + KINT lb; + lb = _lb; + node->get_key_ptr()[offset] = lb; + } + + typename maybe_const_t<char, is_const>::type get_key_ptr() const { + return reinterpret_cast< + typename maybe_const_t<char, is_const>::type>( + node->get_key_ptr() + offset); + } + + typename maybe_const_t<char, is_const>::type get_val_ptr() const { + return reinterpret_cast< + typename maybe_const_t<char, is_const>::type>( + node->get_val_ptr() + offset); + } + }; + using const_iterator = iter_t<true>; + using iterator = iter_t<false>; + + struct delta_t { + enum class op_t : uint8_t { + INSERT, + REMOVE, + UPDATE, + } op; + KINT key; + VINT val; + + void replay(FixedKVNodeLayout &l) { + switch (op) { + case op_t::INSERT: { + l.insert(l.lower_bound(key), key, val); + break; + } + case op_t::REMOVE: { + auto iter = l.find(key); + assert(iter != l.end()); + l.remove(iter); + break; + } + case op_t::UPDATE: { + auto iter = l.find(key); + assert(iter != l.end()); + l.update(iter, val); + break; + } + default: + assert(0 == "Impossible"); + } + } + + bool operator==(const delta_t &rhs) const { + return op == rhs.op && + key == rhs.key && + val == rhs.val; + } + }; + +public: + class delta_buffer_t { + std::vector<delta_t> buffer; + public: + bool empty() const { + return buffer.empty(); + } + void insert( + const K &key, + const V &val) { + KINT k; + k = key; + buffer.push_back( + delta_t{ + delta_t::op_t::INSERT, + k, + VINT(val) + }); + } + void update( + const K &key, + const V &val) { + KINT k; + k = key; + buffer.push_back( + delta_t{ + delta_t::op_t::UPDATE, + k, + VINT(val) + }); + } + void remove(const K &key) { + KINT k; + k = key; + buffer.push_back( + delta_t{ + delta_t::op_t::REMOVE, + k, + VINT() + }); + } + void replay(FixedKVNodeLayout &node) { + for (auto &i: buffer) { + i.replay(node); + } + } + size_t get_bytes() const { + return buffer.size() * sizeof(delta_t); + } + void copy_out(char *out, size_t len) { + assert(len == get_bytes()); + ::memcpy(out, reinterpret_cast<const void *>(buffer.data()), get_bytes()); + buffer.clear(); + } + void copy_in(const char *out, size_t len) { + assert(empty()); + assert(len % sizeof(delta_t) == 0); + buffer = std::vector( + reinterpret_cast<const delta_t*>(out), + 
reinterpret_cast<const delta_t*>(out + len)); + } + bool operator==(const delta_buffer_t &rhs) const { + return buffer == rhs.buffer; + } + }; + + void journal_insert( + const_iterator _iter, + const K &key, + const V &val, + delta_buffer_t *recorder) { + auto iter = iterator(this, _iter.offset); + if (recorder) { + recorder->insert( + key, + val); + } + insert(iter, key, val); + } + + void journal_update( + const_iterator _iter, + const V &val, + delta_buffer_t *recorder) { + auto iter = iterator(this, _iter.offset); + if (recorder) { + recorder->update(iter->get_key(), val); + } + update(iter, val); + } + + void journal_replace( + const_iterator _iter, + const K &key, + const V &val, + delta_buffer_t *recorder) { + auto iter = iterator(this, _iter.offset); + if (recorder) { + recorder->remove(iter->get_key()); + recorder->insert(key, val); + } + replace(iter, key, val); + } + + + void journal_remove( + const_iterator _iter, + delta_buffer_t *recorder) { + auto iter = iterator(this, _iter.offset); + if (recorder) { + recorder->remove(iter->get_key()); + } + remove(iter); + } + + + FixedKVNodeLayout(char *buf) : + buf(buf) {} + + virtual ~FixedKVNodeLayout() = default; + + const_iterator begin() const { + return const_iterator( + this, + 0); + } + + const_iterator end() const { + return const_iterator( + this, + get_size()); + } + + iterator begin() { + return iterator( + this, + 0); + } + + iterator end() { + return iterator( + this, + get_size()); + } + + const_iterator iter_idx(uint16_t off) const { + return const_iterator( + this, + off); + } + + const_iterator find(K l) const { + auto ret = begin(); + for (; ret != end(); ++ret) { + if (ret->get_key() == l) + break; + } + return ret; + } + iterator find(K l) { + const auto &tref = *this; + return iterator(this, tref.find(l).offset); + } + + const_iterator lower_bound(K l) const { + auto ret = begin(); + for (; ret != end(); ++ret) { + if (ret->get_key() >= l) + break; + } + return ret; + } + iterator lower_bound(K l) { + const auto &tref = *this; + return iterator(this, tref.lower_bound(l).offset); + } + + const_iterator upper_bound(K l) const { + auto ret = begin(); + for (; ret != end(); ++ret) { + if (ret->get_key() > l) + break; + } + return ret; + } + iterator upper_bound(K l) { + const auto &tref = *this; + return iterator(this, tref.upper_bound(l).offset); + } + + const_iterator get_split_pivot() const { + return iter_idx(get_size() / 2); + } + + uint16_t get_size() const { + return *layout.template Pointer<0>(buf); + } + + /** + * set_size + * + * Set size representation to match size + */ + void set_size(uint16_t size) { + *layout.template Pointer<0>(buf) = size; + } + + /** + * get_meta/set_meta + * + * Enables stashing a templated type within the layout. 
+ * Cannot be modified after initial write as it is not represented + * in delta_t + */ + Meta get_meta() const { + MetaInt &metaint = *layout.template Pointer<1>(buf); + return Meta(metaint); + } + void set_meta(const Meta &meta) { + *layout.template Pointer<1>(buf) = MetaInt(meta); + } + + constexpr static size_t get_capacity() { + return CAPACITY; + } + + bool operator==(const FixedKVNodeLayout &rhs) const { + if (get_size() != rhs.get_size()) { + return false; + } + + auto iter = begin(); + auto iter2 = rhs.begin(); + while (iter != end()) { + if (iter->get_key() != iter2->get_key() || + iter->get_val() != iter2->get_val()) { + return false; + } + iter++; + iter2++; + } + return true; + } + + /** + * split_into + * + * Takes *this and splits its contents into left and right. + */ + K split_into( + FixedKVNodeLayout &left, + FixedKVNodeLayout &right) const { + auto piviter = get_split_pivot(); + + left.copy_from_foreign(left.begin(), begin(), piviter); + left.set_size(piviter - begin()); + + right.copy_from_foreign(right.begin(), piviter, end()); + right.set_size(end() - piviter); + + auto [lmeta, rmeta] = get_meta().split_into(piviter->get_key()); + left.set_meta(lmeta); + right.set_meta(rmeta); + + return piviter->get_key(); + } + + /** + * merge_from + * + * Takes two nodes and copies their contents into *this. + * + * precondition: left.size() + right.size() < CAPACITY + */ + void merge_from( + const FixedKVNodeLayout &left, + const FixedKVNodeLayout &right) + { + copy_from_foreign( + end(), + left.begin(), + left.end()); + set_size(left.get_size()); + copy_from_foreign( + end(), + right.begin(), + right.end()); + set_size(left.get_size() + right.get_size()); + set_meta(Meta::merge_from(left.get_meta(), right.get_meta())); + } + + /** + * balance_into_new_nodes + * + * Takes the contents of left and right and copies them into + * replacement_left and replacement_right such that in the + * event that the number of elements is odd the extra goes to + * the left side iff prefer_left. + */ + static K balance_into_new_nodes( + const FixedKVNodeLayout &left, + const FixedKVNodeLayout &right, + bool prefer_left, + FixedKVNodeLayout &replacement_left, + FixedKVNodeLayout &replacement_right) + { + auto total = left.get_size() + right.get_size(); + auto pivot_idx = (left.get_size() + right.get_size()) / 2; + if (total % 2 && prefer_left) { + pivot_idx++; + } + auto replacement_pivot = pivot_idx >= left.get_size() ? 
+ right.iter_idx(pivot_idx - left.get_size())->get_key() : + left.iter_idx(pivot_idx)->get_key(); + + if (pivot_idx < left.get_size()) { + replacement_left.copy_from_foreign( + replacement_left.end(), + left.begin(), + left.iter_idx(pivot_idx)); + replacement_left.set_size(pivot_idx); + + replacement_right.copy_from_foreign( + replacement_right.end(), + left.iter_idx(pivot_idx), + left.end()); + + replacement_right.set_size(left.get_size() - pivot_idx); + replacement_right.copy_from_foreign( + replacement_right.end(), + right.begin(), + right.end()); + replacement_right.set_size(total - pivot_idx); + } else { + replacement_left.copy_from_foreign( + replacement_left.end(), + left.begin(), + left.end()); + replacement_left.set_size(left.get_size()); + + replacement_left.copy_from_foreign( + replacement_left.end(), + right.begin(), + right.iter_idx(pivot_idx - left.get_size())); + replacement_left.set_size(pivot_idx); + + replacement_right.copy_from_foreign( + replacement_right.end(), + right.iter_idx(pivot_idx - left.get_size()), + right.end()); + replacement_right.set_size(total - pivot_idx); + } + + auto [lmeta, rmeta] = Meta::rebalance( + left.get_meta(), right.get_meta(), replacement_pivot); + replacement_left.set_meta(lmeta); + replacement_right.set_meta(rmeta); + return replacement_pivot; + } + +private: + void insert( + iterator iter, + const K &key, + const V &val) { + if (VALIDATE_INVARIANTS) { + if (iter != begin()) { + assert((iter - 1)->get_key() < key); + } + if (iter != end()) { + assert(iter->get_key() > key); + } + assert(get_size() < CAPACITY); + } + copy_from_local(iter + 1, iter, end()); + iter->set_key(key); + iter->set_val(val); + set_size(get_size() + 1); + } + + void update( + iterator iter, + V val) { + assert(iter != end()); + iter->set_val(val); + } + + void replace( + iterator iter, + const K &key, + const V &val) { + assert(iter != end()); + if (VALIDATE_INVARIANTS) { + if (iter != begin()) { + assert((iter - 1)->get_key() < key); + } + if ((iter + 1) != end()) { + assert((iter + 1)->get_key() > key); + } + } + iter->set_key(key); + iter->set_val(val); + } + + void remove(iterator iter) { + assert(iter != end()); + copy_from_local(iter, iter + 1, end()); + set_size(get_size() - 1); + } + + /** + * get_key_ptr + * + * Get pointer to start of key array + */ + KINT *get_key_ptr() { + return layout.template Pointer<2>(buf); + } + const KINT *get_key_ptr() const { + return layout.template Pointer<2>(buf); + } + + /** + * get_val_ptr + * + * Get pointer to start of val array + */ + VINT *get_val_ptr() { + return layout.template Pointer<3>(buf); + } + const VINT *get_val_ptr() const { + return layout.template Pointer<3>(buf); + } + + /** + * node_resolve/unresolve_vals + * + * If the representation for values depends in some way on the + * node in which they are located, users may implement + * resolve/unresolve to enable copy_from_foreign to handle that + * transition. + */ + virtual void node_resolve_vals(iterator from, iterator to) const {} + virtual void node_unresolve_vals(iterator from, iterator to) const {} + + /** + * copy_from_foreign + * + * Copies entries from [from_src, to_src) to tgt. + * + * tgt and from_src must be from different nodes. + * from_src and to_src must be from the same node. 
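+   *
+   * (Illustrative note: this is the low-level copy primitive behind
+   * split_into(), merge_from() and balance_into_new_nodes() above.)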
+ */ + static void copy_from_foreign( + iterator tgt, + const_iterator from_src, + const_iterator to_src) { + assert(tgt->node != from_src->node); + assert(to_src->node == from_src->node); + memcpy( + tgt->get_val_ptr(), from_src->get_val_ptr(), + to_src->get_val_ptr() - from_src->get_val_ptr()); + memcpy( + tgt->get_key_ptr(), from_src->get_key_ptr(), + to_src->get_key_ptr() - from_src->get_key_ptr()); + from_src->node->node_resolve_vals(tgt, tgt + (to_src - from_src)); + tgt->node->node_unresolve_vals(tgt, tgt + (to_src - from_src)); + } + + /** + * copy_from_local + * + * Copies entries from [from_src, to_src) to tgt. + * + * tgt, from_src, and to_src must be from the same node. + */ + static void copy_from_local( + iterator tgt, + iterator from_src, + iterator to_src) { + assert(tgt->node == from_src->node); + assert(to_src->node == from_src->node); + memmove( + tgt->get_val_ptr(), from_src->get_val_ptr(), + to_src->get_val_ptr() - from_src->get_val_ptr()); + memmove( + tgt->get_key_ptr(), from_src->get_key_ptr(), + to_src->get_key_ptr() - from_src->get_key_ptr()); + } +}; + +} diff --git a/src/crimson/common/formatter.cc b/src/crimson/common/formatter.cc new file mode 100644 index 000000000..677216224 --- /dev/null +++ b/src/crimson/common/formatter.cc @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "formatter.h" + +#include <fmt/format.h> +#if FMT_VERSION >= 60000 +#include <fmt/chrono.h> +#else +#include <fmt/time.h> +#endif + + +template <> +struct fmt::formatter<seastar::lowres_system_clock::time_point> { + // ignore the format string + template <typename ParseContext> + constexpr auto parse(ParseContext &ctx) { return ctx.begin(); } + + template <typename FormatContext> + auto format(const seastar::lowres_system_clock::time_point& t, + FormatContext& ctx) { + std::time_t tt = std::chrono::duration_cast<std::chrono::seconds>( + t.time_since_epoch()).count(); + auto milliseconds = (t.time_since_epoch() % + std::chrono::seconds(1)).count(); + return fmt::format_to(ctx.out(), "{:%Y-%m-%d %H:%M:%S} {:03d}", + fmt::localtime(tt), milliseconds); + } +}; + +template <> +struct fmt::formatter<ceph::coarse_real_clock::time_point> { + // ignore the format string + template <typename ParseContext> + constexpr auto parse(ParseContext &ctx) { return ctx.begin(); } + + template <typename FormatContext> + auto format(const ceph::coarse_real_clock::time_point& t, + FormatContext& ctx) { + std::time_t tt = std::chrono::duration_cast<std::chrono::seconds>( + t.time_since_epoch()).count(); + auto milliseconds = (t.time_since_epoch() % + std::chrono::seconds(1)).count(); + return fmt::format_to(ctx.out(), "{:%Y-%m-%d %H:%M:%S} {:03d}", + fmt::localtime(tt), milliseconds); + } +}; + +namespace std { + +ostream& operator<<(ostream& out, + const seastar::lowres_system_clock::time_point& t) +{ + return out << fmt::format("{}", t); +} + +ostream& operator<<(ostream& out, + const ceph::coarse_real_clock::time_point& t) +{ + return out << fmt::format("{}", t); +} + +} diff --git a/src/crimson/common/formatter.h b/src/crimson/common/formatter.h new file mode 100644 index 000000000..1775b0954 --- /dev/null +++ b/src/crimson/common/formatter.h @@ -0,0 +1,15 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <seastar/core/lowres_clock.hh> + +#include "common/ceph_time.h" + +namespace std { + +ostream& operator<<(ostream& out, + const 
seastar::lowres_system_clock::time_point& t); +ostream& operator<<(ostream& out, + const ceph::coarse_real_clock::time_point& t); + +} diff --git a/src/crimson/common/gated.h b/src/crimson/common/gated.h new file mode 100644 index 000000000..7d901b6b1 --- /dev/null +++ b/src/crimson/common/gated.h @@ -0,0 +1,51 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <seastar/core/gate.hh> +#include <seastar/core/future.hh> +#include <seastar/core/future-util.hh> + +#include "crimson/common/exception.h" +#include "crimson/common/log.h" +#include "include/ceph_assert.h" + +namespace crimson::common { + +class Gated { + public: + static seastar::logger& gated_logger() { + return crimson::get_logger(ceph_subsys_osd); + } + template <typename Func, typename T> + inline void dispatch_in_background(const char* what, T& who, Func&& func) { + (void) dispatch(what, who, func); + } + template <typename Func, typename T> + inline seastar::future<> dispatch(const char* what, T& who, Func&& func) { + return seastar::with_gate(pending_dispatch, std::forward<Func>(func) + ).handle_exception([what, &who] (std::exception_ptr eptr) { + if (*eptr.__cxa_exception_type() == typeid(system_shutdown_exception)) { + gated_logger().debug( + "{}, {} skipped, system shutdown", who, what); + return; + } + gated_logger().error( + "{} dispatch() {} caught exception: {}", who, what, eptr); + assert(*eptr.__cxa_exception_type() + == typeid(seastar::gate_closed_exception)); + }); + } + + seastar::future<> close() { + return pending_dispatch.close(); + } + bool is_closed() const { + return pending_dispatch.is_closed(); + } + private: + seastar::gate pending_dispatch; +}; + +}// namespace crimson::common diff --git a/src/crimson/common/layout.h b/src/crimson/common/layout.h new file mode 100644 index 000000000..9d54ecd1d --- /dev/null +++ b/src/crimson/common/layout.h @@ -0,0 +1,737 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// MOTIVATION AND TUTORIAL +// +// If you want to put in a single heap allocation N doubles followed by M ints, +// it's easy if N and M are known at compile time. +// +// struct S { +// double a[N]; +// int b[M]; +// }; +// +// S* p = new S; +// +// But what if N and M are known only in run time? Class template Layout to the +// rescue! It's a portable generalization of the technique known as struct hack. +// +// // This object will tell us everything we need to know about the memory +// // layout of double[N] followed by int[M]. It's structurally identical to +// // size_t[2] that stores N and M. It's very cheap to create. +// const Layout<double, int> layout(N, M); +// +// // Allocate enough memory for both arrays. `AllocSize()` tells us how much +// // memory is needed. We are free to use any allocation function we want as +// // long as it returns aligned memory. 
+// std::unique_ptr<unsigned char[]> p(new unsigned char[layout.AllocSize()]); +// +// // Obtain the pointer to the array of doubles. +// // Equivalent to `reinterpret_cast<double*>(p.get())`. +// // +// // We could have written layout.Pointer<0>(p) instead. If all the types are +// // unique you can use either form, but if some types are repeated you must +// // use the index form. +// double* a = layout.Pointer<double>(p.get()); +// +// // Obtain the pointer to the array of ints. +// // Equivalent to `reinterpret_cast<int*>(p.get() + N * 8)`. +// int* b = layout.Pointer<int>(p); +// +// If we are unable to specify sizes of all fields, we can pass as many sizes as +// we can to `Partial()`. In return, it'll allow us to access the fields whose +// locations and sizes can be computed from the provided information. +// `Partial()` comes in handy when the array sizes are embedded into the +// allocation. +// +// // size_t[1] containing N, size_t[1] containing M, double[N], int[M]. +// using L = Layout<size_t, size_t, double, int>; +// +// unsigned char* Allocate(size_t n, size_t m) { +// const L layout(1, 1, n, m); +// unsigned char* p = new unsigned char[layout.AllocSize()]; +// *layout.Pointer<0>(p) = n; +// *layout.Pointer<1>(p) = m; +// return p; +// } +// +// void Use(unsigned char* p) { +// // First, extract N and M. +// // Specify that the first array has only one element. Using `prefix` we +// // can access the first two arrays but not more. +// constexpr auto prefix = L::Partial(1); +// size_t n = *prefix.Pointer<0>(p); +// size_t m = *prefix.Pointer<1>(p); +// +// // Now we can get pointers to the payload. +// const L layout(1, 1, n, m); +// double* a = layout.Pointer<double>(p); +// int* b = layout.Pointer<int>(p); +// } +// +// The layout we used above combines fixed-size with dynamically-sized fields. +// This is quite common. Layout is optimized for this use case and generates +// optimal code. All computations that can be performed at compile time are +// indeed performed at compile time. +// +// Efficiency tip: The order of fields matters. In `Layout<T1, ..., TN>` try to +// ensure that `alignof(T1) >= ... >= alignof(TN)`. This way you'll have no +// padding in between arrays. +// +// You can manually override the alignment of an array by wrapping the type in +// `Aligned<T, N>`. `Layout<..., Aligned<T, N>, ...>` has exactly the same API +// and behavior as `Layout<..., T, ...>` except that the first element of the +// array of `T` is aligned to `N` (the rest of the elements follow without +// padding). `N` cannot be less than `alignof(T)`. +// +// `AllocSize()` and `Pointer()` are the most basic methods for dealing with +// memory layouts. Check out the reference or code below to discover more. +// +// EXAMPLE +// +// // Immutable move-only string with sizeof equal to sizeof(void*). The +// // string size and the characters are kept in the same heap allocation. +// class CompactString { +// public: +// CompactString(const char* s = "") { +// const size_t size = strlen(s); +// // size_t[1] followed by char[size + 1]. +// const L layout(1, size + 1); +// p_.reset(new unsigned char[layout.AllocSize()]); +// // If running under ASAN, mark the padding bytes, if any, to catch +// // memory errors. +// layout.PoisonPadding(p_.get()); +// // Store the size in the allocation. +// *layout.Pointer<size_t>(p_.get()) = size; +// // Store the characters in the allocation. 
+// memcpy(layout.Pointer<char>(p_.get()), s, size + 1); +// } +// +// size_t size() const { +// // Equivalent to reinterpret_cast<size_t&>(*p). +// return *L::Partial().Pointer<size_t>(p_.get()); +// } +// +// const char* c_str() const { +// // Equivalent to reinterpret_cast<char*>(p.get() + sizeof(size_t)). +// // The argument in Partial(1) specifies that we have size_t[1] in front +// // of the characters. +// return L::Partial(1).Pointer<char>(p_.get()); +// } +// +// private: +// // Our heap allocation contains a size_t followed by an array of chars. +// using L = Layout<size_t, char>; +// std::unique_ptr<unsigned char[]> p_; +// }; +// +// int main() { +// CompactString s = "hello"; +// assert(s.size() == 5); +// assert(strcmp(s.c_str(), "hello") == 0); +// } +// +// DOCUMENTATION +// +// The interface exported by this file consists of: +// - class `Layout<>` and its public members. +// - The public members of class `internal_layout::LayoutImpl<>`. That class +// isn't intended to be used directly, and its name and template parameter +// list are internal implementation details, but the class itself provides +// most of the functionality in this file. See comments on its members for +// detailed documentation. +// +// `Layout<T1,... Tn>::Partial(count1,..., countm)` (where `m` <= `n`) returns a +// `LayoutImpl<>` object. `Layout<T1,..., Tn> layout(count1,..., countn)` +// creates a `Layout` object, which exposes the same functionality by inheriting +// from `LayoutImpl<>`. + +#ifndef ABSL_CONTAINER_INTERNAL_LAYOUT_H_ +#define ABSL_CONTAINER_INTERNAL_LAYOUT_H_ + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <ostream> +#include <string> +#include <tuple> +#include <type_traits> +#include <typeinfo> +#include <utility> + +#ifdef ADDRESS_SANITIZER +#include <sanitizer/asan_interface.h> +#endif + +// for C++20 std::span +#include <boost/beast/core/span.hpp> +#include <fmt/format.h> + +#if defined(__GXX_RTTI) +#define ABSL_INTERNAL_HAS_CXA_DEMANGLE +#endif + +#ifdef ABSL_INTERNAL_HAS_CXA_DEMANGLE +#include <cxxabi.h> +#endif + +namespace absl { +namespace container_internal { + +// A type wrapper that instructs `Layout` to use the specific alignment for the +// array. `Layout<..., Aligned<T, N>, ...>` has exactly the same API +// and behavior as `Layout<..., T, ...>` except that the first element of the +// array of `T` is aligned to `N` (the rest of the elements follow without +// padding). +// +// Requires: `N >= alignof(T)` and `N` is a power of 2. 
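+//
+// For example (an illustrative sketch, not part of the original tutorial):
+//
+//   // int[3] followed by char[5], with the chars aligned to 8 bytes
+//   // rather than alignof(char) == 1.
+//   using L = Layout<int, Aligned<char, 8>>;
+//   L layout(3, 5);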
+template <class T, size_t N> +struct Aligned; + +namespace internal_layout { + +template <class T> +struct NotAligned {}; + +template <class T, size_t N> +struct NotAligned<const Aligned<T, N>> { + static_assert(sizeof(T) == 0, "Aligned<T, N> cannot be const-qualified"); +}; + +template <size_t> +using IntToSize = size_t; + +template <class> +using TypeToSize = size_t; + +template <class T> +struct Type : NotAligned<T> { + using type = T; +}; + +template <class T, size_t N> +struct Type<Aligned<T, N>> { + using type = T; +}; + +template <class T> +struct SizeOf : NotAligned<T>, std::integral_constant<size_t, sizeof(T)> {}; + +template <class T, size_t N> +struct SizeOf<Aligned<T, N>> : std::integral_constant<size_t, sizeof(T)> {}; + +// Note: workaround for https://gcc.gnu.org/PR88115 +template <class T> +struct AlignOf : NotAligned<T> { + static constexpr size_t value = alignof(T); +}; + +template <class T, size_t N> +struct AlignOf<Aligned<T, N>> { + static_assert(N % alignof(T) == 0, + "Custom alignment can't be lower than the type's alignment"); + static constexpr size_t value = N; +}; + +// Does `Ts...` contain `T`? +template <class T, class... Ts> +using Contains = std::disjunction<std::is_same<T, Ts>...>; + +template <class From, class To> +using CopyConst = + typename std::conditional_t<std::is_const_v<From>, const To, To>; + +// Note: We're not qualifying this with absl:: because it doesn't compile under +// MSVC. +template <class T> +using SliceType = boost::beast::span<T>; + +// This namespace contains no types. It prevents functions defined in it from +// being found by ADL. +namespace adl_barrier { + +template <class Needle, class... Ts> +constexpr size_t Find(Needle, Needle, Ts...) { + static_assert(!Contains<Needle, Ts...>(), "Duplicate element type"); + return 0; +} + +template <class Needle, class T, class... Ts> +constexpr size_t Find(Needle, T, Ts...) { + return adl_barrier::Find(Needle(), Ts()...) + 1; +} + +constexpr bool IsPow2(size_t n) { return !(n & (n - 1)); } + +// Returns `q * m` for the smallest `q` such that `q * m >= n`. +// Requires: `m` is a power of two. It's enforced by IsLegalElementType below. +constexpr size_t Align(size_t n, size_t m) { return (n + m - 1) & ~(m - 1); } + +constexpr size_t Min(size_t a, size_t b) { return b < a ? b : a; } + +constexpr size_t Max(size_t a) { return a; } + +template <class... Ts> +constexpr size_t Max(size_t a, size_t b, Ts... rest) { + return adl_barrier::Max(b < a ? a : b, rest...); +} + +template <class T> +std::string TypeName() { + std::string out; + int status = 0; + char* demangled = nullptr; +#ifdef ABSL_INTERNAL_HAS_CXA_DEMANGLE + demangled = abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, &status); +#endif + if (status == 0 && demangled != nullptr) { // Demangling succeeded. + out = fmt::format("<{}>", demangled); + free(demangled); + } else { +#if defined(__GXX_RTTI) || defined(_CPPRTTI) + out = fmt::format("<{}>", typeid(T).name()); +#endif + } + return out; +} + +} // namespace adl_barrier + +template <bool C> +using EnableIf = typename std::enable_if_t<C, int>; + +// Can `T` be a template argument of `Layout`? 
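+// For example (illustrative): `int` and `Aligned<char, 8>` are legal,
+// while `int&`, `volatile int`, and `Aligned<char, 7>` (alignment not a
+// power of two) are rejected by the predicate below.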
+template <class T> +using IsLegalElementType = std::integral_constant< + bool, !std::is_reference_v<T> && !std::is_volatile_v<T> && + !std::is_reference_v<typename Type<T>::type> && + !std::is_volatile_v<typename Type<T>::type> && + adl_barrier::IsPow2(AlignOf<T>::value)>; + +template <class Elements, class SizeSeq, class OffsetSeq> +class LayoutImpl; + +// Public base class of `Layout` and the result type of `Layout::Partial()`. +// +// `Elements...` contains all template arguments of `Layout` that created this +// instance. +// +// `SizeSeq...` is `[0, NumSizes)` where `NumSizes` is the number of arguments +// passed to `Layout::Partial()` or `Layout::Layout()`. +// +// `OffsetSeq...` is `[0, NumOffsets)` where `NumOffsets` is +// `Min(sizeof...(Elements), NumSizes + 1)` (the number of arrays for which we +// can compute offsets). +template <class... Elements, size_t... SizeSeq, size_t... OffsetSeq> +class LayoutImpl<std::tuple<Elements...>, std::index_sequence<SizeSeq...>, + std::index_sequence<OffsetSeq...>> { + private: + static_assert(sizeof...(Elements) > 0, "At least one field is required"); + static_assert(std::conjunction_v<IsLegalElementType<Elements>...>, + "Invalid element type (see IsLegalElementType)"); + + enum { + NumTypes = sizeof...(Elements), + NumSizes = sizeof...(SizeSeq), + NumOffsets = sizeof...(OffsetSeq), + }; + + // These are guaranteed by `Layout`. + static_assert(NumOffsets == adl_barrier::Min(NumTypes, NumSizes + 1), + "Internal error"); + static_assert(NumTypes > 0, "Internal error"); + + // Returns the index of `T` in `Elements...`. Results in a compilation error + // if `Elements...` doesn't contain exactly one instance of `T`. + template <class T> + static constexpr size_t ElementIndex() { + static_assert(Contains<Type<T>, Type<typename Type<Elements>::type>...>(), + "Type not found"); + return adl_barrier::Find(Type<T>(), + Type<typename Type<Elements>::type>()...); + } + + template <size_t N> + using ElementAlignment = + AlignOf<typename std::tuple_element<N, std::tuple<Elements...>>::type>; + + public: + // Element types of all arrays packed in a tuple. + using ElementTypes = std::tuple<typename Type<Elements>::type...>; + + // Element type of the Nth array. + template <size_t N> + using ElementType = typename std::tuple_element<N, ElementTypes>::type; + + constexpr explicit LayoutImpl(IntToSize<SizeSeq>... sizes) + : size_{sizes...} {} + + // Alignment of the layout, equal to the strictest alignment of all elements. + // All pointers passed to the methods of layout must be aligned to this value. + static constexpr size_t Alignment() { + return adl_barrier::Max(AlignOf<Elements>::value...); + } + + // Offset in bytes of the Nth array. + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // assert(x.Offset<0>() == 0); // The ints starts from 0. + // assert(x.Offset<1>() == 16); // The doubles starts from 16. + // + // Requires: `N <= NumSizes && N < sizeof...(Ts)`. + template <size_t N, EnableIf<N == 0> = 0> + constexpr size_t Offset() const { + return 0; + } + + template <size_t N, EnableIf<N != 0> = 0> + constexpr size_t Offset() const { + static_assert(N < NumOffsets, "Index out of bounds"); + return adl_barrier::Align( + Offset<N - 1>() + SizeOf<ElementType<N - 1>>() * size_[N - 1], + ElementAlignment<N>::value); + } + + // Offset in bytes of the array with the specified element type. There must + // be exactly one such array and its zero-based index must be at most + // `NumSizes`. 
+ // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // assert(x.Offset<int>() == 0); // The ints starts from 0. + // assert(x.Offset<double>() == 16); // The doubles starts from 16. + template <class T> + constexpr size_t Offset() const { + return Offset<ElementIndex<T>()>(); + } + + // Offsets in bytes of all arrays for which the offsets are known. + constexpr std::array<size_t, NumOffsets> Offsets() const { + return {{Offset<OffsetSeq>()...}}; + } + + // The number of elements in the Nth array. This is the Nth argument of + // `Layout::Partial()` or `Layout::Layout()` (zero-based). + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // assert(x.Size<0>() == 3); + // assert(x.Size<1>() == 4); + // + // Requires: `N < NumSizes`. + template <size_t N> + constexpr size_t Size() const { + static_assert(N < NumSizes, "Index out of bounds"); + return size_[N]; + } + + // The number of elements in the array with the specified element type. + // There must be exactly one such array and its zero-based index must be + // at most `NumSizes`. + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // assert(x.Size<int>() == 3); + // assert(x.Size<double>() == 4); + template <class T> + constexpr size_t Size() const { + return Size<ElementIndex<T>()>(); + } + + // The number of elements of all arrays for which they are known. + constexpr std::array<size_t, NumSizes> Sizes() const { + return {{Size<SizeSeq>()...}}; + } + + // Pointer to the beginning of the Nth array. + // + // `Char` must be `[const] [signed|unsigned] char`. + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // unsigned char* p = new unsigned char[x.AllocSize()]; + // int* ints = x.Pointer<0>(p); + // double* doubles = x.Pointer<1>(p); + // + // Requires: `N <= NumSizes && N < sizeof...(Ts)`. + // Requires: `p` is aligned to `Alignment()`. + template <size_t N, class Char> + CopyConst<Char, ElementType<N>>* Pointer(Char* p) const { + using C = typename std::remove_const<Char>::type; + static_assert( + std::is_same<C, char>() || std::is_same<C, unsigned char>() || + std::is_same<C, signed char>(), + "The argument must be a pointer to [const] [signed|unsigned] char"); + constexpr size_t alignment = Alignment(); + (void)alignment; + assert(reinterpret_cast<uintptr_t>(p) % alignment == 0); + return reinterpret_cast<CopyConst<Char, ElementType<N>>*>(p + Offset<N>()); + } + + // Pointer to the beginning of the array with the specified element type. + // There must be exactly one such array and its zero-based index must be at + // most `NumSizes`. + // + // `Char` must be `[const] [signed|unsigned] char`. + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // unsigned char* p = new unsigned char[x.AllocSize()]; + // int* ints = x.Pointer<int>(p); + // double* doubles = x.Pointer<double>(p); + // + // Requires: `p` is aligned to `Alignment()`. + template <class T, class Char> + CopyConst<Char, T>* Pointer(Char* p) const { + return Pointer<ElementIndex<T>()>(p); + } + + // Pointers to all arrays for which pointers are known. + // + // `Char` must be `[const] [signed|unsigned] char`. + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // unsigned char* p = new unsigned char[x.AllocSize()]; + // + // int* ints; + // double* doubles; + // std::tie(ints, doubles) = x.Pointers(p); + // + // Requires: `p` is aligned to `Alignment()`. 
+ // + // Note: We're not using ElementType alias here because it does not compile + // under MSVC. + template <class Char> + std::tuple<CopyConst< + Char, typename std::tuple_element<OffsetSeq, ElementTypes>::type>*...> + Pointers(Char* p) const { + return std::tuple<CopyConst<Char, ElementType<OffsetSeq>>*...>( + Pointer<OffsetSeq>(p)...); + } + + // The Nth array. + // + // `Char` must be `[const] [signed|unsigned] char`. + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // unsigned char* p = new unsigned char[x.AllocSize()]; + // Span<int> ints = x.Slice<0>(p); + // Span<double> doubles = x.Slice<1>(p); + // + // Requires: `N < NumSizes`. + // Requires: `p` is aligned to `Alignment()`. + template <size_t N, class Char> + SliceType<CopyConst<Char, ElementType<N>>> Slice(Char* p) const { + return SliceType<CopyConst<Char, ElementType<N>>>(Pointer<N>(p), Size<N>()); + } + + // The array with the specified element type. There must be exactly one + // such array and its zero-based index must be less than `NumSizes`. + // + // `Char` must be `[const] [signed|unsigned] char`. + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // unsigned char* p = new unsigned char[x.AllocSize()]; + // Span<int> ints = x.Slice<int>(p); + // Span<double> doubles = x.Slice<double>(p); + // + // Requires: `p` is aligned to `Alignment()`. + template <class T, class Char> + SliceType<CopyConst<Char, T>> Slice(Char* p) const { + return Slice<ElementIndex<T>()>(p); + } + + // All arrays with known sizes. + // + // `Char` must be `[const] [signed|unsigned] char`. + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // unsigned char* p = new unsigned char[x.AllocSize()]; + // + // Span<int> ints; + // Span<double> doubles; + // std::tie(ints, doubles) = x.Slices(p); + // + // Requires: `p` is aligned to `Alignment()`. + // + // Note: We're not using ElementType alias here because it does not compile + // under MSVC. + template <class Char> + std::tuple<SliceType<CopyConst< + Char, typename std::tuple_element<SizeSeq, ElementTypes>::type>>...> + Slices(Char* p) const { + // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63875 (fixed + // in 6.1). + (void)p; + return std::tuple<SliceType<CopyConst<Char, ElementType<SizeSeq>>>...>( + Slice<SizeSeq>(p)...); + } + + // The size of the allocation that fits all arrays. + // + // // int[3], 4 bytes of padding, double[4]. + // Layout<int, double> x(3, 4); + // unsigned char* p = new unsigned char[x.AllocSize()]; // 48 bytes + // + // Requires: `NumSizes == sizeof...(Ts)`. + constexpr size_t AllocSize() const { + static_assert(NumTypes == NumSizes, "You must specify sizes of all fields"); + return Offset<NumTypes - 1>() + + SizeOf<ElementType<NumTypes - 1>>() * size_[NumTypes - 1]; + } + + // If built with --config=asan, poisons padding bytes (if any) in the + // allocation. The pointer must point to a memory block at least + // `AllocSize()` bytes in length. + // + // `Char` must be `[const] [signed|unsigned] char`. + // + // Requires: `p` is aligned to `Alignment()`. 
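+  //
+  // Example (an illustrative sketch):
+  //
+  //   // char[5], 3 bytes of padding, int[3].
+  //   const Layout<char, int> x(5, 3);
+  //   unsigned char* p = new unsigned char[x.AllocSize()];
+  //   x.PoisonPadding(p);  // poisons bytes 5..7 under ASAN; no-op otherwise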
+  template <class Char, size_t N = NumOffsets - 1, EnableIf<N == 0> = 0>
+  void PoisonPadding(const Char* p) const {
+    Pointer<0>(p);  // verify the requirements on `Char` and `p`
+  }
+
+  template <class Char, size_t N = NumOffsets - 1, EnableIf<N != 0> = 0>
+  void PoisonPadding(const Char* p) const {
+    static_assert(N < NumOffsets, "Index out of bounds");
+    (void)p;
+#ifdef ADDRESS_SANITIZER
+    PoisonPadding<Char, N - 1>(p);
+    // The `if` is an optimization. It doesn't affect the observable behaviour.
+    if (ElementAlignment<N - 1>::value % ElementAlignment<N>::value) {
+      size_t start =
+          Offset<N - 1>() + SizeOf<ElementType<N - 1>>() * size_[N - 1];
+      ASAN_POISON_MEMORY_REGION(p + start, Offset<N>() - start);
+    }
+#endif
+  }
+
+  // Human-readable description of the memory layout. Useful for debugging.
+  // Slow.
+  //
+  //   // char[5], 3 bytes of padding, int[3], 4 bytes of padding, followed
+  //   // by an unknown number of doubles.
+  //   auto x = Layout<char, int, double>::Partial(5, 3);
+  //   assert(x.DebugString() ==
+  //          "@0<char>(1)[5]; @8<int>(4)[3]; @24<double>(8)");
+  //
+  // Each field is in the following format: @offset<type>(sizeof)[size] (<type>
+  // may be missing depending on the target platform). For example,
+  // @8<int>(4)[3] means that at offset 8 we have an array of ints, where each
+  // int is 4 bytes, and we have 3 of those ints. The size of the last field may
+  // be missing (as in the example above). Only fields with known offsets are
+  // described. Type names may differ across platforms: one compiler might
+  // produce "unsigned*" where another produces "unsigned int *".
+  std::string DebugString() const {
+    const auto offsets = Offsets();
+    const size_t sizes[] = {SizeOf<ElementType<OffsetSeq>>()...};
+    const std::string types[] = {
+        adl_barrier::TypeName<ElementType<OffsetSeq>>()...};
+    std::string res = fmt::format("@0{}({})", types[0], sizes[0]);
+    for (size_t i = 0; i != NumOffsets - 1; ++i) {
+      res += fmt::format("[{}]; @{}{}({})",
+                         size_[i], offsets[i + 1], types[i + 1], sizes[i + 1]);
+    }
+    // NumSizes is a constant that may be zero. Some compilers cannot see that
+    // inside the if statement "size_[NumSizes - 1]" must be valid.
+    int last = static_cast<int>(NumSizes) - 1;
+    if (NumTypes == NumSizes && last >= 0) {
+      res += fmt::format("[{}]", size_[last]);
+    }
+    return res;
+  }
+
+ private:
+  // Arguments of `Layout::Partial()` or `Layout::Layout()`.
+  size_t size_[NumSizes > 0 ? NumSizes : 1];
+};
+
+template <size_t NumSizes, class... Ts>
+using LayoutType = LayoutImpl<
+    std::tuple<Ts...>, std::make_index_sequence<NumSizes>,
+    std::make_index_sequence<adl_barrier::Min(sizeof...(Ts), NumSizes + 1)>>;
+
+} // namespace internal_layout
+
+// Descriptor of arrays of various types and sizes laid out in memory one after
+// another. See the top of the file for documentation.
+//
+// Check out the public API of internal_layout::LayoutImpl above. The type is
+// internal to the library but its methods are public, and they are inherited
+// by `Layout`.
+template <class... Ts>
+class Layout : public internal_layout::LayoutType<sizeof...(Ts), Ts...> {
+ public:
+  static_assert(sizeof...(Ts) > 0, "At least one field is required");
+  static_assert(
+      std::conjunction_v<internal_layout::IsLegalElementType<Ts>...>,
+      "Invalid element type (see IsLegalElementType)");
+
+  // The result type of `Partial()` with `NumSizes` arguments.
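+  //
+  // (Illustrative: for `Layout<size_t, char>`, `PartialType<1>` is the type
+  // returned by `Layout<size_t, char>::Partial(n)`.)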
+  template <size_t NumSizes>
+  using PartialType = internal_layout::LayoutType<NumSizes, Ts...>;
+
+  // `Layout` knows the element types of the arrays we want to lay out in
+  // memory but not the number of elements in each array.
+  // `Partial(size1, ..., sizeN)` allows us to specify the latter. The
+  // resulting immutable object can be used to obtain pointers to the
+  // individual arrays.
+  //
+  // It's allowed to pass fewer array sizes than the number of arrays. E.g.,
+  // if all you need is the offset of the second array, you only need to
+  // pass one argument -- the number of elements in the first array.
+  //
+  // // int[3] followed by 4 bytes of padding and an unknown number of
+  // // doubles.
+  // auto x = Layout<int, double>::Partial(3);
+  // // doubles start at byte 16.
+  // assert(x.Offset<1>() == 16);
+  //
+  // If you know the number of elements in all arrays, you can still call
+  // `Partial()` but it's more convenient to use the constructor of `Layout`.
+  //
+  // Layout<int, double> x(3, 5);
+  //
+  // Note: The sizes of the arrays must be specified in number of elements,
+  // not in bytes.
+  //
+  // Requires: `sizeof...(Sizes) <= sizeof...(Ts)`.
+  // Requires: all arguments are convertible to `size_t`.
+  template <class... Sizes>
+  static constexpr PartialType<sizeof...(Sizes)> Partial(Sizes&&... sizes) {
+    static_assert(sizeof...(Sizes) <= sizeof...(Ts));
+    return PartialType<sizeof...(Sizes)>(std::forward<Sizes>(sizes)...);
+  }
+
+  // Creates a layout with the sizes of all arrays specified. If you know
+  // only the sizes of the first N arrays (where N can be zero), you can use
+  // `Partial()` defined above. The constructor is essentially equivalent to
+  // calling `Partial()` and passing in all array sizes; the constructor is
+  // provided as a convenient abbreviation.
+  //
+  // Note: The sizes of the arrays must be specified in number of elements,
+  // not in bytes.
+  constexpr explicit Layout(internal_layout::TypeToSize<Ts>... sizes)
+      : internal_layout::LayoutType<sizeof...(Ts), Ts...>(sizes...)
{} +}; + +} // namespace container_internal +} // namespace absl + +#endif // ABSL_CONTAINER_INTERNAL_LAYOUT_H_ diff --git a/src/crimson/common/log.cc b/src/crimson/common/log.cc new file mode 100644 index 000000000..cae9f6a7b --- /dev/null +++ b/src/crimson/common/log.cc @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "log.h" + +static std::array<seastar::logger, ceph_subsys_get_num()> loggers{ +#define SUBSYS(name, log_level, gather_level) \ + seastar::logger(#name), +#define DEFAULT_SUBSYS(log_level, gather_level) \ + seastar::logger("none"), + #include "common/subsys.h" +#undef SUBSYS +#undef DEFAULT_SUBSYS +}; + +namespace crimson { +seastar::logger& get_logger(int subsys) { + assert(subsys < ceph_subsys_max); + return loggers[subsys]; +} +} diff --git a/src/crimson/common/log.h b/src/crimson/common/log.h new file mode 100644 index 000000000..635349098 --- /dev/null +++ b/src/crimson/common/log.h @@ -0,0 +1,24 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <seastar/util/log.hh> +#include "common/subsys_types.h" + +namespace crimson { +seastar::logger& get_logger(int subsys); +static inline seastar::log_level to_log_level(int level) { + if (level < 0) { + return seastar::log_level::error; + } else if (level < 1) { + return seastar::log_level::warn; + } else if (level < 5) { + return seastar::log_level::info; + } else if (level <= 20) { + return seastar::log_level::debug; + } else { + return seastar::log_level::trace; + } +} +} diff --git a/src/crimson/common/perf_counters_collection.cc b/src/crimson/common/perf_counters_collection.cc new file mode 100644 index 000000000..af80dbcc2 --- /dev/null +++ b/src/crimson/common/perf_counters_collection.cc @@ -0,0 +1,25 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "perf_counters_collection.h" + +namespace crimson::common { +PerfCountersCollection::PerfCountersCollection() +{ + perf_collection = std::make_unique<PerfCountersCollectionImpl>(); +} +PerfCountersCollection::~PerfCountersCollection() +{ + perf_collection->clear(); +} + +PerfCountersCollectionImpl* PerfCountersCollection:: get_perf_collection() +{ + return perf_collection.get(); +} + +PerfCountersCollection::ShardedPerfCountersCollection PerfCountersCollection::sharded_perf_coll; + +} + + diff --git a/src/crimson/common/perf_counters_collection.h b/src/crimson/common/perf_counters_collection.h new file mode 100644 index 000000000..a19630247 --- /dev/null +++ b/src/crimson/common/perf_counters_collection.h @@ -0,0 +1,37 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "common/perf_counters.h" +#include <seastar/core/sharded.hh> + +using crimson::common::PerfCountersCollectionImpl; +namespace crimson::common { +class PerfCountersCollection: public seastar::sharded<PerfCountersCollection> +{ + using ShardedPerfCountersCollection = seastar::sharded<PerfCountersCollection>; + +private: + std::unique_ptr<PerfCountersCollectionImpl> perf_collection; + static ShardedPerfCountersCollection sharded_perf_coll; + friend PerfCountersCollection& local_perf_coll(); + friend ShardedPerfCountersCollection& sharded_perf_coll(); + +public: + PerfCountersCollection(); + ~PerfCountersCollection(); + PerfCountersCollectionImpl* get_perf_collection(); + +}; + +inline 
PerfCountersCollection::ShardedPerfCountersCollection& sharded_perf_coll() {
+  return PerfCountersCollection::sharded_perf_coll;
+}
+
+inline PerfCountersCollection& local_perf_coll() {
+  return PerfCountersCollection::sharded_perf_coll.local();
+}
+
+}
+
diff --git a/src/crimson/common/shared_lru.h b/src/crimson/common/shared_lru.h
new file mode 100644
index 000000000..4c1da401e
--- /dev/null
+++ b/src/crimson/common/shared_lru.h
@@ -0,0 +1,178 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <boost/smart_ptr/local_shared_ptr.hpp>
+#include <boost/smart_ptr/weak_ptr.hpp>
+#include "simple_lru.h"
+
+/// SharedLRU does its best to cache objects. It not only tracks the objects
+/// in its LRU cache with strong references, it also tracks objects with
+/// weak_ptr even if the cache does not hold any strong references to them, so
+/// that it can return the objects after they are evicted, as long as they've
+/// ever been cached and have not been destroyed yet.
+template<class K, class V>
+class SharedLRU {
+  using shared_ptr_t = boost::local_shared_ptr<V>;
+  using weak_ptr_t = boost::weak_ptr<V>;
+  using value_type = std::pair<K, shared_ptr_t>;
+
+  // weak_refs is already ordered, and we don't use accessors like
+  // LRUCache::lower_bound(), so unordered LRUCache would suffice.
+  SimpleLRU<K, shared_ptr_t, false> cache;
+  std::map<K, std::pair<weak_ptr_t, V*>> weak_refs;
+
+  struct Deleter {
+    SharedLRU<K,V>* cache;
+    const K key;
+    void operator()(V* ptr) {
+      cache->_erase_weak(key);
+      delete ptr;
+    }
+  };
+  void _erase_weak(const K& key) {
+    weak_refs.erase(key);
+  }
+public:
+  SharedLRU(size_t max_size = 20)
+    : cache{max_size}
+  {}
+  ~SharedLRU() {
+    cache.clear();
+    // use plain assert() in utility classes to avoid dependencies on logging
+    assert(weak_refs.empty());
+  }
+  /**
+   * Returns a reference to the value for the given key, performing an
+   * insertion (of a default-constructed V) if the key is not already present
+   */
+  shared_ptr_t operator[](const K& key);
+  /**
+   * Returns true iff there are no live references left to anything that has
+   * been in the cache.
+   */
+  bool empty() const {
+    return weak_refs.empty();
+  }
+  size_t size() const {
+    return cache.size();
+  }
+  size_t capacity() const {
+    return cache.capacity();
+  }
+  /**
+   * Inserts a key if not present, or bumps it to the front of the LRU if
+   * it is, and then gives you a reference to the value. If the key already
+   * existed, the value you passed in is discarded (its unique_ptr frees it)
+   * and the previously cached value is returned instead.
+   *
+   * @param key The key to insert
+   * @param value The value that goes with the key
+   * @return A shared_ptr to the cached value for the given key
+   */
+  shared_ptr_t insert(const K& key, std::unique_ptr<V> value);
+  // clear all strong references from the lru.
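+  // (weak_refs is left intact, so entries that are still referenced
+  // elsewhere can still be found and resurrected afterwards)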
+ void clear() { + cache.clear(); + } + shared_ptr_t find(const K& key); + // return the last element that is not greater than key + shared_ptr_t lower_bound(const K& key); + // return the first element that is greater than key + std::optional<value_type> upper_bound(const K& key); + + void erase(const K& key) { + cache.erase(key); + _erase_weak(key); + } +}; + +template<class K, class V> +typename SharedLRU<K,V>::shared_ptr_t +SharedLRU<K,V>::insert(const K& key, std::unique_ptr<V> value) +{ + shared_ptr_t val; + if (auto found = weak_refs.find(key); found != weak_refs.end()) { + val = found->second.first.lock(); + } + if (!val) { + val.reset(value.release(), Deleter{this, key}); + weak_refs.emplace(key, std::make_pair(val, val.get())); + } + cache.insert(key, val); + return val; +} + +template<class K, class V> +typename SharedLRU<K,V>::shared_ptr_t +SharedLRU<K,V>::operator[](const K& key) +{ + if (auto found = cache.find(key); found) { + return *found; + } + shared_ptr_t val; + if (auto found = weak_refs.find(key); found != weak_refs.end()) { + val = found->second.first.lock(); + } + if (!val) { + val.reset(new V{}, Deleter{this, key}); + weak_refs.emplace(key, std::make_pair(val, val.get())); + } + cache.insert(key, val); + return val; +} + +template<class K, class V> +typename SharedLRU<K,V>::shared_ptr_t +SharedLRU<K,V>::find(const K& key) +{ + if (auto found = cache.find(key); found) { + return *found; + } + shared_ptr_t val; + if (auto found = weak_refs.find(key); found != weak_refs.end()) { + val = found->second.first.lock(); + } + if (val) { + cache.insert(key, val); + } + return val; +} + +template<class K, class V> +typename SharedLRU<K,V>::shared_ptr_t +SharedLRU<K,V>::lower_bound(const K& key) +{ + if (weak_refs.empty()) { + return {}; + } + auto found = weak_refs.lower_bound(key); + if (found == weak_refs.end()) { + --found; + } + if (auto val = found->second.first.lock(); val) { + cache.insert(key, val); + return val; + } else { + return {}; + } +} + +template<class K, class V> +std::optional<typename SharedLRU<K,V>::value_type> +SharedLRU<K,V>::upper_bound(const K& key) +{ + for (auto found = weak_refs.upper_bound(key); + found != weak_refs.end(); + ++found) { + if (auto val = found->second.first.lock(); val) { + return std::make_pair(found->first, val); + } + } + return std::nullopt; +} diff --git a/src/crimson/common/simple_lru.h b/src/crimson/common/simple_lru.h new file mode 100644 index 000000000..1419c4885 --- /dev/null +++ b/src/crimson/common/simple_lru.h @@ -0,0 +1,141 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <list> +#include <map> +#include <optional> +#include <type_traits> +#include <unordered_map> + +template <class Key, class Value, bool Ordered> +class SimpleLRU { + static_assert(std::is_default_constructible_v<Value>); + using list_type = std::list<Key>; + template<class K, class V> + using map_t = std::conditional_t<Ordered, + std::map<K, V>, + std::unordered_map<K, V>>; + using map_type = map_t<Key, std::pair<Value, typename list_type::iterator>>; + list_type lru; + map_type cache; + const size_t max_size; + +public: + SimpleLRU(size_t size = 20) + : cache(size), + max_size(size) + {} + size_t size() const { + return cache.size(); + } + size_t capacity() const { + return max_size; + } + using insert_return_type = std::pair<Value, bool>; + insert_return_type insert(const Key& key, Value value); + std::optional<Value> find(const Key& key); + 
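+  // note: only meaningful for the Ordered variant, where the underlying
+  // std::map supports ordered lookups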
std::optional<Value> lower_bound(const Key& key);
+  void erase(const Key& key);
+  void clear();
+private:
+  // bump the item to the front of the lru list
+  Value _lru_add(typename map_type::iterator found);
+  // evict the least recently used element
+  void _evict();
+};
+
+template <class Key, class Value, bool Ordered>
+typename SimpleLRU<Key,Value,Ordered>::insert_return_type
+SimpleLRU<Key,Value,Ordered>::insert(const Key& key, Value value)
+{
+  if constexpr(Ordered) {
+    auto found = cache.lower_bound(key);
+    if (found != cache.end() && found->first == key) {
+      // already exists
+      return {found->second.first, true};
+    } else {
+      if (size() >= capacity()) {
+        _evict();
+      }
+      lru.push_front(key);
+      // use lower_bound as hint to save the lookup
+      cache.emplace_hint(found, key, std::make_pair(value, lru.begin()));
+      return {std::move(value), false};
+    }
+  } else {
+    // cache is not ordered
+    auto found = cache.find(key);
+    if (found != cache.end()) {
+      // already exists
+      return {found->second.first, true};
+    } else {
+      if (size() >= capacity()) {
+        _evict();
+      }
+      lru.push_front(key);
+      cache.emplace(key, std::make_pair(value, lru.begin()));
+      return {std::move(value), false};
+    }
+  }
+}
+
+template <class Key, class Value, bool Ordered>
+std::optional<Value> SimpleLRU<Key,Value,Ordered>::find(const Key& key)
+{
+  if (auto found = cache.find(key); found != cache.end()){
+    return _lru_add(found);
+  } else {
+    return {};
+  }
+}
+
+template <class Key, class Value, bool Ordered>
+std::optional<Value>
+SimpleLRU<Key,Value,Ordered>::lower_bound(const Key& key)
+{
+  if (auto found = cache.lower_bound(key); found != cache.end()) {
+    return _lru_add(found);
+  } else {
+    return {};
+  }
+}
+
+template <class Key, class Value, bool Ordered>
+void SimpleLRU<Key,Value,Ordered>::clear()
+{
+  lru.clear();
+  cache.clear();
+}
+
+template <class Key, class Value, bool Ordered>
+void SimpleLRU<Key,Value,Ordered>::erase(const Key& key)
+{
+  if (auto found = cache.find(key); found != cache.end()) {
+    lru.erase(found->second.second);
+    cache.erase(found);
+  }
+}
+
+template <class Key, class Value, bool Ordered>
+Value SimpleLRU<Key,Value,Ordered>::_lru_add(
+  typename SimpleLRU<Key,Value,Ordered>::map_type::iterator found)
+{
+  auto& [value, in_lru] = found->second;
+  if (in_lru != lru.begin()){
+    // move item to the front
+    lru.splice(lru.begin(), lru, in_lru);
+  }
+  // the item is already at the front
+  return value;
+}
+
+template <class Key, class Value, bool Ordered>
+void SimpleLRU<Key,Value,Ordered>::_evict()
+{
+  // evict the least recently used element (the tail of the lru list)
+  auto last = --lru.end();
+  cache.erase(*last);
+  lru.erase(last);
+}
diff --git a/src/crimson/common/throttle.cc b/src/crimson/common/throttle.cc
new file mode 100644
index 000000000..bd9195181
--- /dev/null
+++ b/src/crimson/common/throttle.cc
@@ -0,0 +1,59 @@
+#include "throttle.h"
+
+namespace crimson::common {
+
+int64_t Throttle::take(int64_t c)
+{
+  if (!max) {
+    return 0;
+  }
+  count += c;
+  return count;
+}
+
+int64_t Throttle::put(int64_t c)
+{
+  if (!max) {
+    return 0;
+  }
+  if (!c) {
+    return count;
+  }
+  on_free_slots.signal();
+  count -= c;
+  return count;
+}
+
+seastar::future<> Throttle::get(size_t c)
+{
+  if (!max) {
+    return seastar::make_ready_future<>();
+  }
+  return on_free_slots.wait([this, c] {
+    return !_should_wait(c);
+  }).then([this, c] {
+    count += c;
+    return seastar::make_ready_future<>();
+  });
+}
+
+void Throttle::reset_max(size_t m) {
+  if (max
diff --git a/src/crimson/common/throttle.cc b/src/crimson/common/throttle.cc
new file mode 100644
index 000000000..bd9195181
--- /dev/null
+++ b/src/crimson/common/throttle.cc
@@ -0,0 +1,59 @@
+#include "throttle.h"
+
+namespace crimson::common {
+
+int64_t Throttle::take(int64_t c)
+{
+  if (!max) {
+    return 0;
+  }
+  count += c;
+  return count;
+}
+
+int64_t Throttle::put(int64_t c)
+{
+  if (!max) {
+    return 0;
+  }
+  if (!c) {
+    return count;
+  }
+  // release the slots first, then wake a waiter, so that it sees the
+  // updated count when it re-checks its predicate
+  count -= c;
+  on_free_slots.signal();
+  return count;
+}
+
+seastar::future<> Throttle::get(size_t c)
+{
+  if (!max) {
+    return seastar::make_ready_future<>();
+  }
+  return on_free_slots.wait([this, c] {
+    return !_should_wait(c);
+  }).then([this, c] {
+    count += c;
+    return seastar::make_ready_future<>();
+  });
+}
+
+void Throttle::reset_max(size_t m) {
+  if (max == m) {
+    return;
+  }
+
+  if (m > max) {
+    // more slots are available now; wake a waiter to re-check
+    on_free_slots.signal();
+  }
+  max = m;
+}
+
+bool Throttle::_should_wait(size_t c) const {
+  if (!max) {
+    return false;
+  }
+  return ((c <= max && count + c > max) ||  // normally stay under max
+          (c >= max && count > max));      // except for large c
+}
+
+} // namespace crimson::common
diff --git a/src/crimson/common/throttle.h b/src/crimson/common/throttle.h
new file mode 100644
index 000000000..fea471c8d
--- /dev/null
+++ b/src/crimson/common/throttle.h
@@ -0,0 +1,39 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <seastar/core/condition-variable.hh>
+// FIXME(seastar): included only to pull in the seastar::timer<...>::timer
+// definitions; otherwise reactor.hh would have to be included everywhere
+#include <seastar/core/reactor.hh>
+
+#include "common/ThrottleInterface.h"
+
+namespace crimson::common {
+
+class Throttle final : public ThrottleInterface {
+  size_t max = 0;
+  size_t count = 0;
+  // we cannot change the "count" of seastar::semaphore after it is created,
+  // so use condition_variable instead.
+  seastar::condition_variable on_free_slots;
+public:
+  explicit Throttle(size_t m)
+    : max(m)
+  {}
+  int64_t take(int64_t c = 1) override;
+  int64_t put(int64_t c = 1) override;
+  seastar::future<> get(size_t c);
+  size_t get_current() const {
+    return count;
+  }
+  size_t get_max() const {
+    return max;
+  }
+  void reset_max(size_t m);
+private:
+  bool _should_wait(size_t c) const;
+};
+
+} // namespace crimson::common
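The call pattern the Throttle is designed for, as a sketch; it assumes the code runs inside a seastar reactor (for example under seastar::app_template::run). get() suspends until enough slots are free, put() returns them and wakes a waiter, and a Throttle constructed with max == 0 never blocks at all:

    // hypothetical usage sketch, not part of the patch
    crimson::common::Throttle throttle{64};  // at most 64 slots in flight
    seastar::future<> f = throttle.get(16).then([&throttle] {
      // ... perform the throttled work here ...
      throttle.put(16);  // frees the slots; wakes a blocked get(), if any
    });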
diff --git a/src/crimson/common/tri_mutex.cc b/src/crimson/common/tri_mutex.cc
new file mode 100644
index 000000000..c18aff1a0
--- /dev/null
+++ b/src/crimson/common/tri_mutex.cc
@@ -0,0 +1,225 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "tri_mutex.h"
+
+seastar::future<> read_lock::lock()
+{
+  return static_cast<tri_mutex*>(this)->lock_for_read();
+}
+
+void read_lock::unlock()
+{
+  static_cast<tri_mutex*>(this)->unlock_for_read();
+}
+
+seastar::future<> write_lock::lock()
+{
+  return static_cast<tri_mutex*>(this)->lock_for_write(false);
+}
+
+void write_lock::unlock()
+{
+  static_cast<tri_mutex*>(this)->unlock_for_write();
+}
+
+seastar::future<> excl_lock::lock()
+{
+  return static_cast<tri_mutex*>(this)->lock_for_excl();
+}
+
+void excl_lock::unlock()
+{
+  static_cast<tri_mutex*>(this)->unlock_for_excl();
+}
+
+seastar::future<> excl_lock_from_read::lock()
+{
+  static_cast<tri_mutex*>(this)->promote_from_read();
+  return seastar::make_ready_future<>();
+}
+
+void excl_lock_from_read::unlock()
+{
+  static_cast<tri_mutex*>(this)->demote_to_read();
+}
+
+seastar::future<> excl_lock_from_write::lock()
+{
+  static_cast<tri_mutex*>(this)->promote_from_write();
+  return seastar::make_ready_future<>();
+}
+
+void excl_lock_from_write::unlock()
+{
+  static_cast<tri_mutex*>(this)->demote_to_write();
+}
+
+seastar::future<> excl_lock_from_excl::lock()
+{
+  return seastar::make_ready_future<>();
+}
+
+void excl_lock_from_excl::unlock()
+{
+}
+
+tri_mutex::~tri_mutex()
+{
+  assert(!is_acquired());
+}
+
+seastar::future<> tri_mutex::lock_for_read()
+{
+  if (try_lock_for_read()) {
+    return seastar::make_ready_future<>();
+  }
+  waiters.emplace_back(seastar::promise<>(), type_t::read);
+  return waiters.back().pr.get_future();
+}
+
+bool tri_mutex::try_lock_for_read() noexcept
+{
+  if (!writers && !exclusively_used && waiters.empty()) {
+    ++readers;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void tri_mutex::unlock_for_read()
+{
+  assert(readers > 0);
+  if (--readers == 0) {
+    wake();
+  }
+}
+
+void tri_mutex::promote_from_read()
+{
+  assert(readers == 1);
+  --readers;
+  exclusively_used = true;
+}
+
+void tri_mutex::demote_to_read()
+{
+  assert(exclusively_used);
+  exclusively_used = false;
+  ++readers;
+}
+
+seastar::future<> tri_mutex::lock_for_write(bool greedy)
+{
+  if (try_lock_for_write(greedy)) {
+    return seastar::make_ready_future<>();
+  }
+  waiters.emplace_back(seastar::promise<>(), type_t::write);
+  return waiters.back().pr.get_future();
+}
+
+bool tri_mutex::try_lock_for_write(bool greedy) noexcept
+{
+  if (!readers && !exclusively_used) {
+    // a greedy writer may overtake the waiters already queued up
+    if (greedy || waiters.empty()) {
+      ++writers;
+      return true;
+    }
+  }
+  return false;
+}
+
+void tri_mutex::unlock_for_write()
+{
+  assert(writers > 0);
+  if (--writers == 0) {
+    wake();
+  }
+}
+
+void tri_mutex::promote_from_write()
+{
+  assert(writers == 1);
+  --writers;
+  exclusively_used = true;
+}
+
+void tri_mutex::demote_to_write()
+{
+  assert(exclusively_used);
+  exclusively_used = false;
+  ++writers;
+}
+
+// for exclusive users
+seastar::future<> tri_mutex::lock_for_excl()
+{
+  if (try_lock_for_excl()) {
+    return seastar::make_ready_future<>();
+  }
+  waiters.emplace_back(seastar::promise<>(), type_t::exclusive);
+  return waiters.back().pr.get_future();
+}
+
+bool tri_mutex::try_lock_for_excl() noexcept
+{
+  if (!readers && !writers && !exclusively_used) {
+    exclusively_used = true;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void tri_mutex::unlock_for_excl()
+{
+  assert(exclusively_used);
+  exclusively_used = false;
+  wake();
+}
+
+bool tri_mutex::is_acquired() const
+{
+  if (readers) {
+    return true;
+  } else if (writers) {
+    return true;
+  } else if (exclusively_used) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void tri_mutex::wake()
+{
+  assert(!readers && !writers && !exclusively_used);
+  type_t type = type_t::none;
+  while (!waiters.empty()) {
+    auto& waiter = waiters.front();
+    if (type == type_t::exclusive) {
+      // at most one exclusive user is woken per batch
+      break;
+    } else if (type == type_t::none) {
+      type = waiter.type;
+    } else if (type != waiter.type) {
+      // to be woken in the next batch
+      break;
+    }
+    switch (type) {
+    case type_t::read:
+      ++readers;
+      break;
+    case type_t::write:
+      ++writers;
+      break;
+    case type_t::exclusive:
+      exclusively_used = true;
+      break;
+    default:
+      assert(0);
+    }
+    waiter.pr.set_value();
+    waiters.pop_front();
+  }
+}
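The promote/demote pairs above implement in-place lock upgrades. A sketch of the read-to-exclusive protocol, assuming a seastar context; note that promote_from_read() asserts the caller is the sole remaining reader:

    // hypothetical usage sketch, not part of the patch
    tri_mutex m;
    seastar::future<> f = m.lock_for_read().then([&m] {
      // ... read phase; other readers may be interleaved here ...
      m.promote_from_read();  // legal only while we are the single reader
      // ... exclusive read-modify-write phase ...
      m.demote_to_read();     // back to an ordinary read lock
      m.unlock_for_read();
    });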
diff --git a/src/crimson/common/tri_mutex.h b/src/crimson/common/tri_mutex.h
new file mode 100644
index 000000000..127573b3a
--- /dev/null
+++ b/src/crimson/common/tri_mutex.h
@@ -0,0 +1,156 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <seastar/core/future.hh>
+#include <seastar/core/circular_buffer.hh>
+
+class read_lock {
+public:
+  seastar::future<> lock();
+  void unlock();
+};
+
+class write_lock {
+public:
+  seastar::future<> lock();
+  void unlock();
+};
+
+class excl_lock {
+public:
+  seastar::future<> lock();
+  void unlock();
+};
+
+// promote from read to excl
+class excl_lock_from_read {
+public:
+  seastar::future<> lock();
+  void unlock();
+};
+
+// promote from write to excl
+class excl_lock_from_write {
+public:
+  seastar::future<> lock();
+  void unlock();
+};
+
+// promote from excl to excl
+class excl_lock_from_excl {
+public:
+  seastar::future<> lock();
+  void unlock();
+};
+
+/// shared/exclusive mutual exclusion
+///
+/// the "readers" and "writers" of this lock are entirely independent of the
+/// conventional reader/writer lock semantics: reads can be pipelined with
+/// other reads, and writes with other writes, but a read cannot be admitted
+/// while writes are in progress, nor a write while reads are in progress.
+/// any read-modify-write operation is therefore exclusive.
+///
+/// tri_mutex is based on seastar::shared_mutex, but instead of two kinds of
+/// waiters, tri_mutex keeps track of three kinds of lock users:
+/// - readers
+/// - writers
+/// - exclusive users
+class tri_mutex : private read_lock,
+                  write_lock,
+                  excl_lock,
+                  excl_lock_from_read,
+                  excl_lock_from_write,
+                  excl_lock_from_excl
+{
+public:
+  tri_mutex() = default;
+  ~tri_mutex();
+
+  read_lock& for_read() {
+    return *this;
+  }
+  write_lock& for_write() {
+    return *this;
+  }
+  excl_lock& for_excl() {
+    return *this;
+  }
+  excl_lock_from_read& excl_from_read() {
+    return *this;
+  }
+  excl_lock_from_write& excl_from_write() {
+    return *this;
+  }
+  excl_lock_from_excl& excl_from_excl() {
+    return *this;
+  }
+
+  // for shared readers
+  seastar::future<> lock_for_read();
+  bool try_lock_for_read() noexcept;
+  void unlock_for_read();
+  void promote_from_read();
+  void demote_to_read();
+  unsigned get_readers() const {
+    return readers;
+  }
+
+  // for shared writers
+  seastar::future<> lock_for_write(bool greedy);
+  bool try_lock_for_write(bool greedy) noexcept;
+  void unlock_for_write();
+  void promote_from_write();
+  void demote_to_write();
+  unsigned get_writers() const {
+    return writers;
+  }
+
+  // for exclusive users
+  seastar::future<> lock_for_excl();
+  bool try_lock_for_excl() noexcept;
+  void unlock_for_excl();
+  bool is_excl_acquired() const {
+    return exclusively_used;
+  }
+
+  bool is_acquired() const;
+
+  /// deliver the provided exception to all current waiters
+  template<typename Exception>
+  void abort(Exception ex) {
+    while (!waiters.empty()) {
+      auto& waiter = waiters.front();
+      waiter.pr.set_exception(std::make_exception_ptr(ex));
+      waiters.pop_front();
+    }
+  }
+
+private:
+  void wake();
+  unsigned readers = 0;
+  unsigned writers = 0;
+  bool exclusively_used = false;
+  enum class type_t : uint8_t {
+    read,
+    write,
+    exclusive,
+    none,
+  };
+  struct waiter_t {
+    waiter_t(seastar::promise<>&& pr, type_t type)
+      : pr(std::move(pr)), type(type)
+    {}
+    seastar::promise<> pr;
+    type_t type;
+  };
+  seastar::circular_buffer<waiter_t> waiters;
+  friend class read_lock;
+  friend class write_lock;
+  friend class excl_lock;
+  friend class excl_lock_from_read;
+  friend class excl_lock_from_write;
+  friend class excl_lock_from_excl;
+};
diff --git a/src/crimson/common/type_helpers.h b/src/crimson/common/type_helpers.h
new file mode 100644
index 000000000..4c606581f
--- /dev/null
+++ b/src/crimson/common/type_helpers.h
@@ -0,0 +1,8 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "boost/intrusive_ptr.hpp"
+
+template<typename T> using Ref = boost::intrusive_ptr<T>;
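Because each adapter exposes a lock() returning seastar::future<> plus an unlock(), a tri_mutex composes with seastar's locking helpers. A sketch, assuming seastar::with_lock from <seastar/core/shared_mutex.hh> is available in the seastar snapshot in use:

    // hypothetical usage sketch, not part of the patch
    tri_mutex m;
    seastar::future<> reader = seastar::with_lock(m.for_read(), [] {
      // reads pipeline with other reads
    });
    seastar::future<> writer = seastar::with_lock(m.for_write(), [] {
      // writes pipeline with other writes, but exclude reads
    });
    seastar::future<> rmw = seastar::with_lock(m.for_excl(), [] {
      // fully exclusive: no readers or writers in flight
    });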