diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager | |
parent | Initial commit. (diff) | |
download | ceph-upstream.tar.xz ceph-upstream.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
6 files changed, 558 insertions, 0 deletions
diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc new file mode 100644 index 000000000..bd22d4b67 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.cc @@ -0,0 +1,35 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 smarttab + +#include "node_extent_manager.h" + +#include "node_extent_manager/dummy.h" +#include "node_extent_manager/seastore.h" +#include "stages/node_stage_layout.h" + +namespace crimson::os::seastore::onode { + +std::pair<node_type_t, field_type_t> NodeExtent::get_types() const { + const auto header = reinterpret_cast<const node_header_t*>(get_read()); + auto node_type = header->get_node_type(); + auto field_type = header->get_field_type(); + if (!field_type.has_value()) { + throw std::runtime_error("load failed: bad field type"); + } + return {node_type, *field_type}; +} + +NodeExtentManagerURef NodeExtentManager::create_dummy(bool is_sync) { + if (is_sync) { + return NodeExtentManagerURef(new DummyNodeExtentManager<true>()); + } else { + return NodeExtentManagerURef(new DummyNodeExtentManager<false>()); + } +} + +NodeExtentManagerURef NodeExtentManager::create_seastore( + TransactionManager& tm, laddr_t min_laddr) { + return NodeExtentManagerURef(new SeastoreNodeExtentManager(tm, min_laddr)); +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h new file mode 100644 index 000000000..77b230e03 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h @@ -0,0 +1,86 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/common/type_helpers.h" +#include "crimson/os/seastore/cached_extent.h" +#include "crimson/os/seastore/transaction_manager.h" + +#include "fwd.h" +#include "super.h" +#include "node_extent_mutable.h" +#include "node_types.h" + +/** + * node_extent_manager.h + * + * Contains general interfaces for different backends (Dummy and Seastore). + */ + +namespace crimson::os::seastore::onode { + +using crimson::os::seastore::LogicalCachedExtent; +class NodeExtent : public LogicalCachedExtent { + public: + virtual ~NodeExtent() = default; + std::pair<node_type_t, field_type_t> get_types() const; + const char* get_read() const { + return get_bptr().c_str(); + } + NodeExtentMutable get_mutable() { + assert(is_pending()); + return do_get_mutable(); + } + + virtual DeltaRecorder* get_recorder() const = 0; + virtual NodeExtentRef mutate(context_t, DeltaRecorderURef&&) = 0; + + protected: + template <typename... T> + NodeExtent(T&&... t) : LogicalCachedExtent(std::forward<T>(t)...) {} + + NodeExtentMutable do_get_mutable() { + return NodeExtentMutable(*this); + } + + /** + * Abstracted interfaces to implement: + * - CacheExtent::duplicate_for_write() -> CachedExtentRef + * - CacheExtent::get_type() -> extent_types_t + * - CacheExtent::get_delta() -> ceph::bufferlist + * - LogicalCachedExtent::apply_delta(const ceph::bufferlist) -> void + */ + + private: + friend class NodeExtentMutable; +}; + +using crimson::os::seastore::TransactionManager; +class NodeExtentManager { + public: + virtual ~NodeExtentManager() = default; + using tm_ertr = crimson::errorator< + crimson::ct_error::input_output_error, + crimson::ct_error::invarg, + crimson::ct_error::enoent, + crimson::ct_error::erange>; + template <class ValueT=void> + using tm_future = tm_ertr::future<ValueT>; + + virtual bool is_read_isolated() const = 0; + virtual tm_future<NodeExtentRef> read_extent( + Transaction&, laddr_t, extent_len_t) = 0; + virtual tm_future<NodeExtentRef> alloc_extent(Transaction&, extent_len_t) = 0; + virtual tm_future<Super::URef> get_super(Transaction&, RootNodeTracker&) = 0; + virtual std::ostream& print(std::ostream& os) const = 0; + + static NodeExtentManagerURef create_dummy(bool is_sync); + static NodeExtentManagerURef create_seastore( + TransactionManager& tm, laddr_t min_laddr = L_ADDR_MIN); +}; +inline std::ostream& operator<<(std::ostream& os, const NodeExtentManager& nm) { + return nm.print(os); +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h new file mode 100644 index 000000000..830ea4a7d --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h @@ -0,0 +1,156 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <chrono> +#include <seastar/core/sleep.hh> + +#include "include/buffer_raw.h" + +#include "crimson/common/log.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h" + +/** + * dummy.h + * + * Dummy backend implementations for test purposes. + */ + +namespace crimson::os::seastore::onode { + +class DummySuper final: public Super { + public: + DummySuper(Transaction& t, RootNodeTracker& tracker, laddr_t* p_root_laddr) + : Super(t, tracker), p_root_laddr{p_root_laddr} {} + ~DummySuper() override = default; + protected: + laddr_t get_root_laddr() const override { return *p_root_laddr; } + void write_root_laddr(context_t, laddr_t addr) override { + logger().info("OTree::Dummy: update root {:#x} ...", addr); + *p_root_laddr = addr; + } + private: + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + laddr_t* p_root_laddr; +}; + +class DummyNodeExtent final: public NodeExtent { + public: + DummyNodeExtent(ceph::bufferptr &&ptr) : NodeExtent(std::move(ptr)) { + state = extent_state_t::INITIAL_WRITE_PENDING; + } + ~DummyNodeExtent() override = default; + protected: + NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override { + ceph_abort("impossible path"); } + DeltaRecorder* get_recorder() const override { + return nullptr; } + CachedExtentRef duplicate_for_write() override { + ceph_abort("impossible path"); } + extent_types_t get_type() const override { + return extent_types_t::TEST_BLOCK; } + ceph::bufferlist get_delta() override { + ceph_abort("impossible path"); } + void apply_delta(const ceph::bufferlist&) override { + ceph_abort("impossible path"); } +}; + +template <bool SYNC> +class DummyNodeExtentManager final: public NodeExtentManager { + static constexpr size_t ALIGNMENT = 4096; + public: + ~DummyNodeExtentManager() override = default; + protected: + bool is_read_isolated() const override { return false; } + + tm_future<NodeExtentRef> read_extent( + Transaction& t, laddr_t addr, extent_len_t len) override { + logger().trace("OTree::Dummy: reading {}B at {:#x} ...", len, addr); + if constexpr (SYNC) { + return read_extent_sync(t, addr, len); + } else { + using namespace std::chrono_literals; + return seastar::sleep(1us).then([this, &t, addr, len] { + return read_extent_sync(t, addr, len); + }); + } + } + + tm_future<NodeExtentRef> alloc_extent( + Transaction& t, extent_len_t len) override { + logger().trace("OTree::Dummy: allocating {}B ...", len); + if constexpr (SYNC) { + return alloc_extent_sync(t, len); + } else { + using namespace std::chrono_literals; + return seastar::sleep(1us).then([this, &t, len] { + return alloc_extent_sync(t, len); + }); + } + } + + tm_future<Super::URef> get_super( + Transaction& t, RootNodeTracker& tracker) override { + logger().trace("OTree::Dummy: get root ..."); + if constexpr (SYNC) { + return get_super_sync(t, tracker); + } else { + using namespace std::chrono_literals; + return seastar::sleep(1us).then([this, &t, &tracker] { + return get_super_sync(t, tracker); + }); + } + } + + std::ostream& print(std::ostream& os) const override { + return os << "DummyNodeExtentManager(sync=" << SYNC << ")"; + } + + private: + tm_future<NodeExtentRef> read_extent_sync( + Transaction& t, laddr_t addr, extent_len_t len) { + auto iter = allocate_map.find(addr); + assert(iter != allocate_map.end()); + auto extent = iter->second; + logger().trace("OTree::Dummy: read {}B at {:#x}", + extent->get_length(), extent->get_laddr()); + assert(extent->get_laddr() == addr); + assert(extent->get_length() == len); + return tm_ertr::make_ready_future<NodeExtentRef>(extent); + } + + tm_future<NodeExtentRef> alloc_extent_sync( + Transaction& t, extent_len_t len) { + assert(len % ALIGNMENT == 0); + auto r = ceph::buffer::create_aligned(len, ALIGNMENT); + auto addr = reinterpret_cast<laddr_t>(r->get_data()); + auto bp = ceph::bufferptr(std::move(r)); + auto extent = Ref<DummyNodeExtent>(new DummyNodeExtent(std::move(bp))); + extent->set_laddr(addr); + assert(allocate_map.find(extent->get_laddr()) == allocate_map.end()); + allocate_map.insert({extent->get_laddr(), extent}); + logger().debug("OTree::Dummy: allocated {}B at {:#x}", + extent->get_length(), extent->get_laddr()); + assert(extent->get_length() == len); + return tm_ertr::make_ready_future<NodeExtentRef>(extent); + } + + tm_future<Super::URef> get_super_sync( + Transaction& t, RootNodeTracker& tracker) { + logger().debug("OTree::Dummy: got root {:#x}", root_laddr); + return tm_ertr::make_ready_future<Super::URef>( + Super::URef(new DummySuper(t, tracker, &root_laddr))); + } + + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + + std::map<laddr_t, Ref<DummyNodeExtent>> allocate_map; + laddr_t root_laddr = L_ADDR_NULL; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc new file mode 100644 index 000000000..8d88485bf --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.cc @@ -0,0 +1,88 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "seastore.h" + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/stages/node_stage_layout.h" + +namespace { + +seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); +} + +} + +namespace crimson::os::seastore::onode { + +static DeltaRecorderURef create_recorder( + node_type_t node_type, field_type_t field_type) { + if (node_type == node_type_t::LEAF) { + if (field_type == field_type_t::N0) { + return DeltaRecorderT<node_fields_0_t, node_type_t::LEAF>::create(); + } else if (field_type == field_type_t::N1) { + return DeltaRecorderT<node_fields_1_t, node_type_t::LEAF>::create(); + } else if (field_type == field_type_t::N2) { + return DeltaRecorderT<node_fields_2_t, node_type_t::LEAF>::create(); + } else if (field_type == field_type_t::N3) { + return DeltaRecorderT<leaf_fields_3_t, node_type_t::LEAF>::create(); + } else { + ceph_abort("impossible path"); + } + } else if (node_type == node_type_t::INTERNAL) { + if (field_type == field_type_t::N0) { + return DeltaRecorderT<node_fields_0_t, node_type_t::INTERNAL>::create(); + } else if (field_type == field_type_t::N1) { + return DeltaRecorderT<node_fields_1_t, node_type_t::INTERNAL>::create(); + } else if (field_type == field_type_t::N2) { + return DeltaRecorderT<node_fields_2_t, node_type_t::INTERNAL>::create(); + } else if (field_type == field_type_t::N3) { + return DeltaRecorderT<internal_fields_3_t, node_type_t::INTERNAL>::create(); + } else { + ceph_abort("impossible path"); + } + } else { + ceph_abort("impossible path"); + } +} + +void SeastoreSuper::write_root_laddr(context_t c, laddr_t addr) { + logger().info("OTree::Seastore: update root {:#x} ...", addr); + root_addr = addr; + auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm); + nm->get_tm().write_onode_root(c.t, addr); +} + +NodeExtentRef SeastoreNodeExtent::mutate( + context_t c, DeltaRecorderURef&& _recorder) { + logger().debug("OTree::Seastore: mutate {:#x} ...", get_laddr()); + auto nm = static_cast<SeastoreNodeExtentManager*>(&c.nm); + auto extent = nm->get_tm().get_mutable_extent(c.t, this); + auto ret = extent->cast<SeastoreNodeExtent>(); + assert(!ret->recorder || ret->recorder->is_empty()); + ret->recorder = std::move(_recorder); + return ret; +} + +void SeastoreNodeExtent::apply_delta(const ceph::bufferlist& bl) { + logger().debug("OTree::Seastore: replay {:#x} ...", get_laddr()); + if (!recorder) { + auto [node_type, field_type] = get_types(); + recorder = create_recorder(node_type, field_type); + } else { +#ifndef NDEBUG + auto [node_type, field_type] = get_types(); + assert(recorder->node_type() == node_type); + assert(recorder->field_type() == field_type); +#endif + } + assert(is_clean()); + auto node = do_get_mutable(); + auto p = bl.cbegin(); + while (p != bl.end()) { + recorder->apply_delta(p, node); + } +} + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h new file mode 100644 index 000000000..f80b99fab --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h @@ -0,0 +1,126 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/common/log.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h" + +/** + * seastore.h + * + * Seastore backend implementations. + */ + +namespace crimson::os::seastore::onode { + +class SeastoreSuper final: public Super { + public: + SeastoreSuper(Transaction& t, RootNodeTracker& tracker, + laddr_t root_addr, TransactionManager& tm) + : Super(t, tracker), root_addr{root_addr}, tm{tm} {} + ~SeastoreSuper() override = default; + protected: + laddr_t get_root_laddr() const override { + return root_addr; + } + void write_root_laddr(context_t c, laddr_t addr) override; + private: + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + laddr_t root_addr; + TransactionManager& tm; +}; + +class SeastoreNodeExtent final: public NodeExtent { + public: + SeastoreNodeExtent(ceph::bufferptr &&ptr) + : NodeExtent(std::move(ptr)) {} + SeastoreNodeExtent(const SeastoreNodeExtent& other) + : NodeExtent(other) {} + ~SeastoreNodeExtent() override = default; + protected: + NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override; + + DeltaRecorder* get_recorder() const override { + return recorder.get(); + } + + CachedExtentRef duplicate_for_write() override { + return CachedExtentRef(new SeastoreNodeExtent(*this)); + } + extent_types_t get_type() const override { + return extent_types_t::ONODE_BLOCK_STAGED; + } + ceph::bufferlist get_delta() override { + assert(recorder); + return recorder->get_delta(); + } + void apply_delta(const ceph::bufferlist&) override; + private: + DeltaRecorderURef recorder; +}; + +class SeastoreNodeExtentManager final: public NodeExtentManager { + public: + SeastoreNodeExtentManager(TransactionManager& tm, laddr_t min) + : tm{tm}, addr_min{min} {}; + ~SeastoreNodeExtentManager() override = default; + TransactionManager& get_tm() { return tm; } + protected: + bool is_read_isolated() const override { return true; } + + tm_future<NodeExtentRef> read_extent( + Transaction& t, laddr_t addr, extent_len_t len) override { + logger().debug("OTree::Seastore: reading {}B at {:#x} ...", len, addr); + return tm.read_extents<SeastoreNodeExtent>(t, addr, len + ).safe_then([addr, len](auto&& extents) { + assert(extents.size() == 1); + [[maybe_unused]] auto [laddr, e] = extents.front(); + logger().trace("OTree::Seastore: read {}B at {:#x}", + e->get_length(), e->get_laddr()); + assert(e->get_laddr() == addr); + assert(e->get_length() == len); + std::ignore = addr; + std::ignore = len; + return NodeExtentRef(e); + }); + } + + tm_future<NodeExtentRef> alloc_extent( + Transaction& t, extent_len_t len) override { + logger().debug("OTree::Seastore: allocating {}B ...", len); + return tm.alloc_extent<SeastoreNodeExtent>(t, addr_min, len + ).safe_then([len](auto extent) { + logger().debug("OTree::Seastore: allocated {}B at {:#x}", + extent->get_length(), extent->get_laddr()); + assert(extent->get_length() == len); + std::ignore = len; + return NodeExtentRef(extent); + }); + } + + tm_future<Super::URef> get_super( + Transaction& t, RootNodeTracker& tracker) override { + logger().trace("OTree::Seastore: get root ..."); + return tm.read_onode_root(t).safe_then([this, &t, &tracker](auto root_addr) { + logger().debug("OTree::Seastore: got root {:#x}", root_addr); + return Super::URef(new SeastoreSuper(t, tracker, root_addr, tm)); + }); + } + + std::ostream& print(std::ostream& os) const override { + return os << "SeastoreNodeExtentManager"; + } + + private: + static seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } + TransactionManager& tm; + const laddr_t addr_min; +}; + +} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/test_replay.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/test_replay.h new file mode 100644 index 000000000..240c88932 --- /dev/null +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/test_replay.h @@ -0,0 +1,67 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/os/seastore/onode_manager/staged-fltree/node_delta_recorder.h" +#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h" + +/** test_replay.h + * + * A special version of NodeExtent to help verify delta encode, decode and + * replay in recorder_t under debug build. + */ + +namespace crimson::os::seastore::onode { + +class TestReplayExtent final: public NodeExtent { + public: + using Ref = crimson::os::seastore::TCachedExtentRef<TestReplayExtent>; + + void prepare_replay(NodeExtentRef from_extent) { + assert(get_length() == from_extent->get_length()); + auto mut = do_get_mutable(); + std::memcpy(mut.get_write(), from_extent->get_read(), get_length()); + } + + void replay_and_verify(NodeExtentRef replayed_extent) { + assert(get_length() == replayed_extent->get_length()); + auto mut = do_get_mutable(); + auto bl = recorder->get_delta(); + assert(bl.length()); + auto p = bl.cbegin(); + recorder->apply_delta(p, mut); + assert(p == bl.end()); + auto cmp = std::memcmp(get_read(), replayed_extent->get_read(), get_length()); + ceph_assert(cmp == 0 && "replay mismatch!"); + } + + static Ref create(extent_len_t length, DeltaRecorderURef&& recorder) { + auto r = ceph::buffer::create_aligned(length, 4096); + auto bp = ceph::bufferptr(std::move(r)); + return new TestReplayExtent(std::move(bp), std::move(recorder)); + } + + protected: + NodeExtentRef mutate(context_t, DeltaRecorderURef&&) override { + ceph_abort("impossible path"); } + DeltaRecorder* get_recorder() const override { + ceph_abort("impossible path"); } + CachedExtentRef duplicate_for_write() override { + ceph_abort("impossible path"); } + extent_types_t get_type() const override { + return extent_types_t::TEST_BLOCK; } + ceph::bufferlist get_delta() override { + ceph_abort("impossible path"); } + void apply_delta(const ceph::bufferlist&) override { + ceph_abort("impossible path"); } + + private: + TestReplayExtent(ceph::bufferptr&& ptr, DeltaRecorderURef&& recorder) + : NodeExtent(std::move(ptr)), recorder(std::move(recorder)) { + state = extent_state_t::MUTATION_PENDING; + } + DeltaRecorderURef recorder; +}; + +} |