summaryrefslogtreecommitdiffstats
path: root/src/crimson/os/seastore/extentmap_manager/btree
diff options
context:
space:
mode:
Diffstat (limited to 'src/crimson/os/seastore/extentmap_manager/btree')
-rw-r--r--src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.cc118
-rw-r--r--src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h64
-rw-r--r--src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h145
-rw-r--r--src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.cc373
-rw-r--r--src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h281
5 files changed, 981 insertions, 0 deletions
diff --git a/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.cc b/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.cc
new file mode 100644
index 000000000..f7609d3e8
--- /dev/null
+++ b/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.cc
@@ -0,0 +1,118 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include <sys/mman.h>
+#include <string.h>
+
+#include "crimson/common/log.h"
+
+#include "include/buffer.h"
+#include "crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h"
+#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h"
+
+namespace {
+ seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_filestore);
+ }
+}
+
+namespace crimson::os::seastore::extentmap_manager {
+
+BtreeExtentMapManager::BtreeExtentMapManager(
+ TransactionManager &tm)
+ : tm(tm) {}
+
+BtreeExtentMapManager::initialize_extmap_ret
+BtreeExtentMapManager::initialize_extmap(Transaction &t)
+{
+
+ logger().debug("{}", __func__);
+ return tm.alloc_extent<ExtMapLeafNode>(t, L_ADDR_MIN, EXTMAP_BLOCK_SIZE)
+ .safe_then([](auto&& root_extent) {
+ root_extent->set_size(0);
+ extmap_node_meta_t meta{1};
+ root_extent->set_meta(meta);
+ extmap_root_t extmap_root = extmap_root_t(1, root_extent->get_laddr());
+ return initialize_extmap_ertr::make_ready_future<extmap_root_t>(extmap_root);
+ });
+}
+
+BtreeExtentMapManager::get_root_ret
+BtreeExtentMapManager::get_extmap_root(const extmap_root_t &extmap_root, Transaction &t)
+{
+ assert(extmap_root.extmap_root_laddr != L_ADDR_NULL);
+ laddr_t laddr = extmap_root.extmap_root_laddr;
+ return extmap_load_extent(get_ext_context(t), laddr, extmap_root.depth);
+}
+
+BtreeExtentMapManager::find_lextent_ret
+BtreeExtentMapManager::find_lextent(const extmap_root_t &extmap_root, Transaction &t,
+ objaddr_t lo, extent_len_t len)
+{
+ logger().debug("{}: {}, {}", __func__, lo, len);
+ return get_extmap_root(extmap_root, t).safe_then([this, &t, lo, len](auto&& extent) {
+ return extent->find_lextent(get_ext_context(t), lo, len);
+ }).safe_then([](auto &&e) {
+ logger().debug("{}: found_lextent {}", __func__, e);
+ return find_lextent_ret(
+ find_lextent_ertr::ready_future_marker{},
+ std::move(e));
+ });
+
+}
+
+BtreeExtentMapManager::add_lextent_ret
+BtreeExtentMapManager::add_lextent(extmap_root_t &extmap_root, Transaction &t,
+ objaddr_t lo, lext_map_val_t val)
+{
+ logger().debug("{}: {}, {}, {}", __func__, lo, val.laddr, val.length);
+ return get_extmap_root(extmap_root, t).safe_then([this, &extmap_root, &t, lo, val](auto &&root) {
+ return insert_lextent(extmap_root, t, root, lo, val);
+ }).safe_then([](auto ret) {
+ logger().debug("{}: {}", __func__, ret);
+ return add_lextent_ret(
+ add_lextent_ertr::ready_future_marker{},
+ std::move(ret));
+ });
+
+}
+
+BtreeExtentMapManager::insert_lextent_ret
+BtreeExtentMapManager::insert_lextent(extmap_root_t &extmap_root, Transaction &t,
+ ExtMapNodeRef root, objaddr_t logical_offset, lext_map_val_t val)
+{
+ auto split = insert_lextent_ertr::make_ready_future<ExtMapNodeRef>(root);
+ if (root->at_max_capacity()) {
+ logger().debug("{}::splitting root {}", __func__, *root);
+ split = root->extmap_alloc_extent<ExtMapInnerNode>(get_ext_context(t), EXTMAP_BLOCK_SIZE)
+ .safe_then([this, &extmap_root, root, &t, logical_offset](auto&& nroot) {
+ extmap_node_meta_t meta{root->get_node_meta().depth + 1};
+ nroot->set_meta(meta);
+ nroot->journal_insert(nroot->begin(), OBJ_ADDR_MIN,
+ root->get_laddr(), nullptr);
+ extmap_root.extmap_root_laddr = nroot->get_laddr();
+ extmap_root.depth = root->get_node_meta().depth + 1;
+ extmap_root.state = extmap_root_state_t::MUTATED;
+ return nroot->split_entry(get_ext_context(t), logical_offset, nroot->begin(), root);
+ });
+ }
+ return split.safe_then([this, &t, logical_offset, val](ExtMapNodeRef node) {
+ return node->insert(get_ext_context(t), logical_offset, val);
+ });
+}
+
+BtreeExtentMapManager::rm_lextent_ret
+BtreeExtentMapManager::rm_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val)
+{
+ logger().debug("{}: {}, {}, {}", __func__, lo, val.laddr, val.length);
+ return get_extmap_root(extmap_root, t).safe_then([this, &t, lo, val](auto extent) {
+ return extent->rm_lextent(get_ext_context(t), lo, val);
+ }).safe_then([](auto removed) {
+ logger().debug("{}: {}", __func__, removed);
+ return rm_lextent_ret(
+ rm_lextent_ertr::ready_future_marker{},
+ removed);
+ });
+}
+
+
+}
diff --git a/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h b/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h
new file mode 100644
index 000000000..db676f41d
--- /dev/null
+++ b/src/crimson/os/seastore/extentmap_manager/btree/btree_extentmap_manager.h
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <iostream>
+
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+#include <seastar/core/future.hh>
+
+#include "include/ceph_assert.h"
+#include "crimson/osd/exceptions.h"
+
+#include "crimson/os/seastore/extentmap_manager.h"
+#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/transaction_manager.h"
+
+namespace crimson::os::seastore::extentmap_manager {
+/**
+ * BtreeExtentMapManager
+ *
+ * Uses a btree to track :
+ * objaddr_t -> laddr_t mapping for each onode extentmap
+ */
+
+class BtreeExtentMapManager : public ExtentMapManager {
+ TransactionManager &tm;
+
+ ext_context_t get_ext_context(Transaction &t) {
+ return ext_context_t{tm,t};
+ }
+
+ /* get_extmap_root
+ *
+ * load extent map tree root node
+ */
+ using get_root_ertr = TransactionManager::read_extent_ertr;
+ using get_root_ret = get_root_ertr::future<ExtMapNodeRef>;
+ get_root_ret get_extmap_root(const extmap_root_t &extmap_root, Transaction &t);
+
+ using insert_lextent_ertr = TransactionManager::read_extent_ertr;
+ using insert_lextent_ret = insert_lextent_ertr::future<extent_mapping_t >;
+ insert_lextent_ret insert_lextent(extmap_root_t &extmap_root, Transaction &t,
+ ExtMapNodeRef extent, objaddr_t lo,
+ lext_map_val_t val);
+
+public:
+ explicit BtreeExtentMapManager(TransactionManager &tm);
+
+ initialize_extmap_ret initialize_extmap(Transaction &t) final;
+
+ find_lextent_ret find_lextent(const extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, extent_len_t len) final;
+
+ add_lextent_ret add_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) final;
+
+ rm_lextent_ret rm_lextent(extmap_root_t &extmap_root, Transaction &t, objaddr_t lo, lext_map_val_t val) final;
+
+
+};
+using BtreeExtentMapManagerRef = std::unique_ptr<BtreeExtentMapManager>;
+
+}
diff --git a/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h b/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h
new file mode 100644
index 000000000..3937bd049
--- /dev/null
+++ b/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h
@@ -0,0 +1,145 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+
+#pragma once
+
+#include <boost/iterator/counting_iterator.hpp>
+
+#include "crimson/common/log.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/extentmap_manager.h"
+
+namespace crimson::os::seastore::extentmap_manager{
+
+struct ext_context_t {
+ TransactionManager &tm;
+ Transaction &t;
+};
+
+struct extmap_node_meta_t {
+ depth_t depth = 0;
+
+ std::pair<extmap_node_meta_t, extmap_node_meta_t> split_into(objaddr_t pivot) const {
+ return std::make_pair(
+ extmap_node_meta_t{depth},
+ extmap_node_meta_t{depth});
+ }
+
+ static extmap_node_meta_t merge_from(
+ const extmap_node_meta_t &lhs, const extmap_node_meta_t &rhs) {
+ assert(lhs.depth == rhs.depth);
+ return extmap_node_meta_t{lhs.depth};
+ }
+
+ static std::pair<extmap_node_meta_t, extmap_node_meta_t>
+ rebalance(const extmap_node_meta_t &lhs, const extmap_node_meta_t &rhs, laddr_t pivot) {
+ assert(lhs.depth == rhs.depth);
+ return std::make_pair(
+ extmap_node_meta_t{lhs.depth},
+ extmap_node_meta_t{lhs.depth});
+ }
+};
+
+struct ExtMapNode : LogicalCachedExtent {
+ using ExtMapNodeRef = TCachedExtentRef<ExtMapNode>;
+
+ ExtMapNode(ceph::bufferptr &&ptr) : LogicalCachedExtent(std::move(ptr)) {}
+ ExtMapNode(const ExtMapNode &other)
+ : LogicalCachedExtent(other) {}
+
+ using find_lextent_ertr = ExtentMapManager::find_lextent_ertr;
+ using find_lextent_ret = ExtentMapManager::find_lextent_ret;
+ virtual find_lextent_ret find_lextent(ext_context_t ec,
+ objaddr_t lo, extent_len_t len) = 0;
+
+ using insert_ertr = TransactionManager::read_extent_ertr;
+ using insert_ret = insert_ertr::future<extent_mapping_t>;
+ virtual insert_ret insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) = 0;
+
+ using rm_lextent_ertr = TransactionManager::read_extent_ertr;
+ using rm_lextent_ret = rm_lextent_ertr::future<bool>;
+ virtual rm_lextent_ret rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) = 0;
+
+ using split_children_ertr = TransactionManager::alloc_extent_ertr;
+ using split_children_ret = split_children_ertr::future
+ <std::tuple<ExtMapNodeRef, ExtMapNodeRef, uint32_t>>;
+ virtual split_children_ret make_split_children(ext_context_t ec) = 0;
+
+ using full_merge_ertr = TransactionManager::alloc_extent_ertr;
+ using full_merge_ret = full_merge_ertr::future<ExtMapNodeRef>;
+ virtual full_merge_ret make_full_merge(ext_context_t ec, ExtMapNodeRef right) = 0;
+
+ using make_balanced_ertr = TransactionManager::alloc_extent_ertr;
+ using make_balanced_ret = make_balanced_ertr::future
+ <std::tuple<ExtMapNodeRef, ExtMapNodeRef, uint32_t>>;
+ virtual make_balanced_ret
+ make_balanced(ext_context_t ec, ExtMapNodeRef right, bool prefer_left) = 0;
+
+ virtual extmap_node_meta_t get_node_meta() const = 0;
+
+ virtual bool at_max_capacity() const = 0;
+ virtual bool at_min_capacity() const = 0;
+ virtual unsigned get_node_size() const = 0;
+ virtual ~ExtMapNode() = default;
+
+ using alloc_ertr = TransactionManager::alloc_extent_ertr;
+ template<class T>
+ alloc_ertr::future<TCachedExtentRef<T>>
+ extmap_alloc_extent(ext_context_t ec, extent_len_t len) {
+ return ec.tm.alloc_extent<T>(ec.t, L_ADDR_MIN, len).safe_then(
+ [](auto&& extent) {
+ return alloc_ertr::make_ready_future<TCachedExtentRef<T>>(std::move(extent));
+ });
+ }
+
+ template<class T>
+ alloc_ertr::future<std::pair<TCachedExtentRef<T>, TCachedExtentRef<T>>>
+ extmap_alloc_2extents(ext_context_t ec, extent_len_t len) {
+ return seastar::do_with(std::pair<TCachedExtentRef<T>, TCachedExtentRef<T>>(),
+ [ec, len] (auto &extents) {
+ return crimson::do_for_each(boost::make_counting_iterator(0),
+ boost::make_counting_iterator(2),
+ [ec, len, &extents] (auto i) {
+ return ec.tm.alloc_extent<T>(ec.t, L_ADDR_MIN, len).safe_then(
+ [i, &extents](auto &&node) {
+ if (i == 0)
+ extents.first = node;
+ if (i == 1)
+ extents.second = node;
+ });
+ }).safe_then([&extents] {
+ return alloc_ertr::make_ready_future
+ <std::pair<TCachedExtentRef<T>, TCachedExtentRef<T>>>(std::move(extents));
+ });
+ });
+ }
+
+ using retire_ertr = crimson::errorator<
+ crimson::ct_error::enoent,
+ crimson::ct_error::input_output_error>;
+ using retire_ret = retire_ertr::future<std::list<unsigned>>;
+ retire_ret
+ extmap_retire_node(ext_context_t ec, std::list<laddr_t> dec_laddrs) {
+ return seastar::do_with(std::move(dec_laddrs), std::list<unsigned>(),
+ [ec] (auto &&dec_laddrs, auto &refcnt) {
+ return crimson::do_for_each(dec_laddrs.begin(), dec_laddrs.end(),
+ [ec, &refcnt] (auto &laddr) {
+ return ec.tm.dec_ref(ec.t, laddr).safe_then([&refcnt] (auto ref) {
+ refcnt.push_back(ref);
+ });
+ }).safe_then([&refcnt] {
+ return retire_ertr::make_ready_future<std::list<unsigned>>(std::move(refcnt));
+ });
+ });
+ }
+
+};
+
+using ExtMapNodeRef = ExtMapNode::ExtMapNodeRef;
+
+TransactionManager::read_extent_ertr::future<ExtMapNodeRef>
+extmap_load_extent(ext_context_t ec, laddr_t laddr, depth_t depth);
+
+}
diff --git a/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.cc b/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.cc
new file mode 100644
index 000000000..7bf8680a5
--- /dev/null
+++ b/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.cc
@@ -0,0 +1,373 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/mman.h>
+#include <string.h>
+
+#include <memory>
+#include <string.h>
+
+#include "include/buffer.h"
+#include "include/byteorder.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h"
+#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h"
+
+namespace {
+ seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_filestore);
+ }
+}
+
+namespace crimson::os::seastore::extentmap_manager {
+
+std::ostream &ExtMapInnerNode::print_detail_l(std::ostream &out) const
+{
+ return out << ", size=" << get_size()
+ << ", depth=" << get_meta().depth;
+}
+
+ExtMapInnerNode::find_lextent_ret
+ExtMapInnerNode::find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len)
+{
+ auto [begin, end] = bound(lo, lo + len);
+ auto result_up = std::make_unique<extent_map_list_t>();
+ auto &result = *result_up;
+ return crimson::do_for_each(
+ std::move(begin),
+ std::move(end),
+ [this, ec, &result, lo, len](const auto &val) mutable {
+ return extmap_load_extent(ec, val.get_val(), get_meta().depth - 1).safe_then(
+ [ec, &result, lo, len](auto extent) mutable {
+ return extent->find_lextent(ec, lo, len).safe_then(
+ [&result](auto item_list) mutable {
+ result.splice(result.end(), item_list,
+ item_list.begin(), item_list.end());
+ });
+ });
+ }).safe_then([result=std::move(result_up)] {
+ return find_lextent_ret(
+ find_lextent_ertr::ready_future_marker{},
+ std::move(*result));
+ });
+}
+
+ExtMapInnerNode::insert_ret
+ExtMapInnerNode::insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val)
+{
+ auto insertion_pt = get_containing_child(lo);
+ assert(insertion_pt != end());
+ return extmap_load_extent(ec, insertion_pt->get_val(), get_meta().depth - 1).safe_then(
+ [this, ec, insertion_pt, lo, val=std::move(val)](auto extent) mutable {
+ return extent->at_max_capacity() ?
+ split_entry(ec, lo, insertion_pt, extent) :
+ insert_ertr::make_ready_future<ExtMapNodeRef>(std::move(extent));
+ }).safe_then([ec, lo, val=std::move(val)](ExtMapNodeRef extent) mutable {
+ return extent->insert(ec, lo, val);
+ });
+}
+
+ExtMapInnerNode::rm_lextent_ret
+ExtMapInnerNode::rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val)
+{
+ auto rm_pt = get_containing_child(lo);
+ return extmap_load_extent(ec, rm_pt->get_val(), get_meta().depth - 1).safe_then(
+ [this, ec, rm_pt, lo, val=std::move(val)](auto extent) mutable {
+ if (extent->at_min_capacity() && get_node_size() > 1) {
+ return merge_entry(ec, lo, rm_pt, extent);
+ } else {
+ return merge_entry_ertr::make_ready_future<ExtMapNodeRef>(std::move(extent));
+ }
+ }).safe_then([ec, lo, val](ExtMapNodeRef extent) mutable {
+ return extent->rm_lextent(ec, lo, val);
+ });
+}
+
+ExtMapInnerNode::split_children_ret
+ExtMapInnerNode::make_split_children(ext_context_t ec)
+{
+ logger().debug("{}: {}", "ExtMapInnerNode", __func__);
+ return extmap_alloc_2extents<ExtMapInnerNode>(ec, EXTMAP_BLOCK_SIZE)
+ .safe_then([this] (auto &&ext_pair) {
+ auto [left, right] = ext_pair;
+ return split_children_ret(
+ split_children_ertr::ready_future_marker{},
+ std::make_tuple(left, right, split_into(*left, *right)));
+ });
+}
+
+ExtMapInnerNode::full_merge_ret
+ExtMapInnerNode::make_full_merge(ext_context_t ec, ExtMapNodeRef right)
+{
+ logger().debug("{}: {}", "ExtMapInnerNode", __func__);
+ return extmap_alloc_extent<ExtMapInnerNode>(ec, EXTMAP_BLOCK_SIZE)
+ .safe_then([this, right] (auto &&replacement) {
+ replacement->merge_from(*this, *right->cast<ExtMapInnerNode>());
+ return full_merge_ret(
+ full_merge_ertr::ready_future_marker{},
+ std::move(replacement));
+ });
+}
+
+ExtMapInnerNode::make_balanced_ret
+ExtMapInnerNode::make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left)
+{
+ logger().debug("{}: {}", "ExtMapInnerNode", __func__);
+ ceph_assert(_right->get_type() == type);
+ return extmap_alloc_2extents<ExtMapInnerNode>(ec, EXTMAP_BLOCK_SIZE)
+ .safe_then([this, _right, prefer_left] (auto &&replacement_pair){
+ auto [replacement_left, replacement_right] = replacement_pair;
+ auto &right = *_right->cast<ExtMapInnerNode>();
+ return make_balanced_ret(
+ make_balanced_ertr::ready_future_marker{},
+ std::make_tuple(replacement_left, replacement_right,
+ balance_into_new_nodes(*this, right, prefer_left,
+ *replacement_left, *replacement_right)));
+ });
+}
+
+ExtMapInnerNode::split_entry_ret
+ExtMapInnerNode::split_entry(ext_context_t ec, objaddr_t lo,
+ internal_iterator_t iter, ExtMapNodeRef entry)
+{
+ logger().debug("{}: {}", "ExtMapInnerNode", __func__);
+ if (!is_pending()) {
+ auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast<ExtMapInnerNode>();
+ auto mut_iter = mut->iter_idx(iter->get_offset());
+ return mut->split_entry(ec, lo, mut_iter, entry);
+ }
+ ceph_assert(!at_max_capacity());
+ return entry->make_split_children(ec)
+ .safe_then([this, ec, lo, iter, entry] (auto tuple){
+ auto [left, right, pivot] = tuple;
+ journal_update(iter, left->get_laddr(), maybe_get_delta_buffer());
+ journal_insert(iter + 1, pivot, right->get_laddr(), maybe_get_delta_buffer());
+ logger().debug(
+ "ExtMapInnerNode::split_entry *this {} entry {} into left {} right {}",
+ *this, *entry, *left, *right);
+ //retire extent
+ return ec.tm.dec_ref(ec.t, entry->get_laddr())
+ .safe_then([lo, left = left, right = right, pivot = pivot] (auto ret) {
+ return split_entry_ertr::make_ready_future<ExtMapNodeRef>(
+ pivot > lo ? left : right);
+ });
+ });
+}
+
+ExtMapInnerNode::merge_entry_ret
+ExtMapInnerNode::merge_entry(ext_context_t ec, objaddr_t lo,
+ internal_iterator_t iter, ExtMapNodeRef entry)
+{
+ if (!is_pending()) {
+ auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast<ExtMapInnerNode>();
+ auto mut_iter = mut->iter_idx(iter->get_offset());
+ return mut->merge_entry(ec, lo, mut_iter, entry);
+ }
+ logger().debug("ExtMapInnerNode: merge_entry: {}, {}", *this, *entry);
+ auto is_left = (iter + 1) == end();
+ auto donor_iter = is_left ? iter - 1 : iter + 1;
+ return extmap_load_extent(ec, donor_iter->get_val(), get_meta().depth - 1)
+ .safe_then([this, ec, lo, iter, entry, donor_iter, is_left]
+ (auto &&donor) mutable {
+ auto [l, r] = is_left ?
+ std::make_pair(donor, entry) : std::make_pair(entry, donor);
+ auto [liter, riter] = is_left ?
+ std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
+ if (donor->at_min_capacity()) {
+ return l->make_full_merge(ec, r)
+ .safe_then([this, ec, entry, l = l, r = r, liter = liter, riter = riter]
+ (auto &&replacement){
+ journal_update(liter, replacement->get_laddr(), maybe_get_delta_buffer());
+ journal_remove(riter, maybe_get_delta_buffer());
+ //retire extent
+ std::list<laddr_t> dec_laddrs;
+ dec_laddrs.push_back(l->get_laddr());
+ dec_laddrs.push_back(r->get_laddr());
+ return extmap_retire_node(ec, dec_laddrs)
+ .safe_then([replacement] (auto &&ret) {
+ return merge_entry_ertr::make_ready_future<ExtMapNodeRef>(replacement);
+ });
+ });
+ } else {
+ logger().debug("ExtMapInnerNode::merge_entry balanced l {} r {}",
+ *l, *r);
+ return l->make_balanced(ec, r, !is_left)
+ .safe_then([this, ec, lo, entry, l = l, r = r, liter = liter, riter = riter]
+ (auto tuple) {
+ auto [replacement_l, replacement_r, pivot] = tuple;
+ journal_update(liter, replacement_l->get_laddr(), maybe_get_delta_buffer());
+ journal_replace(riter, pivot, replacement_r->get_laddr(),
+ maybe_get_delta_buffer());
+ // retire extent
+ std::list<laddr_t> dec_laddrs;
+ dec_laddrs.push_back(l->get_laddr());
+ dec_laddrs.push_back(r->get_laddr());
+ return extmap_retire_node(ec, dec_laddrs)
+ .safe_then([lo, pivot = pivot, replacement_l = replacement_l, replacement_r = replacement_r]
+ (auto &&ret) {
+ return merge_entry_ertr::make_ready_future<ExtMapNodeRef>(
+ lo >= pivot ? replacement_r : replacement_l);
+ });
+ });
+ }
+ });
+}
+
+
+ExtMapInnerNode::internal_iterator_t
+ExtMapInnerNode::get_containing_child(objaddr_t lo)
+{
+ // TODO: binary search
+ for (auto i = begin(); i != end(); ++i) {
+ if (i.contains(lo))
+ return i;
+ }
+ ceph_assert(0 == "invalid");
+ return end();
+}
+
+std::ostream &ExtMapLeafNode::print_detail_l(std::ostream &out) const
+{
+ return out << ", size=" << get_size()
+ << ", depth=" << get_meta().depth;
+}
+
+ExtMapLeafNode::find_lextent_ret
+ExtMapLeafNode::find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len)
+{
+ logger().debug(
+ "ExtMapLeafNode::find_lextent {}~{}", lo, len);
+ auto ret = extent_map_list_t();
+ auto [from, to] = get_leaf_entries(lo, len);
+ if (from == to && to != end())
+ ++to;
+ for (; from != to; ++from) {
+ auto val = (*from).get_val();
+ ret.emplace_back(
+ extent_mapping_t(
+ (*from).get_key(),
+ val.laddr,
+ val.length));
+ logger().debug("ExtMapLeafNode::find_lextent find {}~{}", lo, val.laddr);
+ }
+ return find_lextent_ertr::make_ready_future<extent_map_list_t>(
+ std::move(ret));
+}
+
+ExtMapLeafNode::insert_ret
+ExtMapLeafNode::insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val)
+{
+ ceph_assert(!at_max_capacity());
+ if (!is_pending()) {
+ auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast<ExtMapLeafNode>();
+ return mut->insert(ec, lo, val);
+ }
+ auto insert_pt = lower_bound(lo);
+ journal_insert(insert_pt, lo, val, maybe_get_delta_buffer());
+
+ logger().debug(
+ "ExtMapLeafNode::insert: inserted {}->{} {}",
+ insert_pt.get_key(),
+ insert_pt.get_val().laddr,
+ insert_pt.get_val().length);
+ return insert_ertr::make_ready_future<extent_mapping_t>(
+ extent_mapping_t(lo, val.laddr, val.length));
+}
+
+ExtMapLeafNode::rm_lextent_ret
+ExtMapLeafNode::rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val)
+{
+ if (!is_pending()) {
+ auto mut = ec.tm.get_mutable_extent(ec.t, this)->cast<ExtMapLeafNode>();
+ return mut->rm_lextent(ec, lo, val);
+ }
+
+ auto [rm_pt, rm_end] = get_leaf_entries(lo, val.length);
+ if (lo == rm_pt->get_key() && val.laddr == rm_pt->get_val().laddr
+ && val.length == rm_pt->get_val().length) {
+ journal_remove(rm_pt, maybe_get_delta_buffer());
+ logger().debug(
+ "ExtMapLeafNode::rm_lextent: removed {}->{} {}",
+ rm_pt.get_key(),
+ rm_pt.get_val().laddr,
+ rm_pt.get_val().length);
+ return rm_lextent_ertr::make_ready_future<bool>(true);
+ } else {
+ return rm_lextent_ertr::make_ready_future<bool>(false);
+ }
+}
+
+ExtMapLeafNode::split_children_ret
+ExtMapLeafNode::make_split_children(ext_context_t ec)
+{
+ logger().debug("{}: {}", "ExtMapLeafNode", __func__);
+ return extmap_alloc_2extents<ExtMapLeafNode>(ec, EXTMAP_BLOCK_SIZE)
+ .safe_then([this] (auto &&ext_pair) {
+ auto [left, right] = ext_pair;
+ return split_children_ret(
+ split_children_ertr::ready_future_marker{},
+ std::make_tuple(left, right, split_into(*left, *right)));
+ });
+}
+
+ExtMapLeafNode::full_merge_ret
+ExtMapLeafNode::make_full_merge(ext_context_t ec, ExtMapNodeRef right)
+{
+ logger().debug("{}: {}", "ExtMapLeafNode", __func__);
+ return extmap_alloc_extent<ExtMapLeafNode>(ec, EXTMAP_BLOCK_SIZE)
+ .safe_then([this, right] (auto &&replacement) {
+ replacement->merge_from(*this, *right->cast<ExtMapLeafNode>());
+ return full_merge_ret(
+ full_merge_ertr::ready_future_marker{},
+ std::move(replacement));
+ });
+}
+ExtMapLeafNode::make_balanced_ret
+ExtMapLeafNode::make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left)
+{
+ logger().debug("{}: {}", "ExtMapLeafNode", __func__);
+ ceph_assert(_right->get_type() == type);
+ return extmap_alloc_2extents<ExtMapLeafNode>(ec, EXTMAP_BLOCK_SIZE)
+ .safe_then([this, _right, prefer_left] (auto &&replacement_pair) {
+ auto [replacement_left, replacement_right] = replacement_pair;
+ auto &right = *_right->cast<ExtMapLeafNode>();
+ return make_balanced_ret(
+ make_balanced_ertr::ready_future_marker{},
+ std::make_tuple(
+ replacement_left, replacement_right,
+ balance_into_new_nodes(
+ *this, right, prefer_left,
+ *replacement_left, *replacement_right)));
+ });
+}
+
+
+std::pair<ExtMapLeafNode::internal_iterator_t, ExtMapLeafNode::internal_iterator_t>
+ExtMapLeafNode::get_leaf_entries(objaddr_t addr, extent_len_t len)
+{
+ return bound(addr, addr + len);
+}
+
+
+TransactionManager::read_extent_ertr::future<ExtMapNodeRef>
+extmap_load_extent(ext_context_t ec, laddr_t laddr, depth_t depth)
+{
+ ceph_assert(depth > 0);
+ if (depth > 1) {
+ return ec.tm.read_extents<ExtMapInnerNode>(ec.t, laddr, EXTMAP_BLOCK_SIZE).safe_then(
+ [](auto&& extents) {
+ assert(extents.size() == 1);
+ [[maybe_unused]] auto [laddr, e] = extents.front();
+ return TransactionManager::read_extent_ertr::make_ready_future<ExtMapNodeRef>(std::move(e));
+ });
+ } else {
+ return ec.tm.read_extents<ExtMapLeafNode>(ec.t, laddr, EXTMAP_BLOCK_SIZE).safe_then(
+ [](auto&& extents) {
+ assert(extents.size() == 1);
+ [[maybe_unused]] auto [laddr, e] = extents.front();
+ return TransactionManager::read_extent_ertr::make_ready_future<ExtMapNodeRef>(std::move(e));
+ });
+ }
+}
+
+}
diff --git a/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h b/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h
new file mode 100644
index 000000000..f5da8cdc2
--- /dev/null
+++ b/src/crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h
@@ -0,0 +1,281 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+#include "include/buffer.h"
+
+#include "crimson/common/fixed_kv_node_layout.h"
+#include "crimson/common/errorator.h"
+#include "crimson/os/seastore/extentmap_manager.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node.h"
+
+namespace crimson::os::seastore::extentmap_manager {
+
+struct extmap_node_meta_le_t {
+ depth_le_t depth = init_les32(0);
+
+ extmap_node_meta_le_t() = default;
+ extmap_node_meta_le_t(const extmap_node_meta_le_t &) = default;
+ explicit extmap_node_meta_le_t(const extmap_node_meta_t &val)
+ : depth(init_les32(val.depth)) {}
+
+ operator extmap_node_meta_t() const {
+ return extmap_node_meta_t{ depth };
+ }
+};
+
+/**
+ * ExtMapInnerNode
+ *
+ * Abstracts operations on and layout of internal nodes for the
+ * Extentmap Tree.
+ *
+ * Layout (4k):
+ * num_entries: uint32_t 4b
+ * meta : depth 4b
+ * (padding) : 8b
+ * keys : objaddr_t[340] (340*4)b
+ * values : laddr_t[340] (340*8)b
+ * = 4096
+ */
+constexpr size_t INNER_NODE_CAPACITY =
+ (EXTMAP_BLOCK_SIZE - sizeof(uint32_t) - sizeof(extmap_node_meta_t))
+ / (sizeof (objaddr_t) + sizeof(laddr_t));
+
+struct ExtMapInnerNode
+ : ExtMapNode,
+ common::FixedKVNodeLayout<
+ INNER_NODE_CAPACITY,
+ extmap_node_meta_t, extmap_node_meta_le_t,
+ objaddr_t, ceph_le32,
+ laddr_t, laddr_le_t> {
+ using internal_iterator_t = const_iterator;
+ template <typename... T>
+ ExtMapInnerNode(T&&... t) :
+ ExtMapNode(std::forward<T>(t)...),
+ FixedKVNodeLayout(get_bptr().c_str()) {}
+
+ static constexpr extent_types_t type = extent_types_t::EXTMAP_INNER;
+
+ extmap_node_meta_t get_node_meta() const final {return get_meta();}
+
+ CachedExtentRef duplicate_for_write() final {
+ assert(delta_buffer.empty());
+ return CachedExtentRef(new ExtMapInnerNode(*this));
+ };
+
+ delta_buffer_t delta_buffer;
+ delta_buffer_t *maybe_get_delta_buffer() {
+ return is_mutation_pending() ? &delta_buffer : nullptr;
+ }
+
+ find_lextent_ret find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len) final;
+
+ insert_ret insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final;
+
+ rm_lextent_ret rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final;
+
+ split_children_ret make_split_children(ext_context_t ec) final;
+
+ full_merge_ret make_full_merge(ext_context_t ec, ExtMapNodeRef right) final;
+
+ make_balanced_ret make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left) final;
+
+ std::ostream &print_detail_l(std::ostream &out) const final;
+
+ extent_types_t get_type() const final {
+ return type;
+ }
+
+ ceph::bufferlist get_delta() final {
+ assert(!delta_buffer.empty());
+ ceph::buffer::ptr bptr(delta_buffer.get_bytes());
+ delta_buffer.copy_out(bptr.c_str(), bptr.length());
+ ceph::bufferlist bl;
+ bl.push_back(bptr);
+ return bl;
+ }
+
+ void apply_delta(const ceph::bufferlist &_bl) final {
+ assert(_bl.length());
+ ceph::bufferlist bl = _bl;
+ bl.rebuild();
+ delta_buffer_t buffer;
+ buffer.copy_in(bl.front().c_str(), bl.front().length());
+ buffer.replay(*this);
+ }
+
+ bool at_max_capacity() const final {
+ return get_size() == get_capacity();
+ }
+
+ bool at_min_capacity() const {
+ return get_size() == get_capacity() / 2;
+ }
+
+ unsigned get_node_size() const {
+ return get_size();
+ }
+
+ /* get the iterator containing [l, r]
+ */
+ std::pair<internal_iterator_t, internal_iterator_t> bound(
+ objaddr_t l, objaddr_t r) {
+ auto retl = begin();
+ for (; retl != end(); ++retl) {
+ if (retl->get_next_key_or_max() > l)
+ break;
+ }
+ auto retr = retl;
+ for (; retr != end(); ++retr) {
+ if (retr->get_key() >= r)
+ break;
+ }
+ return {retl, retr};
+ }
+
+ using split_entry_ertr = TransactionManager::read_extent_ertr;
+ using split_entry_ret = split_entry_ertr::future<ExtMapNodeRef>;
+ split_entry_ret split_entry(ext_context_t ec, objaddr_t lo,
+ internal_iterator_t, ExtMapNodeRef entry);
+ using merge_entry_ertr = TransactionManager::read_extent_ertr;
+ using merge_entry_ret = merge_entry_ertr::future<ExtMapNodeRef>;
+ merge_entry_ret merge_entry(ext_context_t ec, objaddr_t lo,
+ internal_iterator_t iter, ExtMapNodeRef entry);
+ internal_iterator_t get_containing_child(objaddr_t lo);
+
+};
+
+/**
+ * ExtMapLeafNode
+ *
+ * Abstracts operations on and layout of leaf nodes for the
+ * ExtentMap Tree.
+ *
+ * Layout (4k):
+ * num_entries: uint32_t 4b
+ * meta : depth 4b
+ * (padding) : 8b
+ * keys : objaddr_t[204] (204*4)b
+ * values : lext_map_val_t[204] (204*16)b
+ * = 4096
+ */
+constexpr size_t LEAF_NODE_CAPACITY =
+ (EXTMAP_BLOCK_SIZE - sizeof(uint32_t) - sizeof(extmap_node_meta_t))
+ / (sizeof(objaddr_t) + sizeof(lext_map_val_t));
+
+struct lext_map_val_le_t {
+ laddr_le_t laddr;
+ extent_len_le_t length = init_extent_len_le_t(0);
+
+ lext_map_val_le_t() = default;
+ lext_map_val_le_t(const lext_map_val_le_t &) = default;
+ explicit lext_map_val_le_t(const lext_map_val_t &val)
+ : laddr(laddr_le_t(val.laddr)),
+ length(init_extent_len_le_t(val.length)) {}
+
+ operator lext_map_val_t() const {
+ return lext_map_val_t{laddr, length};
+ }
+};
+
+struct ExtMapLeafNode
+ : ExtMapNode,
+ common::FixedKVNodeLayout<
+ LEAF_NODE_CAPACITY,
+ extmap_node_meta_t, extmap_node_meta_le_t,
+ objaddr_t, ceph_le32,
+ lext_map_val_t, lext_map_val_le_t> {
+ using internal_iterator_t = const_iterator;
+ template <typename... T>
+ ExtMapLeafNode(T&&... t) :
+ ExtMapNode(std::forward<T>(t)...),
+ FixedKVNodeLayout(get_bptr().c_str()) {}
+
+ static constexpr extent_types_t type = extent_types_t::EXTMAP_LEAF;
+
+ extmap_node_meta_t get_node_meta() const final { return get_meta(); }
+
+ CachedExtentRef duplicate_for_write() final {
+ assert(delta_buffer.empty());
+ return CachedExtentRef(new ExtMapLeafNode(*this));
+ };
+
+ delta_buffer_t delta_buffer;
+ delta_buffer_t *maybe_get_delta_buffer() {
+ return is_mutation_pending() ? &delta_buffer : nullptr;
+ }
+
+ find_lextent_ret find_lextent(ext_context_t ec, objaddr_t lo, extent_len_t len) final;
+
+ insert_ret insert(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final;
+
+ rm_lextent_ret rm_lextent(ext_context_t ec, objaddr_t lo, lext_map_val_t val) final;
+
+ split_children_ret make_split_children(ext_context_t ec) final;
+
+ full_merge_ret make_full_merge(ext_context_t ec, ExtMapNodeRef right) final;
+
+ make_balanced_ret make_balanced(ext_context_t ec, ExtMapNodeRef _right, bool prefer_left) final;
+
+ extent_types_t get_type() const final {
+ return type;
+ }
+
+ ceph::bufferlist get_delta() final {
+ assert(!delta_buffer.empty());
+ ceph::buffer::ptr bptr(delta_buffer.get_bytes());
+ delta_buffer.copy_out(bptr.c_str(), bptr.length());
+ ceph::bufferlist bl;
+ bl.push_back(bptr);
+ return bl;
+ }
+
+ void apply_delta(const ceph::bufferlist &_bl) final {
+ assert(_bl.length());
+ ceph::bufferlist bl = _bl;
+ bl.rebuild();
+ delta_buffer_t buffer;
+ buffer.copy_in(bl.front().c_str(), bl.front().length());
+ buffer.replay(*this);
+ }
+
+ std::ostream &print_detail_l(std::ostream &out) const final;
+
+ bool at_max_capacity() const final {
+ return get_size() == get_capacity();
+ }
+
+ bool at_min_capacity() const final {
+ return get_size() == get_capacity() / 2;
+ }
+
+ unsigned get_node_size() const {
+ return get_size();
+ }
+
+ /* get the iterator containing [l, r]
+ */
+ std::pair<internal_iterator_t, internal_iterator_t> bound(
+ objaddr_t l, objaddr_t r) {
+ auto retl = begin();
+ for (; retl != end(); ++retl) {
+ if (retl->get_key() >= l || (retl->get_key() + retl->get_val().length) > l)
+ break;
+ }
+ auto retr = retl;
+ for (; retr != end(); ++retr) {
+ if (retr->get_key() >= r)
+ break;
+ }
+ return {retl, retr};
+ }
+
+ std::pair<internal_iterator_t, internal_iterator_t>
+ get_leaf_entries(objaddr_t lo, extent_len_t len);
+
+};
+using ExtentMapLeafNodeRef = TCachedExtentRef<ExtMapLeafNode>;
+
+}