Diffstat (limited to 'src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc')
-rw-r--r--  src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc  580
1 file changed, 580 insertions(+), 0 deletions(-)
diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
new file mode 100644
index 000000000..a837ae37e
--- /dev/null
+++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
@@ -0,0 +1,580 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/mman.h>
+#include <string.h>
+
+#include "crimson/common/log.h"
+
+#include "include/buffer.h"
+#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
+#include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"
+
+
+namespace {
+ seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_filestore);
+ }
+}
+
+namespace crimson::os::seastore::lba_manager::btree {
+
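+// mkfs: initialize the LBA tree as a single empty leaf covering
+// [0, L_ADDR_MAX) at depth 1 and point the cached root block at it.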
+BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs(
+ Transaction &t)
+{
+ logger().debug("BtreeLBAManager::mkfs");
+ return cache.get_root(t).safe_then([this, &t](auto croot) {
+ auto root_leaf = cache.alloc_new_extent<LBALeafNode>(
+ t,
+ LBA_BLOCK_SIZE);
+ root_leaf->set_size(0);
+ lba_node_meta_t meta{0, L_ADDR_MAX, 1};
+ root_leaf->set_meta(meta);
+ root_leaf->pin.set_range(meta);
+ croot->get_root() =
+ root_t{
+ 1,
+ 0,
+ root_leaf->get_paddr(),
+ make_record_relative_paddr(0),
+ L_ADDR_NULL};
+ return mkfs_ertr::now();
+ });
+}
+
+BtreeLBAManager::get_root_ret
+BtreeLBAManager::get_root(Transaction &t)
+{
+ return cache.get_root(t).safe_then([this, &t](auto croot) {
+ logger().debug(
+ "BtreeLBAManager::get_root: reading root at {} depth {}",
+ paddr_t{croot->get_root().lba_root_addr},
+ unsigned(croot->get_root().lba_depth));
+ return get_lba_btree_extent(
+ get_context(t),
+ croot->get_root().lba_depth,
+ croot->get_root().lba_root_addr,
+ paddr_t());
+ });
+}
+
+BtreeLBAManager::get_mapping_ret
+BtreeLBAManager::get_mapping(
+ Transaction &t,
+ laddr_t offset, extent_len_t length)
+{
+ logger().debug("BtreeLBAManager::get_mapping: {}, {}", offset, length);
+ return get_root(
+ t).safe_then([this, &t, offset, length](auto extent) {
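+    // note: capturing extent in the continuation below keeps the root
+    // node referenced until lookup_range completes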
+ return extent->lookup_range(
+ get_context(t),
+ offset, length
+ ).safe_then([extent](auto ret) { return ret; });
+ }).safe_then([](auto &&e) {
+ logger().debug("BtreeLBAManager::get_mapping: got mapping {}", e);
+ return get_mapping_ret(
+ get_mapping_ertr::ready_future_marker{},
+ std::move(e));
+ });
+}
+
+
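+// get_mappings: resolve a list of (laddr, length) ranges with repeated
+// get_mapping calls, splicing the resulting pins into one list.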
+BtreeLBAManager::get_mappings_ret
+BtreeLBAManager::get_mappings(
+ Transaction &t,
+ laddr_list_t &&list)
+{
+ logger().debug("BtreeLBAManager::get_mappings: {}", list);
+ auto l = std::make_unique<laddr_list_t>(std::move(list));
+ auto retptr = std::make_unique<lba_pin_list_t>();
+ auto &ret = *retptr;
+ return crimson::do_for_each(
+ l->begin(),
+ l->end(),
+ [this, &t, &ret](const auto &p) {
+ return get_mapping(t, p.first, p.second).safe_then(
+ [&ret](auto res) {
+ ret.splice(ret.end(), res, res.begin(), res.end());
+ });
+ }).safe_then([l=std::move(l), retptr=std::move(retptr)]() mutable {
+ return std::move(*retptr);
+ });
+}
+
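+// alloc_extent: find a hole of length len at or after hint via find_hole,
+// then record a fresh mapping laddr -> addr with an initial refcount of 1.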
+BtreeLBAManager::alloc_extent_ret
+BtreeLBAManager::alloc_extent(
+ Transaction &t,
+ laddr_t hint,
+ extent_len_t len,
+ paddr_t addr)
+{
+ // TODO: we can certainly combine the lookup and the insert.
+ return get_root(
+ t).safe_then([this, &t, hint, len](auto extent) {
+ logger().debug(
+ "BtreeLBAManager::alloc_extent: beginning search at {}",
+ *extent);
+ return extent->find_hole(
+ get_context(t),
+ hint,
+ L_ADDR_MAX,
+ len).safe_then([extent](auto ret) {
+ return std::make_pair(ret, extent);
+ });
+ }).safe_then([this, &t, len, addr](auto allocation_pair) {
+ auto &[laddr, extent] = allocation_pair;
+ ceph_assert(laddr != L_ADDR_MAX);
+ return insert_mapping(
+ t,
+ extent,
+ laddr,
+ { len, addr, 1, 0 }
+ ).safe_then([laddr=laddr, addr, len](auto pin) {
+ logger().debug(
+ "BtreeLBAManager::alloc_extent: alloc {}~{} for {}",
+ laddr,
+ len,
+ addr);
+ return alloc_extent_ret(
+ alloc_extent_ertr::ready_future_marker{},
+ LBAPinRef(pin.release()));
+ });
+ });
+}
+
+BtreeLBAManager::set_extent_ret
+BtreeLBAManager::set_extent(
+ Transaction &t,
+ laddr_t off, extent_len_t len, paddr_t addr)
+{
+ return get_root(
+ t).safe_then([this, &t, off, len, addr](auto root) {
+ return insert_mapping(
+ t,
+ root,
+ off,
+ { len, addr, 1, 0 });
+ }).safe_then([](auto ret) {
+ return set_extent_ret(
+ set_extent_ertr::ready_future_marker{},
+ LBAPinRef(ret.release()));
+ });
+}
+
+static bool is_lba_node(extent_types_t type)
+{
+ return type == extent_types_t::LADDR_INTERNAL ||
+ type == extent_types_t::LADDR_LEAF;
+}
+
+static bool is_lba_node(const CachedExtent &e)
+{
+ return is_lba_node(e.get_type());
+}
+
+btree_range_pin_t &BtreeLBAManager::get_pin(CachedExtent &e)
+{
+ if (is_lba_node(e)) {
+ return e.cast<LBANode>()->pin;
+ } else if (e.is_logical()) {
+ return static_cast<BtreeLBAPin &>(
+ e.cast<LogicalCachedExtent>()->get_pin()).pin;
+ } else {
+ ceph_abort_msg("impossible");
+ }
+}
+
+static depth_t get_depth(const CachedExtent &e)
+{
+ if (is_lba_node(e)) {
+ return e.cast<LBANode>()->get_node_meta().depth;
+ } else if (e.is_logical()) {
+ return 0;
+ } else {
+ ceph_assert(0 == "currently impossible");
+ return 0;
+ }
+}
+
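+// complete_transaction: bring the pin set up to date after a commit.
+// Retired extents are unpinned leaf->parent (ascending depth) and fresh
+// extents are pinned parent->leaf (descending depth); check_parent then
+// runs over the retired pins in the same leaf->parent order.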
+BtreeLBAManager::complete_transaction_ret
+BtreeLBAManager::complete_transaction(
+ Transaction &t)
+{
+ std::vector<CachedExtentRef> to_clear;
+ to_clear.reserve(t.get_retired_set().size());
+ for (auto &e: t.get_retired_set()) {
+ if (e->is_logical() || is_lba_node(*e))
+ to_clear.push_back(e);
+ }
+ // need to call check_parent from leaf->parent
+ std::sort(
+ to_clear.begin(), to_clear.end(),
+ [](auto &l, auto &r) { return get_depth(*l) < get_depth(*r); });
+
+ for (auto &e: to_clear) {
+ auto &pin = get_pin(*e);
+ logger().debug("{}: retiring {}, {}", __func__, *e, pin);
+ pin_set.retire(pin);
+ }
+
+ // ...but add_pin from parent->leaf
+ std::vector<CachedExtentRef> to_link;
+ to_link.reserve(t.get_fresh_block_list().size());
+ for (auto &e: t.get_fresh_block_list()) {
+ if (e->is_valid() && (is_lba_node(*e) || e->is_logical()))
+ to_link.push_back(e);
+ }
+ std::sort(
+ to_link.begin(), to_link.end(),
+ [](auto &l, auto &r) -> bool { return get_depth(*l) > get_depth(*r); });
+
+ for (auto &e : to_link) {
+ logger().debug("{}: linking {}", __func__, *e);
+ pin_set.add_pin(get_pin(*e));
+ }
+
+ for (auto &e: to_clear) {
+ auto &pin = get_pin(*e);
+ logger().debug("{}: checking {}, {}", __func__, *e, pin);
+ pin_set.check_parent(pin);
+ }
+ return complete_transaction_ertr::now();
+}
+
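+// init_cached_extent: check whether a cached extent is still referenced
+// by the current LBA tree; live logical extents get their pin
+// re-attached, obsolete extents are dropped from the cache.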
+BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
+ Transaction &t,
+ CachedExtentRef e)
+{
+ logger().debug("{}: {}", __func__, *e);
+ return get_root(t).safe_then(
+ [this, &t, e=std::move(e)](LBANodeRef root) mutable {
+ if (is_lba_node(*e)) {
+ auto lban = e->cast<LBANode>();
+ logger().debug("init_cached_extent: lba node, getting root");
+ return root->lookup(
+ op_context_t{cache, pin_set, t},
+ lban->get_node_meta().begin,
+ lban->get_node_meta().depth
+ ).safe_then([this, e=std::move(e)](LBANodeRef c) {
+ if (c->get_paddr() == e->get_paddr()) {
+ assert(&*c == &*e);
+ logger().debug("init_cached_extent: {} initialized", *e);
+ } else {
+ // e is obsolete
+ logger().debug("init_cached_extent: {} obsolete", *e);
+ cache.drop_from_cache(e);
+ }
+ return init_cached_extent_ertr::now();
+ });
+ } else if (e->is_logical()) {
+ auto logn = e->cast<LogicalCachedExtent>();
+ return root->lookup_range(
+ op_context_t{cache, pin_set, t},
+ logn->get_laddr(),
+ logn->get_length()).safe_then(
+ [this, logn=std::move(logn)](auto pins) {
+ if (pins.size() == 1) {
+ auto pin = std::move(pins.front());
+ pins.pop_front();
+ if (pin->get_paddr() == logn->get_paddr()) {
+ logn->set_pin(std::move(pin));
+ pin_set.add_pin(
+ static_cast<BtreeLBAPin&>(logn->get_pin()).pin);
+ logger().debug("init_cached_extent: {} initialized", *logn);
+ } else {
+ // paddr doesn't match, remapped, obsolete
+ logger().debug("init_cached_extent: {} obsolete", *logn);
+ cache.drop_from_cache(logn);
+ }
+ } else {
+ // set of extents changed, obsolete
+ logger().debug("init_cached_extent: {} obsolete", *logn);
+ cache.drop_from_cache(logn);
+ }
+ return init_cached_extent_ertr::now();
+ });
+ } else {
+ logger().debug("init_cached_extent: {} skipped", *e);
+ return init_cached_extent_ertr::now();
+ }
+ });
+}
+
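+// scan_mappings / scan_mapped_space: walk the tree invoking f on each
+// entry; do_with keeps the root node referenced for the duration of
+// the scan.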
+BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings(
+ Transaction &t,
+ laddr_t begin,
+ laddr_t end,
+ scan_mappings_func_t &&f)
+{
+ return seastar::do_with(
+ std::move(f),
+ LBANodeRef(),
+ [=, &t](auto &f, auto &lbarootref) {
+ return get_root(t).safe_then(
+ [=, &t, &f](LBANodeRef lbaroot) mutable {
+ lbarootref = lbaroot;
+ return lbaroot->scan_mappings(
+ get_context(t),
+ begin,
+ end,
+ f);
+ });
+ });
+}
+
+BtreeLBAManager::scan_mapped_space_ret BtreeLBAManager::scan_mapped_space(
+ Transaction &t,
+ scan_mapped_space_func_t &&f)
+{
+ return seastar::do_with(
+ std::move(f),
+ LBANodeRef(),
+ [=, &t](auto &f, auto &lbarootref) {
+ return get_root(t).safe_then(
+ [=, &t, &f](LBANodeRef lbaroot) mutable {
+ lbarootref = lbaroot;
+ return lbaroot->scan_mapped_space(
+ get_context(t),
+ f);
+ });
+ });
+}
+
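+// rewrite_extent: copy an extent into a freshly allocated one, retire
+// the original, and repoint whichever mapping referenced it (a leaf
+// mapping for logical extents, an internal mapping for lba nodes).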
+BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
+ Transaction &t,
+ CachedExtentRef extent)
+{
+ if (extent->is_logical()) {
+ auto lextent = extent->cast<LogicalCachedExtent>();
+ cache.retire_extent(t, extent);
+ auto nlextent = cache.alloc_new_extent_by_type(
+ t,
+ lextent->get_type(),
+ lextent->get_length())->cast<LogicalCachedExtent>();
+ lextent->get_bptr().copy_out(
+ 0,
+ lextent->get_length(),
+ nlextent->get_bptr().c_str());
+ nlextent->set_laddr(lextent->get_laddr());
+ nlextent->set_pin(lextent->get_pin().duplicate());
+
+ logger().debug(
+ "{}: rewriting {} into {}",
+ __func__,
+ *lextent,
+ *nlextent);
+
+ return update_mapping(
+ t,
+ lextent->get_laddr(),
+ [prev_addr = lextent->get_paddr(), addr = nlextent->get_paddr()](
+ const lba_map_val_t &in) {
+ lba_map_val_t ret = in;
+ ceph_assert(in.paddr == prev_addr);
+ ret.paddr = addr;
+ return ret;
+ }).safe_then([nlextent](auto e) {}).handle_error(
+ rewrite_extent_ertr::pass_further{},
+ /* ENOENT in particular should be impossible */
+ crimson::ct_error::assert_all{}
+ );
+ } else if (is_lba_node(*extent)) {
+ auto lba_extent = extent->cast<LBANode>();
+ cache.retire_extent(t, extent);
+ auto nlba_extent = cache.alloc_new_extent_by_type(
+ t,
+ lba_extent->get_type(),
+ lba_extent->get_length())->cast<LBANode>();
+ lba_extent->get_bptr().copy_out(
+ 0,
+ lba_extent->get_length(),
+ nlba_extent->get_bptr().c_str());
+ nlba_extent->pin.set_range(nlba_extent->get_node_meta());
+
+ /* This is a bit underhanded. Any relative addrs here must necessarily
+ * be record relative as we are rewriting a dirty extent. Thus, we
+ * are using resolve_relative_addrs with a (likely negative) block
+ * relative offset to correct them to block-relative offsets adjusted
+ * for our new transaction location.
+ *
+     * Upon commit, these now block-relative addresses will be interpreted
+ * against the real final address.
+ */
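+    // For example (hypothetical numbers): if the fresh node sits at
+    // record-relative +8192, the offset passed below is -8192, so a
+    // stored record-relative addr of +4096 becomes -4096, i.e. 4096
+    // bytes before this node's final address.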
+ nlba_extent->resolve_relative_addrs(
+ make_record_relative_paddr(0) - nlba_extent->get_paddr());
+
+ return update_internal_mapping(
+ t,
+ nlba_extent->get_node_meta().depth,
+ nlba_extent->get_node_meta().begin,
+ nlba_extent->get_paddr()).safe_then(
+ [](auto) {},
+ rewrite_extent_ertr::pass_further {},
+ crimson::ct_error::assert_all{});
+ } else {
+ return rewrite_extent_ertr::now();
+ }
+}
+
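+// get_physical_extent_if_live: read the lba node at addr and return it
+// only if the current tree still references that physical location;
+// otherwise drop the stale copy from the cache and return a null ref.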
+BtreeLBAManager::get_physical_extent_if_live_ret
+BtreeLBAManager::get_physical_extent_if_live(
+ Transaction &t,
+ extent_types_t type,
+ paddr_t addr,
+ laddr_t laddr,
+ segment_off_t len)
+{
+ ceph_assert(is_lba_node(type));
+ return cache.get_extent_by_type(
+ t,
+ type,
+ addr,
+ laddr,
+ len
+ ).safe_then([=, &t](CachedExtentRef extent) {
+ return get_root(t).safe_then([=, &t](LBANodeRef root) {
+ auto lba_node = extent->cast<LBANode>();
+ return root->lookup(
+ op_context_t{cache, pin_set, t},
+ lba_node->get_node_meta().begin,
+ lba_node->get_node_meta().depth).safe_then([=](LBANodeRef c) {
+ if (c->get_paddr() == lba_node->get_paddr()) {
+ return get_physical_extent_if_live_ret(
+ get_physical_extent_if_live_ertr::ready_future_marker{},
+ lba_node);
+ } else {
+ cache.drop_from_cache(lba_node);
+ return get_physical_extent_if_live_ret(
+ get_physical_extent_if_live_ertr::ready_future_marker{},
+ CachedExtentRef());
+ }
+ });
+ });
+ });
+}
+
+BtreeLBAManager::BtreeLBAManager(
+ SegmentManager &segment_manager,
+ Cache &cache)
+ : segment_manager(segment_manager),
+ cache(cache) {}
+
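+// insert_mapping: insert laddr -> val under root. If root is already at
+// max capacity, split first: allocate a new internal root one depth
+// higher, point the root block at it, and split the old root beneath it.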
+BtreeLBAManager::insert_mapping_ret BtreeLBAManager::insert_mapping(
+ Transaction &t,
+ LBANodeRef root,
+ laddr_t laddr,
+ lba_map_val_t val)
+{
+ auto split = insert_mapping_ertr::future<LBANodeRef>(
+ insert_mapping_ertr::ready_future_marker{},
+ root);
+ if (root->at_max_capacity()) {
+ split = cache.get_root(t).safe_then(
+ [this, root, laddr, &t](RootBlockRef croot) {
+ logger().debug(
+ "BtreeLBAManager::insert_mapping: splitting root {}",
+ *croot);
+ {
+ auto mut_croot = cache.duplicate_for_write(t, croot);
+ croot = mut_croot->cast<RootBlock>();
+ }
+ auto nroot = cache.alloc_new_extent<LBAInternalNode>(t, LBA_BLOCK_SIZE);
+ lba_node_meta_t meta{0, L_ADDR_MAX, root->get_node_meta().depth + 1};
+ nroot->set_meta(meta);
+ nroot->pin.set_range(meta);
+ nroot->journal_insert(
+ nroot->begin(),
+ L_ADDR_MIN,
+ root->get_paddr(),
+ nullptr);
+ croot->get_root().lba_root_addr = nroot->get_paddr();
+ croot->get_root().lba_depth = root->get_node_meta().depth + 1;
+ return nroot->split_entry(
+ get_context(t),
+ laddr, nroot->begin(), root);
+ });
+ }
+ return split.safe_then([this, &t, laddr, val](LBANodeRef node) {
+ return node->insert(
+ get_context(t),
+ laddr, val);
+ });
+}
+
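+// update_refcount: adjust the refcount of the mapping at addr by delta,
+// returning the resulting refcount and paddr so callers can detect when
+// the extent becomes unreferenced.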
+BtreeLBAManager::update_refcount_ret BtreeLBAManager::update_refcount(
+ Transaction &t,
+ laddr_t addr,
+ int delta)
+{
+ return update_mapping(
+ t,
+ addr,
+ [delta](const lba_map_val_t &in) {
+ lba_map_val_t out = in;
+ ceph_assert((int)out.refcount + delta >= 0);
+ out.refcount += delta;
+ return out;
+ }).safe_then([](auto result) {
+ return ref_update_result_t{result.refcount, result.paddr};
+ });
+}
+
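+// update_mapping: apply the update functor to the mapping at addr,
+// recursing from the root via mutate_mapping.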
+BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping(
+ Transaction &t,
+ laddr_t addr,
+ update_func_t &&f)
+{
+ return get_root(t
+ ).safe_then([this, f=std::move(f), &t, addr](LBANodeRef root) mutable {
+ return root->mutate_mapping(
+ get_context(t),
+ addr,
+ std::move(f));
+ });
+}
+
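+// update_internal_mapping: repoint the parent reference to the lba node
+// at (depth, laddr). At the root depth this rewrites the root block's
+// lba_root_addr directly; otherwise it recurses via
+// mutate_internal_address.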
+BtreeLBAManager::update_internal_mapping_ret
+BtreeLBAManager::update_internal_mapping(
+ Transaction &t,
+ depth_t depth,
+ laddr_t laddr,
+ paddr_t paddr)
+{
+ return cache.get_root(t).safe_then([=, &t](RootBlockRef croot) {
+ if (depth == croot->get_root().lba_depth) {
+ logger().debug(
+ "update_internal_mapping: updating lba root to: {}->{}",
+ laddr,
+ paddr);
+ {
+ auto mut_croot = cache.duplicate_for_write(t, croot);
+ croot = mut_croot->cast<RootBlock>();
+ }
+ ceph_assert(laddr == 0);
+ auto old_paddr = croot->get_root().lba_root_addr;
+ croot->get_root().lba_root_addr = paddr;
+ return update_internal_mapping_ret(
+ update_internal_mapping_ertr::ready_future_marker{},
+ old_paddr);
+ } else {
+ logger().debug(
+ "update_internal_mapping: updating lba node at depth {} to: {}->{}",
+ depth,
+ laddr,
+ paddr);
+ return get_lba_btree_extent(
+ get_context(t),
+ croot->get_root().lba_depth,
+ croot->get_root().lba_root_addr,
+ paddr_t()).safe_then([=, &t](LBANodeRef broot) {
+ return broot->mutate_internal_address(
+ get_context(t),
+ depth,
+ laddr,
+ paddr);
+ });
+ }
+ });
+}
+
+}