From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:45:59 +0200 Subject: Adding upstream version 16.2.11+ds. Signed-off-by: Daniel Baumann --- .../lba_manager/btree/lba_btree_node_impl.cc | 701 +++++++++++++++++++++ 1 file changed, 701 insertions(+) create mode 100644 src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc (limited to 'src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc') diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc new file mode 100644 index 000000000..5e400803b --- /dev/null +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc @@ -0,0 +1,701 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include + +#include +#include + +#include "include/buffer.h" +#include "include/byteorder.h" + +#include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h" + +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } +} + +namespace crimson::os::seastore::lba_manager::btree { + +std::ostream &LBAInternalNode::print_detail(std::ostream &out) const +{ + return out << ", size=" << get_size() + << ", meta=" << get_meta(); +} + +LBAInternalNode::lookup_ret LBAInternalNode::lookup( + op_context_t c, + laddr_t addr, + depth_t depth) +{ + auto meta = get_meta(); + if (depth == get_meta().depth) { + return lookup_ret( + lookup_ertr::ready_future_marker{}, + this); + } + assert(meta.begin <= addr); + assert(meta.end > addr); + auto iter = lower_bound(addr); + return get_lba_btree_extent( + c, + meta.depth - 1, + iter->get_val(), + get_paddr()).safe_then([c, addr, depth](auto child) { + return child->lookup(c, addr, depth); + }).finally([ref=LBANodeRef(this)] {}); +} + +LBAInternalNode::lookup_range_ret LBAInternalNode::lookup_range( + op_context_t c, + laddr_t addr, + extent_len_t len) +{ + auto [begin, end] = bound(addr, addr + len); + auto result_up = std::make_unique(); + auto &result = *result_up; + return crimson::do_for_each( + std::move(begin), + std::move(end), + [this, c, &result, addr, len](const auto &val) mutable { + return get_lba_btree_extent( + c, + get_meta().depth - 1, + val.get_val(), + get_paddr()).safe_then( + [c, &result, addr, len](auto extent) mutable { + return extent->lookup_range( + c, + addr, + len).safe_then( + [&result](auto pin_list) mutable { + result.splice(result.end(), pin_list, + pin_list.begin(), pin_list.end()); + }); + }); + }).safe_then([result=std::move(result_up), ref=LBANodeRef(this)] { + return lookup_range_ertr::make_ready_future( + std::move(*result)); + }); +} + +LBAInternalNode::insert_ret LBAInternalNode::insert( + op_context_t c, + laddr_t laddr, + lba_map_val_t val) +{ + auto insertion_pt = get_containing_child(laddr); + return get_lba_btree_extent( + c, + get_meta().depth - 1, + insertion_pt->get_val(), + get_paddr()).safe_then( + [this, insertion_pt, c, laddr, val=std::move(val)]( + auto extent) mutable { + return extent->at_max_capacity() ? + split_entry(c, laddr, insertion_pt, extent) : + insert_ertr::make_ready_future(std::move(extent)); + }).safe_then([c, laddr, val=std::move(val)]( + LBANodeRef extent) mutable { + return extent->insert(c, laddr, val); + }); +} + +LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping( + op_context_t c, + laddr_t laddr, + mutate_func_t &&f) +{ + return mutate_mapping_internal(c, laddr, true, std::move(f)); +} + +LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping_internal( + op_context_t c, + laddr_t laddr, + bool is_root, + mutate_func_t &&f) +{ + auto mutation_pt = get_containing_child(laddr); + if (mutation_pt == end()) { + assert(0 == "impossible"); + return crimson::ct_error::enoent::make(); + } + return get_lba_btree_extent( + c, + get_meta().depth - 1, + mutation_pt->get_val(), + get_paddr() + ).safe_then([=](LBANodeRef extent) { + if (extent->at_min_capacity() && get_size() > 1) { + return merge_entry( + c, + laddr, + mutation_pt, + extent, + is_root); + } else { + return merge_ertr::make_ready_future( + std::move(extent)); + } + }).safe_then([c, laddr, f=std::move(f)](LBANodeRef extent) mutable { + return extent->mutate_mapping_internal(c, laddr, false, std::move(f)); + }); +} + +LBAInternalNode::mutate_internal_address_ret LBAInternalNode::mutate_internal_address( + op_context_t c, + depth_t depth, + laddr_t laddr, + paddr_t paddr) +{ + if (get_meta().depth == (depth + 1)) { + if (!is_pending()) { + return c.cache.duplicate_for_write(c.trans, this)->cast( + )->mutate_internal_address( + c, + depth, + laddr, + paddr); + } + auto iter = get_containing_child(laddr); + if (iter->get_key() != laddr) { + return crimson::ct_error::enoent::make(); + } + + auto old_paddr = iter->get_val(); + + journal_update( + iter, + maybe_generate_relative(paddr), + maybe_get_delta_buffer()); + + return mutate_internal_address_ret( + mutate_internal_address_ertr::ready_future_marker{}, + old_paddr + ); + } else { + auto iter = get_containing_child(laddr); + return get_lba_btree_extent( + c, + get_meta().depth - 1, + iter->get_val(), + get_paddr() + ).safe_then([=](auto node) { + return node->mutate_internal_address( + c, + depth, + laddr, + paddr); + }); + } +} + +LBAInternalNode::find_hole_ret LBAInternalNode::find_hole( + op_context_t c, + laddr_t min_addr, + laddr_t max_addr, + extent_len_t len) +{ + logger().debug( + "LBAInternalNode::find_hole min={}, max={}, len={}, *this={}", + min_addr, max_addr, len, *this); + auto [begin, end] = bound(min_addr, max_addr); + return seastar::repeat_until_value( + [i=begin, e=end, c, min_addr, len, this]() mutable { + if (i == e) { + return seastar::make_ready_future>( + std::make_optional(L_ADDR_NULL)); + } + return get_lba_btree_extent(c, + get_meta().depth - 1, + i->get_val(), + get_paddr()).safe_then( + [c, min_addr, len, i](auto extent) mutable { + auto lb = std::max(min_addr, i->get_key()); + auto ub = i->get_next_key_or_max(); + logger().debug("LBAInternalNode::find_hole extent {} lb {} ub {}", + *extent, lb, ub); + return extent->find_hole(c, lb, ub, len); + }).safe_then([&i](auto addr) mutable -> std::optional { + if (addr == L_ADDR_NULL) { + ++i; + return {}; + } else { + return addr; + } + }, + // TODO: GCC enters a dead loop if crimson::do_until() is used + // or erroratorized future is returned + crimson::ct_error::assert_all{ "fix me - APIv6" }); + }); +} + +LBAInternalNode::scan_mappings_ret LBAInternalNode::scan_mappings( + op_context_t c, + laddr_t begin, + laddr_t end, + scan_mappings_func_t &f) +{ + auto [biter, eiter] = bound(begin, end); + return crimson::do_for_each( + std::move(biter), + std::move(eiter), + [=, &f](auto &viter) { + return get_lba_btree_extent( + c, + get_meta().depth - 1, + viter->get_val(), + get_paddr()).safe_then([=, &f](auto child) { + return child->scan_mappings(c, begin, end, f); + }); + }).safe_then([ref=LBANodeRef(this)]{}); +} + +LBAInternalNode::scan_mapped_space_ret LBAInternalNode::scan_mapped_space( + op_context_t c, + scan_mapped_space_func_t &f) +{ + f(get_paddr(), get_length()); + return crimson::do_for_each( + begin(), end(), + [=, &f](auto &viter) { + return get_lba_btree_extent( + c, + get_meta().depth - 1, + viter->get_val(), + get_paddr()).safe_then([=, &f](auto child) { + return child->scan_mapped_space(c, f); + }); + }).safe_then([ref=LBANodeRef(this)]{}); +} + + +void LBAInternalNode::resolve_relative_addrs(paddr_t base) +{ + for (auto i: *this) { + if (i->get_val().is_relative()) { + auto updated = base.add_relative(i->get_val()); + logger().debug( + "LBAInternalNode::resolve_relative_addrs {} -> {}", + i->get_val(), + updated); + i->set_val(updated); + } + } +} + + +LBAInternalNode::split_ret +LBAInternalNode::split_entry( + op_context_t c, + laddr_t addr, + internal_iterator_t iter, LBANodeRef entry) +{ + if (!is_pending()) { + auto mut = c.cache.duplicate_for_write( + c.trans, this)->cast(); + auto mut_iter = mut->iter_idx(iter->get_offset()); + return mut->split_entry(c, addr, mut_iter, entry); + } + + ceph_assert(!at_max_capacity()); + auto [left, right, pivot] = entry->make_split_children(c); + + journal_update( + iter, + maybe_generate_relative(left->get_paddr()), + maybe_get_delta_buffer()); + journal_insert( + iter + 1, + pivot, + maybe_generate_relative(right->get_paddr()), + maybe_get_delta_buffer()); + + c.cache.retire_extent(c.trans, entry); + + logger().debug( + "LBAInternalNode::split_entry *this {} entry {} into left {} right {}", + *this, + *entry, + *left, + *right); + + return split_ertr::make_ready_future( + pivot > addr ? left : right + ); +} + +LBAInternalNode::merge_ret +LBAInternalNode::merge_entry( + op_context_t c, + laddr_t addr, + internal_iterator_t iter, + LBANodeRef entry, + bool is_root) +{ + if (!is_pending()) { + auto mut = c.cache.duplicate_for_write(c.trans, this)->cast(); + auto mut_iter = mut->iter_idx(iter->get_offset()); + return mut->merge_entry(c, addr, mut_iter, entry, is_root); + } + + logger().debug( + "LBAInternalNode: merge_entry: {}, {}", + *this, + *entry); + auto donor_is_left = (iter + 1) == end(); + auto donor_iter = donor_is_left ? iter - 1 : iter + 1; + return get_lba_btree_extent( + c, + get_meta().depth - 1, + donor_iter->get_val(), + get_paddr() + ).safe_then([=](auto donor) mutable { + auto [l, r] = donor_is_left ? + std::make_pair(donor, entry) : std::make_pair(entry, donor); + auto [liter, riter] = donor_is_left ? + std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter); + if (donor->at_min_capacity()) { + auto replacement = l->make_full_merge( + c, + r); + + journal_update( + liter, + maybe_generate_relative(replacement->get_paddr()), + maybe_get_delta_buffer()); + journal_remove(riter, maybe_get_delta_buffer()); + + c.cache.retire_extent(c.trans, l); + c.cache.retire_extent(c.trans, r); + + if (is_root && get_size() == 1) { + return c.cache.get_root(c.trans).safe_then([=](RootBlockRef croot) { + { + auto mut_croot = c.cache.duplicate_for_write(c.trans, croot); + croot = mut_croot->cast(); + } + croot->root.lba_root_addr = begin()->get_val(); + logger().debug( + "LBAInternalNode::merge_entry: collapsing root {} to addr {}", + *this, + begin()->get_val()); + croot->root.lba_depth = get_meta().depth - 1; + c.cache.retire_extent(c.trans, this); + return merge_ertr::make_ready_future(replacement); + }); + } else { + return merge_ertr::make_ready_future(replacement); + } + } else { + logger().debug( + "LBAInternalEntry::merge_entry balanced l {} r {}", + *l, + *r); + auto [replacement_l, replacement_r, pivot] = + l->make_balanced( + c, + r, + !donor_is_left); + + journal_update( + liter, + maybe_generate_relative(replacement_l->get_paddr()), + maybe_get_delta_buffer()); + journal_replace( + riter, + pivot, + maybe_generate_relative(replacement_r->get_paddr()), + maybe_get_delta_buffer()); + + c.cache.retire_extent(c.trans, l); + c.cache.retire_extent(c.trans, r); + return merge_ertr::make_ready_future( + addr >= pivot ? replacement_r : replacement_l + ); + } + }); +} + + +LBAInternalNode::internal_iterator_t +LBAInternalNode::get_containing_child(laddr_t laddr) +{ + // TODO: binary search + for (auto i = begin(); i != end(); ++i) { + if (i.contains(laddr)) + return i; + } + ceph_assert(0 == "invalid"); + return end(); +} + +std::ostream &LBALeafNode::print_detail(std::ostream &out) const +{ + return out << ", size=" << get_size() + << ", meta=" << get_meta(); +} + +LBALeafNode::lookup_range_ret LBALeafNode::lookup_range( + op_context_t c, + laddr_t addr, + extent_len_t len) +{ + logger().debug( + "LBALeafNode::lookup_range {}~{}", + addr, + len); + auto ret = lba_pin_list_t(); + auto [i, end] = get_leaf_entries(addr, len); + for (; i != end; ++i) { + auto val = i->get_val(); + auto begin = i->get_key(); + ret.emplace_back( + std::make_unique( + this, + val.paddr.maybe_relative_to(get_paddr()), + lba_node_meta_t{ begin, begin + val.len, 0})); + } + return lookup_range_ertr::make_ready_future( + std::move(ret)); +} + +LBALeafNode::insert_ret LBALeafNode::insert( + op_context_t c, + laddr_t laddr, + lba_map_val_t val) +{ + ceph_assert(!at_max_capacity()); + + if (!is_pending()) { + return c.cache.duplicate_for_write(c.trans, this + )->cast()->insert(c, laddr, val); + } + + val.paddr = maybe_generate_relative(val.paddr); + logger().debug( + "LBALeafNode::insert: inserting {}~{} -> {}", + laddr, + val.len, + val.paddr); + + auto insert_pt = lower_bound(laddr); + journal_insert(insert_pt, laddr, val, maybe_get_delta_buffer()); + + logger().debug( + "LBALeafNode::insert: inserted {}~{} -> {}", + insert_pt.get_key(), + insert_pt.get_val().len, + insert_pt.get_val().paddr); + auto begin = insert_pt.get_key(); + return insert_ret( + insert_ertr::ready_future_marker{}, + std::make_unique( + this, + val.paddr.maybe_relative_to(get_paddr()), + lba_node_meta_t{ begin, begin + val.len, 0})); +} + +LBALeafNode::mutate_mapping_ret LBALeafNode::mutate_mapping( + op_context_t c, + laddr_t laddr, + mutate_func_t &&f) +{ + return mutate_mapping_internal(c, laddr, true, std::move(f)); +} + +LBALeafNode::mutate_mapping_ret LBALeafNode::mutate_mapping_internal( + op_context_t c, + laddr_t laddr, + bool is_root, + mutate_func_t &&f) +{ + auto mutation_pt = find(laddr); + if (mutation_pt == end()) { + return crimson::ct_error::enoent::make(); + } + + if (!is_pending()) { + return c.cache.duplicate_for_write(c.trans, this)->cast( + )->mutate_mapping_internal( + c, + laddr, + is_root, + std::move(f)); + } + + auto cur = mutation_pt.get_val(); + auto mutated = f(cur); + + mutated.paddr = maybe_generate_relative(mutated.paddr); + + logger().debug( + "{}: mutate addr {}: {} -> {}", + __func__, + laddr, + cur.paddr, + mutated.paddr); + + if (mutated.refcount > 0) { + journal_update(mutation_pt, mutated, maybe_get_delta_buffer()); + return mutate_mapping_ret( + mutate_mapping_ertr::ready_future_marker{}, + mutated); + } else { + journal_remove(mutation_pt, maybe_get_delta_buffer()); + return mutate_mapping_ret( + mutate_mapping_ertr::ready_future_marker{}, + mutated); + } +} + +LBALeafNode::mutate_internal_address_ret LBALeafNode::mutate_internal_address( + op_context_t c, + depth_t depth, + laddr_t laddr, + paddr_t paddr) +{ + ceph_assert(0 == "Impossible"); + return mutate_internal_address_ret( + mutate_internal_address_ertr::ready_future_marker{}, + paddr); +} + +LBALeafNode::find_hole_ret LBALeafNode::find_hole( + op_context_t c, + laddr_t min, + laddr_t max, + extent_len_t len) +{ + logger().debug( + "LBALeafNode::find_hole min={} max={}, len={}, *this={}", + min, max, len, *this); + auto [liter, uiter] = bound(min, max); + for (auto i = liter; i != uiter; ++i) { + auto ub = i->get_key(); + if (min + len <= ub) { + return find_hole_ret( + find_hole_ertr::ready_future_marker{}, + min); + } else { + min = i->get_key() + i->get_val().len; + } + } + if (min + len <= max) { + return find_hole_ret( + find_hole_ertr::ready_future_marker{}, + min); + } else { + return find_hole_ret( + find_hole_ertr::ready_future_marker{}, + L_ADDR_MAX); + } +} + +LBALeafNode::scan_mappings_ret LBALeafNode::scan_mappings( + op_context_t c, + laddr_t begin, + laddr_t end, + scan_mappings_func_t &f) +{ + auto [biter, eiter] = bound(begin, end); + for (auto i = biter; i != eiter; ++i) { + auto val = i->get_val(); + f(i->get_key(), val.paddr, val.len); + } + return scan_mappings_ertr::now(); +} + +LBALeafNode::scan_mapped_space_ret LBALeafNode::scan_mapped_space( + op_context_t c, + scan_mapped_space_func_t &f) +{ + f(get_paddr(), get_length()); + for (auto i = begin(); i != end(); ++i) { + auto val = i->get_val(); + f(val.paddr, val.len); + } + return scan_mappings_ertr::now(); +} + + +void LBALeafNode::resolve_relative_addrs(paddr_t base) +{ + for (auto i: *this) { + if (i->get_val().paddr.is_relative()) { + auto val = i->get_val(); + val.paddr = base.add_relative(val.paddr); + logger().debug( + "LBALeafNode::resolve_relative_addrs {} -> {}", + i->get_val().paddr, + val.paddr); + i->set_val(val); + } + } +} + +std::pair +LBALeafNode::get_leaf_entries(laddr_t addr, extent_len_t len) +{ + return bound(addr, addr + len); +} + +Cache::get_extent_ertr::future get_lba_btree_extent( + op_context_t c, + depth_t depth, + paddr_t offset, + paddr_t base) +{ + offset = offset.maybe_relative_to(base); + ceph_assert(depth > 0); + if (depth > 1) { + logger().debug( + "get_lba_btree_extent: reading internal at offset {}, depth {}", + offset, + depth); + return c.cache.get_extent( + c.trans, + offset, + LBA_BLOCK_SIZE).safe_then([c](auto ret) { + auto meta = ret->get_meta(); + if (ret->get_size()) { + ceph_assert(meta.begin <= ret->begin()->get_key()); + ceph_assert(meta.end > (ret->end() - 1)->get_key()); + } + if (!ret->is_pending() && !ret->pin.is_linked()) { + ret->pin.set_range(meta); + c.pins.add_pin(ret->pin); + } + return LBANodeRef(ret.detach(), /* add_ref = */ false); + }); + } else { + logger().debug( + "get_lba_btree_extent: reading leaf at offset {}, depth {}", + offset, + depth); + return c.cache.get_extent( + c.trans, + offset, + LBA_BLOCK_SIZE).safe_then([offset, c](auto ret) { + logger().debug( + "get_lba_btree_extent: read leaf at offset {} {}", + offset, + *ret); + auto meta = ret->get_meta(); + if (ret->get_size()) { + ceph_assert(meta.begin <= ret->begin()->get_key()); + ceph_assert(meta.end > (ret->end() - 1)->get_key()); + } + if (!ret->is_pending() && !ret->pin.is_linked()) { + ret->pin.set_range(meta); + c.pins.add_pin(ret->pin); + } + return LBANodeRef(ret.detach(), /* add_ref = */ false); + }); + } +} + +} -- cgit v1.2.3