// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #include #include #include "crimson/common/log.h" #include "include/buffer.h" #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" #include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h" namespace { seastar::logger& logger() { return crimson::get_logger(ceph_subsys_filestore); } } namespace crimson::os::seastore::lba_manager::btree { BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs( Transaction &t) { logger().debug("BtreeLBAManager::mkfs"); return cache.get_root(t).safe_then([this, &t](auto croot) { auto root_leaf = cache.alloc_new_extent( t, LBA_BLOCK_SIZE); root_leaf->set_size(0); lba_node_meta_t meta{0, L_ADDR_MAX, 1}; root_leaf->set_meta(meta); root_leaf->pin.set_range(meta); croot->get_root() = root_t{ 1, 0, root_leaf->get_paddr(), make_record_relative_paddr(0), L_ADDR_NULL}; return mkfs_ertr::now(); }); } BtreeLBAManager::get_root_ret BtreeLBAManager::get_root(Transaction &t) { return cache.get_root(t).safe_then([this, &t](auto croot) { logger().debug( "BtreeLBAManager::get_root: reading root at {} depth {}", paddr_t{croot->get_root().lba_root_addr}, unsigned(croot->get_root().lba_depth)); return get_lba_btree_extent( get_context(t), croot->get_root().lba_depth, croot->get_root().lba_root_addr, paddr_t()); }); } BtreeLBAManager::get_mapping_ret BtreeLBAManager::get_mapping( Transaction &t, laddr_t offset, extent_len_t length) { logger().debug("BtreeLBAManager::get_mapping: {}, {}", offset, length); return get_root( t).safe_then([this, &t, offset, length](auto extent) { return extent->lookup_range( get_context(t), offset, length ).safe_then([extent](auto ret) { return ret; }); }).safe_then([](auto &&e) { logger().debug("BtreeLBAManager::get_mapping: got mapping {}", e); return get_mapping_ret( get_mapping_ertr::ready_future_marker{}, std::move(e)); }); } BtreeLBAManager::get_mappings_ret BtreeLBAManager::get_mappings( Transaction &t, laddr_list_t &&list) { logger().debug("BtreeLBAManager::get_mappings: {}", list); auto l = std::make_unique(std::move(list)); auto retptr = std::make_unique(); auto &ret = *retptr; return crimson::do_for_each( l->begin(), l->end(), [this, &t, &ret](const auto &p) { return get_mapping(t, p.first, p.second).safe_then( [&ret](auto res) { ret.splice(ret.end(), res, res.begin(), res.end()); }); }).safe_then([l=std::move(l), retptr=std::move(retptr)]() mutable { return std::move(*retptr); }); } BtreeLBAManager::alloc_extent_ret BtreeLBAManager::alloc_extent( Transaction &t, laddr_t hint, extent_len_t len, paddr_t addr) { // TODO: we can certainly combine the lookup and the insert. return get_root( t).safe_then([this, &t, hint, len](auto extent) { logger().debug( "BtreeLBAManager::alloc_extent: beginning search at {}", *extent); return extent->find_hole( get_context(t), hint, L_ADDR_MAX, len).safe_then([extent](auto ret) { return std::make_pair(ret, extent); }); }).safe_then([this, &t, len, addr](auto allocation_pair) { auto &[laddr, extent] = allocation_pair; ceph_assert(laddr != L_ADDR_MAX); return insert_mapping( t, extent, laddr, { len, addr, 1, 0 } ).safe_then([laddr=laddr, addr, len](auto pin) { logger().debug( "BtreeLBAManager::alloc_extent: alloc {}~{} for {}", laddr, len, addr); return alloc_extent_ret( alloc_extent_ertr::ready_future_marker{}, LBAPinRef(pin.release())); }); }); } BtreeLBAManager::set_extent_ret BtreeLBAManager::set_extent( Transaction &t, laddr_t off, extent_len_t len, paddr_t addr) { return get_root( t).safe_then([this, &t, off, len, addr](auto root) { return insert_mapping( t, root, off, { len, addr, 1, 0 }); }).safe_then([](auto ret) { return set_extent_ret( set_extent_ertr::ready_future_marker{}, LBAPinRef(ret.release())); }); } static bool is_lba_node(extent_types_t type) { return type == extent_types_t::LADDR_INTERNAL || type == extent_types_t::LADDR_LEAF; } static bool is_lba_node(const CachedExtent &e) { return is_lba_node(e.get_type()); } btree_range_pin_t &BtreeLBAManager::get_pin(CachedExtent &e) { if (is_lba_node(e)) { return e.cast()->pin; } else if (e.is_logical()) { return static_cast( e.cast()->get_pin()).pin; } else { ceph_abort_msg("impossible"); } } static depth_t get_depth(const CachedExtent &e) { if (is_lba_node(e)) { return e.cast()->get_node_meta().depth; } else if (e.is_logical()) { return 0; } else { ceph_assert(0 == "currently impossible"); return 0; } } BtreeLBAManager::complete_transaction_ret BtreeLBAManager::complete_transaction( Transaction &t) { std::vector to_clear; to_clear.reserve(t.get_retired_set().size()); for (auto &e: t.get_retired_set()) { if (e->is_logical() || is_lba_node(*e)) to_clear.push_back(e); } // need to call check_parent from leaf->parent std::sort( to_clear.begin(), to_clear.end(), [](auto &l, auto &r) { return get_depth(*l) < get_depth(*r); }); for (auto &e: to_clear) { auto &pin = get_pin(*e); logger().debug("{}: retiring {}, {}", __func__, *e, pin); pin_set.retire(pin); } // ...but add_pin from parent->leaf std::vector to_link; to_link.reserve(t.get_fresh_block_list().size()); for (auto &e: t.get_fresh_block_list()) { if (e->is_valid() && (is_lba_node(*e) || e->is_logical())) to_link.push_back(e); } std::sort( to_link.begin(), to_link.end(), [](auto &l, auto &r) -> bool { return get_depth(*l) > get_depth(*r); }); for (auto &e : to_link) { logger().debug("{}: linking {}", __func__, *e); pin_set.add_pin(get_pin(*e)); } for (auto &e: to_clear) { auto &pin = get_pin(*e); logger().debug("{}: checking {}, {}", __func__, *e, pin); pin_set.check_parent(pin); } return complete_transaction_ertr::now(); } BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent( Transaction &t, CachedExtentRef e) { logger().debug("{}: {}", __func__, *e); return get_root(t).safe_then( [this, &t, e=std::move(e)](LBANodeRef root) mutable { if (is_lba_node(*e)) { auto lban = e->cast(); logger().debug("init_cached_extent: lba node, getting root"); return root->lookup( op_context_t{cache, pin_set, t}, lban->get_node_meta().begin, lban->get_node_meta().depth ).safe_then([this, e=std::move(e)](LBANodeRef c) { if (c->get_paddr() == e->get_paddr()) { assert(&*c == &*e); logger().debug("init_cached_extent: {} initialized", *e); } else { // e is obsolete logger().debug("init_cached_extent: {} obsolete", *e); cache.drop_from_cache(e); } return init_cached_extent_ertr::now(); }); } else if (e->is_logical()) { auto logn = e->cast(); return root->lookup_range( op_context_t{cache, pin_set, t}, logn->get_laddr(), logn->get_length()).safe_then( [this, logn=std::move(logn)](auto pins) { if (pins.size() == 1) { auto pin = std::move(pins.front()); pins.pop_front(); if (pin->get_paddr() == logn->get_paddr()) { logn->set_pin(std::move(pin)); pin_set.add_pin( static_cast(logn->get_pin()).pin); logger().debug("init_cached_extent: {} initialized", *logn); } else { // paddr doesn't match, remapped, obsolete logger().debug("init_cached_extent: {} obsolete", *logn); cache.drop_from_cache(logn); } } else { // set of extents changed, obsolete logger().debug("init_cached_extent: {} obsolete", *logn); cache.drop_from_cache(logn); } return init_cached_extent_ertr::now(); }); } else { logger().debug("init_cached_extent: {} skipped", *e); return init_cached_extent_ertr::now(); } }); } BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings( Transaction &t, laddr_t begin, laddr_t end, scan_mappings_func_t &&f) { return seastar::do_with( std::move(f), LBANodeRef(), [=, &t](auto &f, auto &lbarootref) { return get_root(t).safe_then( [=, &t, &f](LBANodeRef lbaroot) mutable { lbarootref = lbaroot; return lbaroot->scan_mappings( get_context(t), begin, end, f); }); }); } BtreeLBAManager::scan_mapped_space_ret BtreeLBAManager::scan_mapped_space( Transaction &t, scan_mapped_space_func_t &&f) { return seastar::do_with( std::move(f), LBANodeRef(), [=, &t](auto &f, auto &lbarootref) { return get_root(t).safe_then( [=, &t, &f](LBANodeRef lbaroot) mutable { lbarootref = lbaroot; return lbaroot->scan_mapped_space( get_context(t), f); }); }); } BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent( Transaction &t, CachedExtentRef extent) { if (extent->is_logical()) { auto lextent = extent->cast(); cache.retire_extent(t, extent); auto nlextent = cache.alloc_new_extent_by_type( t, lextent->get_type(), lextent->get_length())->cast(); lextent->get_bptr().copy_out( 0, lextent->get_length(), nlextent->get_bptr().c_str()); nlextent->set_laddr(lextent->get_laddr()); nlextent->set_pin(lextent->get_pin().duplicate()); logger().debug( "{}: rewriting {} into {}", __func__, *lextent, *nlextent); return update_mapping( t, lextent->get_laddr(), [prev_addr = lextent->get_paddr(), addr = nlextent->get_paddr()]( const lba_map_val_t &in) { lba_map_val_t ret = in; ceph_assert(in.paddr == prev_addr); ret.paddr = addr; return ret; }).safe_then([nlextent](auto e) {}).handle_error( rewrite_extent_ertr::pass_further{}, /* ENOENT in particular should be impossible */ crimson::ct_error::assert_all{} ); } else if (is_lba_node(*extent)) { auto lba_extent = extent->cast(); cache.retire_extent(t, extent); auto nlba_extent = cache.alloc_new_extent_by_type( t, lba_extent->get_type(), lba_extent->get_length())->cast(); lba_extent->get_bptr().copy_out( 0, lba_extent->get_length(), nlba_extent->get_bptr().c_str()); nlba_extent->pin.set_range(nlba_extent->get_node_meta()); /* This is a bit underhanded. Any relative addrs here must necessarily * be record relative as we are rewriting a dirty extent. Thus, we * are using resolve_relative_addrs with a (likely negative) block * relative offset to correct them to block-relative offsets adjusted * for our new transaction location. * * Upon commit, these now block relative addresses will be interpretted * against the real final address. */ nlba_extent->resolve_relative_addrs( make_record_relative_paddr(0) - nlba_extent->get_paddr()); return update_internal_mapping( t, nlba_extent->get_node_meta().depth, nlba_extent->get_node_meta().begin, nlba_extent->get_paddr()).safe_then( [](auto) {}, rewrite_extent_ertr::pass_further {}, crimson::ct_error::assert_all{}); } else { return rewrite_extent_ertr::now(); } } BtreeLBAManager::get_physical_extent_if_live_ret BtreeLBAManager::get_physical_extent_if_live( Transaction &t, extent_types_t type, paddr_t addr, laddr_t laddr, segment_off_t len) { ceph_assert(is_lba_node(type)); return cache.get_extent_by_type( t, type, addr, laddr, len ).safe_then([=, &t](CachedExtentRef extent) { return get_root(t).safe_then([=, &t](LBANodeRef root) { auto lba_node = extent->cast(); return root->lookup( op_context_t{cache, pin_set, t}, lba_node->get_node_meta().begin, lba_node->get_node_meta().depth).safe_then([=](LBANodeRef c) { if (c->get_paddr() == lba_node->get_paddr()) { return get_physical_extent_if_live_ret( get_physical_extent_if_live_ertr::ready_future_marker{}, lba_node); } else { cache.drop_from_cache(lba_node); return get_physical_extent_if_live_ret( get_physical_extent_if_live_ertr::ready_future_marker{}, CachedExtentRef()); } }); }); }); } BtreeLBAManager::BtreeLBAManager( SegmentManager &segment_manager, Cache &cache) : segment_manager(segment_manager), cache(cache) {} BtreeLBAManager::insert_mapping_ret BtreeLBAManager::insert_mapping( Transaction &t, LBANodeRef root, laddr_t laddr, lba_map_val_t val) { auto split = insert_mapping_ertr::future( insert_mapping_ertr::ready_future_marker{}, root); if (root->at_max_capacity()) { split = cache.get_root(t).safe_then( [this, root, laddr, &t](RootBlockRef croot) { logger().debug( "BtreeLBAManager::insert_mapping: splitting root {}", *croot); { auto mut_croot = cache.duplicate_for_write(t, croot); croot = mut_croot->cast(); } auto nroot = cache.alloc_new_extent(t, LBA_BLOCK_SIZE); lba_node_meta_t meta{0, L_ADDR_MAX, root->get_node_meta().depth + 1}; nroot->set_meta(meta); nroot->pin.set_range(meta); nroot->journal_insert( nroot->begin(), L_ADDR_MIN, root->get_paddr(), nullptr); croot->get_root().lba_root_addr = nroot->get_paddr(); croot->get_root().lba_depth = root->get_node_meta().depth + 1; return nroot->split_entry( get_context(t), laddr, nroot->begin(), root); }); } return split.safe_then([this, &t, laddr, val](LBANodeRef node) { return node->insert( get_context(t), laddr, val); }); } BtreeLBAManager::update_refcount_ret BtreeLBAManager::update_refcount( Transaction &t, laddr_t addr, int delta) { return update_mapping( t, addr, [delta](const lba_map_val_t &in) { lba_map_val_t out = in; ceph_assert((int)out.refcount + delta >= 0); out.refcount += delta; return out; }).safe_then([](auto result) { return ref_update_result_t{result.refcount, result.paddr}; }); } BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping( Transaction &t, laddr_t addr, update_func_t &&f) { return get_root(t ).safe_then([this, f=std::move(f), &t, addr](LBANodeRef root) mutable { return root->mutate_mapping( get_context(t), addr, std::move(f)); }); } BtreeLBAManager::update_internal_mapping_ret BtreeLBAManager::update_internal_mapping( Transaction &t, depth_t depth, laddr_t laddr, paddr_t paddr) { return cache.get_root(t).safe_then([=, &t](RootBlockRef croot) { if (depth == croot->get_root().lba_depth) { logger().debug( "update_internal_mapping: updating lba root to: {}->{}", laddr, paddr); { auto mut_croot = cache.duplicate_for_write(t, croot); croot = mut_croot->cast(); } ceph_assert(laddr == 0); auto old_paddr = croot->get_root().lba_root_addr; croot->get_root().lba_root_addr = paddr; return update_internal_mapping_ret( update_internal_mapping_ertr::ready_future_marker{}, old_paddr); } else { logger().debug( "update_internal_mapping: updating lba node at depth {} to: {}->{}", depth, laddr, paddr); return get_lba_btree_extent( get_context(t), croot->get_root().lba_depth, croot->get_root().lba_root_addr, paddr_t()).safe_then([=, &t](LBANodeRef broot) { return broot->mutate_internal_address( get_context(t), depth, laddr, paddr); }); } }); } }