diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/crimson/os/seastore/omap_manager | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/crimson/os/seastore/omap_manager')
7 files changed, 3221 insertions, 0 deletions
diff --git a/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc new file mode 100644 index 000000000..1782d7ee6 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc @@ -0,0 +1,293 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <string.h> + +#include "crimson/common/log.h" + +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h" + +SET_SUBSYS(seastore_omap); + +namespace crimson::os::seastore::omap_manager { + +BtreeOMapManager::BtreeOMapManager( + TransactionManager &tm) + : tm(tm) {} + +BtreeOMapManager::initialize_omap_ret +BtreeOMapManager::initialize_omap(Transaction &t, laddr_t hint) +{ + LOG_PREFIX(BtreeOMapManager::initialize_omap); + DEBUGT("hint: {}", t, hint); + return tm.alloc_extent<OMapLeafNode>(t, hint, OMAP_LEAF_BLOCK_SIZE) + .si_then([hint, &t](auto&& root_extent) { + root_extent->set_size(0); + omap_node_meta_t meta{1}; + root_extent->set_meta(meta); + omap_root_t omap_root; + omap_root.update(root_extent->get_laddr(), 1, hint); + t.get_omap_tree_stats().depth = 1u; + t.get_omap_tree_stats().extents_num_delta++; + return initialize_omap_iertr::make_ready_future<omap_root_t>(omap_root); + }); +} + +BtreeOMapManager::get_root_ret +BtreeOMapManager::get_omap_root(omap_context_t oc, const omap_root_t &omap_root) +{ + assert(omap_root.get_location() != L_ADDR_NULL); + laddr_t laddr = omap_root.get_location(); + return omap_load_extent(oc, laddr, omap_root.get_depth()); +} + +BtreeOMapManager::handle_root_split_ret +BtreeOMapManager::handle_root_split( + omap_context_t oc, + omap_root_t &omap_root, + const OMapNode::mutation_result_t& mresult) +{ + LOG_PREFIX(BtreeOMapManager::handle_root_split); + DEBUGT("{}", oc.t, omap_root); + return oc.tm.alloc_extent<OMapInnerNode>(oc.t, omap_root.hint, + OMAP_INNER_BLOCK_SIZE) + .si_then([&omap_root, mresult, oc](auto&& nroot) -> handle_root_split_ret { + auto [left, right, pivot] = *(mresult.split_tuple); + omap_node_meta_t meta{omap_root.depth + 1}; + nroot->set_meta(meta); + nroot->journal_inner_insert(nroot->iter_begin(), left->get_laddr(), + "", nroot->maybe_get_delta_buffer()); + nroot->journal_inner_insert(nroot->iter_begin() + 1, right->get_laddr(), + pivot, nroot->maybe_get_delta_buffer()); + omap_root.update(nroot->get_laddr(), omap_root.get_depth() + 1, omap_root.hint); + oc.t.get_omap_tree_stats().depth = omap_root.depth; + ++(oc.t.get_omap_tree_stats().extents_num_delta); + return seastar::now(); + }); +} + +BtreeOMapManager::handle_root_merge_ret +BtreeOMapManager::handle_root_merge( + omap_context_t oc, + omap_root_t &omap_root, + OMapNode::mutation_result_t mresult) +{ + LOG_PREFIX(BtreeOMapManager::handle_root_merge); + DEBUGT("{}", oc.t, omap_root); + auto root = *(mresult.need_merge); + auto iter = root->cast<OMapInnerNode>()->iter_begin(); + omap_root.update( + iter->get_val(), + omap_root.depth -= 1, + omap_root.hint); + oc.t.get_omap_tree_stats().depth = omap_root.depth; + oc.t.get_omap_tree_stats().extents_num_delta--; + return oc.tm.dec_ref(oc.t, root->get_laddr() + ).si_then([](auto &&ret) -> handle_root_merge_ret { + return seastar::now(); + }).handle_error_interruptible( + handle_root_merge_iertr::pass_further{}, + crimson::ct_error::assert_all{ + "Invalid error in handle_root_merge" + } + ); +} + +BtreeOMapManager::omap_get_value_ret +BtreeOMapManager::omap_get_value( + const omap_root_t &omap_root, + Transaction &t, + const std::string &key) +{ + LOG_PREFIX(BtreeOMapManager::omap_get_value); + DEBUGT("key={}", t, key); + return get_omap_root( + get_omap_context(t, omap_root.hint), + omap_root + ).si_then([this, &t, &key, &omap_root](auto&& extent) { + return extent->get_value(get_omap_context(t, omap_root.hint), key); + }).si_then([](auto &&e) { + return omap_get_value_ret( + interruptible::ready_future_marker{}, + std::move(e)); + }); +} + +BtreeOMapManager::omap_set_keys_ret +BtreeOMapManager::omap_set_keys( + omap_root_t &omap_root, + Transaction &t, + std::map<std::string, ceph::bufferlist>&& keys) +{ + return seastar::do_with(std::move(keys), [&, this](auto& keys) { + return trans_intr::do_for_each( + keys.begin(), + keys.end(), + [&, this](auto &p) { + return omap_set_key(omap_root, t, p.first, p.second); + }); + }); +} + +BtreeOMapManager::omap_set_key_ret +BtreeOMapManager::omap_set_key( + omap_root_t &omap_root, + Transaction &t, + const std::string &key, + const ceph::bufferlist &value) +{ + LOG_PREFIX(BtreeOMapManager::omap_set_key); + DEBUGT("{} -> {}", t, key, value); + return get_omap_root( + get_omap_context(t, omap_root.hint), + omap_root + ).si_then([this, &t, &key, &value, &omap_root](auto root) { + return root->insert(get_omap_context(t, omap_root.hint), key, value); + }).si_then([this, &omap_root, &t](auto mresult) -> omap_set_key_ret { + if (mresult.status == mutation_status_t::SUCCESS) + return seastar::now(); + else if (mresult.status == mutation_status_t::WAS_SPLIT) + return handle_root_split(get_omap_context(t, omap_root.hint), omap_root, mresult); + else + return seastar::now(); + }); +} + +BtreeOMapManager::omap_rm_key_ret +BtreeOMapManager::omap_rm_key( + omap_root_t &omap_root, + Transaction &t, + const std::string &key) +{ + LOG_PREFIX(BtreeOMapManager::omap_rm_key); + DEBUGT("{}", t, key); + return get_omap_root( + get_omap_context(t, omap_root.hint), + omap_root + ).si_then([this, &t, &key, &omap_root](auto root) { + return root->rm_key(get_omap_context(t, omap_root.hint), key); + }).si_then([this, &omap_root, &t](auto mresult) -> omap_rm_key_ret { + if (mresult.status == mutation_status_t::SUCCESS) { + return seastar::now(); + } else if (mresult.status == mutation_status_t::WAS_SPLIT) { + return handle_root_split(get_omap_context(t, omap_root.hint), omap_root, mresult); + } else if (mresult.status == mutation_status_t::NEED_MERGE) { + auto root = *(mresult.need_merge); + if (root->get_node_size() == 1 && omap_root.depth != 1) { + return handle_root_merge(get_omap_context(t, omap_root.hint), omap_root, mresult); + } else { + return seastar::now(); + } + } else { + return seastar::now(); + } + }); + +} + +BtreeOMapManager::omap_rm_key_range_ret +BtreeOMapManager::omap_rm_key_range( + omap_root_t &omap_root, + Transaction &t, + const std::string &first, + const std::string &last, + omap_list_config_t config) +{ + LOG_PREFIX(BtreeOMapManager::omap_rm_key_range); + DEBUGT("{} ~ {}", t, first, last); + assert(first <= last); + return seastar::do_with( + std::make_optional<std::string>(first), + std::make_optional<std::string>(last), + [this, &omap_root, &t, config](auto &first, auto &last) { + return omap_list( + omap_root, + t, + first, + last, + config); + }).si_then([this, &omap_root, &t](auto results) { + LOG_PREFIX(BtreeOMapManager::omap_rm_key_range); + auto &[complete, kvs] = results; + std::vector<std::string> keys; + for (const auto& [k, _] : kvs) { + keys.push_back(k); + } + DEBUGT("total {} keys to remove", t, keys.size()); + return seastar::do_with( + std::move(keys), + [this, &omap_root, &t](auto& keys) { + return trans_intr::do_for_each( + keys.begin(), + keys.end(), + [this, &omap_root, &t](auto& key) { + return omap_rm_key(omap_root, t, key); + }); + }); + }); +} + +BtreeOMapManager::omap_list_ret +BtreeOMapManager::omap_list( + const omap_root_t &omap_root, + Transaction &t, + const std::optional<std::string> &first, + const std::optional<std::string> &last, + omap_list_config_t config) +{ + LOG_PREFIX(BtreeOMapManager::omap_list); + if (first && last) { + DEBUGT("{}, first: {}, last: {}", t, omap_root, *first, *last); + assert(last >= first); + } else if (first) { + DEBUGT("{}, first: {}", t, omap_root, *first); + } else if (last) { + DEBUGT("{}, last: {}", t, omap_root, *last); + } else { + DEBUGT("{}", t, omap_root); + } + + return get_omap_root( + get_omap_context(t, omap_root.hint), + omap_root + ).si_then([this, config, &t, &first, &last, &omap_root](auto extent) { + return extent->list( + get_omap_context(t, omap_root.hint), + first, + last, + config); + }); +} + +BtreeOMapManager::omap_clear_ret +BtreeOMapManager::omap_clear( + omap_root_t &omap_root, + Transaction &t) +{ + LOG_PREFIX(BtreeOMapManager::omap_clear); + DEBUGT("{}", t, omap_root); + return get_omap_root( + get_omap_context(t, omap_root.hint), + omap_root + ).si_then([this, &t, &omap_root](auto extent) { + return extent->clear(get_omap_context(t, omap_root.hint)); + }).si_then([this, &omap_root, &t] { + return tm.dec_ref( + t, omap_root.get_location() + ).si_then([&omap_root] (auto ret) { + omap_root.update( + L_ADDR_NULL, + 0, L_ADDR_MIN); + return omap_clear_iertr::now(); + }); + }).handle_error_interruptible( + omap_clear_iertr::pass_further{}, + crimson::ct_error::assert_all{ + "Invalid error in BtreeOMapManager::omap_clear" + } + ); +} + +} diff --git a/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.h b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.h new file mode 100644 index 000000000..7fcba64c0 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.h @@ -0,0 +1,111 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once +#include <boost/intrusive_ptr.hpp> +#include <boost/smart_ptr/intrusive_ref_counter.hpp> +#include <seastar/core/future.hh> + +#include "include/ceph_assert.h" +#include "crimson/osd/exceptions.h" + +#include "crimson/os/seastore/omap_manager.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h" +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/transaction_manager.h" + +namespace crimson::os::seastore::omap_manager { +/** + * BtreeOMapManager + * + * Uses a btree to track : + * string -> string mapping for each onode omap + */ + +class BtreeOMapManager : public OMapManager { + TransactionManager &tm; + + omap_context_t get_omap_context( + Transaction &t, laddr_t addr_min) { + return omap_context_t{tm, t, addr_min}; + } + + /* get_omap_root + * + * load omap tree root node + */ + using get_root_iertr = base_iertr; + using get_root_ret = get_root_iertr::future<OMapNodeRef>; + static get_root_ret get_omap_root( + omap_context_t c, + const omap_root_t &omap_root); + + /* handle_root_split + * + * root has been split and needs to update omap_root_t + */ + using handle_root_split_iertr = base_iertr; + using handle_root_split_ret = handle_root_split_iertr::future<>; + handle_root_split_ret handle_root_split( + omap_context_t c, + omap_root_t &omap_root, + const OMapNode::mutation_result_t& mresult); + + /* handle_root_merge + * + * root node has only one item and it is not leaf node, need remove a layer + */ + using handle_root_merge_iertr = base_iertr; + using handle_root_merge_ret = handle_root_merge_iertr::future<>; + handle_root_merge_ret handle_root_merge( + omap_context_t oc, + omap_root_t &omap_root, + OMapNode:: mutation_result_t mresult); + +public: + explicit BtreeOMapManager(TransactionManager &tm); + + initialize_omap_ret initialize_omap(Transaction &t, laddr_t hint) final; + + omap_get_value_ret omap_get_value( + const omap_root_t &omap_root, + Transaction &t, + const std::string &key) final; + + omap_set_key_ret omap_set_key( + omap_root_t &omap_root, + Transaction &t, + const std::string &key, const ceph::bufferlist &value) final; + + omap_set_keys_ret omap_set_keys( + omap_root_t &omap_root, + Transaction &t, + std::map<std::string, ceph::bufferlist>&& keys) final; + + omap_rm_key_ret omap_rm_key( + omap_root_t &omap_root, + Transaction &t, + const std::string &key) final; + + omap_rm_key_range_ret omap_rm_key_range( + omap_root_t &omap_root, + Transaction &t, + const std::string &first, + const std::string &last, + omap_list_config_t config) final; + + omap_list_ret omap_list( + const omap_root_t &omap_root, + Transaction &t, + const std::optional<std::string> &first, + const std::optional<std::string> &last, + omap_list_config_t config = omap_list_config_t()) final; + + omap_clear_ret omap_clear( + omap_root_t &omap_root, + Transaction &t) final; + +}; +using BtreeOMapManagerRef = std::unique_ptr<BtreeOMapManager>; + +} diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h new file mode 100644 index 000000000..795daeddb --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h @@ -0,0 +1,122 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once + +#include <string> +#include <vector> + +//#include <boost/iterator/counting_iterator.hpp> + +#include "crimson/common/log.h" +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/omap_manager.h" +#include "crimson/os/seastore/omap_manager/btree/omap_types.h" + +namespace crimson::os::seastore::omap_manager{ + +struct omap_context_t { + TransactionManager &tm; + Transaction &t; + laddr_t hint; +}; + +enum class mutation_status_t : uint8_t { + SUCCESS = 0, + WAS_SPLIT = 1, + NEED_MERGE = 2, + FAIL = 3 +}; + +struct OMapNode : LogicalCachedExtent { + using base_iertr = OMapManager::base_iertr; + + using OMapNodeRef = TCachedExtentRef<OMapNode>; + + struct mutation_result_t { + mutation_status_t status; + /// Only populated if WAS_SPLIT, indicates the newly created left and right nodes + /// from splitting the target entry during insertion. + std::optional<std::tuple<OMapNodeRef, OMapNodeRef, std::string>> split_tuple; + /// only sopulated if need merged, indicate which entry need be doing merge in upper layer. + std::optional<OMapNodeRef> need_merge; + + mutation_result_t(mutation_status_t s, std::optional<std::tuple<OMapNodeRef, + OMapNodeRef, std::string>> tuple, std::optional<OMapNodeRef> n_merge) + : status(s), + split_tuple(tuple), + need_merge(n_merge) {} + }; + + OMapNode(ceph::bufferptr &&ptr) : LogicalCachedExtent(std::move(ptr)) {} + OMapNode(const OMapNode &other) + : LogicalCachedExtent(other) {} + + using get_value_iertr = base_iertr; + using get_value_ret = OMapManager::omap_get_value_ret; + virtual get_value_ret get_value( + omap_context_t oc, + const std::string &key) = 0; + + using insert_iertr = base_iertr; + using insert_ret = insert_iertr::future<mutation_result_t>; + virtual insert_ret insert( + omap_context_t oc, + const std::string &key, + const ceph::bufferlist &value) = 0; + + using rm_key_iertr = base_iertr; + using rm_key_ret = rm_key_iertr::future<mutation_result_t>; + virtual rm_key_ret rm_key( + omap_context_t oc, + const std::string &key) = 0; + + using omap_list_config_t = OMapManager::omap_list_config_t; + using list_iertr = base_iertr; + using list_bare_ret = OMapManager::omap_list_bare_ret; + using list_ret = OMapManager::omap_list_ret; + virtual list_ret list( + omap_context_t oc, + const std::optional<std::string> &first, + const std::optional<std::string> &last, + omap_list_config_t config) = 0; + + using clear_iertr = base_iertr; + using clear_ret = clear_iertr::future<>; + virtual clear_ret clear(omap_context_t oc) = 0; + + using full_merge_iertr = base_iertr; + using full_merge_ret = full_merge_iertr::future<OMapNodeRef>; + virtual full_merge_ret make_full_merge( + omap_context_t oc, + OMapNodeRef right) = 0; + + using make_balanced_iertr = base_iertr; + using make_balanced_ret = make_balanced_iertr::future + <std::tuple<OMapNodeRef, OMapNodeRef, std::string>>; + virtual make_balanced_ret make_balanced( + omap_context_t oc, + OMapNodeRef _right) = 0; + + virtual omap_node_meta_t get_node_meta() const = 0; + virtual bool extent_will_overflow( + size_t ksize, + std::optional<size_t> vsize) const = 0; + virtual bool can_merge(OMapNodeRef right) const = 0; + virtual bool extent_is_below_min() const = 0; + virtual uint32_t get_node_size() = 0; + + virtual ~OMapNode() = default; +}; + +using OMapNodeRef = OMapNode::OMapNodeRef; + +using omap_load_extent_iertr = OMapNode::base_iertr; +omap_load_extent_iertr::future<OMapNodeRef> +omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth); + +} + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter<crimson::os::seastore::omap_manager::OMapNode> : fmt::ostream_formatter {}; +#endif diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc new file mode 100644 index 000000000..4db58414a --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc @@ -0,0 +1,738 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <algorithm> +#include <string.h> +#include "include/buffer.h" +#include "include/byteorder.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h" +#include "seastar/core/thread.hh" + +SET_SUBSYS(seastore_omap); + +namespace crimson::os::seastore::omap_manager { + +std::ostream &operator<<(std::ostream &out, const omap_inner_key_t &rhs) +{ + return out << "omap_inner_key (" << rhs.key_off<< " - " << rhs.key_len + << " - " << rhs.laddr << ")"; +} + +std::ostream &operator<<(std::ostream &out, const omap_leaf_key_t &rhs) +{ + return out << "omap_leaf_key_t (" << rhs.key_off<< " - " << rhs.key_len + << " - " << rhs.val_len << ")"; +} + +std::ostream &OMapInnerNode::print_detail_l(std::ostream &out) const +{ + return out << ", size=" << get_size() + << ", depth=" << get_meta().depth; +} + +using dec_ref_iertr = OMapInnerNode::base_iertr; +using dec_ref_ret = dec_ref_iertr::future<>; +template <typename T> +dec_ref_ret dec_ref(omap_context_t oc, T&& addr) { + return oc.tm.dec_ref(oc.t, std::forward<T>(addr)).handle_error_interruptible( + dec_ref_iertr::pass_further{}, + crimson::ct_error::assert_all{ + "Invalid error in OMapInnerNode helper dec_ref" + } + ).si_then([](auto &&e) {}); +} + +/** + * make_split_insert + * + * insert an entry at iter, with the address of key. + * will result in a split outcome encoded in the returned mutation_result_t + */ +OMapInnerNode::make_split_insert_ret +OMapInnerNode::make_split_insert( + omap_context_t oc, + internal_iterator_t iter, + std::string key, + laddr_t laddr) +{ + LOG_PREFIX(OMapInnerNode::make_split_insert); + DEBUGT("this: {}, key: {}", oc.t, *this, key); + return make_split_children(oc).si_then([=] (auto tuple) { + auto [left, right, pivot] = tuple; + if (pivot > key) { + auto liter = left->iter_idx(iter.get_index()); + left->journal_inner_insert(liter, laddr, key, + left->maybe_get_delta_buffer()); + } else { //right + auto riter = right->iter_idx(iter.get_index() - left->get_node_size()); + right->journal_inner_insert(riter, laddr, key, + right->maybe_get_delta_buffer()); + } + ++(oc.t.get_omap_tree_stats().extents_num_delta); + return make_split_insert_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::WAS_SPLIT, tuple, std::nullopt)); + }); + +} + + +OMapInnerNode::handle_split_ret +OMapInnerNode::handle_split( + omap_context_t oc, + internal_iterator_t iter, + mutation_result_t mresult) +{ + LOG_PREFIX(OMapInnerNode::handle_split); + DEBUGT("this: {}", oc.t, *this); + if (!is_mutable()) { + auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapInnerNode>(); + auto mut_iter = mut->iter_idx(iter.get_index()); + return mut->handle_split(oc, mut_iter, mresult); + } + auto [left, right, pivot] = *(mresult.split_tuple); + //update operation will not cause node overflow, so we can do it first. + journal_inner_update(iter, left->get_laddr(), maybe_get_delta_buffer()); + bool overflow = extent_will_overflow(pivot.size(), std::nullopt); + if (!overflow) { + journal_inner_insert(iter + 1, right->get_laddr(), pivot, + maybe_get_delta_buffer()); + return insert_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt)); + } else { + return make_split_insert(oc, iter + 1, pivot, right->get_laddr()) + .si_then([this, oc] (auto m_result) { + return dec_ref(oc, get_laddr()) + .si_then([m_result = std::move(m_result)] { + return insert_ret( + interruptible::ready_future_marker{}, + m_result); + }); + }); + } +} + +OMapInnerNode::get_value_ret +OMapInnerNode::get_value( + omap_context_t oc, + const std::string &key) +{ + LOG_PREFIX(OMapInnerNode::get_value); + DEBUGT("key = {}, this: {}", oc.t, key, *this); + auto child_pt = get_containing_child(key); + assert(child_pt != iter_cend()); + auto laddr = child_pt->get_val(); + return omap_load_extent(oc, laddr, get_meta().depth - 1).si_then( + [oc, &key] (auto extent) { + return extent->get_value(oc, key); + }).finally([ref = OMapNodeRef(this)] {}); +} + +OMapInnerNode::insert_ret +OMapInnerNode::insert( + omap_context_t oc, + const std::string &key, + const ceph::bufferlist &value) +{ + LOG_PREFIX(OMapInnerNode::insert); + DEBUGT("{}->{}, this: {}", oc.t, key, value, *this); + auto child_pt = get_containing_child(key); + assert(child_pt != iter_cend()); + auto laddr = child_pt->get_val(); + return omap_load_extent(oc, laddr, get_meta().depth - 1).si_then( + [oc, &key, &value] (auto extent) { + return extent->insert(oc, key, value); + }).si_then([this, oc, child_pt] (auto mresult) { + if (mresult.status == mutation_status_t::SUCCESS) { + return insert_iertr::make_ready_future<mutation_result_t>(mresult); + } else if (mresult.status == mutation_status_t::WAS_SPLIT) { + return handle_split(oc, child_pt, mresult); + } else { + return insert_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt)); + } + }); +} + +OMapInnerNode::rm_key_ret +OMapInnerNode::rm_key(omap_context_t oc, const std::string &key) +{ + LOG_PREFIX(OMapInnerNode::rm_key); + DEBUGT("key={}, this: {}", oc.t, key, *this); + auto child_pt = get_containing_child(key); + assert(child_pt != iter_cend()); + auto laddr = child_pt->get_val(); + return omap_load_extent(oc, laddr, get_meta().depth - 1).si_then( + [this, oc, &key, child_pt] (auto extent) { + return extent->rm_key(oc, key) + .si_then([this, oc, child_pt, extent = std::move(extent)] (auto mresult) { + switch (mresult.status) { + case mutation_status_t::SUCCESS: + case mutation_status_t::FAIL: + return rm_key_iertr::make_ready_future<mutation_result_t>(mresult); + case mutation_status_t::NEED_MERGE: { + if (get_node_size() >1) + return merge_entry(oc, child_pt, *(mresult.need_merge)); + else + return rm_key_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, + std::nullopt, std::nullopt)); + } + case mutation_status_t::WAS_SPLIT: + return handle_split(oc, child_pt, mresult); + default: + return rm_key_iertr::make_ready_future<mutation_result_t>(mresult); + } + }); + }); +} + +OMapInnerNode::list_ret +OMapInnerNode::list( + omap_context_t oc, + const std::optional<std::string> &first, + const std::optional<std::string> &last, + omap_list_config_t config) +{ + LOG_PREFIX(OMapInnerNode::list); + if (first && last) { + DEBUGT("first: {}, last: {}, this: {}", oc.t, *first, *last, *this); + assert(*first <= *last); + } else if (first) { + DEBUGT("first: {}, this: {}", oc.t, *first, *this); + } else if (last) { + DEBUGT("last: {}, this: {}", oc.t, *last, *this); + } else { + DEBUGT("this: {}", oc.t, *this); + } + + auto first_iter = first ? + get_containing_child(*first) : + iter_cbegin(); + auto last_iter = last ? + get_containing_child(*last) + 1: + iter_cend(); + assert(first_iter != iter_cend()); + + return seastar::do_with( + first_iter, + last_iter, + iter_t(first_iter), + list_bare_ret(false, {}), + [this, &first, &last, oc, config]( + auto &fiter, + auto &liter, + auto &iter, + auto &ret) + { + auto &complete = std::get<0>(ret); + auto &result = std::get<1>(ret); + return trans_intr::repeat( + [&, config, oc, this]() -> list_iertr::future<seastar::stop_iteration> + { + if (iter == liter) { + complete = true; + return list_iertr::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::yes); + } + assert(result.size() < config.max_result_size); + auto laddr = iter->get_val(); + return omap_load_extent( + oc, laddr, + get_meta().depth - 1 + ).si_then([&, config, oc](auto &&extent) { + return seastar::do_with( + iter == fiter ? first : std::optional<std::string>(std::nullopt), + iter == liter - 1 ? last : std::optional<std::string>(std::nullopt), + [&result, extent = std::move(extent), config, oc]( + auto &nfirst, + auto &nlast) { + return extent->list( + oc, + nfirst, + nlast, + config.with_reduced_max(result.size())); + }).si_then([&, config](auto &&child_ret) mutable { + boost::ignore_unused(config); // avoid clang warning; + auto &[child_complete, child_result] = child_ret; + if (result.size() && child_result.size()) { + assert(child_result.begin()->first > result.rbegin()->first); + } + if (child_result.size() && first && iter == fiter) { + if (config.first_inclusive) { + assert(child_result.begin()->first >= *first); + } else { + assert(child_result.begin()->first > *first); + } + } + if (child_result.size() && last && iter == liter - 1) { + auto biter = --(child_result.end()); + if (config.last_inclusive) { + assert(biter->first <= *last); + } else { + assert(biter->first < *last); + } + } + result.merge(std::move(child_result)); + if (result.size() == config.max_result_size) { + return list_iertr::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::yes); + } + ++iter; + assert(child_complete); + return list_iertr::make_ready_future<seastar::stop_iteration>( + seastar::stop_iteration::no); + }); + }); + }).si_then([&ret, ref = OMapNodeRef(this)] { + return list_iertr::make_ready_future<list_bare_ret>(std::move(ret)); + }); + }); +} + +OMapInnerNode::clear_ret +OMapInnerNode::clear(omap_context_t oc) +{ + LOG_PREFIX(OMapInnerNode::clear); + DEBUGT("this: {}", oc.t, *this); + return trans_intr::do_for_each(iter_begin(), iter_end(), + [oc, this](auto iter) { + auto laddr = iter->get_val(); + auto ndepth = get_meta().depth - 1; + if (ndepth > 1) { + return omap_load_extent(oc, laddr, ndepth + ).si_then([oc](auto &&extent) { + return extent->clear(oc); + }).si_then([oc, laddr] { + return dec_ref(oc, laddr); + }).si_then([ref = OMapNodeRef(this)] { + return clear_iertr::now(); + }); + } else { + assert(ndepth == 1); + return dec_ref(oc, laddr + ).si_then([ref = OMapNodeRef(this)] { + return clear_iertr::now(); + }); + } + }); +} + +OMapInnerNode::split_children_ret +OMapInnerNode:: make_split_children(omap_context_t oc) +{ + LOG_PREFIX(OMapInnerNode::make_split_children); + DEBUGT("this: {}", oc.t, *this); + return oc.tm.alloc_extents<OMapInnerNode>(oc.t, oc.hint, + OMAP_INNER_BLOCK_SIZE, 2) + .si_then([this, oc] (auto &&ext_pair) { + LOG_PREFIX(OMapInnerNode::make_split_children); + auto left = ext_pair.front(); + auto right = ext_pair.back(); + DEBUGT("this: {}, split into: l {} r {}", oc.t, *this, *left, *right); + return split_children_ret( + interruptible::ready_future_marker{}, + std::make_tuple(left, right, split_into(*left, *right))); + }); +} + +OMapInnerNode::full_merge_ret +OMapInnerNode::make_full_merge(omap_context_t oc, OMapNodeRef right) +{ + LOG_PREFIX(OMapInnerNode::make_full_merge); + DEBUGT("", oc.t); + return oc.tm.alloc_extent<OMapInnerNode>(oc.t, oc.hint, + OMAP_INNER_BLOCK_SIZE) + .si_then([this, right] (auto &&replacement) { + replacement->merge_from(*this, *right->cast<OMapInnerNode>()); + return full_merge_ret( + interruptible::ready_future_marker{}, + std::move(replacement)); + }); +} + +OMapInnerNode::make_balanced_ret +OMapInnerNode::make_balanced(omap_context_t oc, OMapNodeRef _right) +{ + LOG_PREFIX(OMapInnerNode::make_balanced); + DEBUGT("l: {}, r: {}", oc.t, *this, *_right); + ceph_assert(_right->get_type() == TYPE); + return oc.tm.alloc_extents<OMapInnerNode>(oc.t, oc.hint, + OMAP_INNER_BLOCK_SIZE, 2) + .si_then([this, _right] (auto &&replacement_pair){ + auto replacement_left = replacement_pair.front(); + auto replacement_right = replacement_pair.back(); + auto &right = *_right->cast<OMapInnerNode>(); + return make_balanced_ret( + interruptible::ready_future_marker{}, + std::make_tuple(replacement_left, replacement_right, + balance_into_new_nodes(*this, right, + *replacement_left, *replacement_right))); + }); +} + +OMapInnerNode::merge_entry_ret +OMapInnerNode::merge_entry( + omap_context_t oc, + internal_iterator_t iter, + OMapNodeRef entry) +{ + LOG_PREFIX(OMapInnerNode::merge_entry); + DEBUGT("{}, parent: {}", oc.t, *entry, *this); + if (!is_mutable()) { + auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapInnerNode>(); + auto mut_iter = mut->iter_idx(iter->get_index()); + return mut->merge_entry(oc, mut_iter, entry); + } + auto is_left = (iter + 1) == iter_cend(); + auto donor_iter = is_left ? iter - 1 : iter + 1; + return omap_load_extent(oc, donor_iter->get_val(), get_meta().depth - 1 + ).si_then([=, this](auto &&donor) mutable { + LOG_PREFIX(OMapInnerNode::merge_entry); + auto [l, r] = is_left ? + std::make_pair(donor, entry) : std::make_pair(entry, donor); + auto [liter, riter] = is_left ? + std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter); + if (l->can_merge(r)) { + DEBUGT("make_full_merge l {} r {}", oc.t, *l, *r); + assert(entry->extent_is_below_min()); + return l->make_full_merge(oc, r + ).si_then([liter=liter, riter=riter, l=l, r=r, oc, this] + (auto &&replacement) { + LOG_PREFIX(OMapInnerNode::merge_entry); + DEBUGT("to update parent: {}", oc.t, *this); + journal_inner_update( + liter, + replacement->get_laddr(), + maybe_get_delta_buffer()); + journal_inner_remove(riter, maybe_get_delta_buffer()); + //retire extent + std::vector<laddr_t> dec_laddrs {l->get_laddr(), r->get_laddr()}; + return dec_ref(oc, dec_laddrs + ).si_then([this, oc] { + --(oc.t.get_omap_tree_stats().extents_num_delta); + if (extent_is_below_min()) { + return merge_entry_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::NEED_MERGE, + std::nullopt, this)); + } else { + return merge_entry_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, + std::nullopt, std::nullopt)); + } + }); + }); + } else { + DEBUGT("balanced l {} r {}", oc.t, *l, *r); + return l->make_balanced(oc, r + ).si_then([liter=liter, riter=riter, l=l, r=r, oc, this](auto tuple) { + LOG_PREFIX(OMapInnerNode::merge_entry); + DEBUGT("to update parent: {}", oc.t, *this); + auto [replacement_l, replacement_r, replacement_pivot] = tuple; + //update operation will not cuase node overflow, so we can do it first + journal_inner_update( + liter, + replacement_l->get_laddr(), + maybe_get_delta_buffer()); + bool overflow = extent_will_overflow(replacement_pivot.size(), + std::nullopt); + if (!overflow) { + journal_inner_remove(riter, maybe_get_delta_buffer()); + journal_inner_insert( + riter, + replacement_r->get_laddr(), + replacement_pivot, + maybe_get_delta_buffer()); + std::vector<laddr_t> dec_laddrs{l->get_laddr(), r->get_laddr()}; + return dec_ref(oc, dec_laddrs + ).si_then([] { + return merge_entry_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, + std::nullopt, std::nullopt)); + }); + } else { + DEBUGT("balanced and split {} r {}", oc.t, *l, *r); + //use remove and insert to instead of replace, + //remove operation will not cause node split, so we can do it first + journal_inner_remove(riter, maybe_get_delta_buffer()); + return make_split_insert(oc, riter, replacement_pivot, + replacement_r->get_laddr() + ).si_then([this, oc, l = l, r = r](auto mresult) { + std::vector<laddr_t> dec_laddrs{ + l->get_laddr(), + r->get_laddr(), + get_laddr()}; + return dec_ref(oc, dec_laddrs + ).si_then([mresult = std::move(mresult)] { + return merge_entry_ret( + interruptible::ready_future_marker{}, mresult); + }); + }); + } + }); + } + }); + +} + +OMapInnerNode::internal_iterator_t +OMapInnerNode::get_containing_child(const std::string &key) +{ + auto iter = std::find_if(iter_begin(), iter_end(), + [&key](auto it) { return it.contains(key); }); + return iter; +} + +std::ostream &OMapLeafNode::print_detail_l(std::ostream &out) const +{ + return out << ", size=" << get_size() + << ", depth=" << get_meta().depth; +} + +OMapLeafNode::get_value_ret +OMapLeafNode::get_value(omap_context_t oc, const std::string &key) +{ + LOG_PREFIX(OMapLeafNode::get_value); + DEBUGT("key = {}, this: {}", oc.t, *this, key); + auto ite = find_string_key(key); + if (ite != iter_end()) { + auto value = ite->get_val(); + return get_value_ret( + interruptible::ready_future_marker{}, + value); + } else { + return get_value_ret( + interruptible::ready_future_marker{}, + std::nullopt); + } +} + +OMapLeafNode::insert_ret +OMapLeafNode::insert( + omap_context_t oc, + const std::string &key, + const ceph::bufferlist &value) +{ + LOG_PREFIX(OMapLeafNode::insert); + DEBUGT("{} -> {}, this: {}", oc.t, key, value, *this); + bool overflow = extent_will_overflow(key.size(), value.length()); + if (!overflow) { + if (!is_mutable()) { + auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapLeafNode>(); + return mut->insert(oc, key, value); + } + auto replace_pt = find_string_key(key); + if (replace_pt != iter_end()) { + ++(oc.t.get_omap_tree_stats().num_updates); + journal_leaf_update(replace_pt, key, value, maybe_get_delta_buffer()); + } else { + ++(oc.t.get_omap_tree_stats().num_inserts); + auto insert_pt = string_lower_bound(key); + journal_leaf_insert(insert_pt, key, value, maybe_get_delta_buffer()); + + DEBUGT("inserted {}, this: {}", oc.t, insert_pt.get_key(), *this); + } + return insert_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt)); + } else { + return make_split_children(oc).si_then([this, oc, &key, &value] (auto tuple) { + auto [left, right, pivot] = tuple; + auto replace_pt = find_string_key(key); + if (replace_pt != iter_end()) { + ++(oc.t.get_omap_tree_stats().num_updates); + if (key < pivot) { //left + auto mut_iter = left->iter_idx(replace_pt->get_index()); + left->journal_leaf_update(mut_iter, key, value, left->maybe_get_delta_buffer()); + } else if (key >= pivot) { //right + auto mut_iter = right->iter_idx(replace_pt->get_index() - left->get_node_size()); + right->journal_leaf_update(mut_iter, key, value, right->maybe_get_delta_buffer()); + } + } else { + ++(oc.t.get_omap_tree_stats().num_inserts); + auto insert_pt = string_lower_bound(key); + if (key < pivot) { //left + auto mut_iter = left->iter_idx(insert_pt->get_index()); + left->journal_leaf_insert(mut_iter, key, value, left->maybe_get_delta_buffer()); + } else { + auto mut_iter = right->iter_idx(insert_pt->get_index() - left->get_node_size()); + right->journal_leaf_insert(mut_iter, key, value, right->maybe_get_delta_buffer()); + } + } + ++(oc.t.get_omap_tree_stats().extents_num_delta); + return dec_ref(oc, get_laddr()) + .si_then([tuple = std::move(tuple)] { + return insert_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::WAS_SPLIT, tuple, std::nullopt)); + }); + }); + } +} + +OMapLeafNode::rm_key_ret +OMapLeafNode::rm_key(omap_context_t oc, const std::string &key) +{ + LOG_PREFIX(OMapLeafNode::rm_key); + DEBUGT("{}, this: {}", oc.t, key, *this); + auto rm_pt = find_string_key(key); + if (!is_mutable() && rm_pt != iter_end()) { + auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapLeafNode>(); + return mut->rm_key(oc, key); + } + + if (rm_pt != iter_end()) { + ++(oc.t.get_omap_tree_stats().num_erases); + journal_leaf_remove(rm_pt, maybe_get_delta_buffer()); + if (extent_is_below_min()) { + return rm_key_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::NEED_MERGE, std::nullopt, + this->cast<OMapNode>())); + } else { + return rm_key_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt)); + } + } else { + return rm_key_ret( + interruptible::ready_future_marker{}, + mutation_result_t(mutation_status_t::FAIL, std::nullopt, std::nullopt)); + } + +} + +OMapLeafNode::list_ret +OMapLeafNode::list( + omap_context_t oc, + const std::optional<std::string> &first, + const std::optional<std::string> &last, + omap_list_config_t config) +{ + LOG_PREFIX(OMapLeafNode::list); + DEBUGT( + "first {} last {} max_result_size {} first_inclusive {} \ + last_inclusive {}, this: {}", + oc.t, + first ? first->c_str() : "", + last ? last->c_str() : "", + config.max_result_size, + config.first_inclusive, + config.last_inclusive, + *this + ); + auto ret = list_bare_ret(false, {}); + auto &[complete, result] = ret; + auto iter = first ? + (config.first_inclusive ? + string_lower_bound(*first) : + string_upper_bound(*first)) : + iter_begin(); + auto liter = last ? + (config.last_inclusive ? + string_upper_bound(*last) : + string_lower_bound(*last)) : + iter_end(); + + for (; iter != liter && result.size() < config.max_result_size; iter++) { + result.emplace(std::make_pair(iter->get_key(), iter->get_val())); + } + + complete = (iter == liter); + + return list_iertr::make_ready_future<list_bare_ret>( + std::move(ret)); +} + +OMapLeafNode::clear_ret +OMapLeafNode::clear(omap_context_t oc) +{ + return clear_iertr::now(); +} + +OMapLeafNode::split_children_ret +OMapLeafNode::make_split_children(omap_context_t oc) +{ + LOG_PREFIX(OMapLeafNode::make_split_children); + DEBUGT("this: {}", oc.t, *this); + return oc.tm.alloc_extents<OMapLeafNode>(oc.t, oc.hint, OMAP_LEAF_BLOCK_SIZE, 2) + .si_then([this] (auto &&ext_pair) { + auto left = ext_pair.front(); + auto right = ext_pair.back(); + return split_children_ret( + interruptible::ready_future_marker{}, + std::make_tuple(left, right, split_into(*left, *right))); + }); +} + +OMapLeafNode::full_merge_ret +OMapLeafNode::make_full_merge(omap_context_t oc, OMapNodeRef right) +{ + ceph_assert(right->get_type() == TYPE); + LOG_PREFIX(OMapLeafNode::make_full_merge); + DEBUGT("this: {}", oc.t, *this); + return oc.tm.alloc_extent<OMapLeafNode>(oc.t, oc.hint, OMAP_LEAF_BLOCK_SIZE) + .si_then([this, right] (auto &&replacement) { + replacement->merge_from(*this, *right->cast<OMapLeafNode>()); + return full_merge_ret( + interruptible::ready_future_marker{}, + std::move(replacement)); + }); +} + +OMapLeafNode::make_balanced_ret +OMapLeafNode::make_balanced(omap_context_t oc, OMapNodeRef _right) +{ + ceph_assert(_right->get_type() == TYPE); + LOG_PREFIX(OMapLeafNode::make_balanced); + DEBUGT("this: {}", oc.t, *this); + return oc.tm.alloc_extents<OMapLeafNode>(oc.t, oc.hint, OMAP_LEAF_BLOCK_SIZE, 2) + .si_then([this, _right] (auto &&replacement_pair) { + auto replacement_left = replacement_pair.front(); + auto replacement_right = replacement_pair.back(); + auto &right = *_right->cast<OMapLeafNode>(); + return make_balanced_ret( + interruptible::ready_future_marker{}, + std::make_tuple( + replacement_left, replacement_right, + balance_into_new_nodes( + *this, right, + *replacement_left, *replacement_right))); + }); +} + + +omap_load_extent_iertr::future<OMapNodeRef> +omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth) +{ + ceph_assert(depth > 0); + if (depth > 1) { + return oc.tm.read_extent<OMapInnerNode>(oc.t, laddr, + OMAP_INNER_BLOCK_SIZE) + .handle_error_interruptible( + omap_load_extent_iertr::pass_further{}, + crimson::ct_error::assert_all{ "Invalid error in omap_load_extent" } + ).si_then( + [](auto&& e) { + return seastar::make_ready_future<OMapNodeRef>(std::move(e)); + }); + } else { + return oc.tm.read_extent<OMapLeafNode>(oc.t, laddr, OMAP_LEAF_BLOCK_SIZE + ).handle_error_interruptible( + omap_load_extent_iertr::pass_further{}, + crimson::ct_error::assert_all{ "Invalid error in omap_load_extent" } + ).si_then( + [](auto&& e) { + return seastar::make_ready_future<OMapNodeRef>(std::move(e)); + }); + } +} +} diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h new file mode 100644 index 000000000..a2b51bbb0 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h @@ -0,0 +1,250 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <string.h> + +#include "include/buffer.h" + +#include "crimson/common/errorator.h" +#include "crimson/os/seastore/omap_manager.h" +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h" +#include "crimson/os/seastore/omap_manager/btree/omap_types.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h" + +namespace crimson::os::seastore::omap_manager { + +/** + * OMapInnerNode + * + * Abstracts operations on and layout of internal nodes for the + * omap Tree. + * + * Layout (4k): + * num_entries: meta : keys : values : + */ + +struct OMapInnerNode + : OMapNode, + StringKVInnerNodeLayout { + using OMapInnerNodeRef = TCachedExtentRef<OMapInnerNode>; + using internal_iterator_t = const_iterator; + template <typename... T> + OMapInnerNode(T&&... t) : + OMapNode(std::forward<T>(t)...), + StringKVInnerNodeLayout(get_bptr().c_str()) {} + + omap_node_meta_t get_node_meta() const final { return get_meta(); } + bool extent_will_overflow(size_t ksize, std::optional<size_t> vsize) const { + return is_overflow(ksize); + } + bool can_merge(OMapNodeRef right) const { + return !is_overflow(*right->cast<OMapInnerNode>()); + } + bool extent_is_below_min() const { return below_min(); } + uint32_t get_node_size() { return get_size(); } + + CachedExtentRef duplicate_for_write(Transaction&) final { + assert(delta_buffer.empty()); + return CachedExtentRef(new OMapInnerNode(*this)); + } + + delta_inner_buffer_t delta_buffer; + delta_inner_buffer_t *maybe_get_delta_buffer() { + return is_mutation_pending() ? &delta_buffer : nullptr; + } + + get_value_ret get_value(omap_context_t oc, const std::string &key) final; + + insert_ret insert( + omap_context_t oc, + const std::string &key, + const ceph::bufferlist &value) final; + + rm_key_ret rm_key( + omap_context_t oc, + const std::string &key) final; + + list_ret list( + omap_context_t oc, + const std::optional<std::string> &first, + const std::optional<std::string> &last, + omap_list_config_t config) final; + + clear_ret clear(omap_context_t oc) final; + + using split_children_iertr = base_iertr; + using split_children_ret = split_children_iertr::future + <std::tuple<OMapInnerNodeRef, OMapInnerNodeRef, std::string>>; + split_children_ret make_split_children(omap_context_t oc); + + full_merge_ret make_full_merge( + omap_context_t oc, OMapNodeRef right) final; + + make_balanced_ret make_balanced( + omap_context_t oc, OMapNodeRef right) final; + + using make_split_insert_iertr = base_iertr; + using make_split_insert_ret = make_split_insert_iertr::future<mutation_result_t>; + make_split_insert_ret make_split_insert( + omap_context_t oc, internal_iterator_t iter, + std::string key, laddr_t laddr); + + using merge_entry_iertr = base_iertr; + using merge_entry_ret = merge_entry_iertr::future<mutation_result_t>; + merge_entry_ret merge_entry( + omap_context_t oc, + internal_iterator_t iter, OMapNodeRef entry); + + using handle_split_iertr = base_iertr; + using handle_split_ret = handle_split_iertr::future<mutation_result_t>; + handle_split_ret handle_split( + omap_context_t oc, internal_iterator_t iter, + mutation_result_t mresult); + + std::ostream &print_detail_l(std::ostream &out) const final; + + static constexpr extent_types_t TYPE = extent_types_t::OMAP_INNER; + extent_types_t get_type() const final { + return TYPE; + } + + ceph::bufferlist get_delta() final { + ceph::bufferlist bl; + if (!delta_buffer.empty()) { + encode(delta_buffer, bl); + delta_buffer.clear(); + } + return bl; + } + + void apply_delta(const ceph::bufferlist &bl) final { + assert(bl.length()); + delta_inner_buffer_t buffer; + auto bptr = bl.cbegin(); + decode(buffer, bptr); + buffer.replay(*this); + } + + internal_iterator_t get_containing_child(const std::string &key); +}; +using OMapInnerNodeRef = OMapInnerNode::OMapInnerNodeRef; + +/** + * OMapLeafNode + * + * Abstracts operations on and layout of leaf nodes for the + * OMap Tree. + * + * Layout (4k): + * num_entries: meta : keys : values : + */ + +struct OMapLeafNode + : OMapNode, + StringKVLeafNodeLayout { + + using OMapLeafNodeRef = TCachedExtentRef<OMapLeafNode>; + using internal_iterator_t = const_iterator; + template <typename... T> + OMapLeafNode(T&&... t) : + OMapNode(std::forward<T>(t)...), + StringKVLeafNodeLayout(get_bptr().c_str()) {} + + omap_node_meta_t get_node_meta() const final { return get_meta(); } + bool extent_will_overflow( + size_t ksize, std::optional<size_t> vsize) const { + return is_overflow(ksize, *vsize); + } + bool can_merge(OMapNodeRef right) const { + return !is_overflow(*right->cast<OMapLeafNode>()); + } + bool extent_is_below_min() const { return below_min(); } + uint32_t get_node_size() { return get_size(); } + + CachedExtentRef duplicate_for_write(Transaction&) final { + assert(delta_buffer.empty()); + return CachedExtentRef(new OMapLeafNode(*this)); + } + + delta_leaf_buffer_t delta_buffer; + delta_leaf_buffer_t *maybe_get_delta_buffer() { + return is_mutation_pending() ? &delta_buffer : nullptr; + } + + get_value_ret get_value( + omap_context_t oc, const std::string &key) final; + + insert_ret insert( + omap_context_t oc, + const std::string &key, + const ceph::bufferlist &value) final; + + rm_key_ret rm_key( + omap_context_t oc, const std::string &key) final; + + list_ret list( + omap_context_t oc, + const std::optional<std::string> &first, + const std::optional<std::string> &last, + omap_list_config_t config) final; + + clear_ret clear( + omap_context_t oc) final; + + using split_children_iertr = base_iertr; + using split_children_ret = split_children_iertr::future + <std::tuple<OMapLeafNodeRef, OMapLeafNodeRef, std::string>>; + split_children_ret make_split_children( + omap_context_t oc); + + full_merge_ret make_full_merge( + omap_context_t oc, + OMapNodeRef right) final; + + make_balanced_ret make_balanced( + omap_context_t oc, + OMapNodeRef _right) final; + + static constexpr extent_types_t TYPE = extent_types_t::OMAP_LEAF; + extent_types_t get_type() const final { + return TYPE; + } + + ceph::bufferlist get_delta() final { + ceph::bufferlist bl; + if (!delta_buffer.empty()) { + encode(delta_buffer, bl); + delta_buffer.clear(); + } + return bl; + } + + void apply_delta(const ceph::bufferlist &_bl) final { + assert(_bl.length()); + ceph::bufferlist bl = _bl; + bl.rebuild(); + delta_leaf_buffer_t buffer; + auto bptr = bl.cbegin(); + decode(buffer, bptr); + buffer.replay(*this); + } + + std::ostream &print_detail_l(std::ostream &out) const final; + + std::pair<internal_iterator_t, internal_iterator_t> + get_leaf_entries(std::string &key); + +}; +using OMapLeafNodeRef = OMapLeafNode::OMapLeafNodeRef; + +std::ostream &operator<<(std::ostream &out, const omap_inner_key_t &rhs); +std::ostream &operator<<(std::ostream &out, const omap_leaf_key_t &rhs); +} + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter<crimson::os::seastore::omap_manager::OMapInnerNode> : fmt::ostream_formatter {}; +template <> struct fmt::formatter<crimson::os::seastore::omap_manager::OMapLeafNode> : fmt::ostream_formatter {}; +#endif diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_types.h b/src/crimson/os/seastore/omap_manager/btree/omap_types.h new file mode 100644 index 000000000..9e0d10e03 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/omap_types.h @@ -0,0 +1,157 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once +#include "crimson/os/seastore/seastore_types.h" + +namespace crimson::os::seastore::omap_manager { + +struct omap_node_meta_t { + depth_t depth = 0; + + std::pair<omap_node_meta_t, omap_node_meta_t> split_into() const { + return std::make_pair( + omap_node_meta_t{depth}, + omap_node_meta_t{depth}); + } + + static omap_node_meta_t merge_from( + const omap_node_meta_t &lhs, const omap_node_meta_t &rhs) { + assert(lhs.depth == rhs.depth); + return omap_node_meta_t{lhs.depth}; + } + + static std::pair<omap_node_meta_t, omap_node_meta_t> + rebalance(const omap_node_meta_t &lhs, const omap_node_meta_t &rhs) { + assert(lhs.depth == rhs.depth); + return std::make_pair( + omap_node_meta_t{lhs.depth}, + omap_node_meta_t{lhs.depth}); + } +}; + +struct omap_node_meta_le_t { + depth_le_t depth = init_depth_le(0); + + omap_node_meta_le_t() = default; + omap_node_meta_le_t(const omap_node_meta_le_t &) = default; + explicit omap_node_meta_le_t(const omap_node_meta_t &val) + : depth(init_depth_le(val.depth)) {} + + operator omap_node_meta_t() const { + return omap_node_meta_t{ depth }; + } +}; + +struct omap_inner_key_t { + uint16_t key_off = 0; + uint16_t key_len = 0; + laddr_t laddr = 0; + + omap_inner_key_t() = default; + omap_inner_key_t(uint16_t off, uint16_t len, laddr_t addr) + : key_off(off), key_len(len), laddr(addr) {} + + inline bool operator==(const omap_inner_key_t b) const { + return key_off == b.key_off && key_len == b.key_len && laddr == b.laddr; + } + inline bool operator!=(const omap_inner_key_t b) const { + return key_off != b.key_off || key_len != b.key_len || laddr != b.laddr; + } + DENC(omap_inner_key_t, v, p) { + DENC_START(1, 1, p); + denc(v.key_off, p); + denc(v.key_len, p); + denc(v.laddr, p); + DENC_FINISH(p); + } +}; + +struct omap_inner_key_le_t { + ceph_le16 key_off{0}; + ceph_le16 key_len{0}; + laddr_le_t laddr{0}; + + omap_inner_key_le_t() = default; + omap_inner_key_le_t(const omap_inner_key_le_t &) = default; + explicit omap_inner_key_le_t(const omap_inner_key_t &key) + : key_off(key.key_off), + key_len(key.key_len), + laddr(key.laddr) {} + + operator omap_inner_key_t() const { + return omap_inner_key_t{uint16_t(key_off), uint16_t(key_len), laddr_t(laddr)}; + } + + omap_inner_key_le_t& operator=(omap_inner_key_t key) { + key_off = key.key_off; + key_len = key.key_len; + laddr = laddr_le_t(key.laddr); + return *this; + } + + inline bool operator==(const omap_inner_key_le_t b) const { + return key_off == b.key_off && key_len == b.key_len && laddr == b.laddr; + } +}; + +struct omap_leaf_key_t { + uint16_t key_off = 0; + uint16_t key_len = 0; + uint16_t val_len = 0; + + omap_leaf_key_t() = default; + omap_leaf_key_t(uint16_t k_off, uint16_t k_len, uint16_t v_len) + : key_off(k_off), key_len(k_len), val_len(v_len) {} + + inline bool operator==(const omap_leaf_key_t b) const { + return key_off == b.key_off && key_len == b.key_len && + val_len == b.val_len; + } + inline bool operator!=(const omap_leaf_key_t b) const { + return key_off != b.key_off || key_len != b.key_len || + val_len != b.val_len; + } + + DENC(omap_leaf_key_t, v, p) { + DENC_START(1, 1, p); + denc(v.key_off, p); + denc(v.key_len, p); + denc(v.val_len, p); + DENC_FINISH(p); + } +}; + +struct omap_leaf_key_le_t { + ceph_le16 key_off{0}; + ceph_le16 key_len{0}; + ceph_le16 val_len{0}; + + omap_leaf_key_le_t() = default; + omap_leaf_key_le_t(const omap_leaf_key_le_t &) = default; + explicit omap_leaf_key_le_t(const omap_leaf_key_t &key) + : key_off(key.key_off), + key_len(key.key_len), + val_len(key.val_len) {} + + operator omap_leaf_key_t() const { + return omap_leaf_key_t{uint16_t(key_off), uint16_t(key_len), + uint16_t(val_len)}; + } + + omap_leaf_key_le_t& operator=(omap_leaf_key_t key) { + key_off = key.key_off; + key_len = key.key_len; + val_len = key.val_len; + return *this; + } + + inline bool operator==(const omap_leaf_key_le_t b) const { + return key_off == b.key_off && key_len == b.key_len && + val_len == b.val_len; + } +}; + +} +WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::omap_manager::omap_inner_key_t) +WRITE_CLASS_DENC_BOUNDED(crimson::os::seastore::omap_manager::omap_leaf_key_t) diff --git a/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h b/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h new file mode 100644 index 000000000..72b13fedf --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h @@ -0,0 +1,1550 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <iostream> +#include <string> + +#include "include/byteorder.h" +#include "include/denc.h" +#include "include/encoding.h" + +#include "crimson/common/layout.h" +#include "crimson/common/fixed_kv_node_layout.h" +#include "crimson/os/seastore/omap_manager.h" +#include "crimson/os/seastore/omap_manager/btree/omap_types.h" + +namespace crimson::os::seastore::omap_manager { +class StringKVInnerNodeLayout; +class StringKVLeafNodeLayout; + +/** + * copy_from_foreign + * + * Copy from another node entries to this node. + * [from_src, to_src) is another node entry range. + * tgt is this node entry to copy to. + * tgt and from_src must be from different nodes. + * from_src and to_src must be in the same node. + */ +template <typename iterator, typename const_iterator> +static void copy_from_foreign( + iterator tgt, + const_iterator from_src, + const_iterator to_src) { + assert(tgt->node != from_src->node); + assert(to_src->node == from_src->node); + if (from_src == to_src) + return; + + auto to_copy = from_src->get_right_ptr_end() - to_src->get_right_ptr_end(); + assert(to_copy > 0); + memcpy( + tgt->get_right_ptr_end() - to_copy, + to_src->get_right_ptr_end(), + to_copy); + memcpy( + tgt->get_node_key_ptr(), + from_src->get_node_key_ptr(), + to_src->get_node_key_ptr() - from_src->get_node_key_ptr()); + + auto offset_diff = tgt->get_right_offset_end() - from_src->get_right_offset_end(); + for (auto i = tgt; i != tgt + (to_src - from_src); ++i) { + i->update_offset(offset_diff); + } +} + +/** + * copy_from_local + * + * Copies entries from [from_src, to_src) to tgt. + * tgt, from_src, and to_src must be from the same node. + */ +template <typename iterator> +static void copy_from_local( + unsigned len, + iterator tgt, + iterator from_src, + iterator to_src) { + assert(tgt->node == from_src->node); + assert(to_src->node == from_src->node); + + auto to_copy = from_src->get_right_ptr_end() - to_src->get_right_ptr_end(); + assert(to_copy > 0); + int adjust_offset = tgt > from_src? -len : len; + memmove(to_src->get_right_ptr_end() + adjust_offset, + to_src->get_right_ptr_end(), + to_copy); + + for ( auto ite = from_src; ite < to_src; ite++) { + ite->update_offset(-adjust_offset); + } + memmove(tgt->get_node_key_ptr(), from_src->get_node_key_ptr(), + to_src->get_node_key_ptr() - from_src->get_node_key_ptr()); +} + +struct delta_inner_t { + enum class op_t : uint_fast8_t { + INSERT, + UPDATE, + REMOVE, + } op; + std::string key; + laddr_t addr; + + DENC(delta_inner_t, v, p) { + DENC_START(1, 1, p); + denc(v.op, p); + denc(v.key, p); + denc(v.addr, p); + DENC_FINISH(p); + } + + void replay(StringKVInnerNodeLayout &l); + bool operator==(const delta_inner_t &rhs) const { + return op == rhs.op && + key == rhs.key && + addr == rhs.addr; + } +}; +} +WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_inner_t) + +namespace crimson::os::seastore::omap_manager { +struct delta_leaf_t { + enum class op_t : uint_fast8_t { + INSERT, + UPDATE, + REMOVE, + } op; + std::string key; + ceph::bufferlist val; + + DENC(delta_leaf_t, v, p) { + DENC_START(1, 1, p); + denc(v.op, p); + denc(v.key, p); + denc(v.val, p); + DENC_FINISH(p); + } + + void replay(StringKVLeafNodeLayout &l); + bool operator==(const delta_leaf_t &rhs) const { + return op == rhs.op && + key == rhs.key && + val == rhs.val; + } +}; +} +WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_leaf_t) + +namespace crimson::os::seastore::omap_manager { +class delta_inner_buffer_t { + std::vector<delta_inner_t> buffer; +public: + bool empty() const { + return buffer.empty(); + } + void insert( + const std::string &key, + laddr_t addr) { + buffer.push_back( + delta_inner_t{ + delta_inner_t::op_t::INSERT, + key, + addr + }); + } + void update( + const std::string &key, + laddr_t addr) { + buffer.push_back( + delta_inner_t{ + delta_inner_t::op_t::UPDATE, + key, + addr + }); + } + void remove(const std::string &key) { + buffer.push_back( + delta_inner_t{ + delta_inner_t::op_t::REMOVE, + key, + L_ADDR_NULL + }); + } + + void replay(StringKVInnerNodeLayout &node) { + for (auto &i: buffer) { + i.replay(node); + } + } + + void clear() { + buffer.clear(); + } + + DENC(delta_inner_buffer_t, v, p) { + DENC_START(1, 1, p); + denc(v.buffer, p); + DENC_FINISH(p); + } + + bool operator==(const delta_inner_buffer_t &rhs) const { + return buffer == rhs.buffer; + } +}; +} +WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_inner_buffer_t) + +namespace crimson::os::seastore::omap_manager { +class delta_leaf_buffer_t { + std::vector<delta_leaf_t> buffer; +public: + bool empty() const { + return buffer.empty(); + } + void insert( + const std::string &key, + const ceph::bufferlist &val) { + buffer.push_back( + delta_leaf_t{ + delta_leaf_t::op_t::INSERT, + key, + val + }); + } + void update( + const std::string &key, + const ceph::bufferlist &val) { + buffer.push_back( + delta_leaf_t{ + delta_leaf_t::op_t::UPDATE, + key, + val + }); + } + void remove(const std::string &key) { + buffer.push_back( + delta_leaf_t{ + delta_leaf_t::op_t::REMOVE, + key, + bufferlist() + }); + } + + void replay(StringKVLeafNodeLayout &node) { + for (auto &i: buffer) { + i.replay(node); + } + } + + void clear() { + buffer.clear(); + } + + DENC(delta_leaf_buffer_t, v, p) { + DENC_START(1, 1, p); + denc(v.buffer, p); + DENC_FINISH(p); + } + + bool operator==(const delta_leaf_buffer_t &rhs) const { + return buffer == rhs.buffer; + } +}; +} +WRITE_CLASS_DENC(crimson::os::seastore::omap_manager::delta_leaf_buffer_t) + +namespace crimson::os::seastore::omap_manager { +/** + * StringKVInnerNodeLayout + * + * Uses absl::container_internal::Layout for the actual key memory layout. + * + * The primary interface exposed is centered on the iterator + * and related methods. + * + * Also included are helpers for doing splits and merges as for a btree. + * + * layout diagram: + * + * # <----------------------------- node range --------------------------------------------> # + * # #<~># free space # + * # <------------- left part -----------------------------> # <~# <----- right keys -----> # + * # # <------------ left keys --------------> #~> # # + * # # keys [2, n) |<~># #<~>| right keys [2, n) # + * # # <--- key 0 ----> | <--- key 1 ----> | # # | <- k1 -> | <-- k0 --> # + * # # | | # # | | # + * # num_ | meta # key | key | val | key | key | val | # # | key | key # + * # keys | depth # off | len | laddr| off | len | laddr| # # | buff | buff # + * # | # 0 | 0 | 0 | 1 | 1 | 1 |...#...#...| key 1 | key 0 # + * # | | | <- off --+----------> # + * # | | ^ | <- off --> # + * | | | ^ + * | +----------------------------------+ | + * +----------------------------------------------------------------+ + */ +class StringKVInnerNodeLayout { + char *buf = nullptr; + + using L = absl::container_internal::Layout<ceph_le32, omap_node_meta_le_t, omap_inner_key_le_t>; + static constexpr L layout{1, 1, 1}; // = L::Partial(1, 1, 1); + friend class delta_inner_t; +public: + template <bool is_const> + class iter_t { + friend class StringKVInnerNodeLayout; + + template <typename iterator, typename const_iterator> + friend void copy_from_foreign(iterator, const_iterator, const_iterator); + template <typename iterator> + friend void copy_from_local(unsigned, iterator, iterator, iterator); + + using parent_t = typename crimson::common::maybe_const_t<StringKVInnerNodeLayout, is_const>::type; + + mutable parent_t node; + uint16_t index; + + iter_t( + parent_t parent, + uint16_t index) : node(parent), index(index) {} + + public: + using iterator_category = std::input_iterator_tag; + using value_type = StringKVInnerNodeLayout; + using difference_type = std::ptrdiff_t; + using pointer = StringKVInnerNodeLayout*; + using reference = iter_t&; + + iter_t(const iter_t &) = default; + iter_t(iter_t &&) = default; + iter_t &operator=(const iter_t &) = default; + iter_t &operator=(iter_t &&) = default; + + operator iter_t<!is_const>() const { + static_assert(!is_const); + return iter_t<!is_const>(node, index); + } + + iter_t &operator*() { return *this; } + iter_t *operator->() { return this; } + + iter_t operator++(int) { + auto ret = *this; + ++index; + return ret; + } + + iter_t &operator++() { + ++index; + return *this; + } + + iter_t operator--(int) { + auto ret = *this; + assert(index > 0); + --index; + return ret; + } + + iter_t &operator--() { + assert(index > 0); + --index; + return *this; + } + + uint16_t operator-(const iter_t &rhs) const { + assert(rhs.node == node); + return index - rhs.index; + } + + iter_t operator+(uint16_t off) const { + return iter_t(node, index + off); + } + iter_t operator-(uint16_t off) const { + return iter_t(node, index - off); + } + + uint16_t operator<(const iter_t &rhs) const { + assert(rhs.node == node); + return index < rhs.index; + } + + uint16_t operator>(const iter_t &rhs) const { + assert(rhs.node == node); + return index > rhs.index; + } + + friend bool operator==(const iter_t &lhs, const iter_t &rhs) { + assert(lhs.node == rhs.node); + return lhs.index == rhs.index; + } + + private: + omap_inner_key_t get_node_key() const { + omap_inner_key_le_t kint = node->get_node_key_ptr()[index]; + return omap_inner_key_t(kint); + } + auto get_node_key_ptr() const { + return reinterpret_cast< + typename crimson::common::maybe_const_t<char, is_const>::type>( + node->get_node_key_ptr() + index); + } + + uint32_t get_node_val_offset() const { + return get_node_key().key_off; + } + auto get_node_val_ptr() const { + auto tail = node->buf + OMAP_INNER_BLOCK_SIZE; + if (*this == node->iter_end()) + return tail; + else { + return tail - get_node_val_offset(); + } + } + + int get_right_offset_end() const { + if (index == 0) + return 0; + else + return (*this - 1)->get_node_val_offset(); + } + auto get_right_ptr_end() const { + return node->buf + OMAP_INNER_BLOCK_SIZE - get_right_offset_end(); + } + + void update_offset(int offset) { + static_assert(!is_const); + auto key = get_node_key(); + assert(offset + key.key_off >= 0); + key.key_off += offset; + set_node_key(key); + } + + void set_node_key(omap_inner_key_t _lb) { + static_assert(!is_const); + omap_inner_key_le_t lb; + lb = _lb; + node->get_node_key_ptr()[index] = lb; + } + + void set_node_val(const std::string &str) { + static_assert(!is_const); + assert(str.size() == get_node_key().key_len); + assert(get_node_key().key_off >= str.size()); + assert(get_node_key().key_off < OMAP_INNER_BLOCK_SIZE); + assert(str.size() < OMAP_INNER_BLOCK_SIZE); + ::memcpy(get_node_val_ptr(), str.data(), str.size()); + } + + public: + uint16_t get_index() const { + return index; + } + + std::string get_key() const { + return std::string( + get_node_val_ptr(), + get_node_key().key_len); + } + + laddr_t get_val() const { + return get_node_key().laddr; + } + + bool contains(std::string_view key) const { + assert(*this != node->iter_end()); + auto next = *this + 1; + if (next == node->iter_end()) { + return get_key() <= key; + } else { + return (get_key() <= key) && (next->get_key() > key); + } + } + }; + using const_iterator = iter_t<true>; + using iterator = iter_t<false>; + +public: + void journal_inner_insert( + const_iterator _iter, + const laddr_t laddr, + const std::string &key, + delta_inner_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + if (recorder) { + recorder->insert( + key, + laddr); + } + inner_insert(iter, key, laddr); + } + + void journal_inner_update( + const_iterator _iter, + const laddr_t laddr, + delta_inner_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + auto key = iter->get_key(); + if (recorder) { + recorder->update(key, laddr); + } + inner_update(iter, laddr); + } + + void journal_inner_remove( + const_iterator _iter, + delta_inner_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + if (recorder) { + recorder->remove(iter->get_key()); + } + inner_remove(iter); + } + + StringKVInnerNodeLayout(char *buf) : + buf(buf) {} + + uint32_t get_size() const { + ceph_le32 &size = *layout.template Pointer<0>(buf); + return uint32_t(size); + } + + /** + * set_size + * + * Set size representation to match size + */ + void set_size(uint32_t size) { + ceph_le32 s; + s = size; + *layout.template Pointer<0>(buf) = s; + } + + const_iterator iter_cbegin() const { + return const_iterator( + this, + 0); + } + const_iterator iter_begin() const { + return iter_cbegin(); + } + + const_iterator iter_cend() const { + return const_iterator( + this, + get_size()); + } + const_iterator iter_end() const { + return iter_cend(); + } + + iterator iter_begin() { + return iterator( + this, + 0); + } + + iterator iter_end() { + return iterator( + this, + get_size()); + } + + const_iterator iter_idx(uint16_t off) const { + return const_iterator( + this, + off); + } + + const_iterator string_lower_bound(std::string_view str) const { + auto it = std::lower_bound(boost::make_counting_iterator<uint16_t>(0), + boost::make_counting_iterator<uint16_t>(get_size()), + str, + [this](uint16_t i, std::string_view str) { + const_iterator iter(this, i); + return iter->get_key() < str; + }); + return const_iterator(this, *it); + } + + iterator string_lower_bound(std::string_view str) { + const auto &tref = *this; + return iterator(this, tref.string_lower_bound(str).index); + } + + const_iterator string_upper_bound(std::string_view str) const { + auto it = std::upper_bound(boost::make_counting_iterator<uint16_t>(0), + boost::make_counting_iterator<uint16_t>(get_size()), + str, + [this](std::string_view str, uint16_t i) { + const_iterator iter(this, i); + return str < iter->get_key(); + }); + return const_iterator(this, *it); + } + + iterator string_upper_bound(std::string_view str) { + const auto &tref = *this; + return iterator(this, tref.string_upper_bound(str).index); + } + + const_iterator find_string_key(std::string_view str) const { + auto ret = iter_begin(); + for (; ret != iter_end(); ++ret) { + std::string s = ret->get_key(); + if (s == str) + break; + } + return ret; + } + + iterator find_string_key(std::string_view str) { + const auto &tref = *this; + return iterator(this, tref.find_string_key(str).index); + } + + const_iterator get_split_pivot() const { + uint32_t total_size = omap_inner_key_t( + get_node_key_ptr()[get_size()-1]).key_off; + uint32_t pivot_size = total_size / 2; + uint32_t size = 0; + for (auto ite = iter_begin(); ite < iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len; + if (size >= pivot_size){ + return ite; + } + } + return iter_end(); + } + + + /** + * get_meta/set_meta + * + * Enables stashing a templated type within the layout. + * Cannot be modified after initial write as it is not represented + * in delta_t + */ + omap_node_meta_t get_meta() const { + omap_node_meta_le_t &metaint = *layout.template Pointer<1>(buf); + return omap_node_meta_t(metaint); + } + void set_meta(const omap_node_meta_t &meta) { + *layout.template Pointer<1>(buf) = omap_node_meta_le_t(meta); + } + + uint32_t used_space() const { + uint32_t count = get_size(); + if (count) { + omap_inner_key_t last_key = omap_inner_key_t(get_node_key_ptr()[count-1]); + return last_key.key_off + count * sizeof(omap_inner_key_le_t); + } else { + return 0; + } + } + + uint32_t free_space() const { + return capacity() - used_space(); + } + + uint16_t capacity() const { + return OMAP_INNER_BLOCK_SIZE + - (reinterpret_cast<char*>(layout.template Pointer<2>(buf)) + - reinterpret_cast<char*>(layout.template Pointer<0>(buf))); + } + + bool is_overflow(size_t ksize) const { + return free_space() < (sizeof(omap_inner_key_le_t) + ksize); + } + + bool is_overflow(const StringKVInnerNodeLayout &rhs) const { + return free_space() < rhs.used_space(); + } + + bool below_min() const { + return free_space() > (capacity() / 2); + } + + bool operator==(const StringKVInnerNodeLayout &rhs) const { + if (get_size() != rhs.get_size()) { + return false; + } + + auto iter = iter_begin(); + auto iter2 = rhs.iter_begin(); + while (iter != iter_end()) { + if (iter->get_key() != iter2->get_key() || + iter->get_val() != iter2->get_val()) { + return false; + } + iter++; + iter2++; + } + return true; + } + + /** + * split_into + * + * Takes *this and splits its contents into left and right. + */ + std::string split_into( + StringKVInnerNodeLayout &left, + StringKVInnerNodeLayout &right) const { + auto piviter = get_split_pivot(); + assert(piviter != iter_end()); + + copy_from_foreign(left.iter_begin(), iter_begin(), piviter); + left.set_size(piviter - iter_begin()); + + copy_from_foreign(right.iter_begin(), piviter, iter_end()); + right.set_size(iter_end() - piviter); + + auto [lmeta, rmeta] = get_meta().split_into(); + left.set_meta(lmeta); + right.set_meta(rmeta); + + return piviter->get_key(); + } + + /** + * merge_from + * + * Takes two nodes and copies their contents into *this. + * + * precondition: left.size() + right.size() < CAPACITY + */ + void merge_from( + const StringKVInnerNodeLayout &left, + const StringKVInnerNodeLayout &right) { + copy_from_foreign( + iter_end(), + left.iter_begin(), + left.iter_end()); + set_size(left.get_size()); + + copy_from_foreign( + iter_end(), + right.iter_begin(), + right.iter_end()); + set_size(left.get_size() + right.get_size()); + set_meta(omap_node_meta_t::merge_from(left.get_meta(), right.get_meta())); + } + + /** + * balance_into_new_nodes + * + * Takes the contents of left and right and copies them into + * replacement_left and replacement_right such that + * the size of replacement_left just >= 1/2 of (left + right) + */ + static std::string balance_into_new_nodes( + const StringKVInnerNodeLayout &left, + const StringKVInnerNodeLayout &right, + StringKVInnerNodeLayout &replacement_left, + StringKVInnerNodeLayout &replacement_right) + { + uint32_t left_size = omap_inner_key_t(left.get_node_key_ptr()[left.get_size()-1]).key_off; + uint32_t right_size = omap_inner_key_t(right.get_node_key_ptr()[right.get_size()-1]).key_off; + uint32_t total = left_size + right_size; + uint32_t pivot_size = total / 2; + uint32_t pivot_idx = 0; + if (pivot_size < left_size) { + uint32_t size = 0; + for (auto ite = left.iter_begin(); ite < left.iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len; + if (size >= pivot_size){ + pivot_idx = ite.get_index(); + break; + } + } + } else { + uint32_t more_size = pivot_size - left_size; + uint32_t size = 0; + for (auto ite = right.iter_begin(); ite < right.iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len; + if (size >= more_size){ + pivot_idx = ite.get_index() + left.get_size(); + break; + } + } + } + + auto replacement_pivot = pivot_idx >= left.get_size() ? + right.iter_idx(pivot_idx - left.get_size())->get_key() : + left.iter_idx(pivot_idx)->get_key(); + + if (pivot_size < left_size) { + copy_from_foreign( + replacement_left.iter_end(), + left.iter_begin(), + left.iter_idx(pivot_idx)); + replacement_left.set_size(pivot_idx); + + copy_from_foreign( + replacement_right.iter_end(), + left.iter_idx(pivot_idx), + left.iter_end()); + replacement_right.set_size(left.get_size() - pivot_idx); + + copy_from_foreign( + replacement_right.iter_end(), + right.iter_begin(), + right.iter_end()); + replacement_right.set_size(right.get_size() + left.get_size()- pivot_idx); + } else { + copy_from_foreign( + replacement_left.iter_end(), + left.iter_begin(), + left.iter_end()); + replacement_left.set_size(left.get_size()); + + copy_from_foreign( + replacement_left.iter_end(), + right.iter_begin(), + right.iter_idx(pivot_idx - left.get_size())); + replacement_left.set_size(pivot_idx); + + copy_from_foreign( + replacement_right.iter_end(), + right.iter_idx(pivot_idx - left.get_size()), + right.iter_end()); + replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx); + } + + auto [lmeta, rmeta] = omap_node_meta_t::rebalance( + left.get_meta(), right.get_meta()); + replacement_left.set_meta(lmeta); + replacement_right.set_meta(rmeta); + return replacement_pivot; + } + +private: + void inner_insert( + iterator iter, + const std::string &key, + laddr_t val) { + if (iter != iter_begin()) { + assert((iter - 1)->get_key() < key); + } + if (iter != iter_end()) { + assert(iter->get_key() > key); + } + assert(!is_overflow(key.size())); + + if (iter != iter_end()) { + copy_from_local(key.size(), iter + 1, iter, iter_end()); + } + + omap_inner_key_t nkey; + nkey.key_len = key.size(); + nkey.laddr = val; + if (iter != iter_begin()) { + auto pkey = (iter - 1).get_node_key(); + nkey.key_off = nkey.key_len + pkey.key_off; + } else { + nkey.key_off = nkey.key_len; + } + + iter->set_node_key(nkey); + set_size(get_size() + 1); + iter->set_node_val(key); + } + + void inner_update( + iterator iter, + laddr_t addr) { + assert(iter != iter_end()); + auto node_key = iter->get_node_key(); + node_key.laddr = addr; + iter->set_node_key(node_key); + } + + void inner_remove(iterator iter) { + assert(iter != iter_end()); + if ((iter + 1) != iter_end()) + copy_from_local(iter->get_node_key().key_len, iter, iter + 1, iter_end()); + set_size(get_size() - 1); + } + + /** + * get_key_ptr + * + * Get pointer to start of key array + */ + omap_inner_key_le_t *get_node_key_ptr() { + return L::Partial(1, 1, get_size()).template Pointer<2>(buf); + } + const omap_inner_key_le_t *get_node_key_ptr() const { + return L::Partial(1, 1, get_size()).template Pointer<2>(buf); + } + +}; + +/** + * StringKVLeafNodeLayout + * + * layout diagram: + * + * # <----------------------------- node range -------------------------------------------------> # + * # #<~># free space # + * # <------------- left part ---------------------------> # <~# <----- right key-value pairs --> # + * # # <------------ left keys ------------> #~> # # + * # # keys [2, n) |<~># #<~>| right kvs [2, n) # + * # # <--- key 0 ---> | <--- key 1 ---> | # # | <-- kv 1 --> | <-- kv 0 --> # + * # # | | # # | | # + * # num_ | meta # key | key | val | key | key | val | # # | key | val | key | val # + * # keys | depth # off | len | len | off | len | len | # # | buff | buff | buff | buff # + * # # 0 | 0 | 0 | 1 | 1 | 1 |...#...#...| key 1 | val 1| key 0 | val 0 # + * # | | | <--- off ----+-------------> # + * # | | ^ | <--- off ---> # + * | | | ^ + * | +-----------------------------------+ | + * +-------------------------------------------------------------------+ + */ +class StringKVLeafNodeLayout { + char *buf = nullptr; + + using L = absl::container_internal::Layout<ceph_le32, omap_node_meta_le_t, omap_leaf_key_le_t>; + static constexpr L layout{1, 1, 1}; // = L::Partial(1, 1, 1); + friend class delta_leaf_t; + +public: + template <bool is_const> + class iter_t { + friend class StringKVLeafNodeLayout; + using parent_t = typename crimson::common::maybe_const_t<StringKVLeafNodeLayout, is_const>::type; + + template <typename iterator, typename const_iterator> + friend void copy_from_foreign(iterator, const_iterator, const_iterator); + template <typename iterator> + friend void copy_from_local(unsigned, iterator, iterator, iterator); + + parent_t node; + uint16_t index; + + iter_t( + parent_t parent, + uint16_t index) : node(parent), index(index) {} + + public: + iter_t(const iter_t &) = default; + iter_t(iter_t &&) = default; + iter_t &operator=(const iter_t &) = default; + iter_t &operator=(iter_t &&) = default; + + operator iter_t<!is_const>() const { + static_assert(!is_const); + return iter_t<!is_const>(node, index); + } + + iter_t &operator*() { return *this; } + iter_t *operator->() { return this; } + + iter_t operator++(int) { + auto ret = *this; + ++index; + return ret; + } + + iter_t &operator++() { + ++index; + return *this; + } + + uint16_t operator-(const iter_t &rhs) const { + assert(rhs.node == node); + return index - rhs.index; + } + + iter_t operator+(uint16_t off) const { + return iter_t( + node, + index + off); + } + iter_t operator-(uint16_t off) const { + return iter_t( + node, + index - off); + } + + uint16_t operator<(const iter_t &rhs) const { + assert(rhs.node == node); + return index < rhs.index; + } + + uint16_t operator>(const iter_t &rhs) const { + assert(rhs.node == node); + return index > rhs.index; + } + + bool operator==(const iter_t &rhs) const { + assert(node == rhs.node); + return rhs.index == index; + } + + bool operator!=(const iter_t &rhs) const { + assert(node == rhs.node); + return index != rhs.index; + } + + private: + omap_leaf_key_t get_node_key() const { + omap_leaf_key_le_t kint = node->get_node_key_ptr()[index]; + return omap_leaf_key_t(kint); + } + auto get_node_key_ptr() const { + return reinterpret_cast< + typename crimson::common::maybe_const_t<char, is_const>::type>( + node->get_node_key_ptr() + index); + } + + uint32_t get_node_val_offset() const { + return get_node_key().key_off; + } + auto get_node_val_ptr() const { + auto tail = node->buf + OMAP_LEAF_BLOCK_SIZE; + if (*this == node->iter_end()) + return tail; + else { + return tail - get_node_val_offset(); + } + } + + int get_right_offset_end() const { + if (index == 0) + return 0; + else + return (*this - 1)->get_node_val_offset(); + } + auto get_right_ptr_end() const { + return node->buf + OMAP_LEAF_BLOCK_SIZE - get_right_offset_end(); + } + + void update_offset(int offset) { + auto key = get_node_key(); + assert(offset + key.key_off >= 0); + key.key_off += offset; + set_node_key(key); + } + + void set_node_key(omap_leaf_key_t _lb) const { + static_assert(!is_const); + omap_leaf_key_le_t lb; + lb = _lb; + node->get_node_key_ptr()[index] = lb; + } + + void set_node_val(const std::string &key, const ceph::bufferlist &val) { + static_assert(!is_const); + auto node_key = get_node_key(); + assert(key.size() == node_key.key_len); + assert(val.length() == node_key.val_len); + ::memcpy(get_node_val_ptr(), key.data(), key.size()); + auto bliter = val.begin(); + bliter.copy(node_key.val_len, get_node_val_ptr() + node_key.key_len); + } + + public: + uint16_t get_index() const { + return index; + } + + std::string get_key() const { + return std::string( + get_node_val_ptr(), + get_node_key().key_len); + } + + std::string get_str_val() const { + auto node_key = get_node_key(); + return std::string( + get_node_val_ptr() + node_key.key_len, + get_node_key().val_len); + } + + ceph::bufferlist get_val() const { + auto node_key = get_node_key(); + ceph::bufferlist bl; + ceph::bufferptr bptr( + get_node_val_ptr() + node_key.key_len, + get_node_key().val_len); + bl.append(bptr); + return bl; + } + }; + using const_iterator = iter_t<true>; + using iterator = iter_t<false>; + +public: + void journal_leaf_insert( + const_iterator _iter, + const std::string &key, + const ceph::bufferlist &val, + delta_leaf_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + if (recorder) { + recorder->insert( + key, + val); + } + leaf_insert(iter, key, val); + } + + void journal_leaf_update( + const_iterator _iter, + const std::string &key, + const ceph::bufferlist &val, + delta_leaf_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + if (recorder) { + recorder->remove(iter->get_key()); + recorder->insert(key, val); + } + leaf_update(iter, key, val); + } + + void journal_leaf_remove( + const_iterator _iter, + delta_leaf_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + if (recorder) { + recorder->remove(iter->get_key()); + } + leaf_remove(iter); + } + + StringKVLeafNodeLayout(char *buf) : + buf(buf) {} + + const_iterator iter_begin() const { + return const_iterator( + this, + 0); + } + + const_iterator iter_end() const { + return const_iterator( + this, + get_size()); + } + + iterator iter_begin() { + return iterator( + this, + 0); + } + + iterator iter_end() { + return iterator( + this, + get_size()); + } + + const_iterator iter_idx(uint16_t off) const { + return const_iterator( + this, + off); + } + + const_iterator string_lower_bound(std::string_view str) const { + uint16_t start = 0, end = get_size(); + while (start != end) { + unsigned mid = (start + end) / 2; + const_iterator iter(this, mid); + std::string s = iter->get_key(); + if (s < str) { + start = ++mid; + } else if (s > str) { + end = mid; + } else { + return iter; + } + } + return const_iterator(this, start); + } + + iterator string_lower_bound(std::string_view str) { + const auto &tref = *this; + return iterator(this, tref.string_lower_bound(str).index); + } + + const_iterator string_upper_bound(std::string_view str) const { + auto ret = iter_begin(); + for (; ret != iter_end(); ++ret) { + std::string s = ret->get_key(); + if (s > str) + break; + } + return ret; + } + + iterator string_upper_bound(std::string_view str) { + const auto &tref = *this; + return iterator(this, tref.string_upper_bound(str).index); + } + + const_iterator find_string_key(std::string_view str) const { + auto ret = iter_begin(); + for (; ret != iter_end(); ++ret) { + std::string s = ret->get_key(); + if (s == str) + break; + } + return ret; + } + iterator find_string_key(std::string_view str) { + const auto &tref = *this; + return iterator(this, tref.find_string_key(str).index); + } + + const_iterator get_split_pivot() const { + uint32_t total_size = omap_leaf_key_t(get_node_key_ptr()[get_size()-1]).key_off; + uint32_t pivot_size = total_size / 2; + uint32_t size = 0; + for (auto ite = iter_begin(); ite < iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len + node_key.val_len; + if (size >= pivot_size){ + return ite; + } + } + return iter_end(); + } + + uint32_t get_size() const { + ceph_le32 &size = *layout.template Pointer<0>(buf); + return uint32_t(size); + } + + /** + * set_size + * + * Set size representation to match size + */ + void set_size(uint32_t size) { + ceph_le32 s; + s = size; + *layout.template Pointer<0>(buf) = s; + } + + /** + * get_meta/set_meta + * + * Enables stashing a templated type within the layout. + * Cannot be modified after initial write as it is not represented + * in delta_t + */ + omap_node_meta_t get_meta() const { + omap_node_meta_le_t &metaint = *layout.template Pointer<1>(buf); + return omap_node_meta_t(metaint); + } + void set_meta(const omap_node_meta_t &meta) { + *layout.template Pointer<1>(buf) = omap_node_meta_le_t(meta); + } + + uint32_t used_space() const { + uint32_t count = get_size(); + if (count) { + omap_leaf_key_t last_key = omap_leaf_key_t(get_node_key_ptr()[count-1]); + return last_key.key_off + count * sizeof(omap_leaf_key_le_t); + } else { + return 0; + } + } + + uint32_t free_space() const { + return capacity() - used_space(); + } + + uint32_t capacity() const { + return OMAP_LEAF_BLOCK_SIZE + - (reinterpret_cast<char*>(layout.template Pointer<2>(buf)) + - reinterpret_cast<char*>(layout.template Pointer<0>(buf))); + } + + bool is_overflow(size_t ksize, size_t vsize) const { + return free_space() < (sizeof(omap_leaf_key_le_t) + ksize + vsize); + } + + bool is_overflow(const StringKVLeafNodeLayout &rhs) const { + return free_space() < rhs.used_space(); + } + + bool below_min() const { + return free_space() > (capacity() / 2); + } + + bool operator==(const StringKVLeafNodeLayout &rhs) const { + if (get_size() != rhs.get_size()) { + return false; + } + + auto iter = iter_begin(); + auto iter2 = rhs.iter_begin(); + while (iter != iter_end()) { + if(iter->get_key() != iter2->get_key() || + iter->get_val() != iter2->get_val()) { + return false; + } + iter++; + iter2++; + } + return true; + } + + /** + * split_into + * + * Takes *this and splits its contents into left and right. + */ + std::string split_into( + StringKVLeafNodeLayout &left, + StringKVLeafNodeLayout &right) const { + auto piviter = get_split_pivot(); + assert (piviter != iter_end()); + + copy_from_foreign(left.iter_begin(), iter_begin(), piviter); + left.set_size(piviter - iter_begin()); + + copy_from_foreign(right.iter_begin(), piviter, iter_end()); + right.set_size(iter_end() - piviter); + + auto [lmeta, rmeta] = get_meta().split_into(); + left.set_meta(lmeta); + right.set_meta(rmeta); + + return piviter->get_key(); + } + + /** + * merge_from + * + * Takes two nodes and copies their contents into *this. + * + * precondition: left.size() + right.size() < CAPACITY + */ + void merge_from( + const StringKVLeafNodeLayout &left, + const StringKVLeafNodeLayout &right) + { + copy_from_foreign( + iter_end(), + left.iter_begin(), + left.iter_end()); + set_size(left.get_size()); + copy_from_foreign( + iter_end(), + right.iter_begin(), + right.iter_end()); + set_size(left.get_size() + right.get_size()); + set_meta(omap_node_meta_t::merge_from(left.get_meta(), right.get_meta())); + } + + /** + * balance_into_new_nodes + * + * Takes the contents of left and right and copies them into + * replacement_left and replacement_right such that + * the size of replacement_left side just >= 1/2 of the total size (left + right). + */ + static std::string balance_into_new_nodes( + const StringKVLeafNodeLayout &left, + const StringKVLeafNodeLayout &right, + StringKVLeafNodeLayout &replacement_left, + StringKVLeafNodeLayout &replacement_right) + { + uint32_t left_size = omap_leaf_key_t(left.get_node_key_ptr()[left.get_size()-1]).key_off; + uint32_t right_size = omap_leaf_key_t(right.get_node_key_ptr()[right.get_size()-1]).key_off; + uint32_t total = left_size + right_size; + uint32_t pivot_size = total / 2; + uint32_t pivot_idx = 0; + if (pivot_size < left_size) { + uint32_t size = 0; + for (auto ite = left.iter_begin(); ite < left.iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len + node_key.val_len; + if (size >= pivot_size){ + pivot_idx = ite.get_index(); + break; + } + } + } else { + uint32_t more_size = pivot_size - left_size; + uint32_t size = 0; + for (auto ite = right.iter_begin(); ite < right.iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len + node_key.val_len; + if (size >= more_size){ + pivot_idx = ite.get_index() + left.get_size(); + break; + } + } + } + + auto replacement_pivot = pivot_idx >= left.get_size() ? + right.iter_idx(pivot_idx - left.get_size())->get_key() : + left.iter_idx(pivot_idx)->get_key(); + + if (pivot_size < left_size) { + copy_from_foreign( + replacement_left.iter_end(), + left.iter_begin(), + left.iter_idx(pivot_idx)); + replacement_left.set_size(pivot_idx); + + copy_from_foreign( + replacement_right.iter_end(), + left.iter_idx(pivot_idx), + left.iter_end()); + replacement_right.set_size(left.get_size() - pivot_idx); + + copy_from_foreign( + replacement_right.iter_end(), + right.iter_begin(), + right.iter_end()); + replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx); + } else { + copy_from_foreign( + replacement_left.iter_end(), + left.iter_begin(), + left.iter_end()); + replacement_left.set_size(left.get_size()); + + copy_from_foreign( + replacement_left.iter_end(), + right.iter_begin(), + right.iter_idx(pivot_idx - left.get_size())); + replacement_left.set_size(pivot_idx); + + copy_from_foreign( + replacement_right.iter_end(), + right.iter_idx(pivot_idx - left.get_size()), + right.iter_end()); + replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx); + } + + auto [lmeta, rmeta] = omap_node_meta_t::rebalance( + left.get_meta(), right.get_meta()); + replacement_left.set_meta(lmeta); + replacement_right.set_meta(rmeta); + return replacement_pivot; + } + +private: + void leaf_insert( + iterator iter, + const std::string &key, + const bufferlist &val) { + if (iter != iter_begin()) { + assert((iter - 1)->get_key() < key); + } + if (iter != iter_end()) { + assert(iter->get_key() > key); + } + assert(!is_overflow(key.size(), val.length())); + omap_leaf_key_t node_key; + if (iter == iter_begin()) { + node_key.key_off = key.size() + val.length(); + node_key.key_len = key.size(); + node_key.val_len = val.length(); + } else { + node_key.key_off = (iter - 1)->get_node_key().key_off + + (key.size() + val.length()); + node_key.key_len = key.size(); + node_key.val_len = val.length(); + } + if (get_size() != 0 && iter != iter_end()) + copy_from_local(node_key.key_len + node_key.val_len, iter + 1, iter, iter_end()); + + iter->set_node_key(node_key); + set_size(get_size() + 1); + iter->set_node_val(key, val); + } + + void leaf_update( + iterator iter, + const std::string &key, + const ceph::bufferlist &val) { + assert(iter != iter_end()); + leaf_remove(iter); + assert(!is_overflow(key.size(), val.length())); + leaf_insert(iter, key, val); + } + + void leaf_remove(iterator iter) { + assert(iter != iter_end()); + if ((iter + 1) != iter_end()) { + omap_leaf_key_t key = iter->get_node_key(); + copy_from_local(key.key_len + key.val_len, iter, iter + 1, iter_end()); + } + set_size(get_size() - 1); + } + + /** + * get_key_ptr + * + * Get pointer to start of key array + */ + omap_leaf_key_le_t *get_node_key_ptr() { + return L::Partial(1, 1, get_size()).template Pointer<2>(buf); + } + const omap_leaf_key_le_t *get_node_key_ptr() const { + return L::Partial(1, 1, get_size()).template Pointer<2>(buf); + } + +}; + +inline void delta_inner_t::replay(StringKVInnerNodeLayout &l) { + switch (op) { + case op_t::INSERT: { + l.inner_insert(l.string_lower_bound(key), key, addr); + break; + } + case op_t::UPDATE: { + auto iter = l.find_string_key(key); + assert(iter != l.iter_end()); + l.inner_update(iter, addr); + break; + } + case op_t::REMOVE: { + auto iter = l.find_string_key(key); + assert(iter != l.iter_end()); + l.inner_remove(iter); + break; + } + default: + assert(0 == "Impossible"); + } +} + +inline void delta_leaf_t::replay(StringKVLeafNodeLayout &l) { + switch (op) { + case op_t::INSERT: { + l.leaf_insert(l.string_lower_bound(key), key, val); + break; + } + case op_t::UPDATE: { + auto iter = l.find_string_key(key); + assert(iter != l.iter_end()); + l.leaf_update(iter, key, val); + break; + } + case op_t::REMOVE: { + auto iter = l.find_string_key(key); + assert(iter != l.iter_end()); + l.leaf_remove(iter); + break; + } + default: + assert(0 == "Impossible"); + } +} + +} |