summaryrefslogtreecommitdiffstats
path: root/src/crimson/os/seastore/cache.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/crimson/os/seastore/cache.cc')
-rw-r--r--src/crimson/os/seastore/cache.cc541
1 files changed, 541 insertions, 0 deletions
diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc
new file mode 100644
index 000000000..6a406c1b8
--- /dev/null
+++ b/src/crimson/os/seastore/cache.cc
@@ -0,0 +1,541 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "crimson/os/seastore/cache.h"
+#include "crimson/common/log.h"
+
+// included for get_extent_by_type
+#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h"
+#include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"
+#include "crimson/os/seastore/onode_manager/simple-fltree/onode_block.h"
+#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h"
+#include "test/crimson/seastore/test_block.h"
+
+namespace {
+ seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_filestore);
+ }
+}
+
+namespace crimson::os::seastore {
+
+Cache::Cache(SegmentManager &segment_manager) :
+ segment_manager(segment_manager) {}
+
+Cache::~Cache()
+{
+ for (auto &i: extents) {
+ logger().error("~Cache: extent {} still alive", i);
+ }
+ ceph_assert(extents.empty());
+}
+
+Cache::retire_extent_ret Cache::retire_extent_if_cached(
+ Transaction &t, paddr_t addr)
+{
+ if (auto ext = t.write_set.find_offset(addr); ext != t.write_set.end()) {
+ logger().debug("{}: found {} in t.write_set", __func__, addr);
+ t.add_to_retired_set(CachedExtentRef(&*ext));
+ return retire_extent_ertr::now();
+ } else if (auto iter = extents.find_offset(addr);
+ iter != extents.end()) {
+ auto ret = CachedExtentRef(&*iter);
+ return ret->wait_io().then([&t, ret=std::move(ret)]() mutable {
+ t.add_to_retired_set(ret);
+ return retire_extent_ertr::now();
+ });
+ } else {
+ return retire_extent_ertr::now();
+ }
+}
+
+void Cache::add_extent(CachedExtentRef ref)
+{
+ assert(ref->is_valid());
+ extents.insert(*ref);
+
+ if (ref->is_dirty()) {
+ add_to_dirty(ref);
+ } else {
+ ceph_assert(!ref->primary_ref_list_hook.is_linked());
+ }
+ logger().debug("add_extent: {}", *ref);
+}
+
+void Cache::mark_dirty(CachedExtentRef ref)
+{
+ if (ref->is_dirty()) {
+ assert(ref->primary_ref_list_hook.is_linked());
+ return;
+ }
+
+ add_to_dirty(ref);
+ ref->state = CachedExtent::extent_state_t::DIRTY;
+
+ logger().debug("mark_dirty: {}", *ref);
+}
+
+void Cache::add_to_dirty(CachedExtentRef ref)
+{
+ assert(ref->is_valid());
+ assert(!ref->primary_ref_list_hook.is_linked());
+ intrusive_ptr_add_ref(&*ref);
+ dirty.push_back(*ref);
+}
+
+void Cache::remove_extent(CachedExtentRef ref)
+{
+ logger().debug("remove_extent: {}", *ref);
+ assert(ref->is_valid());
+ extents.erase(*ref);
+
+ if (ref->is_dirty()) {
+ ceph_assert(ref->primary_ref_list_hook.is_linked());
+ dirty.erase(dirty.s_iterator_to(*ref));
+ intrusive_ptr_release(&*ref);
+ } else {
+ ceph_assert(!ref->primary_ref_list_hook.is_linked());
+ }
+}
+
+void Cache::replace_extent(CachedExtentRef next, CachedExtentRef prev)
+{
+ assert(next->get_paddr() == prev->get_paddr());
+ assert(next->version == prev->version + 1);
+ extents.replace(*next, *prev);
+
+ if (prev->is_dirty()) {
+ ceph_assert(prev->primary_ref_list_hook.is_linked());
+ auto prev_it = dirty.iterator_to(*prev);
+ dirty.insert(prev_it, *next);
+ dirty.erase(prev_it);
+ intrusive_ptr_release(&*prev);
+ intrusive_ptr_add_ref(&*next);
+ } else {
+ add_to_dirty(next);
+ }
+}
+
+CachedExtentRef Cache::alloc_new_extent_by_type(
+ Transaction &t, ///< [in, out] current transaction
+ extent_types_t type, ///< [in] type tag
+ segment_off_t length ///< [in] length
+)
+{
+ switch (type) {
+ case extent_types_t::ROOT:
+ assert(0 == "ROOT is never directly alloc'd");
+ return CachedExtentRef();
+ case extent_types_t::LADDR_INTERNAL:
+ return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length);
+ case extent_types_t::LADDR_LEAF:
+ return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length);
+ case extent_types_t::ONODE_BLOCK:
+ return alloc_new_extent<OnodeBlock>(t, length);
+ case extent_types_t::EXTMAP_INNER:
+ return alloc_new_extent<extentmap_manager::ExtMapInnerNode>(t, length);
+ case extent_types_t::EXTMAP_LEAF:
+ return alloc_new_extent<extentmap_manager::ExtMapLeafNode>(t, length);
+ case extent_types_t::TEST_BLOCK:
+ return alloc_new_extent<TestBlock>(t, length);
+ case extent_types_t::TEST_BLOCK_PHYSICAL:
+ return alloc_new_extent<TestBlockPhysical>(t, length);
+ case extent_types_t::NONE: {
+ ceph_assert(0 == "NONE is an invalid extent type");
+ return CachedExtentRef();
+ }
+ default:
+ ceph_assert(0 == "impossible");
+ return CachedExtentRef();
+ }
+}
+
+CachedExtentRef Cache::duplicate_for_write(
+ Transaction &t,
+ CachedExtentRef i) {
+ if (i->is_pending())
+ return i;
+
+ auto ret = i->duplicate_for_write();
+ if (ret->get_type() == extent_types_t::ROOT) {
+ // root must be loaded before mutate
+ assert(t.root == i);
+ t.root = ret->cast<RootBlock>();
+ } else {
+ ret->last_committed_crc = i->last_committed_crc;
+ ret->prior_instance = i;
+ t.add_mutated_extent(ret);
+ }
+
+ ret->version++;
+ ret->state = CachedExtent::extent_state_t::MUTATION_PENDING;
+ logger().debug("Cache::duplicate_for_write: {} -> {}", *i, *ret);
+ return ret;
+}
+
+std::optional<record_t> Cache::try_construct_record(Transaction &t)
+{
+ // First, validate read set
+ for (auto &i: t.read_set) {
+ if (i->state == CachedExtent::extent_state_t::INVALID)
+ return std::nullopt;
+ }
+
+ record_t record;
+
+ t.write_set.clear();
+
+ // Add new copy of mutated blocks, set_io_wait to block until written
+ record.deltas.reserve(t.mutated_block_list.size());
+ for (auto &i: t.mutated_block_list) {
+ if (!i->is_valid()) {
+ logger().debug("try_construct_record: ignoring invalid {}", *i);
+ continue;
+ }
+ logger().debug("try_construct_record: mutating {}", *i);
+
+ assert(i->prior_instance);
+ replace_extent(i, i->prior_instance);
+
+ i->prepare_write();
+ i->set_io_wait();
+
+ assert(i->get_version() > 0);
+ auto final_crc = i->get_crc32c();
+ record.deltas.push_back(
+ delta_info_t{
+ i->get_type(),
+ i->get_paddr(),
+ (i->is_logical()
+ ? i->cast<LogicalCachedExtent>()->get_laddr()
+ : L_ADDR_NULL),
+ i->last_committed_crc,
+ final_crc,
+ (segment_off_t)i->get_length(),
+ i->get_version() - 1,
+ i->get_delta()
+ });
+ i->last_committed_crc = final_crc;
+ }
+
+ if (t.root) {
+ logger().debug(
+ "{}: writing out root delta for {}",
+ __func__,
+ *t.root);
+ record.deltas.push_back(
+ delta_info_t{
+ extent_types_t::ROOT,
+ paddr_t{},
+ L_ADDR_NULL,
+ 0,
+ 0,
+ 0,
+ t.root->get_version() - 1,
+ t.root->get_delta()
+ });
+ }
+
+ // Transaction is now a go, set up in-memory cache state
+ // invalidate now invalid blocks
+ for (auto &i: t.retired_set) {
+ logger().debug("try_construct_record: retiring {}", *i);
+ ceph_assert(i->is_valid());
+ remove_extent(i);
+ i->state = CachedExtent::extent_state_t::INVALID;
+ }
+
+ record.extents.reserve(t.fresh_block_list.size());
+ for (auto &i: t.fresh_block_list) {
+ logger().debug("try_construct_record: fresh block {}", *i);
+ bufferlist bl;
+ i->prepare_write();
+ bl.append(i->get_bptr());
+ if (i->get_type() == extent_types_t::ROOT) {
+ assert(0 == "ROOT never gets written as a fresh block");
+ }
+
+ assert(bl.length() == i->get_length());
+ record.extents.push_back(extent_t{
+ i->get_type(),
+ i->is_logical()
+ ? i->cast<LogicalCachedExtent>()->get_laddr()
+ : L_ADDR_NULL,
+ std::move(bl)
+ });
+ }
+
+ return std::make_optional<record_t>(std::move(record));
+}
+
+void Cache::complete_commit(
+ Transaction &t,
+ paddr_t final_block_start,
+ journal_seq_t seq,
+ SegmentCleaner *cleaner)
+{
+ if (t.root) {
+ remove_extent(root);
+ root = t.root;
+ root->state = CachedExtent::extent_state_t::DIRTY;
+ root->on_delta_write(final_block_start);
+ root->dirty_from = seq;
+ add_extent(root);
+ logger().debug("complete_commit: new root {}", *t.root);
+ }
+
+ for (auto &i: t.fresh_block_list) {
+ i->set_paddr(final_block_start.add_relative(i->get_paddr()));
+ i->last_committed_crc = i->get_crc32c();
+ i->on_initial_write();
+
+ if (!i->is_valid()) {
+ logger().debug("complete_commit: invalid {}", *i);
+ continue;
+ }
+
+ i->state = CachedExtent::extent_state_t::CLEAN;
+ logger().debug("complete_commit: fresh {}", *i);
+ add_extent(i);
+ if (cleaner) {
+ cleaner->mark_space_used(
+ i->get_paddr(),
+ i->get_length());
+ }
+ }
+
+ // Add new copy of mutated blocks, set_io_wait to block until written
+ for (auto &i: t.mutated_block_list) {
+ logger().debug("complete_commit: mutated {}", *i);
+ assert(i->prior_instance);
+ i->on_delta_write(final_block_start);
+ i->prior_instance = CachedExtentRef();
+ if (!i->is_valid()) {
+ logger().debug("complete_commit: not dirtying invalid {}", *i);
+ continue;
+ }
+ i->state = CachedExtent::extent_state_t::DIRTY;
+ if (i->version == 1) {
+ i->dirty_from = seq;
+ }
+ }
+
+ if (cleaner) {
+ for (auto &i: t.retired_set) {
+ cleaner->mark_space_free(
+ i->get_paddr(),
+ i->get_length());
+ }
+ }
+
+ for (auto &i: t.mutated_block_list) {
+ i->complete_io();
+ }
+}
+
+void Cache::init() {
+ if (root) {
+ // initial creation will do mkfs followed by mount each of which calls init
+ remove_extent(root);
+ root = nullptr;
+ }
+ root = new RootBlock();
+ root->state = CachedExtent::extent_state_t::DIRTY;
+ add_extent(root);
+}
+
+Cache::mkfs_ertr::future<> Cache::mkfs(Transaction &t)
+{
+ return get_root(t).safe_then([this, &t](auto croot) {
+ duplicate_for_write(t, croot);
+ return mkfs_ertr::now();
+ });
+}
+
+Cache::close_ertr::future<> Cache::close()
+{
+ root.reset();
+ for (auto i = dirty.begin(); i != dirty.end(); ) {
+ auto ptr = &*i;
+ dirty.erase(i++);
+ intrusive_ptr_release(ptr);
+ }
+ return close_ertr::now();
+}
+
+Cache::replay_delta_ret
+Cache::replay_delta(
+ journal_seq_t journal_seq,
+ paddr_t record_base,
+ const delta_info_t &delta)
+{
+ if (delta.type == extent_types_t::ROOT) {
+ logger().debug("replay_delta: found root delta");
+ root->apply_delta_and_adjust_crc(record_base, delta.bl);
+ root->dirty_from = journal_seq;
+ return replay_delta_ertr::now();
+ } else {
+ auto get_extent_if_cached = [this](paddr_t addr)
+ -> replay_delta_ertr::future<CachedExtentRef> {
+ auto retiter = extents.find_offset(addr);
+ if (retiter != extents.end()) {
+ return replay_delta_ertr::make_ready_future<CachedExtentRef>(&*retiter);
+ } else {
+ return replay_delta_ertr::make_ready_future<CachedExtentRef>();
+ }
+ };
+ auto extent_fut = delta.pversion == 0 ?
+ get_extent_by_type(
+ delta.type,
+ delta.paddr,
+ delta.laddr,
+ delta.length) :
+ get_extent_if_cached(
+ delta.paddr);
+ return extent_fut.safe_then([=, &delta](auto extent) {
+ if (!extent) {
+ assert(delta.pversion > 0);
+ logger().debug(
+ "replay_delta: replaying {}, extent not present so delta is obsolete",
+ delta);
+ return;
+ }
+
+ logger().debug(
+ "replay_delta: replaying {} on {}",
+ *extent,
+ delta);
+
+ assert(extent->version == delta.pversion);
+
+ assert(extent->last_committed_crc == delta.prev_crc);
+ extent->apply_delta_and_adjust_crc(record_base, delta.bl);
+ assert(extent->last_committed_crc == delta.final_crc);
+
+ if (extent->version == 0) {
+ extent->dirty_from = journal_seq;
+ }
+ extent->version++;
+ mark_dirty(extent);
+ });
+ }
+}
+
+Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents(
+ journal_seq_t seq)
+{
+ std::vector<CachedExtentRef> ret;
+ for (auto i = dirty.begin(); i != dirty.end(); ++i) {
+ CachedExtentRef cand;
+ if (i->dirty_from < seq) {
+ assert(ret.empty() || ret.back()->dirty_from <= i->dirty_from);
+ ret.push_back(&*i);
+ } else {
+ break;
+ }
+ }
+ return seastar::do_with(
+ std::move(ret),
+ [](auto &ret) {
+ return seastar::do_for_each(
+ ret,
+ [](auto &ext) {
+ logger().debug(
+ "get_next_dirty_extents: waiting on {}",
+ *ext);
+ return ext->wait_io();
+ }).then([&ret]() mutable {
+ return seastar::make_ready_future<std::vector<CachedExtentRef>>(
+ std::move(ret));
+ });
+ });
+}
+
+Cache::get_root_ret Cache::get_root(Transaction &t)
+{
+ if (t.root) {
+ return get_root_ret(
+ get_root_ertr::ready_future_marker{},
+ t.root);
+ } else {
+ auto ret = root;
+ return ret->wait_io().then([ret, &t] {
+ t.root = ret;
+ return get_root_ret(
+ get_root_ertr::ready_future_marker{},
+ ret);
+ });
+ }
+}
+
+using StagedOnodeBlock = crimson::os::seastore::onode::SeastoreNodeExtent;
+
+Cache::get_extent_ertr::future<CachedExtentRef> Cache::get_extent_by_type(
+ extent_types_t type,
+ paddr_t offset,
+ laddr_t laddr,
+ segment_off_t length)
+{
+ return [=] {
+ switch (type) {
+ case extent_types_t::ROOT:
+ assert(0 == "ROOT is never directly read");
+ return get_extent_ertr::make_ready_future<CachedExtentRef>();
+ case extent_types_t::LADDR_INTERNAL:
+ return get_extent<lba_manager::btree::LBAInternalNode>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
+ case extent_types_t::LADDR_LEAF:
+ return get_extent<lba_manager::btree::LBALeafNode>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
+ case extent_types_t::EXTMAP_INNER:
+ return get_extent<extentmap_manager::ExtMapInnerNode>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
+ case extent_types_t::EXTMAP_LEAF:
+ return get_extent<extentmap_manager::ExtMapLeafNode>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
+ case extent_types_t::ONODE_BLOCK:
+ return get_extent<OnodeBlock>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
+ case extent_types_t::ONODE_BLOCK_STAGED:
+ return get_extent<StagedOnodeBlock>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
+ case extent_types_t::TEST_BLOCK:
+ return get_extent<TestBlock>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
+ case extent_types_t::TEST_BLOCK_PHYSICAL:
+ return get_extent<TestBlockPhysical>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
+ case extent_types_t::NONE: {
+ ceph_assert(0 == "NONE is an invalid extent type");
+ return get_extent_ertr::make_ready_future<CachedExtentRef>();
+ }
+ default:
+ ceph_assert(0 == "impossible");
+ return get_extent_ertr::make_ready_future<CachedExtentRef>();
+ }
+ }().safe_then([laddr](CachedExtentRef e) {
+ assert(e->is_logical() == (laddr != L_ADDR_NULL));
+ if (e->is_logical()) {
+ e->cast<LogicalCachedExtent>()->set_laddr(laddr);
+ }
+ return get_extent_ertr::make_ready_future<CachedExtentRef>(e);
+ });
+}
+
+}