From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 20:45:59 +0200
Subject: Adding upstream version 16.2.11+ds.

Signed-off-by: Daniel Baumann
---
 src/crimson/os/seastore/segment_cleaner.h | 691 ++++++++++++++++++++++++++++++
 1 file changed, 691 insertions(+)
 create mode 100644 src/crimson/os/seastore/segment_cleaner.h

diff --git a/src/crimson/os/seastore/segment_cleaner.h b/src/crimson/os/seastore/segment_cleaner.h
new file mode 100644
index 000000000..38ebd05bc
--- /dev/null
+++ b/src/crimson/os/seastore/segment_cleaner.h
@@ -0,0 +1,691 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include
+
+#include "common/ceph_time.h"
+
+#include "crimson/os/seastore/cached_extent.h"
+#include "crimson/os/seastore/journal.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/segment_manager.h"
+
+namespace crimson::os::seastore {
+class Transaction;
+
+struct segment_info_t {
+  Segment::segment_state_t state = Segment::segment_state_t::EMPTY;
+
+  // Will be non-null for any segments in the current journal
+  segment_seq_t journal_segment_seq = NULL_SEG_SEQ;
+
+
+  bool is_in_journal(journal_seq_t tail_committed) const {
+    return journal_segment_seq != NULL_SEG_SEQ &&
+      tail_committed.segment_seq <= journal_segment_seq;
+  }
+
+  bool is_empty() const {
+    return state == Segment::segment_state_t::EMPTY;
+  }
+
+  bool is_closed() const {
+    return state == Segment::segment_state_t::CLOSED;
+  }
+
+  bool is_open() const {
+    return state == Segment::segment_state_t::OPEN;
+  }
+};
+
+class SpaceTrackerI {
+public:
+  virtual int64_t allocate(
+    segment_id_t segment,
+    segment_off_t offset,
+    extent_len_t len) = 0;
+
+  virtual int64_t release(
+    segment_id_t segment,
+    segment_off_t offset,
+    extent_len_t len) = 0;
+
+  virtual int64_t get_usage(
+    segment_id_t segment) const = 0;
+
+  virtual bool equals(const SpaceTrackerI &other) const = 0;
+
+  virtual std::unique_ptr<SpaceTrackerI> make_empty() const = 0;
+
+  virtual void dump_usage(segment_id_t) const = 0;
+
+  virtual void reset() = 0;
+
+  virtual ~SpaceTrackerI() = default;
+};
+using SpaceTrackerIRef = std::unique_ptr<SpaceTrackerI>;
+
+class SpaceTrackerSimple : public SpaceTrackerI {
+  // Tracks live space for each segment
+  std::vector<int64_t> live_bytes_by_segment;
+
+  int64_t update_usage(segment_id_t segment, int64_t delta) {
+    assert(segment < live_bytes_by_segment.size());
+    live_bytes_by_segment[segment] += delta;
+    assert(live_bytes_by_segment[segment] >= 0);
+    return live_bytes_by_segment[segment];
+  }
+public:
+  SpaceTrackerSimple(size_t num_segments)
+    : live_bytes_by_segment(num_segments, 0) {}
+
+  int64_t allocate(
+    segment_id_t segment,
+    segment_off_t offset,
+    extent_len_t len) final {
+    return update_usage(segment, len);
+  }
+
+  int64_t release(
+    segment_id_t segment,
+    segment_off_t offset,
+    extent_len_t len) final {
+    return update_usage(segment, -len);
+  }
+
+  int64_t get_usage(segment_id_t segment) const final {
+    assert(segment < live_bytes_by_segment.size());
+    return live_bytes_by_segment[segment];
+  }
+
+  void dump_usage(segment_id_t) const final {}
+
+  void reset() final {
+    for (auto &i: live_bytes_by_segment)
+      i = 0;
+  }
+
+  SpaceTrackerIRef make_empty() const final {
+    return SpaceTrackerIRef(
+      new SpaceTrackerSimple(live_bytes_by_segment.size()));
+  }
+
+  bool equals(const SpaceTrackerI &other) const;
+};
+
+class SpaceTrackerDetailed : public SpaceTrackerI {
+  class SegmentMap {
+    int64_t used = 0;
+    std::vector<bool> bitmap;
+
+  public:
+    SegmentMap(size_t blocks) : bitmap(blocks, false) {}
+
+    int64_t update_usage(int64_t delta) {
+      used += delta;
+      return used;
+    }
+
+    int64_t allocate(
+      segment_id_t segment,
+      segment_off_t offset,
+      extent_len_t len,
+      const extent_len_t block_size);
+
+    int64_t release(
+      segment_id_t segment,
+      segment_off_t offset,
+      extent_len_t len,
+      const extent_len_t block_size);
+
+    int64_t get_usage() const {
+      return used;
+    }
+
+    void dump_usage(extent_len_t block_size) const;
+
+    void reset() {
+      used = 0;
+      for (auto &&i: bitmap) {
+        i = false;
+      }
+    }
+  };
+  const size_t block_size;
+  const size_t segment_size;
+
+  // Tracks live space for each segment
+  std::vector<SegmentMap> segment_usage;
+
+public:
+  SpaceTrackerDetailed(size_t num_segments, size_t segment_size, size_t block_size)
+    : block_size(block_size),
+      segment_size(segment_size),
+      segment_usage(num_segments, segment_size / block_size) {}
+
+  int64_t allocate(
+    segment_id_t segment,
+    segment_off_t offset,
+    extent_len_t len) final {
+    assert(segment < segment_usage.size());
+    return segment_usage[segment].allocate(segment, offset, len, block_size);
+  }
+
+  int64_t release(
+    segment_id_t segment,
+    segment_off_t offset,
+    extent_len_t len) final {
+    assert(segment < segment_usage.size());
+    return segment_usage[segment].release(segment, offset, len, block_size);
+  }
+
+  int64_t get_usage(segment_id_t segment) const final {
+    assert(segment < segment_usage.size());
+    return segment_usage[segment].get_usage();
+  }
+
+  void dump_usage(segment_id_t seg) const final;
+
+  void reset() final {
+    for (auto &i: segment_usage)
+      i.reset();
+  }
+
+  SpaceTrackerIRef make_empty() const final {
+    return SpaceTrackerIRef(
+      new SpaceTrackerDetailed(
+        segment_usage.size(),
+        segment_size,
+        block_size));
+  }
+
+  bool equals(const SpaceTrackerI &other) const;
+};
+
+
+class SegmentCleaner : public JournalSegmentProvider {
+public:
+  /// Config
+  struct config_t {
+    size_t num_segments = 0;
+    size_t segment_size = 0;
+    size_t block_size = 0;
+    size_t target_journal_segments = 0;
+    size_t max_journal_segments = 0;
+
+    double reclaim_ratio_hard_limit = 0;
+    // don't apply reclaim ratio with available space below this
+    double reclaim_ratio_usage_min = 0;
+
+    double available_ratio_hard_limit = 0;
+
+    static config_t default_from_segment_manager(
+      SegmentManager &manager) {
+      return config_t{
+        manager.get_num_segments(),
+        static_cast<size_t>(manager.get_segment_size()),
+        (size_t)manager.get_block_size(),
+        2,
+        4,
+        .5,
+        .95,
+        .2
+      };
+    }
+  };
+
+  /// Callback interface for querying and operating on segments
+  class ExtentCallbackInterface {
+  public:
+    virtual ~ExtentCallbackInterface() = default;
+    /**
+     * get_next_dirty_extent
+     *
+     * returns all extents with dirty_from < bound
+     */
+    using get_next_dirty_extents_ertr = crimson::errorator<>;
+    using get_next_dirty_extents_ret = get_next_dirty_extents_ertr::future<
+      std::vector<CachedExtentRef>>;
+    virtual get_next_dirty_extents_ret get_next_dirty_extents(
+      journal_seq_t bound ///< [in] return extents with dirty_from < bound
+    ) = 0;
+
+    /**
+     * rewrite_extent
+     *
+     * Updates t with operations moving the passed extents to a new
+     * segment. extent may be invalid, implementation must correctly
+     * handle finding the current instance if it is still alive and
+     * otherwise ignore it.
+     */
+    using rewrite_extent_ertr = crimson::errorator<
+      crimson::ct_error::input_output_error>;
+    using rewrite_extent_ret = rewrite_extent_ertr::future<>;
+    virtual rewrite_extent_ret rewrite_extent(
+      Transaction &t,
+      CachedExtentRef extent) = 0;
+
+    /**
+     * get_extent_if_live
+     *
+     * Returns extent at specified location if still referenced by
+     * lba_manager and not removed by t.
+     *
+     * See TransactionManager::get_extent_if_live and
+     * LBAManager::get_physical_extent_if_live.
+     */
+    using get_extent_if_live_ertr = crimson::errorator<
+      crimson::ct_error::input_output_error>;
+    using get_extent_if_live_ret = get_extent_if_live_ertr::future<
+      CachedExtentRef>;
+    virtual get_extent_if_live_ret get_extent_if_live(
+      Transaction &t,
+      extent_types_t type,
+      paddr_t addr,
+      laddr_t laddr,
+      segment_off_t len) = 0;
+
+    /**
+     * scan_extents
+     *
+     * Interface shim for Journal::scan_extents
+     */
+    using scan_extents_cursor = Journal::scan_valid_records_cursor;
+    using scan_extents_ertr = Journal::scan_extents_ertr;
+    using scan_extents_ret = Journal::scan_extents_ret;
+    virtual scan_extents_ret scan_extents(
+      scan_extents_cursor &cursor,
+      extent_len_t bytes_to_read) = 0;
+
+    /**
+     * release_segment
+     *
+     * Release segment.
+     */
+    using release_segment_ertr = SegmentManager::release_ertr;
+    using release_segment_ret = release_segment_ertr::future<>;
+    virtual release_segment_ret release_segment(
+      segment_id_t id) = 0;
+  };
+
+private:
+  const config_t config;
+
+  SpaceTrackerIRef space_tracker;
+  std::vector<segment_info_t> segments;
+  size_t empty_segments;
+  int64_t used_bytes = 0;
+  bool init_complete = false;
+
+  journal_seq_t journal_tail_target;
+  journal_seq_t journal_tail_committed;
+  journal_seq_t journal_head;
+
+  ExtentCallbackInterface *ecb = nullptr;
+
+public:
+  SegmentCleaner(config_t config, bool detailed = false)
+    : config(config),
+      space_tracker(
+        detailed ?
+        (SpaceTrackerI*)new SpaceTrackerDetailed(
+          config.num_segments,
+          config.segment_size,
+          config.block_size) :
+        (SpaceTrackerI*)new SpaceTrackerSimple(
+          config.num_segments)),
+      segments(config.num_segments),
+      empty_segments(config.num_segments) {}
+
+  get_segment_ret get_segment() final;
+
+  void close_segment(segment_id_t segment) final;
+
+  void set_journal_segment(
+    segment_id_t segment, segment_seq_t seq) final {
+    assert(segment < segments.size());
+    segments[segment].journal_segment_seq = seq;
+    assert(segments[segment].is_open());
+  }
+
+  journal_seq_t get_journal_tail_target() const final {
+    return journal_tail_target;
+  }
+
+  void update_journal_tail_committed(journal_seq_t committed) final;
+
+  void update_journal_tail_target(journal_seq_t target);
+
+  void init_journal_tail(journal_seq_t tail) {
+    journal_tail_target = journal_tail_committed = tail;
+  }
+
+  void set_journal_head(journal_seq_t head) {
+    assert(journal_head == journal_seq_t() || head >= journal_head);
+    journal_head = head;
+  }
+
+  void init_mark_segment_closed(segment_id_t segment, segment_seq_t seq) final {
+    crimson::get_logger(ceph_subsys_filestore).debug(
+      "SegmentCleaner::init_mark_segment_closed: segment {}, seq {}",
+      segment,
+      seq);
+    mark_closed(segment);
+    segments[segment].journal_segment_seq = seq;
+  }
+
+  segment_seq_t get_seq(segment_id_t id) final {
+    return segments[id].journal_segment_seq;
+  }
+
+  void mark_segment_released(segment_id_t segment) {
+    return mark_empty(segment);
+  }
+
+  void mark_space_used(
+    paddr_t addr,
+    extent_len_t len,
+    bool init_scan = false) {
+    assert(addr.segment < segments.size());
+
+    if (!init_scan && !init_complete)
+      return;
+
+    if (!init_scan) {
+      assert(segments[addr.segment].state == Segment::segment_state_t::OPEN);
+    }
+
+    used_bytes += len;
+    [[maybe_unused]] auto ret = space_tracker->allocate(
+      addr.segment,
+      addr.offset,
+      len);
+    assert(ret > 0);
+  }
+
+  void mark_space_free(
+    paddr_t addr,
+    extent_len_t len) {
+    if (!init_complete)
+      return;
+
+    used_bytes -= len;
+    assert(addr.segment < segments.size());
+
+    [[maybe_unused]] auto ret = space_tracker->release(
+      addr.segment,
+      addr.offset,
+      len);
+    assert(ret >= 0);
+  }
+
+  segment_id_t get_next_gc_target() const {
+    segment_id_t ret = NULL_SEG_ID;
+    int64_t least_live_bytes = std::numeric_limits<int64_t>::max();
+    for (segment_id_t i = 0; i < segments.size(); ++i) {
+      if (segments[i].is_closed() &&
+          !segments[i].is_in_journal(journal_tail_committed) &&
+          space_tracker->get_usage(i) < least_live_bytes) {
+        ret = i;
+        least_live_bytes = space_tracker->get_usage(i);
+      }
+    }
+    if (ret != NULL_SEG_ID) {
+      crimson::get_logger(ceph_subsys_filestore).debug(
+        "SegmentCleaner::get_next_gc_target: segment {} seq {}",
+        ret,
+        segments[ret].journal_segment_seq);
+    }
+    return ret;
+  }
+
+  SpaceTrackerIRef get_empty_space_tracker() const {
+    return space_tracker->make_empty();
+  }
+
+  void complete_init() { init_complete = true; }
+
+  void set_extent_callback(ExtentCallbackInterface *cb) {
+    ecb = cb;
+  }
+
+  bool debug_check_space(const SpaceTrackerI &tracker) {
+    return space_tracker->equals(tracker);
+  }
+
+  /**
+   * do_immediate_work
+   *
+   * Should be invoked prior to submission of any transaction,
+   * will piggy-back work required to maintain deferred work
+   * constraints.
+   */
+  using do_immediate_work_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error>;
+  using do_immediate_work_ret = do_immediate_work_ertr::future<>;
+  do_immediate_work_ret do_immediate_work(
+    Transaction &t);
+
+
+  /**
+   * do_deferred_work
+   *
+   * Should be called at idle times -- will perform background
+   * operations based on deferred work constraints.
+   *
+   * If returned timespan is non-zero, caller should pause calling
+   * back into do_deferred_work before returned timespan has elapsed,
+   * or a foreground operation occurs.
+   */
+  using do_deferred_work_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error>;
+  using do_deferred_work_ret = do_deferred_work_ertr::future<
+    ceph::timespan
+    >;
+  do_deferred_work_ret do_deferred_work(
+    Transaction &t);
+
+private:
+
+  // journal status helpers
+
+  /**
+   * rewrite_dirty
+   *
+   * Writes out dirty blocks dirtied earlier than limit.
+   */
+  using rewrite_dirty_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error>;
+  using rewrite_dirty_ret = rewrite_dirty_ertr::future<>;
+  rewrite_dirty_ret rewrite_dirty(
+    Transaction &t,
+    journal_seq_t limit);
+
+  journal_seq_t get_dirty_tail() const {
+    auto ret = journal_head;
+    ret.segment_seq -= std::min(
+      static_cast<size_t>(ret.segment_seq),
+      config.target_journal_segments);
+    return ret;
+  }
+
+  journal_seq_t get_dirty_tail_limit() const {
+    auto ret = journal_head;
+    ret.segment_seq -= std::min(
+      static_cast<size_t>(ret.segment_seq),
+      config.max_journal_segments);
+    return ret;
+  }
+
+  // GC status helpers
+  std::unique_ptr<ExtentCallbackInterface::scan_extents_cursor> scan_cursor;
+
+  /**
+   * do_gc
+   *
+   * Performs bytes worth of gc work on t.
+   */
+  using do_gc_ertr = SegmentManager::read_ertr;
+  using do_gc_ret = do_gc_ertr::future<>;
+  do_gc_ret do_gc(
+    Transaction &t,
+    size_t bytes);
+
+  size_t get_bytes_used_current_segment() const {
+    assert(journal_head != journal_seq_t());
+    return journal_head.offset.offset;
+  }
+
+  size_t get_bytes_available_current_segment() const {
+    return config.segment_size - get_bytes_used_current_segment();
+  }
+
+  /**
+   * get_bytes_scanned_current_segment
+   *
+   * Returns the number of bytes from the current gc segment that
+   * have been scanned.
+   */
+  size_t get_bytes_scanned_current_segment() const {
+    if (!scan_cursor)
+      return 0;
+
+    return scan_cursor->get_offset().offset;
+  }
+
+  size_t get_available_bytes() const {
+    return (empty_segments * config.segment_size) +
+      get_bytes_available_current_segment() +
+      get_bytes_scanned_current_segment();
+  }
+
+  size_t get_total_bytes() const {
+    return config.segment_size * config.num_segments;
+  }
+
+  size_t get_unavailable_bytes() const {
+    return get_total_bytes() - get_available_bytes();
+  }
+
+  /// Returns bytes currently occupied by live extents (not journal)
+  size_t get_used_bytes() const {
+    return used_bytes;
+  }
+
+  /// Returns the number of bytes in unavailable segments that are not live
+  size_t get_reclaimable_bytes() const {
+    return get_unavailable_bytes() - get_used_bytes();
+  }
+
+  /**
+   * get_reclaim_ratio
+   *
+   * Returns the ratio of unavailable space that is not currently used.
+   */
+  double get_reclaim_ratio() const {
+    if (get_unavailable_bytes() == 0) return 0;
+    return (double)get_reclaimable_bytes() / (double)get_unavailable_bytes();
+  }
+
+  /**
+   * get_available_ratio
+   *
+   * Returns ratio of available space to write to total space
+   */
+  double get_available_ratio() const {
+    return (double)get_available_bytes() / (double)get_total_bytes();
+  }
+
+  /**
+   * get_immediate_bytes_to_gc_for_reclaim
+   *
+   * Returns the number of bytes to gc in order to bring the
+   * reclaim ratio below reclaim_ratio_hard_limit.
+   */
+  size_t get_immediate_bytes_to_gc_for_reclaim() const {
+    if (get_reclaim_ratio() < config.reclaim_ratio_hard_limit)
+      return 0;
+
+    const size_t unavailable_target = std::max(
+      get_used_bytes() / (1.0 - config.reclaim_ratio_hard_limit),
+      (1 - config.reclaim_ratio_usage_min) * get_total_bytes());
+
+    if (unavailable_target > get_unavailable_bytes())
+      return 0;
+
+    return (get_unavailable_bytes() - unavailable_target) / get_reclaim_ratio();
+  }
+
+  /**
+   * get_immediate_bytes_to_gc_for_available
+   *
+   * Returns the number of bytes to gc in order to bring the
+   * ratio of available disk space to total disk space above
+   * available_ratio_hard_limit.
+   */
+  size_t get_immediate_bytes_to_gc_for_available() const {
+    if (get_available_ratio() > config.available_ratio_hard_limit) {
+      return 0;
+    }
+
+    const double ratio_to_make_available = config.available_ratio_hard_limit -
+      get_available_ratio();
+    return ratio_to_make_available * (double)get_total_bytes()
+      / get_reclaim_ratio();
+  }
+
+  /**
+   * get_immediate_bytes_to_gc
+   *
+   * Returns number of bytes to gc in order to restore any strict
+   * limits.
+   */
+  size_t get_immediate_bytes_to_gc() const {
+    // number of bytes to gc in order to correct reclaim ratio
+    size_t for_reclaim = get_immediate_bytes_to_gc_for_reclaim();
+
+    // number of bytes to gc in order to correct available_ratio
+    size_t for_available = get_immediate_bytes_to_gc_for_available();
+
+    return std::max(for_reclaim, for_available);
+  }
+
+  void mark_closed(segment_id_t segment) {
+    assert(segments.size() > segment);
+    if (init_complete) {
+      assert(segments[segment].is_open());
+    } else {
+      assert(segments[segment].is_empty());
+      assert(empty_segments > 0);
+      --empty_segments;
+    }
+    crimson::get_logger(ceph_subsys_filestore).debug(
+      "mark_closed: empty_segments: {}",
+      empty_segments);
+    segments[segment].state = Segment::segment_state_t::CLOSED;
+  }
+
+  void mark_empty(segment_id_t segment) {
+    assert(segments.size() > segment);
+    assert(segments[segment].is_closed());
+    assert(segments.size() > empty_segments);
+    ++empty_segments;
+    if (space_tracker->get_usage(segment) != 0) {
+      space_tracker->dump_usage(segment);
+      assert(space_tracker->get_usage(segment) == 0);
+    }
+    segments[segment].state = Segment::segment_state_t::EMPTY;
+  }
+
+  void mark_open(segment_id_t segment) {
+    assert(segments.size() > segment);
+    assert(segments[segment].is_empty());
+    assert(empty_segments > 0);
+    --empty_segments;
+    segments[segment].state = Segment::segment_state_t::OPEN;
+  }
+};
+
+}
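
Editorial illustration, not part of the patch above: the gc-trigger arithmetic documented in get_reclaim_ratio(), get_available_ratio() and the get_immediate_bytes_to_gc_* helpers can be worked through with a small standalone sketch. The byte counts below are made up, the three limits reuse the values that config_t::default_from_segment_manager() sets, and everything else mirrors the formulas visible in the header; the file name and variable names are hypothetical.

// gc_math_sketch.cc -- standalone, for illustration only.
#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical accounting state: 16 segments of 4 MiB, 3 still available,
  // 30 MiB of live extents in the closed segments.
  const double total_bytes       = 16.0 * 4 * (1 << 20);  // get_total_bytes()
  const double available_bytes   = 3.0 * 4 * (1 << 20);   // get_available_bytes()
  const double used_bytes        = 30.0 * (1 << 20);      // get_used_bytes()
  const double unavailable_bytes = total_bytes - available_bytes;
  const double reclaimable_bytes = unavailable_bytes - used_bytes;

  // Ratios as in get_reclaim_ratio() / get_available_ratio().
  const double reclaim_ratio   = reclaimable_bytes / unavailable_bytes;
  const double available_ratio = available_bytes / total_bytes;

  // Limits as set by config_t::default_from_segment_manager().
  const double reclaim_ratio_hard_limit   = .5;
  const double reclaim_ratio_usage_min    = .95;
  const double available_ratio_hard_limit = .2;

  // get_immediate_bytes_to_gc_for_reclaim(): bytes of gc needed to bring
  // the reclaim ratio back under its hard limit.
  double for_reclaim = 0;
  if (reclaim_ratio >= reclaim_ratio_hard_limit) {
    const double unavailable_target = std::max(
      used_bytes / (1.0 - reclaim_ratio_hard_limit),
      (1 - reclaim_ratio_usage_min) * total_bytes);
    if (unavailable_target <= unavailable_bytes)
      for_reclaim = (unavailable_bytes - unavailable_target) / reclaim_ratio;
  }

  // get_immediate_bytes_to_gc_for_available(): bytes of gc needed to bring
  // the available ratio back above its hard limit.
  double for_available = 0;
  if (available_ratio <= available_ratio_hard_limit)
    for_available =
      (available_ratio_hard_limit - available_ratio) * total_bytes / reclaim_ratio;

  // get_immediate_bytes_to_gc(): the larger of the two requirements.
  std::printf("reclaim_ratio=%.3f available_ratio=%.3f immediate_gc_bytes=%.0f\n",
              reclaim_ratio, available_ratio, std::max(for_reclaim, for_available));
  return 0;
}

With these particular numbers the reclaim ratio (about 0.42) stays under its 0.5 hard limit, so it is the available-ratio limit that drives the result: roughly 1.9 MiB of immediate gc work.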