From 19fcec84d8d7d21e796c7624e521b60d28ee21ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 20:45:59 +0200 Subject: Adding upstream version 16.2.11+ds. Signed-off-by: Daniel Baumann --- src/crimson/osd/backfill_state.h | 382 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 382 insertions(+) create mode 100644 src/crimson/osd/backfill_state.h (limited to 'src/crimson/osd/backfill_state.h') diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h new file mode 100644 index 000000000..4bd2991fb --- /dev/null +++ b/src/crimson/osd/backfill_state.h @@ -0,0 +1,382 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "osd/recovery_types.h" + +namespace crimson::osd { + +namespace sc = boost::statechart; + +struct BackfillState { + struct BackfillListener; + struct PeeringFacade; + struct PGFacade; + + // events comes first + struct PrimaryScanned : sc::event { + BackfillInterval result; + PrimaryScanned(BackfillInterval&& result) + : result(std::move(result)) { + } + }; + + struct ReplicaScanned : sc::event { + pg_shard_t from; + BackfillInterval result; + ReplicaScanned(pg_shard_t from, BackfillInterval&& result) + : from(std::move(from)), + result(std::move(result)) { + } + }; + + struct ObjectPushed : sc::event { + // TODO: implement replica management; I don't want to follow + // current convention where the backend layer is responsible + // for tracking replicas. + hobject_t object; + pg_stat_t stat; + ObjectPushed(hobject_t object) + : object(std::move(object)) { + } + }; + + struct Triggered : sc::event { + }; + +private: + // internal events + struct RequestPrimaryScanning : sc::event { + }; + + struct RequestReplicasScanning : sc::event { + }; + + struct RequestWaiting : sc::event { + }; + + struct RequestDone : sc::event { + }; + + class ProgressTracker; + +public: + + struct Initial; + struct Enqueuing; + struct PrimaryScanning; + struct ReplicasScanning; + struct Waiting; + struct Done; + + struct BackfillMachine : sc::state_machine { + BackfillMachine(BackfillState& backfill_state, + BackfillListener& backfill_listener, + std::unique_ptr peering_state, + std::unique_ptr pg); + ~BackfillMachine(); + BackfillState& backfill_state; + BackfillListener& backfill_listener; + std::unique_ptr peering_state; + std::unique_ptr pg; + }; + +private: + template + struct StateHelper { + StateHelper(); + ~StateHelper(); + + BackfillState& backfill_state() { + return static_cast(this) \ + ->template context().backfill_state; + } + BackfillListener& backfill_listener() { + return static_cast(this) \ + ->template context().backfill_listener; + } + PeeringFacade& peering_state() { + return *static_cast(this) \ + ->template context().peering_state; + } + PGFacade& pg() { + return *static_cast(this)->template context().pg; + } + + const PeeringFacade& peering_state() const { + return *static_cast(this) \ + ->template context().peering_state; + } + const BackfillState& backfill_state() const { + return static_cast(this) \ + ->template context().backfill_state; + } + }; + +public: + + // states + struct Crashed : sc::simple_state, + StateHelper { + explicit Crashed(); + }; + + struct Initial : sc::state, + StateHelper { + using reactions = boost::mpl::list< + sc::custom_reaction, + sc::transition>; + explicit Initial(my_context); + // initialize after triggering backfill by on_activate_complete(). + // transit to Enqueuing. + sc::result react(const Triggered&); + }; + + struct Enqueuing : sc::state, + StateHelper { + using reactions = boost::mpl::list< + sc::transition, + sc::transition, + sc::transition, + sc::transition, + sc::transition>; + explicit Enqueuing(my_context); + + // indicate whether there is any remaining work to do when it comes + // to comparing the hobject_t namespace between primary and replicas. + // true doesn't necessarily mean backfill is done -- there could be + // in-flight pushes or drops which had been enqueued but aren't + // completed yet. + static bool all_enqueued( + const PeeringFacade& peering_state, + const BackfillInterval& backfill_info, + const std::map& peer_backfill_info); + + private: + void maybe_update_range(); + void trim_backfill_infos(); + + // these methods take BackfillIntervals instead of extracting them from + // the state to emphasize the relationships across the main loop. + bool all_emptied( + const BackfillInterval& local_backfill_info, + const std::map& peer_backfill_info) const; + hobject_t earliest_peer_backfill( + const std::map& peer_backfill_info) const; + bool should_rescan_replicas( + const std::map& peer_backfill_info, + const BackfillInterval& backfill_info) const; + // indicate whether a particular acting primary needs to scanned again + // to process next piece of the hobject_t's namespace. + // the logic is per analogy to replica_needs_scan(). See comments there. + bool should_rescan_primary( + const std::map& peer_backfill_info, + const BackfillInterval& backfill_info) const; + + // the result_t is intermediary between {remove,update}_on_peers() and + // updating BackfillIntervals in trim_backfilled_object_from_intervals. + // This step is important because it affects the main loop's condition, + // and thus deserves to be exposed instead of being called deeply from + // {remove,update}_on_peers(). + struct [[nodiscard]] result_t { + std::set pbi_targets; + hobject_t new_last_backfill_started; + }; + void trim_backfilled_object_from_intervals( + result_t&&, + hobject_t& last_backfill_started, + std::map& peer_backfill_info); + result_t remove_on_peers(const hobject_t& check); + result_t update_on_peers(const hobject_t& check); + }; + + struct PrimaryScanning : sc::state, + StateHelper { + using reactions = boost::mpl::list< + sc::custom_reaction, + sc::custom_reaction, + sc::transition>; + explicit PrimaryScanning(my_context); + sc::result react(ObjectPushed); + // collect scanning result and transit to Enqueuing. + sc::result react(PrimaryScanned); + }; + + struct ReplicasScanning : sc::state, + StateHelper { + using reactions = boost::mpl::list< + sc::custom_reaction, + sc::custom_reaction, + sc::transition>; + explicit ReplicasScanning(my_context); + // collect scanning result; if all results are collected, transition + // to Enqueuing will happen. + sc::result react(ObjectPushed); + sc::result react(ReplicaScanned); + + // indicate whether a particular peer should be scanned to retrieve + // BackfillInterval for new range of hobject_t namespace. + // true when bi.objects is exhausted, replica bi's end is not MAX, + // and primary bi'begin is further than the replica's one. + static bool replica_needs_scan( + const BackfillInterval& replica_backfill_info, + const BackfillInterval& local_backfill_info); + + private: + std::set waiting_on_backfill; + }; + + struct Waiting : sc::state, + StateHelper { + using reactions = boost::mpl::list< + sc::custom_reaction, + sc::transition>; + explicit Waiting(my_context); + sc::result react(ObjectPushed); + }; + + struct Done : sc::state, + StateHelper { + using reactions = boost::mpl::list< + sc::transition>; + explicit Done(my_context); + }; + + BackfillState(BackfillListener& backfill_listener, + std::unique_ptr peering_state, + std::unique_ptr pg); + ~BackfillState(); + + void process_event( + boost::intrusive_ptr evt) { + backfill_machine.process_event(*std::move(evt)); + } + + hobject_t get_last_backfill_started() const { + return last_backfill_started; + } +private: + hobject_t last_backfill_started; + BackfillInterval backfill_info; + std::map peer_backfill_info; + BackfillMachine backfill_machine; + std::unique_ptr progress_tracker; +}; + +// BackfillListener -- an interface used by the backfill FSM to request +// low-level services like issueing `MOSDPGPush` or `MOSDPGBackfillRemove`. +// The goals behind the interface are: 1) unittestability; 2) possibility +// to retrofit classical OSD with BackfillState. For the second reason we +// never use `seastar::future` -- instead responses to the requests are +// conveyed as events; see ObjectPushed as an example. +struct BackfillState::BackfillListener { + virtual void request_replica_scan( + const pg_shard_t& target, + const hobject_t& begin, + const hobject_t& end) = 0; + + virtual void request_primary_scan( + const hobject_t& begin) = 0; + + virtual void enqueue_push( + const hobject_t& obj, + const eversion_t& v) = 0; + + virtual void enqueue_drop( + const pg_shard_t& target, + const hobject_t& obj, + const eversion_t& v) = 0; + + virtual void maybe_flush() = 0; + + virtual void update_peers_last_backfill( + const hobject_t& new_last_backfill) = 0; + + virtual bool budget_available() const = 0; + + virtual void backfilled() = 0; + + virtual ~BackfillListener() = default; +}; + +// PeeringFacade -- a facade (in the GoF-defined meaning) simplifying +// the interface of PeeringState. The motivation is to have an inventory +// of behaviour that must be provided by a unit test's mock. +struct BackfillState::PeeringFacade { + virtual hobject_t earliest_backfill() const = 0; + virtual const std::set& get_backfill_targets() const = 0; + virtual const hobject_t& get_peer_last_backfill(pg_shard_t peer) const = 0; + virtual const eversion_t& get_last_update() const = 0; + virtual const eversion_t& get_log_tail() const = 0; + + // the performance impact of `std::function` has not been considered yet. + // If there is any proof (from e.g. profiling) about its significance, we + // can switch back to the template variant. + using scan_log_func_t = std::function; + virtual void scan_log_after(eversion_t, scan_log_func_t) const = 0; + + virtual bool is_backfill_target(pg_shard_t peer) const = 0; + virtual void update_complete_backfill_object_stats(const hobject_t &hoid, + const pg_stat_t &stats) = 0; + virtual bool is_backfilling() const = 0; + virtual ~PeeringFacade() {} +}; + +// PGFacade -- a facade (in the GoF-defined meaning) simplifying the huge +// interface of crimson's PG class. The motivation is to have an inventory +// of behaviour that must be provided by a unit test's mock. +struct BackfillState::PGFacade { + virtual const eversion_t& get_projected_last_update() const = 0; + virtual ~PGFacade() {} +}; + +class BackfillState::ProgressTracker { + // TODO: apply_stat, + enum class op_stage_t { + enqueued_push, + enqueued_drop, + completed_push, + }; + + struct registry_item_t { + op_stage_t stage; + std::optional stats; + }; + + BackfillMachine& backfill_machine; + std::map registry; + + BackfillState& backfill_state() { + return backfill_machine.backfill_state; + } + PeeringFacade& peering_state() { + return *backfill_machine.peering_state; + } + BackfillListener& backfill_listener() { + return backfill_machine.backfill_listener; + } + +public: + ProgressTracker(BackfillMachine& backfill_machine) + : backfill_machine(backfill_machine) { + } + + bool tracked_objects_completed() const; + + bool enqueue_push(const hobject_t&); + void enqueue_drop(const hobject_t&); + void complete_to(const hobject_t&, const pg_stat_t&); +}; + +} // namespace crimson::osd -- cgit v1.2.3