// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #pragma once #include #include #include #include #include #include #include #include #include "osd/recovery_types.h" namespace crimson::osd { namespace sc = boost::statechart; struct BackfillState { struct BackfillListener; struct PeeringFacade; struct PGFacade; // events comes first struct PrimaryScanned : sc::event { BackfillInterval result; PrimaryScanned(BackfillInterval&& result) : result(std::move(result)) { } }; struct ReplicaScanned : sc::event { pg_shard_t from; BackfillInterval result; ReplicaScanned(pg_shard_t from, BackfillInterval&& result) : from(std::move(from)), result(std::move(result)) { } }; struct ObjectPushed : sc::event { // TODO: implement replica management; I don't want to follow // current convention where the backend layer is responsible // for tracking replicas. hobject_t object; pg_stat_t stat; ObjectPushed(hobject_t object) : object(std::move(object)) { } }; struct Triggered : sc::event { }; private: // internal events struct RequestPrimaryScanning : sc::event { }; struct RequestReplicasScanning : sc::event { }; struct RequestWaiting : sc::event { }; struct RequestDone : sc::event { }; class ProgressTracker; public: struct Initial; struct Enqueuing; struct PrimaryScanning; struct ReplicasScanning; struct Waiting; struct Done; struct BackfillMachine : sc::state_machine { BackfillMachine(BackfillState& backfill_state, BackfillListener& backfill_listener, std::unique_ptr peering_state, std::unique_ptr pg); ~BackfillMachine(); BackfillState& backfill_state; BackfillListener& backfill_listener; std::unique_ptr peering_state; std::unique_ptr pg; }; private: template struct StateHelper { StateHelper(); ~StateHelper(); BackfillState& backfill_state() { return static_cast(this) \ ->template context().backfill_state; } BackfillListener& backfill_listener() { return static_cast(this) \ ->template context().backfill_listener; } PeeringFacade& 
peering_state() { return *static_cast(this) \ ->template context().peering_state; } PGFacade& pg() { return *static_cast(this)->template context().pg; } const PeeringFacade& peering_state() const { return *static_cast(this) \ ->template context().peering_state; } const BackfillState& backfill_state() const { return static_cast(this) \ ->template context().backfill_state; } }; public: // states struct Crashed : sc::simple_state, StateHelper { explicit Crashed(); }; struct Initial : sc::state, StateHelper { using reactions = boost::mpl::list< sc::custom_reaction, sc::transition>; explicit Initial(my_context); // initialize after triggering backfill by on_activate_complete(). // transit to Enqueuing. sc::result react(const Triggered&); }; struct Enqueuing : sc::state, StateHelper { using reactions = boost::mpl::list< sc::transition, sc::transition, sc::transition, sc::transition, sc::transition>; explicit Enqueuing(my_context); // indicate whether there is any remaining work to do when it comes // to comparing the hobject_t namespace between primary and replicas. // true doesn't necessarily mean backfill is done -- there could be // in-flight pushes or drops which had been enqueued but aren't // completed yet. static bool all_enqueued( const PeeringFacade& peering_state, const BackfillInterval& backfill_info, const std::map& peer_backfill_info); private: void maybe_update_range(); void trim_backfill_infos(); // these methods take BackfillIntervals instead of extracting them from // the state to emphasize the relationships across the main loop. bool all_emptied( const BackfillInterval& local_backfill_info, const std::map& peer_backfill_info) const; hobject_t earliest_peer_backfill( const std::map& peer_backfill_info) const; bool should_rescan_replicas( const std::map& peer_backfill_info, const BackfillInterval& backfill_info) const; // indicate whether a particular acting primary needs to scanned again // to process next piece of the hobject_t's namespace. 
// the logic is per analogy to replica_needs_scan(). See comments there. bool should_rescan_primary( const std::map& peer_backfill_info, const BackfillInterval& backfill_info) const; // the result_t is intermediary between {remove,update}_on_peers() and // updating BackfillIntervals in trim_backfilled_object_from_intervals. // This step is important because it affects the main loop's condition, // and thus deserves to be exposed instead of being called deeply from // {remove,update}_on_peers(). struct [[nodiscard]] result_t { std::set pbi_targets; hobject_t new_last_backfill_started; }; void trim_backfilled_object_from_intervals( result_t&&, hobject_t& last_backfill_started, std::map& peer_backfill_info); result_t remove_on_peers(const hobject_t& check); result_t update_on_peers(const hobject_t& check); }; struct PrimaryScanning : sc::state, StateHelper { using reactions = boost::mpl::list< sc::custom_reaction, sc::custom_reaction, sc::transition>; explicit PrimaryScanning(my_context); sc::result react(ObjectPushed); // collect scanning result and transit to Enqueuing. sc::result react(PrimaryScanned); }; struct ReplicasScanning : sc::state, StateHelper { using reactions = boost::mpl::list< sc::custom_reaction, sc::custom_reaction, sc::transition>; explicit ReplicasScanning(my_context); // collect scanning result; if all results are collected, transition // to Enqueuing will happen. sc::result react(ObjectPushed); sc::result react(ReplicaScanned); // indicate whether a particular peer should be scanned to retrieve // BackfillInterval for new range of hobject_t namespace. // true when bi.objects is exhausted, replica bi's end is not MAX, // and primary bi'begin is further than the replica's one. 
static bool replica_needs_scan( const BackfillInterval& replica_backfill_info, const BackfillInterval& local_backfill_info); private: std::set waiting_on_backfill; }; struct Waiting : sc::state, StateHelper { using reactions = boost::mpl::list< sc::custom_reaction, sc::transition>; explicit Waiting(my_context); sc::result react(ObjectPushed); }; struct Done : sc::state, StateHelper { using reactions = boost::mpl::list< sc::transition>; explicit Done(my_context); }; BackfillState(BackfillListener& backfill_listener, std::unique_ptr peering_state, std::unique_ptr pg); ~BackfillState(); void process_event( boost::intrusive_ptr evt) { backfill_machine.process_event(*std::move(evt)); } hobject_t get_last_backfill_started() const { return last_backfill_started; } private: hobject_t last_backfill_started; BackfillInterval backfill_info; std::map peer_backfill_info; BackfillMachine backfill_machine; std::unique_ptr progress_tracker; }; // BackfillListener -- an interface used by the backfill FSM to request // low-level services like issueing `MOSDPGPush` or `MOSDPGBackfillRemove`. // The goals behind the interface are: 1) unittestability; 2) possibility // to retrofit classical OSD with BackfillState. For the second reason we // never use `seastar::future` -- instead responses to the requests are // conveyed as events; see ObjectPushed as an example. 
struct BackfillState::BackfillListener {
  // ask `target` replica to scan [begin, end) of the hobject_t namespace.
  virtual void request_replica_scan(
    const pg_shard_t& target,
    const hobject_t& begin,
    const hobject_t& end) = 0;

  // ask the primary to scan its local namespace starting at `begin`.
  virtual void request_primary_scan(
    const hobject_t& begin) = 0;

  virtual void enqueue_push(
    const hobject_t& obj,
    const eversion_t& v) = 0;

  virtual void enqueue_drop(
    const pg_shard_t& target,
    const hobject_t& obj,
    const eversion_t& v) = 0;

  virtual void maybe_flush() = 0;

  virtual void update_peers_last_backfill(
    const hobject_t& new_last_backfill) = 0;

  virtual bool budget_available() const = 0;

  virtual void backfilled() = 0;

  virtual ~BackfillListener() = default;
};

// PeeringFacade -- a facade (in the GoF-defined meaning) simplifying
// the interface of PeeringState. The motivation is to have an inventory
// of behaviour that must be provided by a unit test's mock.
struct BackfillState::PeeringFacade {
  virtual hobject_t earliest_backfill() const = 0;
  virtual const std::set<pg_shard_t>& get_backfill_targets() const = 0;
  virtual const hobject_t& get_peer_last_backfill(pg_shard_t peer) const = 0;
  virtual const eversion_t& get_last_update() const = 0;
  virtual const eversion_t& get_log_tail() const = 0;

  // the performance impact of `std::function` has not been considered yet.
  // If there is any proof (from e.g. profiling) about its significance, we
  // can switch back to the template variant.
  using scan_log_func_t = std::function<void(const pg_log_entry_t&)>;
  virtual void scan_log_after(eversion_t, scan_log_func_t) const = 0;

  virtual bool is_backfill_target(pg_shard_t peer) const = 0;
  virtual void update_complete_backfill_object_stats(const hobject_t &hoid,
                                                     const pg_stat_t &stats) = 0;
  virtual bool is_backfilling() const = 0;
  virtual ~PeeringFacade() {}
};

// PGFacade -- a facade (in the GoF-defined meaning) simplifying the huge
// interface of crimson's PG class. The motivation is to have an inventory
// of behaviour that must be provided by a unit test's mock.
struct BackfillState::PGFacade { virtual const eversion_t& get_projected_last_update() const = 0; virtual ~PGFacade() {} }; class BackfillState::ProgressTracker { // TODO: apply_stat, enum class op_stage_t { enqueued_push, enqueued_drop, completed_push, }; struct registry_item_t { op_stage_t stage; std::optional stats; }; BackfillMachine& backfill_machine; std::map registry; BackfillState& backfill_state() { return backfill_machine.backfill_state; } PeeringFacade& peering_state() { return *backfill_machine.peering_state; } BackfillListener& backfill_listener() { return backfill_machine.backfill_listener; } public: ProgressTracker(BackfillMachine& backfill_machine) : backfill_machine(backfill_machine) { } bool tracked_objects_completed() const; bool enqueue_push(const hobject_t&); void enqueue_drop(const hobject_t&); void complete_to(const hobject_t&, const pg_stat_t&); }; } // namespace crimson::osd