Adding upstream version 16.2.11+ds.upstream/16.2.11+ds upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 18:45:59 +0000
commit: 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree: 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/crimson/osd/backfill_state.h
parent: Initial commit. (diff)
download: ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
1 files changed, 382 insertions, 0 deletions
diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h
new file mode 100644
index 000000000..4bd2991fb
--- /dev/null
+++ b/src/crimson/osd/backfill_state.h
@@ -0,0 +1,382 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <optional>
+
+#include <boost/statechart/custom_reaction.hpp>
+#include <boost/statechart/event.hpp>
+#include <boost/statechart/event_base.hpp>
+#include <boost/statechart/simple_state.hpp>
+#include <boost/statechart/state.hpp>
+#include <boost/statechart/state_machine.hpp>
+#include <boost/statechart/transition.hpp>
+
+#include "osd/recovery_types.h"
+
+namespace crimson::osd {
+
+namespace sc = boost::statechart;
+
+struct BackfillState {
+  struct BackfillListener;
+  struct PeeringFacade;
+  struct PGFacade;
+
+  // events comes first
+  struct PrimaryScanned : sc::event<PrimaryScanned> {
+    BackfillInterval result;
+    PrimaryScanned(BackfillInterval&& result)
+      : result(std::move(result)) {
+    }
+  };
+
+  struct ReplicaScanned : sc::event<ReplicaScanned> {
+    pg_shard_t from;
+    BackfillInterval result;
+    ReplicaScanned(pg_shard_t from, BackfillInterval&& result)
+      : from(std::move(from)),
+        result(std::move(result)) {
+    }
+  };
+
+  struct ObjectPushed : sc::event<ObjectPushed> {
+    // TODO: implement replica management; I don't want to follow
+    // current convention where the backend layer is responsible
+    // for tracking replicas.
+    hobject_t object;
+    pg_stat_t stat;
+    ObjectPushed(hobject_t object)
+      : object(std::move(object)) {
+    }
+  };
+
+  struct Triggered : sc::event<Triggered> {
+  };
+
+private:
+  // internal events
+  struct RequestPrimaryScanning : sc::event<RequestPrimaryScanning> {
+  };
+
+  struct RequestReplicasScanning : sc::event<RequestReplicasScanning> {
+  };
+
+  struct RequestWaiting : sc::event<RequestWaiting> {
+  };
+
+  struct RequestDone : sc::event<RequestDone> {
+  };
+
+  class ProgressTracker;
+
+public:
+
+  struct Initial;
+  struct Enqueuing;
+  struct PrimaryScanning;
+  struct ReplicasScanning;
+  struct Waiting;
+  struct Done;
+
+  struct BackfillMachine : sc::state_machine<BackfillMachine, Initial> {
+    BackfillMachine(BackfillState& backfill_state,
+                    BackfillListener& backfill_listener,
+                    std::unique_ptr<PeeringFacade> peering_state,
+                    std::unique_ptr<PGFacade> pg);
+    ~BackfillMachine();
+    BackfillState& backfill_state;
+    BackfillListener& backfill_listener;
+    std::unique_ptr<PeeringFacade> peering_state;
+    std::unique_ptr<PGFacade> pg;
+  };
+
+private:
+  template <class S>
+  struct StateHelper {
+    StateHelper();
+    ~StateHelper();
+
+    BackfillState& backfill_state() {
+      return static_cast<S*>(this) \
+        ->template context<BackfillMachine>().backfill_state;
+    }
+    BackfillListener& backfill_listener() {
+      return static_cast<S*>(this) \
+        ->template context<BackfillMachine>().backfill_listener;
+    }
+    PeeringFacade& peering_state() {
+      return *static_cast<S*>(this) \
+        ->template context<BackfillMachine>().peering_state;
+    }
+    PGFacade& pg() {
+      return *static_cast<S*>(this)->template context<BackfillMachine>().pg;
+    }
+
+    const PeeringFacade& peering_state() const {
+      return *static_cast<const S*>(this) \
+        ->template context<BackfillMachine>().peering_state;
+    }
+    const BackfillState& backfill_state() const {
+      return static_cast<const S*>(this) \
+        ->template context<BackfillMachine>().backfill_state;
+    }
+  };
+
+public:
+
+  // states
+  struct Crashed : sc::simple_state<Crashed, BackfillMachine>,
+                   StateHelper<Crashed> {
+    explicit Crashed();
+  };
+
+  struct Initial : sc::state<Initial, BackfillMachine>,
+                   StateHelper<Initial> {
+    using reactions = boost::mpl::list<
+      sc::custom_reaction<Triggered>,
+      sc::transition<sc::event_base, Crashed>>;
+    explicit Initial(my_context);
+    // initialize after triggering backfill by on_activate_complete().
+    // transit to Enqueuing.
+    sc::result react(const Triggered&);
+  };
+
+  struct Enqueuing : sc::state<Enqueuing, BackfillMachine>,
+                     StateHelper<Enqueuing> {
+    using reactions = boost::mpl::list<
+      sc::transition<RequestPrimaryScanning, PrimaryScanning>,
+      sc::transition<RequestReplicasScanning, ReplicasScanning>,
+      sc::transition<RequestWaiting, Waiting>,
+      sc::transition<RequestDone, Done>,
+      sc::transition<sc::event_base, Crashed>>;
+    explicit Enqueuing(my_context);
+
+    // indicate whether there is any remaining work to do when it comes
+    // to comparing the hobject_t namespace between primary and replicas.
+    // true doesn't necessarily mean backfill is done -- there could be
+    // in-flight pushes or drops which had been enqueued but aren't
+    // completed yet.
+    static bool all_enqueued(
+      const PeeringFacade& peering_state,
+      const BackfillInterval& backfill_info,
+      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info);
+
+  private:
+    void maybe_update_range();
+    void trim_backfill_infos();
+
+    // these methods take BackfillIntervals instead of extracting them from
+    // the state to emphasize the relationships across the main loop.
+    bool all_emptied(
+      const BackfillInterval& local_backfill_info,
+      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const;
+    hobject_t earliest_peer_backfill(
+      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const;
+    bool should_rescan_replicas(
+      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info,
+      const BackfillInterval& backfill_info) const;
+    // indicate whether a particular acting primary needs to scanned again
+    // to process next piece of the hobject_t's namespace.
+    // the logic is per analogy to replica_needs_scan(). See comments there.
+    bool should_rescan_primary(
+      const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info,
+      const BackfillInterval& backfill_info) const;
+
+    // the result_t is intermediary between {remove,update}_on_peers() and
+    // updating BackfillIntervals in trim_backfilled_object_from_intervals.
+    // This step is important because it affects the main loop's condition,
+    // and thus deserves to be exposed instead of being called deeply from
+    // {remove,update}_on_peers().
+    struct [[nodiscard]] result_t {
+      std::set<pg_shard_t> pbi_targets;
+      hobject_t new_last_backfill_started;
+    };
+    void trim_backfilled_object_from_intervals(
+      result_t&&,
+      hobject_t& last_backfill_started,
+      std::map<pg_shard_t, BackfillInterval>& peer_backfill_info);
+    result_t remove_on_peers(const hobject_t& check);
+    result_t update_on_peers(const hobject_t& check);
+  };
+
+  struct PrimaryScanning : sc::state<PrimaryScanning, BackfillMachine>,
+                           StateHelper<PrimaryScanning> {
+    using reactions = boost::mpl::list<
+      sc::custom_reaction<ObjectPushed>,
+      sc::custom_reaction<PrimaryScanned>,
+      sc::transition<sc::event_base, Crashed>>;
+    explicit PrimaryScanning(my_context);
+    sc::result react(ObjectPushed);
+    // collect scanning result and transit to Enqueuing.
+    sc::result react(PrimaryScanned);
+  };
+
+  struct ReplicasScanning : sc::state<ReplicasScanning, BackfillMachine>,
+                            StateHelper<ReplicasScanning> {
+    using reactions = boost::mpl::list<
+      sc::custom_reaction<ObjectPushed>,
+      sc::custom_reaction<ReplicaScanned>,
+      sc::transition<sc::event_base, Crashed>>;
+    explicit ReplicasScanning(my_context);
+    // collect scanning result; if all results are collected, transition
+    // to Enqueuing will happen.
+    sc::result react(ObjectPushed);
+    sc::result react(ReplicaScanned);
+
+    // indicate whether a particular peer should be scanned to retrieve
+    // BackfillInterval for new range of hobject_t namespace.
+    // true when bi.objects is exhausted, replica bi's end is not MAX,
+    // and primary bi'begin is further than the replica's one.
+    static bool replica_needs_scan(
+      const BackfillInterval& replica_backfill_info,
+      const BackfillInterval& local_backfill_info);
+
+  private:
+    std::set<pg_shard_t> waiting_on_backfill;
+  };
+
+  struct Waiting : sc::state<Waiting, BackfillMachine>,
+                   StateHelper<Waiting> {
+    using reactions = boost::mpl::list<
+      sc::custom_reaction<ObjectPushed>,
+      sc::transition<sc::event_base, Crashed>>;
+    explicit Waiting(my_context);
+    sc::result react(ObjectPushed);
+  };
+
+  struct Done : sc::state<Done, BackfillMachine>,
+                StateHelper<Done> {
+    using reactions = boost::mpl::list<
+      sc::transition<sc::event_base, Crashed>>;
+    explicit Done(my_context);
+  };
+
+  BackfillState(BackfillListener& backfill_listener,
+                std::unique_ptr<PeeringFacade> peering_state,
+                std::unique_ptr<PGFacade> pg);
+  ~BackfillState();
+
+  void process_event(
+    boost::intrusive_ptr<const sc::event_base> evt) {
+    backfill_machine.process_event(*std::move(evt));
+  }
+
+  hobject_t get_last_backfill_started() const {
+    return last_backfill_started;
+  }
+private:
+  hobject_t last_backfill_started;
+  BackfillInterval backfill_info;
+  std::map<pg_shard_t, BackfillInterval> peer_backfill_info;
+  BackfillMachine backfill_machine;
+  std::unique_ptr<ProgressTracker> progress_tracker;
+};
+
+// BackfillListener -- an interface used by the backfill FSM to request
+// low-level services like issueing `MOSDPGPush` or `MOSDPGBackfillRemove`.
+// The goals behind the interface are: 1) unittestability; 2) possibility
+// to retrofit classical OSD with BackfillState. For the second reason we
+// never use `seastar::future` -- instead responses to the requests are
+// conveyed as events; see ObjectPushed as an example.
+struct BackfillState::BackfillListener {
+  virtual void request_replica_scan(
+    const pg_shard_t& target,
+    const hobject_t& begin,
+    const hobject_t& end) = 0;
+
+  virtual void request_primary_scan(
+    const hobject_t& begin) = 0;
+
+  virtual void enqueue_push(
+    const hobject_t& obj,
+    const eversion_t& v) = 0;
+
+  virtual void enqueue_drop(
+    const pg_shard_t& target,
+    const hobject_t& obj,
+    const eversion_t& v) = 0;
+
+  virtual void maybe_flush() = 0;
+
+  virtual void update_peers_last_backfill(
+    const hobject_t& new_last_backfill) = 0;
+
+  virtual bool budget_available() const = 0;
+
+  virtual void backfilled() = 0;
+
+  virtual ~BackfillListener() = default;
+};
+
+// PeeringFacade -- a facade (in the GoF-defined meaning) simplifying
+// the interface of PeeringState. The motivation is to have an inventory
+// of behaviour that must be provided by a unit test's mock.
+struct BackfillState::PeeringFacade {
+  virtual hobject_t earliest_backfill() const = 0;
+  virtual const std::set<pg_shard_t>& get_backfill_targets() const = 0;
+  virtual const hobject_t& get_peer_last_backfill(pg_shard_t peer) const = 0;
+  virtual const eversion_t& get_last_update() const = 0;
+  virtual const eversion_t& get_log_tail() const = 0;
+
+  // the performance impact of `std::function` has not been considered yet.
+  // If there is any proof (from e.g. profiling) about its significance, we
+  // can switch back to the template variant.
+  using scan_log_func_t = std::function<void(const pg_log_entry_t&)>;
+  virtual void scan_log_after(eversion_t, scan_log_func_t) const = 0;
+
+  virtual bool is_backfill_target(pg_shard_t peer) const = 0;
+  virtual void update_complete_backfill_object_stats(const hobject_t &hoid,
+                                             const pg_stat_t &stats) = 0;
+  virtual bool is_backfilling() const = 0;
+  virtual ~PeeringFacade() {}
+};
+
+// PGFacade -- a facade (in the GoF-defined meaning) simplifying the huge
+// interface of crimson's PG class. The motivation is to have an inventory
+// of behaviour that must be provided by a unit test's mock.
+struct BackfillState::PGFacade {
+  virtual const eversion_t& get_projected_last_update() const = 0;
+  virtual ~PGFacade() {}
+};
+
+class BackfillState::ProgressTracker {
+  // TODO: apply_stat,
+  enum class op_stage_t {
+    enqueued_push,
+    enqueued_drop,
+    completed_push,
+  };
+
+  struct registry_item_t {
+    op_stage_t stage;
+    std::optional<pg_stat_t> stats;
+  };
+
+  BackfillMachine& backfill_machine;
+  std::map<hobject_t, registry_item_t> registry;
+
+  BackfillState& backfill_state() {
+    return backfill_machine.backfill_state;
+  }
+  PeeringFacade& peering_state() {
+    return *backfill_machine.peering_state;
+  }
+  BackfillListener& backfill_listener() {
+    return backfill_machine.backfill_listener;
+  }
+
+public:
+  ProgressTracker(BackfillMachine& backfill_machine)
+    : backfill_machine(backfill_machine) {
+  }
+
+  bool tracked_objects_completed() const;
+
+  bool enqueue_push(const hobject_t&);
+  void enqueue_drop(const hobject_t&);
+  void complete_to(const hobject_t&, const pg_stat_t&);
+};
+
+} // namespace crimson::osd
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 18:45:59 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 18:45:59 +0000
commit	19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree	42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/crimson/osd/backfill_state.h
parent	Initial commit. (diff)
download	ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip