Diffstat
-rw-r--r-- | src/osd/PeeringState.h | 2521 |
1 file changed, 2521 insertions, 0 deletions
diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h new file mode 100644 index 000000000..6901ab506 --- /dev/null +++ b/src/osd/PeeringState.h @@ -0,0 +1,2521 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include <boost/statechart/custom_reaction.hpp> +#include <boost/statechart/event.hpp> +#include <boost/statechart/simple_state.hpp> +#include <boost/statechart/state.hpp> +#include <boost/statechart/state_machine.hpp> +#include <boost/statechart/transition.hpp> +#include <boost/statechart/event_base.hpp> +#include <string> +#include <atomic> + +#include "include/ceph_assert.h" +#include "include/common_fwd.h" + +#include "PGLog.h" +#include "PGStateUtils.h" +#include "PGPeeringEvent.h" +#include "osd_types.h" +#include "osd_types_fmt.h" +#include "os/ObjectStore.h" +#include "OSDMap.h" +#include "MissingLoc.h" +#include "osd/osd_perf_counters.h" +#include "common/ostream_temp.h" + +struct PGPool { + epoch_t cached_epoch; + int64_t id; + std::string name; + + pg_pool_t info; + SnapContext snapc; // the default pool snapc, ready to go. + + PGPool(OSDMapRef map, int64_t i, const pg_pool_t& info, + const std::string& name) + : cached_epoch(map->get_epoch()), + id(i), + name(name), + info(info) { + snapc = info.get_snap_context(); + } + + void update(OSDMapRef map); + + ceph::timespan get_readable_interval(ConfigProxy &conf) const { + double v = 0; + if (info.opts.get(pool_opts_t::READ_LEASE_INTERVAL, &v)) { + return ceph::make_timespan(v); + } else { + auto hbi = conf->osd_heartbeat_grace; + auto fac = conf->osd_pool_default_read_lease_ratio; + return ceph::make_timespan(hbi * fac); + } + } +}; + +template <> +struct fmt::formatter<PGPool> { + template <typename ParseContext> + constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } + + template <typename FormatContext> + auto format(const PGPool& pool, FormatContext& ctx) + { + return fmt::format_to(ctx.out(), + "{}/{}({})", + pool.id, + pool.name, + pool.info); + } +}; + +struct PeeringCtx; + +// [primary only] content recovery state +struct BufferedRecoveryMessages { +#if defined(WITH_SEASTAR) + std::map<int, std::vector<MessageURef>> message_map; +#else + std::map<int, std::vector<MessageRef>> message_map; +#endif + + BufferedRecoveryMessages() = default; + BufferedRecoveryMessages(PeeringCtx &ctx); + + void accept_buffered_messages(BufferedRecoveryMessages &m) { + for (auto &[target, ls] : m.message_map) { + auto &ovec = message_map[target]; + // put buffered messages in front + ls.reserve(ls.size() + ovec.size()); + ls.insert(ls.end(), std::make_move_iterator(ovec.begin()), std::make_move_iterator(ovec.end())); + ovec.clear(); + ovec.swap(ls); + } + } + + template <class MsgT> // MsgT = MessageRef for ceph-osd and MessageURef for crimson-osd + void send_osd_message(int target, MsgT&& m) { + message_map[target].emplace_back(std::forward<MsgT>(m)); + } + void send_notify(int to, const pg_notify_t &n); + void send_query(int to, spg_t spgid, const pg_query_t &q); + void send_info(int to, spg_t to_spgid, + epoch_t min_epoch, epoch_t cur_epoch, + const pg_info_t &info, + std::optional<pg_lease_t> lease = {}, + std::optional<pg_lease_ack_t> lease_ack = {}); +}; + +struct HeartbeatStamps : public RefCountedObject { + mutable ceph::mutex lock = ceph::make_mutex("HeartbeatStamps::lock"); + + const int osd; + + // we maintain an upper and lower bound on the delta between our local + // mono_clock time (minus the startup_time) to the peer OSD's mono_clock 
+  // time (minus its startup_time).
+  //
+  // delta is (remote_clock_time - local_clock_time), so that
+  // local_time + delta -> peer_time, and peer_time - delta -> local_time.
+  //
+  // we have an upper and lower bound value on this delta, meaning the
+  // value of the remote clock is somewhere between [my_time + lb, my_time + ub]
+  //
+  // conversely, if we have a remote timestamp T, then that is
+  // [T - ub, T - lb] in terms of the local clock.  i.e., if you are
+  // subtracting the delta, then take care that you swap the role of the
+  // lb and ub values.
+
+  /// lower bound on peer clock - local clock
+  std::optional<ceph::signedspan> peer_clock_delta_lb;
+
+  /// upper bound on peer clock - local clock
+  std::optional<ceph::signedspan> peer_clock_delta_ub;
+
+  /// highest up_from we've seen from this rank
+  epoch_t up_from = 0;
+
+  void print(std::ostream& out) const {
+    std::lock_guard l(lock);
+    out << "hbstamp(osd." << osd << " up_from " << up_from
+        << " peer_clock_delta [";
+    if (peer_clock_delta_lb) {
+      out << *peer_clock_delta_lb;
+    }
+    out << ",";
+    if (peer_clock_delta_ub) {
+      out << *peer_clock_delta_ub;
+    }
+    out << "])";
+  }
+
+  void sent_ping(std::optional<ceph::signedspan> *delta_ub) {
+    std::lock_guard l(lock);
+    // the non-primaries need a lower bound on remote clock - local clock.  if
+    // we assume the transit for the last ping_reply was
+    // instantaneous, that would be (the negative of) our last
+    // peer_clock_delta_lb value.
+    if (peer_clock_delta_lb) {
+      *delta_ub = - *peer_clock_delta_lb;
+    }
+  }
+
+  void got_ping(epoch_t this_up_from,
+                ceph::signedspan now,
+                ceph::signedspan peer_send_stamp,
+                std::optional<ceph::signedspan> delta_ub,
+                ceph::signedspan *out_delta_ub) {
+    std::lock_guard l(lock);
+    if (this_up_from < up_from) {
+      return;
+    }
+    if (this_up_from > up_from) {
+      up_from = this_up_from;
+    }
+    peer_clock_delta_lb = peer_send_stamp - now;
+    peer_clock_delta_ub = delta_ub;
+    *out_delta_ub = - *peer_clock_delta_lb;
+  }
+
+  void got_ping_reply(ceph::signedspan now,
+                      ceph::signedspan peer_send_stamp,
+                      std::optional<ceph::signedspan> delta_ub) {
+    std::lock_guard l(lock);
+    peer_clock_delta_lb = peer_send_stamp - now;
+    peer_clock_delta_ub = delta_ub;
+  }
+
+private:
+  FRIEND_MAKE_REF(HeartbeatStamps);
+  HeartbeatStamps(int o)
+    : RefCountedObject(NULL),
+      osd(o) {}
+};
+using HeartbeatStampsRef = ceph::ref_t<HeartbeatStamps>;
+
+inline std::ostream& operator<<(std::ostream& out, const HeartbeatStamps& hb)
+{
+  hb.print(out);
+  return out;
+}
+
+
+struct PeeringCtx : BufferedRecoveryMessages {
+  ObjectStore::Transaction transaction;
+  HBHandle* handle = nullptr;
+
+  PeeringCtx() = default;
+
+  PeeringCtx(const PeeringCtx &) = delete;
+  PeeringCtx &operator=(const PeeringCtx &) = delete;
+
+  PeeringCtx(PeeringCtx &&) = default;
+  PeeringCtx &operator=(PeeringCtx &&) = default;
+
+  void reset_transaction() {
+    transaction = ObjectStore::Transaction();
+  }
+};
+
+/**
+ * Wraps PeeringCtx to hide the difference between buffering messages to
+ * be sent after flush or immediately.
+ */ +struct PeeringCtxWrapper { + utime_t start_time; + BufferedRecoveryMessages &msgs; + ObjectStore::Transaction &transaction; + HBHandle * const handle = nullptr; + + PeeringCtxWrapper(PeeringCtx &wrapped) : + msgs(wrapped), + transaction(wrapped.transaction), + handle(wrapped.handle) {} + + PeeringCtxWrapper(BufferedRecoveryMessages &buf, PeeringCtx &wrapped) + : msgs(buf), + transaction(wrapped.transaction), + handle(wrapped.handle) {} + + PeeringCtxWrapper(PeeringCtxWrapper &&ctx) = default; + + template <class MsgT> // MsgT = MessageRef for ceph-osd and MessageURef for crimson-osd + void send_osd_message(int target, MsgT&& m) { + msgs.send_osd_message(target, std::forward<MsgT>(m)); + } + void send_notify(int to, const pg_notify_t &n) { + msgs.send_notify(to, n); + } + void send_query(int to, spg_t spgid, const pg_query_t &q) { + msgs.send_query(to, spgid, q); + } + void send_info(int to, spg_t to_spgid, + epoch_t min_epoch, epoch_t cur_epoch, + const pg_info_t &info, + std::optional<pg_lease_t> lease = {}, + std::optional<pg_lease_ack_t> lease_ack = {}) { + msgs.send_info(to, to_spgid, min_epoch, cur_epoch, info, + lease, lease_ack); + } +}; + +/* Encapsulates PG recovery process */ +class PeeringState : public MissingLoc::MappingInfo { +public: + struct PeeringListener : public EpochSource { + /// Prepare t with written information + virtual void prepare_write( + pg_info_t &info, + pg_info_t &last_written_info, + PastIntervals &past_intervals, + PGLog &pglog, + bool dirty_info, + bool dirty_big_info, + bool need_write_epoch, + ObjectStore::Transaction &t) = 0; + + /// Notify that info/history changed (generally to update scrub registration) + virtual void on_info_history_change() = 0; + + /// Notify PG that Primary/Replica status has changed (to update scrub registration) + virtual void on_primary_status_change(bool was_primary, bool now_primary) = 0; + + /// Need to reschedule next scrub. Assuming no change in role + virtual void reschedule_scrub() = 0; + + /// Notify that a scrub has been requested + virtual void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) = 0; + + /// Return current snap_trimq size + virtual uint64_t get_snap_trimq_size() const = 0; + + /// Send cluster message to osd + #if defined(WITH_SEASTAR) + virtual void send_cluster_message( + int osd, MessageURef m, epoch_t epoch, bool share_map_update=false) = 0; + #else + virtual void send_cluster_message( + int osd, MessageRef m, epoch_t epoch, bool share_map_update=false) = 0; + #endif + /// Send pg_created to mon + virtual void send_pg_created(pg_t pgid) = 0; + + virtual ceph::signedspan get_mnow() const = 0; + virtual HeartbeatStampsRef get_hb_stamps(int peer) = 0; + virtual void schedule_renew_lease(epoch_t plr, ceph::timespan delay) = 0; + virtual void queue_check_readable(epoch_t lpr, ceph::timespan delay) = 0; + virtual void recheck_readable() = 0; + + virtual unsigned get_target_pg_log_entries() const = 0; + + // ============ Flush state ================== + /** + * try_flush_or_schedule_async() + * + * If true, caller may assume all past operations on this pg + * have been flushed. Else, caller will receive an on_flushed() + * call once the flush has completed. + */ + virtual bool try_flush_or_schedule_async() = 0; + /// Arranges for a commit on t to call on_flushed() once flushed. 
+    virtual void start_flush_on_transaction(
+      ObjectStore::Transaction &t) = 0;
+    /// Notification that all outstanding flushes for interval have completed
+    virtual void on_flushed() = 0;
+
+    //============= Recovery ====================
+    /// Arrange for event to be queued after delay
+    virtual void schedule_event_after(
+      PGPeeringEventRef event,
+      float delay) = 0;
+    /**
+     * request_local_background_io_reservation
+     *
+     * Request reservation at priority with on_grant queued on grant
+     * and on_preempt on preempt
+     */
+    virtual void request_local_background_io_reservation(
+      unsigned priority,
+      PGPeeringEventURef on_grant,
+      PGPeeringEventURef on_preempt) = 0;
+    /// Modify pending local background reservation request priority
+    virtual void update_local_background_io_priority(
+      unsigned priority) = 0;
+    /// Cancel pending local background reservation request
+    virtual void cancel_local_background_io_reservation() = 0;
+
+    /**
+     * request_remote_recovery_reservation
+     *
+     * Request reservation at priority with on_grant queued on grant
+     * and on_preempt on preempt
+     */
+    virtual void request_remote_recovery_reservation(
+      unsigned priority,
+      PGPeeringEventURef on_grant,
+      PGPeeringEventURef on_preempt) = 0;
+    /// Cancel pending remote background reservation request
+    virtual void cancel_remote_recovery_reservation() = 0;
+
+    /// Arrange for on_commit to be queued upon commit of t
+    virtual void schedule_event_on_commit(
+      ObjectStore::Transaction &t,
+      PGPeeringEventRef on_commit) = 0;
+
+    //============================ HB =============================
+    /// Update hb set to peers
+    virtual void update_heartbeat_peers(std::set<int> peers) = 0;
+
+    /// Set targets being probed in this interval
+    virtual void set_probe_targets(const std::set<pg_shard_t> &probe_set) = 0;
+    /// Clear targets being probed in this interval
+    virtual void clear_probe_targets() = 0;
+
+    /// Queue for a pg_temp of wanted
+    virtual void queue_want_pg_temp(const std::vector<int> &wanted) = 0;
+    /// Clear queue for a pg_temp of wanted
+    virtual void clear_want_pg_temp() = 0;
+
+    /// Arrange for stats to be shipped to mon to be updated for this pg
+    virtual void publish_stats_to_osd() = 0;
+    /// Clear stats to be shipped to mon for this pg
+    virtual void clear_publish_stats() = 0;
+
+    /// Notification to check outstanding operation targets
+    virtual void check_recovery_sources(const OSDMapRef& newmap) = 0;
+    /// Notification to check outstanding blocklist
+    virtual void check_blocklisted_watchers() = 0;
+    /// Notification to clear state associated with primary
+    virtual void clear_primary_state() = 0;
+
+    // =================== Event notification ====================
+    virtual void on_pool_change() = 0;
+    virtual void on_role_change() = 0;
+    virtual void on_change(ObjectStore::Transaction &t) = 0;
+    virtual void on_activate(interval_set<snapid_t> to_trim) = 0;
+    virtual void on_activate_complete() = 0;
+    virtual void on_new_interval() = 0;
+    virtual Context *on_clean() = 0;
+    virtual void on_activate_committed() = 0;
+    virtual void on_active_exit() = 0;
+
+    // ====================== PG deletion =======================
+    /// Notification of removal complete, t must be populated to complete removal
+    virtual void on_removal(ObjectStore::Transaction &t) = 0;
+    /// Perform incremental removal work
+    virtual std::pair<ghobject_t, bool> do_delete_work(
+      ObjectStore::Transaction &t, ghobject_t _next) = 0;
+
+    // ======================= PG Merge =========================
+    virtual void clear_ready_to_merge() = 0;
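The request_*_reservation hooks above hand the reserver two queued events: one fires when the slot is granted, the other if a higher-priority PG later preempts it. A toy single-slot reserver showing that contract (illustrative types only; the real OSD routes PGPeeringEvents through its async reserver rather than invoking std::function hooks directly):

#include <functional>
#include <iostream>

// Toy single-slot reserver sketching the on_grant/on_preempt contract.
struct ToyReserver {
  unsigned cur_prio = 0;
  std::function<void()> cur_preempt;   // preempt hook of the current holder

  void request(unsigned prio,
               std::function<void()> on_grant,
               std::function<void()> on_preempt) {
    if (prio > cur_prio) {
      if (cur_preempt)
        cur_preempt();                 // evict the lower-priority holder
      cur_prio = prio;
      cur_preempt = std::move(on_preempt);
      on_grant();                      // new holder may proceed
    }
    // else: a real reserver would queue the request until the slot frees
  }
};

int main() {
  ToyReserver r;
  r.request(10, []{ std::cout << "backfill granted\n"; },
                []{ std::cout << "backfill preempted\n"; });
  r.request(20, []{ std::cout << "recovery granted\n"; },
                []{ std::cout << "recovery preempted\n"; });
  // prints: backfill granted, backfill preempted, recovery granted
}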
+ virtual void set_not_ready_to_merge_target(pg_t pgid, pg_t src) = 0; + virtual void set_not_ready_to_merge_source(pg_t pgid) = 0; + virtual void set_ready_to_merge_target(eversion_t lu, epoch_t les, epoch_t lec) = 0; + virtual void set_ready_to_merge_source(eversion_t lu) = 0; + + // ==================== Std::map notifications =================== + virtual void on_active_actmap() = 0; + virtual void on_active_advmap(const OSDMapRef &osdmap) = 0; + virtual epoch_t cluster_osdmap_trim_lower_bound() = 0; + + // ============ recovery reservation notifications ========== + virtual void on_backfill_reserved() = 0; + virtual void on_backfill_canceled() = 0; + virtual void on_recovery_reserved() = 0; + + // ================recovery space accounting ================ + virtual bool try_reserve_recovery_space( + int64_t primary_num_bytes, int64_t local_num_bytes) = 0; + virtual void unreserve_recovery_space() = 0; + + // ================== Peering log events ==================== + /// Get handler for rolling forward/back log entries + virtual PGLog::LogEntryHandlerRef get_log_handler( + ObjectStore::Transaction &t) = 0; + + // ============ On disk representation changes ============== + virtual void rebuild_missing_set_with_deletes(PGLog &pglog) = 0; + + // ======================= Logging ========================== + virtual PerfCounters &get_peering_perf() = 0; + virtual PerfCounters &get_perf_logger() = 0; + virtual void log_state_enter(const char *state) = 0; + virtual void log_state_exit( + const char *state_name, utime_t enter_time, + uint64_t events, utime_t event_dur) = 0; + virtual void dump_recovery_info(ceph::Formatter *f) const = 0; + + virtual OstreamTemp get_clog_info() = 0; + virtual OstreamTemp get_clog_error() = 0; + virtual OstreamTemp get_clog_debug() = 0; + + virtual ~PeeringListener() {} + }; + + struct QueryState : boost::statechart::event< QueryState > { + ceph::Formatter *f; + explicit QueryState(ceph::Formatter *f) : f(f) {} + void print(std::ostream *out) const { + *out << "Query"; + } + }; + + struct QueryUnfound : boost::statechart::event< QueryUnfound > { + ceph::Formatter *f; + explicit QueryUnfound(ceph::Formatter *f) : f(f) {} + void print(std::ostream *out) const { + *out << "QueryUnfound"; + } + }; + + struct AdvMap : boost::statechart::event< AdvMap > { + OSDMapRef osdmap; + OSDMapRef lastmap; + std::vector<int> newup, newacting; + int up_primary, acting_primary; + AdvMap( + OSDMapRef osdmap, OSDMapRef lastmap, + std::vector<int>& newup, int up_primary, + std::vector<int>& newacting, int acting_primary): + osdmap(osdmap), lastmap(lastmap), + newup(newup), + newacting(newacting), + up_primary(up_primary), + acting_primary(acting_primary) {} + void print(std::ostream *out) const { + *out << "AdvMap"; + } + }; + + struct ActMap : boost::statechart::event< ActMap > { + ActMap() : boost::statechart::event< ActMap >() {} + void print(std::ostream *out) const { + *out << "ActMap"; + } + }; + struct Activate : boost::statechart::event< Activate > { + epoch_t activation_epoch; + explicit Activate(epoch_t q) : boost::statechart::event< Activate >(), + activation_epoch(q) {} + void print(std::ostream *out) const { + *out << "Activate from " << activation_epoch; + } + }; + struct ActivateCommitted : boost::statechart::event< ActivateCommitted > { + epoch_t epoch; + epoch_t activation_epoch; + explicit ActivateCommitted(epoch_t e, epoch_t ae) + : boost::statechart::event< ActivateCommitted >(), + epoch(e), + activation_epoch(ae) {} + void print(std::ostream *out) const { + *out 
<< "ActivateCommitted from " << activation_epoch + << " processed at " << epoch; + } + }; +public: + struct UnfoundBackfill : boost::statechart::event<UnfoundBackfill> { + explicit UnfoundBackfill() {} + void print(std::ostream *out) const { + *out << "UnfoundBackfill"; + } + }; + struct UnfoundRecovery : boost::statechart::event<UnfoundRecovery> { + explicit UnfoundRecovery() {} + void print(std::ostream *out) const { + *out << "UnfoundRecovery"; + } + }; + + struct RequestScrub : boost::statechart::event<RequestScrub> { + scrub_level_t deep; + scrub_type_t repair; + explicit RequestScrub(bool d, bool r) : deep(scrub_level_t(d)), repair(scrub_type_t(r)) {} + void print(std::ostream *out) const { + *out << "RequestScrub(" << ((deep==scrub_level_t::deep) ? "deep" : "shallow") + << ((repair==scrub_type_t::do_repair) ? " repair)" : ")"); + } + }; + + TrivialEvent(Initialize) + TrivialEvent(GotInfo) + TrivialEvent(NeedUpThru) + TrivialEvent(Backfilled) + TrivialEvent(LocalBackfillReserved) + TrivialEvent(RejectTooFullRemoteReservation) + TrivialEvent(RequestBackfill) + TrivialEvent(RemoteRecoveryPreempted) + TrivialEvent(RemoteBackfillPreempted) + TrivialEvent(BackfillTooFull) + TrivialEvent(RecoveryTooFull) + + TrivialEvent(MakePrimary) + TrivialEvent(MakeStray) + TrivialEvent(NeedActingChange) + TrivialEvent(IsIncomplete) + TrivialEvent(IsDown) + + TrivialEvent(AllReplicasRecovered) + TrivialEvent(DoRecovery) + TrivialEvent(LocalRecoveryReserved) + TrivialEvent(AllRemotesReserved) + TrivialEvent(AllBackfillsReserved) + TrivialEvent(GoClean) + + TrivialEvent(AllReplicasActivated) + + TrivialEvent(IntervalFlush) + + TrivialEvent(DeleteStart) + TrivialEvent(DeleteSome) + + TrivialEvent(SetForceRecovery) + TrivialEvent(UnsetForceRecovery) + TrivialEvent(SetForceBackfill) + TrivialEvent(UnsetForceBackfill) + + TrivialEvent(DeleteReserved) + TrivialEvent(DeleteInterrupted) + + TrivialEvent(CheckReadable) + + void start_handle(PeeringCtx *new_ctx); + void end_handle(); + void begin_block_outgoing(); + void end_block_outgoing(); + void clear_blocked_outgoing(); + private: + + /* States */ + struct Initial; + class PeeringMachine : public boost::statechart::state_machine< PeeringMachine, Initial > { + public: + PeeringState *state; + PGStateHistory *state_history; + CephContext *cct; + spg_t spgid; + DoutPrefixProvider *dpp; + PeeringListener *pl; + + utime_t event_time; + uint64_t event_count; + + void clear_event_counters() { + event_time = utime_t(); + event_count = 0; + } + + void log_enter(const char *state_name); + void log_exit(const char *state_name, utime_t duration); + + PeeringMachine( + PeeringState *state, CephContext *cct, + spg_t spgid, + DoutPrefixProvider *dpp, + PeeringListener *pl, + PGStateHistory *state_history) : + state(state), + state_history(state_history), + cct(cct), spgid(spgid), + dpp(dpp), pl(pl), + event_count(0) {} + + /* Accessor functions for state methods */ + ObjectStore::Transaction& get_cur_transaction() { + ceph_assert(state->rctx); + return state->rctx->transaction; + } + + PeeringCtxWrapper &get_recovery_ctx() { + assert(state->rctx); + return *(state->rctx); + } + + void send_notify(int to, const pg_notify_t &n) { + ceph_assert(state->rctx); + state->rctx->send_notify(to, n); + } + void send_query(int to, const pg_query_t &query) { + state->rctx->send_query( + to, + spg_t(spgid.pgid, query.to), + query); + } + }; + friend class PeeringMachine; + + /* States */ + // Initial + // Reset + // Start + // Started + // Primary + // WaitActingChange + // Peering + // 
GetInfo + // GetLog + // GetMissing + // WaitUpThru + // Incomplete + // Active + // Activating + // Clean + // Recovered + // Backfilling + // WaitRemoteBackfillReserved + // WaitLocalBackfillReserved + // NotBackfilling + // NotRecovering + // Recovering + // WaitRemoteRecoveryReserved + // WaitLocalRecoveryReserved + // ReplicaActive + // RepNotRecovering + // RepRecovering + // RepWaitBackfillReserved + // RepWaitRecoveryReserved + // Stray + // ToDelete + // WaitDeleteReserved + // Deleting + // Crashed + + struct Crashed : boost::statechart::state< Crashed, PeeringMachine >, NamedState { + explicit Crashed(my_context ctx); + }; + + struct Reset; + + struct Initial : boost::statechart::state< Initial, PeeringMachine >, NamedState { + explicit Initial(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::transition< Initialize, Reset >, + boost::statechart::custom_reaction< NullEvt >, + boost::statechart::transition< boost::statechart::event_base, Crashed > + > reactions; + + boost::statechart::result react(const MNotifyRec&); + boost::statechart::result react(const MInfoRec&); + boost::statechart::result react(const MLogRec&); + boost::statechart::result react(const boost::statechart::event_base&) { + return discard_event(); + } + }; + + struct Reset : boost::statechart::state< Reset, PeeringMachine >, NamedState { + explicit Reset(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< AdvMap >, + boost::statechart::custom_reaction< ActMap >, + boost::statechart::custom_reaction< NullEvt >, + boost::statechart::custom_reaction< IntervalFlush >, + boost::statechart::transition< boost::statechart::event_base, Crashed > + > reactions; + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const AdvMap&); + boost::statechart::result react(const ActMap&); + boost::statechart::result react(const IntervalFlush&); + boost::statechart::result react(const boost::statechart::event_base&) { + return discard_event(); + } + }; + + struct Start; + + struct Started : boost::statechart::state< Started, PeeringMachine, Start >, NamedState { + explicit Started(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< AdvMap >, + boost::statechart::custom_reaction< IntervalFlush >, + // ignored + boost::statechart::custom_reaction< NullEvt >, + boost::statechart::custom_reaction<SetForceRecovery>, + boost::statechart::custom_reaction<UnsetForceRecovery>, + boost::statechart::custom_reaction<SetForceBackfill>, + boost::statechart::custom_reaction<UnsetForceBackfill>, + boost::statechart::custom_reaction<RequestScrub>, + boost::statechart::custom_reaction<CheckReadable>, + // crash + boost::statechart::transition< boost::statechart::event_base, Crashed > + > reactions; + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const AdvMap&); + boost::statechart::result react(const IntervalFlush&); + boost::statechart::result react(const boost::statechart::event_base&) { + return discard_event(); + } + }; + + struct Primary; + struct Stray; + + struct Start : boost::statechart::state< Start, Started >, 
NamedState { + explicit Start(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::transition< MakePrimary, Primary >, + boost::statechart::transition< MakeStray, Stray > + > reactions; + }; + + struct Peering; + struct WaitActingChange; + struct Incomplete; + struct Down; + + struct Primary : boost::statechart::state< Primary, Started, Peering >, NamedState { + explicit Primary(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::custom_reaction< ActMap >, + boost::statechart::custom_reaction< MNotifyRec >, + boost::statechart::custom_reaction<SetForceRecovery>, + boost::statechart::custom_reaction<UnsetForceRecovery>, + boost::statechart::custom_reaction<SetForceBackfill>, + boost::statechart::custom_reaction<UnsetForceBackfill>, + boost::statechart::custom_reaction<RequestScrub> + > reactions; + boost::statechart::result react(const ActMap&); + boost::statechart::result react(const MNotifyRec&); + boost::statechart::result react(const SetForceRecovery&); + boost::statechart::result react(const UnsetForceRecovery&); + boost::statechart::result react(const SetForceBackfill&); + boost::statechart::result react(const UnsetForceBackfill&); + boost::statechart::result react(const RequestScrub&); + }; + + struct WaitActingChange : boost::statechart::state< WaitActingChange, Primary>, + NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< AdvMap >, + boost::statechart::custom_reaction< MLogRec >, + boost::statechart::custom_reaction< MInfoRec >, + boost::statechart::custom_reaction< MNotifyRec > + > reactions; + explicit WaitActingChange(my_context ctx); + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const AdvMap&); + boost::statechart::result react(const MLogRec&); + boost::statechart::result react(const MInfoRec&); + boost::statechart::result react(const MNotifyRec&); + void exit(); + }; + + struct GetInfo; + struct Active; + + struct Peering : boost::statechart::state< Peering, Primary, GetInfo >, NamedState { + PastIntervals::PriorSet prior_set; + bool history_les_bound; //< need osd_find_best_info_ignore_history_les + + explicit Peering(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::transition< Activate, Active >, + boost::statechart::custom_reaction< AdvMap > + > reactions; + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const AdvMap &advmap); + }; + + struct WaitLocalRecoveryReserved; + struct Activating; + struct Active : boost::statechart::state< Active, Primary, Activating >, NamedState { + explicit Active(my_context ctx); + void exit(); + + const std::set<pg_shard_t> remote_shards_to_reserve_recovery; + const std::set<pg_shard_t> remote_shards_to_reserve_backfill; + bool all_replicas_activated; + + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< ActMap >, + boost::statechart::custom_reaction< AdvMap >, + boost::statechart::custom_reaction< MInfoRec >, + boost::statechart::custom_reaction< MNotifyRec >, + 
boost::statechart::custom_reaction< MLogRec >, + boost::statechart::custom_reaction< MTrim >, + boost::statechart::custom_reaction< Backfilled >, + boost::statechart::custom_reaction< ActivateCommitted >, + boost::statechart::custom_reaction< AllReplicasActivated >, + boost::statechart::custom_reaction< DeferRecovery >, + boost::statechart::custom_reaction< DeferBackfill >, + boost::statechart::custom_reaction< UnfoundRecovery >, + boost::statechart::custom_reaction< UnfoundBackfill >, + boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>, + boost::statechart::custom_reaction< RemoteReservationRevoked>, + boost::statechart::custom_reaction< DoRecovery>, + boost::statechart::custom_reaction< RenewLease>, + boost::statechart::custom_reaction< MLeaseAck>, + boost::statechart::custom_reaction< CheckReadable> + > reactions; + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const ActMap&); + boost::statechart::result react(const AdvMap&); + boost::statechart::result react(const MInfoRec& infoevt); + boost::statechart::result react(const MNotifyRec& notevt); + boost::statechart::result react(const MLogRec& logevt); + boost::statechart::result react(const MTrim& trimevt); + boost::statechart::result react(const Backfilled&) { + return discard_event(); + } + boost::statechart::result react(const ActivateCommitted&); + boost::statechart::result react(const AllReplicasActivated&); + boost::statechart::result react(const RenewLease&); + boost::statechart::result react(const MLeaseAck&); + boost::statechart::result react(const DeferRecovery& evt) { + return discard_event(); + } + boost::statechart::result react(const DeferBackfill& evt) { + return discard_event(); + } + boost::statechart::result react(const UnfoundRecovery& evt) { + return discard_event(); + } + boost::statechart::result react(const UnfoundBackfill& evt) { + return discard_event(); + } + boost::statechart::result react(const RemoteReservationRevokedTooFull&) { + return discard_event(); + } + boost::statechart::result react(const RemoteReservationRevoked&) { + return discard_event(); + } + boost::statechart::result react(const DoRecovery&) { + return discard_event(); + } + boost::statechart::result react(const CheckReadable&); + void all_activated_and_committed(); + }; + + struct Clean : boost::statechart::state< Clean, Active >, NamedState { + typedef boost::mpl::list< + boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >, + boost::statechart::custom_reaction<SetForceRecovery>, + boost::statechart::custom_reaction<SetForceBackfill> + > reactions; + explicit Clean(my_context ctx); + void exit(); + boost::statechart::result react(const boost::statechart::event_base&) { + return discard_event(); + } + }; + + struct Recovered : boost::statechart::state< Recovered, Active >, NamedState { + typedef boost::mpl::list< + boost::statechart::transition< GoClean, Clean >, + boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >, + boost::statechart::custom_reaction< AllReplicasActivated > + > reactions; + explicit Recovered(my_context ctx); + void exit(); + boost::statechart::result react(const AllReplicasActivated&) { + post_event(GoClean()); + return forward_event(); + } + }; + + struct Backfilling : boost::statechart::state< Backfilling, Active >, NamedState { + typedef boost::mpl::list< + boost::statechart::custom_reaction< Backfilled >, + boost::statechart::custom_reaction< DeferBackfill >, + 
boost::statechart::custom_reaction< UnfoundBackfill >, + boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >, + boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>, + boost::statechart::custom_reaction< RemoteReservationRevoked> + > reactions; + explicit Backfilling(my_context ctx); + boost::statechart::result react(const RemoteReservationRejectedTooFull& evt) { + // for compat with old peers + post_event(RemoteReservationRevokedTooFull()); + return discard_event(); + } + void backfill_release_reservations(); + boost::statechart::result react(const Backfilled& evt); + boost::statechart::result react(const RemoteReservationRevokedTooFull& evt); + boost::statechart::result react(const RemoteReservationRevoked& evt); + boost::statechart::result react(const DeferBackfill& evt); + boost::statechart::result react(const UnfoundBackfill& evt); + void cancel_backfill(); + void exit(); + }; + + struct WaitRemoteBackfillReserved : boost::statechart::state< WaitRemoteBackfillReserved, Active >, NamedState { + typedef boost::mpl::list< + boost::statechart::custom_reaction< RemoteBackfillReserved >, + boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >, + boost::statechart::custom_reaction< RemoteReservationRevoked >, + boost::statechart::transition< AllBackfillsReserved, Backfilling > + > reactions; + std::set<pg_shard_t>::const_iterator backfill_osd_it; + explicit WaitRemoteBackfillReserved(my_context ctx); + void retry(); + void exit(); + boost::statechart::result react(const RemoteBackfillReserved& evt); + boost::statechart::result react(const RemoteReservationRejectedTooFull& evt); + boost::statechart::result react(const RemoteReservationRevoked& evt); + }; + + struct WaitLocalBackfillReserved : boost::statechart::state< WaitLocalBackfillReserved, Active >, NamedState { + typedef boost::mpl::list< + boost::statechart::transition< LocalBackfillReserved, WaitRemoteBackfillReserved >, + boost::statechart::custom_reaction< RemoteBackfillReserved > + > reactions; + explicit WaitLocalBackfillReserved(my_context ctx); + boost::statechart::result react(const RemoteBackfillReserved& evt) { + /* no-op */ + return discard_event(); + } + void exit(); + }; + + struct NotBackfilling : boost::statechart::state< NotBackfilling, Active>, NamedState { + typedef boost::mpl::list< + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved>, + boost::statechart::custom_reaction< RemoteBackfillReserved >, + boost::statechart::custom_reaction< RemoteReservationRejectedTooFull > + > reactions; + explicit NotBackfilling(my_context ctx); + void exit(); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const RemoteBackfillReserved& evt); + boost::statechart::result react(const RemoteReservationRejectedTooFull& evt); + }; + + struct NotRecovering : boost::statechart::state< NotRecovering, Active>, NamedState { + typedef boost::mpl::list< + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >, + boost::statechart::custom_reaction< DeferRecovery >, + boost::statechart::custom_reaction< UnfoundRecovery > + > reactions; + explicit NotRecovering(my_context ctx); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const DeferRecovery& evt) { + /* no-op */ + return discard_event(); + } + boost::statechart::result react(const UnfoundRecovery& evt) { + /* 
no-op */ + return discard_event(); + } + void exit(); + }; + + struct ToDelete; + struct RepNotRecovering; + struct ReplicaActive : boost::statechart::state< ReplicaActive, Started, RepNotRecovering >, NamedState { + explicit ReplicaActive(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< ActMap >, + boost::statechart::custom_reaction< MQuery >, + boost::statechart::custom_reaction< MInfoRec >, + boost::statechart::custom_reaction< MLogRec >, + boost::statechart::custom_reaction< MTrim >, + boost::statechart::custom_reaction< Activate >, + boost::statechart::custom_reaction< ActivateCommitted >, + boost::statechart::custom_reaction< DeferRecovery >, + boost::statechart::custom_reaction< DeferBackfill >, + boost::statechart::custom_reaction< UnfoundRecovery >, + boost::statechart::custom_reaction< UnfoundBackfill >, + boost::statechart::custom_reaction< RemoteBackfillPreempted >, + boost::statechart::custom_reaction< RemoteRecoveryPreempted >, + boost::statechart::custom_reaction< RecoveryDone >, + boost::statechart::transition<DeleteStart, ToDelete>, + boost::statechart::custom_reaction< MLease > + > reactions; + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const MInfoRec& infoevt); + boost::statechart::result react(const MLogRec& logevt); + boost::statechart::result react(const MTrim& trimevt); + boost::statechart::result react(const ActMap&); + boost::statechart::result react(const MQuery&); + boost::statechart::result react(const Activate&); + boost::statechart::result react(const ActivateCommitted&); + boost::statechart::result react(const MLease&); + boost::statechart::result react(const RecoveryDone&) { + return discard_event(); + } + boost::statechart::result react(const DeferRecovery& evt) { + return discard_event(); + } + boost::statechart::result react(const DeferBackfill& evt) { + return discard_event(); + } + boost::statechart::result react(const UnfoundRecovery& evt) { + return discard_event(); + } + boost::statechart::result react(const UnfoundBackfill& evt) { + return discard_event(); + } + boost::statechart::result react(const RemoteBackfillPreempted& evt) { + return discard_event(); + } + boost::statechart::result react(const RemoteRecoveryPreempted& evt) { + return discard_event(); + } + }; + + struct RepRecovering : boost::statechart::state< RepRecovering, ReplicaActive >, NamedState { + typedef boost::mpl::list< + boost::statechart::transition< RecoveryDone, RepNotRecovering >, + // for compat with old peers + boost::statechart::transition< RemoteReservationRejectedTooFull, RepNotRecovering >, + boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >, + boost::statechart::custom_reaction< BackfillTooFull >, + boost::statechart::custom_reaction< RemoteRecoveryPreempted >, + boost::statechart::custom_reaction< RemoteBackfillPreempted > + > reactions; + explicit RepRecovering(my_context ctx); + boost::statechart::result react(const RemoteRecoveryPreempted &evt); + boost::statechart::result react(const BackfillTooFull &evt); + boost::statechart::result react(const RemoteBackfillPreempted &evt); + void exit(); + }; + + struct RepWaitBackfillReserved : boost::statechart::state< RepWaitBackfillReserved, ReplicaActive >, NamedState { + typedef boost::mpl::list< + boost::statechart::custom_reaction< 
RemoteBackfillReserved >, + boost::statechart::custom_reaction< RejectTooFullRemoteReservation >, + boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >, + boost::statechart::custom_reaction< RemoteReservationCanceled > + > reactions; + explicit RepWaitBackfillReserved(my_context ctx); + void exit(); + boost::statechart::result react(const RemoteBackfillReserved &evt); + boost::statechart::result react(const RejectTooFullRemoteReservation &evt); + boost::statechart::result react(const RemoteReservationRejectedTooFull &evt); + boost::statechart::result react(const RemoteReservationCanceled &evt); + }; + + struct RepWaitRecoveryReserved : boost::statechart::state< RepWaitRecoveryReserved, ReplicaActive >, NamedState { + typedef boost::mpl::list< + boost::statechart::custom_reaction< RemoteRecoveryReserved >, + // for compat with old peers + boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >, + boost::statechart::custom_reaction< RemoteReservationCanceled > + > reactions; + explicit RepWaitRecoveryReserved(my_context ctx); + void exit(); + boost::statechart::result react(const RemoteRecoveryReserved &evt); + boost::statechart::result react(const RemoteReservationRejectedTooFull &evt) { + // for compat with old peers + post_event(RemoteReservationCanceled()); + return discard_event(); + } + boost::statechart::result react(const RemoteReservationCanceled &evt); + }; + + struct RepNotRecovering : boost::statechart::state< RepNotRecovering, ReplicaActive>, NamedState { + typedef boost::mpl::list< + boost::statechart::custom_reaction< RequestRecoveryPrio >, + boost::statechart::custom_reaction< RequestBackfillPrio >, + boost::statechart::custom_reaction< RejectTooFullRemoteReservation >, + boost::statechart::transition< RemoteReservationRejectedTooFull, RepNotRecovering >, + boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >, + boost::statechart::custom_reaction< RemoteRecoveryReserved >, + boost::statechart::custom_reaction< RemoteBackfillReserved >, + boost::statechart::transition< RecoveryDone, RepNotRecovering > // for compat with pre-reservation peers + > reactions; + explicit RepNotRecovering(my_context ctx); + boost::statechart::result react(const RequestRecoveryPrio &evt); + boost::statechart::result react(const RequestBackfillPrio &evt); + boost::statechart::result react(const RemoteBackfillReserved &evt) { + // my reservation completion raced with a RELEASE from primary + return discard_event(); + } + boost::statechart::result react(const RemoteRecoveryReserved &evt) { + // my reservation completion raced with a RELEASE from primary + return discard_event(); + } + boost::statechart::result react(const RejectTooFullRemoteReservation &evt); + void exit(); + }; + + struct Recovering : boost::statechart::state< Recovering, Active >, NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< AllReplicasRecovered >, + boost::statechart::custom_reaction< DeferRecovery >, + boost::statechart::custom_reaction< UnfoundRecovery >, + boost::statechart::custom_reaction< RequestBackfill > + > reactions; + explicit Recovering(my_context ctx); + void exit(); + void release_reservations(bool cancel = false); + boost::statechart::result react(const AllReplicasRecovered &evt); + boost::statechart::result react(const DeferRecovery& evt); + boost::statechart::result react(const UnfoundRecovery& evt); + boost::statechart::result react(const RequestBackfill &evt); + }; + + struct WaitRemoteRecoveryReserved : 
boost::statechart::state< WaitRemoteRecoveryReserved, Active >, NamedState {
+    typedef boost::mpl::list <
+      boost::statechart::custom_reaction< RemoteRecoveryReserved >,
+      boost::statechart::transition< AllRemotesReserved, Recovering >
+      > reactions;
+    std::set<pg_shard_t>::const_iterator remote_recovery_reservation_it;
+    explicit WaitRemoteRecoveryReserved(my_context ctx);
+    boost::statechart::result react(const RemoteRecoveryReserved &evt);
+    void exit();
+  };
+
+  struct WaitLocalRecoveryReserved : boost::statechart::state< WaitLocalRecoveryReserved, Active >, NamedState {
+    typedef boost::mpl::list <
+      boost::statechart::transition< LocalRecoveryReserved, WaitRemoteRecoveryReserved >,
+      boost::statechart::custom_reaction< RecoveryTooFull >
+      > reactions;
+    explicit WaitLocalRecoveryReserved(my_context ctx);
+    void exit();
+    boost::statechart::result react(const RecoveryTooFull &evt);
+  };
+
+  struct Activating : boost::statechart::state< Activating, Active >, NamedState {
+    typedef boost::mpl::list <
+      boost::statechart::transition< AllReplicasRecovered, Recovered >,
+      boost::statechart::transition< DoRecovery, WaitLocalRecoveryReserved >,
+      boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved >
+      > reactions;
+    explicit Activating(my_context ctx);
+    void exit();
+  };
+
+  struct Stray : boost::statechart::state< Stray, Started >,
+                 NamedState {
+    explicit Stray(my_context ctx);
+    void exit();
+
+    typedef boost::mpl::list <
+      boost::statechart::custom_reaction< MQuery >,
+      boost::statechart::custom_reaction< MLogRec >,
+      boost::statechart::custom_reaction< MInfoRec >,
+      boost::statechart::custom_reaction< ActMap >,
+      boost::statechart::custom_reaction< RecoveryDone >,
+      boost::statechart::transition<DeleteStart, ToDelete>
+      > reactions;
+    boost::statechart::result react(const MQuery& query);
+    boost::statechart::result react(const MLogRec& logevt);
+    boost::statechart::result react(const MInfoRec& infoevt);
+    boost::statechart::result react(const ActMap&);
+    boost::statechart::result react(const RecoveryDone&) {
+      return discard_event();
+    }
+  };
+
+  struct WaitDeleteReserved;
+  struct ToDelete : boost::statechart::state<ToDelete, Started, WaitDeleteReserved>, NamedState {
+    unsigned priority = 0;
+    typedef boost::mpl::list <
+      boost::statechart::custom_reaction< ActMap >,
+      boost::statechart::custom_reaction< ActivateCommitted >,
+      boost::statechart::custom_reaction< DeleteSome >
+      > reactions;
+    explicit ToDelete(my_context ctx);
+    boost::statechart::result react(const ActMap &evt);
+    boost::statechart::result react(const DeleteSome &evt) {
+      // happens if we drop out of Deleting due to reprioritization etc.
+      return discard_event();
+    }
+    boost::statechart::result react(const ActivateCommitted&) {
+      // Can happen if we were activated as a stray but not actually pulled
+      // from prior to the pg going clean and sending a delete.
+ return discard_event(); + } + void exit(); + }; + + struct Deleting; + struct WaitDeleteReserved : boost::statechart::state<WaitDeleteReserved, + ToDelete>, NamedState { + typedef boost::mpl::list < + boost::statechart::transition<DeleteReserved, Deleting> + > reactions; + explicit WaitDeleteReserved(my_context ctx); + void exit(); + }; + + struct Deleting : boost::statechart::state<Deleting, + ToDelete>, NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< DeleteSome >, + boost::statechart::transition<DeleteInterrupted, WaitDeleteReserved> + > reactions; + ghobject_t next; + explicit Deleting(my_context ctx); + boost::statechart::result react(const DeleteSome &evt); + void exit(); + }; + + struct GetLog; + + struct GetInfo : boost::statechart::state< GetInfo, Peering >, NamedState { + std::set<pg_shard_t> peer_info_requested; + + explicit GetInfo(my_context ctx); + void exit(); + void get_infos(); + + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::transition< GotInfo, GetLog >, + boost::statechart::custom_reaction< MNotifyRec >, + boost::statechart::transition< IsDown, Down > + > reactions; + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const MNotifyRec& infoevt); + }; + + struct GotLog : boost::statechart::event< GotLog > { + GotLog() : boost::statechart::event< GotLog >() {} + }; + + struct GetLog : boost::statechart::state< GetLog, Peering >, NamedState { + pg_shard_t auth_log_shard; + boost::intrusive_ptr<MOSDPGLog> msg; + + explicit GetLog(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< MLogRec >, + boost::statechart::custom_reaction< GotLog >, + boost::statechart::custom_reaction< AdvMap >, + boost::statechart::transition< NeedActingChange, WaitActingChange >, + boost::statechart::transition< IsIncomplete, Incomplete > + > reactions; + boost::statechart::result react(const AdvMap&); + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const MLogRec& logevt); + boost::statechart::result react(const GotLog&); + }; + + struct WaitUpThru; + + struct GetMissing : boost::statechart::state< GetMissing, Peering >, NamedState { + std::set<pg_shard_t> peer_missing_requested; + + explicit GetMissing(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< MLogRec >, + boost::statechart::transition< NeedUpThru, WaitUpThru > + > reactions; + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const MLogRec& logevt); + }; + + struct WaitUpThru : boost::statechart::state< WaitUpThru, Peering >, NamedState { + explicit WaitUpThru(my_context ctx); + void exit(); + + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< ActMap >, + boost::statechart::custom_reaction< MLogRec > + > reactions; + boost::statechart::result react(const QueryState& q); + 
boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const ActMap& am); + boost::statechart::result react(const MLogRec& logrec); + }; + + struct Down : boost::statechart::state< Down, Peering>, NamedState { + explicit Down(my_context ctx); + typedef boost::mpl::list < + boost::statechart::custom_reaction< QueryState >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< MNotifyRec > + > reactions; + boost::statechart::result react(const QueryState& q); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const MNotifyRec& infoevt); + void exit(); + }; + + struct Incomplete : boost::statechart::state< Incomplete, Peering>, NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< AdvMap >, + boost::statechart::custom_reaction< MNotifyRec >, + boost::statechart::custom_reaction< QueryUnfound >, + boost::statechart::custom_reaction< QueryState > + > reactions; + explicit Incomplete(my_context ctx); + boost::statechart::result react(const AdvMap &advmap); + boost::statechart::result react(const MNotifyRec& infoevt); + boost::statechart::result react(const QueryUnfound& q); + boost::statechart::result react(const QueryState& q); + void exit(); + }; + + PGStateHistory state_history; + CephContext* cct; + spg_t spgid; + DoutPrefixProvider *dpp; + PeeringListener *pl; + + /// context passed in by state machine caller + PeeringCtx *orig_ctx; + + /// populated if we are buffering messages pending a flush + std::optional<BufferedRecoveryMessages> messages_pending_flush; + + /** + * populated between start_handle() and end_handle(), points into + * the message lists for messages_pending_flush while blocking messages + * or into orig_ctx otherwise + */ + std::optional<PeeringCtxWrapper> rctx; + + /** + * OSDMap state + */ + OSDMapRef osdmap_ref; ///< Reference to current OSDMap + PGPool pool; ///< Current pool state + epoch_t last_persisted_osdmap = 0; ///< Last osdmap epoch persisted + + + /** + * Peering state information + */ + int role = -1; ///< 0 = primary, 1 = replica, -1=none. 
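messages_pending_flush above buffers outgoing notifies, queries, and infos while a flush is in flight; when the buffer is folded back into orig_ctx, the deferred messages must still precede anything queued for the same OSD afterwards. accept_buffered_messages() near the top of this header does that by splicing the existing per-target vector behind the incoming one; a self-contained model of the ordering (ints stand in for message refs):

#include <iostream>
#include <iterator>
#include <map>
#include <vector>

int main() {
  std::map<int, std::vector<int>> live     { {3, {30, 31}} };  // already queued
  std::map<int, std::vector<int>> buffered { {3, {10, 11}} };  // deferred batch

  // Same splice as accept_buffered_messages(): the deferred batch for each
  // target ends up in front of what the live context already holds.
  for (auto& [target, ls] : buffered) {
    auto& ovec = live[target];
    ls.reserve(ls.size() + ovec.size());
    ls.insert(ls.end(), std::make_move_iterator(ovec.begin()),
              std::make_move_iterator(ovec.end()));
    ovec.clear();
    ovec.swap(ls);
  }
  for (int v : live[3]) std::cout << v << ' ';   // prints: 10 11 30 31
}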
+  uint64_t state = 0;           ///< PG_STATE_*
+
+  pg_shard_t primary;           ///< id/shard of primary
+  pg_shard_t pg_whoami;         ///< my id/shard
+  pg_shard_t up_primary;        ///< id/shard of primary of up set
+  std::vector<int> up;          ///< crush mapping without temp pgs
+  std::set<pg_shard_t> upset;   ///< up in set form
+  std::vector<int> acting;      ///< actual acting set for the current interval
+  std::set<pg_shard_t> actingset; ///< acting in set form
+
+  /// union of acting, recovery, and backfill targets
+  std::set<pg_shard_t> acting_recovery_backfill;
+
+  std::vector<HeartbeatStampsRef> hb_stamps;
+
+  ceph::signedspan readable_interval = ceph::signedspan::zero();
+
+  /// how long we can service reads in this interval
+  ceph::signedspan readable_until = ceph::signedspan::zero();
+
+  /// upper bound on any acting OSDs' readable_until in this interval
+  ceph::signedspan readable_until_ub = ceph::signedspan::zero();
+
+  /// upper bound from prior interval(s)
+  ceph::signedspan prior_readable_until_ub = ceph::signedspan::zero();
+
+  /// pg instances from prior interval(s) that may still be readable
+  std::set<int> prior_readable_down_osds;
+
+  /// [replica] upper bound we got from the primary (primary's clock)
+  ceph::signedspan readable_until_ub_from_primary = ceph::signedspan::zero();
+
+  /// [primary] last upper bound shared by primary to replicas
+  ceph::signedspan readable_until_ub_sent = ceph::signedspan::zero();
+
+  /// [primary] readable ub acked by acting set members
+  std::vector<ceph::signedspan> acting_readable_until_ub;
+
+  bool send_notify = false; ///< True if a notify needs to be sent to the primary
+
+  bool dirty_info = false;      ///< small info structure on disk out of date
+  bool dirty_big_info = false;  ///< big info structure on disk out of date
+
+  pg_info_t info;               ///< current pg info
+  pg_info_t last_written_info;  ///< last written info
+  PastIntervals past_intervals; ///< information about prior pg mappings
+  PGLog pg_log;                 ///< pg log
+
+  epoch_t last_peering_reset = 0; ///< epoch of last peering reset
+
+  /// last_update that has committed; ONLY DEFINED WHEN is_active()
+  eversion_t last_update_ondisk;
+  eversion_t last_complete_ondisk; ///< last_complete that has committed.
+  eversion_t last_update_applied;  ///< last_update readable
+  /// last version to which rollback_info trimming has been applied
+  eversion_t last_rollback_info_trimmed_to_applied;
+
+  /// Counter to determine when pending flushes have completed
+  unsigned flushes_in_progress = 0;
+
+  /**
+   * Primary state
+   */
+  std::set<pg_shard_t> stray_set; ///< non-acting osds that have PG data.
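Several of the fields above (readable_until_ub_from_primary in particular) carry timestamps expressed in another OSD's monotonic clock, so they are only usable after conversion through the HeartbeatStamps delta bounds documented earlier in this file: with delta = peer minus local, a remote stamp T pins local time only to the window [T - ub, T - lb], i.e. the bounds swap roles under subtraction. A self-contained sketch of that conversion (std::chrono standing in for ceph::signedspan):

#include <chrono>
#include <iostream>
#include <optional>
#include <utility>

using span = std::chrono::duration<double>;  // stand-in for ceph::signedspan

// Map a peer-clock timestamp to bounds on the local clock. delta = peer -
// local, so local = T - delta, and the [lb, ub] bounds swap roles.
std::optional<std::pair<span, span>> to_local_bounds(
    span remote_stamp,
    std::optional<span> delta_lb,
    std::optional<span> delta_ub) {
  if (!delta_lb || !delta_ub)
    return std::nullopt;                     // no ping exchange yet
  return std::make_pair(remote_stamp - *delta_ub, remote_stamp - *delta_lb);
}

int main() {
  // peer clock is between 0.5s behind and 0.25s ahead of ours
  auto b = to_local_bounds(span{100.0}, span{-0.5}, span{0.25});
  std::cout << b->first.count() << " .. " << b->second.count() << "\n";
  // prints: 99.75 .. 100.5
}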
+  std::map<pg_shard_t, pg_info_t> peer_info; ///< info from peers (stray or prior)
+  std::map<pg_shard_t, int64_t> peer_bytes;  ///< Peer's num_bytes from peer_info
+  std::set<pg_shard_t> peer_purged;          ///< peers purged
+  std::map<pg_shard_t, pg_missing_t> peer_missing; ///< peer missing sets
+  std::set<pg_shard_t> peer_log_requested;   ///< logs I've requested (and start stamps)
+  std::set<pg_shard_t> peer_missing_requested; ///< missing sets requested
+
+  /// features supported by all peers
+  uint64_t peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+  /// features supported by acting set
+  uint64_t acting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+  /// features supported by up and acting
+  uint64_t upacting_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+
+  /// most recently consumed osdmap's require_osd_version
+  ceph_release_t last_require_osd_release;
+
+  std::vector<int> want_acting; ///< non-empty while peering needs a new acting set
+
+  // acting_recovery_backfill contains shards that are acting,
+  // async recovery targets, or backfill targets.
+  std::map<pg_shard_t,eversion_t> peer_last_complete_ondisk;
+
+  /// up: min over last_complete_ondisk, peer_last_complete_ondisk
+  eversion_t min_last_complete_ondisk;
+  /// point to which the log should be trimmed
+  eversion_t pg_trim_to;
+
+  std::set<int> blocked_by; ///< osds we are blocked by (for pg stats)
+
+  bool need_up_thru = false; ///< true if osdmap with updated up_thru needed
+
+  /// I deleted these strays; ignore racing PGInfo from them
+  std::set<pg_shard_t> peer_activated;
+
+  std::set<pg_shard_t> backfill_targets;       ///< osds to be backfilled
+  std::set<pg_shard_t> async_recovery_targets; ///< osds to be async recovered
+
+  /// osds which might have objects on them which are unfound on the primary
+  std::set<pg_shard_t> might_have_unfound;
+
+  bool deleting = false;  /// true while being removed or OSD is shutting down
+  std::atomic<bool> deleted = {false}; /// true once deletion complete
+
+  MissingLoc missing_loc; ///< information about missing objects
+
+  bool backfill_reserved = false;
+  bool backfill_reserving = false;
+
+  PeeringMachine machine;
+
+  void update_osdmap_ref(OSDMapRef newmap) {
+    osdmap_ref = std::move(newmap);
+  }
+
+  void update_heartbeat_peers();
+  void query_unfound(Formatter *f, std::string state);
+  bool proc_replica_info(
+    pg_shard_t from, const pg_info_t &oinfo, epoch_t send_epoch);
+  void remove_down_peer_info(const OSDMapRef &osdmap);
+  void check_recovery_sources(const OSDMapRef& map);
+  void set_last_peering_reset();
+  void check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap);
+  bool should_restart_peering(
+    int newupprimary,
+    int newactingprimary,
+    const std::vector<int>& newup,
+    const std::vector<int>& newacting,
+    OSDMapRef lastmap,
+    OSDMapRef osdmap);
+  void start_peering_interval(
+    const OSDMapRef lastmap,
+    const std::vector<int>& newup, int up_primary,
+    const std::vector<int>& newacting, int acting_primary,
+    ObjectStore::Transaction &t);
+  void on_new_interval();
+  void clear_recovery_state();
+  void clear_primary_state();
+  void check_past_interval_bounds() const;
+  bool set_force_recovery(bool b);
+  bool set_force_backfill(bool b);
+
+  /// clip calculated priority to reasonable range
+  int clamp_recovery_priority(int prio, int pool_recovery_prio, int max);
+  /// get log recovery reservation priority
+  unsigned get_recovery_priority();
+  /// get backfill reservation priority
+  unsigned get_backfill_priority();
+  /// get priority for pg deletion
+  unsigned get_delete_priority();
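The machine member above ties everything together: PeeringMachine is a boost::statechart state machine, and each struct declared in this header is a state whose reactions typedef lists the events it consumes. A stripped-down, self-contained illustration of the same library mechanics (toy states; the real states use state<> with my_context so they can reach back into PeeringState):

#include <boost/statechart/event.hpp>
#include <boost/statechart/simple_state.hpp>
#include <boost/statechart/state_machine.hpp>
#include <boost/statechart/transition.hpp>
#include <iostream>

namespace sc = boost::statechart;

struct EvActivate : sc::event<EvActivate> {};    // analogous to Activate above

struct Idle;                                     // initial state, fwd declared
struct Machine : sc::state_machine<Machine, Idle> {};

struct Active : sc::simple_state<Active, Machine> {
  Active() { std::cout << "entered Active\n"; }  // entry action, like NamedState logging
};
struct Idle : sc::simple_state<Idle, Machine> {
  typedef sc::transition<EvActivate, Active> reactions;
};

int main() {
  Machine m;
  m.initiate();                                  // enters Idle
  m.process_event(EvActivate());                 // Idle -> Active
}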
+
+public:
+  /**
+   * recovery_msg_priority_t
+   *
+   * Defines priority values for use with recovery messages.  The values are
+   * chosen to be reasonable for wpq during upgrade scenarios, but are
+   * actually translated into a class in PGRecoveryMsg::get_scheduler_class()
+   */
+  enum recovery_msg_priority_t : int {
+    FORCED = 20,
+    UNDERSIZED = 15,
+    DEGRADED = 10,
+    BEST_EFFORT = 5
+  };
+
+  /// get message priority for recovery messages
+  int get_recovery_op_priority() const {
+    if (cct->_conf->osd_op_queue == "mclock_scheduler") {
+      /* For mclock, we use special priority values which will be
+       * translated into op classes within PGRecoveryMsg::get_scheduler_class
+       */
+      if (is_forced_recovery_or_backfill()) {
+        return recovery_msg_priority_t::FORCED;
+      } else if (is_undersized()) {
+        return recovery_msg_priority_t::UNDERSIZED;
+      } else if (is_degraded()) {
+        return recovery_msg_priority_t::DEGRADED;
+      } else {
+        return recovery_msg_priority_t::BEST_EFFORT;
+      }
+    } else {
+      /* For WeightedPriorityQueue, we use pool or osd config settings to
+       * statically set the priority for recovery messages.  This special
+       * handling should probably be removed after Reef */
+      int64_t pri = 0;
+      pool.info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri);
+      return pri > 0 ? pri : cct->_conf->osd_recovery_op_priority;
+    }
+  }
+
+private:
+  bool check_prior_readable_down_osds(const OSDMapRef& map);
+
+  bool adjust_need_up_thru(const OSDMapRef osdmap);
+  PastIntervals::PriorSet build_prior();
+
+  void reject_reservation();
+
+  // acting set
+  std::map<pg_shard_t, pg_info_t>::const_iterator find_best_info(
+    const std::map<pg_shard_t, pg_info_t> &infos,
+    bool restrict_to_up_acting,
+    bool *history_les_bound) const;
+
+  static void calc_ec_acting(
+    std::map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
+    unsigned size,
+    const std::vector<int> &acting,
+    const std::vector<int> &up,
+    const std::map<pg_shard_t, pg_info_t> &all_info,
+    bool restrict_to_up_acting,
+    std::vector<int> *want,
+    std::set<pg_shard_t> *backfill,
+    std::set<pg_shard_t> *acting_backfill,
+    std::ostream &ss);
+
+  static std::pair<std::map<pg_shard_t, pg_info_t>::const_iterator, eversion_t>
+  select_replicated_primary(
+    std::map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
+    uint64_t force_auth_primary_missing_objects,
+    const std::vector<int> &up,
+    pg_shard_t up_primary,
+    const std::map<pg_shard_t, pg_info_t> &all_info,
+    const OSDMapRef osdmap,
+    std::ostream &ss);
+
+  static void calc_replicated_acting(
+    std::map<pg_shard_t, pg_info_t>::const_iterator primary_shard,
+    eversion_t oldest_auth_log_entry,
+    unsigned size,
+    const std::vector<int> &acting,
+    const std::vector<int> &up,
+    pg_shard_t up_primary,
+    const std::map<pg_shard_t, pg_info_t> &all_info,
+    bool restrict_to_up_acting,
+    std::vector<int> *want,
+    std::set<pg_shard_t> *backfill,
+    std::set<pg_shard_t> *acting_backfill,
+    const OSDMapRef osdmap,
+    const PGPool& pool,
+    std::ostream &ss);
+  static void calc_replicated_acting_stretch(
+    std::map<pg_shard_t, pg_info_t>::const_iterator primary_shard,
+    eversion_t oldest_auth_log_entry,
+    unsigned size,
+    const std::vector<int> &acting,
+    const std::vector<int> &up,
+    pg_shard_t up_primary,
+    const std::map<pg_shard_t, pg_info_t> &all_info,
+    bool restrict_to_up_acting,
+    std::vector<int> *want,
+    std::set<pg_shard_t> *backfill,
+    std::set<pg_shard_t> *acting_backfill,
+    const OSDMapRef osdmap,
+    const PGPool& pool,
+    std::ostream &ss);
+
+  void choose_async_recovery_ec(
+    const
std::map<pg_shard_t, pg_info_t> &all_info, + const pg_info_t &auth_info, + std::vector<int> *want, + std::set<pg_shard_t> *async_recovery, + const OSDMapRef osdmap) const; + void choose_async_recovery_replicated( + const std::map<pg_shard_t, pg_info_t> &all_info, + const pg_info_t &auth_info, + std::vector<int> *want, + std::set<pg_shard_t> *async_recovery, + const OSDMapRef osdmap) const; + + bool recoverable(const std::vector<int> &want) const; + bool choose_acting(pg_shard_t &auth_log_shard, + bool restrict_to_up_acting, + bool *history_les_bound, + bool request_pg_temp_change_only = false); + + bool search_for_missing( + const pg_info_t &oinfo, const pg_missing_t &omissing, + pg_shard_t fromosd, + PeeringCtxWrapper &rctx); + void build_might_have_unfound(); + void log_weirdness(); + void activate( + ObjectStore::Transaction& t, + epoch_t activation_epoch, + PeeringCtxWrapper &ctx); + + void rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead); + void merge_log( + ObjectStore::Transaction& t, pg_info_t &oinfo, + pg_log_t&& olog, pg_shard_t from); + + void proc_primary_info(ObjectStore::Transaction &t, const pg_info_t &info); + void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, + pg_log_t&& olog, pg_missing_t&& omissing, + pg_shard_t from); + void proc_replica_log(pg_info_t &oinfo, const pg_log_t &olog, + pg_missing_t&& omissing, pg_shard_t from); + + void calc_min_last_complete_ondisk() { + eversion_t min = last_complete_ondisk; + ceph_assert(!acting_recovery_backfill.empty()); + for (std::set<pg_shard_t>::iterator i = acting_recovery_backfill.begin(); + i != acting_recovery_backfill.end(); + ++i) { + if (*i == get_primary()) continue; + if (peer_last_complete_ondisk.count(*i) == 0) + return; // we don't have complete info + eversion_t a = peer_last_complete_ondisk[*i]; + if (a < min) + min = a; + } + if (min == min_last_complete_ondisk) + return; + min_last_complete_ondisk = min; + return; + } + + void fulfill_info( + pg_shard_t from, const pg_query_t &query, + std::pair<pg_shard_t, pg_info_t> ¬ify_info); + void fulfill_log( + pg_shard_t from, const pg_query_t &query, epoch_t query_epoch); + void fulfill_query(const MQuery& q, PeeringCtxWrapper &rctx); + + void try_mark_clean(); + + void update_blocked_by(); + void update_calc_stats(); + + void add_log_entry(const pg_log_entry_t& e, bool applied); + + void calc_trim_to(); + void calc_trim_to_aggressive(); + +public: + PeeringState( + CephContext *cct, + pg_shard_t pg_whoami, + spg_t spgid, + const PGPool &pool, + OSDMapRef curmap, + DoutPrefixProvider *dpp, + PeeringListener *pl); + + /// Process evt + void handle_event(const boost::statechart::event_base &evt, + PeeringCtx *rctx) { + start_handle(rctx); + machine.process_event(evt); + end_handle(); + } + + /// Process evt + void handle_event(PGPeeringEventRef evt, + PeeringCtx *rctx) { + start_handle(rctx); + machine.process_event(evt->get_event()); + end_handle(); + } + + /// Init fresh instance of PG + void init( + int role, + const std::vector<int>& newup, int new_up_primary, + const std::vector<int>& newacting, int new_acting_primary, + const pg_history_t& history, + const PastIntervals& pi, + ObjectStore::Transaction &t); + + /// Init pg instance from disk state + template <typename F> + auto init_from_disk_state( + pg_info_t &&info_from_disk, + PastIntervals &&past_intervals_from_disk, + F &&pg_log_init) { + info = std::move(info_from_disk); + last_written_info = info; + past_intervals = std::move(past_intervals_from_disk); + auto ret = 
pg_log_init(pg_log);
+    log_weirdness();
+    return ret;
+  }
+
+  /// Set initial primary/acting
+  void init_primary_up_acting(
+    const std::vector<int> &newup,
+    const std::vector<int> &newacting,
+    int new_up_primary,
+    int new_acting_primary);
+  void init_hb_stamps();
+
+  /// Set initial role
+  void set_role(int r) {
+    role = r;
+  }
+
+  /// Set predicates used for determining readable and recoverable
+  void set_backend_predicates(
+    IsPGReadablePredicate *is_readable,
+    IsPGRecoverablePredicate *is_recoverable) {
+    missing_loc.set_backend_predicates(is_readable, is_recoverable);
+  }
+
+  /// Send current pg_info to peers
+  void share_pg_info();
+
+  /// Get stats for child pgs
+  void start_split_stats(
+    const std::set<spg_t>& childpgs, std::vector<object_stat_sum_t> *out);
+
+  /// Update new child with stats
+  void finish_split_stats(
+    const object_stat_sum_t& stats, ObjectStore::Transaction &t);
+
+  /// Split state for child_pgid into *child
+  void split_into(
+    pg_t child_pgid, PeeringState *child, unsigned split_bits);
+
+  /// Merge state from sources
+  void merge_from(
+    std::map<spg_t,PeeringState *>& sources,
+    PeeringCtx &rctx,
+    unsigned split_bits,
+    const pg_merge_meta_t& last_pg_merge_meta);
+
+  /// Permit stray replicas to purge now unnecessary state
+  void purge_strays();
+
+  /**
+   * update_stats
+   *
+   * Mechanism for updating stats and/or history. Pass t to mark
+   * dirty and write out. f should return true if stats should be
+   * published to the osd.
+   */
+  void update_stats(
+    std::function<bool(pg_history_t &, pg_stat_t &)> f,
+    ObjectStore::Transaction *t = nullptr);
+
+  void update_stats_wo_resched(
+    std::function<void(pg_history_t &, pg_stat_t &)> f);
+
+  /**
+   * adjust_purged_snaps
+   *
+   * Mechanism for updating purged_snaps. Marks dirty_info, big_dirty_info.
+   */
+  void adjust_purged_snaps(
+    std::function<void(interval_set<snapid_t> &snaps)> f);
+
+  /// Updates info.hit_set to hset_history, does not dirty
+  void update_hset(const pg_hit_set_history_t &hset_history);
+
+  /// Get all pg_shards that need recovery
+  std::vector<pg_shard_t> get_replica_recovery_order() const;
+
+  /**
+   * update_history
+   *
+   * Merges new_history into info.history, clearing past_intervals and
+   * dirtying as needed.
+   *
+   * Calls PeeringListener::on_info_history_change()
+   */
+  void update_history(const pg_history_t& new_history);
+
+  /**
+   * prepare_stats_for_publish
+   *
+   * Returns an updated pg_stat_t, with unstable_stats folded in,
+   * if stats have changed since pg_stats_publish. 
+   *
+   * @param pg_stats_publish the latest pg_stat possessed by caller
+   * @param unstable_stats additional stats which should be included in the
+   *        returned stats
+   * @return the up-to-date stats if they differ from the specified
+   *         @c pg_stats_publish
+   */
+  std::optional<pg_stat_t> prepare_stats_for_publish(
+    const std::optional<pg_stat_t> &pg_stats_publish,
+    const object_stat_collection_t &unstable_stats);
+
+  /**
+   * Merge entries, updating missing as necessary, on all
+   * acting_recovery_backfill logs and missing sets (also missing_loc)
+   */
+  bool append_log_entries_update_missing(
+    const mempool::osd_pglog::list<pg_log_entry_t> &entries,
+    ObjectStore::Transaction &t,
+    std::optional<eversion_t> trim_to,
+    std::optional<eversion_t> roll_forward_to);
+
+  void append_log_with_trim_to_updated(
+    std::vector<pg_log_entry_t>&& log_entries,
+    eversion_t roll_forward_to,
+    ObjectStore::Transaction &t,
+    bool transaction_applied,
+    bool async) {
+    update_trim_to();
+    append_log(std::move(log_entries), pg_trim_to, roll_forward_to,
+               min_last_complete_ondisk, t, transaction_applied, async);
+  }
+
+  /**
+   * Updates local log to reflect new write from primary.
+   */
+  void append_log(
+    std::vector<pg_log_entry_t>&& logv,
+    eversion_t trim_to,
+    eversion_t roll_forward_to,
+    eversion_t min_last_complete_ondisk,
+    ObjectStore::Transaction &t,
+    bool transaction_applied,
+    bool async);
+
+  /**
+   * retrieve the min last_backfill among backfill targets
+   */
+  hobject_t earliest_backfill() const;
+
+
+  /**
+   * Updates local log/missing to reflect a new out-of-band (oob) log update
+   * from the primary
+   */
+  void merge_new_log_entries(
+    const mempool::osd_pglog::list<pg_log_entry_t> &entries,
+    ObjectStore::Transaction &t,
+    std::optional<eversion_t> trim_to,
+    std::optional<eversion_t> roll_forward_to);
+
+  /// Update missing set to reflect e (TODOSAM: not sure why this is needed)
+  void add_local_next_event(const pg_log_entry_t& e) {
+    pg_log.missing_add_next_entry(e);
+  }
+
+  /// Update log trim boundary
+  void update_trim_to() {
+    bool hard_limit = (get_osdmap()->test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT));
+    if (hard_limit)
+      calc_trim_to_aggressive();
+    else
+      calc_trim_to();
+  }
+
+  /// Pre-process pending update on hoid represented by logv
+  void pre_submit_op(
+    const hobject_t &hoid,
+    const std::vector<pg_log_entry_t>& logv,
+    eversion_t at_version);
+
+  /// Signal that oid has been locally recovered to version v
+  void recover_got(
+    const hobject_t &oid, eversion_t v,
+    bool is_delete,
+    ObjectStore::Transaction &t);
+
+  /// Signal that oid has been recovered on peer to version
+  void on_peer_recover(
+    pg_shard_t peer,
+    const hobject_t &soid,
+    const eversion_t &version);
+
+  /// Notify that soid is being recovered on peer
+  void begin_peer_recover(
+    pg_shard_t peer,
+    const hobject_t soid);
+
+  /// Pull missing sets from all candidate peers
+  bool discover_all_missing(
+    BufferedRecoveryMessages &rctx);
+
+  /// Notify that hoid has been fully recovered
+  void object_recovered(
+    const hobject_t &hoid,
+    const object_stat_sum_t &stat_diff) {
+    info.stats.stats.sum.add(stat_diff);
+    missing_loc.recovered(hoid);
+  }
+
+  /// Update info/stats to reflect backfill progress
+  void update_backfill_progress(
+    const hobject_t &updated_backfill,
+    const pg_stat_t &updated_stats,
+    bool preserve_local_num_bytes,
+    ObjectStore::Transaction &t);
+
+  /// Update info/stats to reflect completed backfill on hoid
+  void update_complete_backfill_object_stats(
+    const hobject_t &hoid,
+    const pg_stat_t &stats);
+
+  /// 
Update last_backfill for peer to new_last_backfill
+  void update_peer_last_backfill(
+    pg_shard_t peer,
+    const hobject_t &new_last_backfill);
+
+  /// Update info.stats with delta_stats for operation on soid
+  void apply_op_stats(
+    const hobject_t &soid,
+    const object_stat_sum_t &delta_stats);
+
+  /**
+   * force_object_missing
+   *
+   * Force oid on peer to be missing at version. If the object does not
+   * currently need recovery, the candidates (if provided), or else the
+   * remainder of the acting set, will be deemed to have the object.
+   */
+  void force_object_missing(
+    const pg_shard_t &peer,
+    const hobject_t &oid,
+    eversion_t version) {
+    force_object_missing(std::set<pg_shard_t>{peer}, oid, version);
+  }
+  void force_object_missing(
+    const std::set<pg_shard_t> &peer,
+    const hobject_t &oid,
+    eversion_t version);
+
+  /// Update state prior to backfilling soid on targets
+  void prepare_backfill_for_missing(
+    const hobject_t &soid,
+    const eversion_t &version,
+    const std::vector<pg_shard_t> &targets);
+
+  /// Set targets with the right version for revert (see recover_primary)
+  void set_revert_with_targets(
+    const hobject_t &soid,
+    const std::set<pg_shard_t> &good_peers);
+
+  /// Update lcod for fromosd
+  void update_peer_last_complete_ondisk(
+    pg_shard_t fromosd,
+    eversion_t lcod) {
+    peer_last_complete_ondisk[fromosd] = lcod;
+  }
+
+  /// Update lcod
+  void update_last_complete_ondisk(
+    eversion_t lcod) {
+    last_complete_ondisk = lcod;
+  }
+
+  /// Update state to reflect recovery up to version
+  void recovery_committed_to(eversion_t version);
+
+  /// Mark recovery complete
+  void local_recovery_complete() {
+    info.last_complete = info.last_update;
+  }
+
+  /// Update last_requested pointer to v
+  void set_last_requested(version_t v) {
+    pg_log.set_last_requested(v);
+  }
+
+  /// Write dirty state to t
+  void write_if_dirty(ObjectStore::Transaction& t);
+
+  /// Mark write completed to v with persisted lc
+  void complete_write(eversion_t v, eversion_t lc);
+
+  /// Update local write applied pointer
+  void local_write_applied(eversion_t v) {
+    last_update_applied = v;
+  }
+
+  /// Updates peering state with new map
+  void advance_map(
+    OSDMapRef osdmap,            ///< [in] new osdmap
+    OSDMapRef lastmap,           ///< [in] prev osdmap
+    std::vector<int>& newup,     ///< [in] new up set
+    int up_primary,              ///< [in] new up primary
+    std::vector<int>& newacting, ///< [in] new acting
+    int acting_primary,          ///< [in] new acting primary
+    PeeringCtx &rctx             ///< [out] recovery context
+    );
+
+  /// Activates most recently updated map
+  void activate_map(
+    PeeringCtx &rctx             ///< [out] recovery context
+    );
+
+  /// resets last_persisted_osdmap
+  void reset_last_persisted() {
+    last_persisted_osdmap = 0;
+    dirty_info = true;
+    dirty_big_info = true;
+  }
+
+  /// Signal shutdown beginning
+  void shutdown() {
+    deleting = true;
+  }
+
+  /// Signal shutdown complete
+  void set_delete_complete() {
+    deleted = true;
+  }
+
+  /// Dirty info and write out
+  void force_write_state(ObjectStore::Transaction &t) {
+    dirty_info = true;
+    dirty_big_info = true;
+    write_if_dirty(t);
+  }
+
+  /// Get current interval's readable_until
+  ceph::signedspan get_readable_until() const {
+    return readable_until;
+  }
+
+  /// Get prior intervals' readable_until upper bound
+  ceph::signedspan get_prior_readable_until_ub() const {
+    return prior_readable_until_ub;
+  }
+
+  /// Get prior intervals' readable_until down OSDs of note
+  const std::set<int>& get_prior_readable_down_osds() const {
+    return prior_readable_down_osds;
+  }
+
+  /// Reset 
prior intervals' readable_until upper bound (e.g., because it has passed)
+  void clear_prior_readable_until_ub() {
+    prior_readable_until_ub = ceph::signedspan::zero();
+    prior_readable_down_osds.clear();
+    info.history.prior_readable_until_ub = ceph::signedspan::zero();
+  }
+
+  void renew_lease(ceph::signedspan now) {
+    bool was_min = (readable_until_ub == readable_until);
+    readable_until_ub_sent = now + readable_interval;
+    if (was_min) {
+      recalc_readable_until();
+    }
+  }
+
+  void send_lease();
+  void schedule_renew_lease();
+
+  pg_lease_t get_lease() {
+    return pg_lease_t(readable_until, readable_until_ub_sent, readable_interval);
+  }
+
+  void proc_lease(const pg_lease_t& l);
+  void proc_lease_ack(int from, const pg_lease_ack_t& la);
+  void proc_renew_lease();
+
+  pg_lease_ack_t get_lease_ack() {
+    return pg_lease_ack_t(readable_until_ub_from_primary);
+  }
+
+  /// [primary] recalc readable_until[_ub] for the current interval
+  void recalc_readable_until();
+
+  //============================ const helpers ================================
+  const char *get_current_state() const {
+    return state_history.get_current_state();
+  }
+  epoch_t get_last_peering_reset() const {
+    return last_peering_reset;
+  }
+  eversion_t get_last_rollback_info_trimmed_to_applied() const {
+    return last_rollback_info_trimmed_to_applied;
+  }
+  /// Returns stable reference to internal pool structure
+  const PGPool &get_pgpool() const {
+    return pool;
+  }
+  /// Returns reference to current osdmap
+  const OSDMapRef &get_osdmap() const {
+    ceph_assert(osdmap_ref);
+    return osdmap_ref;
+  }
+  /// Returns epoch of current osdmap
+  epoch_t get_osdmap_epoch() const {
+    return get_osdmap()->get_epoch();
+  }
+
+  bool is_ec_pg() const override {
+    return pool.info.is_erasure();
+  }
+  int get_pg_size() const override {
+    return pool.info.size;
+  }
+  bool is_deleting() const {
+    return deleting;
+  }
+  bool is_deleted() const {
+    return deleted;
+  }
+  const std::set<pg_shard_t> &get_upset() const override {
+    return upset;
+  }
+  bool is_acting_recovery_backfill(pg_shard_t osd) const {
+    return acting_recovery_backfill.count(osd);
+  }
+  bool is_acting(pg_shard_t osd) const {
+    return has_shard(pool.info.is_erasure(), acting, osd);
+  }
+  bool is_up(pg_shard_t osd) const {
+    return has_shard(pool.info.is_erasure(), up, osd);
+  }
+  static bool has_shard(bool ec, const std::vector<int>& v, pg_shard_t osd) {
+    if (ec) {
+      return v.size() > (unsigned)osd.shard && v[osd.shard] == osd.osd;
+    } else {
+      return std::find(v.begin(), v.end(), osd.osd) != v.end();
+    }
+  }
+  const PastIntervals& get_past_intervals() const {
+    return past_intervals;
+  }
+  /// acting osd that is not the primary
+  bool is_nonprimary() const {
+    return role >= 0 && pg_whoami != primary;
+  }
+  /// primary osd
+  bool is_primary() const {
+    return pg_whoami == primary;
+  }
+  bool pg_has_reset_since(epoch_t e) const {
+    return deleted || e < get_last_peering_reset();
+  }
+
+  int get_role() const {
+    return role;
+  }
+  const std::vector<int> &get_acting() const {
+    return acting;
+  }
+  const std::set<pg_shard_t> &get_actingset() const {
+    return actingset;
+  }
+  int get_acting_primary() const {
+    return primary.osd;
+  }
+  pg_shard_t get_primary() const {
+    return primary;
+  }
+  const std::vector<int> &get_up() const {
+    return up;
+  }
+  int get_up_primary() const {
+    return up_primary.osd;
+  }
+
+  bool is_backfill_target(pg_shard_t osd) const {
+    return backfill_targets.count(osd);
+  }
+  const std::set<pg_shard_t> &get_backfill_targets() const {
+    return backfill_targets;
+  }
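+  /**
+   * NOTE: a worked example for has_shard() above, using an assumed 3-OSD
+   * set; not part of the original interface. For erasure-coded pools the
+   * vector is positional (index == shard id), while for replicated pools
+   * membership alone suffices:
+   *
+   * @code
+   * std::vector<int> acting{3, 7, 12};
+   * has_shard(true,  acting, pg_shard_t(7, shard_id_t(1))); // true: acting[1] == 7
+   * has_shard(true,  acting, pg_shard_t(7, shard_id_t(2))); // false: acting[2] == 12
+   * has_shard(false, acting, pg_shard_t(7, shard_id_t::NO_SHARD)); // true: membership only
+   * @endcode
+   */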
+  bool is_async_recovery_target(pg_shard_t peer) const {
+    return async_recovery_targets.count(peer);
+  }
+  const std::set<pg_shard_t> &get_async_recovery_targets() const {
+    return async_recovery_targets;
+  }
+  const std::set<pg_shard_t> &get_acting_recovery_backfill() const {
+    return acting_recovery_backfill;
+  }
+
+  const PGLog &get_pg_log() const {
+    return pg_log;
+  }
+
+  bool state_test(uint64_t m) const { return (state & m) != 0; }
+  void state_set(uint64_t m) { state |= m; }
+  void state_clear(uint64_t m) { state &= ~m; }
+
+  bool is_complete() const { return info.last_complete == info.last_update; }
+  bool should_send_notify() const { return send_notify; }
+
+  uint64_t get_state() const { return state; }
+  bool is_active() const { return state_test(PG_STATE_ACTIVE); }
+  bool is_activating() const { return state_test(PG_STATE_ACTIVATING); }
+  bool is_peering() const { return state_test(PG_STATE_PEERING); }
+  bool is_down() const { return state_test(PG_STATE_DOWN); }
+  bool is_recovery_unfound() const {
+    return state_test(PG_STATE_RECOVERY_UNFOUND);
+  }
+  bool is_backfilling() const {
+    return state_test(PG_STATE_BACKFILLING);
+  }
+  bool is_backfill_unfound() const {
+    return state_test(PG_STATE_BACKFILL_UNFOUND);
+  }
+  bool is_incomplete() const { return state_test(PG_STATE_INCOMPLETE); }
+  bool is_clean() const { return state_test(PG_STATE_CLEAN); }
+  bool is_degraded() const { return state_test(PG_STATE_DEGRADED); }
+  bool is_undersized() const { return state_test(PG_STATE_UNDERSIZED); }
+  bool is_remapped() const { return state_test(PG_STATE_REMAPPED); }
+  bool is_peered() const {
+    return state_test(PG_STATE_ACTIVE) || state_test(PG_STATE_PEERED);
+  }
+  bool is_recovering() const { return state_test(PG_STATE_RECOVERING); }
+  bool is_premerge() const { return state_test(PG_STATE_PREMERGE); }
+  bool is_repair() const { return state_test(PG_STATE_REPAIR); }
+  bool is_empty() const { return info.last_update == eversion_t(0,0); }
+
+  bool get_need_up_thru() const {
+    return need_up_thru;
+  }
+
+  bool is_forced_recovery_or_backfill() const {
+    return get_state() & (PG_STATE_FORCED_RECOVERY | PG_STATE_FORCED_BACKFILL);
+  }
+
+  bool is_backfill_reserved() const {
+    return backfill_reserved;
+  }
+
+  bool is_backfill_reserving() const {
+    return backfill_reserving;
+  }
+
+  ceph_release_t get_last_require_osd_release() const {
+    return last_require_osd_release;
+  }
+
+  const pg_info_t &get_info() const {
+    return info;
+  }
+
+  const decltype(peer_info) &get_peer_info() const {
+    return peer_info;
+  }
+  const decltype(peer_missing) &get_peer_missing() const {
+    return peer_missing;
+  }
+  const pg_missing_const_i &get_peer_missing(const pg_shard_t &peer) const {
+    if (peer == pg_whoami) {
+      return pg_log.get_missing();
+    } else {
+      assert(peer_missing.count(peer));
+      return peer_missing.find(peer)->second;
+    }
+  }
+  const pg_info_t &get_peer_info(pg_shard_t peer) const {
+    assert(peer_info.count(peer));
+    return peer_info.find(peer)->second;
+  }
+  bool has_peer_info(pg_shard_t peer) const {
+    return peer_info.count(peer);
+  }
+
+  bool needs_recovery() const;
+  bool needs_backfill() const;
+
+  /**
+   * Returns whether a particular object can be safely read on this replica
+   */
+  bool can_serve_replica_read(const hobject_t &hoid) {
+    ceph_assert(!is_primary());
+    return !pg_log.get_log().has_write_since(
+      hoid, get_min_last_complete_ondisk());
+  }
+
+  /**
+   * Returns whether the current acting set is able to go active
+   * and serve writes. 
It needs to satisfy min_size and any
+   * applicable stretch cluster constraints.
+   */
+  bool acting_set_writeable() {
+    return (actingset.size() >= pool.info.min_size) &&
+      (pool.info.stretch_set_can_peer(acting, *get_osdmap(), NULL));
+  }
+
+  /**
+   * Returns whether all peers which might have unfound objects have been
+   * queried or marked lost.
+   */
+  bool all_unfound_are_queried_or_lost(const OSDMapRef osdmap) const;
+  bool all_missing_unfound() const {
+    const auto& missing = pg_log.get_missing();
+    if (!missing.have_missing())
+      return false;
+    for (auto& m : missing.get_items()) {
+      if (!missing_loc.is_unfound(m.first))
+        return false;
+    }
+    return true;
+  }
+
+  bool perform_deletes_during_peering() const {
+    return !(get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES));
+  }
+
+
+  bool have_unfound() const {
+    return missing_loc.have_unfound();
+  }
+  uint64_t get_num_unfound() const {
+    return missing_loc.num_unfound();
+  }
+
+  bool have_missing() const {
+    return pg_log.get_missing().num_missing() > 0;
+  }
+  unsigned int get_num_missing() const {
+    return pg_log.get_missing().num_missing();
+  }
+
+  const MissingLoc &get_missing_loc() const {
+    return missing_loc;
+  }
+
+  const MissingLoc::missing_by_count_t &get_missing_by_count() const {
+    return missing_loc.get_missing_by_count();
+  }
+
+  eversion_t get_min_last_complete_ondisk() const {
+    return min_last_complete_ondisk;
+  }
+
+  eversion_t get_pg_trim_to() const {
+    return pg_trim_to;
+  }
+
+  eversion_t get_last_update_applied() const {
+    return last_update_applied;
+  }
+
+  eversion_t get_last_update_ondisk() const {
+    return last_update_ondisk;
+  }
+
+  bool debug_has_dirty_state() const {
+    return dirty_info || dirty_big_info;
+  }
+
+  std::string get_pg_state_string() const {
+    return pg_state_string(state);
+  }
+
+  /// Dump representation of past_intervals to out
+  void print_past_intervals(std::ostream &out) const {
+    out << "[" << past_intervals.get_bounds()
+        << ")/" << past_intervals.size();
+  }
+
+  void dump_history(ceph::Formatter *f) const {
+    state_history.dump(f);
+  }
+
+  /// Dump formatted peering status
+  void dump_peering_state(ceph::Formatter *f);
+
+private:
+  /// Mask feature vector with feature set from new peer
+  void apply_peer_features(uint64_t f) { peer_features &= f; }
+
+  /// Reset feature vector to default
+  void reset_min_peer_features() {
+    peer_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
+  }
+public:
+  /// Get feature vector common to all known peers with this pg
+  uint64_t get_min_peer_features() const { return peer_features; }
+
+  /// Get feature vector common to acting set
+  uint64_t get_min_acting_features() const { return acting_features; }
+
+  /// Get feature vector common to up/acting set
+  uint64_t get_min_upacting_features() const { return upacting_features; }
+
+
+  // Flush control interface
+private:
+  /**
+   * Start additional flush (blocks needs_flush/activation until
+   * complete_flush is called once for each start_flush call as
+   * required by start_flush_on_transaction).
+   */
+  void start_flush(ObjectStore::Transaction &t) {
+    flushes_in_progress++;
+    pl->start_flush_on_transaction(t);
+  }
+public:
+  /// True if there are outstanding flushes
+  bool needs_flush() const {
+    return flushes_in_progress > 0;
+  }
+  /// Must be called once per start_flush
+  void complete_flush();
+
+  friend std::ostream &operator<<(std::ostream &out, const PeeringState &ps);
+};
+
+std::ostream &operator<<(std::ostream &out, const PeeringState &ps);
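+/**
+ * NOTE: illustrative call sequence, not part of the original header. A
+ * hypothetical caller feeds an osdmap change through PeeringState and then
+ * delivers a peering event; PeeringCtx construction and the concrete event
+ * value are assumptions for this sketch, not a verbatim ceph-osd code path:
+ *
+ * @code
+ * PeeringCtx rctx;  // assumed default-constructible for the sketch
+ * ps.advance_map(newmap, lastmap, newup, up_primary,
+ *                newacting, acting_primary, rctx);
+ * ps.activate_map(rctx);        // activate the most recently consumed map
+ * ps.handle_event(evt, &rctx);  // evt: any boost::statechart event
+ * @endcode
+ */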