summaryrefslogtreecommitdiffstats
path: root/src/osd/scrub_machine.h
blob: 7f88a675a2b66217605ea5c3d615f6206e855ee4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once

#include <string>

#include <boost/statechart/custom_reaction.hpp>
#include <boost/statechart/deferral.hpp>
#include <boost/statechart/event.hpp>
#include <boost/statechart/event_base.hpp>
#include <boost/statechart/in_state_reaction.hpp>
#include <boost/statechart/simple_state.hpp>
#include <boost/statechart/state.hpp>
#include <boost/statechart/state_machine.hpp>
#include <boost/statechart/transition.hpp>

#include "common/version.h"
#include "include/Context.h"

#include "scrub_machine_lstnr.h"
#include "scrubber_common.h"

using namespace std::string_literals;

class PG;  // holding a pointer to that one - just for testing
class PgScrubber;
namespace Scrub {

namespace sc = ::boost::statechart;
namespace mpl = ::boost::mpl;

//
//  EVENTS
//

/// Hook called from the constructor of an MEV()-generated event, whenever the
/// per-event-type count of live objects goes from 0 to 1.
/// 'nm' is the event's type name (the stringified MEV() argument).
/// Declared here only - the implementation lives elsewhere.
void on_event_creation(std::string_view nm);
/// The counterpart of on_event_creation(): called from the destructor of an
/// MEV()-generated event, when the per-event-type count of live objects drops
/// back to 0. 'nm' is the event's type name.
void on_event_discard(std::string_view nm);

/**
 * MEV(E): define the statechart event type 'E'.
 *
 * Every generated event type maintains 'actv' - a per-type count of the
 * objects of that type that are currently alive:
 * - on_event_creation("E") is called when the count goes from 0 to 1;
 * - on_event_discard("E") is called when the count drops back to 0.
 *
 * Copies are counted, too. The statechart library may copy-construct an
 * event (e.g. when an event is deferred - see the sc::deferral<> reaction
 * used by WaitReplicas). As the destructor always decrements 'actv', a copy
 * that was not counted on construction would drive the counter to 0 (and
 * then below it) while the original object is still alive, breaking the
 * creation/discard pairing from that point on.
 *
 * Two print() flavors are provided: one streaming the event's name into
 * '*out', the other returning the name as a string_view.
 *
 * NOTE(review): 'actv' is a plain (non-atomic) static. This presumes that
 * events of a given type are never created/destroyed concurrently by
 * multiple threads - to be confirmed against the callers.
 *
 * (no comments inside the macro body: a '//' comment would swallow the
 * line-continuation backslash)
 */
#define MEV(E)                                          \
  struct E : sc::event<E> {                             \
    inline static int actv{0};                          \
    E()                                                 \
    {                                                   \
      if (!actv++)                                      \
	on_event_creation(#E);                          \
    }                                                   \
    E(const E& other) : sc::event<E>(other)             \
    {                                                   \
      if (!actv++)                                      \
	on_event_creation(#E);                          \
    }                                                   \
    ~E()                                                \
    {                                                   \
      if (!--actv)                                      \
	on_event_discard(#E);                           \
    }                                                   \
    void print(std::ostream* out) const { *out << #E; } \
    std::string_view print() const { return #E; }       \
  };

// The events handled by the scrub state machine. Each MEV(X) line defines
// the event type 'X' (see the MEV macro).

MEV(RemotesReserved)  ///< all replicas have granted our reserve request

MEV(ReservationFailure)	 ///< a reservation request has failed

MEV(StartScrub)	 ///< initiate a new scrubbing session (relevant if we are a Primary)

MEV(AfterRepairScrub)  ///< initiate a new scrubbing session. Only triggered at Recovery
		       ///< completion.

MEV(Unblocked)	///< triggered when the PG unblocked an object that was marked for
		///< scrubbing. Via the PGScrubUnblocked op

MEV(InternalSchedScrub)  ///< moves PendingTimer on to NewChunk. When received in
			 ///< BuildMap - re-enters BuildMap (looping, waiting for the
			 ///< backend to finish)

MEV(SelectedChunkFree)  ///< handled by NewChunk (a custom reaction)

MEV(ChunkIsBusy)  ///< moves NewChunk to RangeBlocked

MEV(ActivePushesUpd)	 ///< Update to active_pushes. 'active_pushes' represents recovery
			 ///< that is in-flight to the local ObjectStore
MEV(UpdatesApplied)	 ///< (Primary only) all updates are committed

MEV(InternalAllUpdates)	 ///< the internal counterpart of UpdatesApplied

MEV(GotReplicas)  ///< got a map from a replica

MEV(IntBmPreempted)  ///< internal - BuildMap preempted. Required, as detected within the
		     ///< ctor

MEV(InternalError)  ///< handled by ActiveScrubbing, i.e. the parent of all of the
		    ///< Primary's active sub-states. See the error scenarios listed
		    ///< in BuildMap

MEV(IntLocalMapDone)  ///< handled by BuildMap (a custom reaction)

MEV(DigestUpdate)  ///< external. called upon success of a MODIFY op. See
		   ///< scrub_snapshot_metadata()

MEV(MapsCompared)  ///< (Crimson) maps_compare_n_cleanup() transactions are done

MEV(StartReplica)  ///< initiating replica scrub.

MEV(StartReplicaNoWait)	 ///< 'start replica' when there are no pending updates

MEV(SchedReplica)  ///< handled by ActiveReplica (a custom reaction)

MEV(ReplicaPushesUpd)  ///< Update to active_pushes. 'active_pushes' represents recovery
		       ///< that is in-flight to the local ObjectStore

MEV(FullReset)	///< guarantee that the FSM is in the quiescent state (i.e. NotActive)

MEV(NextChunk)	///< finished handling this chunk. Go get the next one

MEV(ScrubFinished)  ///< all chunks handled


//
//  STATES
//
// Forward declarations of the top-level states (the direct children of
// ScrubMachine). Required here, as the states' 'reactions' lists name each
// other as transition targets.

struct NotActive;	    ///< the quiescent state. No active scrubbing.
struct ReservingReplicas;   ///< securing scrub resources from replicas' OSDs
struct ActiveScrubbing;	    ///< the active state for a Primary. A sub-machine.
struct ReplicaWaitUpdates;  ///< an active state for a replica. Waiting for all active
			    ///< operations to finish.
struct ActiveReplica;	    ///< an active state for a replica.


/**
 * The scrub state machine (a Boost.Statechart state_machine).
 * 'NotActive' is the machine's initial state.
 *
 * Note: PgScrubber is a friend of this class.
 */
class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
 public:
  friend class PgScrubber;

  /// \param pg        the PG (per the forward declaration of PG: a pointer to
  ///                  it is held "just for testing")
  /// \param pg_scrub  the listener interface (kept as m_scrbr, presumably -
  ///                  the constructor is defined elsewhere)
  explicit ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub);
  ~ScrubMachine();

  // ---- data

  spg_t m_pg_id;
  ScrubMachineListener* m_scrbr;

  // ---- queries re the machine's state (all defined elsewhere)

  [[nodiscard]] bool is_reserving() const;
  [[nodiscard]] bool is_accepting_updates() const;
  void assert_not_active() const;
  std::string current_states_desc() const;

  // ---- logging support

  std::ostream& gen_prefix(std::ostream& out) const;
};

/**
 *  The Scrubber's base (quiescent) state.
 *  Scrubbing is triggered by one of the following events:
 *  - (standard scenario for a Primary): 'StartScrub'. Initiates the OSDs resources
 *    reservation process. Will be issued by PG::scrub(), following a
 *    queued "PGScrub" op.
 *  - a special end-of-recovery Primary scrub event ('AfterRepairScrub') that is
 *    not required to reserve resources.
 *  - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by an incoming
 *    MOSDRepScrub message.
 *
 *  note (20.8.21): originally, AfterRepairScrub was triggering a scrub without waiting
 *   for replica resources to be acquired. But once replicas started using the
 *   resource-request to identify and tag the scrub session, this bypass cannot be
 *   supported anymore.
 */
struct NotActive : sc::state<NotActive, ScrubMachine> {
  using reactions = mpl::list<
    // (Primary) a regular scrub: starts with reserving the replicas
    sc::transition<StartScrub, ReservingReplicas>,
    // (Primary) a scrub that was initiated at recovery completion. Note that
    // it takes the very same path as a regular scrub, i.e. it does go
    // through the replicas reservation state
    sc::transition<AfterRepairScrub, ReservingReplicas>,
    // (replica) a scrub request that must first wait for updates
    sc::transition<StartReplica, ReplicaWaitUpdates>,
    // (replica) 'start replica' when there are no pending updates
    sc::transition<StartReplicaNoWait, ActiveReplica>>;

  explicit NotActive(my_context ctx);
};

/**
 * (Primary) securing the scrub resources from the replicas' OSDs.
 * Left for ActiveScrubbing once all replicas have granted the request.
 */
struct ReservingReplicas : sc::state<ReservingReplicas, ScrubMachine> {
  using reactions = mpl::list<
    sc::custom_reaction<FullReset>,
    // our resources request was granted by all replicas
    sc::transition<RemotesReserved, ActiveScrubbing>,
    sc::custom_reaction<ReservationFailure>>;

  explicit ReservingReplicas(my_context ctx);

  /// a reservation request has failed, i.e. at least one replica denied us
  /// the scrub resources we've requested
  sc::result react(const ReservationFailure&);

  sc::result react(const FullReset&);
};


// the "active" sub-states

// (forward declarations of the inner states of ActiveScrubbing. Each of them
// names ActiveScrubbing as its context.)

struct RangeBlocked;  ///< the objects range is blocked
struct PendingTimer;  ///< either delaying the scrub by some time and requeuing, or just
		      ///< requeue
struct NewChunk;      ///< select a chunk to scrub, and verify its availability
struct WaitPushes;    ///< wait for 'active_pushes' to clear
struct WaitLastUpdate;  ///< reacts to UpdatesApplied & InternalAllUpdates
struct BuildMap;	///< loops (on InternalSchedScrub) until the backend is done
struct DrainReplMaps;  ///< a problem during BuildMap. Wait for all replicas to report,
		       ///< then restart.
struct WaitReplicas;   ///< wait for all replicas to report
struct WaitDigestUpdate;  ///< reacts to DigestUpdate, ScrubFinished & NextChunk

/**
 * The active state for a Primary. A sub-machine: its initial inner state is
 * 'PendingTimer'.
 * Being the parent of all the "active" sub-states, the two events listed
 * here are handled regardless of the specific sub-state we are in.
 */
struct ActiveScrubbing : sc::state<ActiveScrubbing, ScrubMachine, PendingTimer> {
  using reactions =
    mpl::list<sc::custom_reaction<InternalError>, sc::custom_reaction<FullReset>>;

  explicit ActiveScrubbing(my_context ctx);
  ~ActiveScrubbing();

  sc::result react(const InternalError&);
  sc::result react(const FullReset&);
};

/// The objects range is blocked. 'Unblocked' sends us back to PendingTimer.
struct RangeBlocked : sc::state<RangeBlocked, ActiveScrubbing> {
  using reactions = mpl::list<
    sc::transition<Unblocked, PendingTimer>>;

  explicit RangeBlocked(my_context ctx);
};

/// Either delaying the scrub by some time and requeuing, or just requeuing.
/// 'InternalSchedScrub' moves us on to NewChunk.
struct PendingTimer : sc::state<PendingTimer, ActiveScrubbing> {
  using reactions = mpl::list<
    sc::transition<InternalSchedScrub, NewChunk>>;

  explicit PendingTimer(my_context ctx);
};

/// Select a chunk to scrub, and verify its availability.
struct NewChunk : sc::state<NewChunk, ActiveScrubbing> {
  using reactions = mpl::list<
    // the selected range is not available
    sc::transition<ChunkIsBusy, RangeBlocked>,
    // handled by react() below
    sc::custom_reaction<SelectedChunkFree>>;

  explicit NewChunk(my_context ctx);

  sc::result react(const SelectedChunkFree&);
};

/**
 * initiate the update process for this chunk
 *
 * Wait for 'active_pushes' to clear.
 * 'active_pushes' represents recovery that is in-flight to the local Objectstore, hence
 * scrub waits until the correct data is readable (in-flight data to the Objectstore is
 * not readable until written to disk, termed 'applied' here)
 */
struct WaitPushes : sc::state<WaitPushes, ActiveScrubbing> {
  // the only event of interest here is an update to 'active_pushes'
  using reactions = mpl::list<
    sc::custom_reaction<ActivePushesUpd>>;

  explicit WaitPushes(my_context ctx);

  sc::result react(const ActivePushesUpd&);
};

/**
 * Reacts to:
 * - UpdatesApplied ("(Primary only) all updates are committed"): handled
 *   in-state by on_new_updates();
 * - InternalAllUpdates (the internal counterpart of UpdatesApplied): handled
 *   by react().
 */
struct WaitLastUpdate : sc::state<WaitLastUpdate, ActiveScrubbing> {

  explicit WaitLastUpdate(my_context ctx);

  // note: must be declared ahead of 'reactions', as the in_state_reaction
  // entry there takes the address of this member function
  void on_new_updates(const UpdatesApplied&);

  using reactions = mpl::list<sc::custom_reaction<InternalAllUpdates>,
			      sc::in_state_reaction<UpdatesApplied,
						    WaitLastUpdate,
						    &WaitLastUpdate::on_new_updates>>;

  sc::result react(const InternalAllUpdates&);
};

/**
 * Error scenarios to consider while in this state:
 * - the backend reporting an error: this triggers an 'InternalError' event,
 *   which is handled by our parent state;
 * - a preemption: we move to DrainReplMaps, and wait there for all replicas
 *   to send their maps. Only then is the preemption acknowledged;
 * - an interval change: handled by the relevant 'send-event' functions, and
 *   will be translated into a 'FullReset' event.
 */
struct BuildMap : sc::state<BuildMap, ActiveScrubbing> {
  using reactions = mpl::list<
    sc::transition<IntBmPreempted, DrainReplMaps>,
    // looping, waiting for the backend to finish
    sc::transition<InternalSchedScrub, BuildMap>,
    sc::custom_reaction<IntLocalMapDone>>;

  explicit BuildMap(my_context ctx);

  sc::result react(const IntLocalMapDone&);
};

/*
 *  "drain" scrub-maps responses from replicas
 */
struct DrainReplMaps : sc::state<DrainReplMaps, ActiveScrubbing> {
  using reactions = mpl::list<
    // all replicas are accounted for
    sc::custom_reaction<GotReplicas>>;

  explicit DrainReplMaps(my_context ctx);

  sc::result react(const GotReplicas&);
};

/// Wait for all replicas to report.
struct WaitReplicas : sc::state<WaitReplicas, ActiveScrubbing> {
  using reactions = mpl::list<
    // all replicas are accounted for
    sc::custom_reaction<GotReplicas>,
    sc::transition<MapsCompared, WaitDigestUpdate>,
    // a DigestUpdate might arrive before we've reached WaitDigestUpdate.
    // Deferred, to be handled there.
    sc::deferral<DigestUpdate>>;

  explicit WaitReplicas(my_context ctx);

  // see comment in react code
  bool all_maps_already_called{false};

  sc::result react(const GotReplicas&);
};

/// The last of the per-chunk states: 'NextChunk' sends us back to
/// PendingTimer (i.e. on to the next chunk).
struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing> {
  using reactions = mpl::list<
    sc::custom_reaction<DigestUpdate>,
    sc::custom_reaction<ScrubFinished>,
    // finished handling this chunk. Go get the next one
    sc::transition<NextChunk, PendingTimer>>;

  explicit WaitDigestUpdate(my_context ctx);

  sc::result react(const ScrubFinished&);
  sc::result react(const DigestUpdate&);
};

// ----------------------------- the "replica active" states -----------------------

/*
 * Waiting for 'active_pushes' to complete
 *
 * When in this state:
 * - the details of the Primary's request were internalized by PgScrubber;
 * - 'active' scrubbing is set
 */
struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ScrubMachine> {
  using reactions = mpl::list<
    // an update to 'active_pushes'
    sc::custom_reaction<ReplicaPushesUpd>,
    sc::custom_reaction<FullReset>>;

  explicit ReplicaWaitUpdates(my_context ctx);

  sc::result react(const FullReset&);
  sc::result react(const ReplicaPushesUpd&);
};


/// An active state for a replica. 'ScrubFinished' returns the machine to
/// the quiescent state.
struct ActiveReplica : sc::state<ActiveReplica, ScrubMachine> {
  using reactions = mpl::list<
    sc::custom_reaction<SchedReplica>,
    sc::custom_reaction<FullReset>,
    // all done: back to NotActive
    sc::transition<ScrubFinished, NotActive>>;

  explicit ActiveReplica(my_context ctx);

  sc::result react(const FullReset&);
  sc::result react(const SchedReplica&);
};

}  // namespace Scrub