// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once
#include <fmt/ranges.h>
#include "common/scrub_types.h"
#include "include/types.h"
#include "os/ObjectStore.h"
#include "OpRequest.h"
namespace ceph {
class Formatter;
}
struct PGPool;
namespace Scrub {
class ReplicaReservations;
}
/// Facilitating scrub-related object access to private PG data
class ScrubberPasskey {
private:
friend class Scrub::ReplicaReservations;
friend class PrimaryLogScrub;
friend class PgScrubber;
friend class ScrubBackend;
ScrubberPasskey() {}
ScrubberPasskey(const ScrubberPasskey&) = default;
ScrubberPasskey& operator=(const ScrubberPasskey&) = delete;
};
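// A minimal sketch of how the passkey idiom above is meant to be used. The
// PG-side member function shown here is hypothetical (illustration only);
// the point is that only the classes befriended above can construct a
// ScrubberPasskey, and thus only they can call such an interface:
//
//   class PG {
//    public:
//     // callable only by holders of a ScrubberPasskey:
//     void forward_scrub_event(ScrubberPasskey, epoch_t epoch_queued);
//   };
//
//   void PgScrubber::handle_event_sketch(PG& pg, epoch_t e)
//   {
//     // PgScrubber is a friend of ScrubberPasskey, so it may create one:
//     pg.forward_scrub_event(ScrubberPasskey{}, e);
//   }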
namespace Scrub {
/// high/low OP priority
enum class scrub_prio_t : bool { low_priority = false, high_priority = true };
/// Identifies a specific scrub activation within an interval;
/// see ScrubPgIF::m_current_token
using act_token_t = uint32_t;
/// "environment" preconditions affecting which PGs are eligible for scrubbing
struct ScrubPreconds {
bool allow_requested_repair_only{false};
bool load_is_low{true};
bool time_permit{true};
bool only_deadlined{false};
};
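// A hedged sketch of how a scheduling loop might consult these preconditions
// when selecting scrub candidates (the surrounding OSD scheduling code is
// assumed, and is not part of this header):
//
//   bool scrub_permitted_now(const Scrub::ScrubPreconds& env, bool overdue)
//   {
//     if (env.only_deadlined && !overdue) {
//       return false;  // this tick only serves PGs past their deadline
//     }
//     return env.load_is_low && env.time_permit;
//   }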
/// PG services used by the scrubber backend
struct PgScrubBeListener {
virtual ~PgScrubBeListener() = default;
virtual const PGPool& get_pgpool() const = 0;
virtual pg_shard_t get_primary() const = 0;
virtual void force_object_missing(ScrubberPasskey,
const std::set<pg_shard_t>& peer,
const hobject_t& oid,
eversion_t version) = 0;
virtual const pg_info_t& get_pg_info(ScrubberPasskey) const = 0;
// query the PG backend for the on-disk size of an object
virtual uint64_t logical_to_ondisk_size(uint64_t logical_size) const = 0;
  // used to verify our "cleanliness" before scrubbing
virtual bool is_waiting_for_unreadable_object() const = 0;
};
} // namespace Scrub
/**
 * Flags affecting the scheduling and behaviour of the *next* scrub.
 *
 * We hold two of these flag collections: one for the next scrub, and one
 * frozen at scrub initiation (i.e. in pg::queue_scrub()).
 */
struct requested_scrub_t {
// flags to indicate explicitly requested scrubs (by admin):
// bool must_scrub, must_deep_scrub, must_repair, need_auto;
/**
* 'must_scrub' is set by an admin command (or by need_auto).
* Affects the priority of the scrubbing, and the sleep periods
* during the scrub.
*/
bool must_scrub{false};
/**
* scrub must not be aborted.
* Set for explicitly requested scrubs, and for scrubs originated by the
* pairing process with the 'repair' flag set (in the RequestScrub event).
*
* Will be copied into the 'required' scrub flag upon scrub start.
*/
bool req_scrub{false};
  /**
   * Set by scrub_requested() when called with its need_auto param set, which
   * only happens in scrub_finish() - if deep_scrub_on_error is set and we
   * have errors.
   *
   * If set, will prevent the OSD from casually postponing our scrub. When
   * scrubbing starts, will cause must_scrub, must_deep_scrub and auto_repair
   * to be set.
   */
bool need_auto{false};
/**
* Set for scrub-after-recovery just before we initiate the recovery deep
   * scrub, or if scrub_requested() was called with either need_auto or repair.
* Affects PG_STATE_DEEP_SCRUB.
*/
bool must_deep_scrub{false};
  /**
   * (An intermediary flag used by pg::sched_scrub() the first time a planned
   * scrub has all its resources). Determines whether the next repair/scrub
   * will be 'deep'.
   *
   * Note: 'dumped' by PgScrubber::dump() and the like. In practice, being a
   * temporary that is set and reset within the same operation, it will never
   * appear externally to be set.
   */
bool time_for_deep{false};
bool deep_scrub_on_error{false};
/**
* If set, we should see must_deep_scrub & must_scrub, too
*
* - 'must_repair' is checked by the OSD when scheduling the scrubs.
* - also checked & cleared at pg::queue_scrub()
*/
bool must_repair{false};
  /*
   * The value of auto_repair is determined in sched_scrub() (once per scrub;
   * the previous value is not remembered). Set if:
   * - allowed by configuration and backend, and
   * - must_scrub is not set (i.e. this is a periodic scrub), and
   * - time_for_deep was just set
   */
bool auto_repair{false};
  /**
   * Indicates that we are scrubbing post-repair, to verify that everything
   * was fixed. Otherwise, PG_STATE_FAILED_REPAIR will be asserted.
   */
bool check_repair{false};
/**
* Used to indicate, both in client-facing listings and internally, that
* the planned scrub will be a deep one.
*/
bool calculated_to_deep{false};
};
std::ostream& operator<<(std::ostream& out, const requested_scrub_t& sf);
template <>
struct fmt::formatter<requested_scrub_t> {
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
template <typename FormatContext>
auto format(const requested_scrub_t& rs, FormatContext& ctx)
{
return fmt::format_to(ctx.out(),
"(plnd:{}{}{}{}{}{}{}{}{}{})",
rs.must_repair ? " must_repair" : "",
rs.auto_repair ? " auto_repair" : "",
rs.check_repair ? " check_repair" : "",
rs.deep_scrub_on_error ? " deep_scrub_on_error" : "",
rs.must_deep_scrub ? " must_deep_scrub" : "",
rs.must_scrub ? " must_scrub" : "",
rs.time_for_deep ? " time_for_deep" : "",
rs.need_auto ? " need_auto" : "",
rs.req_scrub ? " req_scrub" : "",
rs.calculated_to_deep ? " deep" : "");
}
};
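// Usage sketch: with the formatter specialization above, a requested_scrub_t
// can be passed directly to libfmt. Only the flags that are set appear in
// the output:
//
//   requested_scrub_t rs;
//   rs.must_scrub = true;
//   rs.must_deep_scrub = true;
//   fmt::format("{}", rs);  // -> "(plnd: must_deep_scrub must_scrub)"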
/**
* The interface used by the PG when requesting scrub-related info or services
*/
struct ScrubPgIF {
virtual ~ScrubPgIF() = default;
friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s)
{
return s.show(out);
}
virtual std::ostream& show(std::ostream& out) const = 0;
// --------------- triggering state-machine events:
virtual void initiate_regular_scrub(epoch_t epoch_queued) = 0;
virtual void initiate_scrub_after_repair(epoch_t epoch_queued) = 0;
virtual void send_scrub_resched(epoch_t epoch_queued) = 0;
virtual void active_pushes_notification(epoch_t epoch_queued) = 0;
virtual void update_applied_notification(epoch_t epoch_queued) = 0;
virtual void digest_update_notification(epoch_t epoch_queued) = 0;
virtual void send_scrub_unblock(epoch_t epoch_queued) = 0;
virtual void send_replica_maps_ready(epoch_t epoch_queued) = 0;
virtual void send_replica_pushes_upd(epoch_t epoch_queued) = 0;
virtual void send_start_replica(epoch_t epoch_queued,
Scrub::act_token_t token) = 0;
virtual void send_sched_replica(epoch_t epoch_queued,
Scrub::act_token_t token) = 0;
virtual void send_full_reset(epoch_t epoch_queued) = 0;
virtual void send_chunk_free(epoch_t epoch_queued) = 0;
virtual void send_chunk_busy(epoch_t epoch_queued) = 0;
virtual void send_local_map_done(epoch_t epoch_queued) = 0;
virtual void send_get_next_chunk(epoch_t epoch_queued) = 0;
virtual void send_scrub_is_finished(epoch_t epoch_queued) = 0;
virtual void on_applied_when_primary(const eversion_t& applied_version) = 0;
// --------------------------------------------------
  // currently only used for an assert:
  [[nodiscard]] virtual bool are_callbacks_pending() const = 0;
/**
* the scrubber is marked 'active':
* - for the primary: when all replica OSDs grant us the requested resources
* - for replicas: upon receiving the scrub request from the primary
*/
[[nodiscard]] virtual bool is_scrub_active() const = 0;
/**
* 'true' until after the FSM processes the 'scrub-finished' event,
* and scrubbing is completely cleaned-up.
*
* In other words - holds longer than is_scrub_active(), thus preventing
* a rescrubbing of the same PG while the previous scrub has not fully
* terminated.
*/
[[nodiscard]] virtual bool is_queued_or_active() const = 0;
  /**
   * Manipulate the Scrubber's 'a scrub request has been queued, or we are
   * actually scrubbing' flag.
   *
   * clear_queued_or_active() will also restart any blocked snaptrimming.
   */
virtual void set_queued_or_active() = 0;
virtual void clear_queued_or_active() = 0;
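  // A minimal usage sketch (illustrative only) of how the two calls above
  // are meant to bracket a scrub: the flag is raised when the scrub request
  // is queued, and dropped only once the FSM has fully wound down:
  //
  //   void queue_scrub_sketch(ScrubPgIF& scrubber, epoch_t e)
  //   {
  //     if (scrubber.is_queued_or_active()) {
  //       return;  // the previous scrub has not fully terminated yet
  //     }
  //     scrubber.set_queued_or_active();
  //     scrubber.initiate_regular_scrub(e);
  //   }
  //   // ...and once the 'scrub-finished' event was handled:
  //   //   scrubber.clear_queued_or_active();  // also restarts blocked snaptrimming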
  /// are we waiting for resource reservation grants from our replicas?
[[nodiscard]] virtual bool is_reserving() const = 0;
/// handle a message carrying a replica map
virtual void map_from_replica(OpRequestRef op) = 0;
virtual void replica_scrub_op(OpRequestRef op) = 0;
virtual void set_op_parameters(const requested_scrub_t&) = 0;
virtual void scrub_clear_state() = 0;
virtual void handle_query_state(ceph::Formatter* f) = 0;
virtual pg_scrubbing_status_t get_schedule() const = 0;
virtual void dump_scrubber(ceph::Formatter* f,
const requested_scrub_t& request_flags) const = 0;
/**
* Return true if soid is currently being scrubbed and pending IOs should
* block. May have a side effect of preempting an in-progress scrub -- will
* return false in that case.
*
* @param soid object to check for ongoing scrub
* @return boolean whether a request on soid should block until scrub
* completion
*/
virtual bool write_blocked_by_scrub(const hobject_t& soid) = 0;
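  // A hedged caller-side sketch (names are illustrative): the client I/O
  // path is expected to consult this predicate before writing an object,
  // and to wait for the scrubbed chunk to be released when it returns true:
  //
  //   if (scrubber->write_blocked_by_scrub(soid)) {
  //     wait_for_scrubbed_chunk(op);  // hypothetical requeue-on-release helper
  //     return;
  //   }
  //   // otherwise - proceed with the write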
/// Returns whether any objects in the range [begin, end] are being scrubbed
virtual bool range_intersects_scrub(const hobject_t& start,
const hobject_t& end) = 0;
/// the op priority, taken from the primary's request message
virtual Scrub::scrub_prio_t replica_op_priority() const = 0;
/// the priority of the on-going scrub (used when requeuing events)
virtual unsigned int scrub_requeue_priority(
Scrub::scrub_prio_t with_priority) const = 0;
virtual unsigned int scrub_requeue_priority(
Scrub::scrub_prio_t with_priority,
unsigned int suggested_priority) const = 0;
virtual void add_callback(Context* context) = 0;
/// add to scrub statistics, but only if the soid is below the scrub start
virtual void stats_of_handled_objects(const object_stat_sum_t& delta_stats,
const hobject_t& soid) = 0;
/**
* the version of 'scrub_clear_state()' that does not try to invoke FSM
* services (thus can be called from FSM reactions)
*/
virtual void clear_pgscrub_state() = 0;
/**
* triggers the 'RemotesReserved' (all replicas granted scrub resources)
* state-machine event
*/
virtual void send_remotes_reserved(epoch_t epoch_queued) = 0;
/**
* triggers the 'ReservationFailure' (at least one replica denied us the
* requested resources) state-machine event
*/
virtual void send_reservation_failure(epoch_t epoch_queued) = 0;
virtual void cleanup_store(ObjectStore::Transaction* t) = 0;
virtual bool get_store_errors(const scrub_ls_arg_t& arg,
scrub_ls_result_t& res_inout) const = 0;
/**
* force a periodic 'publish_stats_to_osd()' call, to update scrub-related
* counters and statistics.
*/
virtual void update_scrub_stats(
ceph::coarse_real_clock::time_point now_is) = 0;
// --------------- reservations -----------------------------------
/**
* message all replicas with a request to "unreserve" scrub
*/
virtual void unreserve_replicas() = 0;
/**
* "forget" all replica reservations. No messages are sent to the
   * previously-reserved replicas.
*
* Used upon interval change. The replicas' state is guaranteed to
* be reset separately by the interval-change event.
*/
virtual void discard_replica_reservations() = 0;
/**
* clear both local and OSD-managed resource reservation flags
*/
virtual void clear_scrub_reservations() = 0;
/**
* Reserve local scrub resources (managed by the OSD)
*
* Fails if OSD's local-scrubs budget was exhausted
* \returns were local resources reserved?
*/
virtual bool reserve_local() = 0;
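  // A sketch of the assumed reservation order (not enforced by this header):
  // local resources are reserved first, then the replicas are asked; a
  // replica's denial eventually surfaces as the 'ReservationFailure' event.
  //
  //   if (!scrubber->reserve_local()) {
  //     return false;  // the OSD's local scrub budget is exhausted - retry later
  //   }
  //   // remote reservations are requested next; on rejection, expect
  //   // send_reservation_failure(epoch_queued) to be triggered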
/**
   * Register with, or de-register from, the OSD scrub queue,
   * following our status as Primary or replica.
*/
virtual void on_primary_change(
std::string_view caller,
const requested_scrub_t& request_flags) = 0;
/**
* Recalculate the required scrub time.
*
* This function assumes that the queue registration status is up-to-date,
   * i.e. the OSD "knows our name" iff we are the Primary.
*/
virtual void update_scrub_job(const requested_scrub_t& request_flags) = 0;
// on the replica:
virtual void handle_scrub_reserve_request(OpRequestRef op) = 0;
virtual void handle_scrub_reserve_release(OpRequestRef op) = 0;
// and on the primary:
virtual void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) = 0;
virtual void handle_scrub_reserve_reject(OpRequestRef op,
pg_shard_t from) = 0;
virtual void rm_from_osd_scrubbing() = 0;
virtual void scrub_requested(scrub_level_t scrub_level,
scrub_type_t scrub_type,
requested_scrub_t& req_flags) = 0;
// --------------- debugging via the asok ------------------------------
virtual int asok_debug(std::string_view cmd,
std::string param,
Formatter* f,
std::stringstream& ss) = 0;
};