summaryrefslogtreecommitdiffstats
path: root/src/osd/scrubber/scrub_backend.h
blob: ffb41c27e37ba0636f34141c8b3bbe0dea0eff39 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
#pragma once

// clang-format off
/*
      +------------------------+
      |                        |
      |   PgScrubber           |
      |                        |-----------------------------+
      |                        |                             |
      +------------------------+                             | ownes & uses
      |   PrimaryLogScrub      |                             |
      +------------------------+                             |
                                                             |
                                                             |
                                                             v
                                  +-------------------------------------------+
                                  |ScrubBackend                               |
        +----------------+        |============                               |
        |  this_chunk    |        |                                           |
        | (scrub_chunk_t)|<-------| + decode_received_map()                   |
        +----------------+        | + scrub_compare_maps()                    |
                                  | + scan_snaps()                            |
                                  | .....                                     |
                                  |                                           |
                                  |                                           |
                                  +--------------------/-------------\--------+
                                         --/          /               \
                                      --/             |               |
                                   --/               /                 \
                                 -/            uses  |            uses |
                        uses  --/                   /                   \
                           --/                     /                    |
                        --/                        |                     \
                       v                           v                     v
                  PgBackend             PG/PrimaryLogPG          OSD Services


*/
// clang-format on

#include <fmt/core.h>
#include <fmt/format.h>

#include <string_view>

#include "common/LogClient.h"
#include "osd/OSDMap.h"
#include "osd/osd_types_fmt.h"
#include "osd/scrubber_common.h"
#include "osd/SnapMapReaderI.h"

struct ScrubMap;

class PG;
class PgScrubber;
struct PGPool;
using Scrub::PgScrubBeListener;

using data_omap_digests_t =
  std::pair<std::optional<uint32_t>, std::optional<uint32_t>>;

/// a list of fixes to be performed on objects' digests
using digests_fixes_t = std::vector<std::pair<hobject_t, data_omap_digests_t>>;

using shard_info_map_t = std::map<pg_shard_t, shard_info_wrapper>;
using shard_to_scrubmap_t = std::map<pg_shard_t, ScrubMap>;

using auth_peers_t = std::vector<std::pair<ScrubMap::object, pg_shard_t>>;

using wrapped_err_t =
  std::variant<inconsistent_obj_wrapper, inconsistent_snapset_wrapper>;
using inconsistent_objs_t = std::vector<wrapped_err_t>;

/// omap-specific stats
struct omap_stat_t {
  int large_omap_objects{0};
  int64_t omap_bytes{0};
  int64_t omap_keys{0};
};

struct error_counters_t {
  int shallow_errors{0};
  int deep_errors{0};
};

// the PgScrubber services used by the backend
struct ScrubBeListener {
  virtual std::ostream& gen_prefix(std::ostream& out) const = 0;
  virtual CephContext* get_pg_cct() const = 0;
  virtual LoggerSinkSet& get_logger() const = 0;
  virtual bool is_primary() const = 0;
  virtual spg_t get_pgid() const = 0;
  virtual const OSDMapRef& get_osdmap() const = 0;
  virtual void add_to_stats(const object_stat_sum_t& stat) = 0;
  virtual void submit_digest_fixes(const digests_fixes_t& fixes) = 0;
  virtual ~ScrubBeListener() = default;
};

// As the main scrub-backend entry point - scrub_compare_maps() - must
// be able to return both a list of snap fixes and a list of inconsistent
// objects:
struct objs_fix_list_t {
  inconsistent_objs_t inconsistent_objs;
  std::vector<Scrub::snap_mapper_fix_t> snap_fix_list;
};

/**
 * A structure used internally by select_auth_object()
 *
 * Conveys the usability of a specific shard as an auth source.
 */
struct shard_as_auth_t {
  // note: 'not_found' differs from 'not_usable' in that 'not_found'
  // does not carry an error message to be cluster-logged.
  enum class usable_t : uint8_t { not_usable, not_found, usable };

  // the ctor used when the shard should not be considered as auth
  explicit shard_as_auth_t(std::string err_msg)
      : possible_auth{usable_t::not_usable}
      , error_text{err_msg}
      , oi{}
      , auth_iter{}
      , digest{std::nullopt}
  {}

  // the object cannot be found on the shard
  explicit shard_as_auth_t()
      : possible_auth{usable_t::not_found}
      , error_text{}
      , oi{}
      , auth_iter{}
      , digest{std::nullopt}
  {}

  shard_as_auth_t(std::string err_msg, std::optional<uint32_t> data_digest)
      : possible_auth{usable_t::not_usable}
      , error_text{err_msg}
      , oi{}
      , auth_iter{}
      , digest{data_digest}
  {}

  // possible auth candidate
  shard_as_auth_t(const object_info_t& anoi,
                  shard_to_scrubmap_t::iterator it,
                  std::string err_msg,
                  std::optional<uint32_t> data_digest)
      : possible_auth{usable_t::usable}
      , error_text{err_msg}
      , oi{anoi}
      , auth_iter{it}
      , digest{data_digest}
  {}


  usable_t possible_auth;
  std::string error_text;
  object_info_t oi;
  shard_to_scrubmap_t::iterator auth_iter;
  std::optional<uint32_t> digest;
  // when used for Crimson, we'll probably want to return 'digest_match' (and
  // other in/out arguments) via this struct
};

// the format specifier {D} is used to request debug output
template <>
struct fmt::formatter<shard_as_auth_t> {
  template <typename ParseContext>
  constexpr auto parse(ParseContext& ctx)
  {
    auto it = ctx.begin();
    if (it != ctx.end()) {
      debug_log = (*it++) == 'D';
    }
    return it;
  }
  template <typename FormatContext>
  auto format(shard_as_auth_t const& as_auth, FormatContext& ctx)
  {
    if (debug_log) {
      // note: 'if' chain, as hard to consistently (on all compilers) avoid some
      // warnings for a switch plus multiple return paths
      if (as_auth.possible_auth == shard_as_auth_t::usable_t::not_usable) {
        return fmt::format_to(ctx.out(),
                              "{{shard-not-usable:{}}}",
                              as_auth.error_text);
      }
      if (as_auth.possible_auth == shard_as_auth_t::usable_t::not_found) {
        return fmt::format_to(ctx.out(), "{{shard-not-found}}");
      }
      return fmt::format_to(ctx.out(),
                            "{{shard-usable: soid:{} {{txt:{}}} }}",
                            as_auth.oi.soid,
                            as_auth.error_text);

    } else {
      return fmt::format_to(
        ctx.out(),
        "usable:{} soid:{} {{txt:{}}}",
        (as_auth.possible_auth == shard_as_auth_t::usable_t::usable) ? "yes"
                                                                     : "no",
        as_auth.oi.soid,
        as_auth.error_text);
    }
  }

  bool debug_log{false};
};

struct auth_selection_t {
  shard_to_scrubmap_t::iterator auth;  ///< an iter into one of this_chunk->maps
  pg_shard_t auth_shard;               // set to auth->first
  object_info_t auth_oi;
  shard_info_map_t shard_map;
  bool is_auth_available{false};  ///< managed to select an auth' source?
  bool digest_match{true};        ///< do all (existing) digests match?
};

// note: some scrub tests are sensitive to the specific format of
// auth_selection_t listing in the logs
template <>
struct fmt::formatter<auth_selection_t> {
  template <typename ParseContext>
  constexpr auto parse(ParseContext& ctx)
  {
    return ctx.begin();
  }

  template <typename FormatContext>
  auto format(auth_selection_t const& aus, FormatContext& ctx)
  {
    return fmt::format_to(ctx.out(),
                          " {{AU-S: {}->{:x} OI({:x}:{}) {} dm:{}}} ",
                          aus.auth->first,
                          (uint64_t)(&aus.auth->second),
                          (uint64_t)(&aus.auth_oi),
                          aus.auth_oi,
                          aus.shard_map.size(),
                          aus.digest_match);
  }
};

/**
 * the back-end data that is per-chunk
 *
 * Created by the Scrubber after all replicas' maps have arrived.
 */
struct scrub_chunk_t {

  explicit scrub_chunk_t(pg_shard_t i_am) { received_maps[i_am] = ScrubMap{}; }

  /// the working set of scrub maps: the received maps, plus
  /// Primary's own map.
  std::map<pg_shard_t, ScrubMap> received_maps;

  /// a collection of all objs mentioned in the maps
  std::set<hobject_t> authoritative_set;

  utime_t started{ceph_clock_now()};

  digests_fixes_t missing_digest;

  /// Map from object with errors to good peers
  std::map<hobject_t, std::list<pg_shard_t>> authoritative;

  inconsistent_objs_t m_inconsistent_objs;

  /// shallow/deep error counters
  error_counters_t m_error_counts;

  // these must be reset for each element:

  std::set<pg_shard_t> cur_missing;
  std::set<pg_shard_t> cur_inconsistent;
  bool fix_digest{false};
};


/**
 * ScrubBackend wraps the data and operations required for the back-end part of
 * the scrubbing (i.e. for comparing the maps and fixing objects).
 *
 * Created anew upon each initiation of a scrub session.
 */
class ScrubBackend {
 public:
  // Primary constructor
  ScrubBackend(ScrubBeListener& scrubber,
               PgScrubBeListener& pg,
               pg_shard_t i_am,
               bool repair,
               scrub_level_t shallow_or_deep,
               const std::set<pg_shard_t>& acting);

  // Replica constructor: no primary map
  ScrubBackend(ScrubBeListener& scrubber,
               PgScrubBeListener& pg,
               pg_shard_t i_am,
               bool repair,
               scrub_level_t shallow_or_deep);

  friend class PgScrubber;
  friend class TestScrubBackend;

  /**
   * reset the per-chunk data structure (scrub_chunk_t).
   * Create an empty scrub-map for this shard, and place it
   * in the appropriate entry in 'received_maps'.
   *
   * @returns a pointer to the newly created ScrubMap.
   */
  void new_chunk();

  ScrubMap& get_primary_scrubmap();

  /**
   * sets Backend's m_repair flag (setting m_mode_desc to a corresponding
   * string)
   */
  void update_repair_status(bool should_repair);

  std::vector<Scrub::snap_mapper_fix_t> replica_clean_meta(
    ScrubMap& smap,
    bool max_reached,
    const hobject_t& start,
    Scrub::SnapMapReaderI& snaps_getter);

  /**
   * decode the arriving MOSDRepScrubMap message, placing the replica's
   * scrub-map into received_maps[from].
   *
   * @param from replica
   */
  void decode_received_map(pg_shard_t from, const MOSDRepScrubMap& msg);

  objs_fix_list_t scrub_compare_maps(bool max_reached,
				     Scrub::SnapMapReaderI& snaps_getter);

  int scrub_process_inconsistent();

  const omap_stat_t& this_scrub_omapstats() const { return m_omap_stats; }

  int authoritative_peers_count() const { return m_auth_peers.size(); };

  std::ostream& logger_prefix(std::ostream* _dout, const ScrubBackend* t);

 private:
  // set/constructed at the ctor():
  ScrubBeListener& m_scrubber;
  Scrub::PgScrubBeListener& m_pg;
  const pg_shard_t m_pg_whoami;
  bool m_repair;
  const scrub_level_t m_depth;
  const spg_t m_pg_id;
  std::vector<pg_shard_t> m_acting_but_me;  // primary only
  bool m_is_replicated{true};
  std::string_view m_mode_desc;
  std::string m_formatted_id;
  const PGPool& m_pool;
  bool m_incomplete_clones_allowed{false};

  /// collecting some scrub-session-wide omap stats
  omap_stat_t m_omap_stats;

  /// Mapping from object with errors to good peers
  std::map<hobject_t, auth_peers_t> m_auth_peers;

  // shorthands:
  ConfigProxy& m_conf;
  LoggerSinkSet& clog;

 private:

  struct auth_and_obj_errs_t {
    std::list<pg_shard_t> auth_list;
    std::set<pg_shard_t> object_errors;
  };

  std::optional<scrub_chunk_t> this_chunk;

  /// Maps from objects with errors to missing peers
  HobjToShardSetMapping m_missing;  // used by scrub_process_inconsistent()

  /// Maps from objects with errors to inconsistent peers
  HobjToShardSetMapping m_inconsistent;  // used by scrub_process_inconsistent()

  /// Cleaned std::map pending snap metadata scrub
  ScrubMap m_cleaned_meta_map{};

  /// a reference to the primary map
  ScrubMap& my_map();

  /// shallow/deep error counters
  error_counters_t get_error_counts() const { return this_chunk->m_error_counts; }

  /**
   *  merge_to_authoritative_set() updates
   *   - this_chunk->maps[from] with the replicas' scrub-maps;
   *   - this_chunk->authoritative_set as a union of all the maps' objects;
   */
  void merge_to_authoritative_set();

  // note: used by both Primary & replicas
  static ScrubMap clean_meta_map(ScrubMap& cleaned, bool max_reached);

  void compare_smaps();

  /// might return error messages to be cluster-logged
  std::optional<std::string> compare_obj_in_maps(const hobject_t& ho);

  void omap_checks();

  std::optional<auth_and_obj_errs_t> for_empty_auth_list(
    std::list<pg_shard_t>&& auths,
    std::set<pg_shard_t>&& obj_errors,
    shard_to_scrubmap_t::iterator auth,
    const hobject_t& ho,
    std::stringstream& errstream);

  auth_and_obj_errs_t match_in_shards(const hobject_t& ho,
                                      auth_selection_t& auth_sel,
                                      inconsistent_obj_wrapper& obj_result,
                                      std::stringstream& errstream);

  // returns: true if a discrepancy was found
  bool compare_obj_details(pg_shard_t auth_shard,
                           const ScrubMap::object& auth,
                           const object_info_t& auth_oi,
                           const ScrubMap::object& candidate,
                           shard_info_wrapper& shard_result,
                           inconsistent_obj_wrapper& obj_result,
                           std::stringstream& errorstream,
                           bool has_snapset);

  void repair_object(const hobject_t& soid,
                     const auth_peers_t& ok_peers,
                     const std::set<pg_shard_t>& bad_peers);

  /**
   * An auxiliary used by select_auth_object() to test a specific shard
   * as a possible auth candidate.
   * @param ho        the hobject for which we are looking for an auth source
   * @param srd       the candidate shard
   * @param shard_map [out] a collection of shard_info-s per shard.
   * possible_auth_shard() might set error flags in the relevant (this shard's)
   * entry.
   */
  shard_as_auth_t possible_auth_shard(const hobject_t& ho,
                                      const pg_shard_t& srd,
                                      shard_info_map_t& shard_map);

  auth_selection_t select_auth_object(const hobject_t& ho,
                                      std::stringstream& errstream);


  enum class digest_fixing_t { no, if_aged, force };

  /*
   *  an aux used by inconsistents() to determine whether to fix the digest
   */
  [[nodiscard]] digest_fixing_t should_fix_digest(
    const hobject_t& ho,
    const ScrubMap::object& auth_object,
    const object_info_t& auth_oi,
    bool repair_flag,
    std::stringstream& errstream);

  void inconsistents(const hobject_t& ho,
                     ScrubMap::object& auth_object,
                     object_info_t& auth_oi,  // consider moving to object
                     auth_and_obj_errs_t&& auth_n_errs,
                     std::stringstream& errstream);

  int process_clones_to(const std::optional<hobject_t>& head,
                        const std::optional<SnapSet>& snapset,
                        std::optional<snapid_t> target,
                        std::vector<snapid_t>::reverse_iterator* curclone,
                        inconsistent_snapset_wrapper& e);

  /**
   * Validate consistency of the object info and snap sets.
   */
  void scrub_snapshot_metadata(ScrubMap& map);

  /**
   *  Updates the "global" (i.e. - not 'per-chunk') databases:
   *   - in m_authoritative: a list of good peers for each "problem" object in
   *     the current chunk;
   *   - in m_cleaned_meta_map: a "cleaned" version of the object (the one from
   *     the selected shard).
   */
  void update_authoritative();

  void log_missing(int missing,
                   const std::optional<hobject_t>& head,
                   const char* logged_func_name);

  /**
   * returns a list of snaps "fix orders"
   */
  std::vector<Scrub::snap_mapper_fix_t> scan_snaps(
    ScrubMap& smap,
    Scrub::SnapMapReaderI& snaps_getter);

  /**
   * an aux used by scan_snaps(), possibly returning a fix-order
   * for a specific hobject.
   */
  std::optional<Scrub::snap_mapper_fix_t> scan_object_snaps(
    const hobject_t& hoid,
    const SnapSet& snapset,
    Scrub::SnapMapReaderI& snaps_getter);

  // accessing the PG backend for this translation service
  uint64_t logical_to_ondisk_size(uint64_t logical_size) const;
};

template <>
struct fmt::formatter<data_omap_digests_t> {
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  template <typename FormatContext>
  auto format(const data_omap_digests_t& dg, FormatContext& ctx)
  {
    // can't use value_or() due to different output types
    if (std::get<0>(dg).has_value()) {
      fmt::format_to(ctx.out(), "[{:#x}/", std::get<0>(dg).value());
    } else {
      fmt::format_to(ctx.out(), "[---/");
    }
    if (std::get<1>(dg).has_value()) {
      return fmt::format_to(ctx.out(), "{:#x}]", std::get<1>(dg).value());
    } else {
      return fmt::format_to(ctx.out(), "---]");
    }
  }
};

template <>
struct fmt::formatter<std::pair<hobject_t, data_omap_digests_t>> {
  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }

  template <typename FormatContext>
  auto format(const std::pair<hobject_t, data_omap_digests_t>& x,
	      FormatContext& ctx) const
  {
    return fmt::format_to(ctx.out(),
			  "{{ {} - {} }}",
			  std::get<0>(x),
			  std::get<1>(x));
  }
};