diff options
Diffstat (limited to 'src/osd/scrubber/scrub_backend.h')
-rw-r--r-- | src/osd/scrubber/scrub_backend.h | 554 |
1 files changed, 554 insertions, 0 deletions
diff --git a/src/osd/scrubber/scrub_backend.h b/src/osd/scrubber/scrub_backend.h new file mode 100644 index 000000000..ffb41c27e --- /dev/null +++ b/src/osd/scrubber/scrub_backend.h @@ -0,0 +1,554 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once + +// clang-format off +/* + +------------------------+ + | | + | PgScrubber | + | |-----------------------------+ + | | | + +------------------------+ | ownes & uses + | PrimaryLogScrub | | + +------------------------+ | + | + | + v + +-------------------------------------------+ + |ScrubBackend | + +----------------+ |============ | + | this_chunk | | | + | (scrub_chunk_t)|<-------| + decode_received_map() | + +----------------+ | + scrub_compare_maps() | + | + scan_snaps() | + | ..... | + | | + | | + +--------------------/-------------\--------+ + --/ / \ + --/ | | + --/ / \ + -/ uses | uses | + uses --/ / \ + --/ / | + --/ | \ + v v v + PgBackend PG/PrimaryLogPG OSD Services + + +*/ +// clang-format on + +#include <fmt/core.h> +#include <fmt/format.h> + +#include <string_view> + +#include "common/LogClient.h" +#include "osd/OSDMap.h" +#include "osd/osd_types_fmt.h" +#include "osd/scrubber_common.h" +#include "osd/SnapMapReaderI.h" + +struct ScrubMap; + +class PG; +class PgScrubber; +struct PGPool; +using Scrub::PgScrubBeListener; + +using data_omap_digests_t = + std::pair<std::optional<uint32_t>, std::optional<uint32_t>>; + +/// a list of fixes to be performed on objects' digests +using digests_fixes_t = std::vector<std::pair<hobject_t, data_omap_digests_t>>; + +using shard_info_map_t = std::map<pg_shard_t, shard_info_wrapper>; +using shard_to_scrubmap_t = std::map<pg_shard_t, ScrubMap>; + +using auth_peers_t = std::vector<std::pair<ScrubMap::object, pg_shard_t>>; + +using wrapped_err_t = + std::variant<inconsistent_obj_wrapper, inconsistent_snapset_wrapper>; +using inconsistent_objs_t = std::vector<wrapped_err_t>; + +/// omap-specific stats +struct omap_stat_t { + int large_omap_objects{0}; + int64_t omap_bytes{0}; + int64_t omap_keys{0}; +}; + +struct error_counters_t { + int shallow_errors{0}; + int deep_errors{0}; +}; + +// the PgScrubber services used by the backend +struct ScrubBeListener { + virtual std::ostream& gen_prefix(std::ostream& out) const = 0; + virtual CephContext* get_pg_cct() const = 0; + virtual LoggerSinkSet& get_logger() const = 0; + virtual bool is_primary() const = 0; + virtual spg_t get_pgid() const = 0; + virtual const OSDMapRef& get_osdmap() const = 0; + virtual void add_to_stats(const object_stat_sum_t& stat) = 0; + virtual void submit_digest_fixes(const digests_fixes_t& fixes) = 0; + virtual ~ScrubBeListener() = default; +}; + +// As the main scrub-backend entry point - scrub_compare_maps() - must +// be able to return both a list of snap fixes and a list of inconsistent +// objects: +struct objs_fix_list_t { + inconsistent_objs_t inconsistent_objs; + std::vector<Scrub::snap_mapper_fix_t> snap_fix_list; +}; + +/** + * A structure used internally by select_auth_object() + * + * Conveys the usability of a specific shard as an auth source. + */ +struct shard_as_auth_t { + // note: 'not_found' differs from 'not_usable' in that 'not_found' + // does not carry an error message to be cluster-logged. + enum class usable_t : uint8_t { not_usable, not_found, usable }; + + // the ctor used when the shard should not be considered as auth + explicit shard_as_auth_t(std::string err_msg) + : possible_auth{usable_t::not_usable} + , error_text{err_msg} + , oi{} + , auth_iter{} + , digest{std::nullopt} + {} + + // the object cannot be found on the shard + explicit shard_as_auth_t() + : possible_auth{usable_t::not_found} + , error_text{} + , oi{} + , auth_iter{} + , digest{std::nullopt} + {} + + shard_as_auth_t(std::string err_msg, std::optional<uint32_t> data_digest) + : possible_auth{usable_t::not_usable} + , error_text{err_msg} + , oi{} + , auth_iter{} + , digest{data_digest} + {} + + // possible auth candidate + shard_as_auth_t(const object_info_t& anoi, + shard_to_scrubmap_t::iterator it, + std::string err_msg, + std::optional<uint32_t> data_digest) + : possible_auth{usable_t::usable} + , error_text{err_msg} + , oi{anoi} + , auth_iter{it} + , digest{data_digest} + {} + + + usable_t possible_auth; + std::string error_text; + object_info_t oi; + shard_to_scrubmap_t::iterator auth_iter; + std::optional<uint32_t> digest; + // when used for Crimson, we'll probably want to return 'digest_match' (and + // other in/out arguments) via this struct +}; + +// the format specifier {D} is used to request debug output +template <> +struct fmt::formatter<shard_as_auth_t> { + template <typename ParseContext> + constexpr auto parse(ParseContext& ctx) + { + auto it = ctx.begin(); + if (it != ctx.end()) { + debug_log = (*it++) == 'D'; + } + return it; + } + template <typename FormatContext> + auto format(shard_as_auth_t const& as_auth, FormatContext& ctx) + { + if (debug_log) { + // note: 'if' chain, as hard to consistently (on all compilers) avoid some + // warnings for a switch plus multiple return paths + if (as_auth.possible_auth == shard_as_auth_t::usable_t::not_usable) { + return fmt::format_to(ctx.out(), + "{{shard-not-usable:{}}}", + as_auth.error_text); + } + if (as_auth.possible_auth == shard_as_auth_t::usable_t::not_found) { + return fmt::format_to(ctx.out(), "{{shard-not-found}}"); + } + return fmt::format_to(ctx.out(), + "{{shard-usable: soid:{} {{txt:{}}} }}", + as_auth.oi.soid, + as_auth.error_text); + + } else { + return fmt::format_to( + ctx.out(), + "usable:{} soid:{} {{txt:{}}}", + (as_auth.possible_auth == shard_as_auth_t::usable_t::usable) ? "yes" + : "no", + as_auth.oi.soid, + as_auth.error_text); + } + } + + bool debug_log{false}; +}; + +struct auth_selection_t { + shard_to_scrubmap_t::iterator auth; ///< an iter into one of this_chunk->maps + pg_shard_t auth_shard; // set to auth->first + object_info_t auth_oi; + shard_info_map_t shard_map; + bool is_auth_available{false}; ///< managed to select an auth' source? + bool digest_match{true}; ///< do all (existing) digests match? +}; + +// note: some scrub tests are sensitive to the specific format of +// auth_selection_t listing in the logs +template <> +struct fmt::formatter<auth_selection_t> { + template <typename ParseContext> + constexpr auto parse(ParseContext& ctx) + { + return ctx.begin(); + } + + template <typename FormatContext> + auto format(auth_selection_t const& aus, FormatContext& ctx) + { + return fmt::format_to(ctx.out(), + " {{AU-S: {}->{:x} OI({:x}:{}) {} dm:{}}} ", + aus.auth->first, + (uint64_t)(&aus.auth->second), + (uint64_t)(&aus.auth_oi), + aus.auth_oi, + aus.shard_map.size(), + aus.digest_match); + } +}; + +/** + * the back-end data that is per-chunk + * + * Created by the Scrubber after all replicas' maps have arrived. + */ +struct scrub_chunk_t { + + explicit scrub_chunk_t(pg_shard_t i_am) { received_maps[i_am] = ScrubMap{}; } + + /// the working set of scrub maps: the received maps, plus + /// Primary's own map. + std::map<pg_shard_t, ScrubMap> received_maps; + + /// a collection of all objs mentioned in the maps + std::set<hobject_t> authoritative_set; + + utime_t started{ceph_clock_now()}; + + digests_fixes_t missing_digest; + + /// Map from object with errors to good peers + std::map<hobject_t, std::list<pg_shard_t>> authoritative; + + inconsistent_objs_t m_inconsistent_objs; + + /// shallow/deep error counters + error_counters_t m_error_counts; + + // these must be reset for each element: + + std::set<pg_shard_t> cur_missing; + std::set<pg_shard_t> cur_inconsistent; + bool fix_digest{false}; +}; + + +/** + * ScrubBackend wraps the data and operations required for the back-end part of + * the scrubbing (i.e. for comparing the maps and fixing objects). + * + * Created anew upon each initiation of a scrub session. + */ +class ScrubBackend { + public: + // Primary constructor + ScrubBackend(ScrubBeListener& scrubber, + PgScrubBeListener& pg, + pg_shard_t i_am, + bool repair, + scrub_level_t shallow_or_deep, + const std::set<pg_shard_t>& acting); + + // Replica constructor: no primary map + ScrubBackend(ScrubBeListener& scrubber, + PgScrubBeListener& pg, + pg_shard_t i_am, + bool repair, + scrub_level_t shallow_or_deep); + + friend class PgScrubber; + friend class TestScrubBackend; + + /** + * reset the per-chunk data structure (scrub_chunk_t). + * Create an empty scrub-map for this shard, and place it + * in the appropriate entry in 'received_maps'. + * + * @returns a pointer to the newly created ScrubMap. + */ + void new_chunk(); + + ScrubMap& get_primary_scrubmap(); + + /** + * sets Backend's m_repair flag (setting m_mode_desc to a corresponding + * string) + */ + void update_repair_status(bool should_repair); + + std::vector<Scrub::snap_mapper_fix_t> replica_clean_meta( + ScrubMap& smap, + bool max_reached, + const hobject_t& start, + Scrub::SnapMapReaderI& snaps_getter); + + /** + * decode the arriving MOSDRepScrubMap message, placing the replica's + * scrub-map into received_maps[from]. + * + * @param from replica + */ + void decode_received_map(pg_shard_t from, const MOSDRepScrubMap& msg); + + objs_fix_list_t scrub_compare_maps(bool max_reached, + Scrub::SnapMapReaderI& snaps_getter); + + int scrub_process_inconsistent(); + + const omap_stat_t& this_scrub_omapstats() const { return m_omap_stats; } + + int authoritative_peers_count() const { return m_auth_peers.size(); }; + + std::ostream& logger_prefix(std::ostream* _dout, const ScrubBackend* t); + + private: + // set/constructed at the ctor(): + ScrubBeListener& m_scrubber; + Scrub::PgScrubBeListener& m_pg; + const pg_shard_t m_pg_whoami; + bool m_repair; + const scrub_level_t m_depth; + const spg_t m_pg_id; + std::vector<pg_shard_t> m_acting_but_me; // primary only + bool m_is_replicated{true}; + std::string_view m_mode_desc; + std::string m_formatted_id; + const PGPool& m_pool; + bool m_incomplete_clones_allowed{false}; + + /// collecting some scrub-session-wide omap stats + omap_stat_t m_omap_stats; + + /// Mapping from object with errors to good peers + std::map<hobject_t, auth_peers_t> m_auth_peers; + + // shorthands: + ConfigProxy& m_conf; + LoggerSinkSet& clog; + + private: + + struct auth_and_obj_errs_t { + std::list<pg_shard_t> auth_list; + std::set<pg_shard_t> object_errors; + }; + + std::optional<scrub_chunk_t> this_chunk; + + /// Maps from objects with errors to missing peers + HobjToShardSetMapping m_missing; // used by scrub_process_inconsistent() + + /// Maps from objects with errors to inconsistent peers + HobjToShardSetMapping m_inconsistent; // used by scrub_process_inconsistent() + + /// Cleaned std::map pending snap metadata scrub + ScrubMap m_cleaned_meta_map{}; + + /// a reference to the primary map + ScrubMap& my_map(); + + /// shallow/deep error counters + error_counters_t get_error_counts() const { return this_chunk->m_error_counts; } + + /** + * merge_to_authoritative_set() updates + * - this_chunk->maps[from] with the replicas' scrub-maps; + * - this_chunk->authoritative_set as a union of all the maps' objects; + */ + void merge_to_authoritative_set(); + + // note: used by both Primary & replicas + static ScrubMap clean_meta_map(ScrubMap& cleaned, bool max_reached); + + void compare_smaps(); + + /// might return error messages to be cluster-logged + std::optional<std::string> compare_obj_in_maps(const hobject_t& ho); + + void omap_checks(); + + std::optional<auth_and_obj_errs_t> for_empty_auth_list( + std::list<pg_shard_t>&& auths, + std::set<pg_shard_t>&& obj_errors, + shard_to_scrubmap_t::iterator auth, + const hobject_t& ho, + std::stringstream& errstream); + + auth_and_obj_errs_t match_in_shards(const hobject_t& ho, + auth_selection_t& auth_sel, + inconsistent_obj_wrapper& obj_result, + std::stringstream& errstream); + + // returns: true if a discrepancy was found + bool compare_obj_details(pg_shard_t auth_shard, + const ScrubMap::object& auth, + const object_info_t& auth_oi, + const ScrubMap::object& candidate, + shard_info_wrapper& shard_result, + inconsistent_obj_wrapper& obj_result, + std::stringstream& errorstream, + bool has_snapset); + + void repair_object(const hobject_t& soid, + const auth_peers_t& ok_peers, + const std::set<pg_shard_t>& bad_peers); + + /** + * An auxiliary used by select_auth_object() to test a specific shard + * as a possible auth candidate. + * @param ho the hobject for which we are looking for an auth source + * @param srd the candidate shard + * @param shard_map [out] a collection of shard_info-s per shard. + * possible_auth_shard() might set error flags in the relevant (this shard's) + * entry. + */ + shard_as_auth_t possible_auth_shard(const hobject_t& ho, + const pg_shard_t& srd, + shard_info_map_t& shard_map); + + auth_selection_t select_auth_object(const hobject_t& ho, + std::stringstream& errstream); + + + enum class digest_fixing_t { no, if_aged, force }; + + /* + * an aux used by inconsistents() to determine whether to fix the digest + */ + [[nodiscard]] digest_fixing_t should_fix_digest( + const hobject_t& ho, + const ScrubMap::object& auth_object, + const object_info_t& auth_oi, + bool repair_flag, + std::stringstream& errstream); + + void inconsistents(const hobject_t& ho, + ScrubMap::object& auth_object, + object_info_t& auth_oi, // consider moving to object + auth_and_obj_errs_t&& auth_n_errs, + std::stringstream& errstream); + + int process_clones_to(const std::optional<hobject_t>& head, + const std::optional<SnapSet>& snapset, + std::optional<snapid_t> target, + std::vector<snapid_t>::reverse_iterator* curclone, + inconsistent_snapset_wrapper& e); + + /** + * Validate consistency of the object info and snap sets. + */ + void scrub_snapshot_metadata(ScrubMap& map); + + /** + * Updates the "global" (i.e. - not 'per-chunk') databases: + * - in m_authoritative: a list of good peers for each "problem" object in + * the current chunk; + * - in m_cleaned_meta_map: a "cleaned" version of the object (the one from + * the selected shard). + */ + void update_authoritative(); + + void log_missing(int missing, + const std::optional<hobject_t>& head, + const char* logged_func_name); + + /** + * returns a list of snaps "fix orders" + */ + std::vector<Scrub::snap_mapper_fix_t> scan_snaps( + ScrubMap& smap, + Scrub::SnapMapReaderI& snaps_getter); + + /** + * an aux used by scan_snaps(), possibly returning a fix-order + * for a specific hobject. + */ + std::optional<Scrub::snap_mapper_fix_t> scan_object_snaps( + const hobject_t& hoid, + const SnapSet& snapset, + Scrub::SnapMapReaderI& snaps_getter); + + // accessing the PG backend for this translation service + uint64_t logical_to_ondisk_size(uint64_t logical_size) const; +}; + +template <> +struct fmt::formatter<data_omap_digests_t> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template <typename FormatContext> + auto format(const data_omap_digests_t& dg, FormatContext& ctx) + { + // can't use value_or() due to different output types + if (std::get<0>(dg).has_value()) { + fmt::format_to(ctx.out(), "[{:#x}/", std::get<0>(dg).value()); + } else { + fmt::format_to(ctx.out(), "[---/"); + } + if (std::get<1>(dg).has_value()) { + return fmt::format_to(ctx.out(), "{:#x}]", std::get<1>(dg).value()); + } else { + return fmt::format_to(ctx.out(), "---]"); + } + } +}; + +template <> +struct fmt::formatter<std::pair<hobject_t, data_omap_digests_t>> { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + + template <typename FormatContext> + auto format(const std::pair<hobject_t, data_omap_digests_t>& x, + FormatContext& ctx) const + { + return fmt::format_to(ctx.out(), + "{{ {} - {} }}", + std::get<0>(x), + std::get<1>(x)); + } +}; |