From 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 20:24:20 +0200 Subject: Adding upstream version 14.2.21. Signed-off-by: Daniel Baumann --- src/mgr/DaemonHealthMetricCollector.cc | 125 +++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 src/mgr/DaemonHealthMetricCollector.cc (limited to 'src/mgr/DaemonHealthMetricCollector.cc') diff --git a/src/mgr/DaemonHealthMetricCollector.cc b/src/mgr/DaemonHealthMetricCollector.cc new file mode 100644 index 00000000..1c3dc431 --- /dev/null +++ b/src/mgr/DaemonHealthMetricCollector.cc @@ -0,0 +1,125 @@ +#include + +#include "include/health.h" +#include "include/types.h" +#include "DaemonHealthMetricCollector.h" + + + +ostream& operator<<(ostream& os, + const DaemonHealthMetricCollector::DaemonKey& daemon) { + return os << daemon.first << "." << daemon.second; +} + +// define operator<<(ostream&, const vector&) after +// ostream& operator<<(ostream&, const DaemonKey&), so that C++'s +// ADL can use the former instead of using the generic one: +// operator<<(ostream&, const std::pair&) +ostream& operator<<( + ostream& os, + const vector& daemons) +{ + os << "["; + for (auto d = daemons.begin(); d != daemons.end(); ++d) { + if (d != daemons.begin()) os << ","; + os << *d; + } + os << "]"; + return os; +} + +namespace { + +class SlowOps final : public DaemonHealthMetricCollector { + bool _is_relevant(daemon_metric type) const override { + return type == daemon_metric::SLOW_OPS; + } + health_check_t& _get_check(health_check_map_t& cm) const override { + return cm.get_or_add("SLOW_OPS", HEALTH_WARN, ""); + } + bool _update(const DaemonKey& daemon, + const DaemonHealthMetric& metric) override { + auto num_slow = metric.get_n1(); + auto blocked_time = metric.get_n2(); + value.n1 += num_slow; + value.n2 = std::max(value.n2, blocked_time); + if (num_slow || blocked_time) { + daemons.push_back(daemon); + return true; + } else { + return false; + } + } + void _summarize(health_check_t& check) const override { + if (daemons.empty()) { + return; + } + static const char* fmt = "%1% slow ops, oldest one blocked for %2% sec, %3%"; + // Note this message format is used in mgr/prometheus, so any change in format + // requires a corresponding change in the mgr/prometheus module. + ostringstream ss; + if (daemons.size() > 1) { + if (daemons.size() > 10) { + ss << "daemons " << vector(daemons.begin(), daemons.begin()+10) + << "..." << " have slow ops."; + } else { + ss << "daemons " << daemons << " have slow ops."; + } + } else { + ss << daemons.front() << " has slow ops"; + } + check.summary = boost::str(boost::format(fmt) % value.n1 % value.n2 % ss.str()); + // No detail + } + vector daemons; +}; + + +class PendingPGs final : public DaemonHealthMetricCollector { + bool _is_relevant(daemon_metric type) const override { + return type == daemon_metric::PENDING_CREATING_PGS; + } + health_check_t& _get_check(health_check_map_t& cm) const override { + return cm.get_or_add("PENDING_CREATING_PGS", HEALTH_WARN, ""); + } + bool _update(const DaemonKey& osd, + const DaemonHealthMetric& metric) override { + value.n += metric.get_n(); + if (metric.get_n()) { + osds.push_back(osd); + return true; + } else { + return false; + } + } + void _summarize(health_check_t& check) const override { + if (osds.empty()) { + return; + } + static const char* fmt = "%1% PGs pending on creation"; + check.summary = boost::str(boost::format(fmt) % value.n); + ostringstream ss; + if (osds.size() > 1) { + ss << "osds " << osds << " have pending PGs."; + } else { + ss << osds.front() << " has pending PGs"; + } + check.detail.push_back(ss.str()); + } + vector osds; +}; + +} // anonymous namespace + +unique_ptr +DaemonHealthMetricCollector::create(daemon_metric m) +{ + switch (m) { + case daemon_metric::SLOW_OPS: + return unique_ptr{new SlowOps}; + case daemon_metric::PENDING_CREATING_PGS: + return unique_ptr{new PendingPGs}; + default: + return unique_ptr{}; + } +} -- cgit v1.2.3