summary refs log tree commit diff stats
path: root/src/mgr/DaemonHealthMetricCollector.cc
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
commit e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree 64f88b554b444a49f656b6c656111a145cbbaa28 /src/mgr/DaemonHealthMetricCollector.cc
parent Initial commit. (diff)
downloadceph-upstream/18.2.2.tar.xz
ceph-upstream/18.2.2.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/mgr/DaemonHealthMetricCollector.cc')
-rw-r--r-- src/mgr/DaemonHealthMetricCollector.cc 105
1 files changed, 105 insertions, 0 deletions
diff --git a/src/mgr/DaemonHealthMetricCollector.cc b/src/mgr/DaemonHealthMetricCollector.cc
new file mode 100644
index 000000000..bf206015a
--- /dev/null
+++ b/src/mgr/DaemonHealthMetricCollector.cc
@@ -0,0 +1,105 @@
+#include <fmt/format.h>
+
+#include "include/health.h"
+#include "include/types.h"
+#include "DaemonHealthMetricCollector.h"
+
+namespace {
+
+using std::unique_ptr;
+using std::vector;
+using std::ostringstream;
+
+// Collector for daemon_metric::SLOW_OPS reports. Sums the slow-op count (n1),
+// tracks the largest blocked time (n2) seen so far, remembers every daemon
+// that reported a non-zero value, and fills in the "SLOW_OPS" health check.
+class SlowOps final : public DaemonHealthMetricCollector {
+  // Daemons that reported a non-zero slow-op count or blocked time.
+  vector<DaemonKey> daemons;
+
+  // Only SLOW_OPS metrics are handled by this collector.
+  bool _is_relevant(daemon_metric type) const override {
+    return daemon_metric::SLOW_OPS == type;
+  }
+
+  // Fetch (or create) the "SLOW_OPS" warning entry in the health check map.
+  health_check_t& _get_check(health_check_map_t& cm) const override {
+    return cm.get_or_add("SLOW_OPS", HEALTH_WARN, "", 1);
+  }
+
+  // Fold one daemon's metric into the running totals.
+  // Returns true (and records the daemon) when the metric carries a non-zero
+  // slow-op count or blocked time; returns false otherwise.
+  bool _update(const DaemonKey& daemon,
+               const DaemonHealthMetric& metric) override {
+    const auto slow_count = metric.get_n1();
+    const auto oldest_age = metric.get_n2();
+    value.n1 += slow_count;
+    value.n2 = std::max(value.n2, oldest_age);
+    if (!slow_count && !oldest_age) {
+      return false;
+    }
+    daemons.push_back(daemon);
+    return true;
+  }
+
+  // Write the summary line of the health check. Leaves the check untouched
+  // when no daemon has been recorded.
+  void _summarize(health_check_t& check) const override {
+    if (daemons.empty()) {
+      return;
+    }
+    // NOTE: mgr/prometheus consumes this message format; any change to the
+    // wording below must be mirrored in the mgr/prometheus module.
+    ostringstream who;
+    if (daemons.size() == 1) {
+      who << daemons.front() << " has slow ops";
+    } else if (daemons.size() <= 10) {
+      who << "daemons " << daemons << " have slow ops.";
+    } else {
+      // More than ten offenders: list only the first ten, then an ellipsis.
+      const vector<DaemonKey> shown(daemons.begin(), daemons.begin() + 10);
+      who << "daemons " << shown << "..." << " have slow ops.";
+    }
+    check.summary =
+      fmt::format("{} slow ops, oldest one blocked for {} sec, {}",
+                  value.n1, value.n2, who.str());
+    // No detail lines are emitted for this check.
+  }
+};
+
+
+// Collector for daemon_metric::PENDING_CREATING_PGS reports. Sums the reported
+// counts, remembers every daemon that reported a non-zero count, and fills in
+// the "PENDING_CREATING_PGS" health check.
+class PendingPGs final : public DaemonHealthMetricCollector {
+  // Daemons that reported a non-zero pending-PG count.
+  vector<DaemonKey> osds;
+
+  // Only PENDING_CREATING_PGS metrics are handled by this collector.
+  bool _is_relevant(daemon_metric type) const override {
+    return daemon_metric::PENDING_CREATING_PGS == type;
+  }
+
+  // Fetch (or create) the "PENDING_CREATING_PGS" warning entry in the map.
+  health_check_t& _get_check(health_check_map_t& cm) const override {
+    return cm.get_or_add("PENDING_CREATING_PGS", HEALTH_WARN, "", 1);
+  }
+
+  // Add one daemon's count to the running total.
+  // Returns true (and records the daemon) when the count is non-zero;
+  // returns false otherwise.
+  bool _update(const DaemonKey& osd,
+               const DaemonHealthMetric& metric) override {
+    const auto pending = metric.get_n();
+    value.n += pending;
+    if (!pending) {
+      return false;
+    }
+    osds.push_back(osd);
+    return true;
+  }
+
+  // Write the summary and append one detail line naming the recorded daemons.
+  // Leaves the check untouched when no daemon has been recorded.
+  void _summarize(health_check_t& check) const override {
+    if (osds.empty()) {
+      return;
+    }
+    check.summary = fmt::format("{} PGs pending on creation", value.n);
+    ostringstream who;
+    if (osds.size() == 1) {
+      who << osds.front() << " has pending PGs";
+    } else {
+      who << "osds " << osds << " have pending PGs.";
+    }
+    check.detail.push_back(who.str());
+  }
+};
+
+} // anonymous namespace
+
+// Factory: build the collector that handles metric type `m`.
+// Returns an empty pointer when no collector exists for that type.
+unique_ptr<DaemonHealthMetricCollector>
+DaemonHealthMetricCollector::create(daemon_metric m)
+{
+  if (m == daemon_metric::SLOW_OPS) {
+    return std::make_unique<SlowOps>();
+  }
+  if (m == daemon_metric::PENDING_CREATING_PGS) {
+    return std::make_unique<PendingPGs>();
+  }
+  // Unknown metric type: nothing to collect.
+  return {};
+}