summaryrefslogtreecommitdiffstats
path: root/qa/tasks/check_counter.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
commit483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
treee5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /qa/tasks/check_counter.py
parentInitial commit. (diff)
downloadceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz
ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--qa/tasks/check_counter.py98
1 files changed, 98 insertions, 0 deletions
diff --git a/qa/tasks/check_counter.py b/qa/tasks/check_counter.py
new file mode 100644
index 00000000..daa81973
--- /dev/null
+++ b/qa/tasks/check_counter.py
@@ -0,0 +1,98 @@
+
+import logging
+import json
+
+from teuthology.task import Task
+from teuthology import misc
+
+log = logging.getLogger(__name__)
+
+
+class CheckCounter(Task):
+ """
+ Use this task to validate that some daemon perf counters were
+ incremented by the nested tasks.
+
+ Config:
+ 'cluster_name': optional, specify which cluster
+ 'target': dictionary of daemon type to list of performance counters.
+ 'dry_run': just log the value of the counters, don't fail if they
+ aren't nonzero.
+
+ Success condition is that for all of the named counters, at least
+ one of the daemons of that type has the counter nonzero.
+
+ Example to check cephfs dirfrag splits are happening:
+ - install:
+ - ceph:
+ - ceph-fuse:
+ - check-counter:
+ counters:
+ mds:
+ - "mds.dir_split"
+ - workunit: ...
+ """
+
+ def start(self):
+ log.info("START")
+
+ def end(self):
+ overrides = self.ctx.config.get('overrides', {})
+ misc.deep_merge(self.config, overrides.get('check-counter', {}))
+
+ cluster_name = self.config.get('cluster_name', None)
+ dry_run = self.config.get('dry_run', False)
+ targets = self.config.get('counters', {})
+
+ if cluster_name is None:
+ cluster_name = next(iter(self.ctx.managers.keys()))
+
+ for daemon_type, counters in targets.items():
+ # List of 'a', 'b', 'c'...
+ daemon_ids = list(misc.all_roles_of_type(self.ctx.cluster, daemon_type))
+ daemons = dict([(daemon_id,
+ self.ctx.daemons.get_daemon(daemon_type, daemon_id))
+ for daemon_id in daemon_ids])
+
+ seen = set()
+
+ for daemon_id, daemon in daemons.items():
+ if not daemon.running():
+ log.info("Ignoring daemon {0}, it isn't running".format(daemon_id))
+ continue
+ else:
+ log.debug("Getting stats from {0}".format(daemon_id))
+
+ manager = self.ctx.managers[cluster_name]
+ proc = manager.admin_socket(daemon_type, daemon_id, ["perf", "dump"])
+ response_data = proc.stdout.getvalue().strip()
+ if response_data:
+ perf_dump = json.loads(response_data)
+ else:
+ log.warning("No admin socket response from {0}, skipping".format(daemon_id))
+ continue
+
+ for counter in counters:
+ subsys, counter_id = counter.split(".")
+ if subsys not in perf_dump or counter_id not in perf_dump[subsys]:
+ log.warning("Counter '{0}' not found on daemon {1}.{2}".format(
+ counter, daemon_type, daemon_id))
+ continue
+ value = perf_dump[subsys][counter_id]
+
+ log.info("Daemon {0}.{1} {2}={3}".format(
+ daemon_type, daemon_id, counter, value
+ ))
+
+ if value > 0:
+ seen.add(counter)
+
+ if not dry_run:
+ unseen = set(counters) - set(seen)
+ if unseen:
+ raise RuntimeError("The following counters failed to be set "
+ "on {0} daemons: {1}".format(
+ daemon_type, unseen
+ ))
+
+task = CheckCounter