author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 06:53:20 +0000
commit    | e5a812082ae033afb1eed82c0f2df3d0f6bdc93f (patch)
tree      | a6716c9275b4b413f6c9194798b34b91affb3cc7 /python
parent    | Initial commit. (diff)
download  | pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.tar.xz
          | pacemaker-e5a812082ae033afb1eed82c0f2df3d0f6bdc93f.zip
Adding upstream version 2.1.6. (upstream/2.1.6)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'python')
-rw-r--r-- | python/Makefile.am                   |  20
-rw-r--r-- | python/pacemaker/Makefile.am         |  17
-rw-r--r-- | python/pacemaker/__init__.py         |   9
-rw-r--r-- | python/pacemaker/_cts/CTS.py         | 244
-rw-r--r-- | python/pacemaker/_cts/Makefile.am    |  24
-rw-r--r-- | python/pacemaker/_cts/__init__.py    |   6
-rw-r--r-- | python/pacemaker/_cts/corosync.py    | 169
-rw-r--r-- | python/pacemaker/_cts/environment.py | 651
-rw-r--r-- | python/pacemaker/_cts/errors.py      |  53
-rw-r--r-- | python/pacemaker/_cts/logging.py     | 130
-rw-r--r-- | python/pacemaker/_cts/patterns.py    | 408
-rw-r--r-- | python/pacemaker/_cts/process.py     |  76
-rw-r--r-- | python/pacemaker/_cts/remote.py      | 288
-rw-r--r-- | python/pacemaker/_cts/test.py        | 594
-rw-r--r-- | python/pacemaker/_cts/watcher.py     | 551
-rw-r--r-- | python/pacemaker/buildoptions.py.in  |  57
-rw-r--r-- | python/pacemaker/exitstatus.py       |  59
-rw-r--r-- | python/pylintrc                      | 556
-rw-r--r-- | python/setup.py.in                   |  20
-rw-r--r-- | python/tests/Makefile.am             |  12
-rw-r--r-- | python/tests/test_exitstatus.py      |  14
21 files changed, 3958 insertions(+), 0 deletions(-)
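
Taken together, the commit adds a small public `pacemaker` Python package (its `__init__.py` re-exports `BuildOptions` and `ExitStatus`) plus the internal `pacemaker._cts` package used by the Cluster Test Suite. The following is a minimal, hypothetical usage sketch of that public surface; it is not part of the commit itself and assumes the package has been built and installed, so that the generated `buildoptions.py` (produced from `buildoptions.py.in`) exists and carries whatever paths were chosen at configure time.

```python
# Hypothetical usage sketch of the public API added by this commit.
# Attribute values depend on configure-time options; output is illustrative.
from pacemaker import BuildOptions, ExitStatus

# Symbolic exit codes used throughout the CTS code (e.g. CtsLab.run()
# returns ExitStatus.OK or ExitStatus.ERROR).
print("OK:", ExitStatus.OK, "ERROR:", ExitStatus.ERROR)

# Build-time paths that the _cts modules rely on: corosync.py reads
# COROSYNC_CONFIG_FILE, patterns.py uses DAEMON_DIR and CIB_DIR.
print("CIB directory:        ", BuildOptions.CIB_DIR)
print("Daemon directory:     ", BuildOptions.DAEMON_DIR)
print("Corosync config file: ", BuildOptions.COROSYNC_CONFIG_FILE)
```

The new python/Makefile.am also wires the package into the build: its check-local target runs `$(PYTHON) -m unittest discover -v -s tests`, and a pylint target lints the pacemaker and tests subdirectories, with the bundled pylintrc shipped alongside via EXTRA_DIST.
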
diff --git a/python/Makefile.am b/python/Makefile.am new file mode 100644 index 0000000..6cefb63 --- /dev/null +++ b/python/Makefile.am @@ -0,0 +1,20 @@ +# +# Copyright 2023 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +MAINTAINERCLEANFILES = Makefile.in + +EXTRA_DIST = pylintrc + +SUBDIRS = pacemaker tests + +check-local: + $(PYTHON) -m unittest discover -v -s tests + +pylint: + pylint $(SUBDIRS) diff --git a/python/pacemaker/Makefile.am b/python/pacemaker/Makefile.am new file mode 100644 index 0000000..f209bba --- /dev/null +++ b/python/pacemaker/Makefile.am @@ -0,0 +1,17 @@ +# +# Copyright 2023 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +MAINTAINERCLEANFILES = Makefile.in + +pkgpython_PYTHON = __init__.py \ + exitstatus.py + +nodist_pkgpython_PYTHON = buildoptions.py + +SUBDIRS = _cts diff --git a/python/pacemaker/__init__.py b/python/pacemaker/__init__.py new file mode 100644 index 0000000..e5d992e --- /dev/null +++ b/python/pacemaker/__init__.py @@ -0,0 +1,9 @@ +""" +API reference documentation for the `pacemaker` package. +""" + +__copyright__ = "Copyright 2023 the Pacemaker project contributors" +__license__ = "GNU Lesser General Public License version 2.1 or later (LGPLv2.1+)" + +from pacemaker.buildoptions import BuildOptions +from pacemaker.exitstatus import ExitStatus diff --git a/python/pacemaker/_cts/CTS.py b/python/pacemaker/_cts/CTS.py new file mode 100644 index 0000000..4ca7e59 --- /dev/null +++ b/python/pacemaker/_cts/CTS.py @@ -0,0 +1,244 @@ +""" Main classes for Pacemaker's Cluster Test Suite (CTS) """ + +__all__ = ["CtsLab", "NodeStatus", "Process"] +__copyright__ = "Copyright 2000-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" + +import sys +import time +import traceback + +from pacemaker.exitstatus import ExitStatus +from pacemaker._cts.environment import EnvFactory +from pacemaker._cts.logging import LogFactory +from pacemaker._cts.remote import RemoteFactory + +class CtsLab: + """ A class that defines the Lab Environment for the Cluster Test System. + It defines those things which are expected to change from test + environment to test environment for the same cluster manager. + + This is where you define the set of nodes that are in your test lab, + what kind of reset mechanism you use, etc. All this data is stored + as key/value pairs in an Environment instance constructed from arguments + passed to this class. + + The CTS code ignores names it doesn't know about or need. Individual + tests have access to this information, and it is perfectly acceptable + to provide hints, tweaks, fine-tuning directions, or other information + to the tests through this mechanism. + """ + + def __init__(self, args=None): + """ Create a new CtsLab instance. This class can be treated kind + of like a dictionary due to the presence of typical dict functions + like has_key, __getitem__, and __setitem__. However, it is not a + dictionary so do not rely on standard dictionary behavior. + + Arguments: + + args -- A list of command line parameters, minus the program name. 
+ """ + + self._env = EnvFactory().getInstance(args) + self._logger = LogFactory() + + def dump(self): + """ Print the current environment """ + + self._env.dump() + + def has_key(self, key): + """ Does the given environment key exist? """ + + return key in list(self._env.keys()) + + def __getitem__(self, key): + """ Return the given environment key, or raise KeyError if it does + not exist + """ + + # Throughout this file, pylint has trouble understanding that EnvFactory + # and RemoteFactory are singleton instances that can be treated as callable + # and subscriptable objects. Various warnings are disabled because of this. + # See also a comment about self._rsh in environment.py. + # pylint: disable=unsubscriptable-object + return self._env[key] + + def __setitem__(self, key, value): + """ Set the given environment key to the given value, overriding any + previous value + """ + + # pylint: disable=unsupported-assignment-operation + self._env[key] = value + + def run(self, scenario, iterations): + """ Run the given scenario the given number of times. + + Returns: + + ExitStatus.OK on success, or ExitStatus.ERROR on error + """ + + if not scenario: + self._logger.log("No scenario was defined") + return ExitStatus.ERROR + + self._logger.log("Cluster nodes: ") + # pylint: disable=unsubscriptable-object + for node in self._env["nodes"]: + self._logger.log(" * %s" % (node)) + + if not scenario.SetUp(): + return ExitStatus.ERROR + + # We want to alert on any exceptions caused by running a scenario, so + # here it's okay to disable the pylint warning. + # pylint: disable=bare-except + try: + scenario.run(iterations) + except: + self._logger.log("Exception by %s" % sys.exc_info()[0]) + self._logger.traceback(traceback) + + scenario.summarize() + scenario.TearDown() + return ExitStatus.ERROR + + scenario.TearDown() + scenario.summarize() + + if scenario.Stats["failure"] > 0: + return ExitStatus.ERROR + + if scenario.Stats["success"] != iterations: + self._logger.log("No failure count but success != requested iterations") + return ExitStatus.ERROR + + return ExitStatus.OK + + +class NodeStatus: + """ A class for querying the status of cluster nodes - are nodes up? Do + they respond to SSH connections? + """ + + def __init__(self, env): + """ Create a new NodeStatus instance + + Arguments: + + env -- An Environment instance + """ + self._env = env + + def _node_booted(self, node): + """ Return True if the given node is booted (responds to pings) """ + + # pylint: disable=not-callable + (rc, _) = RemoteFactory().getInstance()("localhost", "ping -nq -c1 -w1 %s" % node, verbose=0) + return rc == 0 + + def _sshd_up(self, node): + """ Return true if sshd responds on the given node """ + + # pylint: disable=not-callable + (rc, _) = RemoteFactory().getInstance()(node, "true", verbose=0) + return rc == 0 + + def wait_for_node(self, node, timeout=300): + """ Wait for a node to become available. Should the timeout be reached, + the user will be given a choice whether to continue or not. If not, + ValueError will be raised. + + Returns: + + True when the node is available, or False if the timeout is reached. 
+ """ + + initial_timeout = timeout + anytimeouts = False + + while timeout > 0: + if self._node_booted(node) and self._sshd_up(node): + if anytimeouts: + # Fudge to wait for the system to finish coming up + time.sleep(30) + LogFactory().debug("Node %s now up" % node) + + return True + + time.sleep(30) + if not anytimeouts: + LogFactory().debug("Waiting for node %s to come up" % node) + + anytimeouts = True + timeout -= 1 + + LogFactory().log("%s did not come up within %d tries" % (node, initial_timeout)) + if self._env["continue"]: + answer = "Y" + else: + try: + answer = input('Continue? [nY]') + except EOFError: + answer = "n" + + if answer and answer == "n": + raise ValueError("%s did not come up within %d tries" % (node, initial_timeout)) + + return False + + def wait_for_all_nodes(self, nodes, timeout=300): + """ Return True when all nodes come up, or False if the timeout is reached """ + + for node in nodes: + if not self.wait_for_node(node, timeout): + return False + + return True + + +class Process: + """ A class for managing a Pacemaker daemon """ + + # pylint: disable=invalid-name + def __init__(self, cm, name, dc_only=False, pats=None, dc_pats=None, + badnews_ignore=None): + """ Create a new Process instance. + + Arguments: + + cm -- A ClusterManager instance + name -- The command being run + dc_only -- Should this daemon be killed only on the DC? + pats -- Regexes we expect to find in log files + dc_pats -- Additional DC-specific regexes we expect to find + in log files + badnews_ignore -- Regexes for lines in the log that can be ignored + """ + + self._cm = cm + self.badnews_ignore = badnews_ignore + self.dc_only = dc_only + self.dc_pats = dc_pats + self.name = name + self.pats = pats + + if self.badnews_ignore is None: + self.badnews_ignore = [] + + if self.dc_pats is None: + self.dc_pats = [] + + if self.pats is None: + self.pats = [] + + def kill(self, node): + """ Kill the instance of this process running on the given node """ + + (rc, _) = self._cm.rsh(node, "killall -9 %s" % self.name) + + if rc != 0: + self._cm.log ("ERROR: Kill %s failed on node %s" % (self.name, node)) diff --git a/python/pacemaker/_cts/Makefile.am b/python/pacemaker/_cts/Makefile.am new file mode 100644 index 0000000..3b3e3f8 --- /dev/null +++ b/python/pacemaker/_cts/Makefile.am @@ -0,0 +1,24 @@ +# +# Copyright 2023 the Pacemaker project contributors +# +# The version control history for this file may have further details. +# +# This source code is licensed under the GNU General Public License version 2 +# or later (GPLv2+) WITHOUT ANY WARRANTY. +# + +MAINTAINERCLEANFILES = Makefile.in + +pkgpythondir = $(pythondir)/$(PACKAGE)/_cts + +pkgpython_PYTHON = CTS.py \ + __init__.py \ + corosync.py \ + environment.py \ + errors.py \ + logging.py \ + patterns.py \ + process.py \ + remote.py \ + test.py \ + watcher.py diff --git a/python/pacemaker/_cts/__init__.py b/python/pacemaker/_cts/__init__.py new file mode 100644 index 0000000..dfc05ad --- /dev/null +++ b/python/pacemaker/_cts/__init__.py @@ -0,0 +1,6 @@ +""" +Internal Python API for the `pacemaker` package. 
+""" + +__copyright__ = "Copyright 2023 the Pacemaker project contributors" +__license__ = "GNU Lesser General Public License version 2.1 or later (LGPLv2.1+)" diff --git a/python/pacemaker/_cts/corosync.py b/python/pacemaker/_cts/corosync.py new file mode 100644 index 0000000..aabaecd --- /dev/null +++ b/python/pacemaker/_cts/corosync.py @@ -0,0 +1,169 @@ +""" A module providing functions for manipulating corosync """ + +__all__ = ["Corosync", "localname"] +__copyright__ = "Copyright 2009-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+)" + +import os +import subprocess +import time + +from pacemaker.buildoptions import BuildOptions +from pacemaker._cts.process import killall, stdout_from_command + + +AUTOGEN_COROSYNC_TEMPLATE = """ +totem { + version: 2 + cluster_name: %s + crypto_cipher: none + crypto_hash: none + transport: udp +} + +nodelist { + node { + nodeid: 1 + name: %s + ring0_addr: 127.0.0.1 + } +} + +logging { + debug: off + to_syslog: no + to_stderr: no + to_logfile: yes + logfile: %s +} +""" + + +def corosync_cfg_exists(): + """ Does the corosync config file exist? """ + + return os.path.exists(BuildOptions.COROSYNC_CONFIG_FILE) + + +def corosync_log_file(cfgfile): + """ Where does corosync log to? """ + + with open(cfgfile, "r", encoding="utf-8") as f: + for line in f.readlines(): + # "to_logfile:" could also be in the config file, so check for a + # slash to make sure it's a path we're looking at. + if "logfile: /" in line: + return line.split()[-1] + + return None + + +def generate_corosync_cfg(logdir, cluster_name, node_name): + """ Generate the corosync config file, if it does not already exist """ + + if corosync_cfg_exists(): + return False + + logfile = os.path.join(logdir, "corosync.log") + + with open(BuildOptions.COROSYNC_CONFIG_FILE, "w", encoding="utf-8") as corosync_cfg: + corosync_cfg.write(AUTOGEN_COROSYNC_TEMPLATE % (cluster_name, node_name, logfile)) + + return True + + +def localname(): + """ Return the uname of the local host """ + + our_uname = stdout_from_command(["uname", "-n"]) + if our_uname: + our_uname = our_uname[0] + else: + our_uname = "localhost" + + return our_uname + + +class Corosync: + """ A class for managing corosync processes and config files """ + + def __init__(self, verbose, logdir, cluster_name): + """ Create a new Corosync instance. + + Arguments: + + verbose -- Whether to print the corosync log file + logdir -- The base directory under which to store log files + cluster_name -- The name of the cluster + """ + + self.verbose = verbose + self.logdir = logdir + self.cluster_name = cluster_name + + self._generated_cfg_file = False + + def _ready(self, logfile, timeout=10): + """ Is corosync ready to go? """ + + i = 0 + + while i < timeout: + with open(logfile, "r", encoding="utf-8") as corosync_log: + for line in corosync_log.readlines(): + if line.endswith("ready to provide service.\n"): + # Even once the line is in the log file, we may still need to wait just + # a little bit longer before corosync is really ready to go. 
+ time.sleep(1) + return + + time.sleep(1) + i += 1 + + raise TimeoutError + + def start(self, kill_first=False, timeout=10): + """ Start the corosync process + + Arguments: + + kill_first -- Whether to kill any pre-existing corosync processes before + starting a new one + timeout -- If corosync does not start within this many seconds, raise + TimeoutError + """ + + if kill_first: + killall(["corosync"]) + + self._generated_cfg_file = generate_corosync_cfg(self.logdir, + self.cluster_name, localname()) + logfile = corosync_log_file(BuildOptions.COROSYNC_CONFIG_FILE) + + if self.verbose: + print("Starting corosync") + + with subprocess.Popen("corosync", stdout=subprocess.PIPE) as test: + test.wait() + + # Wait for corosync to be ready before returning + self._ready(logfile, timeout=timeout) + + def stop(self): + """ Stop the corosync process """ + + killall(["corosync"]) + + # If we did not write out the corosync config file, don't do anything else. + if not self._generated_cfg_file: + return + + if self.verbose: + print("Corosync output") + + logfile = corosync_log_file(BuildOptions.COROSYNC_CONFIG_FILE) + with open(logfile, "r", encoding="utf-8") as corosync_log: + for line in corosync_log.readlines(): + print(line.strip()) + + os.remove(BuildOptions.COROSYNC_CONFIG_FILE) diff --git a/python/pacemaker/_cts/environment.py b/python/pacemaker/_cts/environment.py new file mode 100644 index 0000000..e4d70e6 --- /dev/null +++ b/python/pacemaker/_cts/environment.py @@ -0,0 +1,651 @@ +""" Test environment classes for Pacemaker's Cluster Test Suite (CTS) """ + +__all__ = ["EnvFactory"] +__copyright__ = "Copyright 2014-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" + +import argparse +import os +import random +import socket +import sys +import time + +from pacemaker._cts.logging import LogFactory +from pacemaker._cts.remote import RemoteFactory +from pacemaker._cts.watcher import LogKind + +class Environment: + """ A class for managing the CTS environment, consisting largely of processing + and storing command line parameters + """ + + # pylint doesn't understand that self._rsh is callable (it stores the + # singleton instance of RemoteExec, as returned by the getInstance method + # of RemoteFactory). It's possible we could fix this with type annotations, + # but those were introduced with python 3.5 and we only support python 3.4. + # I think we could also fix this by getting rid of the getInstance methods, + # but that's a project for another day. For now, just disable the warning. + # pylint: disable=not-callable + + def __init__(self, args): + """ Create a new Environment instance. This class can be treated kind + of like a dictionary due to the presence of typical dict functions + like has_key, __getitem__, and __setitem__. However, it is not a + dictionary so do not rely on standard dictionary behavior. + + Arguments: + + args -- A list of command line parameters, minus the program name. + If None, sys.argv will be used. + """ + + self.data = {} + self._nodes = [] + + # Set some defaults before processing command line arguments. These are + # either not set by any command line parameter, or they need a default + # that can't be set in add_argument. 
+ self["DeadTime"] = 300 + self["StartTime"] = 300 + self["StableTime"] = 30 + self["tests"] = [] + self["IPagent"] = "IPaddr2" + self["DoFencing"] = True + self["ClobberCIB"] = False + self["CIBfilename"] = None + self["CIBResource"] = False + self["LogWatcher"] = LogKind.ANY + self["node-limit"] = 0 + self["scenario"] = "random" + + self.random_gen = random.Random() + + self._logger = LogFactory() + self._rsh = RemoteFactory().getInstance() + self._target = "localhost" + + self._seed_random() + self._parse_args(args) + + if not self["ListTests"]: + self._validate() + self._discover() + + def _seed_random(self, seed=None): + """ Initialize the random number generator with the given seed, or use + the current time if None + """ + + if not seed: + seed = int(time.time()) + + self["RandSeed"] = seed + self.random_gen.seed(str(seed)) + + def dump(self): + """ Print the current environment """ + + keys = [] + for key in list(self.data.keys()): + keys.append(key) + + keys.sort() + for key in keys: + s = "Environment[%s]" % key + self._logger.debug("{key:35}: {val}".format(key=s, val=str(self[key]))) + + def keys(self): + """ Return a list of all environment keys stored in this instance """ + + return list(self.data.keys()) + + def has_key(self, key): + """ Does the given environment key exist? """ + + if key == "nodes": + return True + + return key in self.data + + def __getitem__(self, key): + """ Return the given environment key, or None if it does not exist """ + + if str(key) == "0": + raise ValueError("Bad call to 'foo in X', should reference 'foo in X.keys()' instead") + + if key == "nodes": + return self._nodes + + if key == "Name": + return self._get_stack_short() + + if key in self.data: + return self.data[key] + + return None + + def __setitem__(self, key, value): + """ Set the given environment key to the given value, overriding any + previous value + """ + + if key == "Stack": + self._set_stack(value) + + elif key == "node-limit": + self.data[key] = value + self._filter_nodes() + + elif key == "nodes": + self._nodes = [] + for node in value: + # I don't think I need the IP address, etc. but this validates + # the node name against /etc/hosts and/or DNS, so it's a + # GoodThing(tm). + try: + n = node.strip() + socket.gethostbyname_ex(n) + self._nodes.append(n) + except: + self._logger.log("%s not found in DNS... 
aborting" % node) + raise + + self._filter_nodes() + + else: + self.data[key] = value + + def random_node(self): + """ Choose a random node from the cluster """ + + return self.random_gen.choice(self["nodes"]) + + def _set_stack(self, name): + """ Normalize the given cluster stack name """ + + if name in ["corosync", "cs", "mcp"]: + self.data["Stack"] = "corosync 2+" + + else: + raise ValueError("Unknown stack: %s" % name) + + def _get_stack_short(self): + """ Return the short name for the currently set cluster stack """ + + if "Stack" not in self.data: + return "unknown" + + if self.data["Stack"] == "corosync 2+": + return "crm-corosync" + + LogFactory().log("Unknown stack: %s" % self["stack"]) + raise ValueError("Unknown stack: %s" % self["stack"]) + + def _detect_systemd(self): + """ Detect whether systemd is in use on the target node """ + + if "have_systemd" not in self.data: + (rc, _) = self._rsh(self._target, "systemctl list-units", verbose=0) + self["have_systemd"] = rc == 0 + + def _detect_syslog(self): + """ Detect the syslog variant in use on the target node """ + + if "syslogd" not in self.data: + if self["have_systemd"]: + # Systemd + (_, lines) = self._rsh(self._target, r"systemctl list-units | grep syslog.*\.service.*active.*running | sed 's:.service.*::'", verbose=1) + self["syslogd"] = lines[0].strip() + else: + # SYS-V + (_, lines) = self._rsh(self._target, "chkconfig --list | grep syslog.*on | awk '{print $1}' | head -n 1", verbose=1) + self["syslogd"] = lines[0].strip() + + if "syslogd" not in self.data or not self["syslogd"]: + # default + self["syslogd"] = "rsyslog" + + def disable_service(self, node, service): + """ Disable the given service on the given node """ + + if self["have_systemd"]: + # Systemd + (rc, _) = self._rsh(node, "systemctl disable %s" % service) + return rc + + # SYS-V + (rc, _) = self._rsh(node, "chkconfig %s off" % service) + return rc + + def enable_service(self, node, service): + """ Enable the given service on the given node """ + + if self["have_systemd"]: + # Systemd + (rc, _) = self._rsh(node, "systemctl enable %s" % service) + return rc + + # SYS-V + (rc, _) = self._rsh(node, "chkconfig %s on" % service) + return rc + + def service_is_enabled(self, node, service): + """ Is the given service enabled on the given node? """ + + if self["have_systemd"]: + # Systemd + + # With "systemctl is-enabled", we should check if the service is + # explicitly "enabled" instead of the return code. For example it returns + # 0 if the service is "static" or "indirect", but they don't really count + # as "enabled". 
+ (rc, _) = self._rsh(node, "systemctl is-enabled %s | grep enabled" % service) + return rc == 0 + + # SYS-V + (rc, _) = self._rsh(node, "chkconfig --list | grep -e %s.*on" % service) + return rc == 0 + + def _detect_at_boot(self): + """ Detect if the cluster starts at boot """ + + if "at-boot" not in self.data: + self["at-boot"] = self.service_is_enabled(self._target, "corosync") \ + or self.service_is_enabled(self._target, "pacemaker") + + def _detect_ip_offset(self): + """ Detect the offset for IPaddr resources """ + + if self["CIBResource"] and "IPBase" not in self.data: + (_, lines) = self._rsh(self._target, "ip addr | grep inet | grep -v -e link -e inet6 -e '/32' -e ' lo' | awk '{print $2}'", verbose=0) + network = lines[0].strip() + + (_, lines) = self._rsh(self._target, "nmap -sn -n %s | grep 'scan report' | awk '{print $NF}' | sed 's:(::' | sed 's:)::' | sort -V | tail -n 1" % network, verbose=0) + + try: + self["IPBase"] = lines[0].strip() + except (IndexError, TypeError): + self["IPBase"] = None + + if not self["IPBase"]: + self["IPBase"] = " fe80::1234:56:7890:1000" + self._logger.log("Could not determine an offset for IPaddr resources. Perhaps nmap is not installed on the nodes.") + self._logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"]) + return + + # pylint thinks self["IPBase"] is a list, not a string, which causes it + # to error out because a list doesn't have split(). + # pylint: disable=no-member + if int(self["IPBase"].split('.')[3]) >= 240: + self._logger.log("Could not determine an offset for IPaddr resources. Upper bound is too high: %s %s" + % (self["IPBase"], self["IPBase"].split('.')[3])) + self["IPBase"] = " fe80::1234:56:7890:1000" + self._logger.log("Defaulting to '%s', use --test-ip-base to override" % self["IPBase"]) + + def _filter_nodes(self): + """ If --limit-nodes is given, keep that many nodes from the front of the + list of cluster nodes and drop the rest + """ + + if self["node-limit"] > 0: + if len(self["nodes"]) > self["node-limit"]: + # pylint thinks self["node-limit"] is a list even though we initialize + # it as an int in __init__ and treat it as an int everywhere. + # pylint: disable=bad-string-format-type + self._logger.log("Limiting the number of nodes configured=%d (max=%d)" + %(len(self["nodes"]), self["node-limit"])) + + while len(self["nodes"]) > self["node-limit"]: + self["nodes"].pop(len(self["nodes"])-1) + + def _validate(self): + """ Were we given all the required command line parameters? """ + + if not self["nodes"]: + raise ValueError("No nodes specified!") + + def _discover(self): + """ Probe cluster nodes to figure out how to log and manage services """ + + self._target = random.Random().choice(self["nodes"]) + + exerciser = socket.gethostname() + + # Use the IP where possible to avoid name lookup failures + for ip in socket.gethostbyname_ex(exerciser)[2]: + if ip != "127.0.0.1": + exerciser = ip + break + + self["cts-exerciser"] = exerciser + + self._detect_systemd() + self._detect_syslog() + self._detect_at_boot() + self._detect_ip_offset() + + def _parse_args(self, argv): + """ Parse and validate command line parameters, setting the appropriate + values in the environment dictionary. If argv is None, use sys.argv + instead. 
+ """ + + if not argv: + argv = sys.argv[1:] + + parser = argparse.ArgumentParser(epilog="%s -g virt1 -r --stonith ssh --schema pacemaker-2.0 500" % sys.argv[0]) + + grp1 = parser.add_argument_group("Common options") + grp1.add_argument("-g", "--dsh-group", "--group", + metavar="GROUP", dest="group", + help="Use the nodes listed in the named DSH group (~/.dsh/groups/$name)") + grp1.add_argument("-l", "--limit-nodes", + type=int, default=0, + metavar="MAX", + help="Only use the first MAX cluster nodes supplied with --nodes") + grp1.add_argument("--benchmark", + action="store_true", + help="Add timing information") + grp1.add_argument("--list", "--list-tests", + action="store_true", dest="list_tests", + help="List the valid tests") + grp1.add_argument("--nodes", + metavar="NODES", + help="List of cluster nodes separated by whitespace") + grp1.add_argument("--stack", + default="corosync", + metavar="STACK", + help="Which cluster stack is installed") + + grp2 = parser.add_argument_group("Options that CTS will usually auto-detect correctly") + grp2.add_argument("-L", "--logfile", + metavar="PATH", + help="Where to look for logs from cluster nodes") + grp2.add_argument("--at-boot", "--cluster-starts-at-boot", + choices=["1", "0", "yes", "no"], + help="Does the cluster software start at boot time?") + grp2.add_argument("--facility", "--syslog-facility", + default="daemon", + metavar="NAME", + help="Which syslog facility to log to") + grp2.add_argument("--ip", "--test-ip-base", + metavar="IP", + help="Offset for generated IP address resources") + + grp3 = parser.add_argument_group("Options for release testing") + grp3.add_argument("-r", "--populate-resources", + action="store_true", + help="Generate a sample configuration") + grp3.add_argument("--choose", + metavar="NAME", + help="Run only the named test") + grp3.add_argument("--fencing", "--stonith", + choices=["1", "0", "yes", "no", "lha", "openstack", "rhcs", "rhevm", "scsi", "ssh", "virt", "xvm"], + default="1", + help="What fencing agent to use") + grp3.add_argument("--once", + action="store_true", + help="Run all valid tests once") + + grp4 = parser.add_argument_group("Additional (less common) options") + grp4.add_argument("-c", "--clobber-cib", + action="store_true", + help="Erase any existing configuration") + grp4.add_argument("-y", "--yes", + action="store_true", dest="always_continue", + help="Continue to run whenever prompted") + grp4.add_argument("--boot", + action="store_true", + help="") + grp4.add_argument("--bsc", + action="store_true", + help="") + grp4.add_argument("--cib-filename", + metavar="PATH", + help="Install the given CIB file to the cluster") + grp4.add_argument("--container-tests", + action="store_true", + help="Include pacemaker_remote tests that run in lxc container resources") + grp4.add_argument("--experimental-tests", + action="store_true", + help="Include experimental tests") + grp4.add_argument("--loop-minutes", + type=int, default=60, + help="") + grp4.add_argument("--no-loop-tests", + action="store_true", + help="Don't run looping/time-based tests") + grp4.add_argument("--no-unsafe-tests", + action="store_true", + help="Don't run tests that are unsafe for use with ocfs2/drbd") + grp4.add_argument("--notification-agent", + metavar="PATH", + default="/var/lib/pacemaker/notify.sh", + help="Script to configure for Pacemaker alerts") + grp4.add_argument("--notification-recipient", + metavar="R", + default="/var/lib/pacemaker/notify.log", + help="Recipient to pass to alert script") + grp4.add_argument("--oprofile", 
+ metavar="NODES", + help="List of cluster nodes to run oprofile on") + grp4.add_argument("--outputfile", + metavar="PATH", + help="Location to write logs to") + grp4.add_argument("--qarsh", + action="store_true", + help="Use QARSH to access nodes instead of SSH") + grp4.add_argument("--schema", + metavar="SCHEMA", + default="pacemaker-3.0", + help="Create a CIB conforming to the given schema") + grp4.add_argument("--seed", + metavar="SEED", + help="Use the given string as the random number seed") + grp4.add_argument("--set", + action="append", + metavar="ARG", + default=[], + help="Set key=value pairs (can be specified multiple times)") + grp4.add_argument("--stonith-args", + metavar="ARGS", + default="hostlist=all,livedangerously=yes", + help="") + grp4.add_argument("--stonith-type", + metavar="TYPE", + default="external/ssh", + help="") + grp4.add_argument("--trunc", + action="store_true", dest="truncate", + help="Truncate log file before starting") + grp4.add_argument("--valgrind-procs", + metavar="PROCS", + default="pacemaker-attrd pacemaker-based pacemaker-controld pacemaker-execd pacemaker-fenced pacemaker-schedulerd", + help="Run valgrind against the given space-separated list of processes") + grp4.add_argument("--valgrind-tests", + action="store_true", + help="Include tests using valgrind") + grp4.add_argument("--warn-inactive", + action="store_true", + help="Warn if a resource is assigned to an inactive node") + + parser.add_argument("iterations", + nargs='?', + type=int, default=1, + help="Number of tests to run") + + args = parser.parse_args(args=argv) + + # Set values on this object based on what happened with command line + # processing. This has to be done in several blocks. + + # These values can always be set. They get a default from the add_argument + # calls, only do one thing, and they do not have any side effects. + self["ClobberCIB"] = args.clobber_cib + self["ListTests"] = args.list_tests + self["Schema"] = args.schema + self["Stack"] = args.stack + self["SyslogFacility"] = args.facility + self["TruncateLog"] = args.truncate + self["at-boot"] = args.at_boot in ["1", "yes"] + self["benchmark"] = args.benchmark + self["continue"] = args.always_continue + self["container-tests"] = args.container_tests + self["experimental-tests"] = args.experimental_tests + self["iterations"] = args.iterations + self["loop-minutes"] = args.loop_minutes + self["loop-tests"] = not args.no_loop_tests + self["notification-agent"] = args.notification_agent + self["notification-recipient"] = args.notification_recipient + self["node-limit"] = args.limit_nodes + self["stonith-params"] = args.stonith_args + self["stonith-type"] = args.stonith_type + self["unsafe-tests"] = not args.no_unsafe_tests + self["valgrind-procs"] = args.valgrind_procs + self["valgrind-tests"] = args.valgrind_tests + self["warn-inactive"] = args.warn_inactive + + # Nodes and groups are mutually exclusive, so their defaults cannot be + # set in their add_argument calls. Additionally, groups does more than + # just set a value. Here, set nodes first and then if a group is + # specified, override the previous nodes value. 
+ if args.nodes: + self["nodes"] = args.nodes.split(" ") + else: + self["nodes"] = [] + + if args.group: + self["OutputFile"] = "%s/cluster-%s.log" % (os.environ['HOME'], args.dsh_group) + LogFactory().add_file(self["OutputFile"], "CTS") + + dsh_file = "%s/.dsh/group/%s" % (os.environ['HOME'], args.dsh_group) + + if os.path.isfile(dsh_file): + self["nodes"] = [] + + with open(dsh_file, "r", encoding="utf-8") as f: + for line in f: + l = line.strip() + + if not l.startswith('#'): + self["nodes"].append(l) + else: + print("Unknown DSH group: %s" % args.dsh_group) + + # Everything else either can't have a default set in an add_argument + # call (likely because we don't want to always have a value set for it) + # or it does something fancier than just set a single value. However, + # order does not matter for these as long as the user doesn't provide + # conflicting arguments on the command line. So just do Everything + # alphabetically. + if args.boot: + self["scenario"] = "boot" + + if args.bsc: + self["DoBSC"] = True + self["scenario"] = "basic-sanity" + + if args.cib_filename: + self["CIBfilename"] = args.cib_filename + else: + self["CIBfilename"] = None + + if args.choose: + self["scenario"] = "sequence" + self["tests"].append(args.choose) + + if args.fencing: + if args.fencing in ["0", "no"]: + self["DoFencing"] = False + else: + self["DoFencing"] = True + + if args.fencing in ["rhcs", "virt", "xvm"]: + self["stonith-type"] = "fence_xvm" + + elif args.fencing == "scsi": + self["stonith-type"] = "fence_scsi" + + elif args.fencing in ["lha", "ssh"]: + self["stonith-params"] = "hostlist=all,livedangerously=yes" + self["stonith-type"] = "external/ssh" + + elif args.fencing == "openstack": + self["stonith-type"] = "fence_openstack" + + print("Obtaining OpenStack credentials from the current environment") + self["stonith-params"] = "region=%s,tenant=%s,auth=%s,user=%s,password=%s" % ( + os.environ['OS_REGION_NAME'], + os.environ['OS_TENANT_NAME'], + os.environ['OS_AUTH_URL'], + os.environ['OS_USERNAME'], + os.environ['OS_PASSWORD'] + ) + + elif args.fencing == "rhevm": + self["stonith-type"] = "fence_rhevm" + + print("Obtaining RHEV-M credentials from the current environment") + self["stonith-params"] = "login=%s,passwd=%s,ipaddr=%s,ipport=%s,ssl=1,shell_timeout=10" % ( + os.environ['RHEVM_USERNAME'], + os.environ['RHEVM_PASSWORD'], + os.environ['RHEVM_SERVER'], + os.environ['RHEVM_PORT'], + ) + + if args.ip: + self["CIBResource"] = True + self["ClobberCIB"] = True + self["IPBase"] = args.ip + + if args.logfile: + self["LogAuditDisabled"] = True + self["LogFileName"] = args.logfile + self["LogWatcher"] = LogKind.REMOTE_FILE + else: + # We can't set this as the default on the parser.add_argument call + # for this option because then args.logfile will be set, which means + # the above branch will be taken and those other values will also be + # set. 
+ self["LogFileName"] = "/var/log/messages" + + if args.once: + self["scenario"] = "all-once" + + if args.oprofile: + self["oprofile"] = args.oprofile.split(" ") + else: + self["oprofile"] = [] + + if args.outputfile: + self["OutputFile"] = args.outputfile + LogFactory().add_file(self["OutputFile"]) + + if args.populate_resources: + self["CIBResource"] = True + self["ClobberCIB"] = True + + if args.qarsh: + self._rsh.enable_qarsh() + + for kv in args.set: + (name, value) = kv.split("=") + self[name] = value + print("Setting %s = %s" % (name, value)) + +class EnvFactory: + """ A class for constructing a singleton instance of an Environment object """ + + instance = None + + # pylint: disable=invalid-name + def getInstance(self, args=None): + """ Returns the previously created instance of Environment, or creates a + new instance if one does not already exist. + """ + + if not EnvFactory.instance: + EnvFactory.instance = Environment(args) + + return EnvFactory.instance diff --git a/python/pacemaker/_cts/errors.py b/python/pacemaker/_cts/errors.py new file mode 100644 index 0000000..2e245e7 --- /dev/null +++ b/python/pacemaker/_cts/errors.py @@ -0,0 +1,53 @@ +""" A module providing custom exception classes used throughout the pacemaker library """ + +__all__ = ["ExitCodeError", "OutputFoundError", "OutputNotFoundError", "XmlValidationError"] +__copyright__ = "Copyright 2009-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+)" + + +class TestError(Exception): + """ Base class for exceptions in this module """ + + +class ExitCodeError(TestError): + """ Exception raised when command exit status is unexpected """ + + def __init__(self, exit_code): + TestError.__init__(self) + self.exit_code = exit_code + + def __str__(self): + return repr(self.exit_code) + + +class OutputNotFoundError(TestError): + """ Exception raised when command output does not contain wanted string """ + + def __init__(self, output): + TestError.__init__(self) + self.output = output + + def __str__(self): + return repr(self.output) + + +class OutputFoundError(TestError): + """ Exception raised when command output contains unwanted string """ + + def __init__(self, output): + TestError.__init__(self) + self.output = output + + def __str__(self): + return repr(self.output) + + +class XmlValidationError(TestError): + """ Exception raised when xmllint fails """ + + def __init__(self, output): + TestError.__init__(self) + self.output = output + + def __str__(self): + return repr(self.output) diff --git a/python/pacemaker/_cts/logging.py b/python/pacemaker/_cts/logging.py new file mode 100644 index 0000000..d9f3012 --- /dev/null +++ b/python/pacemaker/_cts/logging.py @@ -0,0 +1,130 @@ +""" Logging classes for Pacemaker's Cluster Test Suite (CTS) """ + +__all__ = ["LogFactory"] +__copyright__ = "Copyright 2014-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" + +import os +import sys +import time + + +class Logger: + """ Abstract class to use as parent for CTS logging classes """ + + TimeFormat = "%b %d %H:%M:%S\t" + + def __init__(self, filename=None, tag=None): + # Whether this logger should print debug messages + self._debug_target = True + + self._logfile = filename + + if tag: + self._source = tag + ": " + else: + self._source = "" + + def __call__(self, lines): + """ Log specified messages """ + + raise ValueError("Abstract class member (__call__)") + + def write(self, line): + """ Log 
a single line excluding trailing whitespace """ + + return self(line.rstrip()) + + def writelines(self, lines): + """ Log a series of lines excluding trailing whitespace """ + + for line in lines: + self.write(line) + + @property + def is_debug_target(self): + """ Return True if this logger should receive debug messages """ + + return self._debug_target + + +class StdErrLog(Logger): + """ Class to log to standard error """ + + def __init__(self, filename, tag): + Logger.__init__(self, filename, tag) + self._debug_target = False + + def __call__(self, lines): + """ Log specified lines to stderr """ + + timestamp = time.strftime(Logger.TimeFormat, + time.localtime(time.time())) + if isinstance(lines, str): + lines = [lines] + + for line in lines: + print("%s%s" % (timestamp, line), file=sys.__stderr__) + + sys.__stderr__.flush() + + +class FileLog(Logger): + """ Class to log to a file """ + + def __init__(self, filename, tag): + Logger.__init__(self, filename, tag) + self._hostname = os.uname()[1] + + def __call__(self, lines): + """ Log specified lines to the file """ + + with open(self._logfile, "at", encoding="utf-8") as logf: + timestamp = time.strftime(Logger.TimeFormat, + time.localtime(time.time())) + + if isinstance(lines, str): + lines = [lines] + + for line in lines: + print("%s%s %s%s" % (timestamp, self._hostname, self._source, line), + file=logf) + + +class LogFactory: + """ Singleton to log messages to various destinations """ + + log_methods = [] + have_stderr = False + + def add_file(self, filename, tag=None): + """ When logging messages, log them to specified file """ + + if filename: + LogFactory.log_methods.append(FileLog(filename, tag)) + + def add_stderr(self): + """ When logging messages, log them to standard error """ + + if not LogFactory.have_stderr: + LogFactory.have_stderr = True + LogFactory.log_methods.append(StdErrLog(None, None)) + + def log(self, args): + """ Log a message (to all configured log destinations) """ + + for logfn in LogFactory.log_methods: + logfn(args.strip()) + + def debug(self, args): + """ Log a debug message (to all configured log destinations) """ + + for logfn in LogFactory.log_methods: + if logfn.is_debug_target: + logfn("debug: %s" % args.strip()) + + def traceback(self, traceback): + """ Log a stack trace (to all configured log destinations) """ + + for logfn in LogFactory.log_methods: + traceback.print_exc(50, logfn) diff --git a/python/pacemaker/_cts/patterns.py b/python/pacemaker/_cts/patterns.py new file mode 100644 index 0000000..880477a --- /dev/null +++ b/python/pacemaker/_cts/patterns.py @@ -0,0 +1,408 @@ +""" Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS) """ + +__all__ = ["PatternSelector"] +__copyright__ = "Copyright 2008-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+)" + +import argparse + +from pacemaker.buildoptions import BuildOptions + +class BasePatterns: + """ The base class for holding a stack-specific set of command and log + file/stdout patterns. Stack-specific classes need to be built on top + of this one. + """ + + def __init__(self): + """ Create a new BasePatterns instance which holds a very minimal set of + basic patterns. 
+ """ + + self._bad_news = [] + self._components = {} + self._name = "crm-base" + + self._ignore = [ + "avoid confusing Valgrind", + + # Logging bug in some versions of libvirtd + r"libvirtd.*: internal error: Failed to parse PCI config address", + + # pcs can log this when node is fenced, but fencing is OK in some + # tests (and we will catch it in pacemaker logs when not OK) + r"pcs.daemon:No response from: .* request: get_configs, error:", + ] + + self._commands = { + "StatusCmd" : "crmadmin -t 60 -S %s 2>/dev/null", + "CibQuery" : "cibadmin -Ql", + "CibAddXml" : "cibadmin --modify -c --xml-text %s", + "CibDelXpath" : "cibadmin --delete --xpath %s", + "RscRunning" : BuildOptions.DAEMON_DIR + "/cts-exec-helper -R -r %s", + "CIBfile" : "%s:" + BuildOptions.CIB_DIR + "/cib.xml", + "TmpDir" : "/tmp", + + "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", + "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", + + "MaintenanceModeOn" : "cibadmin --modify -c --xml-text '<cluster_property_set id=\"cib-bootstrap-options\"><nvpair id=\"cts-maintenance-mode-setting\" name=\"maintenance-mode\" value=\"true\"/></cluster_property_set>'", + "MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", + + "StandbyCmd" : "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null", + "StandbyQueryCmd" : "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null", + } + + self._search = { + "Pat:DC_IDLE" : r"pacemaker-controld.*State transition.*-> S_IDLE", + + # This won't work if we have multiple partitions + "Pat:Local_started" : r"%s\W.*controller successfully started", + "Pat:NonDC_started" : r"%s\W.*State transition.*-> S_NOT_DC", + "Pat:DC_started" : r"%s\W.*State transition.*-> S_IDLE", + "Pat:We_stopped" : r"%s\W.*OVERRIDE THIS PATTERN", + "Pat:They_stopped" : r"%s\W.*LOST:.* %s ", + "Pat:They_dead" : r"node %s.*: is dead", + "Pat:They_up" : r"%s %s\W.*OVERRIDE THIS PATTERN", + "Pat:TransitionComplete" : "Transition status: Complete: complete", + + "Pat:Fencing_start" : r"Requesting peer fencing .* targeting %s", + "Pat:Fencing_ok" : r"pacemaker-fenced.*:\s*Operation .* targeting %s by .* for .*@.*: OK", + "Pat:Fencing_recover" : r"pacemaker-schedulerd.*: Recover\s+%s", + "Pat:Fencing_active" : r"stonith resource .* is active on 2 nodes (attempting recovery)", + "Pat:Fencing_probe" : r"pacemaker-controld.* Result of probe operation for %s on .*: Error", + + "Pat:RscOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s.*: (0 \()?ok", + "Pat:RscOpFail" : r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of %s ", + "Pat:CloneOpFail" : r"pacemaker-schedulerd.*:.*Unexpected result .* recorded for %s of (%s|%s) ", + "Pat:RscRemoteOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s on %s: (0 \()?ok", + "Pat:NodeFenced" : r"pacemaker-controld.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK", + } + + def get_component(self, key): + """ Return the patterns for a single component as a list, given by key. + This is typically the name of some subprogram (pacemaker-based, + pacemaker-fenced, etc.) or various special purpose keys. If key is + unknown, return an empty list. + """ + + if key in self._components: + return self._components[key] + + print("Unknown component '%s' for %s" % (key, self._name)) + return [] + + def get_patterns(self, key): + """ Return various patterns supported by this object, given by key. + Depending on the key, this could either be a list or a hash. 
If key + is unknown, return None. + """ + + if key == "BadNews": + return self._bad_news + if key == "BadNewsIgnore": + return self._ignore + if key == "Commands": + return self._commands + if key == "Search": + return self._search + if key == "Components": + return self._components + + print("Unknown pattern '%s' for %s" % (key, self._name)) + return None + + def __getitem__(self, key): + if key == "Name": + return self._name + if key in self._commands: + return self._commands[key] + if key in self._search: + return self._search[key] + + print("Unknown template '%s' for %s" % (key, self._name)) + return None + + +class Corosync2Patterns(BasePatterns): + """ Patterns for Corosync version 2 cluster manager class """ + + def __init__(self): + BasePatterns.__init__(self) + self._name = "crm-corosync" + + self._commands.update({ + "StartCmd" : "service corosync start && service pacemaker start", + "StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker-remoted ] || service pacemaker_remote stop; service corosync stop", + + "EpochCmd" : "crm_node -e", + "QuorumCmd" : "crm_node -q", + "PartitionCmd" : "crm_node -p", + }) + + self._search.update({ + # Close enough ... "Corosync Cluster Engine exiting normally" isn't + # printed reliably. + "Pat:We_stopped" : r"%s\W.*Unloading all Corosync service engines", + "Pat:They_stopped" : r"%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost", + "Pat:They_dead" : r"pacemaker-controld.*Node %s(\[|\s).*state is now lost", + "Pat:They_up" : r"\W%s\W.*pacemaker-controld.*Node %s state is now member", + + "Pat:ChildExit" : r"\[[0-9]+\] exited with status [0-9]+ \(", + # "with signal 9" == pcmk_child_exit(), "$" == check_active_before_startup_processes() + "Pat:ChildKilled" : r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated( with signal 9|$)", + "Pat:ChildRespawn" : r"%s\W.*pacemakerd.*Respawning %s subdaemon after unexpected exit", + + "Pat:InfraUp" : r"%s\W.*corosync.*Initializing transport", + "Pat:PacemakerUp" : r"%s\W.*pacemakerd.*Starting Pacemaker", + }) + + self._ignore += [ + r"crm_mon:", + r"crmadmin:", + r"update_trace_data", + r"async_notify:.*strange, client not found", + r"Parse error: Ignoring unknown option .*nodename", + r"error.*: Operation 'reboot' .* using FencingFail returned ", + r"getinfo response error: 1$", + r"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE", + r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* (failed|closed)", + ] + + self._bad_news = [ + r"[^(]error:", + r"crit:", + r"ERROR:", + r"CRIT:", + r"Shutting down...NOW", + r"Timer I_TERMINATE just popped", + r"input=I_ERROR", + r"input=I_FAIL", + r"input=I_INTEGRATED cause=C_TIMER_POPPED", + r"input=I_FINALIZED cause=C_TIMER_POPPED", + r"input=I_ERROR", + r"(pacemakerd|pacemaker-execd|pacemaker-controld):.*, exiting", + r"schedulerd.*Attempting recovery of resource", + r"is taking more than 2x its timeout", + r"Confirm not received from", + r"Welcome reply not received from", + r"Attempting to schedule .* after a stop", + r"Resource .* was active at shutdown", + r"duplicate entries for call_id", + r"Search terminated:", + r":global_timer_callback", + r"Faking parameter digest creation", + r"Parameters to .* action changed:", + r"Parameters to .* changed", + r"pacemakerd.*\[[0-9]+\] terminated( with signal| as IPC server|$)", + r"pacemaker-schedulerd.*Recover\s+.*\(.* -\> .*\)", + r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting", + r"Peer is not part of our cluster", + r"We appear to be in an election loop", + r"Unknown node -> we will not 
deliver message", + r"(Blackbox dump requested|Problem detected)", + r"pacemakerd.*Could not connect to Cluster Configuration Database API", + r"Receiving messages from a node we think is dead", + r"share the same cluster nodeid", + r"share the same name", + + r"pacemaker-controld:.*Transition failed: terminated", + r"Local CIB .* differs from .*:", + r"warn.*:\s*Continuing but .* will NOT be used", + r"warn.*:\s*Cluster configuration file .* is corrupt", + r"Election storm", + r"stalled the FSA with pending inputs", + ] + + self._components["common-ignore"] = [ + r"Pending action:", + r"resource( was|s were) active at shutdown", + r"pending LRM operations at shutdown", + r"Lost connection to the CIB manager", + r"pacemaker-controld.*:\s*Action A_RECOVER .* not supported", + r"pacemaker-controld.*:\s*Performing A_EXIT_1 - forcefully exiting ", + r".*:\s*Requesting fencing \([^)]+\) targeting node ", + r"(Blackbox dump requested|Problem detected)", + ] + + self._components["corosync-ignore"] = [ + r"Could not connect to Corosync CFG: CS_ERR_LIBRARY", + r"error:.*Connection to the CPG API failed: Library error", + r"\[[0-9]+\] exited with status [0-9]+ \(", + r"\[[0-9]+\] terminated with signal 15", + r"pacemaker-based.*error:.*Corosync connection lost", + r"pacemaker-fenced.*error:.*Corosync connection terminated", + r"pacemaker-controld.*State transition .* S_RECOVERY", + r"pacemaker-controld.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state", + r"pacemaker-controld.*error:.*Could not recover from internal error", + r"error:.*Connection to cib_(shm|rw).* (failed|closed)", + r"error:.*cib_(shm|rw) IPC provider disconnected while waiting", + r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", + r"crit: Fencing daemon connection failed", + # This is overbroad, but we don't have a way to say that only + # certain transition errors are acceptable (if the fencer respawns, + # fence devices may appear multiply active). We have to rely on + # other causes of a transition error logging their own error + # message, which is the usual practice. + r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._components["corosync"] = [ + # We expect each daemon to lose its cluster connection. + # However, if the CIB manager loses its connection first, + # it's possible for another daemon to lose that connection and + # exit before losing the cluster connection. 
+ r"pacemakerd.*:\s*warning:.*Lost connection to cluster layer", + r"pacemaker-attrd.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", + r"pacemaker-based.*:\s*(crit|error):.*Lost connection to cluster layer", + r"pacemaker-controld.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", + r"pacemaker-fenced.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", + r"schedulerd.*Scheduling node .* for fencing", + r"pacemaker-controld.*:\s*Peer .* was terminated \(.*\) by .* on behalf of .*:\s*OK", + ] + + self._components["pacemaker-based"] = [ + r"pacemakerd.* pacemaker-attrd\[[0-9]+\] exited with status 102", + r"pacemakerd.* pacemaker-controld\[[0-9]+\] exited with status 1", + r"pacemakerd.* Respawning pacemaker-attrd subdaemon after unexpected exit", + r"pacemakerd.* Respawning pacemaker-based subdaemon after unexpected exit", + r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit", + r"pacemakerd.* Respawning pacemaker-fenced subdaemon after unexpected exit", + r"pacemaker-.* Connection to cib_.* (failed|closed)", + r"pacemaker-attrd.*:.*Lost connection to the CIB manager", + r"pacemaker-controld.*:.*Lost connection to the CIB manager", + r"pacemaker-controld.*I_ERROR.*handle_cib_disconnect", + r"pacemaker-controld.* State transition .* S_RECOVERY", + r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", + r"pacemaker-controld.*Could not recover from internal error", + ] + + self._components["pacemaker-based-ignore"] = [ + r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", + r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", + r"pacemaker-controld.*:Could not connect to attrd: Connection refused", + # This is overbroad, but we don't have a way to say that only + # certain transition errors are acceptable (if the fencer respawns, + # fence devices may appear multiply active). We have to rely on + # other causes of a transition error logging their own error + # message, which is the usual practice. 
+ r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._components["pacemaker-execd"] = [ + r"pacemaker-controld.*Connection to executor failed", + r"pacemaker-controld.*I_ERROR.*lrm_connection_destroy", + r"pacemaker-controld.*State transition .* S_RECOVERY", + r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", + r"pacemaker-controld.*Could not recover from internal error", + r"pacemakerd.*pacemaker-controld\[[0-9]+\] exited with status 1", + r"pacemakerd.* Respawning pacemaker-execd subdaemon after unexpected exit", + r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit", + ] + + self._components["pacemaker-execd-ignore"] = [ + r"pacemaker-(attrd|controld).*Connection to lrmd.* (failed|closed)", + r"pacemaker-(attrd|controld).*Could not execute alert", + ] + + self._components["pacemaker-controld"] = [ + r"State transition .* -> S_IDLE", + ] + + self._components["pacemaker-controld-ignore"] = [] + self._components["pacemaker-attrd"] = [] + self._components["pacemaker-attrd-ignore"] = [] + + self._components["pacemaker-schedulerd"] = [ + r"State transition .* S_RECOVERY", + r"pacemakerd.* Respawning pacemaker-controld subdaemon after unexpected exit", + r"pacemaker-controld\[[0-9]+\] exited with status 1 \(", + r"Connection to the scheduler failed", + r"pacemaker-controld.*I_ERROR.*save_cib_contents", + r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", + r"pacemaker-controld.*Could not recover from internal error", + ] + + self._components["pacemaker-schedulerd-ignore"] = [ + r"Connection to pengine.* (failed|closed)", + ] + + self._components["pacemaker-fenced"] = [ + r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", + r"Fencing daemon connection failed", + r"pacemaker-controld.*Fencer successfully connected", + ] + + self._components["pacemaker-fenced-ignore"] = [ + r"(error|warning):.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", + r"crit:.*Fencing daemon connection failed", + r"error:.*Fencer connection failed \(will retry\)", + r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", + # This is overbroad, but we don't have a way to say that only + # certain transition errors are acceptable (if the fencer respawns, + # fence devices may appear multiply active). We have to rely on + # other causes of a transition error logging their own error + # message, which is the usual practice. + r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._components["pacemaker-fenced-ignore"].extend(self._components["common-ignore"]) + + +patternVariants = { + "crm-base": BasePatterns, + "crm-corosync": Corosync2Patterns +} + + +class PatternSelector: + """ A class for choosing one of several Pattern objects and then extracting + various pieces of information from that object + """ + + def __init__(self, name="crm-corosync"): + """ Create a new PatternSelector object by instantiating whatever class + is given by name. Defaults to Corosync2Patterns for "crm-corosync" or + None. While other objects could be supported in the future, only this + and the base object are supported at this time. + """ + + self._name = name + + # If no name was given, use the default. Otherwise, look up the appropriate + # class in patternVariants, instantiate it, and use that. 
+ if not name: + self._base = Corosync2Patterns() + else: + self._base = patternVariants[name]() + + def get_patterns(self, kind): + """ Call get_patterns on the previously instantiated pattern object """ + + return self._base.get_patterns(kind) + + def get_template(self, key): + """ Return a single pattern from the previously instantiated pattern + object as a string, or None if no pattern exists for the given key. + """ + + return self._base[key] + + def get_component(self, kind): + """ Call get_component on the previously instantiated pattern object """ + + return self._base.get_component(kind) + + def __getitem__(self, key): + return self.get_template(key) + + +# PYTHONPATH=python python python/pacemaker/_cts/patterns.py -k crm-corosync -t StartCmd +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("-k", "--kind", metavar="KIND") + parser.add_argument("-t", "--template", metavar="TEMPLATE") + + args = parser.parse_args() + + print(PatternSelector(args.kind)[args.template]) diff --git a/python/pacemaker/_cts/process.py b/python/pacemaker/_cts/process.py new file mode 100644 index 0000000..2940b71 --- /dev/null +++ b/python/pacemaker/_cts/process.py @@ -0,0 +1,76 @@ +""" A module for managing and communicating with external processes """ + +__all__ = ["killall", "exit_if_proc_running", "pipe_communicate", "stdout_from_command"] +__copyright__ = "Copyright 2009-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+)" + +import subprocess +import sys + +import psutil + +from pacemaker.exitstatus import ExitStatus + +def killall(process_names, terminate=False): + """ Kill all instances of every process in a list """ + + if not process_names: + return + + if not isinstance(process_names, list): + process_names = [process_names] + + procs = [] + for proc in psutil.process_iter(["name"]): + if proc.info["name"] in process_names: + procs.append(proc) + + if terminate: + for proc in procs: + proc.terminate() + _, alive = psutil.wait_procs(procs, timeout=3) + procs = alive + + for proc in procs: + proc.kill() + + +def is_proc_running(process_name): + """ Check whether a process with a given name is running """ + + for proc in psutil.process_iter(["name"]): + if proc.info["name"] == process_name: + return True + return False + + +def exit_if_proc_running(process_name): + """ Exit with error if a given process is running """ + + if is_proc_running(process_name): + print("Error: %s is already running!" % process_name) + print("Run %s only when the cluster is stopped." 
% sys.argv[0]) + sys.exit(ExitStatus.ERROR) + + +def pipe_communicate(pipes, check_stderr=False, stdin=None): + """ Get text output from pipes """ + + if stdin is not None: + pipe_outputs = pipes.communicate(input=stdin.encode()) + else: + pipe_outputs = pipes.communicate() + + output = pipe_outputs[0].decode(sys.stdout.encoding) + if check_stderr: + output = output + pipe_outputs[1].decode(sys.stderr.encoding) + + return output + + +def stdout_from_command(args): + """ Execute command and return its standard output """ + + with subprocess.Popen(args, stdout=subprocess.PIPE) as p: + p.wait() + return pipe_communicate(p).split("\n") diff --git a/python/pacemaker/_cts/remote.py b/python/pacemaker/_cts/remote.py new file mode 100644 index 0000000..99d2ed7 --- /dev/null +++ b/python/pacemaker/_cts/remote.py @@ -0,0 +1,288 @@ +""" Remote command runner for Pacemaker's Cluster Test Suite (CTS) """ + +__all__ = ["RemoteExec", "RemoteFactory"] +__copyright__ = "Copyright 2014-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" + +import re +import os + +from subprocess import Popen,PIPE +from threading import Thread + +from pacemaker._cts.logging import LogFactory + +def convert2string(lines): + """ Convert a byte string to a UTF-8 string, and a list of byte strings to + a list of UTF-8 strings. All other text formats are passed through. + """ + + if isinstance(lines, bytes): + return lines.decode("utf-8") + + if isinstance(lines, list): + lst = [] + for line in lines: + if isinstance(line, bytes): + line = line.decode("utf-8") + + lst.append(line) + + return lst + + return lines + +class AsyncCmd(Thread): + """ A class for doing the hard work of running a command on another machine """ + + def __init__(self, node, command, proc=None, delegate=None): + """ Create a new AsyncCmd instance + + Arguments: + + node -- The remote machine to run on + command -- The ssh command string to use for remote execution + proc -- If not None, a process object previously created with Popen. + Instead of spawning a new process, we will then wait on + this process to finish and handle its output. 
+ delegate -- When the command completes, call the async_complete method + on this object + """ + + self._command = command + self._delegate = delegate + self._logger = LogFactory() + self._node = node + self._proc = proc + + Thread.__init__(self) + + def run(self): + """ Run the previously instantiated AsyncCmd object """ + + out = None + err = None + + if not self._proc: + # pylint: disable=consider-using-with + self._proc = Popen(self._command, stdout=PIPE, stderr=PIPE, close_fds=True, shell=True) + + self._logger.debug("cmd: async: target=%s, pid=%d: %s" % (self._node, self._proc.pid, self._command)) + self._proc.wait() + + if self._delegate: + self._logger.debug("cmd: pid %d returned %d to %s" % (self._proc.pid, self._proc.returncode, repr(self._delegate))) + else: + self._logger.debug("cmd: pid %d returned %d" % (self._proc.pid, self._proc.returncode)) + + if self._proc.stderr: + err = self._proc.stderr.readlines() + self._proc.stderr.close() + + for line in err: + self._logger.debug("cmd: stderr[%d]: %s" % (self._proc.pid, line)) + + err = convert2string(err) + + if self._proc.stdout: + out = self._proc.stdout.readlines() + self._proc.stdout.close() + out = convert2string(out) + + if self._delegate: + self._delegate.async_complete(self._proc.pid, self._proc.returncode, out, err) + +class RemoteExec: + """ An abstract class for remote execution. It runs a command on another + machine using ssh and scp. + """ + + def __init__(self, command, cp_command, silent=False): + """ Create a new RemoteExec instance + + Arguments: + + command -- The ssh command string to use for remote execution + cp_command -- The scp command string to use for copying files + silent -- Should we log command status? + """ + + self._command = command + self._cp_command = cp_command + self._logger = LogFactory() + self._silent = silent + self._our_node = os.uname()[1].lower() + + def _fixcmd(self, cmd): + """ Perform shell escapes on certain characters in the input cmd string """ + + return re.sub("\'", "'\\''", cmd) + + def _cmd(self, args): + """ Given a list of arguments, return the string that will be run on the + remote system + """ + + sysname = args[0] + command = args[1] + + if sysname is None or sysname.lower() == self._our_node or sysname == "localhost": + ret = command + else: + ret = "%s %s '%s'" % (self._command, sysname, self._fixcmd(command)) + + return ret + + def _log(self, args): + """ Log a message """ + + if not self._silent: + self._logger.log(args) + + def _debug(self, args): + """ Log a message at the debug level """ + + if not self._silent: + self._logger.debug(args) + + def call_async(self, node, command, delegate=None): + """ Run the given command on the given remote system and do not wait for + it to complete. + + Arguments: + + node -- The remote machine to run on + command -- The command to run, as a string + delegate -- When the command completes, call the async_complete method + on this object + + Returns: + + The running process object + """ + + aproc = AsyncCmd(node, self._cmd([node, command]), delegate=delegate) + aproc.start() + return aproc + + def __call__(self, node, command, synchronous=True, verbose=2): + """ Run the given command on the given remote system. If you call this class + like a function, this is what gets called. It's approximately the same + as a system() call on the remote machine. + + Arguments: + + node -- The remote machine to run on + command -- The command to run, as a string + synchronous -- Should we wait for the command to complete? 
+ verbose -- If 0, do not lo:g anything. If 1, log the command and its + return code but not its output. If 2, additionally log + command output. + + Returns: + + A tuple of (return code, command output) + """ + + rc = 0 + result = None + # pylint: disable=consider-using-with + proc = Popen(self._cmd([node, command]), + stdout = PIPE, stderr = PIPE, close_fds = True, shell = True) + + if not synchronous and proc.pid > 0 and not self._silent: + aproc = AsyncCmd(node, command, proc=proc) + aproc.start() + return (rc, result) + + if proc.stdout: + result = proc.stdout.readlines() + proc.stdout.close() + else: + self._log("No stdout stream") + + rc = proc.wait() + + if verbose > 0: + self._debug("cmd: target=%s, rc=%d: %s" % (node, rc, command)) + + result = convert2string(result) + + if proc.stderr: + errors = proc.stderr.readlines() + proc.stderr.close() + + for err in errors: + self._debug("cmd: stderr: %s" % err) + + if verbose == 2: + for line in result: + self._debug("cmd: stdout: %s" % line) + + return (rc, result) + + def copy(self, source, target, silent=False): + """ Perform a copy of the source file to the remote target, using the + cp_command provided when the RemoteExec object was created. + + Returns: + + The return code of the cp_command + """ + + cmd = "%s '%s' '%s'" % (self._cp_command, source, target) + rc = os.system(cmd) + + if not silent: + self._debug("cmd: rc=%d: %s" % (rc, cmd)) + + return rc + + def exists_on_all(self, filename, hosts): + """ Return True if specified file exists on all specified hosts. """ + + for host in hosts: + rc = self(host, "test -r %s" % filename) + if rc != 0: + return False + + return True + + +class RemoteFactory: + """ A class for constructing a singleton instance of a RemoteExec object """ + + # Class variables + + # -n: no stdin, -x: no X11, + # -o ServerAliveInterval=5: disconnect after 3*5s if the server + # stops responding + command = ("ssh -l root -n -x -o ServerAliveInterval=5 " + "-o ConnectTimeout=10 -o TCPKeepAlive=yes " + "-o ServerAliveCountMax=3 ") + + # -B: batch mode, -q: no stats (quiet) + cp_command = "scp -B -q" + + instance = None + + # pylint: disable=invalid-name + def getInstance(self): + """ Returns the previously created instance of RemoteExec, or creates a + new instance if one does not already exist. + """ + + if not RemoteFactory.instance: + RemoteFactory.instance = RemoteExec(RemoteFactory.command, + RemoteFactory.cp_command, + False) + return RemoteFactory.instance + + def enable_qarsh(self): + """ Enable the QA remote shell """ + + # http://nstraz.wordpress.com/2008/12/03/introducing-qarsh/ + print("Using QARSH for connections to cluster nodes") + + RemoteFactory.command = "qarsh -t 300 -l root" + RemoteFactory.cp_command = "qacp -q" diff --git a/python/pacemaker/_cts/test.py b/python/pacemaker/_cts/test.py new file mode 100644 index 0000000..fb809a9 --- /dev/null +++ b/python/pacemaker/_cts/test.py @@ -0,0 +1,594 @@ +""" A module providing base classes for defining regression tests and groups of + regression tests. Everything exported here should be considered an abstract + class that needs to be subclassed in order to do anything useful. Various + functions will raise NotImplementedError if not overridden by a subclass. 
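+
+ As a purely illustrative sketch (the daemon and command names below are
+ hypothetical, not part of this package), a concrete test might be defined
+ roughly like this:
+
+ import subprocess
+ from pacemaker._cts.process import killall
+ from pacemaker._cts.test import Test
+
+ class ExampleDaemonTest(Test):
+     def __init__(self, **kwargs):
+         Test.__init__(self, "example", "Start example-daemon and query it", **kwargs)
+         self._daemon_location = "example-daemon"
+         self.add_cmd("example-ctl", "--query", validate=False)
+
+     def _kill_daemons(self):
+         # "example-daemon" is a stand-in name for whatever is being tested
+         killall(["example-daemon"])
+
+     def _start_daemons(self):
+         # The daemon is assumed to write "successfully started" to
+         # self.logpath, which start_environment() watches for.
+         # pylint: disable=consider-using-with
+         self._daemon_process = subprocess.Popen(["example-daemon", "-l", self.logpath])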
+""" + +__copyright__ = "Copyright 2009-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+)" + +__all__ = ["Test", "Tests"] + +import io +import os +import re +import shlex +import signal +import subprocess +import sys +import time + +from pacemaker._cts.errors import ExitCodeError, OutputFoundError, OutputNotFoundError, XmlValidationError +from pacemaker._cts.process import pipe_communicate +from pacemaker.buildoptions import BuildOptions +from pacemaker.exitstatus import ExitStatus + +def find_validator(rng_file): + """ Return the command line used to validate XML output, or None if the validator + is not installed. + """ + + if os.access("/usr/bin/xmllint", os.X_OK): + if rng_file is None: + return ["xmllint", "-"] + + return ["xmllint", "--relaxng", rng_file, "-"] + + return None + + +def rng_directory(): + """ Which directory contains the RNG schema files? """ + + if "PCMK_schema_directory" in os.environ: + return os.environ["PCMK_schema_directory"] + + if os.path.exists("%s/cts-fencing.in" % sys.path[0]): + return "xml" + + return BuildOptions.SCHEMA_DIR + + +class Pattern: + """ A class for checking log files for a given pattern """ + + def __init__(self, pat, negative=False, regex=False): + """ Create a new Pattern instance + + Arguments: + + pat -- The string to search for + negative -- If True, pat must not be found in any input + regex -- If True, pat is a regex and not a substring + """ + + self._pat = pat + self.negative = negative + self.regex = regex + + def __str__(self): + return self._pat + + def match(self, line): + """ Is this pattern found in the given line? """ + + if self.regex: + return re.search(self._pat, line) is not None + + return self._pat in line + + +class Test: + """ The base class for a single regression test. A single regression test + may still run multiple commands as part of its execution. + """ + + def __init__(self, name, description, **kwargs): + """ Create a new Test instance. This method must be provided by all + subclasses, which must call Test.__init__ first. + + Arguments: + + description -- A user-readable description of the test, helpful in + identifying what test is running or has failed. + name -- The name of the test. Command line tools use this + attribute to allow running only tests with the exact + name, or tests whose name matches a given pattern. + This should be unique among all tests. + + Keyword arguments: + + force_wait -- + logdir -- The base directory under which to create a directory + to store output and temporary data. + timeout -- How long to wait for the test to complete. + verbose -- Whether to print additional information, including + verbose command output and daemon log files. + """ + + self.description = description + self.executed = False + self.name = name + + self.force_wait = kwargs.get("force_wait", False) + self.logdir = kwargs.get("logdir", "/tmp") + self.timeout = kwargs.get("timeout", 2) + self.verbose = kwargs.get("verbose", False) + + self._cmds = [] + self._patterns = [] + + self._daemon_location = None + self._daemon_output = "" + self._daemon_process = None + + self._result_exitcode = ExitStatus.OK + self._result_txt = "" + + ### + ### PROPERTIES + ### + + @property + def exitcode(self): + """ The final exitcode of the Test. If all commands pass, this property + will be ExitStatus.OK. Otherwise, this property will be the exitcode + of the first command to fail. 
+ """ + return self._result_exitcode + + @exitcode.setter + def exitcode(self, value): + self._result_exitcode = value + + @property + def logpath(self): + """ The path to the log for whatever daemon is being tested. Note that + this requires all subclasses to set self._daemon_location before + accessing this property or an exception will be raised. + """ + return os.path.join(self.logdir, self._daemon_location + ".log") + + ### + ### PRIVATE METHODS + ### + + def _kill_daemons(self): + """ Kill any running daemons in preparation for executing the test """ + raise NotImplementedError("_kill_daemons not provided by subclass") + + def _match_log_patterns(self): + """ Check test output for expected patterns, setting self.exitcode and + self._result_txt as appropriate. Not all subclass will need to do + this. + """ + if len(self._patterns) == 0: + return + + n_failed_matches = 0 + n_negative_matches = 0 + + output = self._daemon_output.split("\n") + + for pat in self._patterns: + positive_match = False + + for line in output: + if pat.match(line): + if pat.negative: + n_negative_matches += 1 + + if self.verbose: + print("This pattern should not have matched = '%s" % pat) + + break + + positive_match = True + break + + if not pat.negative and not positive_match: + n_failed_matches += 1 + print("Pattern Not Matched = '%s'" % pat) + + if n_failed_matches > 0 or n_negative_matches > 0: + msg = "FAILURE - '%s' failed. %d patterns out of %d not matched. %d negative matches." + self._result_txt = msg % (self.name, n_failed_matches, len(self._patterns), n_negative_matches) + self.exitcode = ExitStatus.ERROR + + + def _new_cmd(self, cmd, args, exitcode, **kwargs): + """ Add a command to be executed as part of this test. + + Arguments: + + cmd -- The program to run. + args -- Commands line arguments to pass to cmd, as a string. + exitcode -- The expected exit code of cmd. This can be used to + run a command that is expected to fail. + + Keyword arguments: + + stdout_match -- If not None, a string that is expected to be + present in the stdout of cmd. This can be a + regular expression. + no_wait -- Do not wait for cmd to complete. + stdout_negative_match -- If not None, a string that is expected to be + missing in the stdout of cmd. This can be a + regualr expression. + kill -- A command to be run after cmd, typically in + order to kill a failed process. This should be + the entire command line including arguments as + a single string. + validate -- If True, the output of cmd will be passed to + xmllint for validation. If validation fails, + XmlValidationError will be raised. + check_rng -- If True and validate is True, command output + will additionally be checked against the + api-result.rng file. + check_stderr -- If True, the stderr of cmd will be included in + output. 
+ env -- If not None, variables to set in the environment + """ + + self._cmds.append( + { + "args": args, + "check_rng": kwargs.get("check_rng", True), + "check_stderr": kwargs.get("check_stderr", True), + "cmd": cmd, + "expected_exitcode": exitcode, + "kill": kwargs.get("kill", None), + "no_wait": kwargs.get("no_wait", False), + "stdout_match": kwargs.get("stdout_match", None), + "stdout_negative_match": kwargs.get("stdout_negative_match", None), + "validate": kwargs.get("validate", True), + "env": kwargs.get("env", None), + } + ) + + def _start_daemons(self): + """ Start any necessary daemons in preparation for executing the test """ + raise NotImplementedError("_start_daemons not provided by subclass") + + ### + ### PUBLIC METHODS + ### + + def add_cmd(self, cmd, args, validate=True, check_rng=True, check_stderr=True, + env=None): + """ Add a simple command to be executed as part of this test """ + + self._new_cmd(cmd, args, ExitStatus.OK, validate=validate, check_rng=check_rng, + check_stderr=check_stderr, env=env) + + def add_cmd_and_kill(self, cmd, args, kill_proc): + """ Add a command and system command to be executed as part of this test """ + + self._new_cmd(cmd, args, ExitStatus.OK, kill=kill_proc) + + def add_cmd_check_stdout(self, cmd, args, match, no_match=None, env=None): + """ Add a simple command with expected output to be executed as part of this test """ + + self._new_cmd(cmd, args, ExitStatus.OK, stdout_match=match, + stdout_negative_match=no_match, env=env) + + def add_cmd_expected_fail(self, cmd, args, exitcode=ExitStatus.ERROR): + """ Add a command that is expected to fail to be executed as part of this test """ + + self._new_cmd(cmd, args, exitcode) + + def add_cmd_no_wait(self, cmd, args): + """ Add a simple command to be executed (without waiting) as part of this test """ + + self._new_cmd(cmd, args, ExitStatus.OK, no_wait=True) + + def add_log_pattern(self, pattern, negative=False, regex=False): + """ Add a pattern that should appear in the test's logs """ + + self._patterns.append(Pattern(pattern, negative=negative, regex=regex)) + + def clean_environment(self): + """ Clean up the host after executing a test """ + + if self._daemon_process: + if self._daemon_process.poll() is None: + self._daemon_process.terminate() + self._daemon_process.wait() + else: + return_code = { + getattr(signal, _signame): _signame + for _signame in dir(signal) + if _signame.startswith('SIG') and not _signame.startswith("SIG_") + }.get(-self._daemon_process.returncode, "RET=%d" % (self._daemon_process.returncode)) + msg = "FAILURE - '%s' failed. %s abnormally exited during test (%s)." + self._result_txt = msg % (self.name, self._daemon_location, return_code) + self.exitcode = ExitStatus.ERROR + + self._daemon_process = None + self._daemon_output = "" + + # the default for utf-8 encoding would error out if e.g. 
memory corruption + # makes fenced output any kind of 8 bit value - while still interesting + # for debugging and we'd still like the regression-test to go over the + # full set of test-cases + with open(self.logpath, 'rt', encoding = "ISO-8859-1") as logfile: + for line in logfile.readlines(): + self._daemon_output += line + + if self.verbose: + print("Daemon Output Start") + print(self._daemon_output) + print("Daemon Output End") + + def print_result(self, filler): + """ Print the result of the last test execution """ + + print("%s%s" % (filler, self._result_txt)) + + def run(self): + """ Execute this test """ + + i = 1 + + self.start_environment() + + if self.verbose: + print("\n--- START TEST - %s" % self.name) + + self._result_txt = "SUCCESS - '%s'" % (self.name) + self.exitcode = ExitStatus.OK + + for cmd in self._cmds: + try: + self.run_cmd(cmd) + except ExitCodeError as e: + print("Step %d FAILED - command returned %s, expected %d" % (i, e, cmd['expected_exitcode'])) + self.set_error(i, cmd) + break + except OutputNotFoundError as e: + print("Step %d FAILED - '%s' was not found in command output: %s" % (i, cmd['stdout_match'], e)) + self.set_error(i, cmd) + break + except OutputFoundError as e: + print("Step %d FAILED - '%s' was found in command output: %s" % (i, cmd['stdout_negative_match'], e)) + self.set_error(i, cmd) + break + except XmlValidationError as e: + print("Step %d FAILED - xmllint failed: %s" % (i, e)) + self.set_error(i, cmd) + break + + if self.verbose: + print("Step %d SUCCESS" % (i)) + + i = i + 1 + + self.clean_environment() + + if self.exitcode == ExitStatus.OK: + self._match_log_patterns() + + print(self._result_txt) + if self.verbose: + print("--- END TEST - %s\n" % self.name) + + self.executed = True + + def run_cmd(self, args): + """ Execute a command as part of this test """ + + cmd = shlex.split(args['args']) + cmd.insert(0, args['cmd']) + + if self.verbose: + print("\n\nRunning: %s" % " ".join(cmd)) + + # FIXME: Using "with" here breaks fencing merge tests. + # pylint: disable=consider-using-with + if args['env']: + new_env = os.environ.copy() + new_env.update(args['env']) + test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + env=new_env) + else: + test = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if args['kill']: + if self.verbose: + print("Also running: %s" % args['kill']) + + ### Typically, the kill argument is used to detect some sort of + ### failure. Without yielding for a few seconds here, the process + ### launched earlier that is listening for the failure may not have + ### time to connect to pacemaker-execd. 
+ time.sleep(2)
+ subprocess.Popen(shlex.split(args['kill']))
+
+ if not args['no_wait']:
+ test.wait()
+ else:
+ return ExitStatus.OK
+
+ output = pipe_communicate(test, check_stderr=args['check_stderr'])
+
+ if self.verbose:
+ print(output)
+
+ if test.returncode != args['expected_exitcode']:
+ raise ExitCodeError(test.returncode)
+
+ if args['stdout_match'] is not None and \
+ re.search(args['stdout_match'], output) is None:
+ raise OutputNotFoundError(output)
+
+ if args['stdout_negative_match'] is not None and \
+ re.search(args['stdout_negative_match'], output) is not None:
+ raise OutputFoundError(output)
+
+ if args['validate']:
+ if args['check_rng']:
+ rng_file = rng_directory() + "/api/api-result.rng"
+ else:
+ rng_file = None
+
+ cmd = find_validator(rng_file)
+ if not cmd:
+ raise XmlValidationError("Could not find validator for %s" % rng_file)
+
+ if self.verbose:
+ print("\nRunning: %s" % " ".join(cmd))
+
+ with subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as validator:
+ output = pipe_communicate(validator, check_stderr=True, stdin=output)
+
+ if self.verbose:
+ print(output)
+
+ if validator.returncode != 0:
+ raise XmlValidationError(output)
+
+ return ExitStatus.OK
+
+ def set_error(self, step, cmd):
+ """ Record failure of this test """
+
+ msg = "FAILURE - '%s' failed at step %d. Command: %s %s"
+ self._result_txt = msg % (self.name, step, cmd['cmd'], cmd['args'])
+ self.exitcode = ExitStatus.ERROR
+
+ def start_environment(self):
+ """ Prepare the host for executing a test """
+
+ if os.path.exists(self.logpath):
+ os.remove(self.logpath)
+
+ self._kill_daemons()
+ self._start_daemons()
+
+ logfile = None
+
+ init_time = time.time()
+ update_time = init_time
+
+ while True:
+ # FIXME: Eventually use 'with' here, which seems complicated given
+ # everything happens in a loop.
+ # pylint: disable=consider-using-with
+ time.sleep(0.1)
+
+ if not self.force_wait and logfile is None \
+ and os.path.exists(self.logpath):
+ logfile = io.open(self.logpath, 'rt', encoding = "ISO-8859-1")
+
+ if not self.force_wait and logfile is not None:
+ for line in logfile.readlines():
+ if "successfully started" in line:
+ return
+
+ now = time.time()
+
+ if self.timeout > 0 and (now - init_time) >= self.timeout:
+ if not self.force_wait:
+ print("\tDaemon %s doesn't seem to have been initialized within %fs."
+ "\n\tConsider specifying a longer '--timeout' value."
+ %(self._daemon_location, self.timeout))
+ return
+
+ if self.verbose and (now - update_time) >= 5:
+ print("Waiting for %s to be initialized: %fs ..."
+ %(self._daemon_location, now - init_time))
+ update_time = now
+
+
+class Tests:
+ """ The base class for a collection of regression tests """
+
+ def __init__(self, **kwargs):
+ """ Create a new Tests instance. This method must be provided by all
+ subclasses, which must call Tests.__init__ first.
+
+ Keyword arguments:
+
+ force_wait -- If True, wait the full timeout period for a daemon to
+ start instead of returning as soon as its log reports a
+ successful start.
+ logdir -- The base directory under which to create a directory
+ to store output and temporary data.
+ timeout -- How long to wait for the test to complete.
+ verbose -- Whether to print additional information, including
+ verbose command output and daemon log files.
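+
+ As an illustrative sketch (ExampleTests is a hypothetical subclass that
+ appends Test objects to self._tests), a typical driver looks like:
+
+ tests = ExampleTests(verbose=True, timeout=10)
+ tests.print_list()
+ tests.run_tests()      # or tests.run_single("some-test-name")
+ tests.print_results()
+ tests.exit()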
+ """ + + self.force_wait = kwargs.get("force_wait", False) + self.logdir = kwargs.get("logdir", "/tmp") + self.timeout = kwargs.get("timeout", 2) + self.verbose = kwargs.get("verbose", False) + + self._tests = [] + + def exit(self): + """ Exit (with error status code if any test failed) """ + + for test in self._tests: + if not test.executed: + continue + + if test.exitcode != ExitStatus.OK: + sys.exit(ExitStatus.ERROR) + + sys.exit(ExitStatus.OK) + + def print_list(self): + """ List all registered tests """ + + print("\n==== %d TESTS FOUND ====" % len(self._tests)) + print("%35s - %s" % ("TEST NAME", "TEST DESCRIPTION")) + print("%35s - %s" % ("--------------------", "--------------------")) + + for test in self._tests: + print("%35s - %s" % (test.name, test.description)) + + print("==== END OF LIST ====\n") + + def print_results(self): + """ Print summary of results of executed tests """ + + failures = 0 + success = 0 + + print("\n\n======= FINAL RESULTS ==========") + print("\n--- FAILURE RESULTS:") + + for test in self._tests: + if not test.executed: + continue + + if test.exitcode != ExitStatus.OK: + failures = failures + 1 + test.print_result(" ") + else: + success = success + 1 + + if failures == 0: + print(" None") + + print("\n--- TOTALS\n Pass:%d\n Fail:%d\n" % (success, failures)) + + def run_single(self, name): + """ Run a single named test """ + + for test in self._tests: + if test.name == name: + test.run() + break + + def run_tests(self): + """ Run all tests """ + + for test in self._tests: + test.run() + + def run_tests_matching(self, pattern): + """ Run all tests whose name matches a pattern """ + + for test in self._tests: + if test.name.count(pattern) != 0: + test.run() diff --git a/python/pacemaker/_cts/watcher.py b/python/pacemaker/_cts/watcher.py new file mode 100644 index 0000000..3bdb892 --- /dev/null +++ b/python/pacemaker/_cts/watcher.py @@ -0,0 +1,551 @@ +""" Log searching classes for Pacemaker's Cluster Test Suite (CTS) """ + +__all__ = ["LogKind", "LogWatcher"] +__copyright__ = "Copyright 2014-2023 the Pacemaker project contributors" +__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" + +from enum import Enum, unique +import re +import time +import threading + +from pacemaker.buildoptions import BuildOptions +from pacemaker._cts.logging import LogFactory +from pacemaker._cts.remote import RemoteFactory + +LOG_WATCHER_BIN = BuildOptions.DAEMON_DIR + "/cts-log-watcher" + +@unique +class LogKind(Enum): + """ The various kinds of log files that can be watched """ + + ANY = 0 + FILE = 1 + REMOTE_FILE = 2 + JOURNAL = 3 + + def __str__(self): + if self.value == 0: + return "any" + if self.value == 1: + return "combined syslog" + if self.value == 2: + return "remote" + + return "journal" + +class SearchObj: + """ The base class for various kinds of log watchers. Log-specific watchers + need to be built on top of this one. 
+ """ + + def __init__(self, filename, host=None, name=None): + """ Create a new SearchObj instance + + Arguments: + + filename -- The log to watch + host -- The cluster node on which to watch the log + name -- A unique name to use when logging about this watch + """ + + self.cache = [] + self.filename = filename + self.limit = None + self.logger = LogFactory() + self.name = name + self.offset = "EOF" + self.rsh = RemoteFactory().getInstance() + + if host: + self.host = host + else: + self.host = "localhost" + + def __str__(self): + if self.host: + return "%s:%s" % (self.host, self.filename) + + return self.filename + + def log(self, args): + """ Log a message """ + + message = "lw: %s: %s" % (self, args) + self.logger.log(message) + + def debug(self, args): + """ Log a debug message """ + + message = "lw: %s: %s" % (self, args) + self.logger.debug(message) + + def harvest(self, delegate=None): + """ Collect lines from a log, optionally calling delegate when complete """ + + async_task = self.harvest_async(delegate) + async_task.join() + + def harvest_async(self, delegate=None): + """ Collect lines from a log asynchronously, optionally calling delegate + when complete. This method must be implemented by all subclasses. + """ + + raise NotImplementedError + + def end(self): + """ Mark that a log is done being watched, resetting internal data structures + to the beginning of the file. Subsequent watches will therefore start + from the beginning again. + """ + + self.debug("Unsetting the limit") + self.limit = None + +class FileObj(SearchObj): + """ A specialized SearchObj subclass for watching log files """ + + def __init__(self, filename, host=None, name=None): + """ Create a new FileObj instance + + Arguments: + + filename -- The file to watch + host -- The cluster node on which to watch the file + name -- A unique name to use when logging about this watch + """ + + SearchObj.__init__(self, filename, host, name) + self._delegate = None + + self.harvest() + + def async_complete(self, pid, returncode, out, err): + """ Called when an asynchronous log file read is complete. This function + saves the output from that read for look()/look_for_all() to process + and records the current position in the journal. Future reads will + pick back up from that spot. + + Arguments: + + pid -- The ID of the process that did the read + returncode -- The return code of the process that did the read + out -- stdout from the file read + err -- stderr from the file read + """ + + for line in out: + match = re.search(r"^CTSwatcher:Last read: (\d+)", line) + + if match: + self.offset = match.group(1) + self.debug("Got %d lines, new offset: %s %s" % (len(out), self.offset, repr(self._delegate))) + elif re.search(r"^CTSwatcher:.*truncated", line): + self.log(line) + elif re.search(r"^CTSwatcher:", line): + self.debug("Got control line: %s" % line) + else: + self.cache.append(line) + + if self._delegate: + self._delegate.async_complete(pid, returncode, self.cache, err) + + def harvest_async(self, delegate=None): + """ Collect lines from the log file on a single host asynchronously, + optionally calling delegate when complete. This can be called + repeatedly, reading a chunk each time or until the end of the log + file is hit. 
+ """ + + self._delegate = delegate + self.cache = [] + + if self.limit and (self.offset == "EOF" or int(self.offset) > self.limit): + if self._delegate: + self._delegate.async_complete(-1, -1, [], []) + + return None + + return self.rsh.call_async(self.host, + "%s -t %s -p CTSwatcher: -l 200 -f %s -o %s" % (LOG_WATCHER_BIN, self.name, self.filename, self.offset), + delegate=self) + + def set_end(self): + """ Internally record where we expect to find the end of a log file, + which is just the number of lines in the file. Calls to harvest + from the log file will not go any farther than what this function + records. + """ + + if self.limit: + return + + # pylint: disable=not-callable + (_, lines) = self.rsh(self.host, + "%s -t %s -p CTSwatcher: -l 2 -f %s -o %s" % (LOG_WATCHER_BIN, self.name, self.filename, "EOF"), + verbose=0) + + for line in lines: + match = re.search(r"^CTSwatcher:Last read: (\d+)", line) + if match: + self.limit = int(match.group(1)) + self.debug("Set limit to: %d" % self.limit) + +class JournalObj(SearchObj): + """ A specialized SearchObj subclass for watching systemd journals """ + + def __init__(self, host=None, name=None): + """ Create a new JournalObj instance + + Arguments: + + host -- The cluster node on which to watch the journal + name -- A unique name to use when logging about this watch + """ + + SearchObj.__init__(self, name, host, name) + self._delegate = None + self._hit_limit = False + + self.harvest() + + def async_complete(self, pid, returncode, out, err): + """ Called when an asynchronous journal read is complete. This function + saves the output from that read for look()/look_for_all() to process + and records the current position in the journal. Future reads will + pick back up from that spot. + + Arguments: + + pid -- The ID of the process that did the journal read + returncode -- The return code of the process that did the journal read + out -- stdout from the journal read + err -- stderr from the journal read + """ + + found_cursor = False + for line in out: + match = re.search(r"^-- cursor: ([^.]+)", line) + + if match: + found_cursor = True + self.offset = match.group(1).strip() + self.debug("Got %d lines, new cursor: %s" % (len(out), self.offset)) + else: + self.cache.append(line) + + if self.limit and not found_cursor: + self._hit_limit = True + self.debug("Got %d lines but no cursor: %s" % (len(out), self.offset)) + + # Get the current cursor + # pylint: disable=not-callable + (_, out) = self.rsh(self.host, "journalctl -q -n 0 --show-cursor", verbose=0) + for line in out: + match = re.search(r"^-- cursor: ([^.]+)", line) + + if match: + self.offset = match.group(1).strip() + self.debug("Got %d lines, new cursor: %s" % (len(out), self.offset)) + else: + self.log("Not a new cursor: %s" % line) + self.cache.append(line) + + if self._delegate: + self._delegate.async_complete(pid, returncode, self.cache, err) + + def harvest_async(self, delegate=None): + """ Collect lines from the journal on a single host asynchronously, + optionally calling delegate when complete. This can be called + repeatedly, reading a chunk each time or until the end of the + journal is hit. 
+ """ + + self._delegate = delegate + self.cache = [] + + # Use --lines to prevent journalctl from overflowing the Popen input buffer + if self.limit and self._hit_limit: + return None + + if self.offset == "EOF": + command = "journalctl -q -n 0 --show-cursor" + elif self.limit: + command = "journalctl -q --after-cursor='%s' --until '%s' --lines=200 --show-cursor" % (self.offset, self.limit) + else: + command = "journalctl -q --after-cursor='%s' --lines=200 --show-cursor" % (self.offset) + + return self.rsh.call_async(self.host, command, delegate=self) + + def set_end(self): + """ Internally record where we expect to find the end of a host's journal, + which is just the current time. Calls to harvest from the journal will + not go any farther than what this function records. + """ + + if self.limit: + return + + self._hit_limit = False + # pylint: disable=not-callable + (rc, lines) = self.rsh(self.host, "date +'%Y-%m-%d %H:%M:%S'", verbose=0) + + if rc == 0 and len(lines) == 1: + self.limit = lines[0].strip() + self.debug("Set limit to: %s" % self.limit) + else: + self.debug("Unable to set limit for %s because date returned %d lines with status %d" % (self.host, + len(lines), rc)) + +class LogWatcher: + """ A class for watching a single log file or journal across multiple hosts, + looking for lines that match given regular expressions. + + The way you use this class is as follows: + - Construct a LogWatcher object + - Call set_watch() when you want to start watching the log + - Call look() to scan the log looking for the patterns + """ + + def __init__(self, log, regexes, hosts, kind=LogKind.ANY, name="Anon", timeout=10, silent=False): + """ Create a new LogWatcher instance. + + Arguments: + + log -- The log file to watch + regexes -- A list of regular expressions to match against the log + hosts -- A list of cluster nodes on which to watch the log + kind -- What type of log is this object watching? + name -- A unique name to use when logging about this watch + timeout -- Default number of seconds to watch a log file at a time; + this can be overridden by the timeout= parameter to + self.look on an as-needed basis + silent -- If False, log extra information + """ + + self.filename = log + self.hosts = hosts + self.kind = kind + self.name = name + self.regexes = regexes + self.unmatched = None + self.whichmatch = -1 + + self._cache_lock = threading.Lock() + self._file_list = [] + self._line_cache = [] + self._logger = LogFactory() + self._timeout = int(timeout) + + # Validate our arguments. Better sooner than later ;-) + for regex in regexes: + re.compile(regex) + + if not self.hosts: + raise ValueError("LogWatcher requires hosts argument") + + if not self.filename: + raise ValueError("LogWatcher requires log argument") + + if not silent: + for regex in self.regexes: + self._debug("Looking for regex: %s" % regex) + + def _debug(self, args): + """ Log a debug message """ + + message = "lw: %s: %s" % (self.name, args) + self._logger.debug(message) + + def set_watch(self): + """ Mark the place to start watching the log from """ + + if self.kind == LogKind.REMOTE_FILE: + for node in self.hosts: + self._file_list.append(FileObj(self.filename, node, self.name)) + + elif self.kind == LogKind.JOURNAL: + for node in self.hosts: + self._file_list.append(JournalObj(node, self.name)) + + else: + self._file_list.append(FileObj(self.filename)) + + def async_complete(self, pid, returncode, out, err): + """ Called when an asynchronous log file read is complete. 
This function + saves the output from that read for look()/look_for_all() to process + and records the current position. Future reads will pick back up + from that spot. + + Arguments: + + pid -- The ID of the process that did the read + returncode -- The return code of the process that did the read + out -- stdout from the file read + err -- stderr from the file read + """ + + # It's not clear to me whether this function ever gets called as + # delegate somewhere, which is what would pass returncode and err + # as parameters. Just disable the warning for now. + # pylint: disable=unused-argument + + # TODO: Probably need a lock for updating self._line_cache + self._logger.debug("%s: Got %d lines from %d (total %d)" % (self.name, len(out), pid, len(self._line_cache))) + + if out: + with self._cache_lock: + self._line_cache.extend(out) + + def __get_lines(self): + """ Iterate over all watched log files and collect new lines from each """ + + if not self._file_list: + raise ValueError("No sources to read from") + + pending = [] + + for f in self._file_list: + t = f.harvest_async(self) + if t: + pending.append(t) + + for t in pending: + t.join(60.0) + if t.is_alive(): + self._logger.log("%s: Aborting after 20s waiting for %s logging commands" % (self.name, repr(t))) + return + + def end(self): + """ Mark that a log is done being watched, resetting internal data structures + to the beginning of the file. Subsequent watches will therefore start + from the beginning again. + """ + + for f in self._file_list: + f.end() + + def look(self, timeout=None): + """ Examine the log looking for the regexes that were given when this + object was created. It starts looking from the place marked by + set_watch(), continuing through the file in the fashion of + `tail -f`. It properly recovers from log file truncation but not + from removing and recreating the log. + + Arguments: + + timeout -- Number of seconds to watch the log file; defaults to + seconds argument passed when this object was created + + Returns: + + The first line which matches any regex + """ + + if not timeout: + timeout = self._timeout + + lines = 0 + begin = time.time() + end = begin + timeout + 1 + + if not self.regexes: + self._debug("Nothing to look for") + return None + + if timeout == 0: + for f in self._file_list: + f.set_end() + + while True: + if self._line_cache: + lines += 1 + + with self._cache_lock: + line = self._line_cache[0] + self._line_cache.remove(line) + + which = -1 + + if re.search("CTS:", line): + continue + + for regex in self.regexes: + which += 1 + + matchobj = re.search(regex, line) + + if matchobj: + self.whichmatch = which + self._debug("Matched: %s" % line) + return line + + elif timeout > 0 and end < time.time(): + timeout = 0 + for f in self._file_list: + f.set_end() + + else: + self.__get_lines() + + if not self._line_cache and end < time.time(): + self._debug("Single search terminated: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), lines)) + return None + + self._debug("Waiting: start=%d, end=%d, now=%d, lines=%d" % (begin, end, time.time(), len(self._line_cache))) + time.sleep(1) + + self._debug("How did we get here") + return None + + def look_for_all(self, allow_multiple_matches=False, silent=False): + """ Like look(), but looks for matches for multiple regexes. This function + returns when the timeout is reached or all regexes were matched. As a + side effect, self.unmatched will contain regexes that were not matched. + This can be inspected by the caller. 
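+
+ As an illustrative sketch (node names, log path, and patterns are
+ hypothetical), a typical caller does something like:
+
+ watch = LogWatcher("/var/log/messages", [r"resource started", r"resource promoted"],
+                    ["node1", "node2"], kind=LogKind.REMOTE_FILE, timeout=30)
+ watch.set_watch()
+ # ... perform the cluster action that should log both messages ...
+ if watch.look_for_all() is None:
+     print("Unmatched patterns: %s" % watch.unmatched)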
+ + Arguments: + + allow_multiple_matches -- If True, allow each regex to match more than + once. If False (the default), once a regex + matches a line, it will no longer be searched + for. + silent -- If False, log extra information + + Returns: + + If all regexes are matched, return the matching lines. Otherwise, return + None. + """ + + save_regexes = self.regexes + result = [] + + if not silent: + self._debug("starting search: timeout=%d" % self._timeout) + + while self.regexes: + one_result = self.look(self._timeout) + if not one_result: + self.unmatched = self.regexes + self.regexes = save_regexes + self.end() + return None + + result.append(one_result) + if not allow_multiple_matches: + del self.regexes[self.whichmatch] + + else: + # Allow multiple regexes to match a single line + tmp_regexes = self.regexes + self.regexes = [] + + for regex in tmp_regexes: + matchobj = re.search(regex, one_result) + if not matchobj: + self.regexes.append(regex) + + self.unmatched = None + self.regexes = save_regexes + return result diff --git a/python/pacemaker/buildoptions.py.in b/python/pacemaker/buildoptions.py.in new file mode 100644 index 0000000..53b492b --- /dev/null +++ b/python/pacemaker/buildoptions.py.in @@ -0,0 +1,57 @@ +""" A module providing information on build-time configuration of pacemaker """ + +__all__ = ["BuildOptions"] +__copyright__ = "Copyright 2023 the Pacemaker project contributors" +__license__ = "GNU Lesser General Public License version 2.1 or later (LGPLv2.1+)" + +class BuildOptions: + """ Variables generated as part of the ./configure && make process. These + affect how pacemaker was configured and where its various parts get + installed. + """ + + BASH_PATH = "@BASH_PATH@" + """ Path to the bash shell """ + + _BUILD_DIR = "@abs_top_builddir@" + """ Top-level build directory + NOTE: This is not especially useful on installed systems, but is useful for + running various programs from a source checkout + """ + + CIB_DIR = "@CRM_CONFIG_DIR@" + """ Where CIB files are stored """ + + COROSYNC_CONFIG_FILE = "@PCMK__COROSYNC_CONF@" + """ Path to the corosync config file """ + + DAEMON_DIR = "@CRM_DAEMON_DIR@" + """ Where Pacemaker daemons are installed """ + + DAEMON_USER = "@CRM_DAEMON_USER@" + """ User to run Pacemaker daemons as """ + + LOCAL_STATE_DIR = "@localstatedir@" + """ Where miscellaneous temporary state files are stored """ + + LOG_DIR = "@CRM_LOG_DIR@" + """ Where Pacemaker log files are stored """ + + OCF_RA_INSTALL_DIR = "@OCF_RA_INSTALL_DIR@" + """ Where resource agents are installed """ + + OCF_ROOT_DIR = "@OCF_ROOT_DIR@" + """ Root directory for OCF resource agents and libraries """ + + RSC_TMP_DIR = "@CRM_RSCTMP_DIR@" + """ Where resource agents should keep state files """ + + # pylint: disable=comparison-of-constants + REMOTE_ENABLED = "@PC_NAME_GNUTLS@" != "" + """ Was Pacemaker Remote support built? 
""" + + SBIN_DIR = "@sbindir@" + """ Where administrative programs are installed """ + + SCHEMA_DIR = "@CRM_SCHEMA_DIRECTORY@" + """ Where Relax-NG schema files are stored """ diff --git a/python/pacemaker/exitstatus.py b/python/pacemaker/exitstatus.py new file mode 100644 index 0000000..f74f9ec --- /dev/null +++ b/python/pacemaker/exitstatus.py @@ -0,0 +1,59 @@ +""" A module providing constants relating to why a process or function exited """ + +__all__ = ["ExitStatus"] +__copyright__ = "Copyright 2023 the Pacemaker project contributors" +__license__ = "GNU Lesser General Public License version 2.1 or later (LGPLv2.1+)" + +from enum import IntEnum, unique + +# These values must be kept in sync with include/crm/common/results.h +@unique +class ExitStatus(IntEnum): + """ Why did a function or process exit? These constants describe both success + and failure conditions. + """ + + OK = 0 + ERROR = 1 + INVALID_PARAM = 2 + UNIMPLEMENT_FEATURE = 3 + INSUFFICIENT_PRIV = 4 + NOT_INSTALLED = 5 + NOT_CONFIGURED = 6 + NOT_RUNNING = 7 + PROMOTED = 8 + FAILED_PROMOTED = 9 + USAGE = 64 + DATAERR = 65 + NOINPUT = 66 + NOUSER = 67 + NOHOST = 68 + UNAVAILABLE = 69 + SOFTWARE = 70 + OSERR = 71 + OSFILE = 72 + CANTCREAT = 73 + IOERR = 74 + TEMPFAIL = 75 + PROTOCOL = 76 + NOPERM = 77 + CONFIG = 78 + FATAL = 100 + PANIC = 101 + DISCONNECT = 102 + OLD = 103 + DIGEST = 104 + NOSUCH = 105 + QUORUM = 106 + UNSAFE = 107 + EXISTS = 108 + MULTIPLE = 109 + EXPIRED = 110 + NOT_YET_IN_EFFECT = 111 + INDETERMINATE = 112 + UNSATISFIED = 113 + TIMEOUT = 124 + DEGRADED = 190 + DEGRADED_PROMOTED = 191 + NONE = 193 + MAX = 255 diff --git a/python/pylintrc b/python/pylintrc new file mode 100644 index 0000000..e65110b --- /dev/null +++ b/python/pylintrc @@ -0,0 +1,556 @@ +# NOTE: Any line with CHANGED: describes something that we changed from the +# default pylintrc configuration. + +[MAIN] + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Files or directories to be skipped. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the ignore-list. The +# regex matches against paths and can be in Posix or Windows format. +ignore-paths= + +# Files or directories matching the regex patterns are skipped. The regex +# matches against base names, not paths. +ignore-patterns=^\.# + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=1 + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-allow-list= + +# Minimum supported python version +# CHANGED +py-version = 3.4 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. 
+limit-inference-results=100 + +# Specify a score threshold under which the program will exit with error. +fail-under=10.0 + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint in +# a server-like mode. +clear-cache-post-run=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +# confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable= + use-symbolic-message-instead, + useless-suppression, + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +# CHANGED +disable=line-too-long, + too-few-public-methods, + too-many-arguments, + too-many-branches, + too-many-instance-attributes, + too-many-statements, + unrecognized-option, + useless-option-value + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables 'fatal', 'error', 'warning', 'refactor', 'convention' +# and 'info', which contain the number of messages in each category, as +# well as 'statement', which is the total number of statements analyzed. This +# score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + +# Activate the evaluation score. +score=yes + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +# CHANGED: Don't do anything about FIXME, XXX, or TODO +notes= + +# Regular expression of note tags to take in consideration. +#notes-rgx= + + +[SIMILARITIES] + +# Minimum lines number of a similarity. 
+min-similarity-lines=6 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=100 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )?<?https?://\S+>?$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Maximum number of lines in a module +max-module-lines=2000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +# CHANGED: Single variable names are handled by variable-rgx below, leaving +# _ here as the name for any variable that should be ignored. +good-names=_ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names +# CHANGED: One letter variables are fine +variable-rgx=[a-z_][a-z0-9_]{,30}$ + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming style matching correct attribute names. 
+attr-naming-style=snake_case + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,}$ + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. +#class-const-rgx= + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,}$ + +# Regular expression matching correct type variable names +#typevar-rgx= + +# Regular expression which should only match function or class names that do +# not require a docstring. Use ^(?!__init__$)_ to also check __init__. +no-docstring-rgx=__.*__ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# List of decorators that define properties, such as abc.abstractproperty. +property-classes=abc.abstractproperty + + +[TYPECHECK] + +# Regex pattern to define which classes are considered mixins if ignore-mixin- +# members is set to 'yes' +mixin-class-rgx=.*MixIn + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=SQLObject, optparse.Values, thread._local, _thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members=REQUEST,acl_users,aq_parent,argparse.Namespace + +# List of decorators that create context managers from functions, such as +# contextlib.contextmanager. +contextmanager-decorators=contextlib.contextmanager + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. 
In +# that case, it might be useful to still emit no-member and other checks for
+# the rest of the inferred objects.
+ignore-on-opaque-inference=yes
+
+# Show a hint with possible names when a member name was not found. The aspect
+# of finding the hint is based on edit distance.
+missing-member-hint=yes
+
+# The minimum edit distance a name should have in order to be considered a
+# similar match for a missing member name.
+missing-member-hint-distance=1
+
+# The total number of similar names that should be taken into consideration when
+# showing a hint for a missing member.
+missing-member-max-choices=1
+
+[SPELLING]
+
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the python-enchant package.
+spelling-dict=
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# List of comma separated words that should be considered directives if they
+# appear at the beginning of a comment and should not be checked.
+spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:,pragma:,# noinspection
+
+# A path to a file that contains the private dictionary; one word per line.
+spelling-private-dict-file=.pyenchant_pylint_custom_dict.txt
+
+# Tells whether to store unknown words to the private dictionary indicated in
+# the --spelling-private-dict-file option instead of raising a message.
+spelling-store-unknown-words=no
+
+# Limits count of emitted suggestions for spelling mistakes.
+max-spelling-suggestions=2
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=10
+
+# Maximum number of locals for function / method body
+max-locals=25
+
+# Maximum number of return / yield for function / method body
+max-returns=11
+
+# Maximum number of branches for function / method body
+max-branches=27
+
+# Maximum number of statements in function / method body
+max-statements=100
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# List of qualified class names to ignore when counting class parents (see R0901).
+ignored-parents=
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=11
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=25
+
+# Maximum number of boolean expressions in an if statement (see R0916).
+max-bool-expr=5
+
+# Maximum number of statements in a try-block
+max-try-statements = 14
+
+# List of regular expressions of class ancestor names to
+# ignore when counting public methods (see R0903).
+exclude-too-few-public-methods=
+
+[CLASSES]
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp,__post_init__
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=mcs
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,_fields,_replace,_source,_make
+
+# Warn about protected attribute access inside special methods
+check-protected-access-in-special-methods=no
+
+[IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+
+# Analyse import fallback blocks.
This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,TERMIOS,Bastion,rexec
+
+# Create a graph of every (i.e. internal and external) dependency in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant
+
+# Pairs of modules and preferred modules, separated by a comma.
+preferred-modules=
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "Exception"
+overgeneral-exceptions=builtins.Exception
+
+
+[TYPING]
+
+# Set to ``no`` if the app / library does **NOT** need to support runtime
+# introspection of type annotations. If you use type annotations
+# **exclusively** for type checking of an application, you're probably fine.
+# For libraries, evaluate if some users want to access the type hints at
+# runtime first, e.g., through ``typing.get_type_hints``. Applies to Python
+# versions 3.7 - 3.9
+runtime-typing = no
+
+
+[DEPRECATED_BUILTINS]
+
+# List of builtin function names that should not be used, separated by a comma
+bad-functions=map,input
+
+
+[REFACTORING]
+
+# Maximum number of nested blocks for function / method body
+max-nested-blocks=5
+
+# Complete name of functions that never return. When checking for
+# inconsistent-return-statements, if a never-returning function is called, then
+# it will be considered an explicit return statement and no message will be
+# printed.
+never-returning-functions=sys.exit,argparse.parse_error
+
+
+[STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=no
+
+# This flag controls whether implicit-str-concat should generate a warning
+# on implicit string concatenation in sequences defined over several lines.
+check-str-concat-over-line-jumps=no
+
+
+[CODE_STYLE]
+
+# Max line length for which to still emit suggestions. Used to prevent optional
+# suggestions which would get split by a code formatter (e.g., black). Will
+# default to the setting for ``max-line-length``.
+#max-line-length-suggestions=
diff --git a/python/setup.py.in b/python/setup.py.in
new file mode 100644
index 0000000..c4083da
--- /dev/null
+++ b/python/setup.py.in
@@ -0,0 +1,20 @@
+#!@PYTHON@
+
+import re
+from setuptools import setup
+
+# This will match things like "2.1.3" and "2.1.3-100", but not things like
+# "2.1.3-100.deadbeef". Any other formats (or lack of a match) will result
+# in an exception during package building, which is probably okay. That's an
+# error on our part and is something we should fix.
+ver = re.match("[0-9.]+[0-9-]*", "@PACKAGE_VERSION@")[0]
+
+setup(name='pacemaker',
+      version=ver,
+      author='The Pacemaker project contributors',
+      author_email='@PACKAGE_BUGREPORT@',
+      license='LGPLv2.1+',
+      url='https://clusterlabs.org/pacemaker/',
+      description='Python libraries for Pacemaker',
+      packages=['pacemaker', 'pacemaker._cts'],
+      )
diff --git a/python/tests/Makefile.am b/python/tests/Makefile.am
new file mode 100644
index 0000000..490b272
--- /dev/null
+++ b/python/tests/Makefile.am
@@ -0,0 +1,12 @@
+#
+# Copyright 2023 the Pacemaker project contributors
+#
+# The version control history for this file may have further details.
+#
+# This source code is licensed under the GNU General Public License version 2
+# or later (GPLv2+) WITHOUT ANY WARRANTY.
+#
+
+MAINTAINERCLEANFILES = Makefile.in
+
+EXTRA_DIST = $(wildcard test_*)
diff --git a/python/tests/test_exitstatus.py b/python/tests/test_exitstatus.py
new file mode 100644
index 0000000..571f6b4
--- /dev/null
+++ b/python/tests/test_exitstatus.py
@@ -0,0 +1,14 @@
+# These warnings are not useful in unit tests.
+# pylint: disable=missing-class-docstring,missing-function-docstring,missing-module-docstring
+
+__copyright__ = "Copyright 2023 the Pacemaker project contributors"
+__license__ = "GPLv2+"
+
+import unittest
+
+from pacemaker.exitstatus import ExitStatus
+
+class ExitStatusTestCase(unittest.TestCase):
+    def test_min_max(self):
+        self.assertEqual(ExitStatus.OK, 0)
+        self.assertEqual(ExitStatus.MAX, 255)
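
The [BASIC] naming settings in the pylintrc hunk above carry two local customizations (both marked CHANGED): good-names is trimmed down to just _, and variable-rgx is widened so one-letter variable names pass while function names still need to be longer. A minimal standalone sketch of how those regular expressions behave, checked with plain re calls rather than pylint itself; the sample names are made up for illustration:

import re

# Regex values copied from the pylintrc hunk above.
VARIABLE_RGX = r"[a-z_][a-z0-9_]{,30}$"   # CHANGED: one-letter names are fine
FUNCTION_RGX = r"[a-z_][a-z0-9_]{2,30}$"  # functions still need 3-31 characters

# re.match() anchors at the start of the string, mirroring how pylint applies
# these patterns to each name it checks.
for name in ("i", "node", "BadName"):
    verdict = "accepted" if re.match(VARIABLE_RGX, name) else "rejected"
    print(f"variable {name!r}: {verdict}")

for name in ("go", "start_all", "Run"):
    verdict = "accepted" if re.match(FUNCTION_RGX, name) else "rejected"
    print(f"function {name!r}: {verdict}")

Running this prints "accepted" for i, node, and start_all, and "rejected" for BadName, go (too short for a function name), and Run (uppercase start).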
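
setup.py.in above derives the package version from @PACKAGE_VERSION@ with the re.match call shown in the diff. A small sketch of what that pattern captures for the version strings mentioned in its comment, as standalone Python with the strings hard-coded rather than substituted at build time:

import re

# Pattern copied from setup.py.in above.
PATTERN = r"[0-9.]+[0-9-]*"

for version in ("2.1.3", "2.1.3-100", "2.1.3-100.deadbeef"):
    # re.match() anchors at the start, so only the leading version-like
    # portion is captured; a suffix such as ".deadbeef" is simply dropped.
    print(version, "->", re.match(PATTERN, version)[0])

# Expected output:
#   2.1.3 -> 2.1.3
#   2.1.3-100 -> 2.1.3-100
#   2.1.3-100.deadbeef -> 2.1.3-100
#
# A value with no leading digit or dot does not match at all; subscripting the
# resulting None raises TypeError, which is the exception during package
# building that the comment in setup.py.in refers to.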