diff options
Diffstat (limited to '')
-rw-r--r-- | agents/heuristics_ping/fence_heuristics_ping.py | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/agents/heuristics_ping/fence_heuristics_ping.py b/agents/heuristics_ping/fence_heuristics_ping.py new file mode 100644 index 0000000..5b2c61d --- /dev/null +++ b/agents/heuristics_ping/fence_heuristics_ping.py @@ -0,0 +1,198 @@ +#!@PYTHON@ -tt + +# The Following Agent Has Been Tested On: +# +# RHEL 7.4 +# + +import io +import re +import subprocess +import shlex +import sys, stat +import logging +import os +import atexit +sys.path.append("@FENCEAGENTSLIBDIR@") +from fencing import fail_usage, run_command, fence_action, all_opt +from fencing import atexit_handler, check_input, process_input, show_docs +from fencing import run_delay + +def ping_test(con, options): + # Send pings to the targets + + if options["--action"] == "on": + # we want unfencing to always succeed + return True + + if not "--ping-targets" in options or options["--ping-targets"] == "": + # "off" was requested so fake "on" to provoke failure + logging.error("ping target required") + return False + + timeout = int(options["--ping-timeout"]) + count = int(options["--ping-count"]) + interval = int(options["--ping-interval"]) + good_required = int(options["--ping-good-count"]) + maxfail = int(options["--ping-maxfail"]) + targets = options["--ping-targets"].split(",") + exitcode = True + p = {} + failcount = 0 + # search string for parsing the results of the ping-executable + packet_count = re.compile(r".*transmitted, ([0-9]*)( packets)? received.*") + + # start a ping-process per target + for target in targets: + ping_path = '@PING_CMD@' + target_mangled = target + if target.startswith('inet6:'): + if '@PING6_CMD@' == '': + p[target] = None + continue + ping_path = '@PING6_CMD@' + target_mangled = target.split(':',2)[1] + elif target.startswith('inet:'): + ping_path = '@PING4_CMD@' + target_mangled = target.split(':',2)[1] + + ping_cmd = "%s -n -q -W %d -c %d -i %d %s" % ( + ping_path, timeout, count, interval, target_mangled) + logging.info("Running command: %s", ping_cmd) + try: + p[target] = subprocess.Popen(shlex.split(ping_cmd), + stdout=subprocess.PIPE); + except OSError: + p[target] = None + + # collect the results of the ping-processes + for target in targets: + good = 0 + if p[target] != None: + p[target].wait() + if p[target].returncode == 0: + for line in p[target].stdout: + searchres = packet_count.search(line.decode()) + if searchres: + good = int(searchres.group(1)) + break + if good >= good_required: + logging.info("ping target %s received %d of %d" \ + % (target, good, count)) + continue + failcount += 1 + logging.info("ping target %s received %d of %d and thus failed" + % (target, good, count)) + else: + failcount += 1 + logging.error("ping target %s failed on OS level" % target) + + if failcount > maxfail: + exitcode = False + + return exitcode + + +def define_new_opts(): + all_opt["ping_count"] = { + "getopt" : ":", + "longopt" : "ping-count", + "required" : "0", + "help" : "--ping-count=[number] Number of ping-probes to send", + "shortdesc" : "The number of ping-probes that is being sent per target", + "default" : "10", + "order" : 1 + } + + all_opt["ping_good_count"] = { + "getopt" : ":", + "longopt" : "ping-good-count", + "required" : "0", + "help" : "--ping-good-count=[number] Number of positive ping-probes required", + "shortdesc" : "The number of positive ping-probes required to account a target as available", + "default" : "8", + "order" : 1 + } + + all_opt["ping_interval"] = { + "getopt" : ":", + "longopt" : "ping-interval", + "required" : "0", + "help" : "--ping-interval=[seconds] Seconds between ping-probes", + "shortdesc" : "The interval in seconds between ping-probes", + "default" : "1", + "order" : 1 + } + + all_opt["ping_timeout"] = { + "getopt" : ":", + "longopt" : "ping-timeout", + "required" : "0", + "help" : "--ping-timeout=[seconds] Timeout for individual ping-probes", + "shortdesc" : "The timeout in seconds till an individual ping-probe is accounted as lost", + "default" : "2", + "order" : 1 + } + + all_opt["ping_maxfail"] = { + "getopt" : ":", + "longopt" : "ping-maxfail", + "required" : "0", + "help" : "--ping-maxfail=[number] Number of failed ping-targets allowed", + "shortdesc" : "The number of failed ping-targets to still account as overall success", + "default" : "0", + "order" : 1 + } + + all_opt["ping_targets"] = { + "getopt" : ":", + "longopt" : "ping-targets", + "required" : "1", + "help" : "--ping-targets=tgt1,[inet6:]tgt2 Comma separated list of ping-targets", + "shortdesc" : "A comma separated list of ping-targets (optionally prepended by 'inet:' or 'inet6:') to be probed", + "default" : "", + "order" : 1 + } + + +def main(): + device_opt = ["no_status", "no_password", "ping_count", "ping_good_count", + "ping_interval", "ping_timeout", "ping_maxfail", "ping_targets", "method"] + define_new_opts() + atexit.register(atexit_handler) + + all_opt["method"]["default"] = "cycle" + all_opt["method"]["help"] = "-m, --method=[method] Method to fence (cycle|onoff) (Default: cycle)" + + options = check_input(device_opt, process_input(device_opt)) + + docs = {} + docs["shortdesc"] = "Fence agent for ping-heuristic based fencing" + docs["longdesc"] = "fence_heuristics_ping uses ping-heuristics to control execution of another fence agent on the same fencing level.\ +\n.P\n\ +This is not a fence agent by itself! \ +Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristics_ping." + docs["vendorurl"] = "" + show_docs(options, docs) + + # move ping-test to the end of the time-window set via --delay + # as to give the network time to settle after the incident that has + # caused fencing and have the results as current as possible + max_pingcheck = (int(options["--ping-count"]) - 1) * \ + int(options["--ping-interval"]) + int(options["--ping-timeout"]) + run_delay(options, reserve=max_pingcheck) + + result = fence_action(\ + None, \ + options, \ + None, \ + None, \ + reboot_cycle_fn = ping_test, + sync_set_power_fn = ping_test) + + # execute the remaining delay + run_delay(options, result=result) + sys.exit(result) + +if __name__ == "__main__": + main() |