summaryrefslogtreecommitdiffstats
path: root/agents/heuristics_ping/fence_heuristics_ping.py
diff options
context:
space:
mode:
Diffstat (limited to 'agents/heuristics_ping/fence_heuristics_ping.py')
-rw-r--r--agents/heuristics_ping/fence_heuristics_ping.py198
1 files changed, 198 insertions, 0 deletions
diff --git a/agents/heuristics_ping/fence_heuristics_ping.py b/agents/heuristics_ping/fence_heuristics_ping.py
new file mode 100644
index 0000000..5b2c61d
--- /dev/null
+++ b/agents/heuristics_ping/fence_heuristics_ping.py
@@ -0,0 +1,198 @@
+#!@PYTHON@ -tt
+
+# The Following Agent Has Been Tested On:
+#
+# RHEL 7.4
+#
+
+import io
+import re
+import subprocess
+import shlex
+import sys, stat
+import logging
+import os
+import atexit
+sys.path.append("@FENCEAGENTSLIBDIR@")
+from fencing import fail_usage, run_command, fence_action, all_opt
+from fencing import atexit_handler, check_input, process_input, show_docs
+from fencing import run_delay
+
+def ping_test(con, options):
+ # Send pings to the targets
+
+ if options["--action"] == "on":
+ # we want unfencing to always succeed
+ return True
+
+ if not "--ping-targets" in options or options["--ping-targets"] == "":
+ # "off" was requested so fake "on" to provoke failure
+ logging.error("ping target required")
+ return False
+
+ timeout = int(options["--ping-timeout"])
+ count = int(options["--ping-count"])
+ interval = int(options["--ping-interval"])
+ good_required = int(options["--ping-good-count"])
+ maxfail = int(options["--ping-maxfail"])
+ targets = options["--ping-targets"].split(",")
+ exitcode = True
+ p = {}
+ failcount = 0
+ # search string for parsing the results of the ping-executable
+ packet_count = re.compile(r".*transmitted, ([0-9]*)( packets)? received.*")
+
+ # start a ping-process per target
+ for target in targets:
+ ping_path = '@PING_CMD@'
+ target_mangled = target
+ if target.startswith('inet6:'):
+ if '@PING6_CMD@' == '':
+ p[target] = None
+ continue
+ ping_path = '@PING6_CMD@'
+ target_mangled = target.split(':',2)[1]
+ elif target.startswith('inet:'):
+ ping_path = '@PING4_CMD@'
+ target_mangled = target.split(':',2)[1]
+
+ ping_cmd = "%s -n -q -W %d -c %d -i %d %s" % (
+ ping_path, timeout, count, interval, target_mangled)
+ logging.info("Running command: %s", ping_cmd)
+ try:
+ p[target] = subprocess.Popen(shlex.split(ping_cmd),
+ stdout=subprocess.PIPE);
+ except OSError:
+ p[target] = None
+
+ # collect the results of the ping-processes
+ for target in targets:
+ good = 0
+ if p[target] != None:
+ p[target].wait()
+ if p[target].returncode == 0:
+ for line in p[target].stdout:
+ searchres = packet_count.search(line.decode())
+ if searchres:
+ good = int(searchres.group(1))
+ break
+ if good >= good_required:
+ logging.info("ping target %s received %d of %d" \
+ % (target, good, count))
+ continue
+ failcount += 1
+ logging.info("ping target %s received %d of %d and thus failed"
+ % (target, good, count))
+ else:
+ failcount += 1
+ logging.error("ping target %s failed on OS level" % target)
+
+ if failcount > maxfail:
+ exitcode = False
+
+ return exitcode
+
+
+def define_new_opts():
+ all_opt["ping_count"] = {
+ "getopt" : ":",
+ "longopt" : "ping-count",
+ "required" : "0",
+ "help" : "--ping-count=[number] Number of ping-probes to send",
+ "shortdesc" : "The number of ping-probes that is being sent per target",
+ "default" : "10",
+ "order" : 1
+ }
+
+ all_opt["ping_good_count"] = {
+ "getopt" : ":",
+ "longopt" : "ping-good-count",
+ "required" : "0",
+ "help" : "--ping-good-count=[number] Number of positive ping-probes required",
+ "shortdesc" : "The number of positive ping-probes required to account a target as available",
+ "default" : "8",
+ "order" : 1
+ }
+
+ all_opt["ping_interval"] = {
+ "getopt" : ":",
+ "longopt" : "ping-interval",
+ "required" : "0",
+ "help" : "--ping-interval=[seconds] Seconds between ping-probes",
+ "shortdesc" : "The interval in seconds between ping-probes",
+ "default" : "1",
+ "order" : 1
+ }
+
+ all_opt["ping_timeout"] = {
+ "getopt" : ":",
+ "longopt" : "ping-timeout",
+ "required" : "0",
+ "help" : "--ping-timeout=[seconds] Timeout for individual ping-probes",
+ "shortdesc" : "The timeout in seconds till an individual ping-probe is accounted as lost",
+ "default" : "2",
+ "order" : 1
+ }
+
+ all_opt["ping_maxfail"] = {
+ "getopt" : ":",
+ "longopt" : "ping-maxfail",
+ "required" : "0",
+ "help" : "--ping-maxfail=[number] Number of failed ping-targets allowed",
+ "shortdesc" : "The number of failed ping-targets to still account as overall success",
+ "default" : "0",
+ "order" : 1
+ }
+
+ all_opt["ping_targets"] = {
+ "getopt" : ":",
+ "longopt" : "ping-targets",
+ "required" : "1",
+ "help" : "--ping-targets=tgt1,[inet6:]tgt2 Comma separated list of ping-targets",
+ "shortdesc" : "A comma separated list of ping-targets (optionally prepended by 'inet:' or 'inet6:') to be probed",
+ "default" : "",
+ "order" : 1
+ }
+
+
+def main():
+ device_opt = ["no_status", "no_password", "ping_count", "ping_good_count",
+ "ping_interval", "ping_timeout", "ping_maxfail", "ping_targets", "method"]
+ define_new_opts()
+ atexit.register(atexit_handler)
+
+ all_opt["method"]["default"] = "cycle"
+ all_opt["method"]["help"] = "-m, --method=[method] Method to fence (cycle|onoff) (Default: cycle)"
+
+ options = check_input(device_opt, process_input(device_opt))
+
+ docs = {}
+ docs["shortdesc"] = "Fence agent for ping-heuristic based fencing"
+ docs["longdesc"] = "fence_heuristics_ping uses ping-heuristics to control execution of another fence agent on the same fencing level.\
+\n.P\n\
+This is not a fence agent by itself! \
+Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristics_ping."
+ docs["vendorurl"] = ""
+ show_docs(options, docs)
+
+ # move ping-test to the end of the time-window set via --delay
+ # as to give the network time to settle after the incident that has
+ # caused fencing and have the results as current as possible
+ max_pingcheck = (int(options["--ping-count"]) - 1) * \
+ int(options["--ping-interval"]) + int(options["--ping-timeout"])
+ run_delay(options, reserve=max_pingcheck)
+
+ result = fence_action(\
+ None, \
+ options, \
+ None, \
+ None, \
+ reboot_cycle_fn = ping_test,
+ sync_set_power_fn = ping_test)
+
+ # execute the remaining delay
+ run_delay(options, result=result)
+ sys.exit(result)
+
+if __name__ == "__main__":
+ main()