summaryrefslogtreecommitdiffstats
path: root/agents/scsi/fence_scsi.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--agents/scsi/fence_scsi.py598
1 files changed, 598 insertions, 0 deletions
diff --git a/agents/scsi/fence_scsi.py b/agents/scsi/fence_scsi.py
new file mode 100644
index 0000000..f9e6823
--- /dev/null
+++ b/agents/scsi/fence_scsi.py
@@ -0,0 +1,598 @@
+#!@PYTHON@ -tt
+
+import sys
+import stat
+import re
+import os
+import time
+import logging
+import atexit
+import hashlib
+import ctypes
+sys.path.append("@FENCEAGENTSLIBDIR@")
+from fencing import fail_usage, run_command, atexit_handler, check_input, process_input, show_docs, fence_action, all_opt
+from fencing import run_delay
+
+STORE_PATH = "@STORE_PATH@"
+
+
+def get_status(conn, options):
+ del conn
+ status = "off"
+ for dev in options["devices"]:
+ is_block_device(dev)
+ reset_dev(options, dev)
+ if options["--key"] in get_registration_keys(options, dev):
+ status = "on"
+ else:
+ logging.debug("No registration for key "\
+ + options["--key"] + " on device " + dev + "\n")
+ if options["--action"] == "on":
+ status = "off"
+ break
+ return status
+
+
+def set_status(conn, options):
+ del conn
+ count = 0
+ if options["--action"] == "on":
+ set_key(options)
+ for dev in options["devices"]:
+ is_block_device(dev)
+
+ register_dev(options, dev)
+ if options["--key"] not in get_registration_keys(options, dev):
+ count += 1
+ logging.debug("Failed to register key "\
+ + options["--key"] + "on device " + dev + "\n")
+ continue
+ dev_write(dev, options)
+
+ if get_reservation_key(options, dev) is None \
+ and not reserve_dev(options, dev) \
+ and get_reservation_key(options, dev) is None:
+ count += 1
+ logging.debug("Failed to create reservation (key="\
+ + options["--key"] + ", device=" + dev + ")\n")
+
+ else:
+ host_key = get_key()
+ if host_key == options["--key"].lower():
+ fail_usage("Failed: keys cannot be same. You can not fence yourself.")
+ for dev in options["devices"]:
+ is_block_device(dev)
+
+ if options["--key"] in get_registration_keys(options, dev):
+ preempt_abort(options, host_key, dev)
+
+ for dev in options["devices"]:
+ if options["--key"] in get_registration_keys(options, dev):
+ count += 1
+ logging.debug("Failed to remove key "\
+ + options["--key"] + " on device " + dev + "\n")
+ continue
+
+ if not get_reservation_key(options, dev):
+ count += 1
+ logging.debug("No reservation exists on device " + dev + "\n")
+ if count:
+ logging.error("Failed to verify " + str(count) + " device(s)")
+ sys.exit(1)
+
+
+# check if host is ready to execute actions
+def do_action_monitor(options):
+ # Check if required binaries are installed
+ if bool(run_cmd(options, options["--sg_persist-path"] + " -V")["rc"]):
+ logging.error("Unable to run " + options["--sg_persist-path"])
+ return 1
+ elif bool(run_cmd(options, options["--sg_turs-path"] + " -V")["rc"]):
+ logging.error("Unable to run " + options["--sg_turs-path"])
+ return 1
+ elif ("--devices" not in options and
+ bool(run_cmd(options, options["--vgs-path"] + " --version")["rc"])):
+ logging.error("Unable to run " + options["--vgs-path"])
+ return 1
+
+ # Keys have to be present in order to fence/unfence
+ get_key()
+ dev_read()
+
+ return 0
+
+
+# run command, returns dict, ret["rc"] = exit code; ret["out"] = output;
+# ret["err"] = error
+def run_cmd(options, cmd):
+ ret = {}
+ (ret["rc"], ret["out"], ret["err"]) = run_command(options, cmd)
+ ret["out"] = "".join([i for i in ret["out"] if i is not None])
+ ret["err"] = "".join([i for i in ret["err"] if i is not None])
+ return ret
+
+
+# check if device exist and is block device
+def is_block_device(dev):
+ if not os.path.exists(dev):
+ fail_usage("Failed: device \"" + dev + "\" does not exist")
+ if not stat.S_ISBLK(os.stat(dev).st_mode):
+ fail_usage("Failed: device \"" + dev + "\" is not a block device")
+
+
+# cancel registration
+def preempt_abort(options, host, dev):
+ reset_dev(options,dev)
+ cmd = options["--sg_persist-path"] + " -n -o -A -T 5 -K " + host + " -S " + options["--key"] + " -d " + dev
+ return not bool(run_cmd(options, cmd)["rc"])
+
+
+def reset_dev(options, dev):
+ return run_cmd(options, options["--sg_turs-path"] + " " + dev)["rc"]
+
+
+def register_dev(options, dev):
+ dev = os.path.realpath(dev)
+ if re.search(r"^dm", dev[5:]):
+ for slave in get_mpath_slaves(dev):
+ register_dev(options, slave)
+ return True
+ if get_reservation_key(options, dev, False) == options["--key"]:
+ return True
+ reset_dev(options, dev)
+ cmd = options["--sg_persist-path"] + " -n -o -I -S " + options["--key"] + " -d " + dev
+ cmd += " -Z" if "--aptpl" in options else ""
+ #cmd return code != 0 but registration can be successful
+ return not bool(run_cmd(options, cmd)["rc"])
+
+
+def reserve_dev(options, dev):
+ reset_dev(options,dev)
+ cmd = options["--sg_persist-path"] + " -n -o -R -T 5 -K " + options["--key"] + " -d " + dev
+ return not bool(run_cmd(options, cmd)["rc"])
+
+
+def get_reservation_key(options, dev, fail=True):
+ reset_dev(options,dev)
+ opts = ""
+ if "--readonly" in options:
+ opts = "-y "
+ cmd = options["--sg_persist-path"] + " -n -i " + opts + "-r -d " + dev
+ out = run_cmd(options, cmd)
+ if out["rc"] and fail:
+ fail_usage('Cannot get reservation key on device "' + dev
+ + '": ' + out["err"])
+ match = re.search(r"\s+key=0x(\S+)\s+", out["out"], re.IGNORECASE)
+ return match.group(1) if match else None
+
+
+def get_registration_keys(options, dev, fail=True):
+ reset_dev(options,dev)
+ keys = []
+ opts = ""
+ if "--readonly" in options:
+ opts = "-y "
+ cmd = options["--sg_persist-path"] + " -n -i " + opts + "-k -d " + dev
+ out = run_cmd(options, cmd)
+ if out["rc"]:
+ fail_usage('Cannot get registration keys on device "' + dev
+ + '": ' + out["err"], fail)
+ if not fail:
+ return []
+ for line in out["out"].split("\n"):
+ match = re.search(r"\s+0x(\S+)\s*", line)
+ if match:
+ keys.append(match.group(1))
+ return keys
+
+
+def get_cluster_id(options):
+ cmd = options["--corosync-cmap-path"] + " totem.cluster_name"
+
+ match = re.search(r"\(str\) = (\S+)\n", run_cmd(options, cmd)["out"])
+
+ if not match:
+ fail_usage("Failed: cannot get cluster name")
+
+ try:
+ return hashlib.md5(match.group(1).encode('ascii')).hexdigest()
+ except ValueError:
+ # FIPS requires usedforsecurity=False and might not be
+ # available on all distros: https://bugs.python.org/issue9216
+ return hashlib.md5(match.group(1).encode('ascii'), usedforsecurity=False).hexdigest()
+
+
+def get_node_id(options):
+ cmd = options["--corosync-cmap-path"] + " nodelist"
+ out = run_cmd(options, cmd)["out"]
+
+ match = re.search(r".(\d+).name \(str\) = " + options["--plug"] + "\n", out)
+
+ # try old format before failing
+ if not match:
+ match = re.search(r".(\d+).ring._addr \(str\) = " + options["--plug"] + "\n", out)
+
+ return match.group(1) if match else fail_usage("Failed: unable to parse output of corosync-cmapctl or node does not exist")
+
+def get_node_hash(options):
+ try:
+ return hashlib.md5(options["--plug"].encode('ascii')).hexdigest()
+ except ValueError:
+ # FIPS requires usedforsecurity=False and might not be
+ # available on all distros: https://bugs.python.org/issue9216
+ return hashlib.md5(options["--plug"].encode('ascii'), usedforsecurity=False).hexdigest()
+
+
+def generate_key(options):
+ if options["--key-value"] == "hash":
+ return "%.4s%.4s" % (get_cluster_id(options), get_node_hash(options))
+ else:
+ return "%.4s%.4d" % (get_cluster_id(options), int(get_node_id(options)))
+
+
+# save node key to file
+def set_key(options):
+ file_path = options["store_path"] + ".key"
+ if not os.path.isdir(os.path.dirname(options["store_path"])):
+ os.makedirs(os.path.dirname(options["store_path"]))
+ try:
+ f = open(file_path, "w")
+ except IOError:
+ fail_usage("Failed: Cannot open file \""+ file_path + "\"")
+ f.write(options["--key"].lower() + "\n")
+ f.close()
+
+
+# read node key from file
+def get_key(fail=True):
+ file_path = STORE_PATH + ".key"
+ try:
+ f = open(file_path, "r")
+ except IOError:
+ fail_usage("Failed: Cannot open file \""+ file_path + "\"", fail)
+ if not fail:
+ return None
+ return f.readline().strip().lower()
+
+
+def dev_write(dev, options):
+ file_path = options["store_path"] + ".dev"
+ if not os.path.isdir(os.path.dirname(options["store_path"])):
+ os.makedirs(os.path.dirname(options["store_path"]))
+ try:
+ f = open(file_path, "a+")
+ except IOError:
+ fail_usage("Failed: Cannot open file \""+ file_path + "\"")
+ f.seek(0)
+ out = f.read()
+ if not re.search(r"^" + dev + "\s+", out, flags=re.MULTILINE):
+ f.write(dev + "\n")
+ f.close()
+
+
+def dev_read(fail=True, opt=None):
+ file_path = STORE_PATH + ".dev"
+ try:
+ f = open(file_path, "r")
+ except IOError:
+ if "--suppress-errors" not in opt:
+ fail_usage("Failed: Cannot open file \"" + file_path + "\"", fail)
+ if not fail:
+ return None
+ # get not empty lines from file
+ devs = [line.strip() for line in f if line.strip()]
+ f.close()
+ return devs
+
+
+def get_clvm_devices(options):
+ devs = []
+ cmd = options["--vgs-path"] + " " +\
+ "--noheadings " +\
+ "--separator : " +\
+ "--sort pv_uuid " +\
+ "--options vg_attr,pv_name "+\
+ "--config 'global { locking_type = 0 } devices { preferred_names = [ \"^/dev/dm\" ] }'"
+ out = run_cmd(options, cmd)
+ if out["rc"]:
+ fail_usage("Failed: Cannot get clvm devices")
+ for line in out["out"].split("\n"):
+ if 'c' in line.split(":")[0]:
+ devs.append(line.split(":")[1])
+ return devs
+
+
+def get_mpath_slaves(dev):
+ if dev[:5] == "/dev/":
+ dev = dev[5:]
+ slaves = [i for i in os.listdir("/sys/block/" + dev + "/slaves/") if i[:1] != "."]
+ if slaves[0][:2] == "dm":
+ slaves = get_mpath_slaves(slaves[0])
+ else:
+ slaves = ["/dev/" + x for x in slaves]
+ return slaves
+
+
+def define_new_opts():
+ all_opt["devices"] = {
+ "getopt" : "d:",
+ "longopt" : "devices",
+ "help" : "-d, --devices=[devices] List of devices to use for current operation",
+ "required" : "0",
+ "shortdesc" : "List of devices to use for current operation. Devices can \
+be comma-separated list of raw devices (eg. /dev/sdc). Each device must support SCSI-3 \
+persistent reservations.",
+ "order": 1
+ }
+ all_opt["nodename"] = {
+ "getopt" : ":",
+ "longopt" : "nodename",
+ "help" : "",
+ "required" : "0",
+ "shortdesc" : "",
+ "order": 1
+ }
+ all_opt["key"] = {
+ "getopt" : "k:",
+ "longopt" : "key",
+ "help" : "-k, --key=[key] Key to use for the current operation",
+ "required" : "0",
+ "shortdesc" : "Key to use for the current operation. This key should be \
+unique to a node. For the \"on\" action, the key specifies the key use to \
+register the local node. For the \"off\" action, this key specifies the key to \
+be removed from the device(s).",
+ "order": 1
+ }
+ all_opt["aptpl"] = {
+ "getopt" : "a",
+ "longopt" : "aptpl",
+ "help" : "-a, --aptpl Use the APTPL flag for registrations",
+ "required" : "0",
+ "shortdesc" : "Use the APTPL flag for registrations. This option is only used for the 'on' action.",
+ "order": 1
+ }
+ all_opt["readonly"] = {
+ "getopt" : "",
+ "longopt" : "readonly",
+ "help" : "--readonly Open DEVICE read-only. May be useful with PRIN commands if there are unwanted side effects with the default read-write open.",
+ "required" : "0",
+ "shortdesc" : "Open DEVICE read-only.",
+ "order": 4
+ }
+ all_opt["suppress-errors"] = {
+ "getopt" : "",
+ "longopt" : "suppress-errors",
+ "help" : "--suppress-errors Suppress error log. Suppresses error logging when run from the watchdog service before pacemaker starts.",
+ "required" : "0",
+ "shortdesc" : "Error log suppression.",
+ "order": 5
+ }
+ all_opt["logfile"] = {
+ "getopt" : ":",
+ "longopt" : "logfile",
+ "help" : "-f, --logfile Log output (stdout and stderr) to file",
+ "required" : "0",
+ "shortdesc" : "Log output (stdout and stderr) to file",
+ "order": 6
+ }
+ all_opt["corosync_cmap_path"] = {
+ "getopt" : ":",
+ "longopt" : "corosync-cmap-path",
+ "help" : "--corosync-cmap-path=[path] Path to corosync-cmapctl binary",
+ "required" : "0",
+ "shortdesc" : "Path to corosync-cmapctl binary",
+ "default" : "@COROSYNC_CMAPCTL_PATH@",
+ "order": 300
+ }
+ all_opt["sg_persist_path"] = {
+ "getopt" : ":",
+ "longopt" : "sg_persist-path",
+ "help" : "--sg_persist-path=[path] Path to sg_persist binary",
+ "required" : "0",
+ "shortdesc" : "Path to sg_persist binary",
+ "default" : "@SG_PERSIST_PATH@",
+ "order": 300
+ }
+ all_opt["sg_turs_path"] = {
+ "getopt" : ":",
+ "longopt" : "sg_turs-path",
+ "help" : "--sg_turs-path=[path] Path to sg_turs binary",
+ "required" : "0",
+ "shortdesc" : "Path to sg_turs binary",
+ "default" : "@SG_TURS_PATH@",
+ "order": 300
+ }
+ all_opt["vgs_path"] = {
+ "getopt" : ":",
+ "longopt" : "vgs-path",
+ "help" : "--vgs-path=[path] Path to vgs binary",
+ "required" : "0",
+ "shortdesc" : "Path to vgs binary",
+ "default" : "@VGS_PATH@",
+ "order": 300
+ }
+ all_opt["key_value"] = {
+ "getopt" : ":",
+ "longopt" : "key-value",
+ "help" : "--key-value=<id|hash> SCSI key node generation method",
+ "required" : "0",
+ "shortdesc" : "Method used to generate the SCSI key. \"id\" (default) \
+uses the positional ID from \"corosync-cmactl nodelist\" output which can get inconsistent \
+when nodes are removed from cluster without full cluster restart. \"hash\" uses part of hash \
+made out of node names which is not affected over time but there is theoretical chance that \
+hashes can collide as size of SCSI key is quite limited.",
+ "default" : "id",
+ "order": 300
+ }
+
+
+def scsi_check_get_options(options):
+ try:
+ f = open("/etc/sysconfig/stonith", "r")
+ except IOError:
+ return options
+
+ match = re.findall(r"^\s*(\S*)\s*=\s*(\S*)\s*", "".join(f.readlines()), re.MULTILINE)
+
+ for m in match:
+ options[m[0].lower()] = m[1].lower()
+
+ f.close()
+
+ return options
+
+
+def scsi_check(hardreboot=False):
+ if len(sys.argv) >= 3 and sys.argv[1] == "repair":
+ return int(sys.argv[2])
+ options = {}
+ options["--sg_turs-path"] = "@SG_TURS_PATH@"
+ options["--sg_persist-path"] = "@SG_PERSIST_PATH@"
+ options["--power-timeout"] = "5"
+ options["retry"] = "0"
+ options["retry-sleep"] = "1"
+ options = scsi_check_get_options(options)
+ if "verbose" in options and options["verbose"] == "yes":
+ logging.getLogger().setLevel(logging.DEBUG)
+ devs = dev_read(fail=False,opt=options)
+ if not devs:
+ if "--suppress-errors" not in options:
+ logging.error("No devices found")
+ return 0
+ key = get_key(fail=False)
+ if not key:
+ logging.error("Key not found")
+ return 0
+ for dev in devs:
+ for n in range(int(options["retry"]) + 1):
+ if n > 0:
+ logging.debug("retry: " + str(n) + " of " + options["retry"])
+ if key in get_registration_keys(options, dev, fail=False):
+ logging.debug("key " + key + " registered with device " + dev)
+ return 0
+ else:
+ logging.debug("key " + key + " not registered with device " + dev)
+
+ if n < int(options["retry"]):
+ time.sleep(float(options["retry-sleep"]))
+
+ logging.debug("key " + key + " registered with any devices")
+
+ if hardreboot == True:
+ libc = ctypes.cdll['libc.so.6']
+ libc.reboot(0x1234567)
+ return 2
+
+
+def main():
+
+ atexit.register(atexit_handler)
+
+ device_opt = ["no_login", "no_password", "devices", "nodename", "port",\
+ "no_port", "key", "aptpl", "fabric_fencing", "on_target", "corosync_cmap_path",\
+ "sg_persist_path", "sg_turs_path", "readonly", "suppress-errors", "logfile", "vgs_path",\
+ "force_on", "key_value"]
+
+ define_new_opts()
+
+ all_opt["delay"]["getopt"] = "H:"
+
+ all_opt["port"]["help"] = "-n, --plug=[nodename] Name of the node to be fenced"
+ all_opt["port"]["shortdesc"] = "Name of the node to be fenced. The node name is used to \
+generate the key value used for the current operation. This option will be \
+ignored when used with the -k option."
+
+ #fence_scsi_check
+ if os.path.basename(sys.argv[0]) == "fence_scsi_check":
+ sys.exit(scsi_check())
+ elif os.path.basename(sys.argv[0]) == "fence_scsi_check_hardreboot":
+ sys.exit(scsi_check(True))
+
+ options = check_input(device_opt, process_input(device_opt), other_conditions=True)
+
+ # hack to remove list/list-status actions which are not supported
+ options["device_opt"] = [ o for o in options["device_opt"] if o != "separator" ]
+
+ docs = {}
+ docs["shortdesc"] = "Fence agent for SCSI persistent reservation"
+ docs["longdesc"] = "fence_scsi is an I/O fencing agent that uses SCSI-3 \
+persistent reservations to control access to shared storage devices. These \
+devices must support SCSI-3 persistent reservations (SPC-3 or greater) as \
+well as the \"preempt-and-abort\" subcommand.\nThe fence_scsi agent works by \
+having each node in the cluster register a unique key with the SCSI \
+device(s). Reservation key is generated from \"node id\" (default) or from \
+\"node name hash\" (RECOMMENDED) by adjusting \"key_value\" option. \
+Using hash is recommended to prevent issues when removing nodes \
+from cluster without full cluster restart. \
+Once registered, a single node will become the reservation holder \
+by creating a \"write exclusive, registrants only\" reservation on the \
+device(s). The result is that only registered nodes may write to the \
+device(s). When a node failure occurs, the fence_scsi agent will remove the \
+key belonging to the failed node from the device(s). The failed node will no \
+longer be able to write to the device(s). A manual reboot is required.\
+\n.P\n\
+When used as a watchdog device you can define e.g. retry=1, retry-sleep=2 and \
+verbose=yes parameters in /etc/sysconfig/stonith if you have issues with it \
+failing."
+ docs["vendorurl"] = ""
+ show_docs(options, docs)
+
+ run_delay(options)
+
+ # backward compatibility layer BEGIN
+ if "--logfile" in options:
+ try:
+ logfile = open(options["--logfile"], 'w')
+ sys.stderr = logfile
+ sys.stdout = logfile
+ except IOError:
+ fail_usage("Failed: Unable to create file " + options["--logfile"])
+ # backward compatibility layer END
+
+ options["store_path"] = STORE_PATH
+
+ # Input control BEGIN
+ stop_after_error = False if options["--action"] == "validate-all" else True
+
+ if options["--action"] == "monitor":
+ sys.exit(do_action_monitor(options))
+
+ # workaround to avoid regressions
+ if "--nodename" in options and options["--nodename"]:
+ options["--plug"] = options["--nodename"]
+ del options["--nodename"]
+
+ if not (("--plug" in options and options["--plug"])\
+ or ("--key" in options and options["--key"])):
+ fail_usage("Failed: nodename or key is required", stop_after_error)
+
+ if options["--action"] != "validate-all":
+ if not ("--key" in options and options["--key"]):
+ options["--key"] = generate_key(options)
+
+ if options["--key"] == "0" or not options["--key"]:
+ fail_usage("Failed: key cannot be 0", stop_after_error)
+
+ if "--key-value" in options\
+ and (options["--key-value"] != "id" and options["--key-value"] != "hash"):
+ fail_usage("Failed: key-value has to be 'id' or 'hash'", stop_after_error)
+
+ if options["--action"] == "validate-all":
+ sys.exit(0)
+
+ options["--key"] = options["--key"].lstrip('0')
+
+ if not ("--devices" in options and options["--devices"].split(",")):
+ options["devices"] = get_clvm_devices(options)
+ else:
+ options["devices"] = options["--devices"].split(",")
+
+ if not options["devices"]:
+ fail_usage("Failed: No devices found")
+ # Input control END
+
+ result = fence_action(None, options, set_status, get_status)
+ sys.exit(result)
+
+if __name__ == "__main__":
+ main()