diff options
Diffstat (limited to 'agents/sbd/fence_sbd.py')
-rw-r--r-- | agents/sbd/fence_sbd.py | 435 |
1 files changed, 435 insertions, 0 deletions
diff --git a/agents/sbd/fence_sbd.py b/agents/sbd/fence_sbd.py new file mode 100644 index 0000000..2b0127d --- /dev/null +++ b/agents/sbd/fence_sbd.py @@ -0,0 +1,435 @@ +#!@PYTHON@ -tt + +import sys, stat +import logging +import os +import atexit +sys.path.append("@FENCEAGENTSLIBDIR@") +from fencing import fail_usage, run_commands, fence_action, all_opt +from fencing import atexit_handler, check_input, process_input, show_docs +from fencing import run_delay +import itertools + +DEVICE_INIT = 1 +DEVICE_NOT_INIT = -3 +PATH_NOT_EXISTS = -1 +PATH_NOT_BLOCK = -2 + +def is_block_device(filename): + """Checks if a given path is a valid block device + + Key arguments: + filename -- the file to check + + Return codes: + True if it's a valid block device + False, otherwise + """ + + try: + mode = os.lstat(filename).st_mode + except OSError: + return False + else: + return stat.S_ISBLK(mode) + +def is_link(filename): + """Checks if a given path is a link. + + Key arguments: + filename -- the file to check + + Return codes: + True if it's a link + False, otherwise + """ + + try: + mode = os.lstat(filename).st_mode + except OSError: + return False + else: + return stat.S_ISLNK(mode) + +def check_sbd_device(options, device_path): + """checks that a given sbd device exists and is initialized + + Key arguments: + options -- options dictionary + device_path -- device path to check + + Return Codes: + 1 / DEVICE_INIT if the device exists and is initialized + -1 / PATH_NOT_EXISTS if the path does not exists + -2 / PATH_NOT_BLOCK if the path exists but is not a valid block device + -3 / DEVICE_NOT_INIT if the sbd device is not initialized + """ + + # First of all we need to check if the device is valid + if not os.path.exists(device_path): + return PATH_NOT_EXISTS + + # We need to check if device path is a symbolic link. If so we resolve that + # link. + if is_link(device_path): + link_target = os.readlink(device_path) + device_path = os.path.join(os.path.dirname(device_path), link_target) + + # As second step we make sure it's a valid block device + if not is_block_device(device_path): + return PATH_NOT_BLOCK + + cmd = "%s -d %s dump" % (options["--sbd-path"], device_path) + + (return_code, out, err) = run_commands(options, [ cmd ]) + + for line in itertools.chain(out.split("\n"), err.split("\n")): + if len(line) == 0: + continue + + # If we read "NOT dumped" something went wrong, e.g. the device is not + # initialized. + if "NOT dumped" in line: + return DEVICE_NOT_INIT + + return DEVICE_INIT + + +def generate_sbd_command(options, command, arguments=None): + """Generates a sbd command based on given arguments. + + Return Value: + generated list of sbd commands (strings) depending + on command multiple commands with a device each + or a single command with multiple devices + """ + cmds = [] + + if not command in ["list", "dump"]: + cmd = options["--sbd-path"] + + # add "-d" for each sbd device + for device in parse_sbd_devices(options): + cmd += " -d %s" % device + + cmd += " %s %s" % (command, arguments) + cmds.append(cmd) + + else: + for device in parse_sbd_devices(options): + cmd = options["--sbd-path"] + cmd += " -d %s" % device + cmd += " %s %s" % (command, arguments) + cmds.append(cmd) + + return cmds + +def send_sbd_message(conn, options, plug, message): + """Sends a message to all sbd devices. + + Key arguments: + conn -- connection structure + options -- options dictionary + plug -- plug to sent the message to + message -- message to send + + Return Value: + (return_code, out, err) Tuple containing the error code, + """ + + del conn + + arguments = "%s %s" % (plug, message) + cmd = generate_sbd_command(options, "message", arguments) + + (return_code, out, err) = run_commands(options, cmd) + + return (return_code, out, err) + +def get_msg_timeout(options): + """Reads the configured sbd message timeout from each device. + + Key arguments: + options -- options dictionary + + Return Value: + msg_timeout (integer, seconds) + """ + + # get the defined msg_timeout + msg_timeout = -1 # default sbd msg timeout + + cmd = generate_sbd_command(options, "dump") + + (return_code, out, err) = run_commands(options, cmd) + + for line in itertools.chain(out.split("\n"), err.split("\n")): + if len(line) == 0: + continue + + if "msgwait" in line: + tmp_msg_timeout = int(line.split(':')[1]) + if -1 != msg_timeout and tmp_msg_timeout != msg_timeout: + logging.warn(\ + "sbd message timeouts differ in different devices") + # we only save the highest timeout + if tmp_msg_timeout > msg_timeout: + msg_timeout = tmp_msg_timeout + + return msg_timeout + +def set_power_status(conn, options): + """send status to sbd device (poison pill) + + Key arguments: + conn -- connection structure + options -- options dictionary + + Return Value: + return_code -- action result (bool) + """ + + target_status = options["--action"] + plug = options["--plug"] + return_code = 99 + out = "" + err = "" + + # Map fencing actions to sbd messages + if "on" == target_status: + (return_code, out, err) = send_sbd_message(conn, options, plug, "clear") + elif "off" == target_status: + (return_code, out, err) = send_sbd_message(conn, options, plug, "off") + elif "reboot" == target_status: + (return_code, out, err) = send_sbd_message(conn, options, plug, "reset") + + if 0 != return_code: + logging.error("sending message to sbd device(s) \ + failed with return code %d", return_code) + logging.error("DETAIL: output on stdout was \"%s\"", out) + logging.error("DETAIL: output on stderr was \"%s\"", err) + + return not bool(return_code) + +def reboot_cycle(conn, options): + """" trigger reboot by sbd messages + + Key arguments: + conn -- connection structure + options -- options dictionary + + Return Value: + return_code -- action result (bool) + """ + + plug = options["--plug"] + return_code = 99 + out = "" + err = "" + + (return_code, out, err) = send_sbd_message(conn, options, plug, "reset") + return not bool(return_code) + +def get_power_status(conn, options): + """Returns the status of a specific node. + + Key arguments: + conn -- connection structure + options -- option dictionary + + Return Value: + status -- status code (string) + """ + + status = "UNKWNOWN" + plug = options["--plug"] + + nodelist = get_node_list(conn, options) + + # We need to check if the specified plug / node a already a allocated slot + # on the device. + if plug not in nodelist: + logging.error("node \"%s\" not found in node list", plug) + else: + status = nodelist[plug][1] + + + return status + +def translate_status(sbd_status): + """Translates the sbd status to fencing status. + + Key arguments: + sbd_status -- status to translate (string) + + Return Value: + status -- fencing status (string) + """ + + status = "UNKNOWN" + + + # Currently we only accept "clear" to be marked as online. Eventually we + # should also check against "test" + online_status = ["clear"] + + offline_status = ["reset", "off"] + + if any(online_status_element in sbd_status \ + for online_status_element in online_status): + status = "on" + + if any(offline_status_element in sbd_status \ + for offline_status_element in offline_status): + status = "off" + + return status + +def get_node_list(conn, options): + """Returns a list of hostnames, registerd on the sbd device. + + Key arguments: + conn -- connection options + options -- options + + Return Value: + nodelist -- dictionary wich contains all node names and there status + """ + + del conn + + nodelist = {} + + cmd = generate_sbd_command(options, "list") + + (return_code, out, err) = run_commands(options, cmd) + + for line in out.split("\n"): + if len(line) == 0: + continue + + # if we read "unreadable" something went wrong + if "NOT dumped" in line: + return nodelist + + words = line.split() + port = words[1] + sbd_status = words[2] + nodelist[port] = (port, translate_status(sbd_status)) + + return nodelist + +def parse_sbd_devices(options): + """Returns an array of all sbd devices. + + Key arguments: + options -- options dictionary + + Return Value: + devices -- array of device paths + """ + + devices = [str.strip(dev) \ + for dev in str.split(options["--devices"], ",")] + + return devices + +def define_new_opts(): + """Defines the all opt list + """ + all_opt["devices"] = { + "getopt" : ":", + "longopt" : "devices", + "help":"--devices=[device_a,device_b] \ +Comma separated list of sbd devices", + "required" : "1", + "shortdesc" : "SBD Device", + "order": 1 + } + + all_opt["sbd_path"] = { + "getopt" : ":", + "longopt" : "sbd-path", + "help" : "--sbd-path=[path] Path to SBD binary", + "required" : "0", + "default" : "@SBD_PATH@", + "order": 200 + } + +def main(): + """Main function + """ + # We need to define "no_password" otherwise we will be ask about it if + # we don't provide any password. + device_opt = ["no_password", "devices", "port", "method", "sbd_path"] + + # close stdout if we get interrupted + atexit.register(atexit_handler) + + define_new_opts() + + all_opt["method"]["default"] = "cycle" + all_opt["method"]["help"] = "-m, --method=[method] Method to fence (onoff|cycle) (Default: cycle)" + all_opt["power_timeout"]["default"] = "30" + + options = check_input(device_opt, process_input(device_opt)) + + # fill the needed variables to generate metadata and help text output + docs = {} + docs["shortdesc"] = "Fence agent for sbd" + docs["longdesc"] = "fence_sbd is I/O Fencing agent \ +which can be used in environments where sbd can be used (shared storage)." + docs["vendorurl"] = "" + show_docs(options, docs) + + # We need to check if --devices is given and not empty. + if "--devices" not in options: + fail_usage("No SBD devices specified. \ + At least one SBD device is required.") + + run_delay(options) + + # We need to check if the provided sbd_devices exists. We need to do + # that for every given device. + # Just for the case we are really rebooting / powering off a device + # (pacemaker as well uses the list command to generate a dynamic list) + # we leave it to sbd to try and decide if it was successful + if not options["--action"] in ["reboot", "off", "list"]: + for device_path in parse_sbd_devices(options): + logging.debug("check device \"%s\"", device_path) + + return_code = check_sbd_device(options, device_path) + if PATH_NOT_EXISTS == return_code: + logging.error("\"%s\" does not exist", device_path) + elif PATH_NOT_BLOCK == return_code: + logging.error("\"%s\" is not a valid block device", device_path) + elif DEVICE_NOT_INIT == return_code: + logging.error("\"%s\" is not initialized", device_path) + elif DEVICE_INIT != return_code: + logging.error("UNKNOWN error while checking \"%s\"", device_path) + + # If we get any error while checking the device we need to exit at this + # point. + if DEVICE_INIT != return_code: + exit(return_code) + + # we check against the defined timeouts. If the pacemaker timeout is smaller + # then that defined within sbd we should report this. + power_timeout = int(options["--power-timeout"]) + sbd_msg_timeout = get_msg_timeout(options) + if 0 < power_timeout <= sbd_msg_timeout: + logging.warn("power timeout needs to be \ + greater then sbd message timeout") + + result = fence_action(\ + None, \ + options, \ + set_power_status, \ + get_power_status, \ + get_node_list, \ + reboot_cycle) + + sys.exit(result) + +if __name__ == "__main__": + main() |