diff options
Diffstat (limited to 'agents/evacuate/fence_evacuate.py')
-rw-r--r-- | agents/evacuate/fence_evacuate.py | 428 |
1 files changed, 428 insertions, 0 deletions
diff --git a/agents/evacuate/fence_evacuate.py b/agents/evacuate/fence_evacuate.py new file mode 100644 index 0000000..53d6fd1 --- /dev/null +++ b/agents/evacuate/fence_evacuate.py @@ -0,0 +1,428 @@ +#!@PYTHON@ -tt + +import sys +import time +import atexit +import logging +import inspect +import requests.exceptions + +sys.path.append("@FENCEAGENTSLIBDIR@") +from fencing import * +from fencing import fail_usage, is_executable, run_command, run_delay + +EVACUABLE_TAG = "evacuable" +TRUE_TAGS = ['true'] + +def get_power_status(connection, options): + + status = "unknown" + logging.debug("get action: " + options["--action"]) + + if connection: + try: + services = connection.services.list(host=options["--plug"], binary="nova-compute") + for service in services: + logging.debug("Status of %s is %s, %s" % (service.binary, service.state, service.status)) + if service.state == "up" and service.status == "enabled": + # Up and operational + status = "on" + + elif service.state == "down" and service.status == "disabled": + # Down and fenced + status = "off" + + elif service.state == "down": + # Down and requires fencing + status = "failed" + + elif service.state == "up": + # Up and requires unfencing + status = "running" + else: + logging.warning("Unknown status detected from nova for %s: %s, %s" % (options["--plug"], service.state, service.status)) + status = "%s %s" % (service.state, service.status) + break + except requests.exception.ConnectionError as err: + logging.warning("Nova connection failed: " + str(err)) + return status + +# NOTE(sbauza); We mimic the host-evacuate module since it's only a contrib +# module which is not stable +def _server_evacuate(connection, server, on_shared_storage): + success = False + error_message = "" + try: + logging.debug("Resurrecting instance: %s" % server) + (response, dictionary) = connection.servers.evacuate(server=server, on_shared_storage=on_shared_storage) + + if response == None: + error_message = "No response while evacuating instance" + elif response.status_code == 200: + success = True + error_message = response.reason + else: + error_message = response.reason + + except Exception as e: + error_message = "Error while evacuating instance: %s" % e + + return { + "uuid": server, + "accepted": success, + "reason": error_message, + } + +def _is_server_evacuable(server, evac_flavors, evac_images): + reason = "flavor "+server.flavor.get('id') + if server.flavor.get('id') in evac_flavors: + return True + if hasattr(server.image, 'get'): + if server.image.get('id') in evac_images: + return True + reason = reason +" and image "+server.image.get('id') + + logging.debug("Instance is not evacuable: no match for %s" % reason) + return False + +def _get_evacuable_flavors(connection): + result = [] + flavors = connection.flavors.list(is_public=None) + # Since the detailed view for all flavors doesn't provide the extra specs, + # we need to call each of the flavor to get them. + for flavor in flavors: + tag = flavor.get_keys().get(EVACUABLE_TAG) + if tag and tag.strip().lower() in TRUE_TAGS: + result.append(flavor.id) + return result + +def _get_evacuable_images(connection): + result = [] + images = [] + if hasattr(connection, "images"): + images = connection.images.list(detailed=True) + elif hasattr(connection, "glance"): + # OSP12+ + images = connection.glance.list() + + for image in images: + if hasattr(image, 'metadata'): + tag = image.metadata.get(EVACUABLE_TAG) + if tag and tag.strip().lower() in TRUE_TAGS: + result.append(image.id) + elif hasattr(image, 'tags'): + # OSP12+ + if EVACUABLE_TAG in image.tags: + result.append(image.id) + return result + +def _host_evacuate(connection, options): + result = True + images = _get_evacuable_images(connection) + flavors = _get_evacuable_flavors(connection) + servers = connection.servers.list(search_opts={'host': options["--plug"], 'all_tenants': 1 }) + + if options["--instance-filtering"] == "False": + logging.debug("Not evacuating anything") + evacuables = [] + elif len(flavors) or len(images): + logging.debug("Filtering images and flavors: %s %s" % (repr(flavors), repr(images))) + # Identify all evacuable servers + logging.debug("Checking %s" % repr(servers)) + evacuables = [server for server in servers + if _is_server_evacuable(server, flavors, images)] + logging.debug("Evacuating %s" % repr(evacuables)) + else: + logging.debug("Evacuating all images and flavors") + evacuables = servers + + if options["--no-shared-storage"] != "False": + on_shared_storage = False + else: + on_shared_storage = True + + for server in evacuables: + logging.debug("Processing %s" % server) + if hasattr(server, 'id'): + response = _server_evacuate(connection, server.id, on_shared_storage) + if response["accepted"]: + logging.debug("Evacuated %s from %s: %s" % + (response["uuid"], options["--plug"], response["reason"])) + else: + logging.error("Evacuation of %s on %s failed: %s" % + (response["uuid"], options["--plug"], response["reason"])) + result = False + else: + logging.error("Could not evacuate instance: %s" % server.to_dict()) + # Should a malformed instance result in a failed evacuation? + # result = False + return result + +def set_attrd_status(host, status, options): + logging.debug("Setting fencing status for %s to %s" % (host, status)) + run_command(options, "attrd_updater -p -n evacuate -Q -N %s -U %s" % (host, status)) + +def set_power_status(connection, options): + logging.debug("set action: " + options["--action"]) + + if not connection: + return + + if options["--action"] == "off" and not _host_evacuate(options): + sys.exit(1) + + sys.exit(0) + +def get_plugs_list(connection, options): + result = {} + + if connection: + services = connection.services.list(binary="nova-compute") + for service in services: + longhost = service.host + shorthost = longhost.split('.')[0] + result[longhost] = ("", None) + result[shorthost] = ("", None) + return result + +def create_nova_connection(options): + nova = None + + try: + from novaclient import client + from novaclient.exceptions import NotAcceptable + except ImportError: + fail_usage("Nova not found or not accessible") + + from keystoneauth1 import loading + from keystoneauth1 import session + from keystoneclient import discover + + # Prefer the oldest and strip the leading 'v' + keystone_versions = discover.available_versions(options["--auth-url"]) + keystone_version = keystone_versions[0]['id'][1:] + kwargs = dict( + auth_url=options["--auth-url"], + username=options["--username"], + password=options["--password"] + ) + + if discover.version_match("2", keystone_version): + kwargs["tenant_name"] = options["--tenant-name"] + + elif discover.version_match("3", keystone_version): + kwargs["project_name"] = options["--tenant-name"] + kwargs["user_domain_name"] = options["--user-domain"] + kwargs["project_domain_name"] = options["--project-domain"] + + loader = loading.get_plugin_loader('password') + keystone_auth = loader.load_from_options(**kwargs) + keystone_session = session.Session(auth=keystone_auth, verify=not "--insecure" in options) + + versions = [ "2.11", "2" ] + for version in versions: + clientargs = inspect.getargspec(client.Client).varargs + + # Some versions of Openstack prior to Ocata only + # supported positional arguments for username, + # password, and tenant. + # + # Versions since Ocata only support named arguments. + # + # So we need to use introspection to figure out how to + # create a Nova client. + # + # Happy days + # + if clientargs: + # OSP < 11 + # ArgSpec(args=['version', 'username', 'password', 'project_id', 'auth_url'], + # varargs=None, + # keywords='kwargs', defaults=(None, None, None, None)) + nova = client.Client(version, + None, # User + None, # Password + None, # Tenant + None, # Auth URL + insecure="--insecure" in options, + region_name=options["--region-name"], + endpoint_type=options["--endpoint-type"], + session=keystone_session, auth=keystone_auth, + http_log_debug="--verbose" in options) + else: + # OSP >= 11 + # ArgSpec(args=['version'], varargs='args', keywords='kwargs', defaults=None) + nova = client.Client(version, + region_name=options["--region-name"], + endpoint_type=options["--endpoint-type"], + session=keystone_session, auth=keystone_auth, + http_log_debug="--verbose" in options) + + try: + nova.hypervisors.list() + return nova + + except NotAcceptable as e: + logging.warning(e) + + except Exception as e: + logging.warning("Nova connection failed. %s: %s" % (e.__class__.__name__, e)) + + logging.warning("Couldn't obtain a supported connection to nova, tried: %s\n" % repr(versions)) + return None + +def define_new_opts(): + all_opt["endpoint_type"] = { + "getopt" : "e:", + "longopt" : "endpoint-type", + "help" : "-e, --endpoint-type=[endpoint] Nova Endpoint type (publicURL, internalURL, adminURL)", + "required" : "0", + "shortdesc" : "Nova Endpoint type", + "default" : "internalURL", + "order": 1, + } + all_opt["tenant_name"] = { + "getopt" : "t:", + "longopt" : "tenant-name", + "help" : "-t, --tenant-name=[name] Keystone v2 Tenant or v3 Project Name", + "required" : "0", + "shortdesc" : "Keystone Admin Tenant or v3 Project", + "default" : "", + "order": 1, + } + all_opt["user-domain"] = { + "getopt" : "u:", + "longopt" : "user-domain", + "help" : "-u, --user-domain=[name] Keystone v3 User Domain", + "required" : "0", + "shortdesc" : "Keystone v3 User Domain", + "default" : "Default", + "order": 2, + } + all_opt["project-domain"] = { + "getopt" : "P:", + "longopt" : "project-domain", + "help" : "-P, --project-domain=[name] Keystone v3 Project Domain", + "required" : "0", + "shortdesc" : "Keystone v3 Project Domain", + "default" : "Default", + "order": 2, + } + all_opt["auth_url"] = { + "getopt" : "k:", + "longopt" : "auth-url", + "help" : "-k, --auth-url=[url] Keystone Admin Auth URL", + "required" : "0", + "shortdesc" : "Keystone Admin Auth URL", + "default" : "", + "order": 1, + } + all_opt["region_name"] = { + "getopt" : ":", + "longopt" : "region-name", + "help" : "--region-name=[region] Region Name", + "required" : "0", + "shortdesc" : "Region Name", + "default" : "", + "order": 1, + } + all_opt["insecure"] = { + "getopt" : "", + "longopt" : "insecure", + "help" : "--insecure Explicitly allow agent to perform \"insecure\" TLS (https) requests", + "required" : "0", + "shortdesc" : "Allow Insecure TLS Requests", + "order": 2, + } + all_opt["domain"] = { + "getopt" : "d:", + "longopt" : "domain", + "help" : "-d, --domain=[string] DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN", + "required" : "0", + "shortdesc" : "DNS domain in which hosts live", + "order": 5, + } + all_opt["instance_filtering"] = { + "getopt" : "", + "longopt" : "instance-filtering", + "help" : "--instance-filtering Allow instances created from images and flavors with evacuable=true to be evacuated (or all if no images/flavors have been tagged)", + "required" : "0", + "shortdesc" : "Allow instances to be evacuated", + "default" : "True", + "order": 5, + } + all_opt["no_shared_storage"] = { + "getopt" : "", + "longopt" : "no-shared-storage", + "help" : "--no-shared-storage Disable functionality for shared storage", + "required" : "0", + "shortdesc" : "Disable functionality for dealing with shared storage", + "default" : "False", + "order": 5, + } + all_opt["compute-domain"] = { + "getopt" : ":", + "longopt" : "compute-domain", + "help" : "--compute-domain=[string] Replaced by --domain", + "required" : "0", + "shortdesc" : "Replaced by domain", + "order": 6, + } + +def main(): + atexit.register(atexit_handler) + + device_opt = ["login", "passwd", "tenant_name", "auth_url", + "no_login", "no_password", "port", "domain", "compute-domain", + "project-domain", "user-domain", "no_shared_storage", + "endpoint_type", "instance_filtering", "insecure", "region_name"] + define_new_opts() + all_opt["shell_timeout"]["default"] = "180" + + options = check_input(device_opt, process_input(device_opt)) + + docs = {} + docs["shortdesc"] = "Fence agent for the automatic resurrection of OpenStack compute instances" + docs["longdesc"] = "Used to reschedule flagged instances" + docs["vendorurl"] = "" + + show_docs(options, docs) + + run_delay(options) + + # workaround to avoid regressions + if "--compute-domain" in options and options["--compute-domain"]: + options["--domain"] = options["--compute-domain"] + del options["--domain"] + + + # Disable insecure-certificate-warning message + if "--insecure" in options: + import urllib3 + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + connection = create_nova_connection(options) + + # Un-evacuating a server doesn't make sense + if options["--action"] in ["on"]: + logging.error("Action %s is not supported by this agent" % (options["--action"])) + sys.exit(1) + + if options["--action"] in ["off", "reboot"]: + status = get_power_status(connection, options) + if status != "off": + logging.error("Cannot resurrect instances from %s in state '%s'" % (options["--plug"], status)) + sys.exit(1) + + elif not _host_evacuate(connection, options): + logging.error("Resurrection of instances from %s failed" % (options["--plug"])) + sys.exit(1) + + logging.info("Resurrection of instances from %s complete" % (options["--plug"])) + sys.exit(0) + + result = fence_action(connection, options, set_power_status, get_power_status, get_plugs_list, None) + sys.exit(result) + +if __name__ == "__main__": + main() |