diff options
Diffstat (limited to 'testing/mozharness/mozharness/base')
-rw-r--r-- | testing/mozharness/mozharness/base/__init__.py | 0 | ||||
-rw-r--r-- | testing/mozharness/mozharness/base/config.py | 693 | ||||
-rw-r--r-- | testing/mozharness/mozharness/base/diskutils.py | 170 | ||||
-rwxr-xr-x | testing/mozharness/mozharness/base/errors.py | 164 | ||||
-rwxr-xr-x | testing/mozharness/mozharness/base/log.py | 783 | ||||
-rwxr-xr-x | testing/mozharness/mozharness/base/parallel.py | 35 | ||||
-rw-r--r-- | testing/mozharness/mozharness/base/python.py | 1182 | ||||
-rw-r--r-- | testing/mozharness/mozharness/base/script.py | 2551 | ||||
-rwxr-xr-x | testing/mozharness/mozharness/base/transfer.py | 41 | ||||
-rw-r--r-- | testing/mozharness/mozharness/base/vcs/__init__.py | 0 | ||||
-rw-r--r-- | testing/mozharness/mozharness/base/vcs/gittool.py | 107 | ||||
-rwxr-xr-x | testing/mozharness/mozharness/base/vcs/mercurial.py | 478 | ||||
-rwxr-xr-x | testing/mozharness/mozharness/base/vcs/vcsbase.py | 149 |
13 files changed, 6353 insertions, 0 deletions
diff --git a/testing/mozharness/mozharness/base/__init__.py b/testing/mozharness/mozharness/base/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/testing/mozharness/mozharness/base/__init__.py diff --git a/testing/mozharness/mozharness/base/config.py b/testing/mozharness/mozharness/base/config.py new file mode 100644 index 0000000000..d12b3aecad --- /dev/null +++ b/testing/mozharness/mozharness/base/config.py @@ -0,0 +1,693 @@ +#!/usr/bin/env python +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""Generic config parsing and dumping, the way I remember it from scripts +gone by. + +The config should be built from script-level defaults, overlaid by +config-file defaults, overlaid by command line options. + + (For buildbot-analogues that would be factory-level defaults, + builder-level defaults, and build request/scheduler settings.) + +The config should then be locked (set to read-only, to prevent runtime +alterations). Afterwards we should dump the config to a file that is +uploaded with the build, and can be used to debug or replicate the build +at a later time. + +TODO: + +* check_required_settings or something -- run at init, assert that + these settings are set. +""" + +import os +import socket +import sys +import time +from copy import deepcopy +from optparse import Option, OptionGroup, OptionParser + +from mozharness.base.log import CRITICAL, DEBUG, ERROR, FATAL, INFO, WARNING + +try: + from urllib2 import URLError, urlopen +except ImportError: + from urllib.error import URLError + from urllib.request import urlopen + + +try: + import simplejson as json +except ImportError: + import json + + +# optparse {{{1 +class ExtendedOptionParser(OptionParser): + """OptionParser, but with ExtendOption as the option_class.""" + + def __init__(self, **kwargs): + kwargs["option_class"] = ExtendOption + OptionParser.__init__(self, **kwargs) + + +class ExtendOption(Option): + """from http://docs.python.org/library/optparse.html?highlight=optparse#adding-new-actions""" + + ACTIONS = Option.ACTIONS + ("extend",) + STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",) + TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",) + ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",) + + def take_action(self, action, dest, opt, value, values, parser): + if action == "extend": + lvalue = value.split(",") + values.ensure_value(dest, []).extend(lvalue) + else: + Option.take_action(self, action, dest, opt, value, values, parser) + + +def make_immutable(item): + if isinstance(item, list) or isinstance(item, tuple): + result = LockedTuple(item) + elif isinstance(item, dict): + result = ReadOnlyDict(item) + result.lock() + else: + result = item + return result + + +class LockedTuple(tuple): + def __new__(cls, items): + return tuple.__new__(cls, (make_immutable(x) for x in items)) + + def __deepcopy__(self, memo): + return [deepcopy(elem, memo) for elem in self] + + +# ReadOnlyDict {{{1 +class ReadOnlyDict(dict): + def __init__(self, dictionary): + self._lock = False + self.update(dictionary.copy()) + + def _check_lock(self): + assert not self._lock, "ReadOnlyDict is locked!" + + def lock(self): + for (k, v) in list(self.items()): + self[k] = make_immutable(v) + self._lock = True + + def __setitem__(self, *args): + self._check_lock() + return dict.__setitem__(self, *args) + + def __delitem__(self, *args): + self._check_lock() + return dict.__delitem__(self, *args) + + def clear(self, *args): + self._check_lock() + return dict.clear(self, *args) + + def pop(self, *args): + self._check_lock() + return dict.pop(self, *args) + + def popitem(self, *args): + self._check_lock() + return dict.popitem(self, *args) + + def setdefault(self, *args): + self._check_lock() + return dict.setdefault(self, *args) + + def update(self, *args): + self._check_lock() + dict.update(self, *args) + + def __deepcopy__(self, memo): + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + for k, v in list(self.__dict__.items()): + setattr(result, k, deepcopy(v, memo)) + result._lock = False + for k, v in list(self.items()): + result[k] = deepcopy(v, memo) + return result + + +DEFAULT_CONFIG_PATH = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(__file__))), + "configs", +) + + +# parse_config_file {{{1 +def parse_config_file( + file_name, quiet=False, search_path=None, config_dict_name="config" +): + """Read a config file and return a dictionary.""" + file_path = None + if os.path.exists(file_name): + file_path = file_name + else: + if not search_path: + search_path = [".", DEFAULT_CONFIG_PATH] + for path in search_path: + if os.path.exists(os.path.join(path, file_name)): + file_path = os.path.join(path, file_name) + break + else: + raise IOError("Can't find %s in %s!" % (file_name, search_path)) + if file_name.endswith(".py"): + global_dict = {} + local_dict = {} + exec( + compile(open(file_path, "rb").read(), file_path, "exec"), + global_dict, + local_dict, + ) + config = local_dict[config_dict_name] + elif file_name.endswith(".json"): + fh = open(file_path) + config = {} + json_config = json.load(fh) + config = dict(json_config) + fh.close() + else: + raise RuntimeError( + "Unknown config file type %s! (config files must end in .json or .py)" + % file_name + ) + # TODO return file_path + return config + + +def download_config_file(url, file_name): + n = 0 + attempts = 5 + sleeptime = 60 + max_sleeptime = 5 * 60 + while True: + if n >= attempts: + print( + "Failed to download from url %s after %d attempts, quiting..." + % (url, attempts) + ) + raise SystemError(-1) + try: + contents = urlopen(url, timeout=30).read() + break + except URLError as e: + print("Error downloading from url %s: %s" % (url, str(e))) + except socket.timeout as e: + print("Time out accessing %s: %s" % (url, str(e))) + except socket.error as e: + print("Socket error when accessing %s: %s" % (url, str(e))) + print("Sleeping %d seconds before retrying" % sleeptime) + time.sleep(sleeptime) + sleeptime = sleeptime * 2 + if sleeptime > max_sleeptime: + sleeptime = max_sleeptime + n += 1 + + try: + f = open(file_name, "w") + f.write(contents) + f.close() + except IOError as e: + print("Error writing downloaded contents to file %s: %s" % (file_name, str(e))) + raise SystemError(-1) + + +# BaseConfig {{{1 +class BaseConfig(object): + """Basic config setting/getting.""" + + def __init__( + self, + config=None, + initial_config_file=None, + config_options=None, + all_actions=None, + default_actions=None, + volatile_config=None, + option_args=None, + require_config_file=False, + append_env_variables_from_configs=False, + usage="usage: %prog [options]", + ): + self._config = {} + self.all_cfg_files_and_dicts = [] + self.actions = [] + self.config_lock = False + self.require_config_file = require_config_file + # It allows to append env variables from multiple config files + self.append_env_variables_from_configs = append_env_variables_from_configs + + if all_actions: + self.all_actions = all_actions[:] + else: + self.all_actions = ["clobber", "build"] + if default_actions: + self.default_actions = default_actions[:] + else: + self.default_actions = self.all_actions[:] + if volatile_config is None: + self.volatile_config = { + "actions": None, + "add_actions": None, + "no_actions": None, + } + else: + self.volatile_config = deepcopy(volatile_config) + + if config: + self.set_config(config) + if initial_config_file: + initial_config = parse_config_file(initial_config_file) + self.all_cfg_files_and_dicts.append((initial_config_file, initial_config)) + self.set_config(initial_config) + # Since initial_config_file is only set when running unit tests, + # if no option_args have been specified, then the parser will + # parse sys.argv which in this case would be the command line + # options specified to run the tests, e.g. nosetests -v. Clearly, + # the options passed to nosetests (such as -v) should not be + # interpreted by mozharness as mozharness options, so we specify + # a dummy command line with no options, so that the parser does + # not add anything from the test invocation command line + # arguments to the mozharness options. + if option_args is None: + option_args = [ + "dummy_mozharness_script_with_no_command_line_options.py" + ] + if config_options is None: + config_options = [] + self._create_config_parser(config_options, usage) + # we allow manually passing of option args for things like nosetests + self.parse_args(args=option_args) + + def get_read_only_config(self): + return ReadOnlyDict(self._config) + + def _create_config_parser(self, config_options, usage): + self.config_parser = ExtendedOptionParser(usage=usage) + self.config_parser.add_option( + "--work-dir", + action="store", + dest="work_dir", + type="string", + default="build", + help="Specify the work_dir (subdir of base_work_dir)", + ) + self.config_parser.add_option( + "--base-work-dir", + action="store", + dest="base_work_dir", + type="string", + default=os.getcwd(), + help="Specify the absolute path of the parent of the working directory", + ) + self.config_parser.add_option( + "--extra-config-path", + action="extend", + dest="config_paths", + type="string", + help="Specify additional paths to search for config files.", + ) + self.config_parser.add_option( + "-c", + "--config-file", + "--cfg", + action="extend", + dest="config_files", + default=[], + type="string", + help="Specify a config file; can be repeated", + ) + self.config_parser.add_option( + "-C", + "--opt-config-file", + "--opt-cfg", + action="extend", + dest="opt_config_files", + type="string", + default=[], + help="Specify an optional config file, like --config-file but with no " + "error if the file is missing; can be repeated", + ) + self.config_parser.add_option( + "--dump-config", + action="store_true", + dest="dump_config", + help="List and dump the config generated from this run to " "a JSON file.", + ) + self.config_parser.add_option( + "--dump-config-hierarchy", + action="store_true", + dest="dump_config_hierarchy", + help="Like --dump-config but will list and dump which config " + "files were used making up the config and specify their own " + "keys/values that were not overwritten by another cfg -- " + "held the highest hierarchy.", + ) + self.config_parser.add_option( + "--append-env-variables-from-configs", + action="store_true", + dest="append_env_variables_from_configs", + help="Merge environment variables from config files.", + ) + + # Logging + log_option_group = OptionGroup(self.config_parser, "Logging") + log_option_group.add_option( + "--log-level", + action="store", + type="choice", + dest="log_level", + default=INFO, + choices=[DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL], + help="Set log level (debug|info|warning|error|critical|fatal)", + ) + log_option_group.add_option( + "-q", + "--quiet", + action="store_false", + dest="log_to_console", + default=True, + help="Don't log to the console", + ) + log_option_group.add_option( + "--append-to-log", + action="store_true", + dest="append_to_log", + default=False, + help="Append to the log", + ) + log_option_group.add_option( + "--multi-log", + action="store_const", + const="multi", + dest="log_type", + help="Log using MultiFileLogger", + ) + log_option_group.add_option( + "--simple-log", + action="store_const", + const="simple", + dest="log_type", + help="Log using SimpleFileLogger", + ) + self.config_parser.add_option_group(log_option_group) + + # Actions + action_option_group = OptionGroup( + self.config_parser, + "Actions", + "Use these options to list or enable/disable actions.", + ) + action_option_group.add_option( + "--list-actions", + action="store_true", + dest="list_actions", + help="List all available actions, then exit", + ) + action_option_group.add_option( + "--add-action", + action="extend", + dest="add_actions", + metavar="ACTIONS", + help="Add action %s to the list of actions" % self.all_actions, + ) + action_option_group.add_option( + "--no-action", + action="extend", + dest="no_actions", + metavar="ACTIONS", + help="Don't perform action", + ) + action_option_group.add_option( + "--requires-gpu", + action="store_true", + dest="requires_gpu", + default=False, + help="Indicates if the task requires gpu. ", + ) + for action in self.all_actions: + action_option_group.add_option( + "--%s" % action, + action="append_const", + dest="actions", + const=action, + help="Add %s to the limited list of actions" % action, + ) + action_option_group.add_option( + "--no-%s" % action, + action="append_const", + dest="no_actions", + const=action, + help="Remove %s from the list of actions to perform" % action, + ) + self.config_parser.add_option_group(action_option_group) + # Child-specified options + # TODO error checking for overlapping options + if config_options: + for option in config_options: + self.config_parser.add_option(*option[0], **option[1]) + + # Initial-config-specified options + config_options = self._config.get("config_options", None) + if config_options: + for option in config_options: + self.config_parser.add_option(*option[0], **option[1]) + + def set_config(self, config, overwrite=False): + """This is probably doable some other way.""" + if self._config and not overwrite: + self._config.update(config) + else: + self._config = config + return self._config + + def get_actions(self): + return self.actions + + def verify_actions(self, action_list, quiet=False): + for action in action_list: + if action not in self.all_actions: + if not quiet: + print("Invalid action %s not in %s!" % (action, self.all_actions)) + raise SystemExit(-1) + return action_list + + def verify_actions_order(self, action_list): + try: + indexes = [self.all_actions.index(elt) for elt in action_list] + sorted_indexes = sorted(indexes) + for i in range(len(indexes)): + if indexes[i] != sorted_indexes[i]: + print( + ("Action %s comes in different order in %s\n" + "than in %s") + % (action_list[i], action_list, self.all_actions) + ) + raise SystemExit(-1) + except ValueError as e: + print("Invalid action found: " + str(e)) + raise SystemExit(-1) + + def list_actions(self): + print("Actions available:") + for a in self.all_actions: + print(" " + ("*" if a in self.default_actions else " "), a) + raise SystemExit(0) + + def get_cfgs_from_files(self, all_config_files, options): + """Returns the configuration derived from the list of configuration + files. The result is represented as a list of `(filename, + config_dict)` tuples; they will be combined with keys in later + dictionaries taking precedence over earlier. + + `all_config_files` is all files specified with `--config-file` and + `--opt-config-file`; `options` is the argparse options object giving + access to any other command-line options. + + This function is also responsible for downloading any configuration + files specified by URL. It uses ``parse_config_file`` in this module + to parse individual files. + + This method can be overridden in a subclass to add extra logic to the + way that self.config is made up. See + `mozharness.mozilla.building.buildbase.BuildingConfig` for an example. + """ + config_paths = options.config_paths or ["."] + all_cfg_files_and_dicts = [] + for cf in all_config_files: + try: + if "://" in cf: # config file is an url + file_name = os.path.basename(cf) + file_path = os.path.join(os.getcwd(), file_name) + download_config_file(cf, file_path) + all_cfg_files_and_dicts.append( + ( + file_path, + parse_config_file( + file_path, + search_path=["."], + ), + ) + ) + else: + all_cfg_files_and_dicts.append( + ( + cf, + parse_config_file( + cf, + search_path=config_paths + [DEFAULT_CONFIG_PATH], + ), + ) + ) + except Exception: + if cf in options.opt_config_files: + print("WARNING: optional config file not found %s" % cf) + else: + raise + + if "EXTRA_MOZHARNESS_CONFIG" in os.environ: + env_config = json.loads(os.environ["EXTRA_MOZHARNESS_CONFIG"]) + all_cfg_files_and_dicts.append(("[EXTRA_MOZHARENSS_CONFIG]", env_config)) + + return all_cfg_files_and_dicts + + def parse_args(self, args=None): + """Parse command line arguments in a generic way. + Return the parser object after adding the basic options, so + child objects can manipulate it. + """ + self.command_line = " ".join(sys.argv) + if args is None: + args = sys.argv[1:] + (options, args) = self.config_parser.parse_args(args) + + defaults = self.config_parser.defaults.copy() + + if not options.config_files: + if self.require_config_file: + if options.list_actions: + self.list_actions() + print("Required config file not set! (use --config-file option)") + raise SystemExit(-1) + + os.environ["REQUIRE_GPU"] = "0" + if options.requires_gpu: + os.environ["REQUIRE_GPU"] = "1" + + # this is what get_cfgs_from_files returns. It will represent each + # config file name and its assoctiated dict + # eg ('builds/branch_specifics.py', {'foo': 'bar'}) + # let's store this to self for things like --interpret-config-files + self.all_cfg_files_and_dicts.extend( + self.get_cfgs_from_files( + # append opt_config to allow them to overwrite previous configs + options.config_files + options.opt_config_files, + options=options, + ) + ) + config = {} + if ( + self.append_env_variables_from_configs + or options.append_env_variables_from_configs + ): + # We only append values from various configs for the 'env' entry + # For everything else we follow the standard behaviour + for i, (c_file, c_dict) in enumerate(self.all_cfg_files_and_dicts): + for v in list(c_dict.keys()): + if v == "env" and v in config: + config[v].update(c_dict[v]) + else: + config[v] = c_dict[v] + else: + for i, (c_file, c_dict) in enumerate(self.all_cfg_files_and_dicts): + config.update(c_dict) + # assign or update self._config depending on if it exists or not + # NOTE self._config will be passed to ReadOnlyConfig's init -- a + # dict subclass with immutable locking capabilities -- and serve + # as the keys/values that make up that instance. Ultimately, + # this becomes self.config during BaseScript's init + self.set_config(config) + + for key in list(defaults.keys()): + value = getattr(options, key) + if value is None: + continue + # Don't override config_file defaults with config_parser defaults + if key in defaults and value == defaults[key] and key in self._config: + continue + self._config[key] = value + + # The idea behind the volatile_config is we don't want to save this + # info over multiple runs. This defaults to the action-specific + # config options, but can be anything. + for key in list(self.volatile_config.keys()): + if self._config.get(key) is not None: + self.volatile_config[key] = self._config[key] + del self._config[key] + + self.update_actions() + if options.list_actions: + self.list_actions() + + # Keep? This is for saving the volatile config in the dump_config + self._config["volatile_config"] = self.volatile_config + + self.options = options + self.args = args + return (self.options, self.args) + + def update_actions(self): + """Update actions after reading in config. + + Seems a little complex, but the logic goes: + + First, if default_actions is specified in the config, set our + default actions even if the script specifies other default actions. + + Without any other action-specific options, run with default actions. + + If we specify --ACTION or --only-ACTION once or multiple times, + we want to override the default_actions list with the one(s) we list. + + Otherwise, if we specify --add-action ACTION, we want to add an + action to the list. + + Finally, if we specify --no-ACTION, remove that from the list of + actions to perform. + """ + if self._config.get("default_actions"): + default_actions = self.verify_actions(self._config["default_actions"]) + self.default_actions = default_actions + self.verify_actions_order(self.default_actions) + self.actions = self.default_actions[:] + if self.volatile_config["actions"]: + actions = self.verify_actions(self.volatile_config["actions"]) + self.actions = actions + elif self.volatile_config["add_actions"]: + actions = self.verify_actions(self.volatile_config["add_actions"]) + self.actions.extend(actions) + if self.volatile_config["no_actions"]: + actions = self.verify_actions(self.volatile_config["no_actions"]) + for action in actions: + if action in self.actions: + self.actions.remove(action) + + +# __main__ {{{1 +if __name__ == "__main__": + pass diff --git a/testing/mozharness/mozharness/base/diskutils.py b/testing/mozharness/mozharness/base/diskutils.py new file mode 100644 index 0000000000..757a6ffb7a --- /dev/null +++ b/testing/mozharness/mozharness/base/diskutils.py @@ -0,0 +1,170 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Disk utility module, no mixins here! + + examples: + 1) get disk size + from mozharness.base.diskutils import DiskInfo, DiskutilsError + ... + try: + DiskSize().get_size(path='/', unit='Mb') + except DiskutilsError as e: + # manage the exception e.g: log.error(e) + pass + log.info("%s" % di) + + + 2) convert disk size: + from mozharness.base.diskutils import DiskutilsError, convert_to + ... + file_size = <function that gets file size in bytes> + # convert file_size to GB + try: + file_size = convert_to(file_size, from_unit='bytes', to_unit='GB') + except DiskutilsError as e: + # manage the exception e.g: log.error(e) + pass + +""" +import ctypes +import logging +import os +import sys + +from six import string_types + +from mozharness.base.log import INFO, numeric_log_level + +# use mozharness log +log = logging.getLogger(__name__) + + +class DiskutilsError(Exception): + """Exception thrown by Diskutils module""" + + pass + + +def convert_to(size, from_unit, to_unit): + """Helper method to convert filesystem sizes to kB/ MB/ GB/ TB/ + valid values for source_format and destination format are: + * bytes + * kB + * MB + * GB + * TB + returns: size converted from source_format to destination_format. + """ + sizes = { + "bytes": 1, + "kB": 1024, + "MB": 1024 * 1024, + "GB": 1024 * 1024 * 1024, + "TB": 1024 * 1024 * 1024 * 1024, + } + try: + df = sizes[to_unit] + sf = sizes[from_unit] + # pylint --py3k W1619 + return size * sf / df + except KeyError: + raise DiskutilsError("conversion error: Invalid source or destination format") + except TypeError: + raise DiskutilsError("conversion error: size (%s) is not a number" % size) + + +class DiskInfo(object): + """Stores basic information about the disk""" + + def __init__(self): + self.unit = "bytes" + self.free = 0 + self.used = 0 + self.total = 0 + + def __str__(self): + string = ["Disk space info (in %s)" % self.unit] + string += ["total: %s" % self.total] + string += ["used: %s" % self.used] + string += ["free: %s" % self.free] + return " ".join(string) + + def _to(self, unit): + from_unit = self.unit + to_unit = unit + self.free = convert_to(self.free, from_unit=from_unit, to_unit=to_unit) + self.used = convert_to(self.used, from_unit=from_unit, to_unit=to_unit) + self.total = convert_to(self.total, from_unit=from_unit, to_unit=to_unit) + self.unit = unit + + +class DiskSize(object): + """DiskSize object""" + + @staticmethod + def _posix_size(path): + """returns the disk size in bytes + disk size is relative to path + """ + # we are on a POSIX system + st = os.statvfs(path) + disk_info = DiskInfo() + disk_info.free = st.f_bavail * st.f_frsize + disk_info.used = (st.f_blocks - st.f_bfree) * st.f_frsize + disk_info.total = st.f_blocks * st.f_frsize + return disk_info + + @staticmethod + def _windows_size(path): + """returns size in bytes, works only on windows platforms""" + # we're on a non POSIX system (windows) + # DLL call + disk_info = DiskInfo() + dummy = ctypes.c_ulonglong() # needed by the dll call but not used + total = ctypes.c_ulonglong() # stores the total space value + free = ctypes.c_ulonglong() # stores the free space value + # depending on path format (unicode or not) and python version (2 or 3) + # we need to call GetDiskFreeSpaceExW or GetDiskFreeSpaceExA + called_function = ctypes.windll.kernel32.GetDiskFreeSpaceExA + if isinstance(path, string_types) or sys.version_info >= (3,): + called_function = ctypes.windll.kernel32.GetDiskFreeSpaceExW + # we're ready for the dll call. On error it returns 0 + if ( + called_function( + path, ctypes.byref(dummy), ctypes.byref(total), ctypes.byref(free) + ) + != 0 + ): + # success, we can use the values returned by the dll call + disk_info.free = free.value + disk_info.total = total.value + disk_info.used = total.value - free.value + return disk_info + + @staticmethod + def get_size(path, unit, log_level=INFO): + """Disk info stats: + total => size of the disk + used => space used + free => free space + In case of error raises a DiskutilError Exception + """ + try: + # let's try to get the disk size using os module + disk_info = DiskSize()._posix_size(path) + except AttributeError: + try: + # os module failed. let's try to get the size using + # ctypes.windll... + disk_info = DiskSize()._windows_size(path) + except AttributeError: + # No luck! This is not a posix nor window platform + # raise an exception + raise DiskutilsError("Unsupported platform") + + disk_info._to(unit) + lvl = numeric_log_level(log_level) + log.log(lvl, msg="%s" % disk_info) + return disk_info diff --git a/testing/mozharness/mozharness/base/errors.py b/testing/mozharness/mozharness/base/errors.py new file mode 100755 index 0000000000..814dd2e045 --- /dev/null +++ b/testing/mozharness/mozharness/base/errors.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""Generic error lists. + +Error lists are used to parse output in mozharness.base.log.OutputParser. + +Each line of output is matched against each substring or regular expression +in the error list. On a match, we determine the 'level' of that line, +whether IGNORE, DEBUG, INFO, WARNING, ERROR, CRITICAL, or FATAL. + +TODO: Context lines (requires work on the OutputParser side) + +TODO: We could also create classes that generate these, but with the +appropriate level (please don't die on any errors; please die on any +warning; etc.) or platform or language or whatever. +""" + +import re + +from mozharness.base.log import CRITICAL, DEBUG, ERROR, WARNING + + +# Exceptions +class VCSException(Exception): + pass + + +# ErrorLists {{{1 +BaseErrorList = [{"substr": r"""command not found""", "level": ERROR}] + +HgErrorList = BaseErrorList + [ + { + "regex": re.compile(r"""^abort:"""), + "level": ERROR, + "explanation": "Automation Error: hg not responding", + }, + { + "substr": r"""unknown exception encountered""", + "level": ERROR, + "explanation": "Automation Error: python exception in hg", + }, + { + "substr": r"""failed to import extension""", + "level": WARNING, + "explanation": "Automation Error: hg extension missing", + }, +] + +GitErrorList = BaseErrorList + [ + {"substr": r"""Permission denied (publickey).""", "level": ERROR}, + {"substr": r"""fatal: The remote end hung up unexpectedly""", "level": ERROR}, + {"substr": r"""does not appear to be a git repository""", "level": ERROR}, + {"substr": r"""error: src refspec""", "level": ERROR}, + {"substr": r"""invalid author/committer line -""", "level": ERROR}, + {"substr": r"""remote: fatal: Error in object""", "level": ERROR}, + { + "substr": r"""fatal: sha1 file '<stdout>' write error: Broken pipe""", + "level": ERROR, + }, + {"substr": r"""error: failed to push some refs to """, "level": ERROR}, + {"substr": r"""remote: error: denying non-fast-forward """, "level": ERROR}, + {"substr": r"""! [remote rejected] """, "level": ERROR}, + {"regex": re.compile(r"""remote:.*No such file or directory"""), "level": ERROR}, +] + +PythonErrorList = BaseErrorList + [ + {"regex": re.compile(r"""Warning:.*Error: """), "level": WARNING}, + {"regex": re.compile(r"""package.*> Error:"""), "level": ERROR}, + {"substr": r"""Traceback (most recent call last)""", "level": ERROR}, + {"substr": r"""SyntaxError: """, "level": ERROR}, + {"substr": r"""TypeError: """, "level": ERROR}, + {"substr": r"""NameError: """, "level": ERROR}, + {"substr": r"""ZeroDivisionError: """, "level": ERROR}, + {"regex": re.compile(r"""raise \w*Exception: """), "level": CRITICAL}, + {"regex": re.compile(r"""raise \w*Error: """), "level": CRITICAL}, +] + +VirtualenvErrorList = [ + {"substr": r"""not found or a compiler error:""", "level": WARNING}, + {"regex": re.compile("""\d+: error: """), "level": ERROR}, + {"regex": re.compile("""\d+: warning: """), "level": WARNING}, + { + "regex": re.compile(r"""Downloading .* \(.*\): *([0-9]+%)? *[0-9\.]+[kmKM]b"""), + "level": DEBUG, + }, +] + PythonErrorList + +RustErrorList = [ + {"regex": re.compile(r"""error\[E\d+\]:"""), "level": ERROR}, + {"substr": r"""error: Could not compile""", "level": ERROR}, + {"substr": r"""error: aborting due to previous error""", "level": ERROR}, + {"substr": r"""thread 'main' panicked at""", "level": ERROR}, +] + +# We may need to have various MakefileErrorLists for differing amounts of +# warning-ignoring-ness. +MakefileErrorList = ( + BaseErrorList + + PythonErrorList + + RustErrorList + + [ + {"substr": r""": error: """, "level": ERROR}, + {"substr": r"""No rule to make target """, "level": ERROR}, + {"regex": re.compile(r"""akefile.*was not found\."""), "level": ERROR}, + {"regex": re.compile(r"""Stop\.$"""), "level": ERROR}, + { + "regex": re.compile(r"""make\[\d+\]: \*\*\* \[.*\] Error \d+"""), + "level": ERROR, + }, + {"regex": re.compile(r""":\d+: warning:"""), "level": WARNING}, + {"regex": re.compile(r"""make(?:\[\d+\])?: \*\*\*/"""), "level": ERROR}, + {"substr": r"""Warning: """, "level": WARNING}, + ] +) + +TarErrorList = BaseErrorList + [ + {"substr": r"""(stdin) is not a bzip2 file.""", "level": ERROR}, + {"regex": re.compile(r"""Child returned status [1-9]"""), "level": ERROR}, + {"substr": r"""Error exit delayed from previous errors""", "level": ERROR}, + {"substr": r"""stdin: unexpected end of file""", "level": ERROR}, + {"substr": r"""stdin: not in gzip format""", "level": ERROR}, + {"substr": r"""Cannot exec: No such file or directory""", "level": ERROR}, + {"substr": r""": Error is not recoverable: exiting now""", "level": ERROR}, +] + +ZipErrorList = BaseErrorList + [ + { + "substr": r"""zip warning:""", + "level": WARNING, + }, + { + "substr": r"""zip error:""", + "level": ERROR, + }, + { + "substr": r"""Cannot open file: it does not appear to be a valid archive""", + "level": ERROR, + }, +] + +ZipalignErrorList = BaseErrorList + [ + { + "regex": re.compile(r"""Unable to open .* as a zip archive"""), + "level": ERROR, + }, + { + "regex": re.compile(r"""Output file .* exists"""), + "level": ERROR, + }, + { + "substr": r"""Input and output can't be the same file""", + "level": ERROR, + }, +] + + +# __main__ {{{1 +if __name__ == "__main__": + """TODO: unit tests.""" + pass diff --git a/testing/mozharness/mozharness/base/log.py b/testing/mozharness/mozharness/base/log.py new file mode 100755 index 0000000000..3276696751 --- /dev/null +++ b/testing/mozharness/mozharness/base/log.py @@ -0,0 +1,783 @@ +#!/usr/bin/env python +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""Generic logging classes and functionalities for single and multi file logging. +Capturing console output and providing general logging functionalities. + +Attributes: + FATAL_LEVEL (int): constant logging level value set based on the logging.CRITICAL + value + DEBUG (str): mozharness `debug` log name + INFO (str): mozharness `info` log name + WARNING (str): mozharness `warning` log name + CRITICAL (str): mozharness `critical` log name + FATAL (str): mozharness `fatal` log name + IGNORE (str): mozharness `ignore` log name + LOG_LEVELS (dict): mapping of the mozharness log level names to logging values + ROOT_LOGGER (logging.Logger): instance of a logging.Logger class + +TODO: +- network logging support. +- log rotation config +""" + +import logging +import os +import sys +import time +import traceback +from datetime import datetime + +from six import binary_type + +# Define our own FATAL_LEVEL +FATAL_LEVEL = logging.CRITICAL + 10 +logging.addLevelName(FATAL_LEVEL, "FATAL") + +# mozharness log levels. +DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL, IGNORE = ( + "debug", + "info", + "warning", + "error", + "critical", + "fatal", + "ignore", +) + + +LOG_LEVELS = { + DEBUG: logging.DEBUG, + INFO: logging.INFO, + WARNING: logging.WARNING, + ERROR: logging.ERROR, + CRITICAL: logging.CRITICAL, + FATAL: FATAL_LEVEL, +} + +# mozharness root logger +ROOT_LOGGER = logging.getLogger() + +# Force logging to use UTC timestamps +logging.Formatter.converter = time.gmtime + + +# LogMixin {{{1 +class LogMixin(object): + """This is a mixin for any object to access similar logging functionality + + The logging functionality described here is specially useful for those + objects with self.config and self.log_obj member variables + """ + + def _log_level_at_least(self, level): + """Check if the current logging level is greater or equal than level + + Args: + level (str): log level name to compare against mozharness log levels + names + + Returns: + bool: True if the current logging level is great or equal than level, + False otherwise + """ + log_level = INFO + levels = [DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL] + if hasattr(self, "config"): + log_level = self.config.get("log_level", INFO) + return levels.index(level) >= levels.index(log_level) + + def _print(self, message, stderr=False): + """prints a message to the sys.stdout or sys.stderr according to the + value of the stderr argument. + + Args: + message (str): The message to be printed + stderr (bool, optional): if True, message will be printed to + sys.stderr. Defaults to False. + + Returns: + None + """ + if not hasattr(self, "config") or self.config.get("log_to_console", True): + if stderr: + print(message, file=sys.stderr) + else: + print(message) + + def log(self, message, level=INFO, exit_code=-1): + """log the message passed to it according to level, exit if level == FATAL + + Args: + message (str): message to be logged + level (str, optional): logging level of the message. Defaults to INFO + exit_code (int, optional): exit code to log before the scripts calls + SystemExit. + + Returns: + None + """ + if self.log_obj: + return self.log_obj.log_message( + message, + level=level, + exit_code=exit_code, + post_fatal_callback=self._post_fatal, + ) + if level == INFO: + if self._log_level_at_least(level): + self._print(message) + elif level == DEBUG: + if self._log_level_at_least(level): + self._print("DEBUG: %s" % message) + elif level in (WARNING, ERROR, CRITICAL): + if self._log_level_at_least(level): + self._print("%s: %s" % (level.upper(), message), stderr=True) + elif level == FATAL: + if self._log_level_at_least(level): + self._print("FATAL: %s" % message, stderr=True) + raise SystemExit(exit_code) + + def worst_level(self, target_level, existing_level, levels=None): + """Compare target_level with existing_level according to levels values + and return the worst among them. + + Args: + target_level (str): minimum logging level to which the current object + should be set + existing_level (str): current logging level + levels (list(str), optional): list of logging levels names to compare + target_level and existing_level against. + Defaults to mozharness log level + list sorted from most to less critical. + + Returns: + str: the logging lavel that is closest to the first levels value, + i.e. levels[0] + """ + if not levels: + levels = [FATAL, CRITICAL, ERROR, WARNING, INFO, DEBUG, IGNORE] + if target_level not in levels: + self.fatal("'%s' not in %s'." % (target_level, levels)) + for l in levels: + if l in (target_level, existing_level): + return l + + # Copying Bear's dumpException(): + # https://hg.mozilla.org/build/tools/annotate/1485f23c38e0/sut_tools/sut_lib.py#l23 + def exception(self, message=None, level=ERROR): + """log an exception message base on the log level passed to it. + + This function fetches the information of the current exception being handled and + adds it to the message argument. + + Args: + message (str, optional): message to be printed at the beginning of the log. + Default to an empty string. + level (str, optional): log level to use for the logging. Defaults to ERROR + + Returns: + None + """ + tb_type, tb_value, tb_traceback = sys.exc_info() + if message is None: + message = "" + else: + message = "%s\n" % message + for s in traceback.format_exception(tb_type, tb_value, tb_traceback): + message += "%s\n" % s + # Log at the end, as a fatal will attempt to exit after the 1st line. + self.log(message, level=level) + + def debug(self, message): + """calls the log method with DEBUG as logging level + + Args: + message (str): message to log + """ + self.log(message, level=DEBUG) + + def info(self, message): + """calls the log method with INFO as logging level + + Args: + message (str): message to log + """ + self.log(message, level=INFO) + + def warning(self, message): + """calls the log method with WARNING as logging level + + Args: + message (str): message to log + """ + self.log(message, level=WARNING) + + def error(self, message): + """calls the log method with ERROR as logging level + + Args: + message (str): message to log + """ + self.log(message, level=ERROR) + + def critical(self, message): + """calls the log method with CRITICAL as logging level + + Args: + message (str): message to log + """ + self.log(message, level=CRITICAL) + + def fatal(self, message, exit_code=-1): + """calls the log method with FATAL as logging level + + Args: + message (str): message to log + exit_code (int, optional): exit code to use for the SystemExit + exception to be raised. Default to -1. + """ + self.log(message, level=FATAL, exit_code=exit_code) + + def _post_fatal(self, message=None, exit_code=None): + """Sometimes you want to create a report or cleanup + or notify on fatal(); override this method to do so. + + Please don't use this for anything significantly long-running. + + Args: + message (str, optional): message to report. Default to None + exit_code (int, optional): exit code to use for the SystemExit + exception to be raised. Default to None + """ + pass + + +# OutputParser {{{1 +class OutputParser(LogMixin): + """Helper object to parse command output. + + This will buffer output if needed, so we can go back and mark + [(linenum - 10) : linenum+10] as errors if need be, without having to + get all the output first. + + linenum+10 will be easy; we can set self.num_post_context_lines to 10, + and self.num_post_context_lines-- as we mark each line to at least error + level X. + + linenum-10 will be trickier. We'll not only need to save the line + itself, but also the level that we've set for that line previously, + whether by matching on that line, or by a previous line's context. + We should only log that line if all output has ended (self.finish() ?); + otherwise store a list of dictionaries in self.context_buffer that is + buffered up to self.num_pre_context_lines (set to the largest + pre-context-line setting in error_list.) + """ + + def __init__( + self, config=None, log_obj=None, error_list=None, log_output=True, **kwargs + ): + """Initialization method for the OutputParser class + + Args: + config (dict, optional): dictionary containing values such as `log_level` + or `log_to_console`. Defaults to `None`. + log_obj (BaseLogger, optional): instance of the BaseLogger class. Defaults + to `None`. + error_list (list, optional): list of the error to look for. Defaults to + `None`. + log_output (boolean, optional): flag for deciding if the commands + output should be logged or not. + Defaults to `True`. + """ + self.config = config + self.log_obj = log_obj + self.error_list = error_list or [] + self.log_output = log_output + self.num_errors = 0 + self.num_warnings = 0 + # TODO context_lines. + # Not in use yet, but will be based off error_list. + self.context_buffer = [] + self.num_pre_context_lines = 0 + self.num_post_context_lines = 0 + self.worst_log_level = INFO + + def parse_single_line(self, line): + """parse a console output line and check if it matches one in `error_list`, + if so then log it according to `log_output`. + + Args: + line (str): command line output to parse. + + Returns: + If the line hits a match in the error_list, the new log level the line was + (or should be) logged at is returned. Otherwise, returns None. + """ + for error_check in self.error_list: + # TODO buffer for context_lines. + match = False + if "substr" in error_check: + if error_check["substr"] in line: + match = True + elif "regex" in error_check: + if error_check["regex"].search(line): + match = True + else: + self.warning("error_list: 'substr' and 'regex' not in %s" % error_check) + if match: + log_level = error_check.get("level", INFO) + if self.log_output: + message = " %s" % line + if error_check.get("explanation"): + message += "\n %s" % error_check["explanation"] + if error_check.get("summary"): + self.add_summary(message, level=log_level) + else: + self.log(message, level=log_level) + if log_level in (ERROR, CRITICAL, FATAL): + self.num_errors += 1 + if log_level == WARNING: + self.num_warnings += 1 + self.worst_log_level = self.worst_level(log_level, self.worst_log_level) + return log_level + + if self.log_output: + self.info(" %s" % line) + + def add_lines(self, output): + """process a string or list of strings, decode them to utf-8,strip + them of any trailing whitespaces and parse them using `parse_single_line` + + strings consisting only of whitespaces are ignored. + + Args: + output (str | list): string or list of string to parse + """ + if not isinstance(output, list): + output = [output] + + for line in output: + if not line or line.isspace(): + continue + + if isinstance(line, binary_type): + line = line.decode("utf-8", "replace") + + line = line.rstrip() + self.parse_single_line(line) + + +# BaseLogger {{{1 +class BaseLogger(object): + """Base class in charge of logging handling logic such as creating logging + files, dirs, attaching to the console output and managing its output. + + Attributes: + LEVELS (dict): flat copy of the `LOG_LEVELS` attribute of the `log` module. + + TODO: status? There may be a status object or status capability in + either logging or config that allows you to count the number of + error,critical,fatal messages for us to count up at the end (aiming + for 0). + """ + + LEVELS = LOG_LEVELS + + def __init__( + self, + log_level=INFO, + log_format="%(message)s", + log_date_format="%H:%M:%S", + log_name="test", + log_to_console=True, + log_dir=".", + log_to_raw=False, + logger_name="", + append_to_log=False, + ): + """BaseLogger constructor + + Args: + log_level (str, optional): mozharness log level name. Defaults to INFO. + log_format (str, optional): message format string to instantiate a + `logging.Formatter`. Defaults to '%(message)s' + log_date_format (str, optional): date format string to instantiate a + `logging.Formatter`. Defaults to '%H:%M:%S' + log_name (str, optional): name to use for the log files to be created. + Defaults to 'test' + log_to_console (bool, optional): set to True in order to create a Handler + instance base on the `Logger` + current instance. Defaults to True. + log_dir (str, optional): directory location to store the log files. + Defaults to '.', i.e. current working directory. + log_to_raw (bool, optional): set to True in order to create a *raw.log + file. Defaults to False. + logger_name (str, optional): currently useless parameter. According + to the code comments, it could be useful + if we were to have multiple logging + objects that don't trample each other. + append_to_log (bool, optional): set to True if the logging content should + be appended to old logging files. Defaults to False + """ + + self.log_format = log_format + self.log_date_format = log_date_format + self.log_to_console = log_to_console + self.log_to_raw = log_to_raw + self.log_level = log_level + self.log_name = log_name + self.log_dir = log_dir + self.append_to_log = append_to_log + + # Not sure what I'm going to use this for; useless unless we + # can have multiple logging objects that don't trample each other + self.logger_name = logger_name + + self.all_handlers = [] + self.log_files = {} + + self.create_log_dir() + + def create_log_dir(self): + """create a logging directory if it doesn't exits. If there is a file with + same name as the future logging directory it will be deleted. + """ + + if os.path.exists(self.log_dir): + if not os.path.isdir(self.log_dir): + os.remove(self.log_dir) + if not os.path.exists(self.log_dir): + os.makedirs(self.log_dir) + self.abs_log_dir = os.path.abspath(self.log_dir) + + def init_message(self, name=None): + """log an init message stating the name passed to it, the current date + and time and, the current working directory. + + Args: + name (str, optional): name to use for the init log message. Defaults to + the current instance class name. + """ + + if not name: + name = self.__class__.__name__ + self.log_message( + "%s online at %s in %s" + % (name, datetime.utcnow().strftime("%Y%m%d %H:%M:%SZ"), os.getcwd()) + ) + + def get_logger_level(self, level=None): + """translate the level name passed to it and return its numeric value + according to `LEVELS` values. + + Args: + level (str, optional): level name to be translated. Defaults to the current + instance `log_level`. + + Returns: + int: numeric value of the log level name passed to it or 0 (NOTSET) if the + name doesn't exists + """ + + if not level: + level = self.log_level + return self.LEVELS.get(level, logging.NOTSET) + + def get_log_formatter(self, log_format=None, date_format=None): + """create a `logging.Formatter` base on the log and date format. + + Args: + log_format (str, optional): log format to use for the Formatter constructor. + Defaults to the current instance log format. + date_format (str, optional): date format to use for the Formatter constructor. + Defaults to the current instance date format. + + Returns: + logging.Formatter: instance created base on the passed arguments + """ + + if not log_format: + log_format = self.log_format + if not date_format: + date_format = self.log_date_format + return logging.Formatter(log_format, date_format) + + def new_logger(self): + """Create a new logger based on the ROOT_LOGGER instance. By default there are no handlers. + The new logger becomes a member variable of the current instance as `self.logger`. + """ + + self.logger = ROOT_LOGGER + self.logger.setLevel(self.get_logger_level()) + self._clear_handlers() + if self.log_to_console: + self.add_console_handler() + if self.log_to_raw: + self.log_files["raw"] = "%s_raw.log" % self.log_name + self.add_file_handler( + os.path.join(self.abs_log_dir, self.log_files["raw"]), + log_format="%(message)s", + ) + + def _clear_handlers(self): + """remove all handlers stored in `self.all_handlers`. + + To prevent dups -- logging will preserve Handlers across + objects :( + """ + attrs = dir(self) + if "all_handlers" in attrs and "logger" in attrs: + for handler in self.all_handlers: + self.logger.removeHandler(handler) + self.all_handlers = [] + + def __del__(self): + """BaseLogger class destructor; shutdown, flush and remove all handlers""" + logging.shutdown() + self._clear_handlers() + + def add_console_handler(self, log_level=None, log_format=None, date_format=None): + """create a `logging.StreamHandler` using `sys.stderr` for logging the console + output and add it to the `all_handlers` member variable + + Args: + log_level (str, optional): useless argument. Not used here. + Defaults to None. + log_format (str, optional): format used for the Formatter attached to the + StreamHandler. Defaults to None. + date_format (str, optional): format used for the Formatter attached to the + StreamHandler. Defaults to None. + """ + + console_handler = logging.StreamHandler() + console_handler.setFormatter( + self.get_log_formatter(log_format=log_format, date_format=date_format) + ) + self.logger.addHandler(console_handler) + self.all_handlers.append(console_handler) + + def add_file_handler( + self, log_path, log_level=None, log_format=None, date_format=None + ): + """create a `logging.FileHandler` base on the path, log and date format + and add it to the `all_handlers` member variable. + + Args: + log_path (str): filepath to use for the `FileHandler`. + log_level (str, optional): useless argument. Not used here. + Defaults to None. + log_format (str, optional): log format to use for the Formatter constructor. + Defaults to the current instance log format. + date_format (str, optional): date format to use for the Formatter constructor. + Defaults to the current instance date format. + """ + + if not self.append_to_log and os.path.exists(log_path): + os.remove(log_path) + file_handler = logging.FileHandler(log_path) + file_handler.setLevel(self.get_logger_level(log_level)) + file_handler.setFormatter( + self.get_log_formatter(log_format=log_format, date_format=date_format) + ) + self.logger.addHandler(file_handler) + self.all_handlers.append(file_handler) + + def log_message(self, message, level=INFO, exit_code=-1, post_fatal_callback=None): + """Generic log method. + There should be more options here -- do or don't split by line, + use os.linesep instead of assuming \n, be able to pass in log level + by name or number. + + Adding the IGNORE special level for runCommand. + + Args: + message (str): message to log using the current `logger` + level (str, optional): log level of the message. Defaults to INFO. + exit_code (int, optional): exit code to use in case of a FATAL level is used. + Defaults to -1. + post_fatal_callback (function, optional): function to callback in case of + of a fatal log level. Defaults None. + """ + + if level == IGNORE: + return + for line in message.splitlines(): + self.logger.log(self.get_logger_level(level), line) + if level == FATAL: + if callable(post_fatal_callback): + self.logger.log(FATAL_LEVEL, "Running post_fatal callback...") + post_fatal_callback(message=message, exit_code=exit_code) + self.logger.log(FATAL_LEVEL, "Exiting %d" % exit_code) + raise SystemExit(exit_code) + + +# SimpleFileLogger {{{1 +class SimpleFileLogger(BaseLogger): + """Subclass of the BaseLogger. + + Create one logFile. Possibly also output to the terminal and a raw log + (no prepending of level or date) + """ + + def __init__( + self, + log_format="%(asctime)s %(levelname)8s - %(message)s", + logger_name="Simple", + log_dir="logs", + **kwargs + ): + """SimpleFileLogger constructor. Calls its superclass constructor, + creates a new logger instance and log an init message. + + Args: + log_format (str, optional): message format string to instantiate a + `logging.Formatter`. Defaults to + '%(asctime)s %(levelname)8s - %(message)s' + log_name (str, optional): name to use for the log files to be created. + Defaults to 'Simple' + log_dir (str, optional): directory location to store the log files. + Defaults to 'logs' + **kwargs: Arbitrary keyword arguments passed to the BaseLogger constructor + """ + + BaseLogger.__init__( + self, + logger_name=logger_name, + log_format=log_format, + log_dir=log_dir, + **kwargs + ) + self.new_logger() + self.init_message() + + def new_logger(self): + """calls the BaseLogger.new_logger method and adds a file handler to it.""" + + BaseLogger.new_logger(self) + self.log_path = os.path.join(self.abs_log_dir, "%s.log" % self.log_name) + self.log_files["default"] = self.log_path + self.add_file_handler(self.log_path) + + +# MultiFileLogger {{{1 +class MultiFileLogger(BaseLogger): + """Subclass of the BaseLogger class. Create a log per log level in log_dir. + Possibly also output to the terminal and a raw log (no prepending of level or date) + """ + + def __init__( + self, + logger_name="Multi", + log_format="%(asctime)s %(levelname)8s - %(message)s", + log_dir="logs", + log_to_raw=True, + **kwargs + ): + """MultiFileLogger constructor. Calls its superclass constructor, + creates a new logger instance and log an init message. + + Args: + log_format (str, optional): message format string to instantiate a + `logging.Formatter`. Defaults to + '%(asctime)s %(levelname)8s - %(message)s' + log_name (str, optional): name to use for the log files to be created. + Defaults to 'Multi' + log_dir (str, optional): directory location to store the log files. + Defaults to 'logs' + log_to_raw (bool, optional): set to True in order to create a *raw.log + file. Defaults to False. + **kwargs: Arbitrary keyword arguments passed to the BaseLogger constructor + """ + + BaseLogger.__init__( + self, + logger_name=logger_name, + log_format=log_format, + log_to_raw=log_to_raw, + log_dir=log_dir, + **kwargs + ) + + self.new_logger() + self.init_message() + + def new_logger(self): + """calls the BaseLogger.new_logger method and adds a file handler per + logging level in the `LEVELS` class attribute. + """ + + BaseLogger.new_logger(self) + min_logger_level = self.get_logger_level(self.log_level) + for level in list(self.LEVELS.keys()): + if self.get_logger_level(level) >= min_logger_level: + self.log_files[level] = "%s_%s.log" % (self.log_name, level) + self.add_file_handler( + os.path.join(self.abs_log_dir, self.log_files[level]), + log_level=level, + ) + + +# ConsoleLogger {{{1 +class ConsoleLogger(BaseLogger): + """Subclass of the BaseLogger. + + Output logs to stderr. + """ + + def __init__( + self, + log_format="%(levelname)8s - %(message)s", + log_date_format="%H:%M:%S", + logger_name="Console", + **kwargs + ): + """ConsoleLogger constructor. Calls its superclass constructor, + creates a new logger instance and log an init message. + + Args: + log_format (str, optional): message format string to instantiate a + `logging.Formatter`. Defaults to + '%(levelname)8s - %(message)s' + **kwargs: Arbitrary keyword arguments passed to the BaseLogger + constructor + """ + + BaseLogger.__init__( + self, logger_name=logger_name, log_format=log_format, **kwargs + ) + self.new_logger() + self.init_message() + + def new_logger(self): + """Create a new logger based on the ROOT_LOGGER instance. By default + there are no handlers. The new logger becomes a member variable of the + current instance as `self.logger`. + """ + self.logger = ROOT_LOGGER + self.logger.setLevel(self.get_logger_level()) + self._clear_handlers() + self.add_console_handler() + + +def numeric_log_level(level): + """Converts a mozharness log level (string) to the corresponding logger + level (number). This function makes possible to set the log level + in functions that do not inherit from LogMixin + + Args: + level (str): log level name to convert. + + Returns: + int: numeric value of the log level name. + """ + return LOG_LEVELS[level] + + +# __main__ {{{1 +if __name__ == "__main__": + """Useless comparison, due to the `pass` keyword on its body""" + pass diff --git a/testing/mozharness/mozharness/base/parallel.py b/testing/mozharness/mozharness/base/parallel.py new file mode 100755 index 0000000000..678dadeede --- /dev/null +++ b/testing/mozharness/mozharness/base/parallel.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""Generic ways to parallelize jobs. +""" + + +# ChunkingMixin {{{1 +class ChunkingMixin(object): + """Generic Chunking helper methods.""" + + def query_chunked_list(self, possible_list, this_chunk, total_chunks, sort=False): + """Split a list of items into a certain number of chunks and + return the subset of that will occur in this chunk. + + Ported from build.l10n.getLocalesForChunk in build/tools. + """ + if sort: + possible_list = sorted(possible_list) + else: + # Copy to prevent altering + possible_list = possible_list[:] + length = len(possible_list) + for c in range(1, total_chunks + 1): + n = length // total_chunks + # If the total number of items isn't evenly divisible by the + # number of chunks, we need to append one more onto some chunks + if c <= (length % total_chunks): + n += 1 + if c == this_chunk: + return possible_list[0:n] + del possible_list[0:n] diff --git a/testing/mozharness/mozharness/base/python.py b/testing/mozharness/mozharness/base/python.py new file mode 100644 index 0000000000..73e50bfe7c --- /dev/null +++ b/testing/mozharness/mozharness/base/python.py @@ -0,0 +1,1182 @@ +#!/usr/bin/env python +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""Python usage, esp. virtualenv. +""" + +import errno +import json +import os +import shutil +import site +import socket +import subprocess +import sys +import traceback +from pathlib import Path + +try: + import urlparse +except ImportError: + import urllib.parse as urlparse + +from six import string_types + +import mozharness +from mozharness.base.errors import VirtualenvErrorList +from mozharness.base.log import FATAL, WARNING +from mozharness.base.script import ( + PostScriptAction, + PostScriptRun, + PreScriptAction, + ScriptMixin, +) + +external_tools_path = os.path.join( + os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))), + "external_tools", +) + + +def get_tlsv1_post(): + # Monkeypatch to work around SSL errors in non-bleeding-edge Python. + # Taken from https://lukasa.co.uk/2013/01/Choosing_SSL_Version_In_Requests/ + import ssl + + import requests + from requests.packages.urllib3.poolmanager import PoolManager + + class TLSV1Adapter(requests.adapters.HTTPAdapter): + def init_poolmanager(self, connections, maxsize, block=False): + self.poolmanager = PoolManager( + num_pools=connections, + maxsize=maxsize, + block=block, + ssl_version=ssl.PROTOCOL_TLSv1, + ) + + s = requests.Session() + s.mount("https://", TLSV1Adapter()) + return s.post + + +# Virtualenv {{{1 +virtualenv_config_options = [ + [ + ["--virtualenv-path"], + { + "action": "store", + "dest": "virtualenv_path", + "default": "venv", + "help": "Specify the path to the virtualenv top level directory", + }, + ], + [ + ["--find-links"], + { + "action": "extend", + "dest": "find_links", + "default": ["https://pypi.pub.build.mozilla.org/pub/"], + "help": "URL to look for packages at", + }, + ], + [ + ["--pip-index"], + { + "action": "store_true", + "default": False, + "dest": "pip_index", + "help": "Use pip indexes", + }, + ], + [ + ["--no-pip-index"], + { + "action": "store_false", + "dest": "pip_index", + "help": "Don't use pip indexes (default)", + }, + ], +] + + +class VirtualenvMixin(object): + """BaseScript mixin, designed to create and use virtualenvs. + + Config items: + * virtualenv_path points to the virtualenv location on disk. + * virtualenv_modules lists the module names. + * MODULE_url list points to the module URLs (optional) + Requires virtualenv to be in PATH. + Depends on ScriptMixin + """ + + python_paths = {} + site_packages_path = None + + def __init__(self, *args, **kwargs): + self._virtualenv_modules = [] + super(VirtualenvMixin, self).__init__(*args, **kwargs) + + def register_virtualenv_module( + self, + name=None, + url=None, + method=None, + requirements=None, + optional=False, + two_pass=False, + editable=False, + ): + """Register a module to be installed with the virtualenv. + + This method can be called up until create_virtualenv() to register + modules that should be installed in the virtualenv. + + See the documentation for install_module for how the arguments are + applied. + """ + self._virtualenv_modules.append( + (name, url, method, requirements, optional, two_pass, editable) + ) + + def query_virtualenv_path(self): + """Determine the absolute path to the virtualenv.""" + dirs = self.query_abs_dirs() + + if "abs_virtualenv_dir" in dirs: + return dirs["abs_virtualenv_dir"] + + p = self.config["virtualenv_path"] + if not p: + self.fatal( + "virtualenv_path config option not set; " "this should never happen" + ) + + if os.path.isabs(p): + return p + else: + return os.path.join(dirs["abs_work_dir"], p) + + def query_python_path(self, binary="python"): + """Return the path of a binary inside the virtualenv, if + c['virtualenv_path'] is set; otherwise return the binary name. + Otherwise return None + """ + if binary not in self.python_paths: + bin_dir = "bin" + if self._is_windows(): + bin_dir = "Scripts" + virtualenv_path = self.query_virtualenv_path() + self.python_paths[binary] = os.path.abspath( + os.path.join(virtualenv_path, bin_dir, binary) + ) + + return self.python_paths[binary] + + def query_python_site_packages_path(self): + if self.site_packages_path: + return self.site_packages_path + python = self.query_python_path() + self.site_packages_path = self.get_output_from_command( + [ + python, + "-c", + "from distutils.sysconfig import get_python_lib; " + + "print(get_python_lib())", + ] + ) + return self.site_packages_path + + def package_versions( + self, pip_freeze_output=None, error_level=WARNING, log_output=False + ): + """ + reads packages from `pip freeze` output and returns a dict of + {package_name: 'version'} + """ + packages = {} + + if pip_freeze_output is None: + # get the output from `pip freeze` + pip = self.query_python_path("pip") + if not pip: + self.log("package_versions: Program pip not in path", level=error_level) + return {} + pip_freeze_output = self.get_output_from_command( + [pip, "list", "--format", "freeze", "--no-index"], + silent=True, + ignore_errors=True, + ) + if not isinstance(pip_freeze_output, string_types): + self.fatal( + "package_versions: Error encountered running `pip freeze`: " + + pip_freeze_output + ) + + for l in pip_freeze_output.splitlines(): + # parse the output into package, version + line = l.strip() + if not line: + # whitespace + continue + if line.startswith("-"): + # not a package, probably like '-e http://example.com/path#egg=package-dev' + continue + if "==" not in line: + self.fatal("pip_freeze_packages: Unrecognized output line: %s" % line) + package, version = line.split("==", 1) + packages[package] = version + + if log_output: + self.info("Current package versions:") + for package in sorted(packages): + self.info(" %s == %s" % (package, packages[package])) + + return packages + + def is_python_package_installed(self, package_name, error_level=WARNING): + """ + Return whether the package is installed + """ + # pylint --py3k W1655 + package_versions = self.package_versions(error_level=error_level) + return package_name.lower() in [package.lower() for package in package_versions] + + def install_module( + self, + module=None, + module_url=None, + install_method=None, + requirements=(), + optional=False, + global_options=[], + no_deps=False, + editable=False, + ): + """ + Install module via pip. + + module_url can be a url to a python package tarball, a path to + a directory containing a setup.py (absolute or relative to work_dir) + or None, in which case it will default to the module name. + + requirements is a list of pip requirements files. If specified, these + will be combined with the module_url (if any), like so: + + pip install -r requirements1.txt -r requirements2.txt module_url + """ + import http.client + import time + import urllib.error + import urllib.request + + c = self.config + dirs = self.query_abs_dirs() + env = self.query_env() + venv_path = self.query_virtualenv_path() + self.info("Installing %s into virtualenv %s" % (module, venv_path)) + if not module_url: + module_url = module + if install_method in (None, "pip"): + if not module_url and not requirements: + self.fatal("Must specify module and/or requirements") + pip = self.query_python_path("pip") + if c.get("verbose_pip"): + command = [pip, "-v", "install"] + else: + command = [pip, "install"] + if no_deps: + command += ["--no-deps"] + # To avoid timeouts with our pypi server, increase default timeout: + # https://bugzilla.mozilla.org/show_bug.cgi?id=1007230#c802 + command += ["--timeout", str(c.get("pip_timeout", 120))] + for requirement in requirements: + command += ["-r", requirement] + if c.get("find_links") and not c["pip_index"]: + command += ["--no-index"] + for opt in global_options: + command += ["--global-option", opt] + else: + self.fatal( + "install_module() doesn't understand an install_method of %s!" + % install_method + ) + + # find_links connection check while loop + find_links_added = 0 + fl_retry_sleep_seconds = 10 + fl_max_retry_minutes = 5 + fl_retry_loops = (fl_max_retry_minutes * 60) / fl_retry_sleep_seconds + for link in c.get("find_links", []): + parsed = urlparse.urlparse(link) + dns_result = None + get_result = None + retry_counter = 0 + while retry_counter < fl_retry_loops and ( + dns_result is None or get_result is None + ): + try: + dns_result = socket.gethostbyname(parsed.hostname) + get_result = urllib.request.urlopen(link, timeout=10).read() + break + except socket.gaierror: + retry_counter += 1 + self.warning( + "find_links: dns check failed for %s, sleeping %ss and retrying..." + % (parsed.hostname, fl_retry_sleep_seconds) + ) + time.sleep(fl_retry_sleep_seconds) + except ( + urllib.error.HTTPError, + urllib.error.URLError, + socket.timeout, + http.client.RemoteDisconnected, + ) as e: + retry_counter += 1 + self.warning( + "find_links: connection check failed for %s, sleeping %ss and retrying..." + % (link, fl_retry_sleep_seconds) + ) + self.warning("find_links: exception: %s" % e) + time.sleep(fl_retry_sleep_seconds) + # now that the connectivity check is good, add the link + if dns_result and get_result: + self.info("find_links: connection checks passed for %s, adding." % link) + find_links_added += 1 + command.extend(["--find-links", link]) + else: + self.warning( + "find_links: connection checks failed for %s" + ", but max retries reached. continuing..." % link + ) + + # TODO: make this fatal if we always see failures after this + if find_links_added == 0: + self.warning( + "find_links: no find_links added. pip installation will probably fail!" + ) + + # module_url can be None if only specifying requirements files + if module_url: + if editable: + if install_method in (None, "pip"): + command += ["-e"] + else: + self.fatal( + "editable installs not supported for install_method %s" + % install_method + ) + command += [module_url] + + # If we're only installing a single requirements file, use + # the file's directory as cwd, so relative paths work correctly. + cwd = dirs["abs_work_dir"] + if not module and len(requirements) == 1: + cwd = os.path.dirname(requirements[0]) + + # Allow for errors while building modules, but require a + # return status of 0. + self.retry( + self.run_command, + # None will cause default value to be used + attempts=1 if optional else None, + good_statuses=(0,), + error_level=WARNING if optional else FATAL, + error_message=("Could not install python package: failed all attempts."), + args=[ + command, + ], + kwargs={ + "error_list": VirtualenvErrorList, + "cwd": cwd, + "env": env, + # WARNING only since retry will raise final FATAL if all + # retry attempts are unsuccessful - and we only want + # an ERROR of FATAL if *no* retry attempt works + "error_level": WARNING, + }, + ) + + def create_virtualenv(self, modules=(), requirements=()): + """ + Create a python virtualenv. + + This uses the copy of virtualenv that is vendored in mozharness. + + virtualenv_modules can be a list of module names to install, e.g. + + virtualenv_modules = ['module1', 'module2'] + + or it can be a heterogeneous list of modules names and dicts that + define a module by its name, url-or-path, and a list of its global + options. + + virtualenv_modules = [ + { + 'name': 'module1', + 'url': None, + 'global_options': ['--opt', '--without-gcc'] + }, + { + 'name': 'module2', + 'url': 'http://url/to/package', + 'global_options': ['--use-clang'] + }, + { + 'name': 'module3', + 'url': os.path.join('path', 'to', 'setup_py', 'dir') + 'global_options': [] + }, + 'module4' + ] + + virtualenv_requirements is an optional list of pip requirements files to + use when invoking pip, e.g., + + virtualenv_requirements = [ + '/path/to/requirements1.txt', + '/path/to/requirements2.txt' + ] + """ + c = self.config + dirs = self.query_abs_dirs() + venv_path = self.query_virtualenv_path() + self.info("Creating virtualenv %s" % venv_path) + + # Always use the virtualenv that is vendored since that is deterministic. + # base_work_dir is for when we're running with mozharness.zip, e.g. on + # test jobs + # abs_src_dir is for when we're running out of a checked out copy of + # the source code + vendor_search_dirs = [ + os.path.join("{base_work_dir}", "mozharness"), + "{abs_src_dir}", + ] + if "abs_src_dir" not in dirs and "repo_path" in self.config: + dirs["abs_src_dir"] = os.path.normpath(self.config["repo_path"]) + for d in vendor_search_dirs: + try: + src_dir = Path(d.format(**dirs)) + except KeyError: + continue + + pip_wheel_path = ( + src_dir + / "third_party" + / "python" + / "_venv" + / "wheels" + / "pip-23.0.1-py3-none-any.whl" + ) + setuptools_wheel_path = ( + src_dir + / "third_party" + / "python" + / "_venv" + / "wheels" + / "setuptools-51.2.0-py3-none-any.whl" + ) + + if all(path.exists() for path in (pip_wheel_path, setuptools_wheel_path)): + break + else: + self.fatal("Can't find 'pip' and 'setuptools' wheels") + + venv_python_bin = Path(self.query_python_path()) + + if venv_python_bin.exists(): + self.info( + "Virtualenv %s appears to already exist; " + "skipping virtualenv creation." % self.query_python_path() + ) + else: + self.run_command( + [sys.executable, "--version"], + ) + + # Temporary hack to get around a bug with venv in Python 3.7.3 in CI + # https://bugs.python.org/issue36441 + if self._is_windows(): + if sys.version_info[:3] == (3, 7, 3): + python_exe = Path(sys.executable) + debug_exe_dir = ( + python_exe.parent / "lib" / "venv" / "scripts" / "nt" + ) + + if debug_exe_dir.exists(): + + for executable in { + "python.exe", + "python_d.exe", + "pythonw.exe", + "pythonw_d.exe", + }: + expected_python_debug_exe = debug_exe_dir / executable + if not expected_python_debug_exe.exists(): + shutil.copy( + sys.executable, str(expected_python_debug_exe) + ) + + venv_creation_flags = ["-m", "venv", venv_path] + + if self._is_windows(): + # To workaround an issue on Windows10 jobs in CI we have to + # explicitly install the default pip separately. Ideally we + # could just remove the "--without-pip" above and get the same + # result, but that's apparently not always the case. + venv_creation_flags = venv_creation_flags + ["--without-pip"] + + self.mkdir_p(dirs["abs_work_dir"]) + self.run_command( + [sys.executable] + venv_creation_flags, + cwd=dirs["abs_work_dir"], + error_list=VirtualenvErrorList, + halt_on_failure=True, + ) + + if self._is_windows(): + self.run_command( + [str(venv_python_bin), "-m", "ensurepip", "--default-pip"], + cwd=dirs["abs_work_dir"], + halt_on_failure=True, + ) + + self._ensure_python_exe(venv_python_bin.parent) + + # We can work around a bug on some versions of Python 3.6 on + # Windows by copying the 'pyvenv.cfg' of the current venv + # to the new venv. This will make the new venv reference + # the original Python install instead of the current venv, + # which resolves the issue. There shouldn't be any harm in + # always doing this, but we'll play it safe and restrict it + # to Windows Python 3.6 anyway. + if self._is_windows() and sys.version_info[:2] == (3, 6): + this_venv = Path(sys.executable).parent.parent + this_venv_config = this_venv / "pyvenv.cfg" + if this_venv_config.exists(): + new_venv_config = Path(venv_path) / "pyvenv.cfg" + shutil.copyfile(str(this_venv_config), str(new_venv_config)) + + self.run_command( + [ + str(venv_python_bin), + "-m", + "pip", + "install", + "--only-binary", + ":all:", + "--disable-pip-version-check", + str(pip_wheel_path), + str(setuptools_wheel_path), + ], + cwd=dirs["abs_work_dir"], + error_list=VirtualenvErrorList, + halt_on_failure=True, + ) + + self.info(self.platform_name()) + if self.platform_name().startswith("macos"): + tmp_path = "{}/bin/bak".format(venv_path) + self.info( + "Copying venv python binaries to {} to clear for re-sign".format( + tmp_path + ) + ) + subprocess.call("mkdir -p {}".format(tmp_path), shell=True) + subprocess.call( + "cp {}/bin/python* {}/".format(venv_path, tmp_path), shell=True + ) + self.info("Replacing venv python binaries with reset copies") + subprocess.call( + "mv -f {}/* {}/bin/".format(tmp_path, venv_path), shell=True + ) + self.info( + "codesign -s - --preserve-metadata=identifier,entitlements,flags,runtime " + "-f {}/bin/*".format(venv_path) + ) + subprocess.call( + "codesign -s - --preserve-metadata=identifier,entitlements,flags,runtime -f " + "{}/bin/python*".format(venv_path), + shell=True, + ) + + if not modules: + modules = c.get("virtualenv_modules", []) + if not requirements: + requirements = c.get("virtualenv_requirements", []) + if not modules and requirements: + self.install_module(requirements=requirements, install_method="pip") + for module in modules: + module_url = module + global_options = [] + if isinstance(module, dict): + if module.get("name", None): + module_name = module["name"] + else: + self.fatal( + "Can't install module without module name: %s" % str(module) + ) + module_url = module.get("url", None) + global_options = module.get("global_options", []) + else: + module_url = self.config.get("%s_url" % module, module_url) + module_name = module + install_method = "pip" + self.install_module( + module=module_name, + module_url=module_url, + install_method=install_method, + requirements=requirements, + global_options=global_options, + ) + + for ( + module, + url, + method, + requirements, + optional, + two_pass, + editable, + ) in self._virtualenv_modules: + if two_pass: + self.install_module( + module=module, + module_url=url, + install_method=method, + requirements=requirements or (), + optional=optional, + no_deps=True, + editable=editable, + ) + self.install_module( + module=module, + module_url=url, + install_method=method, + requirements=requirements or (), + optional=optional, + editable=editable, + ) + + self.info("Done creating virtualenv %s." % venv_path) + + self.package_versions(log_output=True) + + def activate_virtualenv(self): + """Import the virtualenv's packages into this Python interpreter.""" + venv_root_dir = Path(self.query_virtualenv_path()) + venv_name = venv_root_dir.name + bin_path = Path(self.query_python_path()) + bin_dir = bin_path.parent + + if self._is_windows(): + site_packages_dir = venv_root_dir / "Lib" / "site-packages" + else: + site_packages_dir = ( + venv_root_dir + / "lib" + / "python{}.{}".format(*sys.version_info) + / "site-packages" + ) + + os.environ["PATH"] = os.pathsep.join( + [str(bin_dir)] + os.environ.get("PATH", "").split(os.pathsep) + ) + os.environ["VIRTUAL_ENV"] = venv_name + + prev_path = set(sys.path) + + site.addsitedir(str(site_packages_dir.resolve())) + + new_path = list(sys.path) + + sys.path[:] = [p for p in new_path if p not in prev_path] + [ + p for p in new_path if p in prev_path + ] + + sys.real_prefix = sys.prefix + sys.prefix = str(venv_root_dir) + sys.executable = str(bin_path) + + def _ensure_python_exe(self, python_exe_root: Path): + """On some machines in CI venv does not behave consistently. Sometimes + only a "python3" executable is created, but we expect "python". Since + they are functionally identical, we can just copy "python3" to "python" + (and vice-versa) to solve the problem. + """ + python3_exe_path = python_exe_root / "python3" + python_exe_path = python_exe_root / "python" + + if self._is_windows(): + python3_exe_path = python3_exe_path.with_suffix(".exe") + python_exe_path = python_exe_path.with_suffix(".exe") + + if python3_exe_path.exists() and not python_exe_path.exists(): + shutil.copy(str(python3_exe_path), str(python_exe_path)) + + if python_exe_path.exists() and not python3_exe_path.exists(): + shutil.copy(str(python_exe_path), str(python3_exe_path)) + + if not python_exe_path.exists() and not python3_exe_path.exists(): + raise Exception( + f'Neither a "{python_exe_path.name}" or "{python3_exe_path.name}" ' + f"were found. This means something unexpected happened during the " + f"virtual environment creation and we cannot proceed." + ) + + +# This is (sadly) a mixin for logging methods. +class PerfherderResourceOptionsMixin(ScriptMixin): + def perfherder_resource_options(self): + """Obtain a list of extraOptions values to identify the env.""" + opts = [] + + if "TASKCLUSTER_INSTANCE_TYPE" in os.environ: + # Include the instance type so results can be grouped. + opts.append("taskcluster-%s" % os.environ["TASKCLUSTER_INSTANCE_TYPE"]) + else: + # We assume !taskcluster => buildbot. + instance = "unknown" + + # Try to load EC2 instance type from metadata file. This file + # may not exist in many scenarios (including when inside a chroot). + # So treat it as optional. + try: + # This file should exist on Linux in EC2. + with open("/etc/instance_metadata.json", "rb") as fh: + im = json.load(fh) + instance = im.get("aws_instance_type", "unknown").encode("ascii") + except IOError as e: + if e.errno != errno.ENOENT: + raise + self.info( + "instance_metadata.json not found; unable to " + "determine instance type" + ) + except Exception: + self.warning( + "error reading instance_metadata: %s" % traceback.format_exc() + ) + + opts.append("buildbot-%s" % instance) + + return opts + + +class ResourceMonitoringMixin(PerfherderResourceOptionsMixin): + """Provides resource monitoring capabilities to scripts. + + When this class is in the inheritance chain, resource usage stats of the + executing script will be recorded. + + This class requires the VirtualenvMixin in order to install a package used + for recording resource usage. + + While we would like to record resource usage for the entirety of a script, + since we require an external package, we can only record resource usage + after that package is installed (as part of creating the virtualenv). + That's just the way things have to be. + """ + + def __init__(self, *args, **kwargs): + super(ResourceMonitoringMixin, self).__init__(*args, **kwargs) + + self.register_virtualenv_module("psutil>=5.9.0", method="pip", optional=True) + self.register_virtualenv_module( + "mozsystemmonitor==1.0.1", method="pip", optional=True + ) + self.register_virtualenv_module("jsonschema==2.5.1", method="pip") + self._resource_monitor = None + + # 2-tuple of (name, options) to assign Perfherder resource monitor + # metrics to. This needs to be assigned by a script in order for + # Perfherder metrics to be reported. + self.resource_monitor_perfherder_id = None + + @PostScriptAction("create-virtualenv") + def _start_resource_monitoring(self, action, success=None): + self.activate_virtualenv() + + # Resource Monitor requires Python 2.7, however it's currently optional. + # Remove when all machines have had their Python version updated (bug 711299). + if sys.version_info[:2] < (2, 7): + self.warning( + "Resource monitoring will not be enabled! Python 2.7+ required." + ) + return + + try: + from mozsystemmonitor.resourcemonitor import SystemResourceMonitor + + self.info("Starting resource monitoring.") + self._resource_monitor = SystemResourceMonitor(poll_interval=1.0) + self._resource_monitor.start() + except Exception: + self.warning( + "Unable to start resource monitor: %s" % traceback.format_exc() + ) + + @PreScriptAction + def _resource_record_pre_action(self, action): + # Resource monitor isn't available until after create-virtualenv. + if not self._resource_monitor: + return + + self._resource_monitor.begin_phase(action) + + @PostScriptAction + def _resource_record_post_action(self, action, success=None): + # Resource monitor isn't available until after create-virtualenv. + if not self._resource_monitor: + return + + self._resource_monitor.finish_phase(action) + + @PostScriptRun + def _resource_record_post_run(self): + if not self._resource_monitor: + return + + # This should never raise an exception. This is a workaround until + # mozsystemmonitor is fixed. See bug 895388. + try: + self._resource_monitor.stop() + self._log_resource_usage() + + # Upload a JSON file containing the raw resource data. + try: + upload_dir = self.query_abs_dirs()["abs_blob_upload_dir"] + if not os.path.exists(upload_dir): + os.makedirs(upload_dir) + with open(os.path.join(upload_dir, "resource-usage.json"), "w") as fh: + json.dump( + self._resource_monitor.as_dict(), fh, sort_keys=True, indent=4 + ) + except (AttributeError, KeyError): + self.exception("could not upload resource usage JSON", level=WARNING) + + except Exception: + self.warning( + "Exception when reporting resource usage: %s" % traceback.format_exc() + ) + + def _log_resource_usage(self): + # Delay import because not available until virtualenv is populated. + import jsonschema + + rm = self._resource_monitor + + if rm.start_time is None: + return + + def resources(phase): + cpu_percent = rm.aggregate_cpu_percent(phase=phase, per_cpu=False) + cpu_times = rm.aggregate_cpu_times(phase=phase, per_cpu=False) + io = rm.aggregate_io(phase=phase) + + swap_in = sum(m.swap.sin for m in rm.measurements) + swap_out = sum(m.swap.sout for m in rm.measurements) + + return cpu_percent, cpu_times, io, (swap_in, swap_out) + + def log_usage(prefix, duration, cpu_percent, cpu_times, io): + message = ( + "{prefix} - Wall time: {duration:.0f}s; " + "CPU: {cpu_percent}; " + "Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; " + "Read time: {io_read_time}; Write time: {io_write_time}" + ) + + # XXX Some test harnesses are complaining about a string being + # being fed into a 'f' formatter. This will help diagnose the + # issue. + if cpu_percent: + # pylint: disable=W1633 + cpu_percent_str = str(round(cpu_percent)) + "%" + else: + cpu_percent_str = "Can't collect data" + + try: + self.info( + message.format( + prefix=prefix, + duration=duration, + cpu_percent=cpu_percent_str, + io_read_bytes=io.read_bytes, + io_write_bytes=io.write_bytes, + io_read_time=io.read_time, + io_write_time=io.write_time, + ) + ) + + except ValueError: + self.warning("Exception when formatting: %s" % traceback.format_exc()) + + cpu_percent, cpu_times, io, (swap_in, swap_out) = resources(None) + duration = rm.end_time - rm.start_time + + # Write out Perfherder data if configured. + if self.resource_monitor_perfherder_id: + perfherder_name, perfherder_options = self.resource_monitor_perfherder_id + + suites = [] + overall = [] + + if cpu_percent: + overall.append( + { + "name": "cpu_percent", + "value": cpu_percent, + } + ) + + overall.extend( + [ + {"name": "io_write_bytes", "value": io.write_bytes}, + {"name": "io.read_bytes", "value": io.read_bytes}, + {"name": "io_write_time", "value": io.write_time}, + {"name": "io_read_time", "value": io.read_time}, + ] + ) + + suites.append( + { + "name": "%s.overall" % perfherder_name, + "extraOptions": perfherder_options + + self.perfherder_resource_options(), + "subtests": overall, + } + ) + + for phase in rm.phases.keys(): + phase_duration = rm.phases[phase][1] - rm.phases[phase][0] + subtests = [ + { + "name": "time", + "value": phase_duration, + } + ] + cpu_percent = rm.aggregate_cpu_percent(phase=phase, per_cpu=False) + if cpu_percent is not None: + subtests.append( + { + "name": "cpu_percent", + "value": rm.aggregate_cpu_percent( + phase=phase, per_cpu=False + ), + } + ) + + # We don't report I/O during each step because measured I/O + # is system I/O and that I/O can be delayed (e.g. writes will + # buffer before being flushed and recorded in our metrics). + suites.append( + { + "name": "%s.%s" % (perfherder_name, phase), + "subtests": subtests, + } + ) + + data = { + "framework": {"name": "job_resource_usage"}, + "suites": suites, + } + + schema_path = os.path.join( + external_tools_path, "performance-artifact-schema.json" + ) + with open(schema_path, "rb") as fh: + schema = json.load(fh) + + # this will throw an exception that causes the job to fail if the + # perfherder data is not valid -- please don't change this + # behaviour, otherwise people will inadvertently break this + # functionality + self.info("Validating Perfherder data against %s" % schema_path) + jsonschema.validate(data, schema) + self.info("PERFHERDER_DATA: %s" % json.dumps(data)) + + log_usage("Total resource usage", duration, cpu_percent, cpu_times, io) + + # Print special messages so usage shows up in Treeherder. + if cpu_percent: + self._tinderbox_print("CPU usage<br/>{:,.1f}%".format(cpu_percent)) + + self._tinderbox_print( + "I/O read bytes / time<br/>{:,} / {:,}".format(io.read_bytes, io.read_time) + ) + self._tinderbox_print( + "I/O write bytes / time<br/>{:,} / {:,}".format( + io.write_bytes, io.write_time + ) + ) + + # Print CPU components having >1%. "cpu_times" is a data structure + # whose attributes are measurements. Ideally we'd have an API that + # returned just the measurements as a dict or something. + cpu_attrs = [] + for attr in sorted(dir(cpu_times)): + if attr.startswith("_"): + continue + if attr in ("count", "index"): + continue + cpu_attrs.append(attr) + + cpu_total = sum(getattr(cpu_times, attr) for attr in cpu_attrs) + + for attr in cpu_attrs: + value = getattr(cpu_times, attr) + # cpu_total can be 0.0. Guard against division by 0. + # pylint --py3k W1619 + percent = value / cpu_total * 100.0 if cpu_total else 0.0 + + if percent > 1.00: + self._tinderbox_print( + "CPU {}<br/>{:,.1f} ({:,.1f}%)".format(attr, value, percent) + ) + + # Swap on Windows isn't reported by psutil. + if not self._is_windows(): + self._tinderbox_print( + "Swap in / out<br/>{:,} / {:,}".format(swap_in, swap_out) + ) + + for phase in rm.phases.keys(): + start_time, end_time = rm.phases[phase] + cpu_percent, cpu_times, io, swap = resources(phase) + log_usage(phase, end_time - start_time, cpu_percent, cpu_times, io) + + def _tinderbox_print(self, message): + self.info("TinderboxPrint: %s" % message) + + +# This needs to be inherited only if you have already inherited ScriptMixin +class Python3Virtualenv(object): + """Support Python3.5+ virtualenv creation.""" + + py3_initialized_venv = False + + def py3_venv_configuration(self, python_path, venv_path): + """We don't use __init__ to allow integrating with other mixins. + + python_path - Path to Python 3 binary. + venv_path - Path to virtual environment to be created. + """ + self.py3_initialized_venv = True + self.py3_python_path = os.path.abspath(python_path) + version = self.get_output_from_command( + [self.py3_python_path, "--version"], env=self.query_env() + ).split()[-1] + # Using -m venv is only used on 3.5+ versions + assert version > "3.5.0" + self.py3_venv_path = os.path.abspath(venv_path) + self.py3_pip_path = os.path.join(self.py3_path_to_executables(), "pip") + + def py3_path_to_executables(self): + platform = self.platform_name() + if platform.startswith("win"): + return os.path.join(self.py3_venv_path, "Scripts") + else: + return os.path.join(self.py3_venv_path, "bin") + + def py3_venv_initialized(func): + def call(self, *args, **kwargs): + if not self.py3_initialized_venv: + raise Exception( + "You need to call py3_venv_configuration() " + "before using this method." + ) + func(self, *args, **kwargs) + + return call + + @py3_venv_initialized + def py3_create_venv(self): + """Create Python environment with python3 -m venv /path/to/venv.""" + if os.path.exists(self.py3_venv_path): + self.info( + "Virtualenv %s appears to already exist; skipping " + "virtualenv creation." % self.py3_venv_path + ) + else: + self.info("Running command...") + self.run_command( + "%s -m venv %s" % (self.py3_python_path, self.py3_venv_path), + error_list=VirtualenvErrorList, + halt_on_failure=True, + env=self.query_env(), + ) + + @py3_venv_initialized + def py3_install_modules(self, modules, use_mozharness_pip_config=True): + if not os.path.exists(self.py3_venv_path): + raise Exception("You need to call py3_create_venv() first.") + + for m in modules: + cmd = [self.py3_pip_path, "install"] + if use_mozharness_pip_config: + cmd += self._mozharness_pip_args() + cmd += [m] + self.run_command(cmd, env=self.query_env()) + + def _mozharness_pip_args(self): + """We have information in Mozharness configs that apply to pip""" + c = self.config + pip_args = [] + # To avoid timeouts with our pypi server, increase default timeout: + # https://bugzilla.mozilla.org/show_bug.cgi?id=1007230#c802 + pip_args += ["--timeout", str(c.get("pip_timeout", 120))] + + if c.get("find_links") and not c["pip_index"]: + pip_args += ["--no-index"] + + # Add --find-links pages to look at. Add --trusted-host automatically if + # the host isn't secure. This allows modern versions of pip to connect + # without requiring an override. + trusted_hosts = set() + for link in c.get("find_links", []): + parsed = urlparse.urlparse(link) + + try: + socket.gethostbyname(parsed.hostname) + except socket.gaierror as e: + self.info("error resolving %s (ignoring): %s" % (parsed.hostname, e)) + continue + + pip_args += ["--find-links", link] + if parsed.scheme != "https": + trusted_hosts.add(parsed.hostname) + + for host in sorted(trusted_hosts): + pip_args += ["--trusted-host", host] + + return pip_args + + @py3_venv_initialized + def py3_install_requirement_files( + self, requirements, pip_args=[], use_mozharness_pip_config=True + ): + """ + requirements - You can specify multiple requirements paths + """ + cmd = [self.py3_pip_path, "install"] + cmd += pip_args + + if use_mozharness_pip_config: + cmd += self._mozharness_pip_args() + + for requirement_path in requirements: + cmd += ["-r", requirement_path] + + self.run_command(cmd, env=self.query_env()) + + +# __main__ {{{1 + +if __name__ == "__main__": + """TODO: unit tests.""" + pass diff --git a/testing/mozharness/mozharness/base/script.py b/testing/mozharness/mozharness/base/script.py new file mode 100644 index 0000000000..0a5622440b --- /dev/null +++ b/testing/mozharness/mozharness/base/script.py @@ -0,0 +1,2551 @@ +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""Generic script objects. + +script.py, along with config.py and log.py, represents the core of +mozharness. +""" + +import codecs +import datetime +import errno +import fnmatch +import functools +import gzip +import hashlib +import inspect +import itertools +import os +import platform +import pprint +import re +import shutil +import socket +import ssl +import stat +import subprocess +import sys +import tarfile +import time +import traceback +import zipfile +import zlib +from contextlib import contextmanager +from io import BytesIO + +import mozinfo +import six +from mozprocess import ProcessHandler +from six import binary_type + +from mozharness.base.config import BaseConfig +from mozharness.base.log import ( + DEBUG, + ERROR, + FATAL, + INFO, + WARNING, + ConsoleLogger, + LogMixin, + MultiFileLogger, + OutputParser, + SimpleFileLogger, +) + +try: + import httplib +except ImportError: + import http.client as httplib +try: + import simplejson as json +except ImportError: + import json +try: + from urllib2 import Request, quote, urlopen +except ImportError: + from urllib.request import Request, quote, urlopen +try: + import urlparse +except ImportError: + import urllib.parse as urlparse +if os.name == "nt": + import locale + + try: + import win32api + import win32file + + PYWIN32 = True + except ImportError: + PYWIN32 = False + +try: + from urllib2 import HTTPError, URLError +except ImportError: + from urllib.error import HTTPError, URLError + + +class ContentLengthMismatch(Exception): + pass + + +def _validate_tar_member(member, path): + def _is_within_directory(directory, target): + real_directory = os.path.realpath(directory) + real_target = os.path.realpath(target) + prefix = os.path.commonprefix([real_directory, real_target]) + return prefix == real_directory + + member_path = os.path.join(path, member.name) + if not _is_within_directory(path, member_path): + raise Exception("Attempted path traversal in tar file: " + member.name) + if member.issym(): + link_path = os.path.join(os.path.dirname(member_path), member.linkname) + if not _is_within_directory(path, link_path): + raise Exception("Attempted link path traversal in tar file: " + member.name) + if member.mode & (stat.S_ISUID | stat.S_ISGID): + raise Exception("Attempted setuid or setgid in tar file: " + member.name) + + +def _safe_extract(tar, path=".", *, numeric_owner=False): + def _files(tar, path): + for member in tar: + _validate_tar_member(member, path) + yield member + + tar.extractall(path, members=_files(tar, path), numeric_owner=numeric_owner) + + +def platform_name(): + pm = PlatformMixin() + + if pm._is_linux() and pm._is_64_bit(): + return "linux64" + elif pm._is_linux() and not pm._is_64_bit(): + return "linux" + elif pm._is_darwin(): + return "macosx" + elif pm._is_windows() and pm._is_64_bit(): + return "win64" + elif pm._is_windows() and not pm._is_64_bit(): + return "win32" + else: + return None + + +class PlatformMixin(object): + def _is_windows(self): + """check if the current operating system is Windows. + + Returns: + bool: True if the current platform is Windows, False otherwise + """ + system = platform.system() + if system in ("Windows", "Microsoft"): + return True + if system.startswith("CYGWIN"): + return True + if os.name == "nt": + return True + + def _is_darwin(self): + """check if the current operating system is Darwin. + + Returns: + bool: True if the current platform is Darwin, False otherwise + """ + if platform.system() in ("Darwin"): + return True + if sys.platform.startswith("darwin"): + return True + + def _is_linux(self): + """check if the current operating system is a Linux distribution. + + Returns: + bool: True if the current platform is a Linux distro, False otherwise + """ + if platform.system() in ("Linux"): + return True + if sys.platform.startswith("linux"): + return True + + def _is_debian(self): + """check if the current operating system is explicitly Debian. + This intentionally doesn't count Debian derivatives like Ubuntu. + + Returns: + bool: True if the current platform is debian, False otherwise + """ + if not self._is_linux(): + return False + self.info(mozinfo.linux_distro) + re_debian_distro = re.compile("debian") + return re_debian_distro.match(mozinfo.linux_distro) is not None + + def _is_redhat_based(self): + """check if the current operating system is a Redhat derived Linux distribution. + + Returns: + bool: True if the current platform is a Redhat Linux distro, False otherwise + """ + if not self._is_linux(): + return False + re_redhat_distro = re.compile("Redhat|Fedora|CentOS|Oracle") + return re_redhat_distro.match(mozinfo.linux_distro) is not None + + def _is_64_bit(self): + if self._is_darwin(): + # osx is a special snowflake and to ensure the arch, it is better to use the following + return ( + sys.maxsize > 2 ** 32 + ) # context: https://docs.python.org/2/library/platform.html + else: + # Using machine() gives you the architecture of the host rather + # than the build type of the Python binary + return "64" in platform.machine() + + +# ScriptMixin {{{1 +class ScriptMixin(PlatformMixin): + """This mixin contains simple filesystem commands and the like. + + It also contains some very special but very complex methods that, + together with logging and config, provide the base for all scripts + in this harness. + + WARNING !!! + This class depends entirely on `LogMixin` methods in such a way that it will + only works if a class inherits from both `ScriptMixin` and `LogMixin` + simultaneously. + + Depends on self.config of some sort. + + Attributes: + env (dict): a mapping object representing the string environment. + script_obj (ScriptMixin): reference to a ScriptMixin instance. + """ + + env = None + script_obj = None + ssl_context = None + + def query_filesize(self, file_path): + self.info("Determining filesize for %s" % file_path) + length = os.path.getsize(file_path) + self.info(" %s" % str(length)) + return length + + # TODO this should be parallelized with the to-be-written BaseHelper! + def query_sha512sum(self, file_path): + self.info("Determining sha512sum for %s" % file_path) + m = hashlib.sha512() + contents = self.read_from_file(file_path, verbose=False, open_mode="rb") + m.update(contents) + sha512 = m.hexdigest() + self.info(" %s" % sha512) + return sha512 + + def platform_name(self): + """Return the platform name on which the script is running on. + Returns: + None: for failure to determine the platform. + str: The name of the platform (e.g. linux64) + """ + return platform_name() + + # Simple filesystem commands {{{2 + def mkdir_p(self, path, error_level=ERROR): + """Create a directory if it doesn't exists. + This method also logs the creation, error or current existence of the + directory to be created. + + Args: + path (str): path of the directory to be created. + error_level (str): log level name to be used in case of error. + + Returns: + None: for sucess. + int: -1 on error + """ + + if not os.path.exists(path): + self.info("mkdir: %s" % path) + try: + os.makedirs(path) + except OSError: + self.log("Can't create directory %s!" % path, level=error_level) + return -1 + else: + self.debug("mkdir_p: %s Already exists." % path) + + def rmtree(self, path, log_level=INFO, error_level=ERROR, exit_code=-1): + """Delete an entire directory tree and log its result. + This method also logs the platform rmtree function, its retries, errors, + and current existence of the directory. + + Args: + path (str): path to the directory tree root to remove. + log_level (str, optional): log level name to for this operation. Defaults + to `INFO`. + error_level (str, optional): log level name to use in case of error. + Defaults to `ERROR`. + exit_code (int, optional): useless parameter, not use here. + Defaults to -1 + + Returns: + None: for success + """ + + self.log("rmtree: %s" % path, level=log_level) + error_message = "Unable to remove %s!" % path + if self._is_windows(): + # Call _rmtree_windows() directly, since even checking + # os.path.exists(path) will hang if path is longer than MAX_PATH. + self.info("Using _rmtree_windows ...") + return self.retry( + self._rmtree_windows, + error_level=error_level, + error_message=error_message, + args=(path,), + log_level=log_level, + ) + if os.path.exists(path): + if os.path.isdir(path): + return self.retry( + shutil.rmtree, + error_level=error_level, + error_message=error_message, + retry_exceptions=(OSError,), + args=(path,), + log_level=log_level, + ) + else: + return self.retry( + os.remove, + error_level=error_level, + error_message=error_message, + retry_exceptions=(OSError,), + args=(path,), + log_level=log_level, + ) + else: + self.debug("%s doesn't exist." % path) + + def query_msys_path(self, path): + """replaces the Windows harddrive letter path style with a linux + path style, e.g. C:// --> /C/ + Note: method, not used in any script. + + Args: + path (str?): path to convert to the linux path style. + Returns: + str: in case `path` is a string. The result is the path with the new notation. + type(path): `path` itself is returned in case `path` is not str type. + """ + if not isinstance(path, six.string_types): + return path + path = path.replace("\\", "/") + + def repl(m): + return "/%s/" % m.group(1) + + path = re.sub(r"""^([a-zA-Z]):/""", repl, path) + return path + + def _rmtree_windows(self, path): + """Windows-specific rmtree that handles path lengths longer than MAX_PATH. + Ported from clobberer.py. + + Args: + path (str): directory path to remove. + + Returns: + None: if the path doesn't exists. + int: the return number of calling `self.run_command` + int: in case the path specified is not a directory but a file. + 0 on success, non-zero on error. Note: The returned value + is the result of calling `win32file.DeleteFile` + """ + + assert self._is_windows() + path = os.path.realpath(path) + full_path = "\\\\?\\" + path + if not os.path.exists(full_path): + return + if not PYWIN32: + if not os.path.isdir(path): + return self.run_command('del /F /Q "%s"' % path) + else: + return self.run_command('rmdir /S /Q "%s"' % path) + # Make sure directory is writable + win32file.SetFileAttributesW("\\\\?\\" + path, win32file.FILE_ATTRIBUTE_NORMAL) + # Since we call rmtree() with a file, sometimes + if not os.path.isdir("\\\\?\\" + path): + return win32file.DeleteFile("\\\\?\\" + path) + + for ffrec in win32api.FindFiles("\\\\?\\" + path + "\\*.*"): + file_attr = ffrec[0] + name = ffrec[8] + if name == "." or name == "..": + continue + full_name = os.path.join(path, name) + + if file_attr & win32file.FILE_ATTRIBUTE_DIRECTORY: + self._rmtree_windows(full_name) + else: + try: + win32file.SetFileAttributesW( + "\\\\?\\" + full_name, win32file.FILE_ATTRIBUTE_NORMAL + ) + win32file.DeleteFile("\\\\?\\" + full_name) + except Exception: + # DeleteFile fails on long paths, del /f /q works just fine + self.run_command('del /F /Q "%s"' % full_name) + + win32file.RemoveDirectory("\\\\?\\" + path) + + def get_filename_from_url(self, url): + """parse a filename base on an url. + + Args: + url (str): url to parse for the filename + + Returns: + str: filename parsed from the url, or `netloc` network location part + of the url. + """ + + parsed = urlparse.urlsplit(url.rstrip("/")) + if parsed.path != "": + return parsed.path.rsplit("/", 1)[-1] + else: + return parsed.netloc + + def _urlopen(self, url, **kwargs): + """open the url `url` using `urllib2`.` + This method can be overwritten to extend its complexity + + Args: + url (str | urllib.request.Request): url to open + kwargs: Arbitrary keyword arguments passed to the `urllib.request.urlopen` function. + + Returns: + file-like: file-like object with additional methods as defined in + `urllib.request.urlopen`_. + None: None may be returned if no handler handles the request. + + Raises: + urllib2.URLError: on errors + + .. urillib.request.urlopen: + https://docs.python.org/2/library/urllib2.html#urllib2.urlopen + """ + # http://bugs.python.org/issue13359 - urllib2 does not automatically quote the URL + url_quoted = quote(url, safe="%/:=&?~#+!$,;'@()*[]|") + # windows certificates need to be refreshed (https://bugs.python.org/issue36011) + if self.platform_name() in ("win64",) and platform.architecture()[0] in ( + "x64", + ): + if self.ssl_context is None: + self.ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS) + self.ssl_context.load_default_certs() + return urlopen(url_quoted, context=self.ssl_context, **kwargs) + else: + return urlopen(url_quoted, **kwargs) + + def fetch_url_into_memory(self, url): + """Downloads a file from a url into memory instead of disk. + + Args: + url (str): URL path where the file to be downloaded is located. + + Raises: + IOError: When the url points to a file on disk and cannot be found + ContentLengthMismatch: When the length of the retrieved content does not match the + Content-Length response header. + ValueError: When the scheme of a url is not what is expected. + + Returns: + BytesIO: contents of url + """ + self.info("Fetch {} into memory".format(url)) + parsed_url = urlparse.urlparse(url) + + if parsed_url.scheme in ("", "file"): + path = parsed_url.path + if not os.path.isfile(path): + raise IOError("Could not find file to extract: {}".format(url)) + + content_length = os.stat(path).st_size + + # In case we're referrencing a file without file:// + if parsed_url.scheme == "": + url = "file://%s" % os.path.abspath(url) + parsed_url = urlparse.urlparse(url) + + request = Request(url) + # When calling fetch_url_into_memory() you should retry when we raise + # one of these exceptions: + # * Bug 1300663 - HTTPError: HTTP Error 404: Not Found + # * Bug 1300413 - HTTPError: HTTP Error 500: Internal Server Error + # * Bug 1300943 - HTTPError: HTTP Error 503: Service Unavailable + # * Bug 1300953 - URLError: <urlopen error [Errno -2] Name or service not known> + # * Bug 1301594 - URLError: <urlopen error [Errno 10054] An existing connection was ... + # * Bug 1301597 - URLError: <urlopen error [Errno 8] _ssl.c:504: EOF occurred in ... + # * Bug 1301855 - URLError: <urlopen error [Errno 60] Operation timed out> + # * Bug 1302237 - URLError: <urlopen error [Errno 104] Connection reset by peer> + # * Bug 1301807 - BadStatusLine: '' + # + # Bug 1309912 - Adding timeout in hopes to solve blocking on response.read() (bug 1300413) + response = urlopen(request, timeout=30) + + if parsed_url.scheme in ("http", "https"): + content_length = int(response.headers.get("Content-Length")) + + response_body = response.read() + response_body_size = len(response_body) + + self.info("Content-Length response header: {}".format(content_length)) + self.info("Bytes received: {}".format(response_body_size)) + + if response_body_size != content_length: + raise ContentLengthMismatch( + "The retrieved Content-Length header declares a body length " + "of {} bytes, while we actually retrieved {} bytes".format( + content_length, response_body_size + ) + ) + + if response.info().get("Content-Encoding") == "gzip": + self.info('Content-Encoding is "gzip", so decompressing response body') + # See http://www.zlib.net/manual.html#Advanced + # section "ZEXTERN int ZEXPORT inflateInit2 OF....": + # Add 32 to windowBits to enable zlib and gzip decoding with automatic + # header detection, or add 16 to decode only the gzip format (the zlib + # format will return a Z_DATA_ERROR). + # Adding 16 since we only wish to support gzip encoding. + file_contents = zlib.decompress(response_body, zlib.MAX_WBITS | 16) + else: + file_contents = response_body + + # Use BytesIO instead of StringIO + # http://stackoverflow.com/questions/34162017/unzip-buffer-with-python/34162395#34162395 + return BytesIO(file_contents) + + def _download_file(self, url, file_name): + """Helper function for download_file() + Additionaly this function logs all exceptions as warnings before + re-raising them + + Args: + url (str): string containing the URL with the file location + file_name (str): name of the file where the downloaded file + is written. + + Returns: + str: filename of the written file on disk + + Raises: + urllib2.URLError: on incomplete download. + urllib2.HTTPError: on Http error code + socket.timeout: on connection timeout + socket.error: on socket error + """ + # If our URLs look like files, prefix them with file:// so they can + # be loaded like URLs. + if not (url.startswith("http") or url.startswith("file://")): + if not os.path.isfile(url): + self.fatal("The file %s does not exist" % url) + url = "file://%s" % os.path.abspath(url) + + try: + f_length = None + f = self._urlopen(url, timeout=30) + + if f.info().get("content-length") is not None: + f_length = int(f.info()["content-length"]) + got_length = 0 + if f.info().get("Content-Encoding") == "gzip": + # Note, we'll download the full compressed content into its own + # file, since that allows the gzip library to seek through it. + # Once downloaded, we'll decompress it into the real target + # file, and delete the compressed version. + local_file = open(file_name + ".gz", "wb") + else: + local_file = open(file_name, "wb") + while True: + block = f.read(1024 ** 2) + if not block: + if f_length is not None and got_length != f_length: + raise URLError( + "Download incomplete; content-length was %d, " + "but only received %d" % (f_length, got_length) + ) + break + local_file.write(block) + if f_length is not None: + got_length += len(block) + local_file.close() + if f.info().get("Content-Encoding") == "gzip": + # Decompress file into target location, then remove compressed version + with open(file_name, "wb") as f_out: + # On some execution paths, this could be called with python 2.6 + # whereby gzip.open(...) cannot be used with a 'with' statement. + # So let's do this the python 2.6 way... + try: + f_in = gzip.open(file_name + ".gz", "rb") + shutil.copyfileobj(f_in, f_out) + finally: + f_in.close() + os.remove(file_name + ".gz") + return file_name + except HTTPError as e: + self.warning( + "Server returned status %s %s for %s" % (str(e.code), str(e), url) + ) + raise + except URLError as e: + self.warning("URL Error: %s" % url) + + # Failures due to missing local files won't benefit from retry. + # Raise the original OSError. + if isinstance(e.args[0], OSError) and e.args[0].errno == errno.ENOENT: + raise e.args[0] + + raise + except socket.timeout as e: + self.warning("Timed out accessing %s: %s" % (url, str(e))) + raise + except socket.error as e: + self.warning("Socket error when accessing %s: %s" % (url, str(e))) + raise + + def _retry_download(self, url, error_level, file_name=None, retry_config=None): + """Helper method to retry download methods. + + This method calls `self.retry` on `self._download_file` using the passed + parameters if a file_name is specified. If no file is specified, we will + instead call `self._urlopen`, which grabs the contents of a url but does + not create a file on disk. + + Args: + url (str): URL path where the file is located. + file_name (str): file_name where the file will be written to. + error_level (str): log level to use in case an error occurs. + retry_config (dict, optional): key-value pairs to be passed to + `self.retry`. Defaults to `None` + + Returns: + str: `self._download_file` return value is returned + unknown: `self.retry` `failure_status` is returned on failure, which + defaults to -1 + """ + retry_args = dict( + failure_status=None, + retry_exceptions=( + HTTPError, + URLError, + httplib.HTTPException, + socket.timeout, + socket.error, + ), + error_message="Can't download from %s to %s!" % (url, file_name), + error_level=error_level, + ) + + if retry_config: + retry_args.update(retry_config) + + download_func = self._urlopen + kwargs = {"url": url} + if file_name: + download_func = self._download_file + kwargs = {"url": url, "file_name": file_name} + + return self.retry(download_func, kwargs=kwargs, **retry_args) + + def _filter_entries(self, namelist, extract_dirs): + """Filter entries of the archive based on the specified list of to extract dirs.""" + filter_partial = functools.partial(fnmatch.filter, namelist) + entries = itertools.chain(*map(filter_partial, extract_dirs or ["*"])) + + for entry in entries: + yield entry + + def unzip(self, compressed_file, extract_to, extract_dirs="*", verbose=False): + """This method allows to extract a zip file without writing to disk first. + + Args: + compressed_file (object): File-like object with the contents of a compressed zip file. + extract_to (str): where to extract the compressed file. + extract_dirs (list, optional): directories inside the archive file to extract. + Defaults to '*'. + verbose (bool, optional): whether or not extracted content should be displayed. + Defaults to False. + + Raises: + zipfile.BadZipfile: on contents of zipfile being invalid + """ + with zipfile.ZipFile(compressed_file) as bundle: + entries = self._filter_entries(bundle.namelist(), extract_dirs) + + for entry in entries: + if verbose: + self.info(" {}".format(entry)) + + # Exception to be retried: + # Bug 1301645 - BadZipfile: Bad CRC-32 for file ... + # http://stackoverflow.com/questions/5624669/strange-badzipfile-bad-crc-32-problem/5626098#5626098 + # Bug 1301802 - error: Error -3 while decompressing: invalid stored block lengths + bundle.extract(entry, path=extract_to) + + # ZipFile doesn't preserve permissions during extraction: + # http://bugs.python.org/issue15795 + fname = os.path.realpath(os.path.join(extract_to, entry)) + try: + # getinfo() can raise KeyError + mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF + # Only set permissions if attributes are available. Otherwise all + # permissions will be removed eg. on Windows. + if mode: + os.chmod(fname, mode) + + except KeyError: + self.warning("{} was not found in the zip file".format(entry)) + + def deflate(self, compressed_file, mode, extract_to=".", *args, **kwargs): + """This method allows to extract a compressed file from a tar, tar.bz2 and tar.gz files. + + Args: + compressed_file (object): File-like object with the contents of a compressed file. + mode (str): string of the form 'filemode[:compression]' (e.g. 'r:gz' or 'r:bz2') + extract_to (str, optional): where to extract the compressed file. + """ + with tarfile.open(fileobj=compressed_file, mode=mode) as t: + _safe_extract(t, path=extract_to) + + def download_unpack(self, url, extract_to=".", extract_dirs="*", verbose=False): + """Generic method to download and extract a compressed file without writing it + to disk first. + + Args: + url (str): URL where the file to be downloaded is located. + extract_to (str, optional): directory where the downloaded file will + be extracted to. + extract_dirs (list, optional): directories inside the archive to extract. + Defaults to `*`. It currently only applies to zip files. + verbose (bool, optional): whether or not extracted content should be displayed. + Defaults to False. + + """ + + def _determine_extraction_method_and_kwargs(url): + EXTENSION_TO_MIMETYPE = { + "bz2": "application/x-bzip2", + "gz": "application/x-gzip", + "tar": "application/x-tar", + "zip": "application/zip", + } + MIMETYPES = { + "application/x-bzip2": { + "function": self.deflate, + "kwargs": {"mode": "r:bz2"}, + }, + "application/x-gzip": { + "function": self.deflate, + "kwargs": {"mode": "r:gz"}, + }, + "application/x-tar": { + "function": self.deflate, + "kwargs": {"mode": "r"}, + }, + "application/zip": { + "function": self.unzip, + }, + "application/x-zip-compressed": { + "function": self.unzip, + }, + } + + filename = url.split("/")[-1] + # XXX: bz2/gz instead of tar.{bz2/gz} + extension = filename[filename.rfind(".") + 1 :] + mimetype = EXTENSION_TO_MIMETYPE[extension] + self.debug("Mimetype: {}".format(mimetype)) + + function = MIMETYPES[mimetype]["function"] + kwargs = { + "compressed_file": compressed_file, + "extract_to": extract_to, + "extract_dirs": extract_dirs, + "verbose": verbose, + } + kwargs.update(MIMETYPES[mimetype].get("kwargs", {})) + + return function, kwargs + + # Many scripts overwrite this method and set extract_dirs to None + extract_dirs = "*" if extract_dirs is None else extract_dirs + self.info( + "Downloading and extracting to {} these dirs {} from {}".format( + extract_to, + ", ".join(extract_dirs), + url, + ) + ) + + # 1) Let's fetch the file + retry_args = dict( + retry_exceptions=( + HTTPError, + URLError, + httplib.HTTPException, + socket.timeout, + socket.error, + ContentLengthMismatch, + ), + sleeptime=30, + attempts=5, + error_message="Can't download from {}".format(url), + error_level=FATAL, + ) + compressed_file = self.retry( + self.fetch_url_into_memory, kwargs={"url": url}, **retry_args + ) + + # 2) We're guaranteed to have download the file with error_level=FATAL + # Let's unpack the file + function, kwargs = _determine_extraction_method_and_kwargs(url) + try: + function(**kwargs) + except zipfile.BadZipfile: + # Dump the exception and exit + self.exception(level=FATAL) + + def load_json_url(self, url, error_level=None, *args, **kwargs): + """Returns a json object from a url (it retries).""" + contents = self._retry_download( + url=url, error_level=error_level, *args, **kwargs + ) + return json.loads(contents.read()) + + # http://www.techniqal.com/blog/2008/07/31/python-file-read-write-with-urllib2/ + # TODO thinking about creating a transfer object. + def download_file( + self, + url, + file_name=None, + parent_dir=None, + create_parent_dir=True, + error_level=ERROR, + exit_code=3, + retry_config=None, + ): + """Python wget. + Download the filename at `url` into `file_name` and put it on `parent_dir`. + On error log with the specified `error_level`, on fatal exit with `exit_code`. + Execute all the above based on `retry_config` parameter. + + Args: + url (str): URL path where the file to be downloaded is located. + file_name (str, optional): file_name where the file will be written to. + Defaults to urls' filename. + parent_dir (str, optional): directory where the downloaded file will + be written to. Defaults to current working + directory + create_parent_dir (bool, optional): create the parent directory if it + doesn't exist. Defaults to `True` + error_level (str, optional): log level to use in case an error occurs. + Defaults to `ERROR` + retry_config (dict, optional): key-value pairs to be passed to + `self.retry`. Defaults to `None` + + Returns: + str: filename where the downloaded file was written to. + unknown: on failure, `failure_status` is returned. + """ + if not file_name: + try: + file_name = self.get_filename_from_url(url) + except AttributeError: + self.log( + "Unable to get filename from %s; bad url?" % url, + level=error_level, + exit_code=exit_code, + ) + return + if parent_dir: + file_name = os.path.join(parent_dir, file_name) + if create_parent_dir: + self.mkdir_p(parent_dir, error_level=error_level) + self.info("Downloading %s to %s" % (url, file_name)) + status = self._retry_download( + url=url, + error_level=error_level, + file_name=file_name, + retry_config=retry_config, + ) + if status == file_name: + self.info("Downloaded %d bytes." % os.path.getsize(file_name)) + return status + + def move(self, src, dest, log_level=INFO, error_level=ERROR, exit_code=-1): + """recursively move a file or directory (src) to another location (dest). + + Args: + src (str): file or directory path to move. + dest (str): file or directory path where to move the content to. + log_level (str): log level to use for normal operation. Defaults to + `INFO` + error_level (str): log level to use on error. Defaults to `ERROR` + + Returns: + int: 0 on success. -1 on error. + """ + self.log("Moving %s to %s" % (src, dest), level=log_level) + try: + shutil.move(src, dest) + # http://docs.python.org/tutorial/errors.html + except IOError as e: + self.log("IO error: %s" % str(e), level=error_level, exit_code=exit_code) + return -1 + except shutil.Error as e: + # ERROR level ends up reporting the failure to treeherder & + # pollutes the failure summary list. + self.log("shutil error: %s" % str(e), level=WARNING, exit_code=exit_code) + return -1 + return 0 + + def chmod(self, path, mode): + """change `path` mode to `mode`. + + Args: + path (str): path whose mode will be modified. + mode (hex): one of the values defined at `stat`_ + + .. _stat: + https://docs.python.org/2/library/os.html#os.chmod + """ + + self.info("Chmoding %s to %s" % (path, str(oct(mode)))) + os.chmod(path, mode) + + def copyfile( + self, + src, + dest, + log_level=INFO, + error_level=ERROR, + copystat=False, + compress=False, + ): + """copy or compress `src` into `dest`. + + Args: + src (str): filepath to copy. + dest (str): filepath where to move the content to. + log_level (str, optional): log level to use for normal operation. Defaults to + `INFO` + error_level (str, optional): log level to use on error. Defaults to `ERROR` + copystat (bool, optional): whether or not to copy the files metadata. + Defaults to `False`. + compress (bool, optional): whether or not to compress the destination file. + Defaults to `False`. + + Returns: + int: -1 on error + None: on success + """ + + if compress: + self.log("Compressing %s to %s" % (src, dest), level=log_level) + try: + infile = open(src, "rb") + outfile = gzip.open(dest, "wb") + outfile.writelines(infile) + outfile.close() + infile.close() + except IOError as e: + self.log( + "Can't compress %s to %s: %s!" % (src, dest, str(e)), + level=error_level, + ) + return -1 + else: + self.log("Copying %s to %s" % (src, dest), level=log_level) + try: + shutil.copyfile(src, dest) + except (IOError, shutil.Error) as e: + self.log( + "Can't copy %s to %s: %s!" % (src, dest, str(e)), level=error_level + ) + return -1 + + if copystat: + try: + shutil.copystat(src, dest) + except (IOError, shutil.Error) as e: + self.log( + "Can't copy attributes of %s to %s: %s!" % (src, dest, str(e)), + level=error_level, + ) + return -1 + + def copytree( + self, src, dest, overwrite="no_overwrite", log_level=INFO, error_level=ERROR + ): + """An implementation of `shutil.copytree` that allows for `dest` to exist + and implements different overwrite levels: + - 'no_overwrite' will keep all(any) existing files in destination tree + - 'overwrite_if_exists' will only overwrite destination paths that have + the same path names relative to the root of the + src and destination tree + - 'clobber' will replace the whole destination tree(clobber) if it exists + + Args: + src (str): directory path to move. + dest (str): directory path where to move the content to. + overwrite (str): string specifying the overwrite level. + log_level (str, optional): log level to use for normal operation. Defaults to + `INFO` + error_level (str, optional): log level to use on error. Defaults to `ERROR` + + Returns: + int: -1 on error + None: on success + """ + + self.info("copying tree: %s to %s" % (src, dest)) + try: + if overwrite == "clobber" or not os.path.exists(dest): + self.rmtree(dest) + shutil.copytree(src, dest) + elif overwrite == "no_overwrite" or overwrite == "overwrite_if_exists": + files = os.listdir(src) + for f in files: + abs_src_f = os.path.join(src, f) + abs_dest_f = os.path.join(dest, f) + if not os.path.exists(abs_dest_f): + if os.path.isdir(abs_src_f): + self.mkdir_p(abs_dest_f) + self.copytree(abs_src_f, abs_dest_f, overwrite="clobber") + else: + shutil.copy2(abs_src_f, abs_dest_f) + elif overwrite == "no_overwrite": # destination path exists + if os.path.isdir(abs_src_f) and os.path.isdir(abs_dest_f): + self.copytree( + abs_src_f, abs_dest_f, overwrite="no_overwrite" + ) + else: + self.debug( + "ignoring path: %s as destination: \ + %s exists" + % (abs_src_f, abs_dest_f) + ) + else: # overwrite == 'overwrite_if_exists' and destination exists + self.debug("overwriting: %s with: %s" % (abs_dest_f, abs_src_f)) + self.rmtree(abs_dest_f) + + if os.path.isdir(abs_src_f): + self.mkdir_p(abs_dest_f) + self.copytree( + abs_src_f, abs_dest_f, overwrite="overwrite_if_exists" + ) + else: + shutil.copy2(abs_src_f, abs_dest_f) + else: + self.fatal( + "%s is not a valid argument for param overwrite" % (overwrite) + ) + except (IOError, shutil.Error): + self.exception( + "There was an error while copying %s to %s!" % (src, dest), + level=error_level, + ) + return -1 + + def write_to_file( + self, + file_path, + contents, + verbose=True, + open_mode="w", + create_parent_dir=False, + error_level=ERROR, + ): + """Write `contents` to `file_path`, according to `open_mode`. + + Args: + file_path (str): filepath where the content will be written to. + contents (str): content to write to the filepath. + verbose (bool, optional): whether or not to log `contents` value. + Defaults to `True` + open_mode (str, optional): open mode to use for openning the file. + Defaults to `w` + create_parent_dir (bool, optional): whether or not to create the + parent directory of `file_path` + error_level (str, optional): log level to use on error. Defaults to `ERROR` + + Returns: + str: `file_path` on success + None: on error. + """ + self.info("Writing to file %s" % file_path) + if verbose: + self.info("Contents:") + for line in contents.splitlines(): + self.info(" %s" % line) + if create_parent_dir: + parent_dir = os.path.dirname(file_path) + self.mkdir_p(parent_dir, error_level=error_level) + try: + fh = open(file_path, open_mode) + try: + fh.write(contents) + except UnicodeEncodeError: + fh.write(contents.encode("utf-8", "replace")) + fh.close() + return file_path + except IOError: + self.log("%s can't be opened for writing!" % file_path, level=error_level) + + @contextmanager + def opened(self, file_path, verbose=True, open_mode="r", error_level=ERROR): + """Create a context manager to use on a with statement. + + Args: + file_path (str): filepath of the file to open. + verbose (bool, optional): useless parameter, not used here. + Defaults to True. + open_mode (str, optional): open mode to use for openning the file. + Defaults to `r` + error_level (str, optional): log level name to use on error. + Defaults to `ERROR` + + Yields: + tuple: (file object, error) pair. In case of error `None` is yielded + as file object, together with the corresponding error. + If there is no error, `None` is returned as the error. + """ + # See opened_w_error in http://www.python.org/dev/peps/pep-0343/ + self.info("Reading from file %s" % file_path) + try: + fh = open(file_path, open_mode) + except IOError as err: + self.log( + "unable to open %s: %s" % (file_path, err.strerror), level=error_level + ) + yield None, err + else: + try: + yield fh, None + finally: + fh.close() + + def read_from_file(self, file_path, verbose=True, open_mode="r", error_level=ERROR): + """Use `self.opened` context manager to open a file and read its + content. + + Args: + file_path (str): filepath of the file to read. + verbose (bool, optional): whether or not to log the file content. + Defaults to True. + open_mode (str, optional): open mode to use for openning the file. + Defaults to `r` + error_level (str, optional): log level name to use on error. + Defaults to `ERROR` + + Returns: + None: on error. + str: file content on success. + """ + with self.opened(file_path, verbose, open_mode, error_level) as (fh, err): + if err: + return None + contents = fh.read() + if verbose: + self.info("Contents:") + for line in contents.splitlines(): + self.info(" %s" % line) + return contents + + def chdir(self, dir_name): + self.log("Changing directory to %s." % dir_name) + os.chdir(dir_name) + + def is_exe(self, fpath): + """ + Determine if fpath is a file and if it is executable. + """ + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + def which(self, program): + """OS independent implementation of Unix's which command + + Args: + program (str): name or path to the program whose executable is + being searched. + + Returns: + None: if the executable was not found. + str: filepath of the executable file. + """ + if self._is_windows() and not program.endswith(".exe"): + program += ".exe" + fpath, fname = os.path.split(program) + if fpath: + if self.is_exe(program): + return program + else: + # If the exe file is defined in the configs let's use that + exe = self.query_exe(program) + if self.is_exe(exe): + return exe + + # If not defined, let's look for it in the $PATH + env = self.query_env() + for path in env["PATH"].split(os.pathsep): + exe_file = os.path.join(path, program) + if self.is_exe(exe_file): + return exe_file + return None + + # More complex commands {{{2 + def retry( + self, + action, + attempts=None, + sleeptime=60, + max_sleeptime=5 * 60, + retry_exceptions=(Exception,), + good_statuses=None, + cleanup=None, + error_level=ERROR, + error_message="%(action)s failed after %(attempts)d tries!", + failure_status=-1, + log_level=INFO, + args=(), + kwargs={}, + ): + """generic retry command. Ported from `util.retry`_ + + Args: + action (func): callable object to retry. + attempts (int, optinal): maximum number of times to call actions. + Defaults to `self.config.get('global_retries', 5)` + sleeptime (int, optional): number of seconds to wait between + attempts. Defaults to 60 and doubles each retry attempt, to + a maximum of `max_sleeptime' + max_sleeptime (int, optional): maximum value of sleeptime. Defaults + to 5 minutes + retry_exceptions (tuple, optional): Exceptions that should be caught. + If exceptions other than those listed in `retry_exceptions' are + raised from `action', they will be raised immediately. Defaults + to (Exception) + good_statuses (object, optional): return values which, if specified, + will result in retrying if the return value isn't listed. + Defaults to `None`. + cleanup (func, optional): If `cleanup' is provided and callable + it will be called immediately after an Exception is caught. + No arguments will be passed to it. If your cleanup function + requires arguments it is recommended that you wrap it in an + argumentless function. + Defaults to `None`. + error_level (str, optional): log level name in case of error. + Defaults to `ERROR`. + error_message (str, optional): string format to use in case + none of the attempts success. Defaults to + '%(action)s failed after %(attempts)d tries!' + failure_status (int, optional): flag to return in case the retries + were not successfull. Defaults to -1. + log_level (str, optional): log level name to use for normal activity. + Defaults to `INFO`. + args (tuple, optional): positional arguments to pass onto `action`. + kwargs (dict, optional): key-value arguments to pass onto `action`. + + Returns: + object: return value of `action`. + int: failure status in case of failure retries. + """ + if not callable(action): + self.fatal("retry() called with an uncallable method %s!" % action) + if cleanup and not callable(cleanup): + self.fatal("retry() called with an uncallable cleanup method %s!" % cleanup) + if not attempts: + attempts = self.config.get("global_retries", 5) + if max_sleeptime < sleeptime: + self.debug( + "max_sleeptime %d less than sleeptime %d" % (max_sleeptime, sleeptime) + ) + n = 0 + while n <= attempts: + retry = False + n += 1 + try: + self.log( + "retry: Calling %s with args: %s, kwargs: %s, attempt #%d" + % (action.__name__, str(args), str(kwargs), n), + level=log_level, + ) + status = action(*args, **kwargs) + if good_statuses and status not in good_statuses: + retry = True + except retry_exceptions as e: + retry = True + error_message = "%s\nCaught exception: %s" % (error_message, str(e)) + self.log( + "retry: attempt #%d caught %s exception: %s" + % (n, type(e).__name__, str(e)), + level=INFO, + ) + + if not retry: + return status + else: + if cleanup: + cleanup() + if n == attempts: + self.log( + error_message % {"action": action, "attempts": n}, + level=error_level, + ) + return failure_status + if sleeptime > 0: + self.log( + "retry: Failed, sleeping %d seconds before retrying" + % sleeptime, + level=log_level, + ) + time.sleep(sleeptime) + sleeptime = sleeptime * 2 + if sleeptime > max_sleeptime: + sleeptime = max_sleeptime + + def query_env( + self, + partial_env=None, + replace_dict=None, + purge_env=(), + set_self_env=None, + log_level=DEBUG, + avoid_host_env=False, + ): + """Environment query/generation method. + The default, self.query_env(), will look for self.config['env'] + and replace any special strings in there ( %(PATH)s ). + It will then store it as self.env for speeding things up later. + + If you specify partial_env, partial_env will be used instead of + self.config['env'], and we don't save self.env as it's a one-off. + + + Args: + partial_env (dict, optional): key-value pairs of the name and value + of different environment variables. Defaults to an empty dictionary. + replace_dict (dict, optional): key-value pairs to replace the old + environment variables. + purge_env (list): environment names to delete from the final + environment dictionary. + set_self_env (boolean, optional): whether or not the environment + variables dictionary should be copied to `self`. + Defaults to True. + log_level (str, optional): log level name to use on normal operation. + Defaults to `DEBUG`. + avoid_host_env (boolean, optional): if set to True, we will not use + any environment variables set on the host except PATH. + Defaults to False. + + Returns: + dict: environment variables names with their values. + """ + if partial_env is None: + if self.env is not None: + return self.env + partial_env = self.config.get("env", None) + if partial_env is None: + partial_env = {} + if set_self_env is None: + set_self_env = True + + env = {"PATH": os.environ["PATH"]} if avoid_host_env else os.environ.copy() + + default_replace_dict = self.query_abs_dirs() + default_replace_dict["PATH"] = os.environ["PATH"] + if not replace_dict: + replace_dict = default_replace_dict + else: + for key in default_replace_dict: + if key not in replace_dict: + replace_dict[key] = default_replace_dict[key] + for key in partial_env.keys(): + env[key] = partial_env[key] % replace_dict + self.log("ENV: %s is now %s" % (key, env[key]), level=log_level) + for k in purge_env: + if k in env: + del env[k] + if os.name == "nt": + pref_encoding = locale.getpreferredencoding() + for k, v in six.iteritems(env): + # When run locally on Windows machines, some environment + # variables may be unicode. + env[k] = six.ensure_str(v, pref_encoding) + if set_self_env: + self.env = env + return env + + def query_exe( + self, + exe_name, + exe_dict="exes", + default=None, + return_type=None, + error_level=FATAL, + ): + """One way to work around PATH rewrites. + + By default, return exe_name, and we'll fall through to searching + os.environ["PATH"]. + However, if self.config[exe_dict][exe_name] exists, return that. + This lets us override exe paths via config file. + + If we need runtime setting, we can build in self.exes support later. + + Args: + exe_name (str): name of the executable to search for. + exe_dict(str, optional): name of the dictionary of executables + present in `self.config`. Defaults to `exes`. + default (str, optional): default name of the executable to search + for. Defaults to `exe_name`. + return_type (str, optional): type to which the original return + value will be turn into. Only 'list', 'string' and `None` are + supported. Defaults to `None`. + error_level (str, optional): log level name to use on error. + + Returns: + list: in case return_type is 'list' + str: in case return_type is 'string' + None: in case return_type is `None` + Any: if the found executable is not of type list, tuple nor str. + """ + if default is None: + default = exe_name + exe = self.config.get(exe_dict, {}).get(exe_name, default) + repl_dict = {} + if hasattr(self.script_obj, "query_abs_dirs"): + # allow for 'make': '%(abs_work_dir)s/...' etc. + dirs = self.script_obj.query_abs_dirs() + repl_dict.update(dirs) + if isinstance(exe, dict): + found = False + # allow for searchable paths of the exe + for name, path in six.iteritems(exe): + if isinstance(path, list) or isinstance(path, tuple): + path = [x % repl_dict for x in path] + if all([os.path.exists(section) for section in path]): + found = True + elif isinstance(path, str): + path = path % repl_dict + if os.path.exists(path): + found = True + else: + self.log( + "a exes %s dict's value is not a string, list, or tuple. Got key " + "%s and value %s" % (exe_name, name, str(path)), + level=error_level, + ) + if found: + exe = path + break + else: + self.log( + "query_exe was a searchable dict but an existing " + "path could not be determined. Tried searching in " + "paths: %s" % (str(exe)), + level=error_level, + ) + return None + elif isinstance(exe, list) or isinstance(exe, tuple): + exe = [x % repl_dict for x in exe] + elif isinstance(exe, str): + exe = exe % repl_dict + else: + self.log( + "query_exe: %s is not a list, tuple, dict, or string: " + "%s!" % (exe_name, str(exe)), + level=error_level, + ) + return exe + if return_type == "list": + if isinstance(exe, str): + exe = [exe] + elif return_type == "string": + if isinstance(exe, list): + exe = subprocess.list2cmdline(exe) + elif return_type is not None: + self.log( + "Unknown return_type type %s requested in query_exe!" % return_type, + level=error_level, + ) + return exe + + def run_command( + self, + command, + cwd=None, + error_list=None, + halt_on_failure=False, + success_codes=None, + env=None, + partial_env=None, + return_type="status", + throw_exception=False, + output_parser=None, + output_timeout=None, + fatal_exit_code=2, + error_level=ERROR, + **kwargs + ): + """Run a command, with logging and error parsing. + TODO: context_lines + + error_list example: + [{'regex': re.compile('^Error: LOL J/K'), level=IGNORE}, + {'regex': re.compile('^Error:'), level=ERROR, contextLines='5:5'}, + {'substr': 'THE WORLD IS ENDING', level=FATAL, contextLines='20:'} + ] + (context_lines isn't written yet) + + Args: + command (str | list | tuple): command or sequence of commands to + execute and log. + cwd (str, optional): directory path from where to execute the + command. Defaults to `None`. + error_list (list, optional): list of errors to pass to + `mozharness.base.log.OutputParser`. Defaults to `None`. + halt_on_failure (bool, optional): whether or not to redefine the + log level as `FATAL` on errors. Defaults to False. + success_codes (int, optional): numeric value to compare against + the command return value. + env (dict, optional): key-value of environment values to use to + run the command. Defaults to None. + partial_env (dict, optional): key-value of environment values to + replace from the current environment values. Defaults to None. + return_type (str, optional): if equal to 'num_errors' then the + amount of errors matched by `error_list` is returned. Defaults + to 'status'. + throw_exception (bool, optional): whether or not to raise an + exception if the return value of the command doesn't match + any of the `success_codes`. Defaults to False. + output_parser (OutputParser, optional): lets you provide an + instance of your own OutputParser subclass. Defaults to `OutputParser`. + output_timeout (int): amount of seconds to wait for output before + the process is killed. + fatal_exit_code (int, optional): call `self.fatal` if the return value + of the command is not in `success_codes`. Defaults to 2. + error_level (str, optional): log level name to use on error. Defaults + to `ERROR`. + **kwargs: Arbitrary keyword arguments. + + Returns: + int: -1 on error. + Any: `command` return value is returned otherwise. + """ + if success_codes is None: + success_codes = [0] + if cwd is not None: + if not os.path.isdir(cwd): + level = error_level + if halt_on_failure: + level = FATAL + self.log( + "Can't run command %s in non-existent directory '%s'!" + % (command, cwd), + level=level, + ) + return -1 + self.info("Running command: %s in %s" % (command, cwd)) + else: + self.info("Running command: %s" % (command,)) + if isinstance(command, list) or isinstance(command, tuple): + self.info("Copy/paste: %s" % subprocess.list2cmdline(command)) + shell = True + if isinstance(command, list) or isinstance(command, tuple): + shell = False + if env is None: + if partial_env: + self.info("Using partial env: %s" % pprint.pformat(partial_env)) + env = self.query_env(partial_env=partial_env) + else: + if hasattr(self, "previous_env") and env == self.previous_env: + self.info("Using env: (same as previous command)") + else: + self.info("Using env: %s" % pprint.pformat(env)) + self.previous_env = env + + if output_parser is None: + parser = OutputParser( + config=self.config, log_obj=self.log_obj, error_list=error_list + ) + else: + parser = output_parser + + try: + if output_timeout: + + def processOutput(line): + parser.add_lines(line) + + def onTimeout(): + self.info( + "Automation Error: mozprocess timed out after " + "%s seconds running %s" % (str(output_timeout), str(command)) + ) + + p = ProcessHandler( + command, + shell=shell, + env=env, + cwd=cwd, + storeOutput=False, + onTimeout=(onTimeout,), + processOutputLine=[processOutput], + ) + self.info( + "Calling %s with output_timeout %d" % (command, output_timeout) + ) + p.run(outputTimeout=output_timeout) + p.wait() + if p.timedOut: + self.log( + "timed out after %s seconds of no output" % output_timeout, + level=error_level, + ) + returncode = int(p.proc.returncode) + else: + p = subprocess.Popen( + command, + shell=shell, + stdout=subprocess.PIPE, + cwd=cwd, + stderr=subprocess.STDOUT, + env=env, + bufsize=0, + ) + loop = True + while loop: + if p.poll() is not None: + """Avoid losing the final lines of the log?""" + loop = False + while True: + line = p.stdout.readline() + if not line: + break + parser.add_lines(line) + returncode = p.returncode + except KeyboardInterrupt: + level = error_level + if halt_on_failure: + level = FATAL + self.log( + "Process interrupted by the user, killing process with pid %s" % p.pid, + level=level, + ) + p.kill() + return -1 + except OSError as e: + level = error_level + if halt_on_failure: + level = FATAL + self.log( + "caught OS error %s: %s while running %s" + % (e.errno, e.strerror, command), + level=level, + ) + return -1 + + if returncode not in success_codes: + if throw_exception: + raise subprocess.CalledProcessError(returncode, command) + # Force level to be INFO as message is not necessary in Treeherder + self.log("Return code: %d" % returncode, level=INFO) + + if halt_on_failure: + _fail = False + if returncode not in success_codes: + self.log( + "%s not in success codes: %s" % (returncode, success_codes), + level=error_level, + ) + _fail = True + if parser.num_errors: + self.log("failures found while parsing output", level=error_level) + _fail = True + if _fail: + self.return_code = fatal_exit_code + self.fatal( + "Halting on failure while running %s" % (command,), + exit_code=fatal_exit_code, + ) + if return_type == "num_errors": + return parser.num_errors + return returncode + + def get_output_from_command( + self, + command, + cwd=None, + halt_on_failure=False, + env=None, + silent=False, + log_level=INFO, + tmpfile_base_path="tmpfile", + return_type="output", + save_tmpfiles=False, + throw_exception=False, + fatal_exit_code=2, + ignore_errors=False, + success_codes=None, + output_filter=None, + ): + """Similar to run_command, but where run_command is an + os.system(command) analog, get_output_from_command is a `command` + analog. + + Less error checking by design, though if we figure out how to + do it without borking the output, great. + + TODO: binary mode? silent is kinda like that. + TODO: since p.wait() can take a long time, optionally log something + every N seconds? + TODO: optionally only keep the first or last (N) line(s) of output? + TODO: optionally only return the tmp_stdout_filename? + + ignore_errors=True is for the case where a command might produce standard + error output, but you don't particularly care; setting to True will + cause standard error to be logged at DEBUG rather than ERROR + + Args: + command (str | list): command or list of commands to + execute and log. + cwd (str, optional): directory path from where to execute the + command. Defaults to `None`. + halt_on_failure (bool, optional): whether or not to redefine the + log level as `FATAL` on error. Defaults to False. + env (dict, optional): key-value of environment values to use to + run the command. Defaults to None. + silent (bool, optional): whether or not to output the stdout of + executing the command. Defaults to False. + log_level (str, optional): log level name to use on normal execution. + Defaults to `INFO`. + tmpfile_base_path (str, optional): base path of the file to which + the output will be writen to. Defaults to 'tmpfile'. + return_type (str, optional): if equal to 'output' then the complete + output of the executed command is returned, otherwise the written + filenames are returned. Defaults to 'output'. + save_tmpfiles (bool, optional): whether or not to save the temporary + files created from the command output. Defaults to False. + throw_exception (bool, optional): whether or not to raise an + exception if the return value of the command is not zero. + Defaults to False. + fatal_exit_code (int, optional): call self.fatal if the return value + of the command match this value. + ignore_errors (bool, optional): whether or not to change the log + level to `ERROR` for the output of stderr. Defaults to False. + success_codes (int, optional): numeric value to compare against + the command return value. + output_filter (func, optional): provide a function to filter output + so that noise is reduced and lines are sanitized. default: None + + Returns: + None: if the cwd is not a directory. + None: on IOError. + tuple: stdout and stderr filenames. + str: stdout output. + """ + if cwd: + if not os.path.isdir(cwd): + level = ERROR + if halt_on_failure: + level = FATAL + self.log( + "Can't run command %s in non-existent directory %s!" + % (command, cwd), + level=level, + ) + return None + self.info("Getting output from command: %s in %s" % (command, cwd)) + else: + self.info("Getting output from command: %s" % command) + if isinstance(command, list): + self.info("Copy/paste: %s" % subprocess.list2cmdline(command)) + # This could potentially return something? + tmp_stdout = None + tmp_stderr = None + tmp_stdout_filename = "%s_stdout" % tmpfile_base_path + tmp_stderr_filename = "%s_stderr" % tmpfile_base_path + if success_codes is None: + success_codes = [0] + + # TODO probably some more elegant solution than 2 similar passes + try: + tmp_stdout = open(tmp_stdout_filename, "w") + except IOError: + level = ERROR + if halt_on_failure: + level = FATAL + self.log( + "Can't open %s for writing!" % tmp_stdout_filename + self.exception(), + level=level, + ) + return None + try: + tmp_stderr = open(tmp_stderr_filename, "w") + except IOError: + level = ERROR + if halt_on_failure: + level = FATAL + self.log( + "Can't open %s for writing!" % tmp_stderr_filename + self.exception(), + level=level, + ) + return None + shell = True + if isinstance(command, list): + shell = False + + p = subprocess.Popen( + command, + shell=shell, + stdout=tmp_stdout, + cwd=cwd, + stderr=tmp_stderr, + env=env, + bufsize=0, + ) + # XXX: changed from self.debug to self.log due to this error: + # TypeError: debug() takes exactly 1 argument (2 given) + self.log( + "Temporary files: %s and %s" % (tmp_stdout_filename, tmp_stderr_filename), + level=DEBUG, + ) + p.wait() + tmp_stdout.close() + tmp_stderr.close() + return_level = DEBUG + output = None + if return_type == "output" or not silent: + if os.path.exists(tmp_stdout_filename) and os.path.getsize( + tmp_stdout_filename + ): + output = self.read_from_file(tmp_stdout_filename, verbose=False) + if output_filter: + output = output_filter(output) + if not silent: + self.log("Output received:", level=log_level) + output_lines = output.rstrip().splitlines() + for line in output_lines: + if not line or line.isspace(): + continue + if isinstance(line, binary_type): + line = line.decode("utf-8") + self.log(" %s" % line, level=log_level) + output = "\n".join(output_lines) + if os.path.exists(tmp_stderr_filename) and os.path.getsize(tmp_stderr_filename): + errors = self.read_from_file(tmp_stderr_filename, verbose=False) + if output_filter: + errors = output_filter(errors) + if errors: + if not ignore_errors: + return_level = ERROR + self.log("Errors received:", level=return_level) + for line in errors.rstrip().splitlines(): + if not line or line.isspace(): + continue + if isinstance(line, binary_type): + line = line.decode("utf-8") + self.log(" %s" % line, level=return_level) + elif p.returncode not in success_codes and not ignore_errors: + return_level = ERROR + # Clean up. + if not save_tmpfiles: + self.rmtree(tmp_stderr_filename, log_level=DEBUG) + self.rmtree(tmp_stdout_filename, log_level=DEBUG) + if p.returncode and throw_exception: + raise subprocess.CalledProcessError(p.returncode, command) + # Force level to be INFO as message is not necessary in Treeherder + self.log("Return code: %d" % p.returncode, level=INFO) + if halt_on_failure and return_level == ERROR: + self.return_code = fatal_exit_code + self.fatal( + "Halting on failure while running %s" % command, + exit_code=fatal_exit_code, + ) + # Hm, options on how to return this? I bet often we'll want + # output_lines[0] with no newline. + if return_type != "output": + return (tmp_stdout_filename, tmp_stderr_filename) + else: + return output + + def _touch_file(self, file_name, times=None, error_level=FATAL): + """touch a file. + + Args: + file_name (str): name of the file to touch. + times (tuple, optional): 2-tuple as specified by `os.utime`_ + Defaults to None. + error_level (str, optional): log level name in case of error. + Defaults to `FATAL`. + + .. _`os.utime`: + https://docs.python.org/3.4/library/os.html?highlight=os.utime#os.utime + """ + self.info("Touching: %s" % file_name) + try: + os.utime(file_name, times) + except OSError: + try: + open(file_name, "w").close() + except IOError as e: + msg = "I/O error(%s): %s" % (e.errno, e.strerror) + self.log(msg, error_level=error_level) + os.utime(file_name, times) + + def unpack( + self, + filename, + extract_to, + extract_dirs=None, + error_level=ERROR, + fatal_exit_code=2, + verbose=False, + ): + """The method allows to extract a file regardless of its extension. + + Args: + filename (str): filename of the compressed file. + extract_to (str): where to extract the compressed file. + extract_dirs (list, optional): directories inside the archive file to extract. + Defaults to `None`. + fatal_exit_code (int, optional): call `self.fatal` if the return value + of the command is not in `success_codes`. Defaults to 2. + verbose (bool, optional): whether or not extracted content should be displayed. + Defaults to False. + + Raises: + IOError: on `filename` file not found. + + """ + if not os.path.isfile(filename): + raise IOError("Could not find file to extract: %s" % filename) + + if zipfile.is_zipfile(filename): + try: + self.info( + "Using ZipFile to extract {} to {}".format(filename, extract_to) + ) + with zipfile.ZipFile(filename) as bundle: + for entry in self._filter_entries(bundle.namelist(), extract_dirs): + if verbose: + self.info(" %s" % entry) + bundle.extract(entry, path=extract_to) + + # ZipFile doesn't preserve permissions during extraction: + # http://bugs.python.org/issue15795 + fname = os.path.realpath(os.path.join(extract_to, entry)) + mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF + # Only set permissions if attributes are available. Otherwise all + # permissions will be removed eg. on Windows. + if mode: + os.chmod(fname, mode) + except zipfile.BadZipfile as e: + self.log( + "%s (%s)" % (str(e), filename), + level=error_level, + exit_code=fatal_exit_code, + ) + + # Bug 1211882 - is_tarfile cannot be trusted for dmg files + elif tarfile.is_tarfile(filename) and not filename.lower().endswith(".dmg"): + try: + self.info( + "Using TarFile to extract {} to {}".format(filename, extract_to) + ) + with tarfile.open(filename) as bundle: + for entry in self._filter_entries(bundle.getnames(), extract_dirs): + _validate_tar_member(bundle.getmember(entry), extract_to) + if verbose: + self.info(" %s" % entry) + bundle.extract(entry, path=extract_to) + except tarfile.TarError as e: + self.log( + "%s (%s)" % (str(e), filename), + level=error_level, + exit_code=fatal_exit_code, + ) + else: + self.log( + "No extraction method found for: %s" % filename, + level=error_level, + exit_code=fatal_exit_code, + ) + + def is_taskcluster(self): + """Returns boolean indicating if we're running in TaskCluster.""" + # This may need expanding in the future to work on + return "TASKCLUSTER_WORKER_TYPE" in os.environ + + +def PreScriptRun(func): + """Decorator for methods that will be called before script execution. + + Each method on a BaseScript having this decorator will be called at the + beginning of BaseScript.run(). + + The return value is ignored. Exceptions will abort execution. + """ + func._pre_run_listener = True + return func + + +def PostScriptRun(func): + """Decorator for methods that will be called after script execution. + + This is similar to PreScriptRun except it is called at the end of + execution. The method will always be fired, even if execution fails. + """ + func._post_run_listener = True + return func + + +def PreScriptAction(action=None): + """Decorator for methods that will be called at the beginning of each action. + + Each method on a BaseScript having this decorator will be called during + BaseScript.run() before an individual action is executed. The method will + receive the action's name as an argument. + + If no values are passed to the decorator, it will be applied to every + action. If a string is passed, the decorated function will only be called + for the action of that name. + + The return value of the method is ignored. Exceptions will abort execution. + """ + + def _wrapped(func): + func._pre_action_listener = action + return func + + def _wrapped_none(func): + func._pre_action_listener = None + return func + + if type(action) == type(_wrapped): + return _wrapped_none(action) + + return _wrapped + + +def PostScriptAction(action=None): + """Decorator for methods that will be called at the end of each action. + + This behaves similarly to PreScriptAction. It varies in that it is called + after execution of the action. + + The decorated method will receive the action name as a positional argument. + It will then receive the following named arguments: + + success - Bool indicating whether the action finished successfully. + + The decorated method will always be called, even if the action threw an + exception. + + The return value is ignored. + """ + + def _wrapped(func): + func._post_action_listener = action + return func + + def _wrapped_none(func): + func._post_action_listener = None + return func + + if type(action) == type(_wrapped): + return _wrapped_none(action) + + return _wrapped + + +# BaseScript {{{1 +class BaseScript(ScriptMixin, LogMixin, object): + def __init__( + self, + config_options=None, + ConfigClass=BaseConfig, + default_log_level="info", + **kwargs + ): + self._return_code = 0 + super(BaseScript, self).__init__() + + self.log_obj = None + self.abs_dirs = None + if config_options is None: + config_options = [] + self.summary_list = [] + self.failures = [] + rw_config = ConfigClass(config_options=config_options, **kwargs) + self.config = rw_config.get_read_only_config() + self.actions = tuple(rw_config.actions) + self.all_actions = tuple(rw_config.all_actions) + self.env = None + self.new_log_obj(default_log_level=default_log_level) + self.script_obj = self + + # Indicate we're a source checkout if VCS directory is present at the + # appropriate place. This code will break if this file is ever moved + # to another directory. + self.topsrcdir = None + + srcreldir = "testing/mozharness/mozharness/base" + here = os.path.normpath(os.path.dirname(__file__)) + if here.replace("\\", "/").endswith(srcreldir): + topsrcdir = os.path.normpath(os.path.join(here, "..", "..", "..", "..")) + hg_dir = os.path.join(topsrcdir, ".hg") + git_dir = os.path.join(topsrcdir, ".git") + if os.path.isdir(hg_dir) or os.path.isdir(git_dir): + self.topsrcdir = topsrcdir + + # Set self.config to read-only. + # + # We can create intermediate config info programmatically from + # this in a repeatable way, with logs; this is how we straddle the + # ideal-but-not-user-friendly static config and the + # easy-to-write-hard-to-debug writable config. + # + # To allow for other, script-specific configurations + # (e.g., props json parsing), before locking, + # call self._pre_config_lock(). If needed, this method can + # alter self.config. + self._pre_config_lock(rw_config) + self._config_lock() + + self.info("Run as %s" % rw_config.command_line) + if self.config.get("dump_config_hierarchy"): + # we only wish to dump and display what self.config is made up of, + # against the current script + args, without actually running any + # actions + self._dump_config_hierarchy(rw_config.all_cfg_files_and_dicts) + if self.config.get("dump_config"): + self.dump_config(exit_on_finish=True) + + # Collect decorated methods. We simply iterate over the attributes of + # the current class instance and look for signatures deposited by + # the decorators. + self._listeners = dict( + pre_run=[], + pre_action=[], + post_action=[], + post_run=[], + ) + for k in dir(self): + try: + item = self._getattr(k) + except Exception as e: + item = None + self.warning( + "BaseScript collecting decorated methods: " + "failure to get attribute {}: {}".format(k, str(e)) + ) + if not item: + continue + + # We only decorate methods, so ignore other types. + if not inspect.ismethod(item): + continue + + if hasattr(item, "_pre_run_listener"): + self._listeners["pre_run"].append(k) + + if hasattr(item, "_pre_action_listener"): + self._listeners["pre_action"].append((k, item._pre_action_listener)) + + if hasattr(item, "_post_action_listener"): + self._listeners["post_action"].append((k, item._post_action_listener)) + + if hasattr(item, "_post_run_listener"): + self._listeners["post_run"].append(k) + + def _getattr(self, name): + # `getattr(self, k)` will call the method `k` for any property + # access. If the property depends upon a module which has not + # been imported at the time the BaseScript initializer is + # executed, this property access will result in an + # Exception. Until Python 3's `inspect.getattr_static` is + # available, the simplest approach is to ignore the specific + # properties which are known to cause issues. Currently + # adb_path and device are ignored since they require the + # availablity of the mozdevice package which is not guaranteed + # when BaseScript is called. + property_list = set(["adb_path", "device"]) + if six.PY2: + if name in property_list: + item = None + else: + item = getattr(self, name) + else: + item = inspect.getattr_static(self, name) + if type(item) == property: + item = None + else: + item = getattr(self, name) + return item + + def _dump_config_hierarchy(self, cfg_files): + """interpret each config file used. + + This will show which keys/values are being added or overwritten by + other config files depending on their hierarchy (when they were added). + """ + # go through each config_file. We will start with the lowest and + # print its keys/values that are being used in self.config. If any + # keys/values are present in a config file with a higher precedence, + # ignore those. + dirs = self.query_abs_dirs() + cfg_files_dump_config = {} # we will dump this to file + # keep track of keys that did not come from a config file + keys_not_from_file = set(self.config.keys()) + if not cfg_files: + cfg_files = [] + self.info("Total config files: %d" % (len(cfg_files))) + if len(cfg_files): + self.info("cfg files used from lowest precedence to highest:") + for i, (target_file, target_dict) in enumerate(cfg_files): + unique_keys = set(target_dict.keys()) + unique_dict = {} + # iterate through the target_dicts remaining 'higher' cfg_files + remaining_cfgs = cfg_files[slice(i + 1, len(cfg_files))] + # where higher == more precedent + for ii, (higher_file, higher_dict) in enumerate(remaining_cfgs): + # now only keep keys/values that are not overwritten by a + # higher config + unique_keys = unique_keys.difference(set(higher_dict.keys())) + # unique_dict we know now has only keys/values that are unique to + # this config file. + unique_dict = dict((key, target_dict.get(key)) for key in unique_keys) + cfg_files_dump_config[target_file] = unique_dict + self.action_message("Config File %d: %s" % (i + 1, target_file)) + self.info(pprint.pformat(unique_dict)) + # let's also find out which keys/values from self.config are not + # from each target config file dict + keys_not_from_file = keys_not_from_file.difference(set(target_dict.keys())) + not_from_file_dict = dict( + (key, self.config.get(key)) for key in keys_not_from_file + ) + cfg_files_dump_config["not_from_cfg_file"] = not_from_file_dict + self.action_message( + "Not from any config file (default_config, " "cmd line options, etc)" + ) + self.info(pprint.pformat(not_from_file_dict)) + + # finally, let's dump this output as JSON and exit early + self.dump_config( + os.path.join(dirs["abs_log_dir"], "localconfigfiles.json"), + cfg_files_dump_config, + console_output=False, + exit_on_finish=True, + ) + + def _pre_config_lock(self, rw_config): + """This empty method can allow for config checking and manipulation + before the config lock, when overridden in scripts. + """ + pass + + def _config_lock(self): + """After this point, the config is locked and should not be + manipulated (based on mozharness.base.config.ReadOnlyDict) + """ + self.config.lock() + + def _possibly_run_method(self, method_name, error_if_missing=False): + """This is here for run().""" + if hasattr(self, method_name) and callable(self._getattr(method_name)): + return getattr(self, method_name)() + elif error_if_missing: + self.error("No such method %s!" % method_name) + + def run_action(self, action): + if action not in self.actions: + self.action_message("Skipping %s step." % action) + return + + method_name = action.replace("-", "_") + self.action_message("Running %s step." % action) + + # An exception during a pre action listener should abort execution. + for fn, target in self._listeners["pre_action"]: + if target is not None and target != action: + continue + + try: + self.info("Running pre-action listener: %s" % fn) + method = getattr(self, fn) + method(action) + except Exception: + self.error( + "Exception during pre-action for %s: %s" + % (action, traceback.format_exc()) + ) + + for fn, target in self._listeners["post_action"]: + if target is not None and target != action: + continue + + try: + self.info("Running post-action listener: %s" % fn) + method = getattr(self, fn) + method(action, success=False) + except Exception: + self.error( + "An additional exception occurred during " + "post-action for %s: %s" % (action, traceback.format_exc()) + ) + + self.fatal("Aborting due to exception in pre-action listener.") + + # We always run post action listeners, even if the main routine failed. + success = False + try: + self.info("Running main action method: %s" % method_name) + self._possibly_run_method("preflight_%s" % method_name) + self._possibly_run_method(method_name, error_if_missing=True) + self._possibly_run_method("postflight_%s" % method_name) + success = True + finally: + post_success = True + for fn, target in self._listeners["post_action"]: + if target is not None and target != action: + continue + + try: + self.info("Running post-action listener: %s" % fn) + method = getattr(self, fn) + method(action, success=success and self.return_code == 0) + except Exception: + post_success = False + self.error( + "Exception during post-action for %s: %s" + % (action, traceback.format_exc()) + ) + + step_result = "success" if success else "failed" + self.action_message("Finished %s step (%s)" % (action, step_result)) + + if not post_success: + self.fatal("Aborting due to failure in post-action listener.") + + def run(self): + """Default run method. + This is the "do everything" method, based on actions and all_actions. + + First run self.dump_config() if it exists. + Second, go through the list of all_actions. + If they're in the list of self.actions, try to run + self.preflight_ACTION(), self.ACTION(), and self.postflight_ACTION(). + + Preflight is sanity checking before doing anything time consuming or + destructive. + + Postflight is quick testing for success after an action. + + """ + for fn in self._listeners["pre_run"]: + try: + self.info("Running pre-run listener: %s" % fn) + method = getattr(self, fn) + method() + except Exception: + self.error( + "Exception during pre-run listener: %s" % traceback.format_exc() + ) + + for fn in self._listeners["post_run"]: + try: + method = getattr(self, fn) + method() + except Exception: + self.error( + "An additional exception occurred during a " + "post-run listener: %s" % traceback.format_exc() + ) + + self.fatal("Aborting due to failure in pre-run listener.") + + self.dump_config() + try: + for action in self.all_actions: + self.run_action(action) + except Exception: + self.fatal("Uncaught exception: %s" % traceback.format_exc()) + finally: + post_success = True + for fn in self._listeners["post_run"]: + try: + self.info("Running post-run listener: %s" % fn) + method = getattr(self, fn) + method() + except Exception: + post_success = False + self.error( + "Exception during post-run listener: %s" + % traceback.format_exc() + ) + + if not post_success: + self.fatal("Aborting due to failure in post-run listener.") + + return self.return_code + + def run_and_exit(self): + """Runs the script and exits the current interpreter.""" + rc = self.run() + if rc != 0: + self.warning("returning nonzero exit status %d" % rc) + sys.exit(rc) + + def clobber(self): + """ + Delete the working directory + """ + dirs = self.query_abs_dirs() + self.rmtree(dirs["abs_work_dir"], error_level=FATAL) + + def query_abs_dirs(self): + """We want to be able to determine where all the important things + are. Absolute paths lend themselves well to this, though I wouldn't + be surprised if this causes some issues somewhere. + + This should be overridden in any script that has additional dirs + to query. + + The query_* methods tend to set self.VAR variables as their + runtime cache. + """ + if self.abs_dirs: + return self.abs_dirs + c = self.config + dirs = {} + dirs["base_work_dir"] = c["base_work_dir"] + dirs["abs_work_dir"] = os.path.join(c["base_work_dir"], c["work_dir"]) + dirs["abs_log_dir"] = os.path.join(c["base_work_dir"], c.get("log_dir", "logs")) + if "GECKO_PATH" in os.environ: + dirs["abs_src_dir"] = os.environ["GECKO_PATH"] + self.abs_dirs = dirs + return self.abs_dirs + + def dump_config( + self, file_path=None, config=None, console_output=True, exit_on_finish=False + ): + """Dump self.config to localconfig.json""" + config = config or self.config + dirs = self.query_abs_dirs() + if not file_path: + file_path = os.path.join(dirs["abs_log_dir"], "localconfig.json") + self.info("Dumping config to %s." % file_path) + self.mkdir_p(os.path.dirname(file_path)) + json_config = json.dumps(config, sort_keys=True, indent=4) + fh = codecs.open(file_path, encoding="utf-8", mode="w+") + fh.write(json_config) + fh.close() + if console_output: + self.info(pprint.pformat(config)) + if exit_on_finish: + sys.exit() + + # logging {{{2 + def new_log_obj(self, default_log_level="info"): + c = self.config + log_dir = os.path.join(c["base_work_dir"], c.get("log_dir", "logs")) + log_config = { + "logger_name": "Simple", + "log_name": "log", + "log_dir": log_dir, + "log_level": default_log_level, + "log_format": "%(asctime)s %(levelname)8s - %(message)s", + "log_to_console": True, + "append_to_log": False, + } + log_type = self.config.get("log_type", "console") + for key in log_config.keys(): + value = self.config.get(key, None) + if value is not None: + log_config[key] = value + if log_type == "multi": + self.log_obj = MultiFileLogger(**log_config) + elif log_type == "simple": + self.log_obj = SimpleFileLogger(**log_config) + else: + self.log_obj = ConsoleLogger(**log_config) + + def action_message(self, message): + self.info( + "[mozharness: %sZ] %s" + % (datetime.datetime.utcnow().isoformat(" "), message) + ) + + def summary(self): + """Print out all the summary lines added via add_summary() + throughout the script. + + I'd like to revisit how to do this in a prettier fashion. + """ + self.action_message("%s summary:" % self.__class__.__name__) + if self.summary_list: + for item in self.summary_list: + try: + self.log(item["message"], level=item["level"]) + except ValueError: + """log is closed; print as a default. Ran into this + when calling from __del__()""" + print("### Log is closed! (%s)" % item["message"]) + + def add_summary(self, message, level=INFO): + self.summary_list.append({"message": message, "level": level}) + # TODO write to a summary-only log? + # Summaries need a lot more love. + self.log(message, level=level) + + def summarize_success_count( + self, success_count, total_count, message="%d of %d successful.", level=None + ): + if level is None: + level = INFO + if success_count < total_count: + level = ERROR + self.add_summary(message % (success_count, total_count), level=level) + + def get_hash_for_file(self, file_path, hash_type="sha512"): + bs = 65536 + hasher = hashlib.new(hash_type) + with open(file_path, "rb") as fh: + buf = fh.read(bs) + while len(buf) > 0: + hasher.update(buf) + buf = fh.read(bs) + return hasher.hexdigest() + + @property + def return_code(self): + return self._return_code + + @return_code.setter + def return_code(self, code): + old_return_code, self._return_code = self._return_code, code + if old_return_code != code: + self.warning("setting return code to %d" % code) diff --git a/testing/mozharness/mozharness/base/transfer.py b/testing/mozharness/mozharness/base/transfer.py new file mode 100755 index 0000000000..610e93ecc9 --- /dev/null +++ b/testing/mozharness/mozharness/base/transfer.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""Generic ways to upload + download files. +""" + +import pprint + +try: + from urllib2 import urlopen +except ImportError: + from urllib.request import urlopen + +import json + +from mozharness.base.log import DEBUG + + +# TransferMixin {{{1 +class TransferMixin(object): + """ + Generic transfer methods. + + Dependent on BaseScript. + """ + + def load_json_from_url(self, url, timeout=30, log_level=DEBUG): + self.log( + "Attempting to download %s; timeout=%i" % (url, timeout), level=log_level + ) + try: + r = urlopen(url, timeout=timeout) + j = json.load(r) + self.log(pprint.pformat(j), level=log_level) + except BaseException: + self.exception(message="Unable to download %s!" % url) + raise + return j diff --git a/testing/mozharness/mozharness/base/vcs/__init__.py b/testing/mozharness/mozharness/base/vcs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/testing/mozharness/mozharness/base/vcs/__init__.py diff --git a/testing/mozharness/mozharness/base/vcs/gittool.py b/testing/mozharness/mozharness/base/vcs/gittool.py new file mode 100644 index 0000000000..e9d0c0e2c9 --- /dev/null +++ b/testing/mozharness/mozharness/base/vcs/gittool.py @@ -0,0 +1,107 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import os +import re + +try: + import urlparse +except ImportError: + import urllib.parse as urlparse + +from mozharness.base.errors import GitErrorList, VCSException +from mozharness.base.log import LogMixin, OutputParser +from mozharness.base.script import ScriptMixin + + +class GittoolParser(OutputParser): + """ + A class that extends OutputParser such that it can find the "Got revision" + string from gittool.py output + """ + + got_revision_exp = re.compile(r"Got revision (\w+)") + got_revision = None + + def parse_single_line(self, line): + m = self.got_revision_exp.match(line) + if m: + self.got_revision = m.group(1) + super(GittoolParser, self).parse_single_line(line) + + +class GittoolVCS(ScriptMixin, LogMixin): + def __init__(self, log_obj=None, config=None, vcs_config=None, script_obj=None): + super(GittoolVCS, self).__init__() + + self.log_obj = log_obj + self.script_obj = script_obj + if config: + self.config = config + else: + self.config = {} + # vcs_config = { + # repo: repository, + # branch: branch, + # revision: revision, + # ssh_username: ssh_username, + # ssh_key: ssh_key, + # } + self.vcs_config = vcs_config + self.gittool = self.query_exe("gittool.py", return_type="list") + + def ensure_repo_and_revision(self): + """Makes sure that `dest` is has `revision` or `branch` checked out + from `repo`. + + Do what it takes to make that happen, including possibly clobbering + dest. + """ + c = self.vcs_config + for conf_item in ("dest", "repo"): + assert self.vcs_config[conf_item] + dest = os.path.abspath(c["dest"]) + repo = c["repo"] + revision = c.get("revision") + branch = c.get("branch") + clean = c.get("clean") + share_base = c.get("vcs_share_base", os.environ.get("GIT_SHARE_BASE_DIR", None)) + env = {"PATH": os.environ.get("PATH")} + env.update(c.get("env", {})) + if self._is_windows(): + # git.exe is not in the PATH by default + env["PATH"] = "%s;C:/mozilla-build/Git/bin" % env["PATH"] + # SYSTEMROOT is needed for 'import random' + if "SYSTEMROOT" not in env: + env["SYSTEMROOT"] = os.environ.get("SYSTEMROOT") + if share_base is not None: + env["GIT_SHARE_BASE_DIR"] = share_base + + cmd = self.gittool[:] + if branch: + cmd.extend(["-b", branch]) + if revision: + cmd.extend(["-r", revision]) + if clean: + cmd.append("--clean") + + for base_mirror_url in self.config.get( + "gittool_base_mirror_urls", self.config.get("vcs_base_mirror_urls", []) + ): + bits = urlparse.urlparse(repo) + mirror_url = urlparse.urljoin(base_mirror_url, bits.path) + cmd.extend(["--mirror", mirror_url]) + + cmd.extend([repo, dest]) + parser = GittoolParser( + config=self.config, log_obj=self.log_obj, error_list=GitErrorList + ) + retval = self.run_command( + cmd, error_list=GitErrorList, env=env, output_parser=parser + ) + + if retval != 0: + raise VCSException("Unable to checkout") + + return parser.got_revision diff --git a/testing/mozharness/mozharness/base/vcs/mercurial.py b/testing/mozharness/mozharness/base/vcs/mercurial.py new file mode 100755 index 0000000000..63b0d27c34 --- /dev/null +++ b/testing/mozharness/mozharness/base/vcs/mercurial.py @@ -0,0 +1,478 @@ +#!/usr/bin/env python +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""Mercurial VCS support. +""" + +import hashlib +import os +import re +import subprocess +import sys +from collections import namedtuple + +try: + from urlparse import urlsplit +except ImportError: + from urllib.parse import urlsplit + +import mozharness +from mozharness.base.errors import HgErrorList, VCSException +from mozharness.base.log import LogMixin, OutputParser +from mozharness.base.script import ScriptMixin +from mozharness.base.transfer import TransferMixin + +sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.dirname(sys.path[0])))) + + +external_tools_path = os.path.join( + os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))), + "external_tools", +) + + +HG_OPTIONS = ["--config", "ui.merge=internal:merge"] + +# MercurialVCS {{{1 +# TODO Make the remaining functions more mozharness-friendly. +# TODO Add the various tag functionality that are currently in +# build/tools/scripts to MercurialVCS -- generic tagging logic belongs here. +REVISION, BRANCH = 0, 1 + + +class RepositoryUpdateRevisionParser(OutputParser): + """Parse `hg pull` output for "repository unrelated" errors.""" + + revision = None + RE_UPDATED = re.compile("^updated to ([a-f0-9]{40})$") + + def parse_single_line(self, line): + m = self.RE_UPDATED.match(line) + if m: + self.revision = m.group(1) + + return super(RepositoryUpdateRevisionParser, self).parse_single_line(line) + + +def make_hg_url(hg_host, repo_path, protocol="http", revision=None, filename=None): + """Helper function. + + Construct a valid hg url from a base hg url (hg.mozilla.org), + repo_path, revision and possible filename + """ + base = "%s://%s" % (protocol, hg_host) + repo = "/".join(p.strip("/") for p in [base, repo_path]) + if not filename: + if not revision: + return repo + else: + return "/".join([p.strip("/") for p in [repo, "rev", revision]]) + else: + assert revision + return "/".join([p.strip("/") for p in [repo, "raw-file", revision, filename]]) + + +class MercurialVCS(ScriptMixin, LogMixin, TransferMixin): + # For the most part, scripts import mercurial, update + # tag-release.py imports + # apply_and_push, update, get_revision, out, BRANCH, REVISION, + # get_branches, cleanOutgoingRevs + + def __init__(self, log_obj=None, config=None, vcs_config=None, script_obj=None): + super(MercurialVCS, self).__init__() + self.can_share = None + self.log_obj = log_obj + self.script_obj = script_obj + if config: + self.config = config + else: + self.config = {} + # vcs_config = { + # hg_host: hg_host, + # repo: repository, + # branch: branch, + # revision: revision, + # ssh_username: ssh_username, + # ssh_key: ssh_key, + # } + self.vcs_config = vcs_config or {} + self.hg = self.query_exe("hg", return_type="list") + HG_OPTIONS + + def _make_absolute(self, repo): + if repo.startswith("file://"): + path = repo[len("file://") :] + repo = "file://%s" % os.path.abspath(path) + elif "://" not in repo: + repo = os.path.abspath(repo) + return repo + + def get_repo_name(self, repo): + return repo.rstrip("/").split("/")[-1] + + def get_repo_path(self, repo): + repo = self._make_absolute(repo) + if repo.startswith("/"): + return repo.lstrip("/") + else: + return urlsplit(repo).path.lstrip("/") + + def get_revision_from_path(self, path): + """Returns which revision directory `path` currently has checked out.""" + return self.get_output_from_command( + self.hg + ["parent", "--template", "{node}"], cwd=path + ) + + def get_branch_from_path(self, path): + branch = self.get_output_from_command(self.hg + ["branch"], cwd=path) + return str(branch).strip() + + def get_branches_from_path(self, path): + branches = [] + for line in self.get_output_from_command( + self.hg + ["branches", "-c"], cwd=path + ).splitlines(): + branches.append(line.split()[0]) + return branches + + def hg_ver(self): + """Returns the current version of hg, as a tuple of + (major, minor, build)""" + ver_string = self.get_output_from_command(self.hg + ["-q", "version"]) + match = re.search(r"\(version ([0-9.]+)\)", ver_string) + if match: + bits = match.group(1).split(".") + if len(bits) < 3: + bits += (0,) + ver = tuple(int(b) for b in bits) + else: + ver = (0, 0, 0) + self.debug("Running hg version %s" % str(ver)) + return ver + + def update(self, dest, branch=None, revision=None): + """Updates working copy `dest` to `branch` or `revision`. + If revision is set, branch will be ignored. + If neither is set then the working copy will be updated to the + latest revision on the current branch. Local changes will be + discarded. + """ + # If we have a revision, switch to that + msg = "Updating %s" % dest + if branch: + msg += " to branch %s" % branch + if revision: + msg += " revision %s" % revision + self.info("%s." % msg) + if revision is not None: + cmd = self.hg + ["update", "-C", "-r", revision] + if self.run_command(cmd, cwd=dest, error_list=HgErrorList): + raise VCSException("Unable to update %s to %s!" % (dest, revision)) + else: + # Check & switch branch + local_branch = self.get_branch_from_path(dest) + + cmd = self.hg + ["update", "-C"] + + # If this is different, checkout the other branch + if branch and branch != local_branch: + cmd.append(branch) + + if self.run_command(cmd, cwd=dest, error_list=HgErrorList): + raise VCSException("Unable to update %s!" % dest) + return self.get_revision_from_path(dest) + + def clone(self, repo, dest, branch=None, revision=None, update_dest=True): + """Clones hg repo and places it at `dest`, replacing whatever else + is there. The working copy will be empty. + + If `revision` is set, only the specified revision and its ancestors + will be cloned. If revision is set, branch is ignored. + + If `update_dest` is set, then `dest` will be updated to `revision` + if set, otherwise to `branch`, otherwise to the head of default. + """ + msg = "Cloning %s to %s" % (repo, dest) + if branch: + msg += " on branch %s" % branch + if revision: + msg += " to revision %s" % revision + self.info("%s." % msg) + parent_dest = os.path.dirname(dest) + if parent_dest and not os.path.exists(parent_dest): + self.mkdir_p(parent_dest) + if os.path.exists(dest): + self.info("Removing %s before clone." % dest) + self.rmtree(dest) + + cmd = self.hg + ["clone"] + if not update_dest: + cmd.append("-U") + + if revision: + cmd.extend(["-r", revision]) + elif branch: + # hg >= 1.6 supports -b branch for cloning + ver = self.hg_ver() + if ver >= (1, 6, 0): + cmd.extend(["-b", branch]) + + cmd.extend([repo, dest]) + output_timeout = self.config.get( + "vcs_output_timeout", self.vcs_config.get("output_timeout") + ) + if ( + self.run_command(cmd, error_list=HgErrorList, output_timeout=output_timeout) + != 0 + ): + raise VCSException("Unable to clone %s to %s!" % (repo, dest)) + + if update_dest: + return self.update(dest, branch, revision) + + def common_args(self, revision=None, branch=None, ssh_username=None, ssh_key=None): + """Fill in common hg arguments, encapsulating logic checks that + depend on mercurial versions and provided arguments + """ + args = [] + if ssh_username or ssh_key: + opt = ["-e", "ssh"] + if ssh_username: + opt[1] += " -l %s" % ssh_username + if ssh_key: + opt[1] += " -i %s" % ssh_key + args.extend(opt) + if revision: + args.extend(["-r", revision]) + elif branch: + if self.hg_ver() >= (1, 6, 0): + args.extend(["-b", branch]) + return args + + def pull(self, repo, dest, update_dest=True, **kwargs): + """Pulls changes from hg repo and places it in `dest`. + + If `revision` is set, only the specified revision and its ancestors + will be pulled. + + If `update_dest` is set, then `dest` will be updated to `revision` + if set, otherwise to `branch`, otherwise to the head of default. + """ + msg = "Pulling %s to %s" % (repo, dest) + if update_dest: + msg += " and updating" + self.info("%s." % msg) + if not os.path.exists(dest): + # Error or clone? + # If error, should we have a halt_on_error=False above? + self.error("Can't hg pull in nonexistent directory %s." % dest) + return -1 + # Convert repo to an absolute path if it's a local repository + repo = self._make_absolute(repo) + cmd = self.hg + ["pull"] + cmd.extend(self.common_args(**kwargs)) + cmd.append(repo) + output_timeout = self.config.get( + "vcs_output_timeout", self.vcs_config.get("output_timeout") + ) + if ( + self.run_command( + cmd, cwd=dest, error_list=HgErrorList, output_timeout=output_timeout + ) + != 0 + ): + raise VCSException("Can't pull in %s!" % dest) + + if update_dest: + branch = self.vcs_config.get("branch") + revision = self.vcs_config.get("revision") + return self.update(dest, branch=branch, revision=revision) + + # Defines the places of attributes in the tuples returned by `out' + + def out(self, src, remote, **kwargs): + """Check for outgoing changesets present in a repo""" + self.info("Checking for outgoing changesets from %s to %s." % (src, remote)) + cmd = self.hg + ["-q", "out", "--template", "{node} {branches}\n"] + cmd.extend(self.common_args(**kwargs)) + cmd.append(remote) + if os.path.exists(src): + try: + revs = [] + for line in ( + self.get_output_from_command(cmd, cwd=src, throw_exception=True) + .rstrip() + .split("\n") + ): + try: + rev, branch = line.split() + # Mercurial displays no branch at all if the revision + # is on "default" + except ValueError: + rev = line.rstrip() + branch = "default" + revs.append((rev, branch)) + return revs + except subprocess.CalledProcessError as inst: + # In some situations, some versions of Mercurial return "1" + # if no changes are found, so we need to ignore this return + # code + if inst.returncode == 1: + return [] + raise + + def push(self, src, remote, push_new_branches=True, **kwargs): + # This doesn't appear to work with hg_ver < (1, 6, 0). + # Error out, or let you try? + self.info("Pushing new changes from %s to %s." % (src, remote)) + cmd = self.hg + ["push"] + cmd.extend(self.common_args(**kwargs)) + if push_new_branches and self.hg_ver() >= (1, 6, 0): + cmd.append("--new-branch") + cmd.append(remote) + status = self.run_command( + cmd, + cwd=src, + error_list=HgErrorList, + success_codes=(0, 1), + return_type="num_errors", + ) + if status: + raise VCSException("Can't push %s to %s!" % (src, remote)) + return status + + @property + def robustcheckout_path(self): + """Path to the robustcheckout extension.""" + ext = os.path.join(external_tools_path, "robustcheckout.py") + if os.path.exists(ext): + return ext + + def ensure_repo_and_revision(self): + """Makes sure that `dest` is has `revision` or `branch` checked out + from `repo`. + + Do what it takes to make that happen, including possibly clobbering + dest. + """ + c = self.vcs_config + dest = c["dest"] + repo_url = c["repo"] + rev = c.get("revision") + branch = c.get("branch") + purge = c.get("clone_with_purge", False) + upstream = c.get("clone_upstream_url") + + # The API here is kind of bad because we're relying on state in + # self.vcs_config instead of passing arguments. This confuses + # scripts that have multiple repos. This includes the clone_tools() + # step :( + + if not rev and not branch: + self.warning('did not specify revision or branch; assuming "default"') + branch = "default" + + share_base = c.get("vcs_share_base") or os.environ.get("HG_SHARE_BASE_DIR") + if share_base and c.get("use_vcs_unique_share"): + # Bug 1277041 - update migration scripts to support robustcheckout + # fake a share but don't really share + share_base = os.path.join(share_base, hashlib.md5(dest).hexdigest()) + + # We require shared storage is configured because it guarantees we + # only have 1 local copy of logical repo stores. + if not share_base: + raise VCSException( + "vcs share base not defined; " "refusing to operate sub-optimally" + ) + + if not self.robustcheckout_path: + raise VCSException("could not find the robustcheckout Mercurial extension") + + # Log HG version and install info to aid debugging. + self.run_command(self.hg + ["--version"]) + self.run_command(self.hg + ["debuginstall", "--config=ui.username=worker"]) + + args = self.hg + [ + "--config", + "extensions.robustcheckout=%s" % self.robustcheckout_path, + "robustcheckout", + repo_url, + dest, + "--sharebase", + share_base, + ] + if purge: + args.append("--purge") + if upstream: + args.extend(["--upstream", upstream]) + + if rev: + args.extend(["--revision", rev]) + if branch: + args.extend(["--branch", branch]) + + parser = RepositoryUpdateRevisionParser( + config=self.config, log_obj=self.log_obj + ) + if self.run_command(args, output_parser=parser): + raise VCSException("repo checkout failed!") + + if not parser.revision: + raise VCSException("could not identify revision updated to") + + return parser.revision + + def cleanOutgoingRevs(self, reponame, remote, username, sshKey): + # TODO retry + self.info("Wiping outgoing local changes from %s to %s." % (reponame, remote)) + outgoingRevs = self.out( + src=reponame, remote=remote, ssh_username=username, ssh_key=sshKey + ) + for r in reversed(outgoingRevs): + self.run_command( + self.hg + ["strip", "-n", r[REVISION]], + cwd=reponame, + error_list=HgErrorList, + ) + + def query_pushinfo(self, repository, revision): + """Query the pushdate and pushid of a repository/revision. + This is intended to be used on hg.mozilla.org/mozilla-central and + similar. It may or may not work for other hg repositories. + """ + PushInfo = namedtuple("PushInfo", ["pushid", "pushdate"]) + + try: + url = "%s/json-pushes?changeset=%s" % (repository, revision) + self.info("Pushdate URL is: %s" % url) + contents = self.retry(self.load_json_from_url, args=(url,)) + + # The contents should be something like: + # { + # "28537": { + # "changesets": [ + # "1d0a914ae676cc5ed203cdc05c16d8e0c22af7e5", + # ], + # "date": 1428072488, + # "user": "user@mozilla.com" + # } + # } + # + # So we grab the first element ("28537" in this case) and then pull + # out the 'date' field. + pushid = next(contents.keys()) + self.info("Pushid is: %s" % pushid) + pushdate = contents[pushid]["date"] + self.info("Pushdate is: %s" % pushdate) + return PushInfo(pushid, pushdate) + + except Exception: + self.exception("Failed to get push info from hg.mozilla.org") + raise + + +# __main__ {{{1 +if __name__ == "__main__": + pass diff --git a/testing/mozharness/mozharness/base/vcs/vcsbase.py b/testing/mozharness/mozharness/base/vcs/vcsbase.py new file mode 100755 index 0000000000..c587a8b1ca --- /dev/null +++ b/testing/mozharness/mozharness/base/vcs/vcsbase.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python +# ***** BEGIN LICENSE BLOCK ***** +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. +# ***** END LICENSE BLOCK ***** +"""Generic VCS support. +""" + +import os +import sys +from copy import deepcopy + +from mozharness.base.errors import VCSException +from mozharness.base.log import FATAL +from mozharness.base.script import BaseScript +from mozharness.base.vcs.gittool import GittoolVCS +from mozharness.base.vcs.mercurial import MercurialVCS + +sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.dirname(sys.path[0])))) + + +# Update this with supported VCS name : VCS object +VCS_DICT = { + "hg": MercurialVCS, + "gittool": GittoolVCS, +} + + +# VCSMixin {{{1 +class VCSMixin(object): + """Basic VCS methods that are vcs-agnostic. + The vcs_class handles all the vcs-specific tasks. + """ + + def query_dest(self, kwargs): + if "dest" in kwargs: + return kwargs["dest"] + dest = os.path.basename(kwargs["repo"]) + # Git fun + if dest.endswith(".git"): + dest = dest.replace(".git", "") + return dest + + def _get_revision(self, vcs_obj, dest): + try: + got_revision = vcs_obj.ensure_repo_and_revision() + if got_revision: + return got_revision + except VCSException: + self.rmtree(dest) + raise + + def _get_vcs_class(self, vcs): + vcs = vcs or self.config.get("default_vcs", getattr(self, "default_vcs", None)) + vcs_class = VCS_DICT.get(vcs) + return vcs_class + + def vcs_checkout(self, vcs=None, error_level=FATAL, **kwargs): + """Check out a single repo.""" + c = self.config + vcs_class = self._get_vcs_class(vcs) + if not vcs_class: + self.error("Running vcs_checkout with kwargs %s" % str(kwargs)) + raise VCSException("No VCS set!") + # need a better way to do this. + if "dest" not in kwargs: + kwargs["dest"] = self.query_dest(kwargs) + if "vcs_share_base" not in kwargs: + kwargs["vcs_share_base"] = c.get( + "%s_share_base" % vcs, c.get("vcs_share_base") + ) + vcs_obj = vcs_class( + log_obj=self.log_obj, + config=self.config, + vcs_config=kwargs, + script_obj=self, + ) + return self.retry( + self._get_revision, + error_level=error_level, + error_message="Automation Error: Can't checkout %s!" % kwargs["repo"], + args=(vcs_obj, kwargs["dest"]), + ) + + def vcs_checkout_repos( + self, repo_list, parent_dir=None, tag_override=None, **kwargs + ): + """Check out a list of repos.""" + orig_dir = os.getcwd() + c = self.config + if not parent_dir: + parent_dir = os.path.join(c["base_work_dir"], c["work_dir"]) + self.mkdir_p(parent_dir) + self.chdir(parent_dir) + revision_dict = {} + kwargs_orig = deepcopy(kwargs) + for repo_dict in repo_list: + kwargs = deepcopy(kwargs_orig) + kwargs.update(repo_dict) + if tag_override: + kwargs["branch"] = tag_override + dest = self.query_dest(kwargs) + revision_dict[dest] = {"repo": kwargs["repo"]} + revision_dict[dest]["revision"] = self.vcs_checkout(**kwargs) + self.chdir(orig_dir) + return revision_dict + + def vcs_query_pushinfo(self, repository, revision, vcs=None): + """Query the pushid/pushdate of a repository/revision + Returns a namedtuple with "pushid" and "pushdate" elements + """ + vcs_class = self._get_vcs_class(vcs) + if not vcs_class: + raise VCSException("No VCS set in vcs_query_pushinfo!") + vcs_obj = vcs_class( + log_obj=self.log_obj, + config=self.config, + script_obj=self, + ) + return vcs_obj.query_pushinfo(repository, revision) + + +class VCSScript(VCSMixin, BaseScript): + def __init__(self, **kwargs): + super(VCSScript, self).__init__(**kwargs) + + def pull(self, repos=None, parent_dir=None): + repos = repos or self.config.get("repos") + if not repos: + self.info("Pull has nothing to do!") + return + dirs = self.query_abs_dirs() + parent_dir = parent_dir or dirs["abs_work_dir"] + return self.vcs_checkout_repos(repos, parent_dir=parent_dir) + + +# Specific VCS stubs {{{1 +# For ease of use. +# This is here instead of mercurial.py because importing MercurialVCS into +# vcsbase from mercurial, and importing VCSScript into mercurial from +# vcsbase, was giving me issues. +class MercurialScript(VCSScript): + default_vcs = "hg" + + +# __main__ {{{1 +if __name__ == "__main__": + pass |