Diffstat (limited to 'testing/mozharness/mozharness/base')
-rw-r--r--  testing/mozharness/mozharness/base/__init__.py           0
-rw-r--r--  testing/mozharness/mozharness/base/config.py           693
-rw-r--r--  testing/mozharness/mozharness/base/diskutils.py        170
-rwxr-xr-x  testing/mozharness/mozharness/base/errors.py           164
-rwxr-xr-x  testing/mozharness/mozharness/base/log.py              783
-rwxr-xr-x  testing/mozharness/mozharness/base/parallel.py          35
-rw-r--r--  testing/mozharness/mozharness/base/python.py          1182
-rw-r--r--  testing/mozharness/mozharness/base/script.py          2551
-rwxr-xr-x  testing/mozharness/mozharness/base/transfer.py          41
-rw-r--r--  testing/mozharness/mozharness/base/vcs/__init__.py       0
-rw-r--r--  testing/mozharness/mozharness/base/vcs/gittool.py      107
-rwxr-xr-x  testing/mozharness/mozharness/base/vcs/mercurial.py    478
-rwxr-xr-x  testing/mozharness/mozharness/base/vcs/vcsbase.py      149
13 files changed, 6353 insertions, 0 deletions
diff --git a/testing/mozharness/mozharness/base/__init__.py b/testing/mozharness/mozharness/base/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/testing/mozharness/mozharness/base/__init__.py
diff --git a/testing/mozharness/mozharness/base/config.py b/testing/mozharness/mozharness/base/config.py
new file mode 100644
index 0000000000..d12b3aecad
--- /dev/null
+++ b/testing/mozharness/mozharness/base/config.py
@@ -0,0 +1,693 @@
+#!/usr/bin/env python
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Generic config parsing and dumping, the way I remember it from scripts
+gone by.
+
+The config should be built from script-level defaults, overlaid by
+config-file defaults, overlaid by command line options.
+
+ (For buildbot-analogues that would be factory-level defaults,
+ builder-level defaults, and build request/scheduler settings.)
+
+The config should then be locked (set to read-only, to prevent runtime
+alterations). Afterwards we should dump the config to a file that is
+uploaded with the build, and can be used to debug or replicate the build
+at a later time.
+
+TODO:
+
+* check_required_settings or something -- run at init, assert that
+ these settings are set.
+"""
+
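+# Illustrative sketch of the layering described above (the file and key
+# names here are hypothetical, not part of this module):
+#
+#     config = {"work_dir": "build"}                 # script-level default
+#     config.update(parse_config_file("my.json"))    # config-file overlay
+#     config.update({"work_dir": "obj"})             # command-line overlay
+#     config = ReadOnlyDict(config)
+#     config.lock()                                  # now read-only
+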
+import os
+import socket
+import sys
+import time
+from copy import deepcopy
+from optparse import Option, OptionGroup, OptionParser
+
+from mozharness.base.log import CRITICAL, DEBUG, ERROR, FATAL, INFO, WARNING
+
+try:
+ from urllib2 import URLError, urlopen
+except ImportError:
+ from urllib.error import URLError
+ from urllib.request import urlopen
+
+
+try:
+ import simplejson as json
+except ImportError:
+ import json
+
+
+# optparse {{{1
+class ExtendedOptionParser(OptionParser):
+ """OptionParser, but with ExtendOption as the option_class."""
+
+ def __init__(self, **kwargs):
+ kwargs["option_class"] = ExtendOption
+ OptionParser.__init__(self, **kwargs)
+
+
+class ExtendOption(Option):
+ """from http://docs.python.org/library/optparse.html?highlight=optparse#adding-new-actions"""
+
+ ACTIONS = Option.ACTIONS + ("extend",)
+ STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
+ TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)
+ ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)
+
+ def take_action(self, action, dest, opt, value, values, parser):
+ if action == "extend":
+ lvalue = value.split(",")
+ values.ensure_value(dest, []).extend(lvalue)
+ else:
+ Option.take_action(self, action, dest, opt, value, values, parser)
+
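+# Illustrative sketch of the "extend" action above: repeated and
+# comma-separated values accumulate into a single list (the option name
+# below is hypothetical):
+#
+#     parser = ExtendedOptionParser()
+#     parser.add_option("--file", action="extend", dest="files")
+#     opts, _ = parser.parse_args(["--file", "a.py", "--file", "b.py,c.py"])
+#     assert opts.files == ["a.py", "b.py", "c.py"]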
+
+def make_immutable(item):
+ if isinstance(item, list) or isinstance(item, tuple):
+ result = LockedTuple(item)
+ elif isinstance(item, dict):
+ result = ReadOnlyDict(item)
+ result.lock()
+ else:
+ result = item
+ return result
+
+
+class LockedTuple(tuple):
+ def __new__(cls, items):
+ return tuple.__new__(cls, (make_immutable(x) for x in items))
+
+ def __deepcopy__(self, memo):
+ return [deepcopy(elem, memo) for elem in self]
+
+
+# ReadOnlyDict {{{1
+class ReadOnlyDict(dict):
+ def __init__(self, dictionary):
+ self._lock = False
+ self.update(dictionary.copy())
+
+ def _check_lock(self):
+ assert not self._lock, "ReadOnlyDict is locked!"
+
+ def lock(self):
+ for (k, v) in list(self.items()):
+ self[k] = make_immutable(v)
+ self._lock = True
+
+ def __setitem__(self, *args):
+ self._check_lock()
+ return dict.__setitem__(self, *args)
+
+ def __delitem__(self, *args):
+ self._check_lock()
+ return dict.__delitem__(self, *args)
+
+ def clear(self, *args):
+ self._check_lock()
+ return dict.clear(self, *args)
+
+ def pop(self, *args):
+ self._check_lock()
+ return dict.pop(self, *args)
+
+ def popitem(self, *args):
+ self._check_lock()
+ return dict.popitem(self, *args)
+
+ def setdefault(self, *args):
+ self._check_lock()
+ return dict.setdefault(self, *args)
+
+ def update(self, *args):
+ self._check_lock()
+ dict.update(self, *args)
+
+ def __deepcopy__(self, memo):
+ cls = self.__class__
+ result = cls.__new__(cls)
+ memo[id(self)] = result
+ for k, v in list(self.__dict__.items()):
+ setattr(result, k, deepcopy(v, memo))
+ result._lock = False
+ for k, v in list(self.items()):
+ result[k] = deepcopy(v, memo)
+ return result
+
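+# Illustrative sketch: mutation works until lock(); afterwards any write
+# raises AssertionError, and nested containers become immutable too:
+#
+#     d = ReadOnlyDict({"env": {"MOZ": "1"}, "paths": ["a", "b"]})
+#     d["extra"] = True            # fine, not locked yet
+#     d.lock()
+#     isinstance(d["paths"], LockedTuple)   # True; lists became tuples
+#     d["extra"] = False           # AssertionError: ReadOnlyDict is locked!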
+
+DEFAULT_CONFIG_PATH = os.path.join(
+ os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
+ "configs",
+)
+
+
+# parse_config_file {{{1
+def parse_config_file(
+ file_name, quiet=False, search_path=None, config_dict_name="config"
+):
+ """Read a config file and return a dictionary."""
+ file_path = None
+ if os.path.exists(file_name):
+ file_path = file_name
+ else:
+ if not search_path:
+ search_path = [".", DEFAULT_CONFIG_PATH]
+ for path in search_path:
+ if os.path.exists(os.path.join(path, file_name)):
+ file_path = os.path.join(path, file_name)
+ break
+ else:
+ raise IOError("Can't find %s in %s!" % (file_name, search_path))
+ if file_name.endswith(".py"):
+ global_dict = {}
+ local_dict = {}
+ exec(
+ compile(open(file_path, "rb").read(), file_path, "exec"),
+ global_dict,
+ local_dict,
+ )
+ config = local_dict[config_dict_name]
+ elif file_name.endswith(".json"):
+ fh = open(file_path)
+ config = {}
+ json_config = json.load(fh)
+ config = dict(json_config)
+ fh.close()
+ else:
+ raise RuntimeError(
+ "Unknown config file type %s! (config files must end in .json or .py)"
+ % file_name
+ )
+ # TODO return file_path
+ return config
+
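+# Illustrative sketch: a .py config file must define a dict named "config"
+# (or whatever config_dict_name says); a .json file is loaded as-is.
+# The file below is hypothetical:
+#
+#     # my_config.py
+#     config = {"work_dir": "build", "log_level": "debug"}
+#
+#     cfg = parse_config_file("my_config.py")
+#     assert cfg["log_level"] == "debug"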
+
+def download_config_file(url, file_name):
+ n = 0
+ attempts = 5
+ sleeptime = 60
+ max_sleeptime = 5 * 60
+ while True:
+ if n >= attempts:
+ print(
+ "Failed to download from url %s after %d attempts, quiting..."
+ % (url, attempts)
+ )
+ raise SystemError(-1)
+ try:
+ contents = urlopen(url, timeout=30).read()
+ break
+ except URLError as e:
+ print("Error downloading from url %s: %s" % (url, str(e)))
+ except socket.timeout as e:
+ print("Time out accessing %s: %s" % (url, str(e)))
+ except socket.error as e:
+ print("Socket error when accessing %s: %s" % (url, str(e)))
+ print("Sleeping %d seconds before retrying" % sleeptime)
+ time.sleep(sleeptime)
+ sleeptime = sleeptime * 2
+ if sleeptime > max_sleeptime:
+ sleeptime = max_sleeptime
+ n += 1
+
+ try:
+ f = open(file_name, "w")
+ f.write(contents)
+ f.close()
+ except IOError as e:
+ print("Error writing downloaded contents to file %s: %s" % (file_name, str(e)))
+ raise SystemError(-1)
+
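+# Illustrative note on the retry loop above: with the defaults, the sleeps
+# between the five attempts go 60 -> 120 -> 240 -> 300 -> 300 seconds
+# (doubling, capped at max_sleeptime) before SystemError is raised.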
+
+# BaseConfig {{{1
+class BaseConfig(object):
+ """Basic config setting/getting."""
+
+ def __init__(
+ self,
+ config=None,
+ initial_config_file=None,
+ config_options=None,
+ all_actions=None,
+ default_actions=None,
+ volatile_config=None,
+ option_args=None,
+ require_config_file=False,
+ append_env_variables_from_configs=False,
+ usage="usage: %prog [options]",
+ ):
+ self._config = {}
+ self.all_cfg_files_and_dicts = []
+ self.actions = []
+ self.config_lock = False
+ self.require_config_file = require_config_file
+        # Allows appending env variables from multiple config files
+ self.append_env_variables_from_configs = append_env_variables_from_configs
+
+ if all_actions:
+ self.all_actions = all_actions[:]
+ else:
+ self.all_actions = ["clobber", "build"]
+ if default_actions:
+ self.default_actions = default_actions[:]
+ else:
+ self.default_actions = self.all_actions[:]
+ if volatile_config is None:
+ self.volatile_config = {
+ "actions": None,
+ "add_actions": None,
+ "no_actions": None,
+ }
+ else:
+ self.volatile_config = deepcopy(volatile_config)
+
+ if config:
+ self.set_config(config)
+ if initial_config_file:
+ initial_config = parse_config_file(initial_config_file)
+ self.all_cfg_files_and_dicts.append((initial_config_file, initial_config))
+ self.set_config(initial_config)
+ # Since initial_config_file is only set when running unit tests,
+ # if no option_args have been specified, then the parser will
+ # parse sys.argv which in this case would be the command line
+ # options specified to run the tests, e.g. nosetests -v. Clearly,
+ # the options passed to nosetests (such as -v) should not be
+ # interpreted by mozharness as mozharness options, so we specify
+ # a dummy command line with no options, so that the parser does
+ # not add anything from the test invocation command line
+ # arguments to the mozharness options.
+ if option_args is None:
+ option_args = [
+ "dummy_mozharness_script_with_no_command_line_options.py"
+ ]
+ if config_options is None:
+ config_options = []
+ self._create_config_parser(config_options, usage)
+        # we allow manual passing of option args for things like nosetests
+ self.parse_args(args=option_args)
+
+ def get_read_only_config(self):
+ return ReadOnlyDict(self._config)
+
+ def _create_config_parser(self, config_options, usage):
+ self.config_parser = ExtendedOptionParser(usage=usage)
+ self.config_parser.add_option(
+ "--work-dir",
+ action="store",
+ dest="work_dir",
+ type="string",
+ default="build",
+ help="Specify the work_dir (subdir of base_work_dir)",
+ )
+ self.config_parser.add_option(
+ "--base-work-dir",
+ action="store",
+ dest="base_work_dir",
+ type="string",
+ default=os.getcwd(),
+ help="Specify the absolute path of the parent of the working directory",
+ )
+ self.config_parser.add_option(
+ "--extra-config-path",
+ action="extend",
+ dest="config_paths",
+ type="string",
+ help="Specify additional paths to search for config files.",
+ )
+ self.config_parser.add_option(
+ "-c",
+ "--config-file",
+ "--cfg",
+ action="extend",
+ dest="config_files",
+ default=[],
+ type="string",
+ help="Specify a config file; can be repeated",
+ )
+ self.config_parser.add_option(
+ "-C",
+ "--opt-config-file",
+ "--opt-cfg",
+ action="extend",
+ dest="opt_config_files",
+ type="string",
+ default=[],
+ help="Specify an optional config file, like --config-file but with no "
+ "error if the file is missing; can be repeated",
+ )
+ self.config_parser.add_option(
+ "--dump-config",
+ action="store_true",
+ dest="dump_config",
+ help="List and dump the config generated from this run to " "a JSON file.",
+ )
+ self.config_parser.add_option(
+ "--dump-config-hierarchy",
+ action="store_true",
+ dest="dump_config_hierarchy",
+ help="Like --dump-config but will list and dump which config "
+ "files were used making up the config and specify their own "
+ "keys/values that were not overwritten by another cfg -- "
+ "held the highest hierarchy.",
+ )
+ self.config_parser.add_option(
+ "--append-env-variables-from-configs",
+ action="store_true",
+ dest="append_env_variables_from_configs",
+ help="Merge environment variables from config files.",
+ )
+
+ # Logging
+ log_option_group = OptionGroup(self.config_parser, "Logging")
+ log_option_group.add_option(
+ "--log-level",
+ action="store",
+ type="choice",
+ dest="log_level",
+ default=INFO,
+ choices=[DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL],
+ help="Set log level (debug|info|warning|error|critical|fatal)",
+ )
+ log_option_group.add_option(
+ "-q",
+ "--quiet",
+ action="store_false",
+ dest="log_to_console",
+ default=True,
+ help="Don't log to the console",
+ )
+ log_option_group.add_option(
+ "--append-to-log",
+ action="store_true",
+ dest="append_to_log",
+ default=False,
+ help="Append to the log",
+ )
+ log_option_group.add_option(
+ "--multi-log",
+ action="store_const",
+ const="multi",
+ dest="log_type",
+ help="Log using MultiFileLogger",
+ )
+ log_option_group.add_option(
+ "--simple-log",
+ action="store_const",
+ const="simple",
+ dest="log_type",
+ help="Log using SimpleFileLogger",
+ )
+ self.config_parser.add_option_group(log_option_group)
+
+ # Actions
+ action_option_group = OptionGroup(
+ self.config_parser,
+ "Actions",
+ "Use these options to list or enable/disable actions.",
+ )
+ action_option_group.add_option(
+ "--list-actions",
+ action="store_true",
+ dest="list_actions",
+ help="List all available actions, then exit",
+ )
+ action_option_group.add_option(
+ "--add-action",
+ action="extend",
+ dest="add_actions",
+ metavar="ACTIONS",
+ help="Add action %s to the list of actions" % self.all_actions,
+ )
+ action_option_group.add_option(
+ "--no-action",
+ action="extend",
+ dest="no_actions",
+ metavar="ACTIONS",
+ help="Don't perform action",
+ )
+ action_option_group.add_option(
+ "--requires-gpu",
+ action="store_true",
+ dest="requires_gpu",
+ default=False,
+ help="Indicates if the task requires gpu. ",
+ )
+ for action in self.all_actions:
+ action_option_group.add_option(
+ "--%s" % action,
+ action="append_const",
+ dest="actions",
+ const=action,
+ help="Add %s to the limited list of actions" % action,
+ )
+ action_option_group.add_option(
+ "--no-%s" % action,
+ action="append_const",
+ dest="no_actions",
+ const=action,
+ help="Remove %s from the list of actions to perform" % action,
+ )
+ self.config_parser.add_option_group(action_option_group)
+ # Child-specified options
+ # TODO error checking for overlapping options
+ if config_options:
+ for option in config_options:
+ self.config_parser.add_option(*option[0], **option[1])
+
+ # Initial-config-specified options
+ config_options = self._config.get("config_options", None)
+ if config_options:
+ for option in config_options:
+ self.config_parser.add_option(*option[0], **option[1])
+
+ def set_config(self, config, overwrite=False):
+ """This is probably doable some other way."""
+ if self._config and not overwrite:
+ self._config.update(config)
+ else:
+ self._config = config
+ return self._config
+
+ def get_actions(self):
+ return self.actions
+
+ def verify_actions(self, action_list, quiet=False):
+ for action in action_list:
+ if action not in self.all_actions:
+ if not quiet:
+ print("Invalid action %s not in %s!" % (action, self.all_actions))
+ raise SystemExit(-1)
+ return action_list
+
+ def verify_actions_order(self, action_list):
+ try:
+ indexes = [self.all_actions.index(elt) for elt in action_list]
+ sorted_indexes = sorted(indexes)
+ for i in range(len(indexes)):
+ if indexes[i] != sorted_indexes[i]:
+ print(
+ ("Action %s comes in different order in %s\n" + "than in %s")
+ % (action_list[i], action_list, self.all_actions)
+ )
+ raise SystemExit(-1)
+ except ValueError as e:
+ print("Invalid action found: " + str(e))
+ raise SystemExit(-1)
+
+ def list_actions(self):
+ print("Actions available:")
+ for a in self.all_actions:
+ print(" " + ("*" if a in self.default_actions else " "), a)
+ raise SystemExit(0)
+
+ def get_cfgs_from_files(self, all_config_files, options):
+ """Returns the configuration derived from the list of configuration
+ files. The result is represented as a list of `(filename,
+ config_dict)` tuples; they will be combined with keys in later
+ dictionaries taking precedence over earlier.
+
+ `all_config_files` is all files specified with `--config-file` and
+        `--opt-config-file`; `options` is the optparse options object giving
+ access to any other command-line options.
+
+ This function is also responsible for downloading any configuration
+ files specified by URL. It uses ``parse_config_file`` in this module
+ to parse individual files.
+
+ This method can be overridden in a subclass to add extra logic to the
+ way that self.config is made up. See
+ `mozharness.mozilla.building.buildbase.BuildingConfig` for an example.
+ """
+ config_paths = options.config_paths or ["."]
+ all_cfg_files_and_dicts = []
+ for cf in all_config_files:
+ try:
+ if "://" in cf: # config file is an url
+ file_name = os.path.basename(cf)
+ file_path = os.path.join(os.getcwd(), file_name)
+ download_config_file(cf, file_path)
+ all_cfg_files_and_dicts.append(
+ (
+ file_path,
+ parse_config_file(
+ file_path,
+ search_path=["."],
+ ),
+ )
+ )
+ else:
+ all_cfg_files_and_dicts.append(
+ (
+ cf,
+ parse_config_file(
+ cf,
+ search_path=config_paths + [DEFAULT_CONFIG_PATH],
+ ),
+ )
+ )
+ except Exception:
+ if cf in options.opt_config_files:
+ print("WARNING: optional config file not found %s" % cf)
+ else:
+ raise
+
+ if "EXTRA_MOZHARNESS_CONFIG" in os.environ:
+ env_config = json.loads(os.environ["EXTRA_MOZHARNESS_CONFIG"])
+            all_cfg_files_and_dicts.append(("[EXTRA_MOZHARNESS_CONFIG]", env_config))
+
+ return all_cfg_files_and_dicts
+
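+    # Illustrative sketch for the environment overlay above: since the
+    # EXTRA_MOZHARNESS_CONFIG entry is appended last, it wins over file
+    # configs when they are merged (the value below is hypothetical):
+    #
+    #     os.environ["EXTRA_MOZHARNESS_CONFIG"] = '{"work_dir": "obj"}'
+    #     # get_cfgs_from_files(...) then ends with
+    #     #     ("[EXTRA_MOZHARNESS_CONFIG]", {"work_dir": "obj"})
+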
+ def parse_args(self, args=None):
+ """Parse command line arguments in a generic way.
+ Return the parser object after adding the basic options, so
+ child objects can manipulate it.
+ """
+ self.command_line = " ".join(sys.argv)
+ if args is None:
+ args = sys.argv[1:]
+ (options, args) = self.config_parser.parse_args(args)
+
+ defaults = self.config_parser.defaults.copy()
+
+ if not options.config_files:
+ if self.require_config_file:
+ if options.list_actions:
+ self.list_actions()
+ print("Required config file not set! (use --config-file option)")
+ raise SystemExit(-1)
+
+ os.environ["REQUIRE_GPU"] = "0"
+ if options.requires_gpu:
+ os.environ["REQUIRE_GPU"] = "1"
+
+ # this is what get_cfgs_from_files returns. It will represent each
+        # config file name and its associated dict
+ # eg ('builds/branch_specifics.py', {'foo': 'bar'})
+ # let's store this to self for things like --interpret-config-files
+ self.all_cfg_files_and_dicts.extend(
+ self.get_cfgs_from_files(
+ # append opt_config to allow them to overwrite previous configs
+ options.config_files + options.opt_config_files,
+ options=options,
+ )
+ )
+ config = {}
+ if (
+ self.append_env_variables_from_configs
+ or options.append_env_variables_from_configs
+ ):
+ # We only append values from various configs for the 'env' entry
+ # For everything else we follow the standard behaviour
+ for i, (c_file, c_dict) in enumerate(self.all_cfg_files_and_dicts):
+ for v in list(c_dict.keys()):
+ if v == "env" and v in config:
+ config[v].update(c_dict[v])
+ else:
+ config[v] = c_dict[v]
+ else:
+ for i, (c_file, c_dict) in enumerate(self.all_cfg_files_and_dicts):
+ config.update(c_dict)
+ # assign or update self._config depending on if it exists or not
+        # NOTE self._config will be passed to ReadOnlyDict's init -- a
+ # dict subclass with immutable locking capabilities -- and serve
+ # as the keys/values that make up that instance. Ultimately,
+ # this becomes self.config during BaseScript's init
+ self.set_config(config)
+
+ for key in list(defaults.keys()):
+ value = getattr(options, key)
+ if value is None:
+ continue
+ # Don't override config_file defaults with config_parser defaults
+ if key in defaults and value == defaults[key] and key in self._config:
+ continue
+ self._config[key] = value
+
+ # The idea behind the volatile_config is we don't want to save this
+ # info over multiple runs. This defaults to the action-specific
+ # config options, but can be anything.
+ for key in list(self.volatile_config.keys()):
+ if self._config.get(key) is not None:
+ self.volatile_config[key] = self._config[key]
+ del self._config[key]
+
+ self.update_actions()
+ if options.list_actions:
+ self.list_actions()
+
+ # Keep? This is for saving the volatile config in the dump_config
+ self._config["volatile_config"] = self.volatile_config
+
+ self.options = options
+ self.args = args
+ return (self.options, self.args)
+
+ def update_actions(self):
+ """Update actions after reading in config.
+
+ Seems a little complex, but the logic goes:
+
+ First, if default_actions is specified in the config, set our
+ default actions even if the script specifies other default actions.
+
+ Without any other action-specific options, run with default actions.
+
+ If we specify --ACTION or --only-ACTION once or multiple times,
+ we want to override the default_actions list with the one(s) we list.
+
+ Otherwise, if we specify --add-action ACTION, we want to add an
+ action to the list.
+
+ Finally, if we specify --no-ACTION, remove that from the list of
+ actions to perform.
+ """
+ if self._config.get("default_actions"):
+ default_actions = self.verify_actions(self._config["default_actions"])
+ self.default_actions = default_actions
+ self.verify_actions_order(self.default_actions)
+ self.actions = self.default_actions[:]
+ if self.volatile_config["actions"]:
+ actions = self.verify_actions(self.volatile_config["actions"])
+ self.actions = actions
+ elif self.volatile_config["add_actions"]:
+ actions = self.verify_actions(self.volatile_config["add_actions"])
+ self.actions.extend(actions)
+ if self.volatile_config["no_actions"]:
+ actions = self.verify_actions(self.volatile_config["no_actions"])
+ for action in actions:
+ if action in self.actions:
+ self.actions.remove(action)
+
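+# Illustrative sketch of update_actions, assuming
+# all_actions = ["clobber", "build", "test"] and
+# default_actions = ["build", "test"]:
+#
+#     (no action flags)      -> actions == ["build", "test"]
+#     --clobber              -> actions == ["clobber"]
+#     --add-action clobber   -> actions == ["build", "test", "clobber"]
+#     --no-test              -> "test" removed from the resulting list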
+
+# __main__ {{{1
+if __name__ == "__main__":
+ pass
diff --git a/testing/mozharness/mozharness/base/diskutils.py b/testing/mozharness/mozharness/base/diskutils.py
new file mode 100644
index 0000000000..757a6ffb7a
--- /dev/null
+++ b/testing/mozharness/mozharness/base/diskutils.py
@@ -0,0 +1,170 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Disk utility module, no mixins here!
+
+ examples:
+ 1) get disk size
+ from mozharness.base.diskutils import DiskInfo, DiskutilsError
+ ...
+ try:
+        di = DiskSize().get_size(path='/', unit='MB')
+ except DiskutilsError as e:
+ # manage the exception e.g: log.error(e)
+ pass
+ log.info("%s" % di)
+
+
+ 2) convert disk size:
+ from mozharness.base.diskutils import DiskutilsError, convert_to
+ ...
+ file_size = <function that gets file size in bytes>
+ # convert file_size to GB
+ try:
+ file_size = convert_to(file_size, from_unit='bytes', to_unit='GB')
+ except DiskutilsError as e:
+ # manage the exception e.g: log.error(e)
+ pass
+
+"""
+import ctypes
+import logging
+import os
+import sys
+
+from six import string_types
+
+from mozharness.base.log import INFO, numeric_log_level
+
+# use mozharness log
+log = logging.getLogger(__name__)
+
+
+class DiskutilsError(Exception):
+ """Exception thrown by Diskutils module"""
+
+ pass
+
+
+def convert_to(size, from_unit, to_unit):
+ """Helper method to convert filesystem sizes to kB/ MB/ GB/ TB/
+ valid values for source_format and destination format are:
+ * bytes
+ * kB
+ * MB
+ * GB
+ * TB
+ returns: size converted from source_format to destination_format.
+ """
+ sizes = {
+ "bytes": 1,
+ "kB": 1024,
+ "MB": 1024 * 1024,
+ "GB": 1024 * 1024 * 1024,
+ "TB": 1024 * 1024 * 1024 * 1024,
+ }
+ try:
+ df = sizes[to_unit]
+ sf = sizes[from_unit]
+ # pylint --py3k W1619
+ return size * sf / df
+ except KeyError:
+ raise DiskutilsError("conversion error: Invalid source or destination format")
+ except TypeError:
+ raise DiskutilsError("conversion error: size (%s) is not a number" % size)
+
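+# Illustrative sketch: units are powers of 1024, so for example
+#
+#     convert_to(1048576, from_unit="bytes", to_unit="MB")   # -> 1.0
+#     convert_to(2, from_unit="GB", to_unit="kB")            # -> 2097152.0
+#     convert_to(None, from_unit="GB", to_unit="kB")         # DiskutilsError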
+
+class DiskInfo(object):
+ """Stores basic information about the disk"""
+
+ def __init__(self):
+ self.unit = "bytes"
+ self.free = 0
+ self.used = 0
+ self.total = 0
+
+ def __str__(self):
+ string = ["Disk space info (in %s)" % self.unit]
+ string += ["total: %s" % self.total]
+ string += ["used: %s" % self.used]
+ string += ["free: %s" % self.free]
+ return " ".join(string)
+
+ def _to(self, unit):
+ from_unit = self.unit
+ to_unit = unit
+ self.free = convert_to(self.free, from_unit=from_unit, to_unit=to_unit)
+ self.used = convert_to(self.used, from_unit=from_unit, to_unit=to_unit)
+ self.total = convert_to(self.total, from_unit=from_unit, to_unit=to_unit)
+ self.unit = unit
+
+
+class DiskSize(object):
+ """DiskSize object"""
+
+ @staticmethod
+ def _posix_size(path):
+ """returns the disk size in bytes
+ disk size is relative to path
+ """
+ # we are on a POSIX system
+ st = os.statvfs(path)
+ disk_info = DiskInfo()
+ disk_info.free = st.f_bavail * st.f_frsize
+ disk_info.used = (st.f_blocks - st.f_bfree) * st.f_frsize
+ disk_info.total = st.f_blocks * st.f_frsize
+ return disk_info
+
+ @staticmethod
+ def _windows_size(path):
+ """returns size in bytes, works only on windows platforms"""
+ # we're on a non POSIX system (windows)
+ # DLL call
+ disk_info = DiskInfo()
+ dummy = ctypes.c_ulonglong() # needed by the dll call but not used
+ total = ctypes.c_ulonglong() # stores the total space value
+ free = ctypes.c_ulonglong() # stores the free space value
+ # depending on path format (unicode or not) and python version (2 or 3)
+ # we need to call GetDiskFreeSpaceExW or GetDiskFreeSpaceExA
+ called_function = ctypes.windll.kernel32.GetDiskFreeSpaceExA
+ if isinstance(path, string_types) or sys.version_info >= (3,):
+ called_function = ctypes.windll.kernel32.GetDiskFreeSpaceExW
+ # we're ready for the dll call. On error it returns 0
+ if (
+ called_function(
+ path, ctypes.byref(dummy), ctypes.byref(total), ctypes.byref(free)
+ )
+ != 0
+ ):
+ # success, we can use the values returned by the dll call
+ disk_info.free = free.value
+ disk_info.total = total.value
+ disk_info.used = total.value - free.value
+ return disk_info
+
+ @staticmethod
+ def get_size(path, unit, log_level=INFO):
+ """Disk info stats:
+ total => size of the disk
+ used => space used
+ free => free space
+        In case of error raises a DiskutilsError exception
+ """
+ try:
+ # let's try to get the disk size using os module
+ disk_info = DiskSize()._posix_size(path)
+ except AttributeError:
+ try:
+ # os module failed. let's try to get the size using
+ # ctypes.windll...
+ disk_info = DiskSize()._windows_size(path)
+ except AttributeError:
+                # No luck! This is neither a POSIX nor a Windows platform
+ # raise an exception
+ raise DiskutilsError("Unsupported platform")
+
+ disk_info._to(unit)
+ lvl = numeric_log_level(log_level)
+ log.log(lvl, msg="%s" % disk_info)
+ return disk_info
diff --git a/testing/mozharness/mozharness/base/errors.py b/testing/mozharness/mozharness/base/errors.py
new file mode 100755
index 0000000000..814dd2e045
--- /dev/null
+++ b/testing/mozharness/mozharness/base/errors.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Generic error lists.
+
+Error lists are used to parse output in mozharness.base.log.OutputParser.
+
+Each line of output is matched against each substring or regular expression
+in the error list. On a match, we determine the 'level' of that line,
+whether IGNORE, DEBUG, INFO, WARNING, ERROR, CRITICAL, or FATAL.
+
+TODO: Context lines (requires work on the OutputParser side)
+
+TODO: We could also create classes that generate these, but with the
+appropriate level (please don't die on any errors; please die on any
+warning; etc.) or platform or language or whatever.
+"""
+
+import re
+
+from mozharness.base.log import CRITICAL, DEBUG, ERROR, WARNING
+
+
+# Exceptions
+class VCSException(Exception):
+ pass
+
+
+# ErrorLists {{{1
+BaseErrorList = [{"substr": r"""command not found""", "level": ERROR}]
+
+HgErrorList = BaseErrorList + [
+ {
+ "regex": re.compile(r"""^abort:"""),
+ "level": ERROR,
+ "explanation": "Automation Error: hg not responding",
+ },
+ {
+ "substr": r"""unknown exception encountered""",
+ "level": ERROR,
+ "explanation": "Automation Error: python exception in hg",
+ },
+ {
+ "substr": r"""failed to import extension""",
+ "level": WARNING,
+ "explanation": "Automation Error: hg extension missing",
+ },
+]
+
+GitErrorList = BaseErrorList + [
+ {"substr": r"""Permission denied (publickey).""", "level": ERROR},
+ {"substr": r"""fatal: The remote end hung up unexpectedly""", "level": ERROR},
+ {"substr": r"""does not appear to be a git repository""", "level": ERROR},
+ {"substr": r"""error: src refspec""", "level": ERROR},
+ {"substr": r"""invalid author/committer line -""", "level": ERROR},
+ {"substr": r"""remote: fatal: Error in object""", "level": ERROR},
+ {
+ "substr": r"""fatal: sha1 file '<stdout>' write error: Broken pipe""",
+ "level": ERROR,
+ },
+ {"substr": r"""error: failed to push some refs to """, "level": ERROR},
+ {"substr": r"""remote: error: denying non-fast-forward """, "level": ERROR},
+ {"substr": r"""! [remote rejected] """, "level": ERROR},
+ {"regex": re.compile(r"""remote:.*No such file or directory"""), "level": ERROR},
+]
+
+PythonErrorList = BaseErrorList + [
+ {"regex": re.compile(r"""Warning:.*Error: """), "level": WARNING},
+ {"regex": re.compile(r"""package.*> Error:"""), "level": ERROR},
+ {"substr": r"""Traceback (most recent call last)""", "level": ERROR},
+ {"substr": r"""SyntaxError: """, "level": ERROR},
+ {"substr": r"""TypeError: """, "level": ERROR},
+ {"substr": r"""NameError: """, "level": ERROR},
+ {"substr": r"""ZeroDivisionError: """, "level": ERROR},
+ {"regex": re.compile(r"""raise \w*Exception: """), "level": CRITICAL},
+ {"regex": re.compile(r"""raise \w*Error: """), "level": CRITICAL},
+]
+
+VirtualenvErrorList = [
+ {"substr": r"""not found or a compiler error:""", "level": WARNING},
+ {"regex": re.compile("""\d+: error: """), "level": ERROR},
+ {"regex": re.compile("""\d+: warning: """), "level": WARNING},
+ {
+ "regex": re.compile(r"""Downloading .* \(.*\): *([0-9]+%)? *[0-9\.]+[kmKM]b"""),
+ "level": DEBUG,
+ },
+] + PythonErrorList
+
+RustErrorList = [
+ {"regex": re.compile(r"""error\[E\d+\]:"""), "level": ERROR},
+ {"substr": r"""error: Could not compile""", "level": ERROR},
+ {"substr": r"""error: aborting due to previous error""", "level": ERROR},
+ {"substr": r"""thread 'main' panicked at""", "level": ERROR},
+]
+
+# We may need to have various MakefileErrorLists for differing amounts of
+# warning-ignoring-ness.
+MakefileErrorList = (
+ BaseErrorList
+ + PythonErrorList
+ + RustErrorList
+ + [
+ {"substr": r""": error: """, "level": ERROR},
+ {"substr": r"""No rule to make target """, "level": ERROR},
+ {"regex": re.compile(r"""akefile.*was not found\."""), "level": ERROR},
+ {"regex": re.compile(r"""Stop\.$"""), "level": ERROR},
+ {
+ "regex": re.compile(r"""make\[\d+\]: \*\*\* \[.*\] Error \d+"""),
+ "level": ERROR,
+ },
+ {"regex": re.compile(r""":\d+: warning:"""), "level": WARNING},
+ {"regex": re.compile(r"""make(?:\[\d+\])?: \*\*\*/"""), "level": ERROR},
+ {"substr": r"""Warning: """, "level": WARNING},
+ ]
+)
+
+TarErrorList = BaseErrorList + [
+ {"substr": r"""(stdin) is not a bzip2 file.""", "level": ERROR},
+ {"regex": re.compile(r"""Child returned status [1-9]"""), "level": ERROR},
+ {"substr": r"""Error exit delayed from previous errors""", "level": ERROR},
+ {"substr": r"""stdin: unexpected end of file""", "level": ERROR},
+ {"substr": r"""stdin: not in gzip format""", "level": ERROR},
+ {"substr": r"""Cannot exec: No such file or directory""", "level": ERROR},
+ {"substr": r""": Error is not recoverable: exiting now""", "level": ERROR},
+]
+
+ZipErrorList = BaseErrorList + [
+ {
+ "substr": r"""zip warning:""",
+ "level": WARNING,
+ },
+ {
+ "substr": r"""zip error:""",
+ "level": ERROR,
+ },
+ {
+ "substr": r"""Cannot open file: it does not appear to be a valid archive""",
+ "level": ERROR,
+ },
+]
+
+ZipalignErrorList = BaseErrorList + [
+ {
+ "regex": re.compile(r"""Unable to open .* as a zip archive"""),
+ "level": ERROR,
+ },
+ {
+ "regex": re.compile(r"""Output file .* exists"""),
+ "level": ERROR,
+ },
+ {
+ "substr": r"""Input and output can't be the same file""",
+ "level": ERROR,
+ },
+]
+
+
+# __main__ {{{1
+if __name__ == "__main__":
+ """TODO: unit tests."""
+ pass
diff --git a/testing/mozharness/mozharness/base/log.py b/testing/mozharness/mozharness/base/log.py
new file mode 100755
index 0000000000..3276696751
--- /dev/null
+++ b/testing/mozharness/mozharness/base/log.py
@@ -0,0 +1,783 @@
+#!/usr/bin/env python
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Generic logging classes and functionalities for single and multi file logging.
+Capturing console output and providing general logging functionalities.
+
+Attributes:
+ FATAL_LEVEL (int): constant logging level value set based on the logging.CRITICAL
+ value
+ DEBUG (str): mozharness `debug` log name
+ INFO (str): mozharness `info` log name
+ WARNING (str): mozharness `warning` log name
+ CRITICAL (str): mozharness `critical` log name
+ FATAL (str): mozharness `fatal` log name
+ IGNORE (str): mozharness `ignore` log name
+ LOG_LEVELS (dict): mapping of the mozharness log level names to logging values
+ ROOT_LOGGER (logging.Logger): instance of a logging.Logger class
+
+TODO:
+- network logging support.
+- log rotation config
+"""
+
+import logging
+import os
+import sys
+import time
+import traceback
+from datetime import datetime
+
+from six import binary_type
+
+# Define our own FATAL_LEVEL
+FATAL_LEVEL = logging.CRITICAL + 10
+logging.addLevelName(FATAL_LEVEL, "FATAL")
+
+# mozharness log levels.
+DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL, IGNORE = (
+ "debug",
+ "info",
+ "warning",
+ "error",
+ "critical",
+ "fatal",
+ "ignore",
+)
+
+
+LOG_LEVELS = {
+ DEBUG: logging.DEBUG,
+ INFO: logging.INFO,
+ WARNING: logging.WARNING,
+ ERROR: logging.ERROR,
+ CRITICAL: logging.CRITICAL,
+ FATAL: FATAL_LEVEL,
+}
+
+# mozharness root logger
+ROOT_LOGGER = logging.getLogger()
+
+# Force logging to use UTC timestamps
+logging.Formatter.converter = time.gmtime
+
+
+# LogMixin {{{1
+class LogMixin(object):
+ """This is a mixin for any object to access similar logging functionality
+
+    The logging functionality described here is especially useful for those
+ objects with self.config and self.log_obj member variables
+ """
+
+ def _log_level_at_least(self, level):
+ """Check if the current logging level is greater or equal than level
+
+ Args:
+ level (str): log level name to compare against mozharness log levels
+ names
+
+ Returns:
+            bool: True if the current logging level is greater than or equal
+                to level, False otherwise
+ """
+ log_level = INFO
+ levels = [DEBUG, INFO, WARNING, ERROR, CRITICAL, FATAL]
+ if hasattr(self, "config"):
+ log_level = self.config.get("log_level", INFO)
+ return levels.index(level) >= levels.index(log_level)
+
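+    # Illustrative sketch: with self.config = {"log_level": "warning"},
+    # only WARNING and worse pass the check:
+    #
+    #     self._log_level_at_least(INFO)    # -> False
+    #     self._log_level_at_least(ERROR)   # -> True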
+ def _print(self, message, stderr=False):
+ """prints a message to the sys.stdout or sys.stderr according to the
+ value of the stderr argument.
+
+ Args:
+ message (str): The message to be printed
+ stderr (bool, optional): if True, message will be printed to
+ sys.stderr. Defaults to False.
+
+ Returns:
+ None
+ """
+ if not hasattr(self, "config") or self.config.get("log_to_console", True):
+ if stderr:
+ print(message, file=sys.stderr)
+ else:
+ print(message)
+
+ def log(self, message, level=INFO, exit_code=-1):
+ """log the message passed to it according to level, exit if level == FATAL
+
+ Args:
+ message (str): message to be logged
+ level (str, optional): logging level of the message. Defaults to INFO
+ exit_code (int, optional): exit code to log before the scripts calls
+ SystemExit.
+
+ Returns:
+ None
+ """
+ if self.log_obj:
+ return self.log_obj.log_message(
+ message,
+ level=level,
+ exit_code=exit_code,
+ post_fatal_callback=self._post_fatal,
+ )
+ if level == INFO:
+ if self._log_level_at_least(level):
+ self._print(message)
+ elif level == DEBUG:
+ if self._log_level_at_least(level):
+ self._print("DEBUG: %s" % message)
+ elif level in (WARNING, ERROR, CRITICAL):
+ if self._log_level_at_least(level):
+ self._print("%s: %s" % (level.upper(), message), stderr=True)
+ elif level == FATAL:
+ if self._log_level_at_least(level):
+ self._print("FATAL: %s" % message, stderr=True)
+ raise SystemExit(exit_code)
+
+ def worst_level(self, target_level, existing_level, levels=None):
+ """Compare target_level with existing_level according to levels values
+ and return the worst among them.
+
+ Args:
+ target_level (str): minimum logging level to which the current object
+ should be set
+ existing_level (str): current logging level
+ levels (list(str), optional): list of logging levels names to compare
+ target_level and existing_level against.
+ Defaults to mozharness log level
+ list sorted from most to less critical.
+
+ Returns:
+            str: the logging level that is closest to the first levels value,
+ i.e. levels[0]
+ """
+ if not levels:
+ levels = [FATAL, CRITICAL, ERROR, WARNING, INFO, DEBUG, IGNORE]
+ if target_level not in levels:
+ self.fatal("'%s' not in %s'." % (target_level, levels))
+ for l in levels:
+ if l in (target_level, existing_level):
+ return l
+
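+    # Illustrative sketch: levels are ordered worst-first, so the method
+    # returns whichever of the two levels appears earlier in that list:
+    #
+    #     self.worst_level(WARNING, ERROR)   # -> "error"
+    #     self.worst_level(FATAL, INFO)      # -> "fatal"
+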
+ # Copying Bear's dumpException():
+ # https://hg.mozilla.org/build/tools/annotate/1485f23c38e0/sut_tools/sut_lib.py#l23
+ def exception(self, message=None, level=ERROR):
+ """log an exception message base on the log level passed to it.
+
+ This function fetches the information of the current exception being handled and
+ adds it to the message argument.
+
+ Args:
+ message (str, optional): message to be printed at the beginning of the log.
+                                     Defaults to an empty string.
+ level (str, optional): log level to use for the logging. Defaults to ERROR
+
+ Returns:
+ None
+ """
+ tb_type, tb_value, tb_traceback = sys.exc_info()
+ if message is None:
+ message = ""
+ else:
+ message = "%s\n" % message
+ for s in traceback.format_exception(tb_type, tb_value, tb_traceback):
+ message += "%s\n" % s
+ # Log at the end, as a fatal will attempt to exit after the 1st line.
+ self.log(message, level=level)
+
+ def debug(self, message):
+ """calls the log method with DEBUG as logging level
+
+ Args:
+ message (str): message to log
+ """
+ self.log(message, level=DEBUG)
+
+ def info(self, message):
+ """calls the log method with INFO as logging level
+
+ Args:
+ message (str): message to log
+ """
+ self.log(message, level=INFO)
+
+ def warning(self, message):
+ """calls the log method with WARNING as logging level
+
+ Args:
+ message (str): message to log
+ """
+ self.log(message, level=WARNING)
+
+ def error(self, message):
+ """calls the log method with ERROR as logging level
+
+ Args:
+ message (str): message to log
+ """
+ self.log(message, level=ERROR)
+
+ def critical(self, message):
+ """calls the log method with CRITICAL as logging level
+
+ Args:
+ message (str): message to log
+ """
+ self.log(message, level=CRITICAL)
+
+ def fatal(self, message, exit_code=-1):
+ """calls the log method with FATAL as logging level
+
+ Args:
+ message (str): message to log
+ exit_code (int, optional): exit code to use for the SystemExit
+                exception to be raised. Defaults to -1.
+ """
+ self.log(message, level=FATAL, exit_code=exit_code)
+
+ def _post_fatal(self, message=None, exit_code=None):
+ """Sometimes you want to create a report or cleanup
+ or notify on fatal(); override this method to do so.
+
+ Please don't use this for anything significantly long-running.
+
+ Args:
+            message (str, optional): message to report. Defaults to None
+            exit_code (int, optional): exit code to use for the SystemExit
+                exception to be raised. Defaults to None
+ """
+ pass
+
+
+# OutputParser {{{1
+class OutputParser(LogMixin):
+ """Helper object to parse command output.
+
+ This will buffer output if needed, so we can go back and mark
+ [(linenum - 10) : linenum+10] as errors if need be, without having to
+ get all the output first.
+
+ linenum+10 will be easy; we can set self.num_post_context_lines to 10,
+ and self.num_post_context_lines-- as we mark each line to at least error
+ level X.
+
+ linenum-10 will be trickier. We'll not only need to save the line
+ itself, but also the level that we've set for that line previously,
+ whether by matching on that line, or by a previous line's context.
+ We should only log that line if all output has ended (self.finish() ?);
+ otherwise store a list of dictionaries in self.context_buffer that is
+ buffered up to self.num_pre_context_lines (set to the largest
+ pre-context-line setting in error_list.)
+ """
+
+ def __init__(
+ self, config=None, log_obj=None, error_list=None, log_output=True, **kwargs
+ ):
+ """Initialization method for the OutputParser class
+
+ Args:
+ config (dict, optional): dictionary containing values such as `log_level`
+ or `log_to_console`. Defaults to `None`.
+ log_obj (BaseLogger, optional): instance of the BaseLogger class. Defaults
+ to `None`.
+ error_list (list, optional): list of the error to look for. Defaults to
+ `None`.
+ log_output (boolean, optional): flag for deciding if the commands
+ output should be logged or not.
+ Defaults to `True`.
+ """
+ self.config = config
+ self.log_obj = log_obj
+ self.error_list = error_list or []
+ self.log_output = log_output
+ self.num_errors = 0
+ self.num_warnings = 0
+ # TODO context_lines.
+ # Not in use yet, but will be based off error_list.
+ self.context_buffer = []
+ self.num_pre_context_lines = 0
+ self.num_post_context_lines = 0
+ self.worst_log_level = INFO
+
+ def parse_single_line(self, line):
+ """parse a console output line and check if it matches one in `error_list`,
+ if so then log it according to `log_output`.
+
+ Args:
+ line (str): command line output to parse.
+
+ Returns:
+ If the line hits a match in the error_list, the new log level the line was
+ (or should be) logged at is returned. Otherwise, returns None.
+ """
+ for error_check in self.error_list:
+ # TODO buffer for context_lines.
+ match = False
+ if "substr" in error_check:
+ if error_check["substr"] in line:
+ match = True
+ elif "regex" in error_check:
+ if error_check["regex"].search(line):
+ match = True
+ else:
+ self.warning("error_list: 'substr' and 'regex' not in %s" % error_check)
+ if match:
+ log_level = error_check.get("level", INFO)
+ if self.log_output:
+ message = " %s" % line
+ if error_check.get("explanation"):
+ message += "\n %s" % error_check["explanation"]
+ if error_check.get("summary"):
+ self.add_summary(message, level=log_level)
+ else:
+ self.log(message, level=log_level)
+ if log_level in (ERROR, CRITICAL, FATAL):
+ self.num_errors += 1
+ if log_level == WARNING:
+ self.num_warnings += 1
+ self.worst_log_level = self.worst_level(log_level, self.worst_log_level)
+ return log_level
+
+ if self.log_output:
+ self.info(" %s" % line)
+
+ def add_lines(self, output):
+ """process a string or list of strings, decode them to utf-8,strip
+ them of any trailing whitespaces and parse them using `parse_single_line`
+
+ strings consisting only of whitespaces are ignored.
+
+ Args:
+ output (str | list): string or list of string to parse
+ """
+ if not isinstance(output, list):
+ output = [output]
+
+ for line in output:
+ if not line or line.isspace():
+ continue
+
+ if isinstance(line, binary_type):
+ line = line.decode("utf-8", "replace")
+
+ line = line.rstrip()
+ self.parse_single_line(line)
+
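+# Illustrative usage sketch for OutputParser (a minimal, hypothetical
+# error list; entries follow the mozharness.base.errors conventions):
+#
+#     parser = OutputParser(error_list=[
+#         {"substr": "command not found", "level": ERROR},
+#     ])
+#     parser.add_lines(["ok line", "sh: foo: command not found"])
+#     parser.num_errors        # -> 1
+#     parser.worst_log_level   # -> "error"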
+
+# BaseLogger {{{1
+class BaseLogger(object):
+ """Base class in charge of logging handling logic such as creating logging
+ files, dirs, attaching to the console output and managing its output.
+
+ Attributes:
+ LEVELS (dict): flat copy of the `LOG_LEVELS` attribute of the `log` module.
+
+ TODO: status? There may be a status object or status capability in
+ either logging or config that allows you to count the number of
+ error,critical,fatal messages for us to count up at the end (aiming
+ for 0).
+ """
+
+ LEVELS = LOG_LEVELS
+
+ def __init__(
+ self,
+ log_level=INFO,
+ log_format="%(message)s",
+ log_date_format="%H:%M:%S",
+ log_name="test",
+ log_to_console=True,
+ log_dir=".",
+ log_to_raw=False,
+ logger_name="",
+ append_to_log=False,
+ ):
+ """BaseLogger constructor
+
+ Args:
+ log_level (str, optional): mozharness log level name. Defaults to INFO.
+ log_format (str, optional): message format string to instantiate a
+ `logging.Formatter`. Defaults to '%(message)s'
+ log_date_format (str, optional): date format string to instantiate a
+ `logging.Formatter`. Defaults to '%H:%M:%S'
+ log_name (str, optional): name to use for the log files to be created.
+ Defaults to 'test'
+ log_to_console (bool, optional): set to True in order to create a Handler
+                                             instance based on the current
+                                             `Logger` instance. Defaults to True.
+ log_dir (str, optional): directory location to store the log files.
+ Defaults to '.', i.e. current working directory.
+ log_to_raw (bool, optional): set to True in order to create a *raw.log
+ file. Defaults to False.
+ logger_name (str, optional): currently useless parameter. According
+ to the code comments, it could be useful
+ if we were to have multiple logging
+ objects that don't trample each other.
+ append_to_log (bool, optional): set to True if the logging content should
+ be appended to old logging files. Defaults to False
+ """
+
+ self.log_format = log_format
+ self.log_date_format = log_date_format
+ self.log_to_console = log_to_console
+ self.log_to_raw = log_to_raw
+ self.log_level = log_level
+ self.log_name = log_name
+ self.log_dir = log_dir
+ self.append_to_log = append_to_log
+
+ # Not sure what I'm going to use this for; useless unless we
+ # can have multiple logging objects that don't trample each other
+ self.logger_name = logger_name
+
+ self.all_handlers = []
+ self.log_files = {}
+
+ self.create_log_dir()
+
+ def create_log_dir(self):
+ """create a logging directory if it doesn't exits. If there is a file with
+ same name as the future logging directory it will be deleted.
+ """
+
+ if os.path.exists(self.log_dir):
+ if not os.path.isdir(self.log_dir):
+ os.remove(self.log_dir)
+ if not os.path.exists(self.log_dir):
+ os.makedirs(self.log_dir)
+ self.abs_log_dir = os.path.abspath(self.log_dir)
+
+ def init_message(self, name=None):
+ """log an init message stating the name passed to it, the current date
+ and time and, the current working directory.
+
+ Args:
+ name (str, optional): name to use for the init log message. Defaults to
+ the current instance class name.
+ """
+
+ if not name:
+ name = self.__class__.__name__
+ self.log_message(
+ "%s online at %s in %s"
+ % (name, datetime.utcnow().strftime("%Y%m%d %H:%M:%SZ"), os.getcwd())
+ )
+
+ def get_logger_level(self, level=None):
+ """translate the level name passed to it and return its numeric value
+ according to `LEVELS` values.
+
+ Args:
+ level (str, optional): level name to be translated. Defaults to the current
+ instance `log_level`.
+
+ Returns:
+            int: numeric value of the log level name passed to it, or 0 (NOTSET)
+                if the name doesn't exist
+ """
+
+ if not level:
+ level = self.log_level
+ return self.LEVELS.get(level, logging.NOTSET)
+
+ def get_log_formatter(self, log_format=None, date_format=None):
+ """create a `logging.Formatter` base on the log and date format.
+
+ Args:
+ log_format (str, optional): log format to use for the Formatter constructor.
+ Defaults to the current instance log format.
+ date_format (str, optional): date format to use for the Formatter constructor.
+ Defaults to the current instance date format.
+
+ Returns:
+            logging.Formatter: instance created based on the passed arguments
+ """
+
+ if not log_format:
+ log_format = self.log_format
+ if not date_format:
+ date_format = self.log_date_format
+ return logging.Formatter(log_format, date_format)
+
+ def new_logger(self):
+ """Create a new logger based on the ROOT_LOGGER instance. By default there are no handlers.
+ The new logger becomes a member variable of the current instance as `self.logger`.
+ """
+
+ self.logger = ROOT_LOGGER
+ self.logger.setLevel(self.get_logger_level())
+ self._clear_handlers()
+ if self.log_to_console:
+ self.add_console_handler()
+ if self.log_to_raw:
+ self.log_files["raw"] = "%s_raw.log" % self.log_name
+ self.add_file_handler(
+ os.path.join(self.abs_log_dir, self.log_files["raw"]),
+ log_format="%(message)s",
+ )
+
+ def _clear_handlers(self):
+ """remove all handlers stored in `self.all_handlers`.
+
+ To prevent dups -- logging will preserve Handlers across
+ objects :(
+ """
+ attrs = dir(self)
+ if "all_handlers" in attrs and "logger" in attrs:
+ for handler in self.all_handlers:
+ self.logger.removeHandler(handler)
+ self.all_handlers = []
+
+ def __del__(self):
+ """BaseLogger class destructor; shutdown, flush and remove all handlers"""
+ logging.shutdown()
+ self._clear_handlers()
+
+ def add_console_handler(self, log_level=None, log_format=None, date_format=None):
+ """create a `logging.StreamHandler` using `sys.stderr` for logging the console
+ output and add it to the `all_handlers` member variable
+
+ Args:
+ log_level (str, optional): useless argument. Not used here.
+ Defaults to None.
+ log_format (str, optional): format used for the Formatter attached to the
+ StreamHandler. Defaults to None.
+ date_format (str, optional): format used for the Formatter attached to the
+ StreamHandler. Defaults to None.
+ """
+
+ console_handler = logging.StreamHandler()
+ console_handler.setFormatter(
+ self.get_log_formatter(log_format=log_format, date_format=date_format)
+ )
+ self.logger.addHandler(console_handler)
+ self.all_handlers.append(console_handler)
+
+ def add_file_handler(
+ self, log_path, log_level=None, log_format=None, date_format=None
+ ):
+ """create a `logging.FileHandler` base on the path, log and date format
+ and add it to the `all_handlers` member variable.
+
+ Args:
+ log_path (str): filepath to use for the `FileHandler`.
+ log_level (str, optional): useless argument. Not used here.
+ Defaults to None.
+ log_format (str, optional): log format to use for the Formatter constructor.
+ Defaults to the current instance log format.
+ date_format (str, optional): date format to use for the Formatter constructor.
+ Defaults to the current instance date format.
+ """
+
+ if not self.append_to_log and os.path.exists(log_path):
+ os.remove(log_path)
+ file_handler = logging.FileHandler(log_path)
+ file_handler.setLevel(self.get_logger_level(log_level))
+ file_handler.setFormatter(
+ self.get_log_formatter(log_format=log_format, date_format=date_format)
+ )
+ self.logger.addHandler(file_handler)
+ self.all_handlers.append(file_handler)
+
+ def log_message(self, message, level=INFO, exit_code=-1, post_fatal_callback=None):
+ """Generic log method.
+ There should be more options here -- do or don't split by line,
+ use os.linesep instead of assuming \n, be able to pass in log level
+ by name or number.
+
+ Adding the IGNORE special level for runCommand.
+
+ Args:
+ message (str): message to log using the current `logger`
+ level (str, optional): log level of the message. Defaults to INFO.
+ exit_code (int, optional): exit code to use in case of a FATAL level is used.
+ Defaults to -1.
+            post_fatal_callback (function, optional): function to call back in case
+                                                      of a fatal log level. Defaults to None.
+ """
+
+ if level == IGNORE:
+ return
+ for line in message.splitlines():
+ self.logger.log(self.get_logger_level(level), line)
+ if level == FATAL:
+ if callable(post_fatal_callback):
+ self.logger.log(FATAL_LEVEL, "Running post_fatal callback...")
+ post_fatal_callback(message=message, exit_code=exit_code)
+ self.logger.log(FATAL_LEVEL, "Exiting %d" % exit_code)
+ raise SystemExit(exit_code)
+
+
+# SimpleFileLogger {{{1
+class SimpleFileLogger(BaseLogger):
+ """Subclass of the BaseLogger.
+
+ Create one logFile. Possibly also output to the terminal and a raw log
+ (no prepending of level or date)
+ """
+
+ def __init__(
+ self,
+ log_format="%(asctime)s %(levelname)8s - %(message)s",
+ logger_name="Simple",
+ log_dir="logs",
+ **kwargs
+ ):
+ """SimpleFileLogger constructor. Calls its superclass constructor,
+ creates a new logger instance and log an init message.
+
+ Args:
+ log_format (str, optional): message format string to instantiate a
+ `logging.Formatter`. Defaults to
+ '%(asctime)s %(levelname)8s - %(message)s'
+            logger_name (str, optional): name to use for the logger instance.
+                                         Defaults to 'Simple'
+ log_dir (str, optional): directory location to store the log files.
+ Defaults to 'logs'
+ **kwargs: Arbitrary keyword arguments passed to the BaseLogger constructor
+ """
+
+ BaseLogger.__init__(
+ self,
+ logger_name=logger_name,
+ log_format=log_format,
+ log_dir=log_dir,
+ **kwargs
+ )
+ self.new_logger()
+ self.init_message()
+
+ def new_logger(self):
+ """calls the BaseLogger.new_logger method and adds a file handler to it."""
+
+ BaseLogger.new_logger(self)
+ self.log_path = os.path.join(self.abs_log_dir, "%s.log" % self.log_name)
+ self.log_files["default"] = self.log_path
+ self.add_file_handler(self.log_path)
+
+
+# MultiFileLogger {{{1
+class MultiFileLogger(BaseLogger):
+ """Subclass of the BaseLogger class. Create a log per log level in log_dir.
+ Possibly also output to the terminal and a raw log (no prepending of level or date)
+ """
+
+ def __init__(
+ self,
+ logger_name="Multi",
+ log_format="%(asctime)s %(levelname)8s - %(message)s",
+ log_dir="logs",
+ log_to_raw=True,
+ **kwargs
+ ):
+ """MultiFileLogger constructor. Calls its superclass constructor,
+ creates a new logger instance and log an init message.
+
+ Args:
+ log_format (str, optional): message format string to instantiate a
+ `logging.Formatter`. Defaults to
+ '%(asctime)s %(levelname)8s - %(message)s'
+            logger_name (str, optional): name to use for the logger instance.
+                                         Defaults to 'Multi'
+ log_dir (str, optional): directory location to store the log files.
+ Defaults to 'logs'
+            log_to_raw (bool, optional): set to True in order to create a *raw.log
+                                         file. Defaults to True.
+ **kwargs: Arbitrary keyword arguments passed to the BaseLogger constructor
+ """
+
+ BaseLogger.__init__(
+ self,
+ logger_name=logger_name,
+ log_format=log_format,
+ log_to_raw=log_to_raw,
+ log_dir=log_dir,
+ **kwargs
+ )
+
+ self.new_logger()
+ self.init_message()
+
+ def new_logger(self):
+ """calls the BaseLogger.new_logger method and adds a file handler per
+ logging level in the `LEVELS` class attribute.
+ """
+
+ BaseLogger.new_logger(self)
+ min_logger_level = self.get_logger_level(self.log_level)
+ for level in list(self.LEVELS.keys()):
+ if self.get_logger_level(level) >= min_logger_level:
+ self.log_files[level] = "%s_%s.log" % (self.log_name, level)
+ self.add_file_handler(
+ os.path.join(self.abs_log_dir, self.log_files[level]),
+ log_level=level,
+ )
+
+
+# ConsoleLogger {{{1
+class ConsoleLogger(BaseLogger):
+ """Subclass of the BaseLogger.
+
+ Output logs to stderr.
+ """
+
+ def __init__(
+ self,
+ log_format="%(levelname)8s - %(message)s",
+ log_date_format="%H:%M:%S",
+ logger_name="Console",
+ **kwargs
+ ):
+ """ConsoleLogger constructor. Calls its superclass constructor,
+ creates a new logger instance and log an init message.
+
+ Args:
+ log_format (str, optional): message format string to instantiate a
+ `logging.Formatter`. Defaults to
+ '%(levelname)8s - %(message)s'
+ **kwargs: Arbitrary keyword arguments passed to the BaseLogger
+ constructor
+ """
+
+ BaseLogger.__init__(
+ self, logger_name=logger_name, log_format=log_format, **kwargs
+ )
+ self.new_logger()
+ self.init_message()
+
+ def new_logger(self):
+ """Create a new logger based on the ROOT_LOGGER instance. By default
+ there are no handlers. The new logger becomes a member variable of the
+ current instance as `self.logger`.
+ """
+ self.logger = ROOT_LOGGER
+ self.logger.setLevel(self.get_logger_level())
+ self._clear_handlers()
+ self.add_console_handler()
+
+
+def numeric_log_level(level):
+ """Converts a mozharness log level (string) to the corresponding logger
+    level (number). This function makes it possible to set the log level
+ in functions that do not inherit from LogMixin
+
+ Args:
+ level (str): log level name to convert.
+
+ Returns:
+ int: numeric value of the log level name.
+ """
+ return LOG_LEVELS[level]
+
+
+# __main__ {{{1
+if __name__ == "__main__":
+ """Useless comparison, due to the `pass` keyword on its body"""
+ pass
diff --git a/testing/mozharness/mozharness/base/parallel.py b/testing/mozharness/mozharness/base/parallel.py
new file mode 100755
index 0000000000..678dadeede
--- /dev/null
+++ b/testing/mozharness/mozharness/base/parallel.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Generic ways to parallelize jobs.
+"""
+
+
+# ChunkingMixin {{{1
+class ChunkingMixin(object):
+ """Generic Chunking helper methods."""
+
+ def query_chunked_list(self, possible_list, this_chunk, total_chunks, sort=False):
+ """Split a list of items into a certain number of chunks and
+        return the subset that will occur in this chunk.
+
+ Ported from build.l10n.getLocalesForChunk in build/tools.
+ """
+ if sort:
+ possible_list = sorted(possible_list)
+ else:
+ # Copy to prevent altering
+ possible_list = possible_list[:]
+ length = len(possible_list)
+ for c in range(1, total_chunks + 1):
+ n = length // total_chunks
+ # If the total number of items isn't evenly divisible by the
+ # number of chunks, we need to append one more onto some chunks
+ if c <= (length % total_chunks):
+ n += 1
+ if c == this_chunk:
+ return possible_list[0:n]
+ del possible_list[0:n]
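+
+
+# Editor's sketch of the chunking behaviour (not part of the original file):
+# the first (length % total_chunks) chunks each receive one extra item, so
+# ten items in three chunks split 4/3/3:
+#
+#     ChunkingMixin().query_chunked_list(list("abcdefghij"), 1, 3)
+#     # -> ['a', 'b', 'c', 'd']
+#     ChunkingMixin().query_chunked_list(list("abcdefghij"), 3, 3)
+#     # -> ['h', 'i', 'j']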
diff --git a/testing/mozharness/mozharness/base/python.py b/testing/mozharness/mozharness/base/python.py
new file mode 100644
index 0000000000..73e50bfe7c
--- /dev/null
+++ b/testing/mozharness/mozharness/base/python.py
@@ -0,0 +1,1182 @@
+#!/usr/bin/env python
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Python usage, esp. virtualenv.
+"""
+
+import errno
+import json
+import os
+import shutil
+import site
+import socket
+import subprocess
+import sys
+import traceback
+from pathlib import Path
+
+try:
+ import urlparse
+except ImportError:
+ import urllib.parse as urlparse
+
+from six import string_types
+
+import mozharness
+from mozharness.base.errors import VirtualenvErrorList
+from mozharness.base.log import FATAL, WARNING
+from mozharness.base.script import (
+ PostScriptAction,
+ PostScriptRun,
+ PreScriptAction,
+ ScriptMixin,
+)
+
+external_tools_path = os.path.join(
+ os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))),
+ "external_tools",
+)
+
+
+def get_tlsv1_post():
+ # Monkeypatch to work around SSL errors in non-bleeding-edge Python.
+ # Taken from https://lukasa.co.uk/2013/01/Choosing_SSL_Version_In_Requests/
+ import ssl
+
+ import requests
+ from requests.packages.urllib3.poolmanager import PoolManager
+
+ class TLSV1Adapter(requests.adapters.HTTPAdapter):
+ def init_poolmanager(self, connections, maxsize, block=False):
+ self.poolmanager = PoolManager(
+ num_pools=connections,
+ maxsize=maxsize,
+ block=block,
+ ssl_version=ssl.PROTOCOL_TLSv1,
+ )
+
+ s = requests.Session()
+ s.mount("https://", TLSV1Adapter())
+ return s.post
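+
+
+# Editor's note: a hypothetical caller would use the returned bound method as
+# a drop-in replacement for requests.post, e.g.:
+#
+#     post = get_tlsv1_post()
+#     response = post("https://example.com/api", data={"key": "value"})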
+
+
+# Virtualenv {{{1
+virtualenv_config_options = [
+ [
+ ["--virtualenv-path"],
+ {
+ "action": "store",
+ "dest": "virtualenv_path",
+ "default": "venv",
+ "help": "Specify the path to the virtualenv top level directory",
+ },
+ ],
+ [
+ ["--find-links"],
+ {
+ "action": "extend",
+ "dest": "find_links",
+ "default": ["https://pypi.pub.build.mozilla.org/pub/"],
+ "help": "URL to look for packages at",
+ },
+ ],
+ [
+ ["--pip-index"],
+ {
+ "action": "store_true",
+ "default": False,
+ "dest": "pip_index",
+ "help": "Use pip indexes",
+ },
+ ],
+ [
+ ["--no-pip-index"],
+ {
+ "action": "store_false",
+ "dest": "pip_index",
+ "help": "Don't use pip indexes (default)",
+ },
+ ],
+]
+
+
+class VirtualenvMixin(object):
+ """BaseScript mixin, designed to create and use virtualenvs.
+
+ Config items:
+ * virtualenv_path points to the virtualenv location on disk.
+ * virtualenv_modules lists the module names.
+    * MODULE_url, when set, overrides the URL used to install MODULE (optional)
+ Requires virtualenv to be in PATH.
+ Depends on ScriptMixin
+ """
+
+ python_paths = {}
+ site_packages_path = None
+
+ def __init__(self, *args, **kwargs):
+ self._virtualenv_modules = []
+ super(VirtualenvMixin, self).__init__(*args, **kwargs)
+
+ def register_virtualenv_module(
+ self,
+ name=None,
+ url=None,
+ method=None,
+ requirements=None,
+ optional=False,
+ two_pass=False,
+ editable=False,
+ ):
+ """Register a module to be installed with the virtualenv.
+
+ This method can be called up until create_virtualenv() to register
+ modules that should be installed in the virtualenv.
+
+ See the documentation for install_module for how the arguments are
+ applied.
+ """
+ self._virtualenv_modules.append(
+ (name, url, method, requirements, optional, two_pass, editable)
+ )
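+
+    # Illustrative usage (editor's sketch): scripts typically register modules
+    # before the create-virtualenv action runs, as ResourceMonitoringMixin
+    # does later in this module, e.g.:
+    #
+    #     self.register_virtualenv_module(
+    #         "mozsystemmonitor==1.0.1", method="pip", optional=True
+    #     )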
+
+ def query_virtualenv_path(self):
+ """Determine the absolute path to the virtualenv."""
+ dirs = self.query_abs_dirs()
+
+ if "abs_virtualenv_dir" in dirs:
+ return dirs["abs_virtualenv_dir"]
+
+ p = self.config["virtualenv_path"]
+ if not p:
+ self.fatal(
+ "virtualenv_path config option not set; " "this should never happen"
+ )
+
+ if os.path.isabs(p):
+ return p
+ else:
+ return os.path.join(dirs["abs_work_dir"], p)
+
+ def query_python_path(self, binary="python"):
+ """Return the path of a binary inside the virtualenv, if
+ c['virtualenv_path'] is set; otherwise return the binary name.
+ Otherwise return None
+ """
+ if binary not in self.python_paths:
+ bin_dir = "bin"
+ if self._is_windows():
+ bin_dir = "Scripts"
+ virtualenv_path = self.query_virtualenv_path()
+ self.python_paths[binary] = os.path.abspath(
+ os.path.join(virtualenv_path, bin_dir, binary)
+ )
+
+ return self.python_paths[binary]
+
+ def query_python_site_packages_path(self):
+ if self.site_packages_path:
+ return self.site_packages_path
+ python = self.query_python_path()
+ self.site_packages_path = self.get_output_from_command(
+ [
+ python,
+ "-c",
+ "from distutils.sysconfig import get_python_lib; "
+ + "print(get_python_lib())",
+ ]
+ )
+ return self.site_packages_path
+
+ def package_versions(
+ self, pip_freeze_output=None, error_level=WARNING, log_output=False
+ ):
+ """
+ reads packages from `pip freeze` output and returns a dict of
+ {package_name: 'version'}
+ """
+ packages = {}
+
+ if pip_freeze_output is None:
+ # get the output from `pip freeze`
+ pip = self.query_python_path("pip")
+ if not pip:
+ self.log("package_versions: Program pip not in path", level=error_level)
+ return {}
+ pip_freeze_output = self.get_output_from_command(
+ [pip, "list", "--format", "freeze", "--no-index"],
+ silent=True,
+ ignore_errors=True,
+ )
+ if not isinstance(pip_freeze_output, string_types):
+ self.fatal(
+ "package_versions: Error encountered running `pip freeze`: "
+ + pip_freeze_output
+ )
+
+        for line in pip_freeze_output.splitlines():
+            # parse the output into package, version
+            line = line.strip()
+ if not line:
+ # whitespace
+ continue
+ if line.startswith("-"):
+ # not a package, probably like '-e http://example.com/path#egg=package-dev'
+ continue
+ if "==" not in line:
+ self.fatal("pip_freeze_packages: Unrecognized output line: %s" % line)
+ package, version = line.split("==", 1)
+ packages[package] = version
+
+ if log_output:
+ self.info("Current package versions:")
+ for package in sorted(packages):
+ self.info(" %s == %s" % (package, packages[package]))
+
+ return packages
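+
+    # For illustration (editor's note): given `pip list --format freeze`
+    # output such as
+    #
+    #     requests==2.25.1
+    #     six==1.15.0
+    #
+    # package_versions() returns {"requests": "2.25.1", "six": "1.15.0"}.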
+
+ def is_python_package_installed(self, package_name, error_level=WARNING):
+ """
+ Return whether the package is installed
+ """
+ # pylint --py3k W1655
+ package_versions = self.package_versions(error_level=error_level)
+ return package_name.lower() in [package.lower() for package in package_versions]
+
+ def install_module(
+ self,
+ module=None,
+ module_url=None,
+ install_method=None,
+ requirements=(),
+ optional=False,
+        global_options=(),
+ no_deps=False,
+ editable=False,
+ ):
+ """
+ Install module via pip.
+
+ module_url can be a url to a python package tarball, a path to
+ a directory containing a setup.py (absolute or relative to work_dir)
+ or None, in which case it will default to the module name.
+
+ requirements is a list of pip requirements files. If specified, these
+ will be combined with the module_url (if any), like so:
+
+ pip install -r requirements1.txt -r requirements2.txt module_url
+ """
+ import http.client
+ import time
+ import urllib.error
+ import urllib.request
+
+ c = self.config
+ dirs = self.query_abs_dirs()
+ env = self.query_env()
+ venv_path = self.query_virtualenv_path()
+ self.info("Installing %s into virtualenv %s" % (module, venv_path))
+ if not module_url:
+ module_url = module
+ if install_method in (None, "pip"):
+ if not module_url and not requirements:
+ self.fatal("Must specify module and/or requirements")
+ pip = self.query_python_path("pip")
+ if c.get("verbose_pip"):
+ command = [pip, "-v", "install"]
+ else:
+ command = [pip, "install"]
+ if no_deps:
+ command += ["--no-deps"]
+ # To avoid timeouts with our pypi server, increase default timeout:
+ # https://bugzilla.mozilla.org/show_bug.cgi?id=1007230#c802
+ command += ["--timeout", str(c.get("pip_timeout", 120))]
+ for requirement in requirements:
+ command += ["-r", requirement]
+ if c.get("find_links") and not c["pip_index"]:
+ command += ["--no-index"]
+ for opt in global_options:
+ command += ["--global-option", opt]
+ else:
+ self.fatal(
+ "install_module() doesn't understand an install_method of %s!"
+ % install_method
+ )
+
+ # find_links connection check while loop
+ find_links_added = 0
+ fl_retry_sleep_seconds = 10
+ fl_max_retry_minutes = 5
+        fl_retry_loops = (fl_max_retry_minutes * 60) // fl_retry_sleep_seconds
+ for link in c.get("find_links", []):
+ parsed = urlparse.urlparse(link)
+ dns_result = None
+ get_result = None
+ retry_counter = 0
+ while retry_counter < fl_retry_loops and (
+ dns_result is None or get_result is None
+ ):
+ try:
+ dns_result = socket.gethostbyname(parsed.hostname)
+ get_result = urllib.request.urlopen(link, timeout=10).read()
+ break
+ except socket.gaierror:
+ retry_counter += 1
+ self.warning(
+ "find_links: dns check failed for %s, sleeping %ss and retrying..."
+ % (parsed.hostname, fl_retry_sleep_seconds)
+ )
+ time.sleep(fl_retry_sleep_seconds)
+ except (
+ urllib.error.HTTPError,
+ urllib.error.URLError,
+ socket.timeout,
+ http.client.RemoteDisconnected,
+ ) as e:
+ retry_counter += 1
+ self.warning(
+ "find_links: connection check failed for %s, sleeping %ss and retrying..."
+ % (link, fl_retry_sleep_seconds)
+ )
+ self.warning("find_links: exception: %s" % e)
+ time.sleep(fl_retry_sleep_seconds)
+ # now that the connectivity check is good, add the link
+ if dns_result and get_result:
+ self.info("find_links: connection checks passed for %s, adding." % link)
+ find_links_added += 1
+ command.extend(["--find-links", link])
+ else:
+ self.warning(
+ "find_links: connection checks failed for %s"
+ ", but max retries reached. continuing..." % link
+ )
+
+ # TODO: make this fatal if we always see failures after this
+ if find_links_added == 0:
+ self.warning(
+ "find_links: no find_links added. pip installation will probably fail!"
+ )
+
+ # module_url can be None if only specifying requirements files
+ if module_url:
+ if editable:
+ if install_method in (None, "pip"):
+ command += ["-e"]
+ else:
+ self.fatal(
+ "editable installs not supported for install_method %s"
+ % install_method
+ )
+ command += [module_url]
+
+ # If we're only installing a single requirements file, use
+ # the file's directory as cwd, so relative paths work correctly.
+ cwd = dirs["abs_work_dir"]
+ if not module and len(requirements) == 1:
+ cwd = os.path.dirname(requirements[0])
+
+ # Allow for errors while building modules, but require a
+ # return status of 0.
+ self.retry(
+ self.run_command,
+ # None will cause default value to be used
+ attempts=1 if optional else None,
+ good_statuses=(0,),
+ error_level=WARNING if optional else FATAL,
+ error_message=("Could not install python package: failed all attempts."),
+ args=[
+ command,
+ ],
+ kwargs={
+ "error_list": VirtualenvErrorList,
+ "cwd": cwd,
+ "env": env,
+ # WARNING only since retry will raise final FATAL if all
+ # retry attempts are unsuccessful - and we only want
+                # an ERROR or FATAL if *no* retry attempt works
+ "error_level": WARNING,
+ },
+ )
+
+ def create_virtualenv(self, modules=(), requirements=()):
+ """
+ Create a python virtualenv.
+
+ This uses the copy of virtualenv that is vendored in mozharness.
+
+ virtualenv_modules can be a list of module names to install, e.g.
+
+ virtualenv_modules = ['module1', 'module2']
+
+ or it can be a heterogeneous list of modules names and dicts that
+ define a module by its name, url-or-path, and a list of its global
+ options.
+
+ virtualenv_modules = [
+ {
+ 'name': 'module1',
+ 'url': None,
+ 'global_options': ['--opt', '--without-gcc']
+ },
+ {
+ 'name': 'module2',
+ 'url': 'http://url/to/package',
+ 'global_options': ['--use-clang']
+ },
+ {
+ 'name': 'module3',
+                'url': os.path.join('path', 'to', 'setup_py', 'dir'),
+ 'global_options': []
+ },
+ 'module4'
+ ]
+
+ virtualenv_requirements is an optional list of pip requirements files to
+ use when invoking pip, e.g.,
+
+ virtualenv_requirements = [
+ '/path/to/requirements1.txt',
+ '/path/to/requirements2.txt'
+ ]
+ """
+ c = self.config
+ dirs = self.query_abs_dirs()
+ venv_path = self.query_virtualenv_path()
+ self.info("Creating virtualenv %s" % venv_path)
+
+ # Always use the virtualenv that is vendored since that is deterministic.
+ # base_work_dir is for when we're running with mozharness.zip, e.g. on
+ # test jobs
+ # abs_src_dir is for when we're running out of a checked out copy of
+ # the source code
+ vendor_search_dirs = [
+ os.path.join("{base_work_dir}", "mozharness"),
+ "{abs_src_dir}",
+ ]
+ if "abs_src_dir" not in dirs and "repo_path" in self.config:
+ dirs["abs_src_dir"] = os.path.normpath(self.config["repo_path"])
+ for d in vendor_search_dirs:
+ try:
+ src_dir = Path(d.format(**dirs))
+ except KeyError:
+ continue
+
+ pip_wheel_path = (
+ src_dir
+ / "third_party"
+ / "python"
+ / "_venv"
+ / "wheels"
+ / "pip-23.0.1-py3-none-any.whl"
+ )
+ setuptools_wheel_path = (
+ src_dir
+ / "third_party"
+ / "python"
+ / "_venv"
+ / "wheels"
+ / "setuptools-51.2.0-py3-none-any.whl"
+ )
+
+ if all(path.exists() for path in (pip_wheel_path, setuptools_wheel_path)):
+ break
+ else:
+ self.fatal("Can't find 'pip' and 'setuptools' wheels")
+
+ venv_python_bin = Path(self.query_python_path())
+
+ if venv_python_bin.exists():
+ self.info(
+ "Virtualenv %s appears to already exist; "
+ "skipping virtualenv creation." % self.query_python_path()
+ )
+ else:
+ self.run_command(
+ [sys.executable, "--version"],
+ )
+
+ # Temporary hack to get around a bug with venv in Python 3.7.3 in CI
+ # https://bugs.python.org/issue36441
+ if self._is_windows():
+ if sys.version_info[:3] == (3, 7, 3):
+ python_exe = Path(sys.executable)
+ debug_exe_dir = (
+ python_exe.parent / "lib" / "venv" / "scripts" / "nt"
+ )
+
+ if debug_exe_dir.exists():
+ for executable in {
+ "python.exe",
+ "python_d.exe",
+ "pythonw.exe",
+ "pythonw_d.exe",
+ }:
+ expected_python_debug_exe = debug_exe_dir / executable
+ if not expected_python_debug_exe.exists():
+ shutil.copy(
+ sys.executable, str(expected_python_debug_exe)
+ )
+
+ venv_creation_flags = ["-m", "venv", venv_path]
+
+ if self._is_windows():
+            # To work around an issue on Windows 10 jobs in CI we have to
+            # explicitly install the default pip separately. Ideally we
+            # could just skip adding "--without-pip" below and get the same
+            # result, but that's apparently not always the case.
+ venv_creation_flags = venv_creation_flags + ["--without-pip"]
+
+ self.mkdir_p(dirs["abs_work_dir"])
+ self.run_command(
+ [sys.executable] + venv_creation_flags,
+ cwd=dirs["abs_work_dir"],
+ error_list=VirtualenvErrorList,
+ halt_on_failure=True,
+ )
+
+ if self._is_windows():
+ self.run_command(
+ [str(venv_python_bin), "-m", "ensurepip", "--default-pip"],
+ cwd=dirs["abs_work_dir"],
+ halt_on_failure=True,
+ )
+
+ self._ensure_python_exe(venv_python_bin.parent)
+
+ # We can work around a bug on some versions of Python 3.6 on
+ # Windows by copying the 'pyvenv.cfg' of the current venv
+ # to the new venv. This will make the new venv reference
+ # the original Python install instead of the current venv,
+ # which resolves the issue. There shouldn't be any harm in
+ # always doing this, but we'll play it safe and restrict it
+ # to Windows Python 3.6 anyway.
+ if self._is_windows() and sys.version_info[:2] == (3, 6):
+ this_venv = Path(sys.executable).parent.parent
+ this_venv_config = this_venv / "pyvenv.cfg"
+ if this_venv_config.exists():
+ new_venv_config = Path(venv_path) / "pyvenv.cfg"
+ shutil.copyfile(str(this_venv_config), str(new_venv_config))
+
+ self.run_command(
+ [
+ str(venv_python_bin),
+ "-m",
+ "pip",
+ "install",
+ "--only-binary",
+ ":all:",
+ "--disable-pip-version-check",
+ str(pip_wheel_path),
+ str(setuptools_wheel_path),
+ ],
+ cwd=dirs["abs_work_dir"],
+ error_list=VirtualenvErrorList,
+ halt_on_failure=True,
+ )
+
+ self.info(self.platform_name())
+ if self.platform_name().startswith("macos"):
+ tmp_path = "{}/bin/bak".format(venv_path)
+ self.info(
+ "Copying venv python binaries to {} to clear for re-sign".format(
+ tmp_path
+ )
+ )
+ subprocess.call("mkdir -p {}".format(tmp_path), shell=True)
+ subprocess.call(
+ "cp {}/bin/python* {}/".format(venv_path, tmp_path), shell=True
+ )
+ self.info("Replacing venv python binaries with reset copies")
+ subprocess.call(
+ "mv -f {}/* {}/bin/".format(tmp_path, venv_path), shell=True
+ )
+ self.info(
+ "codesign -s - --preserve-metadata=identifier,entitlements,flags,runtime "
+ "-f {}/bin/*".format(venv_path)
+ )
+ subprocess.call(
+ "codesign -s - --preserve-metadata=identifier,entitlements,flags,runtime -f "
+ "{}/bin/python*".format(venv_path),
+ shell=True,
+ )
+
+ if not modules:
+ modules = c.get("virtualenv_modules", [])
+ if not requirements:
+ requirements = c.get("virtualenv_requirements", [])
+ if not modules and requirements:
+ self.install_module(requirements=requirements, install_method="pip")
+ for module in modules:
+ module_url = module
+ global_options = []
+ if isinstance(module, dict):
+ if module.get("name", None):
+ module_name = module["name"]
+ else:
+ self.fatal(
+ "Can't install module without module name: %s" % str(module)
+ )
+ module_url = module.get("url", None)
+ global_options = module.get("global_options", [])
+ else:
+ module_url = self.config.get("%s_url" % module, module_url)
+ module_name = module
+ install_method = "pip"
+ self.install_module(
+ module=module_name,
+ module_url=module_url,
+ install_method=install_method,
+ requirements=requirements,
+ global_options=global_options,
+ )
+
+ for (
+ module,
+ url,
+ method,
+ requirements,
+ optional,
+ two_pass,
+ editable,
+ ) in self._virtualenv_modules:
+ if two_pass:
+ self.install_module(
+ module=module,
+ module_url=url,
+ install_method=method,
+ requirements=requirements or (),
+ optional=optional,
+ no_deps=True,
+ editable=editable,
+ )
+ self.install_module(
+ module=module,
+ module_url=url,
+ install_method=method,
+ requirements=requirements or (),
+ optional=optional,
+ editable=editable,
+ )
+
+ self.info("Done creating virtualenv %s." % venv_path)
+
+ self.package_versions(log_output=True)
+
+ def activate_virtualenv(self):
+ """Import the virtualenv's packages into this Python interpreter."""
+ venv_root_dir = Path(self.query_virtualenv_path())
+ venv_name = venv_root_dir.name
+ bin_path = Path(self.query_python_path())
+ bin_dir = bin_path.parent
+
+ if self._is_windows():
+ site_packages_dir = venv_root_dir / "Lib" / "site-packages"
+ else:
+ site_packages_dir = (
+ venv_root_dir
+ / "lib"
+ / "python{}.{}".format(*sys.version_info)
+ / "site-packages"
+ )
+
+ os.environ["PATH"] = os.pathsep.join(
+ [str(bin_dir)] + os.environ.get("PATH", "").split(os.pathsep)
+ )
+ os.environ["VIRTUAL_ENV"] = venv_name
+
+ prev_path = set(sys.path)
+
+ site.addsitedir(str(site_packages_dir.resolve()))
+
+ new_path = list(sys.path)
+
+ sys.path[:] = [p for p in new_path if p not in prev_path] + [
+ p for p in new_path if p in prev_path
+ ]
+
+ sys.real_prefix = sys.prefix
+ sys.prefix = str(venv_root_dir)
+ sys.executable = str(bin_path)
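+
+    # Editor's sketch of the observable effect (assuming a POSIX venv under
+    # the work dir): after activate_virtualenv(),
+    #
+    #     os.environ["PATH"]  # starts with <venv>/bin
+    #     sys.prefix          # == the <venv> root directory
+    #     import some_module  # resolves from the venv's site-packages first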
+
+ def _ensure_python_exe(self, python_exe_root: Path):
+ """On some machines in CI venv does not behave consistently. Sometimes
+ only a "python3" executable is created, but we expect "python". Since
+ they are functionally identical, we can just copy "python3" to "python"
+ (and vice-versa) to solve the problem.
+ """
+ python3_exe_path = python_exe_root / "python3"
+ python_exe_path = python_exe_root / "python"
+
+ if self._is_windows():
+ python3_exe_path = python3_exe_path.with_suffix(".exe")
+ python_exe_path = python_exe_path.with_suffix(".exe")
+
+ if python3_exe_path.exists() and not python_exe_path.exists():
+ shutil.copy(str(python3_exe_path), str(python_exe_path))
+
+ if python_exe_path.exists() and not python3_exe_path.exists():
+ shutil.copy(str(python_exe_path), str(python3_exe_path))
+
+ if not python_exe_path.exists() and not python3_exe_path.exists():
+ raise Exception(
+ f'Neither a "{python_exe_path.name}" or "{python3_exe_path.name}" '
+ f"were found. This means something unexpected happened during the "
+ f"virtual environment creation and we cannot proceed."
+ )
+
+
+# This is (sadly) a mixin for logging methods.
+class PerfherderResourceOptionsMixin(ScriptMixin):
+ def perfherder_resource_options(self):
+ """Obtain a list of extraOptions values to identify the env."""
+ opts = []
+
+ if "TASKCLUSTER_INSTANCE_TYPE" in os.environ:
+ # Include the instance type so results can be grouped.
+ opts.append("taskcluster-%s" % os.environ["TASKCLUSTER_INSTANCE_TYPE"])
+ else:
+ # We assume !taskcluster => buildbot.
+ instance = "unknown"
+
+ # Try to load EC2 instance type from metadata file. This file
+ # may not exist in many scenarios (including when inside a chroot).
+ # So treat it as optional.
+ try:
+ # This file should exist on Linux in EC2.
+ with open("/etc/instance_metadata.json", "rb") as fh:
+ im = json.load(fh)
+                instance = im.get("aws_instance_type", "unknown")
+ except IOError as e:
+ if e.errno != errno.ENOENT:
+ raise
+ self.info(
+ "instance_metadata.json not found; unable to "
+ "determine instance type"
+ )
+ except Exception:
+ self.warning(
+ "error reading instance_metadata: %s" % traceback.format_exc()
+ )
+
+ opts.append("buildbot-%s" % instance)
+
+ return opts
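+
+    # For example (editor's note): with TASKCLUSTER_INSTANCE_TYPE=m5.large
+    # this returns ["taskcluster-m5.large"]; outside Taskcluster, with no
+    # readable instance metadata, it returns ["buildbot-unknown"].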
+
+
+class ResourceMonitoringMixin(PerfherderResourceOptionsMixin):
+ """Provides resource monitoring capabilities to scripts.
+
+ When this class is in the inheritance chain, resource usage stats of the
+ executing script will be recorded.
+
+ This class requires the VirtualenvMixin in order to install a package used
+ for recording resource usage.
+
+ While we would like to record resource usage for the entirety of a script,
+ since we require an external package, we can only record resource usage
+ after that package is installed (as part of creating the virtualenv).
+ That's just the way things have to be.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super(ResourceMonitoringMixin, self).__init__(*args, **kwargs)
+
+ self.register_virtualenv_module("psutil>=5.9.0", method="pip", optional=True)
+ self.register_virtualenv_module(
+ "mozsystemmonitor==1.0.1", method="pip", optional=True
+ )
+ self.register_virtualenv_module("jsonschema==2.5.1", method="pip")
+ self._resource_monitor = None
+
+ # 2-tuple of (name, options) to assign Perfherder resource monitor
+ # metrics to. This needs to be assigned by a script in order for
+ # Perfherder metrics to be reported.
+ self.resource_monitor_perfherder_id = None
+
+ @PostScriptAction("create-virtualenv")
+ def _start_resource_monitoring(self, action, success=None):
+ self.activate_virtualenv()
+
+        # Resource Monitor requires Python 2.7+; however, it's currently optional.
+ # Remove when all machines have had their Python version updated (bug 711299).
+ if sys.version_info[:2] < (2, 7):
+ self.warning(
+ "Resource monitoring will not be enabled! Python 2.7+ required."
+ )
+ return
+
+ try:
+ from mozsystemmonitor.resourcemonitor import SystemResourceMonitor
+
+ self.info("Starting resource monitoring.")
+ self._resource_monitor = SystemResourceMonitor(poll_interval=1.0)
+ self._resource_monitor.start()
+ except Exception:
+ self.warning(
+ "Unable to start resource monitor: %s" % traceback.format_exc()
+ )
+
+ @PreScriptAction
+ def _resource_record_pre_action(self, action):
+ # Resource monitor isn't available until after create-virtualenv.
+ if not self._resource_monitor:
+ return
+
+ self._resource_monitor.begin_phase(action)
+
+ @PostScriptAction
+ def _resource_record_post_action(self, action, success=None):
+ # Resource monitor isn't available until after create-virtualenv.
+ if not self._resource_monitor:
+ return
+
+ self._resource_monitor.finish_phase(action)
+
+ @PostScriptRun
+ def _resource_record_post_run(self):
+ if not self._resource_monitor:
+ return
+
+ # This should never raise an exception. This is a workaround until
+ # mozsystemmonitor is fixed. See bug 895388.
+ try:
+ self._resource_monitor.stop()
+ self._log_resource_usage()
+
+ # Upload a JSON file containing the raw resource data.
+ try:
+ upload_dir = self.query_abs_dirs()["abs_blob_upload_dir"]
+ if not os.path.exists(upload_dir):
+ os.makedirs(upload_dir)
+ with open(os.path.join(upload_dir, "resource-usage.json"), "w") as fh:
+ json.dump(
+ self._resource_monitor.as_dict(), fh, sort_keys=True, indent=4
+ )
+ except (AttributeError, KeyError):
+ self.exception("could not upload resource usage JSON", level=WARNING)
+
+ except Exception:
+ self.warning(
+ "Exception when reporting resource usage: %s" % traceback.format_exc()
+ )
+
+ def _log_resource_usage(self):
+ # Delay import because not available until virtualenv is populated.
+ import jsonschema
+
+ rm = self._resource_monitor
+
+ if rm.start_time is None:
+ return
+
+ def resources(phase):
+ cpu_percent = rm.aggregate_cpu_percent(phase=phase, per_cpu=False)
+ cpu_times = rm.aggregate_cpu_times(phase=phase, per_cpu=False)
+ io = rm.aggregate_io(phase=phase)
+
+ swap_in = sum(m.swap.sin for m in rm.measurements)
+ swap_out = sum(m.swap.sout for m in rm.measurements)
+
+ return cpu_percent, cpu_times, io, (swap_in, swap_out)
+
+ def log_usage(prefix, duration, cpu_percent, cpu_times, io):
+ message = (
+ "{prefix} - Wall time: {duration:.0f}s; "
+ "CPU: {cpu_percent}; "
+ "Read bytes: {io_read_bytes}; Write bytes: {io_write_bytes}; "
+ "Read time: {io_read_time}; Write time: {io_write_time}"
+ )
+
+            # XXX Some test harnesses are complaining about a string being
+            # fed into an 'f' formatter. This will help diagnose the issue.
+ if cpu_percent:
+ # pylint: disable=W1633
+ cpu_percent_str = str(round(cpu_percent)) + "%"
+ else:
+ cpu_percent_str = "Can't collect data"
+
+ try:
+ self.info(
+ message.format(
+ prefix=prefix,
+ duration=duration,
+ cpu_percent=cpu_percent_str,
+ io_read_bytes=io.read_bytes,
+ io_write_bytes=io.write_bytes,
+ io_read_time=io.read_time,
+ io_write_time=io.write_time,
+ )
+ )
+
+ except ValueError:
+ self.warning("Exception when formatting: %s" % traceback.format_exc())
+
+ cpu_percent, cpu_times, io, (swap_in, swap_out) = resources(None)
+ duration = rm.end_time - rm.start_time
+
+ # Write out Perfherder data if configured.
+ if self.resource_monitor_perfherder_id:
+ perfherder_name, perfherder_options = self.resource_monitor_perfherder_id
+
+ suites = []
+ overall = []
+
+ if cpu_percent:
+ overall.append(
+ {
+ "name": "cpu_percent",
+ "value": cpu_percent,
+ }
+ )
+
+ overall.extend(
+ [
+ {"name": "io_write_bytes", "value": io.write_bytes},
+ {"name": "io.read_bytes", "value": io.read_bytes},
+ {"name": "io_write_time", "value": io.write_time},
+ {"name": "io_read_time", "value": io.read_time},
+ ]
+ )
+
+ suites.append(
+ {
+ "name": "%s.overall" % perfherder_name,
+ "extraOptions": perfherder_options
+ + self.perfherder_resource_options(),
+ "subtests": overall,
+ }
+ )
+
+ for phase in rm.phases.keys():
+ phase_duration = rm.phases[phase][1] - rm.phases[phase][0]
+ subtests = [
+ {
+ "name": "time",
+ "value": phase_duration,
+ }
+ ]
+ cpu_percent = rm.aggregate_cpu_percent(phase=phase, per_cpu=False)
+ if cpu_percent is not None:
+ subtests.append(
+ {
+ "name": "cpu_percent",
+ "value": rm.aggregate_cpu_percent(
+ phase=phase, per_cpu=False
+ ),
+ }
+ )
+
+ # We don't report I/O during each step because measured I/O
+ # is system I/O and that I/O can be delayed (e.g. writes will
+ # buffer before being flushed and recorded in our metrics).
+ suites.append(
+ {
+ "name": "%s.%s" % (perfherder_name, phase),
+ "subtests": subtests,
+ }
+ )
+
+ data = {
+ "framework": {"name": "job_resource_usage"},
+ "suites": suites,
+ }
+
+ schema_path = os.path.join(
+ external_tools_path, "performance-artifact-schema.json"
+ )
+ with open(schema_path, "rb") as fh:
+ schema = json.load(fh)
+
+ # this will throw an exception that causes the job to fail if the
+ # perfherder data is not valid -- please don't change this
+ # behaviour, otherwise people will inadvertently break this
+ # functionality
+ self.info("Validating Perfherder data against %s" % schema_path)
+ jsonschema.validate(data, schema)
+ self.info("PERFHERDER_DATA: %s" % json.dumps(data))
+
+ log_usage("Total resource usage", duration, cpu_percent, cpu_times, io)
+
+ # Print special messages so usage shows up in Treeherder.
+ if cpu_percent:
+ self._tinderbox_print("CPU usage<br/>{:,.1f}%".format(cpu_percent))
+
+ self._tinderbox_print(
+ "I/O read bytes / time<br/>{:,} / {:,}".format(io.read_bytes, io.read_time)
+ )
+ self._tinderbox_print(
+ "I/O write bytes / time<br/>{:,} / {:,}".format(
+ io.write_bytes, io.write_time
+ )
+ )
+
+ # Print CPU components having >1%. "cpu_times" is a data structure
+ # whose attributes are measurements. Ideally we'd have an API that
+ # returned just the measurements as a dict or something.
+ cpu_attrs = []
+ for attr in sorted(dir(cpu_times)):
+ if attr.startswith("_"):
+ continue
+ if attr in ("count", "index"):
+ continue
+ cpu_attrs.append(attr)
+
+ cpu_total = sum(getattr(cpu_times, attr) for attr in cpu_attrs)
+
+ for attr in cpu_attrs:
+ value = getattr(cpu_times, attr)
+ # cpu_total can be 0.0. Guard against division by 0.
+ # pylint --py3k W1619
+ percent = value / cpu_total * 100.0 if cpu_total else 0.0
+
+ if percent > 1.00:
+ self._tinderbox_print(
+ "CPU {}<br/>{:,.1f} ({:,.1f}%)".format(attr, value, percent)
+ )
+
+ # Swap on Windows isn't reported by psutil.
+ if not self._is_windows():
+ self._tinderbox_print(
+ "Swap in / out<br/>{:,} / {:,}".format(swap_in, swap_out)
+ )
+
+ for phase in rm.phases.keys():
+ start_time, end_time = rm.phases[phase]
+ cpu_percent, cpu_times, io, swap = resources(phase)
+ log_usage(phase, end_time - start_time, cpu_percent, cpu_times, io)
+
+ def _tinderbox_print(self, message):
+ self.info("TinderboxPrint: %s" % message)
+
+
+# This needs to be inherited only if you have already inherited ScriptMixin
+class Python3Virtualenv(object):
+ """Support Python3.5+ virtualenv creation."""
+
+ py3_initialized_venv = False
+
+ def py3_venv_configuration(self, python_path, venv_path):
+ """We don't use __init__ to allow integrating with other mixins.
+
+ python_path - Path to Python 3 binary.
+ venv_path - Path to virtual environment to be created.
+ """
+ self.py3_initialized_venv = True
+ self.py3_python_path = os.path.abspath(python_path)
+ version = self.get_output_from_command(
+ [self.py3_python_path, "--version"], env=self.query_env()
+ ).split()[-1]
+        # Creating environments with `-m venv` requires Python 3.5+.
+        # Compare the version numerically; a plain string comparison would
+        # order e.g. "3.10.0" before "3.5.0".
+        assert tuple(int(x) for x in version.split(".")[:2]) >= (3, 5)
+ self.py3_venv_path = os.path.abspath(venv_path)
+ self.py3_pip_path = os.path.join(self.py3_path_to_executables(), "pip")
+
+ def py3_path_to_executables(self):
+ platform = self.platform_name()
+ if platform.startswith("win"):
+ return os.path.join(self.py3_venv_path, "Scripts")
+ else:
+ return os.path.join(self.py3_venv_path, "bin")
+
+ def py3_venv_initialized(func):
+ def call(self, *args, **kwargs):
+ if not self.py3_initialized_venv:
+ raise Exception(
+ "You need to call py3_venv_configuration() "
+ "before using this method."
+ )
+ func(self, *args, **kwargs)
+
+ return call
+
+ @py3_venv_initialized
+ def py3_create_venv(self):
+ """Create Python environment with python3 -m venv /path/to/venv."""
+ if os.path.exists(self.py3_venv_path):
+ self.info(
+ "Virtualenv %s appears to already exist; skipping "
+ "virtualenv creation." % self.py3_venv_path
+ )
+ else:
+ self.info("Running command...")
+ self.run_command(
+ "%s -m venv %s" % (self.py3_python_path, self.py3_venv_path),
+ error_list=VirtualenvErrorList,
+ halt_on_failure=True,
+ env=self.query_env(),
+ )
+
+ @py3_venv_initialized
+ def py3_install_modules(self, modules, use_mozharness_pip_config=True):
+ if not os.path.exists(self.py3_venv_path):
+ raise Exception("You need to call py3_create_venv() first.")
+
+ for m in modules:
+ cmd = [self.py3_pip_path, "install"]
+ if use_mozharness_pip_config:
+ cmd += self._mozharness_pip_args()
+ cmd += [m]
+ self.run_command(cmd, env=self.query_env())
+
+ def _mozharness_pip_args(self):
+ """We have information in Mozharness configs that apply to pip"""
+ c = self.config
+ pip_args = []
+ # To avoid timeouts with our pypi server, increase default timeout:
+ # https://bugzilla.mozilla.org/show_bug.cgi?id=1007230#c802
+ pip_args += ["--timeout", str(c.get("pip_timeout", 120))]
+
+ if c.get("find_links") and not c["pip_index"]:
+ pip_args += ["--no-index"]
+
+ # Add --find-links pages to look at. Add --trusted-host automatically if
+ # the host isn't secure. This allows modern versions of pip to connect
+ # without requiring an override.
+ trusted_hosts = set()
+ for link in c.get("find_links", []):
+ parsed = urlparse.urlparse(link)
+
+ try:
+ socket.gethostbyname(parsed.hostname)
+ except socket.gaierror as e:
+ self.info("error resolving %s (ignoring): %s" % (parsed.hostname, e))
+ continue
+
+ pip_args += ["--find-links", link]
+ if parsed.scheme != "https":
+ trusted_hosts.add(parsed.hostname)
+
+ for host in sorted(trusted_hosts):
+ pip_args += ["--trusted-host", host]
+
+ return pip_args
+
+ @py3_venv_initialized
+ def py3_install_requirement_files(
+        self, requirements, pip_args=(), use_mozharness_pip_config=True
+ ):
+ """
+        requirements - a list of paths to pip requirements files
+ """
+ cmd = [self.py3_pip_path, "install"]
+ cmd += pip_args
+
+ if use_mozharness_pip_config:
+ cmd += self._mozharness_pip_args()
+
+ for requirement_path in requirements:
+ cmd += ["-r", requirement_path]
+
+ self.run_command(cmd, env=self.query_env())
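+
+    # Illustrative end-to-end usage (editor's sketch; paths are hypothetical):
+    #
+    #     self.py3_venv_configuration("/usr/bin/python3", "py3venv")
+    #     self.py3_create_venv()
+    #     self.py3_install_modules(["mozdevice"])
+    #     self.py3_install_requirement_files(["requirements.txt"])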
+
+
+# __main__ {{{1
+
+if __name__ == "__main__":
+ """TODO: unit tests."""
+ pass
diff --git a/testing/mozharness/mozharness/base/script.py b/testing/mozharness/mozharness/base/script.py
new file mode 100644
index 0000000000..0a5622440b
--- /dev/null
+++ b/testing/mozharness/mozharness/base/script.py
@@ -0,0 +1,2551 @@
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Generic script objects.
+
+script.py, along with config.py and log.py, represents the core of
+mozharness.
+"""
+
+import codecs
+import datetime
+import errno
+import fnmatch
+import functools
+import gzip
+import hashlib
+import inspect
+import itertools
+import os
+import platform
+import pprint
+import re
+import shutil
+import socket
+import ssl
+import stat
+import subprocess
+import sys
+import tarfile
+import time
+import traceback
+import zipfile
+import zlib
+from contextlib import contextmanager
+from io import BytesIO
+
+import mozinfo
+import six
+from mozprocess import ProcessHandler
+from six import binary_type
+
+from mozharness.base.config import BaseConfig
+from mozharness.base.log import (
+ DEBUG,
+ ERROR,
+ FATAL,
+ INFO,
+ WARNING,
+ ConsoleLogger,
+ LogMixin,
+ MultiFileLogger,
+ OutputParser,
+ SimpleFileLogger,
+)
+
+try:
+ import httplib
+except ImportError:
+ import http.client as httplib
+try:
+ import simplejson as json
+except ImportError:
+ import json
+try:
+ from urllib2 import Request, quote, urlopen
+except ImportError:
+ from urllib.request import Request, quote, urlopen
+try:
+ import urlparse
+except ImportError:
+ import urllib.parse as urlparse
+if os.name == "nt":
+ import locale
+
+ try:
+ import win32api
+ import win32file
+
+ PYWIN32 = True
+ except ImportError:
+ PYWIN32 = False
+
+try:
+ from urllib2 import HTTPError, URLError
+except ImportError:
+ from urllib.error import HTTPError, URLError
+
+
+class ContentLengthMismatch(Exception):
+ pass
+
+
+def _validate_tar_member(member, path):
+ def _is_within_directory(directory, target):
+ real_directory = os.path.realpath(directory)
+ real_target = os.path.realpath(target)
+ prefix = os.path.commonprefix([real_directory, real_target])
+ return prefix == real_directory
+
+ member_path = os.path.join(path, member.name)
+ if not _is_within_directory(path, member_path):
+ raise Exception("Attempted path traversal in tar file: " + member.name)
+ if member.issym():
+ link_path = os.path.join(os.path.dirname(member_path), member.linkname)
+ if not _is_within_directory(path, link_path):
+ raise Exception("Attempted link path traversal in tar file: " + member.name)
+ if member.mode & (stat.S_ISUID | stat.S_ISGID):
+ raise Exception("Attempted setuid or setgid in tar file: " + member.name)
+
+
+def _safe_extract(tar, path=".", *, numeric_owner=False):
+ def _files(tar, path):
+ for member in tar:
+ _validate_tar_member(member, path)
+ yield member
+
+ tar.extractall(path, members=_files(tar, path), numeric_owner=numeric_owner)
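+
+
+# Editor's sketch of the intended use (archive name is hypothetical): callers
+# open the archive themselves and _safe_extract() vets every member before it
+# is written out, as deflate() does later in this module:
+#
+#     with tarfile.open("archive.tar.gz", "r:gz") as tar:
+#         _safe_extract(tar, path="dest")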
+
+
+def platform_name():
+ pm = PlatformMixin()
+
+ if pm._is_linux() and pm._is_64_bit():
+ return "linux64"
+ elif pm._is_linux() and not pm._is_64_bit():
+ return "linux"
+ elif pm._is_darwin():
+ return "macosx"
+ elif pm._is_windows() and pm._is_64_bit():
+ return "win64"
+ elif pm._is_windows() and not pm._is_64_bit():
+ return "win32"
+ else:
+ return None
+
+
+class PlatformMixin(object):
+ def _is_windows(self):
+ """check if the current operating system is Windows.
+
+ Returns:
+ bool: True if the current platform is Windows, False otherwise
+ """
+ system = platform.system()
+ if system in ("Windows", "Microsoft"):
+ return True
+ if system.startswith("CYGWIN"):
+ return True
+ if os.name == "nt":
+ return True
+
+ def _is_darwin(self):
+ """check if the current operating system is Darwin.
+
+ Returns:
+ bool: True if the current platform is Darwin, False otherwise
+ """
+ if platform.system() in ("Darwin"):
+ return True
+ if sys.platform.startswith("darwin"):
+ return True
+
+ def _is_linux(self):
+ """check if the current operating system is a Linux distribution.
+
+ Returns:
+ bool: True if the current platform is a Linux distro, False otherwise
+ """
+ if platform.system() in ("Linux"):
+ return True
+ if sys.platform.startswith("linux"):
+ return True
+
+ def _is_debian(self):
+ """check if the current operating system is explicitly Debian.
+ This intentionally doesn't count Debian derivatives like Ubuntu.
+
+ Returns:
+ bool: True if the current platform is debian, False otherwise
+ """
+ if not self._is_linux():
+ return False
+ self.info(mozinfo.linux_distro)
+ re_debian_distro = re.compile("debian")
+ return re_debian_distro.match(mozinfo.linux_distro) is not None
+
+ def _is_redhat_based(self):
+ """check if the current operating system is a Redhat derived Linux distribution.
+
+ Returns:
+ bool: True if the current platform is a Redhat Linux distro, False otherwise
+ """
+ if not self._is_linux():
+ return False
+ re_redhat_distro = re.compile("Redhat|Fedora|CentOS|Oracle")
+ return re_redhat_distro.match(mozinfo.linux_distro) is not None
+
+ def _is_64_bit(self):
+ if self._is_darwin():
+ # osx is a special snowflake and to ensure the arch, it is better to use the following
+ return (
+ sys.maxsize > 2 ** 32
+ ) # context: https://docs.python.org/2/library/platform.html
+ else:
+ # Using machine() gives you the architecture of the host rather
+ # than the build type of the Python binary
+ return "64" in platform.machine()
+
+
+# ScriptMixin {{{1
+class ScriptMixin(PlatformMixin):
+ """This mixin contains simple filesystem commands and the like.
+
+ It also contains some very special but very complex methods that,
+ together with logging and config, provide the base for all scripts
+ in this harness.
+
+ WARNING !!!
+ This class depends entirely on `LogMixin` methods in such a way that it will
+    only work if a class inherits from both `ScriptMixin` and `LogMixin`
+ simultaneously.
+
+ Depends on self.config of some sort.
+
+ Attributes:
+ env (dict): a mapping object representing the string environment.
+ script_obj (ScriptMixin): reference to a ScriptMixin instance.
+ """
+
+ env = None
+ script_obj = None
+ ssl_context = None
+
+ def query_filesize(self, file_path):
+ self.info("Determining filesize for %s" % file_path)
+ length = os.path.getsize(file_path)
+ self.info(" %s" % str(length))
+ return length
+
+ # TODO this should be parallelized with the to-be-written BaseHelper!
+ def query_sha512sum(self, file_path):
+ self.info("Determining sha512sum for %s" % file_path)
+ m = hashlib.sha512()
+ contents = self.read_from_file(file_path, verbose=False, open_mode="rb")
+ m.update(contents)
+ sha512 = m.hexdigest()
+ self.info(" %s" % sha512)
+ return sha512
+
+ def platform_name(self):
+ """Return the platform name on which the script is running on.
+ Returns:
+ None: for failure to determine the platform.
+ str: The name of the platform (e.g. linux64)
+ """
+ return platform_name()
+
+ # Simple filesystem commands {{{2
+ def mkdir_p(self, path, error_level=ERROR):
+ """Create a directory if it doesn't exists.
+ This method also logs the creation, error or current existence of the
+ directory to be created.
+
+ Args:
+ path (str): path of the directory to be created.
+ error_level (str): log level name to be used in case of error.
+
+ Returns:
+            None: for success.
+ int: -1 on error
+ """
+
+ if not os.path.exists(path):
+ self.info("mkdir: %s" % path)
+ try:
+ os.makedirs(path)
+ except OSError:
+ self.log("Can't create directory %s!" % path, level=error_level)
+ return -1
+ else:
+ self.debug("mkdir_p: %s Already exists." % path)
+
+ def rmtree(self, path, log_level=INFO, error_level=ERROR, exit_code=-1):
+ """Delete an entire directory tree and log its result.
+ This method also logs the platform rmtree function, its retries, errors,
+ and current existence of the directory.
+
+ Args:
+ path (str): path to the directory tree root to remove.
+ log_level (str, optional): log level name to for this operation. Defaults
+ to `INFO`.
+ error_level (str, optional): log level name to use in case of error.
+ Defaults to `ERROR`.
+            exit_code (int, optional): unused parameter, not used here.
+                                       Defaults to -1
+
+ Returns:
+ None: for success
+ """
+
+ self.log("rmtree: %s" % path, level=log_level)
+ error_message = "Unable to remove %s!" % path
+ if self._is_windows():
+ # Call _rmtree_windows() directly, since even checking
+ # os.path.exists(path) will hang if path is longer than MAX_PATH.
+ self.info("Using _rmtree_windows ...")
+ return self.retry(
+ self._rmtree_windows,
+ error_level=error_level,
+ error_message=error_message,
+ args=(path,),
+ log_level=log_level,
+ )
+ if os.path.exists(path):
+ if os.path.isdir(path):
+ return self.retry(
+ shutil.rmtree,
+ error_level=error_level,
+ error_message=error_message,
+ retry_exceptions=(OSError,),
+ args=(path,),
+ log_level=log_level,
+ )
+ else:
+ return self.retry(
+ os.remove,
+ error_level=error_level,
+ error_message=error_message,
+ retry_exceptions=(OSError,),
+ args=(path,),
+ log_level=log_level,
+ )
+ else:
+ self.debug("%s doesn't exist." % path)
+
+ def query_msys_path(self, path):
+ """replaces the Windows harddrive letter path style with a linux
+ path style, e.g. C:// --> /C/
+ Note: method, not used in any script.
+
+ Args:
+ path (str?): path to convert to the linux path style.
+ Returns:
+ str: in case `path` is a string. The result is the path with the new notation.
+ type(path): `path` itself is returned in case `path` is not str type.
+ """
+ if not isinstance(path, six.string_types):
+ return path
+ path = path.replace("\\", "/")
+
+ def repl(m):
+ return "/%s/" % m.group(1)
+
+ path = re.sub(r"""^([a-zA-Z]):/""", repl, path)
+ return path
+
+ def _rmtree_windows(self, path):
+ """Windows-specific rmtree that handles path lengths longer than MAX_PATH.
+ Ported from clobberer.py.
+
+ Args:
+ path (str): directory path to remove.
+
+ Returns:
+            None: if the path doesn't exist.
+ int: the return number of calling `self.run_command`
+ int: in case the path specified is not a directory but a file.
+ 0 on success, non-zero on error. Note: The returned value
+ is the result of calling `win32file.DeleteFile`
+ """
+
+ assert self._is_windows()
+ path = os.path.realpath(path)
+ full_path = "\\\\?\\" + path
+ if not os.path.exists(full_path):
+ return
+ if not PYWIN32:
+ if not os.path.isdir(path):
+ return self.run_command('del /F /Q "%s"' % path)
+ else:
+ return self.run_command('rmdir /S /Q "%s"' % path)
+ # Make sure directory is writable
+ win32file.SetFileAttributesW("\\\\?\\" + path, win32file.FILE_ATTRIBUTE_NORMAL)
+        # rmtree() is sometimes called on a single file; handle that directly.
+ if not os.path.isdir("\\\\?\\" + path):
+ return win32file.DeleteFile("\\\\?\\" + path)
+
+ for ffrec in win32api.FindFiles("\\\\?\\" + path + "\\*.*"):
+ file_attr = ffrec[0]
+ name = ffrec[8]
+ if name == "." or name == "..":
+ continue
+ full_name = os.path.join(path, name)
+
+ if file_attr & win32file.FILE_ATTRIBUTE_DIRECTORY:
+ self._rmtree_windows(full_name)
+ else:
+ try:
+ win32file.SetFileAttributesW(
+ "\\\\?\\" + full_name, win32file.FILE_ATTRIBUTE_NORMAL
+ )
+ win32file.DeleteFile("\\\\?\\" + full_name)
+ except Exception:
+ # DeleteFile fails on long paths, del /f /q works just fine
+ self.run_command('del /F /Q "%s"' % full_name)
+
+ win32file.RemoveDirectory("\\\\?\\" + path)
+
+ def get_filename_from_url(self, url):
+ """parse a filename base on an url.
+
+ Args:
+ url (str): url to parse for the filename
+
+ Returns:
+ str: filename parsed from the url, or `netloc` network location part
+ of the url.
+ """
+
+ parsed = urlparse.urlsplit(url.rstrip("/"))
+ if parsed.path != "":
+ return parsed.path.rsplit("/", 1)[-1]
+ else:
+ return parsed.netloc
+
+ def _urlopen(self, url, **kwargs):
+ """open the url `url` using `urllib2`.`
+ This method can be overwritten to extend its complexity
+
+ Args:
+ url (str | urllib.request.Request): url to open
+ kwargs: Arbitrary keyword arguments passed to the `urllib.request.urlopen` function.
+
+ Returns:
+ file-like: file-like object with additional methods as defined in
+ `urllib.request.urlopen`_.
+ None: None may be returned if no handler handles the request.
+
+ Raises:
+ urllib2.URLError: on errors
+
+        .. urllib.request.urlopen:
+ https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
+ """
+ # http://bugs.python.org/issue13359 - urllib2 does not automatically quote the URL
+ url_quoted = quote(url, safe="%/:=&?~#+!$,;'@()*[]|")
+ # windows certificates need to be refreshed (https://bugs.python.org/issue36011)
+ if self.platform_name() in ("win64",) and platform.architecture()[0] in (
+ "x64",
+ ):
+ if self.ssl_context is None:
+ self.ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
+ self.ssl_context.load_default_certs()
+ return urlopen(url_quoted, context=self.ssl_context, **kwargs)
+ else:
+ return urlopen(url_quoted, **kwargs)
+
+ def fetch_url_into_memory(self, url):
+ """Downloads a file from a url into memory instead of disk.
+
+ Args:
+ url (str): URL path where the file to be downloaded is located.
+
+ Raises:
+ IOError: When the url points to a file on disk and cannot be found
+ ContentLengthMismatch: When the length of the retrieved content does not match the
+ Content-Length response header.
+ ValueError: When the scheme of a url is not what is expected.
+
+ Returns:
+ BytesIO: contents of url
+ """
+ self.info("Fetch {} into memory".format(url))
+ parsed_url = urlparse.urlparse(url)
+
+ if parsed_url.scheme in ("", "file"):
+ path = parsed_url.path
+ if not os.path.isfile(path):
+ raise IOError("Could not find file to extract: {}".format(url))
+
+ content_length = os.stat(path).st_size
+
+        # In case we're referencing a file without file://
+ if parsed_url.scheme == "":
+ url = "file://%s" % os.path.abspath(url)
+ parsed_url = urlparse.urlparse(url)
+
+ request = Request(url)
+ # When calling fetch_url_into_memory() you should retry when we raise
+ # one of these exceptions:
+ # * Bug 1300663 - HTTPError: HTTP Error 404: Not Found
+ # * Bug 1300413 - HTTPError: HTTP Error 500: Internal Server Error
+ # * Bug 1300943 - HTTPError: HTTP Error 503: Service Unavailable
+ # * Bug 1300953 - URLError: <urlopen error [Errno -2] Name or service not known>
+ # * Bug 1301594 - URLError: <urlopen error [Errno 10054] An existing connection was ...
+ # * Bug 1301597 - URLError: <urlopen error [Errno 8] _ssl.c:504: EOF occurred in ...
+ # * Bug 1301855 - URLError: <urlopen error [Errno 60] Operation timed out>
+ # * Bug 1302237 - URLError: <urlopen error [Errno 104] Connection reset by peer>
+ # * Bug 1301807 - BadStatusLine: ''
+ #
+ # Bug 1309912 - Adding timeout in hopes to solve blocking on response.read() (bug 1300413)
+ response = urlopen(request, timeout=30)
+
+ if parsed_url.scheme in ("http", "https"):
+ content_length = int(response.headers.get("Content-Length"))
+
+ response_body = response.read()
+ response_body_size = len(response_body)
+
+ self.info("Content-Length response header: {}".format(content_length))
+ self.info("Bytes received: {}".format(response_body_size))
+
+ if response_body_size != content_length:
+ raise ContentLengthMismatch(
+ "The retrieved Content-Length header declares a body length "
+ "of {} bytes, while we actually retrieved {} bytes".format(
+ content_length, response_body_size
+ )
+ )
+
+ if response.info().get("Content-Encoding") == "gzip":
+ self.info('Content-Encoding is "gzip", so decompressing response body')
+ # See http://www.zlib.net/manual.html#Advanced
+ # section "ZEXTERN int ZEXPORT inflateInit2 OF....":
+ # Add 32 to windowBits to enable zlib and gzip decoding with automatic
+ # header detection, or add 16 to decode only the gzip format (the zlib
+ # format will return a Z_DATA_ERROR).
+ # Adding 16 since we only wish to support gzip encoding.
+ file_contents = zlib.decompress(response_body, zlib.MAX_WBITS | 16)
+ else:
+ file_contents = response_body
+
+ # Use BytesIO instead of StringIO
+ # http://stackoverflow.com/questions/34162017/unzip-buffer-with-python/34162395#34162395
+ return BytesIO(file_contents)
+
+ def _download_file(self, url, file_name):
+ """Helper function for download_file()
+ Additionaly this function logs all exceptions as warnings before
+ re-raising them
+
+ Args:
+ url (str): string containing the URL with the file location
+ file_name (str): name of the file where the downloaded file
+ is written.
+
+ Returns:
+ str: filename of the written file on disk
+
+ Raises:
+ urllib2.URLError: on incomplete download.
+ urllib2.HTTPError: on Http error code
+ socket.timeout: on connection timeout
+ socket.error: on socket error
+ """
+ # If our URLs look like files, prefix them with file:// so they can
+ # be loaded like URLs.
+ if not (url.startswith("http") or url.startswith("file://")):
+ if not os.path.isfile(url):
+ self.fatal("The file %s does not exist" % url)
+ url = "file://%s" % os.path.abspath(url)
+
+ try:
+ f_length = None
+ f = self._urlopen(url, timeout=30)
+
+ if f.info().get("content-length") is not None:
+ f_length = int(f.info()["content-length"])
+ got_length = 0
+ if f.info().get("Content-Encoding") == "gzip":
+ # Note, we'll download the full compressed content into its own
+ # file, since that allows the gzip library to seek through it.
+ # Once downloaded, we'll decompress it into the real target
+ # file, and delete the compressed version.
+ local_file = open(file_name + ".gz", "wb")
+ else:
+ local_file = open(file_name, "wb")
+ while True:
+ block = f.read(1024 ** 2)
+ if not block:
+ if f_length is not None and got_length != f_length:
+ raise URLError(
+ "Download incomplete; content-length was %d, "
+ "but only received %d" % (f_length, got_length)
+ )
+ break
+ local_file.write(block)
+ if f_length is not None:
+ got_length += len(block)
+ local_file.close()
+ if f.info().get("Content-Encoding") == "gzip":
+ # Decompress file into target location, then remove compressed version
+ with open(file_name, "wb") as f_out:
+ # On some execution paths, this could be called with python 2.6
+ # whereby gzip.open(...) cannot be used with a 'with' statement.
+ # So let's do this the python 2.6 way...
+ try:
+ f_in = gzip.open(file_name + ".gz", "rb")
+ shutil.copyfileobj(f_in, f_out)
+ finally:
+ f_in.close()
+ os.remove(file_name + ".gz")
+ return file_name
+ except HTTPError as e:
+ self.warning(
+ "Server returned status %s %s for %s" % (str(e.code), str(e), url)
+ )
+ raise
+ except URLError as e:
+ self.warning("URL Error: %s" % url)
+
+ # Failures due to missing local files won't benefit from retry.
+ # Raise the original OSError.
+ if isinstance(e.args[0], OSError) and e.args[0].errno == errno.ENOENT:
+ raise e.args[0]
+
+ raise
+ except socket.timeout as e:
+ self.warning("Timed out accessing %s: %s" % (url, str(e)))
+ raise
+ except socket.error as e:
+ self.warning("Socket error when accessing %s: %s" % (url, str(e)))
+ raise
+
+ def _retry_download(self, url, error_level, file_name=None, retry_config=None):
+ """Helper method to retry download methods.
+
+ This method calls `self.retry` on `self._download_file` using the passed
+ parameters if a file_name is specified. If no file is specified, we will
+ instead call `self._urlopen`, which grabs the contents of a url but does
+ not create a file on disk.
+
+ Args:
+ url (str): URL path where the file is located.
+ file_name (str): file_name where the file will be written to.
+ error_level (str): log level to use in case an error occurs.
+ retry_config (dict, optional): key-value pairs to be passed to
+ `self.retry`. Defaults to `None`
+
+ Returns:
+ str: `self._download_file` return value is returned
+ unknown: `self.retry` `failure_status` is returned on failure, which
+ defaults to -1
+ """
+ retry_args = dict(
+ failure_status=None,
+ retry_exceptions=(
+ HTTPError,
+ URLError,
+ httplib.HTTPException,
+ socket.timeout,
+ socket.error,
+ ),
+ error_message="Can't download from %s to %s!" % (url, file_name),
+ error_level=error_level,
+ )
+
+ if retry_config:
+ retry_args.update(retry_config)
+
+ download_func = self._urlopen
+ kwargs = {"url": url}
+ if file_name:
+ download_func = self._download_file
+ kwargs = {"url": url, "file_name": file_name}
+
+ return self.retry(download_func, kwargs=kwargs, **retry_args)
+
+ def _filter_entries(self, namelist, extract_dirs):
+ """Filter entries of the archive based on the specified list of to extract dirs."""
+ filter_partial = functools.partial(fnmatch.filter, namelist)
+ entries = itertools.chain(*map(filter_partial, extract_dirs or ["*"]))
+
+ for entry in entries:
+ yield entry
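+
+    # For example (editor's note): with namelist ["bin/app", "docs/readme"]
+    # and extract_dirs ["bin/*"], only "bin/app" is yielded; the default "*"
+    # yields every entry.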
+
+ def unzip(self, compressed_file, extract_to, extract_dirs="*", verbose=False):
+ """This method allows to extract a zip file without writing to disk first.
+
+ Args:
+ compressed_file (object): File-like object with the contents of a compressed zip file.
+ extract_to (str): where to extract the compressed file.
+ extract_dirs (list, optional): directories inside the archive file to extract.
+ Defaults to '*'.
+ verbose (bool, optional): whether or not extracted content should be displayed.
+ Defaults to False.
+
+ Raises:
+ zipfile.BadZipfile: on contents of zipfile being invalid
+ """
+ with zipfile.ZipFile(compressed_file) as bundle:
+ entries = self._filter_entries(bundle.namelist(), extract_dirs)
+
+ for entry in entries:
+ if verbose:
+ self.info(" {}".format(entry))
+
+ # Exception to be retried:
+ # Bug 1301645 - BadZipfile: Bad CRC-32 for file ...
+ # http://stackoverflow.com/questions/5624669/strange-badzipfile-bad-crc-32-problem/5626098#5626098
+ # Bug 1301802 - error: Error -3 while decompressing: invalid stored block lengths
+ bundle.extract(entry, path=extract_to)
+
+ # ZipFile doesn't preserve permissions during extraction:
+ # http://bugs.python.org/issue15795
+ fname = os.path.realpath(os.path.join(extract_to, entry))
+ try:
+ # getinfo() can raise KeyError
+ mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
+ # Only set permissions if attributes are available. Otherwise all
+ # permissions will be removed eg. on Windows.
+ if mode:
+ os.chmod(fname, mode)
+
+ except KeyError:
+ self.warning("{} was not found in the zip file".format(entry))
+
+ def deflate(self, compressed_file, mode, extract_to=".", *args, **kwargs):
+ """This method allows to extract a compressed file from a tar, tar.bz2 and tar.gz files.
+
+ Args:
+ compressed_file (object): File-like object with the contents of a compressed file.
+ mode (str): string of the form 'filemode[:compression]' (e.g. 'r:gz' or 'r:bz2')
+ extract_to (str, optional): where to extract the compressed file.
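+
+        Example (illustrative filename):
+
+            with open("archive.tar.bz2", "rb") as f:
+                self.deflate(f, mode="r:bz2", extract_to="build")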
+ """
+ with tarfile.open(fileobj=compressed_file, mode=mode) as t:
+ _safe_extract(t, path=extract_to)
+
+ def download_unpack(self, url, extract_to=".", extract_dirs="*", verbose=False):
+ """Generic method to download and extract a compressed file without writing it
+ to disk first.
+
+ Args:
+ url (str): URL where the file to be downloaded is located.
+ extract_to (str, optional): directory where the downloaded file will
+ be extracted to.
+ extract_dirs (list, optional): directories inside the archive to extract.
+ Defaults to `*`. It currently only applies to zip files.
+ verbose (bool, optional): whether or not extracted content should be displayed.
+ Defaults to False.
+
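+        Example (URL and dirs are illustrative):
+
+            self.download_unpack(
+                "https://example.com/target.zip",
+                extract_to="fetches",
+                extract_dirs=["bin/*", "certs/*"],
+            )
+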
+ """
+
+ def _determine_extraction_method_and_kwargs(url):
+ EXTENSION_TO_MIMETYPE = {
+ "bz2": "application/x-bzip2",
+ "gz": "application/x-gzip",
+ "tar": "application/x-tar",
+ "zip": "application/zip",
+ }
+ MIMETYPES = {
+ "application/x-bzip2": {
+ "function": self.deflate,
+ "kwargs": {"mode": "r:bz2"},
+ },
+ "application/x-gzip": {
+ "function": self.deflate,
+ "kwargs": {"mode": "r:gz"},
+ },
+ "application/x-tar": {
+ "function": self.deflate,
+ "kwargs": {"mode": "r"},
+ },
+ "application/zip": {
+ "function": self.unzip,
+ },
+ "application/x-zip-compressed": {
+ "function": self.unzip,
+ },
+ }
+
+ filename = url.split("/")[-1]
+ # XXX: bz2/gz instead of tar.{bz2/gz}
+ extension = filename[filename.rfind(".") + 1 :]
+ mimetype = EXTENSION_TO_MIMETYPE[extension]
+ self.debug("Mimetype: {}".format(mimetype))
+
+ function = MIMETYPES[mimetype]["function"]
+ kwargs = {
+ "compressed_file": compressed_file,
+ "extract_to": extract_to,
+ "extract_dirs": extract_dirs,
+ "verbose": verbose,
+ }
+ kwargs.update(MIMETYPES[mimetype].get("kwargs", {}))
+
+ return function, kwargs
+
+        # Many scripts override this method and set extract_dirs to None
+ extract_dirs = "*" if extract_dirs is None else extract_dirs
+        self.info(
+            "Downloading {} and extracting these dirs: {} to {}".format(
+                url,
+                ", ".join(extract_dirs),
+                extract_to,
+            )
+        )
+
+ # 1) Let's fetch the file
+ retry_args = dict(
+ retry_exceptions=(
+ HTTPError,
+ URLError,
+ httplib.HTTPException,
+ socket.timeout,
+ socket.error,
+ ContentLengthMismatch,
+ ),
+ sleeptime=30,
+ attempts=5,
+ error_message="Can't download from {}".format(url),
+ error_level=FATAL,
+ )
+ compressed_file = self.retry(
+ self.fetch_url_into_memory, kwargs={"url": url}, **retry_args
+ )
+
+        # 2) We're guaranteed to have downloaded the file with error_level=FATAL
+ # Let's unpack the file
+ function, kwargs = _determine_extraction_method_and_kwargs(url)
+ try:
+ function(**kwargs)
+ except zipfile.BadZipfile:
+ # Dump the exception and exit
+ self.exception(level=FATAL)
+
+ def load_json_url(self, url, error_level=None, *args, **kwargs):
+ """Returns a json object from a url (it retries)."""
+ contents = self._retry_download(
+ url=url, error_level=error_level, *args, **kwargs
+ )
+ return json.loads(contents.read())
+
+ # http://www.techniqal.com/blog/2008/07/31/python-file-read-write-with-urllib2/
+ # TODO thinking about creating a transfer object.
+ def download_file(
+ self,
+ url,
+ file_name=None,
+ parent_dir=None,
+ create_parent_dir=True,
+ error_level=ERROR,
+ exit_code=3,
+ retry_config=None,
+ ):
+ """Python wget.
+ Download the filename at `url` into `file_name` and put it on `parent_dir`.
+ On error log with the specified `error_level`, on fatal exit with `exit_code`.
+ Execute all the above based on `retry_config` parameter.
+
+ Args:
+ url (str): URL path where the file to be downloaded is located.
+            file_name (str, optional): file_name where the file will be written to.
+                                       Defaults to the url's filename.
+ parent_dir (str, optional): directory where the downloaded file will
+ be written to. Defaults to current working
+ directory
+ create_parent_dir (bool, optional): create the parent directory if it
+ doesn't exist. Defaults to `True`
+ error_level (str, optional): log level to use in case an error occurs.
+ Defaults to `ERROR`
+ retry_config (dict, optional): key-value pairs to be passed to
+ `self.retry`. Defaults to `None`
+
+ Returns:
+ str: filename where the downloaded file was written to.
+ unknown: on failure, `failure_status` is returned.
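+
+        Example (illustrative URL and paths):
+
+            path = self.download_file(
+                "https://example.com/archive.zip",
+                parent_dir="downloads",
+                retry_config=dict(attempts=3),
+            )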
+ """
+ if not file_name:
+ try:
+ file_name = self.get_filename_from_url(url)
+ except AttributeError:
+ self.log(
+ "Unable to get filename from %s; bad url?" % url,
+ level=error_level,
+ exit_code=exit_code,
+ )
+ return
+ if parent_dir:
+ file_name = os.path.join(parent_dir, file_name)
+ if create_parent_dir:
+ self.mkdir_p(parent_dir, error_level=error_level)
+ self.info("Downloading %s to %s" % (url, file_name))
+ status = self._retry_download(
+ url=url,
+ error_level=error_level,
+ file_name=file_name,
+ retry_config=retry_config,
+ )
+ if status == file_name:
+ self.info("Downloaded %d bytes." % os.path.getsize(file_name))
+ return status
+
+ def move(self, src, dest, log_level=INFO, error_level=ERROR, exit_code=-1):
+ """recursively move a file or directory (src) to another location (dest).
+
+ Args:
+ src (str): file or directory path to move.
+ dest (str): file or directory path where to move the content to.
+ log_level (str): log level to use for normal operation. Defaults to
+ `INFO`
+ error_level (str): log level to use on error. Defaults to `ERROR`
+
+ Returns:
+ int: 0 on success. -1 on error.
+ """
+ self.log("Moving %s to %s" % (src, dest), level=log_level)
+ try:
+ shutil.move(src, dest)
+ # http://docs.python.org/tutorial/errors.html
+ except IOError as e:
+ self.log("IO error: %s" % str(e), level=error_level, exit_code=exit_code)
+ return -1
+ except shutil.Error as e:
+ # ERROR level ends up reporting the failure to treeherder &
+ # pollutes the failure summary list.
+ self.log("shutil error: %s" % str(e), level=WARNING, exit_code=exit_code)
+ return -1
+ return 0
+
+ def chmod(self, path, mode):
+ """change `path` mode to `mode`.
+
+ Args:
+ path (str): path whose mode will be modified.
+            mode (int): one of the values defined at `stat`_
+
+ .. _stat:
+ https://docs.python.org/2/library/os.html#os.chmod
+ """
+
+ self.info("Chmoding %s to %s" % (path, str(oct(mode))))
+ os.chmod(path, mode)
+
+ def copyfile(
+ self,
+ src,
+ dest,
+ log_level=INFO,
+ error_level=ERROR,
+ copystat=False,
+ compress=False,
+ ):
+ """copy or compress `src` into `dest`.
+
+ Args:
+ src (str): filepath to copy.
+ dest (str): filepath where to move the content to.
+ log_level (str, optional): log level to use for normal operation. Defaults to
+ `INFO`
+ error_level (str, optional): log level to use on error. Defaults to `ERROR`
+            copystat (bool, optional): whether or not to copy the file's metadata.
+ Defaults to `False`.
+ compress (bool, optional): whether or not to compress the destination file.
+ Defaults to `False`.
+
+ Returns:
+ int: -1 on error
+ None: on success
+ """
+
+ if compress:
+ self.log("Compressing %s to %s" % (src, dest), level=log_level)
+            try:
+                # Context managers ensure both handles are closed even if
+                # the copy fails partway through.
+                with open(src, "rb") as infile:
+                    with gzip.open(dest, "wb") as outfile:
+                        outfile.writelines(infile)
+ except IOError as e:
+ self.log(
+ "Can't compress %s to %s: %s!" % (src, dest, str(e)),
+ level=error_level,
+ )
+ return -1
+ else:
+ self.log("Copying %s to %s" % (src, dest), level=log_level)
+ try:
+ shutil.copyfile(src, dest)
+ except (IOError, shutil.Error) as e:
+ self.log(
+ "Can't copy %s to %s: %s!" % (src, dest, str(e)), level=error_level
+ )
+ return -1
+
+ if copystat:
+ try:
+ shutil.copystat(src, dest)
+ except (IOError, shutil.Error) as e:
+ self.log(
+ "Can't copy attributes of %s to %s: %s!" % (src, dest, str(e)),
+ level=error_level,
+ )
+ return -1
+
+ def copytree(
+ self, src, dest, overwrite="no_overwrite", log_level=INFO, error_level=ERROR
+ ):
+ """An implementation of `shutil.copytree` that allows for `dest` to exist
+ and implements different overwrite levels:
+        - 'no_overwrite' will keep any existing files in the destination tree
+ - 'overwrite_if_exists' will only overwrite destination paths that have
+ the same path names relative to the root of the
+ src and destination tree
+        - 'clobber' will replace the whole destination tree if it exists
+
+ Args:
+ src (str): directory path to move.
+ dest (str): directory path where to move the content to.
+ overwrite (str): string specifying the overwrite level.
+ log_level (str, optional): log level to use for normal operation. Defaults to
+ `INFO`
+ error_level (str, optional): log level to use on error. Defaults to `ERROR`
+
+ Returns:
+ int: -1 on error
+ None: on success
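+
+        Example (paths are hypothetical):
+
+            # Merge `stage` into an existing `release` tree, replacing any
+            # files present in both:
+            self.copytree("stage", "release", overwrite="overwrite_if_exists")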
+ """
+
+ self.info("copying tree: %s to %s" % (src, dest))
+ try:
+ if overwrite == "clobber" or not os.path.exists(dest):
+ self.rmtree(dest)
+ shutil.copytree(src, dest)
+ elif overwrite == "no_overwrite" or overwrite == "overwrite_if_exists":
+ files = os.listdir(src)
+ for f in files:
+ abs_src_f = os.path.join(src, f)
+ abs_dest_f = os.path.join(dest, f)
+ if not os.path.exists(abs_dest_f):
+ if os.path.isdir(abs_src_f):
+ self.mkdir_p(abs_dest_f)
+ self.copytree(abs_src_f, abs_dest_f, overwrite="clobber")
+ else:
+ shutil.copy2(abs_src_f, abs_dest_f)
+ elif overwrite == "no_overwrite": # destination path exists
+ if os.path.isdir(abs_src_f) and os.path.isdir(abs_dest_f):
+ self.copytree(
+ abs_src_f, abs_dest_f, overwrite="no_overwrite"
+ )
+ else:
+ self.debug(
+ "ignoring path: %s as destination: \
+ %s exists"
+ % (abs_src_f, abs_dest_f)
+ )
+ else: # overwrite == 'overwrite_if_exists' and destination exists
+ self.debug("overwriting: %s with: %s" % (abs_dest_f, abs_src_f))
+ self.rmtree(abs_dest_f)
+
+ if os.path.isdir(abs_src_f):
+ self.mkdir_p(abs_dest_f)
+ self.copytree(
+ abs_src_f, abs_dest_f, overwrite="overwrite_if_exists"
+ )
+ else:
+ shutil.copy2(abs_src_f, abs_dest_f)
+ else:
+ self.fatal(
+ "%s is not a valid argument for param overwrite" % (overwrite)
+ )
+ except (IOError, shutil.Error):
+ self.exception(
+ "There was an error while copying %s to %s!" % (src, dest),
+ level=error_level,
+ )
+ return -1
+
+ def write_to_file(
+ self,
+ file_path,
+ contents,
+ verbose=True,
+ open_mode="w",
+ create_parent_dir=False,
+ error_level=ERROR,
+ ):
+ """Write `contents` to `file_path`, according to `open_mode`.
+
+ Args:
+ file_path (str): filepath where the content will be written to.
+ contents (str): content to write to the filepath.
+ verbose (bool, optional): whether or not to log `contents` value.
+ Defaults to `True`
+            open_mode (str, optional): open mode to use for opening the file.
+ Defaults to `w`
+ create_parent_dir (bool, optional): whether or not to create the
+ parent directory of `file_path`
+ error_level (str, optional): log level to use on error. Defaults to `ERROR`
+
+ Returns:
+ str: `file_path` on success
+ None: on error.
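+
+        Example (illustrative path and contents):
+
+            self.write_to_file("logs/revision.txt", "abcdef012345",
+                               create_parent_dir=True)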
+ """
+ self.info("Writing to file %s" % file_path)
+ if verbose:
+ self.info("Contents:")
+ for line in contents.splitlines():
+ self.info(" %s" % line)
+ if create_parent_dir:
+ parent_dir = os.path.dirname(file_path)
+ self.mkdir_p(parent_dir, error_level=error_level)
+ try:
+ fh = open(file_path, open_mode)
+ try:
+ fh.write(contents)
+ except UnicodeEncodeError:
+ fh.write(contents.encode("utf-8", "replace"))
+ fh.close()
+ return file_path
+ except IOError:
+ self.log("%s can't be opened for writing!" % file_path, level=error_level)
+
+ @contextmanager
+ def opened(self, file_path, verbose=True, open_mode="r", error_level=ERROR):
+ """Create a context manager to use on a with statement.
+
+ Args:
+ file_path (str): filepath of the file to open.
+            verbose (bool, optional): unused; kept for signature compatibility.
+ Defaults to True.
+            open_mode (str, optional): open mode to use for opening the file.
+ Defaults to `r`
+ error_level (str, optional): log level name to use on error.
+ Defaults to `ERROR`
+
+ Yields:
+ tuple: (file object, error) pair. In case of error `None` is yielded
+ as file object, together with the corresponding error.
+ If there is no error, `None` is returned as the error.
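+
+        Example (illustrative filename):
+
+            with self.opened("manifest.json") as (fh, err):
+                if err:
+                    self.fatal("could not read manifest")
+                data = fh.read()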
+ """
+ # See opened_w_error in http://www.python.org/dev/peps/pep-0343/
+ self.info("Reading from file %s" % file_path)
+ try:
+ fh = open(file_path, open_mode)
+ except IOError as err:
+ self.log(
+ "unable to open %s: %s" % (file_path, err.strerror), level=error_level
+ )
+ yield None, err
+ else:
+ try:
+ yield fh, None
+ finally:
+ fh.close()
+
+ def read_from_file(self, file_path, verbose=True, open_mode="r", error_level=ERROR):
+ """Use `self.opened` context manager to open a file and read its
+ content.
+
+ Args:
+ file_path (str): filepath of the file to read.
+ verbose (bool, optional): whether or not to log the file content.
+ Defaults to True.
+            open_mode (str, optional): open mode to use for opening the file.
+ Defaults to `r`
+ error_level (str, optional): log level name to use on error.
+ Defaults to `ERROR`
+
+ Returns:
+ None: on error.
+ str: file content on success.
+ """
+ with self.opened(file_path, verbose, open_mode, error_level) as (fh, err):
+ if err:
+ return None
+ contents = fh.read()
+ if verbose:
+ self.info("Contents:")
+ for line in contents.splitlines():
+ self.info(" %s" % line)
+ return contents
+
+ def chdir(self, dir_name):
+ self.log("Changing directory to %s." % dir_name)
+ os.chdir(dir_name)
+
+ def is_exe(self, fpath):
+ """
+ Determine if fpath is a file and if it is executable.
+ """
+ return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
+
+ def which(self, program):
+ """OS independent implementation of Unix's which command
+
+ Args:
+ program (str): name or path to the program whose executable is
+ being searched.
+
+ Returns:
+ None: if the executable was not found.
+ str: filepath of the executable file.
+ """
+ if self._is_windows() and not program.endswith(".exe"):
+ program += ".exe"
+ fpath, fname = os.path.split(program)
+ if fpath:
+ if self.is_exe(program):
+ return program
+ else:
+ # If the exe file is defined in the configs let's use that
+ exe = self.query_exe(program)
+ if self.is_exe(exe):
+ return exe
+
+ # If not defined, let's look for it in the $PATH
+ env = self.query_env()
+ for path in env["PATH"].split(os.pathsep):
+ exe_file = os.path.join(path, program)
+ if self.is_exe(exe_file):
+ return exe_file
+ return None
+
+ # More complex commands {{{2
+ def retry(
+ self,
+ action,
+ attempts=None,
+ sleeptime=60,
+ max_sleeptime=5 * 60,
+ retry_exceptions=(Exception,),
+ good_statuses=None,
+ cleanup=None,
+ error_level=ERROR,
+ error_message="%(action)s failed after %(attempts)d tries!",
+ failure_status=-1,
+ log_level=INFO,
+ args=(),
+ kwargs={},
+ ):
+ """generic retry command. Ported from `util.retry`_
+
+ Args:
+ action (func): callable object to retry.
+            attempts (int, optional): maximum number of times to call `action`.
+ Defaults to `self.config.get('global_retries', 5)`
+ sleeptime (int, optional): number of seconds to wait between
+ attempts. Defaults to 60 and doubles each retry attempt, to
+ a maximum of `max_sleeptime'
+ max_sleeptime (int, optional): maximum value of sleeptime. Defaults
+ to 5 minutes
+            retry_exceptions (tuple, optional): Exceptions that should be caught.
+                If exceptions other than those listed in `retry_exceptions` are
+                raised from `action`, they will be raised immediately. Defaults
+                to (Exception,)
+ good_statuses (object, optional): return values which, if specified,
+ will result in retrying if the return value isn't listed.
+ Defaults to `None`.
+            cleanup (func, optional): If `cleanup` is provided and callable
+                it will be called immediately after an Exception is caught.
+ No arguments will be passed to it. If your cleanup function
+ requires arguments it is recommended that you wrap it in an
+ argumentless function.
+ Defaults to `None`.
+ error_level (str, optional): log level name in case of error.
+ Defaults to `ERROR`.
+            error_message (str, optional): string format to use in case
+                none of the attempts succeed. Defaults to
+                '%(action)s failed after %(attempts)d tries!'
+            failure_status (int, optional): flag to return in case the retries
+                were not successful. Defaults to -1.
+ log_level (str, optional): log level name to use for normal activity.
+ Defaults to `INFO`.
+ args (tuple, optional): positional arguments to pass onto `action`.
+ kwargs (dict, optional): key-value arguments to pass onto `action`.
+
+ Returns:
+ object: return value of `action`.
+            int: `failure_status` if all attempts fail.
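+
+        Example (illustrative; uses `self._urlopen` as the retried action):
+
+            response = self.retry(
+                self._urlopen,
+                attempts=3,
+                sleeptime=10,
+                retry_exceptions=(URLError, socket.timeout),
+                kwargs={"url": "https://example.com/manifest.json"},
+            )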
+ """
+ if not callable(action):
+ self.fatal("retry() called with an uncallable method %s!" % action)
+ if cleanup and not callable(cleanup):
+ self.fatal("retry() called with an uncallable cleanup method %s!" % cleanup)
+ if not attempts:
+ attempts = self.config.get("global_retries", 5)
+ if max_sleeptime < sleeptime:
+ self.debug(
+ "max_sleeptime %d less than sleeptime %d" % (max_sleeptime, sleeptime)
+ )
+ n = 0
+ while n <= attempts:
+ retry = False
+ n += 1
+ try:
+ self.log(
+ "retry: Calling %s with args: %s, kwargs: %s, attempt #%d"
+ % (action.__name__, str(args), str(kwargs), n),
+ level=log_level,
+ )
+ status = action(*args, **kwargs)
+ if good_statuses and status not in good_statuses:
+ retry = True
+ except retry_exceptions as e:
+ retry = True
+ error_message = "%s\nCaught exception: %s" % (error_message, str(e))
+ self.log(
+ "retry: attempt #%d caught %s exception: %s"
+ % (n, type(e).__name__, str(e)),
+ level=INFO,
+ )
+
+ if not retry:
+ return status
+ else:
+ if cleanup:
+ cleanup()
+ if n == attempts:
+ self.log(
+ error_message % {"action": action, "attempts": n},
+ level=error_level,
+ )
+ return failure_status
+ if sleeptime > 0:
+ self.log(
+ "retry: Failed, sleeping %d seconds before retrying"
+ % sleeptime,
+ level=log_level,
+ )
+ time.sleep(sleeptime)
+ sleeptime = sleeptime * 2
+ if sleeptime > max_sleeptime:
+ sleeptime = max_sleeptime
+
+ def query_env(
+ self,
+ partial_env=None,
+ replace_dict=None,
+ purge_env=(),
+ set_self_env=None,
+ log_level=DEBUG,
+ avoid_host_env=False,
+ ):
+ """Environment query/generation method.
+ The default, self.query_env(), will look for self.config['env']
+ and replace any special strings in there ( %(PATH)s ).
+ It will then store it as self.env for speeding things up later.
+
+ If you specify partial_env, partial_env will be used instead of
+ self.config['env'], and we don't save self.env as it's a one-off.
+
+
+ Args:
+ partial_env (dict, optional): key-value pairs of the name and value
+ of different environment variables. Defaults to an empty dictionary.
+ replace_dict (dict, optional): key-value pairs to replace the old
+ environment variables.
+ purge_env (list): environment names to delete from the final
+ environment dictionary.
+ set_self_env (boolean, optional): whether or not the environment
+ variables dictionary should be copied to `self`.
+ Defaults to True.
+ log_level (str, optional): log level name to use on normal operation.
+ Defaults to `DEBUG`.
+ avoid_host_env (boolean, optional): if set to True, we will not use
+ any environment variables set on the host except PATH.
+ Defaults to False.
+
+ Returns:
+            dict: environment variable names mapped to their values.
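+
+        Example (a minimal sketch; `%(abs_work_dir)s` is expanded from
+        `query_abs_dirs()`):
+
+            env = self.query_env(
+                partial_env={"MOZ_OBJDIR": "%(abs_work_dir)s/obj"},
+                purge_env=["PYTHONPATH"],
+            )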
+ """
+ if partial_env is None:
+ if self.env is not None:
+ return self.env
+ partial_env = self.config.get("env", None)
+ if partial_env is None:
+ partial_env = {}
+ if set_self_env is None:
+ set_self_env = True
+
+ env = {"PATH": os.environ["PATH"]} if avoid_host_env else os.environ.copy()
+
+ default_replace_dict = self.query_abs_dirs()
+ default_replace_dict["PATH"] = os.environ["PATH"]
+ if not replace_dict:
+ replace_dict = default_replace_dict
+ else:
+ for key in default_replace_dict:
+ if key not in replace_dict:
+ replace_dict[key] = default_replace_dict[key]
+ for key in partial_env.keys():
+ env[key] = partial_env[key] % replace_dict
+ self.log("ENV: %s is now %s" % (key, env[key]), level=log_level)
+ for k in purge_env:
+ if k in env:
+ del env[k]
+ if os.name == "nt":
+ pref_encoding = locale.getpreferredencoding()
+ for k, v in six.iteritems(env):
+ # When run locally on Windows machines, some environment
+ # variables may be unicode.
+ env[k] = six.ensure_str(v, pref_encoding)
+ if set_self_env:
+ self.env = env
+ return env
+
+ def query_exe(
+ self,
+ exe_name,
+ exe_dict="exes",
+ default=None,
+ return_type=None,
+ error_level=FATAL,
+ ):
+ """One way to work around PATH rewrites.
+
+ By default, return exe_name, and we'll fall through to searching
+ os.environ["PATH"].
+ However, if self.config[exe_dict][exe_name] exists, return that.
+ This lets us override exe paths via config file.
+
+ If we need runtime setting, we can build in self.exes support later.
+
+ Args:
+ exe_name (str): name of the executable to search for.
+ exe_dict(str, optional): name of the dictionary of executables
+ present in `self.config`. Defaults to `exes`.
+ default (str, optional): default name of the executable to search
+ for. Defaults to `exe_name`.
+            return_type (str, optional): type to convert the return value
+                to. Only 'list', 'string' and `None` are
+                supported. Defaults to `None`.
+ error_level (str, optional): log level name to use on error.
+
+ Returns:
+ list: in case return_type is 'list'
+ str: in case return_type is 'string'
+ None: in case return_type is `None`
+            Any: if the found executable is not a list, tuple, or str.
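+
+        Example (assumes the config may define self.config["exes"]["hg"]):
+
+            hg = self.query_exe("hg", return_type="list")
+            self.run_command(hg + ["pull"])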
+ """
+ if default is None:
+ default = exe_name
+ exe = self.config.get(exe_dict, {}).get(exe_name, default)
+ repl_dict = {}
+ if hasattr(self.script_obj, "query_abs_dirs"):
+ # allow for 'make': '%(abs_work_dir)s/...' etc.
+ dirs = self.script_obj.query_abs_dirs()
+ repl_dict.update(dirs)
+ if isinstance(exe, dict):
+ found = False
+ # allow for searchable paths of the exe
+ for name, path in six.iteritems(exe):
+ if isinstance(path, list) or isinstance(path, tuple):
+ path = [x % repl_dict for x in path]
+ if all([os.path.exists(section) for section in path]):
+ found = True
+ elif isinstance(path, str):
+ path = path % repl_dict
+ if os.path.exists(path):
+ found = True
+ else:
+ self.log(
+ "a exes %s dict's value is not a string, list, or tuple. Got key "
+ "%s and value %s" % (exe_name, name, str(path)),
+ level=error_level,
+ )
+ if found:
+ exe = path
+ break
+ else:
+ self.log(
+ "query_exe was a searchable dict but an existing "
+ "path could not be determined. Tried searching in "
+ "paths: %s" % (str(exe)),
+ level=error_level,
+ )
+ return None
+ elif isinstance(exe, list) or isinstance(exe, tuple):
+ exe = [x % repl_dict for x in exe]
+ elif isinstance(exe, str):
+ exe = exe % repl_dict
+ else:
+ self.log(
+ "query_exe: %s is not a list, tuple, dict, or string: "
+ "%s!" % (exe_name, str(exe)),
+ level=error_level,
+ )
+ return exe
+ if return_type == "list":
+ if isinstance(exe, str):
+ exe = [exe]
+ elif return_type == "string":
+ if isinstance(exe, list):
+ exe = subprocess.list2cmdline(exe)
+ elif return_type is not None:
+ self.log(
+ "Unknown return_type type %s requested in query_exe!" % return_type,
+ level=error_level,
+ )
+ return exe
+
+ def run_command(
+ self,
+ command,
+ cwd=None,
+ error_list=None,
+ halt_on_failure=False,
+ success_codes=None,
+ env=None,
+ partial_env=None,
+ return_type="status",
+ throw_exception=False,
+ output_parser=None,
+ output_timeout=None,
+ fatal_exit_code=2,
+ error_level=ERROR,
+ **kwargs
+ ):
+ """Run a command, with logging and error parsing.
+ TODO: context_lines
+
+        error_list example:
+        [{'regex': re.compile('^Error: LOL J/K'), 'level': IGNORE},
+         {'regex': re.compile('^Error:'), 'level': ERROR, 'contextLines': '5:5'},
+         {'substr': 'THE WORLD IS ENDING', 'level': FATAL, 'contextLines': '20:'}
+        ]
+ (context_lines isn't written yet)
+
+ Args:
+ command (str | list | tuple): command or sequence of commands to
+ execute and log.
+ cwd (str, optional): directory path from where to execute the
+ command. Defaults to `None`.
+ error_list (list, optional): list of errors to pass to
+ `mozharness.base.log.OutputParser`. Defaults to `None`.
+ halt_on_failure (bool, optional): whether or not to redefine the
+ log level as `FATAL` on errors. Defaults to False.
+            success_codes (list, optional): list of return values considered
+                successful. Defaults to [0].
+ env (dict, optional): key-value of environment values to use to
+ run the command. Defaults to None.
+ partial_env (dict, optional): key-value of environment values to
+ replace from the current environment values. Defaults to None.
+ return_type (str, optional): if equal to 'num_errors' then the
+ amount of errors matched by `error_list` is returned. Defaults
+ to 'status'.
+ throw_exception (bool, optional): whether or not to raise an
+ exception if the return value of the command doesn't match
+ any of the `success_codes`. Defaults to False.
+ output_parser (OutputParser, optional): lets you provide an
+ instance of your own OutputParser subclass. Defaults to `OutputParser`.
+            output_timeout (int): number of seconds to wait for output before
+                the process is killed.
+ fatal_exit_code (int, optional): call `self.fatal` if the return value
+ of the command is not in `success_codes`. Defaults to 2.
+ error_level (str, optional): log level name to use on error. Defaults
+ to `ERROR`.
+ **kwargs: Arbitrary keyword arguments.
+
+ Returns:
+ int: -1 on error.
+ Any: `command` return value is returned otherwise.
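+
+        Example (command and paths are illustrative):
+
+            status = self.run_command(
+                ["make", "-j4"],
+                cwd="build",
+                halt_on_failure=True,
+                output_timeout=600,
+            )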
+ """
+ if success_codes is None:
+ success_codes = [0]
+ if cwd is not None:
+ if not os.path.isdir(cwd):
+ level = error_level
+ if halt_on_failure:
+ level = FATAL
+ self.log(
+ "Can't run command %s in non-existent directory '%s'!"
+ % (command, cwd),
+ level=level,
+ )
+ return -1
+ self.info("Running command: %s in %s" % (command, cwd))
+ else:
+ self.info("Running command: %s" % (command,))
+        if isinstance(command, (list, tuple)):
+            self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
+            shell = False
+        else:
+            shell = True
+ if env is None:
+ if partial_env:
+ self.info("Using partial env: %s" % pprint.pformat(partial_env))
+ env = self.query_env(partial_env=partial_env)
+ else:
+ if hasattr(self, "previous_env") and env == self.previous_env:
+ self.info("Using env: (same as previous command)")
+ else:
+ self.info("Using env: %s" % pprint.pformat(env))
+ self.previous_env = env
+
+ if output_parser is None:
+ parser = OutputParser(
+ config=self.config, log_obj=self.log_obj, error_list=error_list
+ )
+ else:
+ parser = output_parser
+
+ try:
+ if output_timeout:
+
+ def processOutput(line):
+ parser.add_lines(line)
+
+ def onTimeout():
+ self.info(
+ "Automation Error: mozprocess timed out after "
+ "%s seconds running %s" % (str(output_timeout), str(command))
+ )
+
+ p = ProcessHandler(
+ command,
+ shell=shell,
+ env=env,
+ cwd=cwd,
+ storeOutput=False,
+ onTimeout=(onTimeout,),
+ processOutputLine=[processOutput],
+ )
+ self.info(
+ "Calling %s with output_timeout %d" % (command, output_timeout)
+ )
+ p.run(outputTimeout=output_timeout)
+ p.wait()
+ if p.timedOut:
+ self.log(
+ "timed out after %s seconds of no output" % output_timeout,
+ level=error_level,
+ )
+ returncode = int(p.proc.returncode)
+ else:
+ p = subprocess.Popen(
+ command,
+ shell=shell,
+ stdout=subprocess.PIPE,
+ cwd=cwd,
+ stderr=subprocess.STDOUT,
+ env=env,
+ bufsize=0,
+ )
+ loop = True
+ while loop:
+ if p.poll() is not None:
+ """Avoid losing the final lines of the log?"""
+ loop = False
+ while True:
+ line = p.stdout.readline()
+ if not line:
+ break
+ parser.add_lines(line)
+ returncode = p.returncode
+ except KeyboardInterrupt:
+ level = error_level
+ if halt_on_failure:
+ level = FATAL
+ self.log(
+ "Process interrupted by the user, killing process with pid %s" % p.pid,
+ level=level,
+ )
+ p.kill()
+ return -1
+ except OSError as e:
+ level = error_level
+ if halt_on_failure:
+ level = FATAL
+ self.log(
+ "caught OS error %s: %s while running %s"
+ % (e.errno, e.strerror, command),
+ level=level,
+ )
+ return -1
+
+ if returncode not in success_codes:
+ if throw_exception:
+ raise subprocess.CalledProcessError(returncode, command)
+ # Force level to be INFO as message is not necessary in Treeherder
+ self.log("Return code: %d" % returncode, level=INFO)
+
+ if halt_on_failure:
+ _fail = False
+ if returncode not in success_codes:
+ self.log(
+ "%s not in success codes: %s" % (returncode, success_codes),
+ level=error_level,
+ )
+ _fail = True
+ if parser.num_errors:
+ self.log("failures found while parsing output", level=error_level)
+ _fail = True
+ if _fail:
+ self.return_code = fatal_exit_code
+ self.fatal(
+ "Halting on failure while running %s" % (command,),
+ exit_code=fatal_exit_code,
+ )
+ if return_type == "num_errors":
+ return parser.num_errors
+ return returncode
+
+ def get_output_from_command(
+ self,
+ command,
+ cwd=None,
+ halt_on_failure=False,
+ env=None,
+ silent=False,
+ log_level=INFO,
+ tmpfile_base_path="tmpfile",
+ return_type="output",
+ save_tmpfiles=False,
+ throw_exception=False,
+ fatal_exit_code=2,
+ ignore_errors=False,
+ success_codes=None,
+ output_filter=None,
+ ):
+ """Similar to run_command, but where run_command is an
+ os.system(command) analog, get_output_from_command is a `command`
+ analog.
+
+ Less error checking by design, though if we figure out how to
+ do it without borking the output, great.
+
+ TODO: binary mode? silent is kinda like that.
+ TODO: since p.wait() can take a long time, optionally log something
+ every N seconds?
+ TODO: optionally only keep the first or last (N) line(s) of output?
+ TODO: optionally only return the tmp_stdout_filename?
+
+ ignore_errors=True is for the case where a command might produce standard
+ error output, but you don't particularly care; setting to True will
+ cause standard error to be logged at DEBUG rather than ERROR
+
+ Args:
+ command (str | list): command or list of commands to
+ execute and log.
+ cwd (str, optional): directory path from where to execute the
+ command. Defaults to `None`.
+ halt_on_failure (bool, optional): whether or not to redefine the
+ log level as `FATAL` on error. Defaults to False.
+ env (dict, optional): key-value of environment values to use to
+ run the command. Defaults to None.
+ silent (bool, optional): whether or not to output the stdout of
+ executing the command. Defaults to False.
+ log_level (str, optional): log level name to use on normal execution.
+ Defaults to `INFO`.
+            tmpfile_base_path (str, optional): base path of the file to which
+                the output will be written. Defaults to 'tmpfile'.
+ return_type (str, optional): if equal to 'output' then the complete
+ output of the executed command is returned, otherwise the written
+ filenames are returned. Defaults to 'output'.
+ save_tmpfiles (bool, optional): whether or not to save the temporary
+ files created from the command output. Defaults to False.
+ throw_exception (bool, optional): whether or not to raise an
+ exception if the return value of the command is not zero.
+ Defaults to False.
+            fatal_exit_code (int, optional): call `self.fatal` with this exit
+                code if the command fails. Defaults to 2.
+ ignore_errors (bool, optional): whether or not to change the log
+ level to `ERROR` for the output of stderr. Defaults to False.
+            success_codes (list, optional): list of return values considered
+                successful. Defaults to [0].
+ output_filter (func, optional): provide a function to filter output
+ so that noise is reduced and lines are sanitized. default: None
+
+ Returns:
+ None: if the cwd is not a directory.
+ None: on IOError.
+ tuple: stdout and stderr filenames.
+ str: stdout output.
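+
+        Example (command is illustrative):
+
+            rev = self.get_output_from_command(
+                ["hg", "id", "-i"], cwd="src", silent=True
+            )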
+ """
+ if cwd:
+ if not os.path.isdir(cwd):
+ level = ERROR
+ if halt_on_failure:
+ level = FATAL
+ self.log(
+ "Can't run command %s in non-existent directory %s!"
+ % (command, cwd),
+ level=level,
+ )
+ return None
+ self.info("Getting output from command: %s in %s" % (command, cwd))
+ else:
+ self.info("Getting output from command: %s" % command)
+ if isinstance(command, list):
+ self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
+ # This could potentially return something?
+ tmp_stdout = None
+ tmp_stderr = None
+ tmp_stdout_filename = "%s_stdout" % tmpfile_base_path
+ tmp_stderr_filename = "%s_stderr" % tmpfile_base_path
+ if success_codes is None:
+ success_codes = [0]
+
+ # TODO probably some more elegant solution than 2 similar passes
+ try:
+ tmp_stdout = open(tmp_stdout_filename, "w")
+ except IOError:
+ level = ERROR
+ if halt_on_failure:
+ level = FATAL
+ self.log(
+ "Can't open %s for writing!" % tmp_stdout_filename + self.exception(),
+ level=level,
+ )
+ return None
+ try:
+ tmp_stderr = open(tmp_stderr_filename, "w")
+ except IOError:
+ level = ERROR
+ if halt_on_failure:
+ level = FATAL
+ self.log(
+ "Can't open %s for writing!" % tmp_stderr_filename + self.exception(),
+ level=level,
+ )
+ return None
+ shell = True
+ if isinstance(command, list):
+ shell = False
+
+ p = subprocess.Popen(
+ command,
+ shell=shell,
+ stdout=tmp_stdout,
+ cwd=cwd,
+ stderr=tmp_stderr,
+ env=env,
+ bufsize=0,
+ )
+ # XXX: changed from self.debug to self.log due to this error:
+ # TypeError: debug() takes exactly 1 argument (2 given)
+ self.log(
+ "Temporary files: %s and %s" % (tmp_stdout_filename, tmp_stderr_filename),
+ level=DEBUG,
+ )
+ p.wait()
+ tmp_stdout.close()
+ tmp_stderr.close()
+ return_level = DEBUG
+ output = None
+ if return_type == "output" or not silent:
+ if os.path.exists(tmp_stdout_filename) and os.path.getsize(
+ tmp_stdout_filename
+ ):
+ output = self.read_from_file(tmp_stdout_filename, verbose=False)
+ if output_filter:
+ output = output_filter(output)
+ if not silent:
+ self.log("Output received:", level=log_level)
+ output_lines = output.rstrip().splitlines()
+ for line in output_lines:
+ if not line or line.isspace():
+ continue
+ if isinstance(line, binary_type):
+ line = line.decode("utf-8")
+ self.log(" %s" % line, level=log_level)
+ output = "\n".join(output_lines)
+ if os.path.exists(tmp_stderr_filename) and os.path.getsize(tmp_stderr_filename):
+ errors = self.read_from_file(tmp_stderr_filename, verbose=False)
+ if output_filter:
+ errors = output_filter(errors)
+ if errors:
+ if not ignore_errors:
+ return_level = ERROR
+ self.log("Errors received:", level=return_level)
+ for line in errors.rstrip().splitlines():
+ if not line or line.isspace():
+ continue
+ if isinstance(line, binary_type):
+ line = line.decode("utf-8")
+ self.log(" %s" % line, level=return_level)
+ elif p.returncode not in success_codes and not ignore_errors:
+ return_level = ERROR
+ # Clean up.
+ if not save_tmpfiles:
+ self.rmtree(tmp_stderr_filename, log_level=DEBUG)
+ self.rmtree(tmp_stdout_filename, log_level=DEBUG)
+ if p.returncode and throw_exception:
+ raise subprocess.CalledProcessError(p.returncode, command)
+ # Force level to be INFO as message is not necessary in Treeherder
+ self.log("Return code: %d" % p.returncode, level=INFO)
+ if halt_on_failure and return_level == ERROR:
+ self.return_code = fatal_exit_code
+ self.fatal(
+ "Halting on failure while running %s" % command,
+ exit_code=fatal_exit_code,
+ )
+ # Hm, options on how to return this? I bet often we'll want
+ # output_lines[0] with no newline.
+ if return_type != "output":
+ return (tmp_stdout_filename, tmp_stderr_filename)
+ else:
+ return output
+
+ def _touch_file(self, file_name, times=None, error_level=FATAL):
+ """touch a file.
+
+ Args:
+ file_name (str): name of the file to touch.
+ times (tuple, optional): 2-tuple as specified by `os.utime`_
+ Defaults to None.
+ error_level (str, optional): log level name in case of error.
+ Defaults to `FATAL`.
+
+ .. _`os.utime`:
+ https://docs.python.org/3.4/library/os.html?highlight=os.utime#os.utime
+ """
+ self.info("Touching: %s" % file_name)
+ try:
+ os.utime(file_name, times)
+ except OSError:
+ try:
+ open(file_name, "w").close()
+ except IOError as e:
+ msg = "I/O error(%s): %s" % (e.errno, e.strerror)
+                self.log(msg, level=error_level)
+ os.utime(file_name, times)
+
+ def unpack(
+ self,
+ filename,
+ extract_to,
+ extract_dirs=None,
+ error_level=ERROR,
+ fatal_exit_code=2,
+ verbose=False,
+ ):
+ """The method allows to extract a file regardless of its extension.
+
+ Args:
+ filename (str): filename of the compressed file.
+ extract_to (str): where to extract the compressed file.
+ extract_dirs (list, optional): directories inside the archive file to extract.
+ Defaults to `None`.
+            fatal_exit_code (int, optional): exit code to use if extraction
+                fails and `error_level` is fatal. Defaults to 2.
+ verbose (bool, optional): whether or not extracted content should be displayed.
+ Defaults to False.
+
+ Raises:
+ IOError: on `filename` file not found.
+
+ """
+ if not os.path.isfile(filename):
+ raise IOError("Could not find file to extract: %s" % filename)
+
+ if zipfile.is_zipfile(filename):
+ try:
+ self.info(
+ "Using ZipFile to extract {} to {}".format(filename, extract_to)
+ )
+ with zipfile.ZipFile(filename) as bundle:
+ for entry in self._filter_entries(bundle.namelist(), extract_dirs):
+ if verbose:
+ self.info(" %s" % entry)
+ bundle.extract(entry, path=extract_to)
+
+ # ZipFile doesn't preserve permissions during extraction:
+ # http://bugs.python.org/issue15795
+ fname = os.path.realpath(os.path.join(extract_to, entry))
+ mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
+ # Only set permissions if attributes are available. Otherwise all
+ # permissions will be removed eg. on Windows.
+ if mode:
+ os.chmod(fname, mode)
+ except zipfile.BadZipfile as e:
+ self.log(
+ "%s (%s)" % (str(e), filename),
+ level=error_level,
+ exit_code=fatal_exit_code,
+ )
+
+ # Bug 1211882 - is_tarfile cannot be trusted for dmg files
+ elif tarfile.is_tarfile(filename) and not filename.lower().endswith(".dmg"):
+ try:
+ self.info(
+ "Using TarFile to extract {} to {}".format(filename, extract_to)
+ )
+ with tarfile.open(filename) as bundle:
+ for entry in self._filter_entries(bundle.getnames(), extract_dirs):
+ _validate_tar_member(bundle.getmember(entry), extract_to)
+ if verbose:
+ self.info(" %s" % entry)
+ bundle.extract(entry, path=extract_to)
+ except tarfile.TarError as e:
+ self.log(
+ "%s (%s)" % (str(e), filename),
+ level=error_level,
+ exit_code=fatal_exit_code,
+ )
+ else:
+ self.log(
+ "No extraction method found for: %s" % filename,
+ level=error_level,
+ exit_code=fatal_exit_code,
+ )
+
+ def is_taskcluster(self):
+ """Returns boolean indicating if we're running in TaskCluster."""
+ # This may need expanding in the future to work on
+ return "TASKCLUSTER_WORKER_TYPE" in os.environ
+
+
+def PreScriptRun(func):
+ """Decorator for methods that will be called before script execution.
+
+ Each method on a BaseScript having this decorator will be called at the
+ beginning of BaseScript.run().
+
+ The return value is ignored. Exceptions will abort execution.
+ """
+ func._pre_run_listener = True
+ return func
+
+
+def PostScriptRun(func):
+ """Decorator for methods that will be called after script execution.
+
+ This is similar to PreScriptRun except it is called at the end of
+ execution. The method will always be fired, even if execution fails.
+ """
+ func._post_run_listener = True
+ return func
+
+
+def PreScriptAction(action=None):
+ """Decorator for methods that will be called at the beginning of each action.
+
+ Each method on a BaseScript having this decorator will be called during
+ BaseScript.run() before an individual action is executed. The method will
+ receive the action's name as an argument.
+
+ If no values are passed to the decorator, it will be applied to every
+ action. If a string is passed, the decorated function will only be called
+ for the action of that name.
+
+ The return value of the method is ignored. Exceptions will abort execution.
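+
+    Example (a sketch; class and method names are hypothetical):
+
+        class MyScript(BaseScript):
+            @PreScriptAction('build')
+            def _pre_build(self, action):
+                self.info('about to run %s' % action)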
+ """
+
+ def _wrapped(func):
+ func._pre_action_listener = action
+ return func
+
+ def _wrapped_none(func):
+ func._pre_action_listener = None
+ return func
+
+ if type(action) == type(_wrapped):
+ return _wrapped_none(action)
+
+ return _wrapped
+
+
+def PostScriptAction(action=None):
+ """Decorator for methods that will be called at the end of each action.
+
+ This behaves similarly to PreScriptAction. It varies in that it is called
+ after execution of the action.
+
+ The decorated method will receive the action name as a positional argument.
+ It will then receive the following named arguments:
+
+ success - Bool indicating whether the action finished successfully.
+
+ The decorated method will always be called, even if the action threw an
+ exception.
+
+ The return value is ignored.
+ """
+
+ def _wrapped(func):
+ func._post_action_listener = action
+ return func
+
+ def _wrapped_none(func):
+ func._post_action_listener = None
+ return func
+
+ if type(action) == type(_wrapped):
+ return _wrapped_none(action)
+
+ return _wrapped
+
+
+# BaseScript {{{1
+class BaseScript(ScriptMixin, LogMixin, object):
+ def __init__(
+ self,
+ config_options=None,
+ ConfigClass=BaseConfig,
+ default_log_level="info",
+ **kwargs
+ ):
+ self._return_code = 0
+ super(BaseScript, self).__init__()
+
+ self.log_obj = None
+ self.abs_dirs = None
+ if config_options is None:
+ config_options = []
+ self.summary_list = []
+ self.failures = []
+ rw_config = ConfigClass(config_options=config_options, **kwargs)
+ self.config = rw_config.get_read_only_config()
+ self.actions = tuple(rw_config.actions)
+ self.all_actions = tuple(rw_config.all_actions)
+ self.env = None
+ self.new_log_obj(default_log_level=default_log_level)
+ self.script_obj = self
+
+ # Indicate we're a source checkout if VCS directory is present at the
+ # appropriate place. This code will break if this file is ever moved
+ # to another directory.
+ self.topsrcdir = None
+
+ srcreldir = "testing/mozharness/mozharness/base"
+ here = os.path.normpath(os.path.dirname(__file__))
+ if here.replace("\\", "/").endswith(srcreldir):
+ topsrcdir = os.path.normpath(os.path.join(here, "..", "..", "..", ".."))
+ hg_dir = os.path.join(topsrcdir, ".hg")
+ git_dir = os.path.join(topsrcdir, ".git")
+ if os.path.isdir(hg_dir) or os.path.isdir(git_dir):
+ self.topsrcdir = topsrcdir
+
+ # Set self.config to read-only.
+ #
+ # We can create intermediate config info programmatically from
+ # this in a repeatable way, with logs; this is how we straddle the
+ # ideal-but-not-user-friendly static config and the
+ # easy-to-write-hard-to-debug writable config.
+ #
+ # To allow for other, script-specific configurations
+ # (e.g., props json parsing), before locking,
+ # call self._pre_config_lock(). If needed, this method can
+ # alter self.config.
+ self._pre_config_lock(rw_config)
+ self._config_lock()
+
+ self.info("Run as %s" % rw_config.command_line)
+ if self.config.get("dump_config_hierarchy"):
+ # we only wish to dump and display what self.config is made up of,
+ # against the current script + args, without actually running any
+ # actions
+ self._dump_config_hierarchy(rw_config.all_cfg_files_and_dicts)
+ if self.config.get("dump_config"):
+ self.dump_config(exit_on_finish=True)
+
+ # Collect decorated methods. We simply iterate over the attributes of
+ # the current class instance and look for signatures deposited by
+ # the decorators.
+ self._listeners = dict(
+ pre_run=[],
+ pre_action=[],
+ post_action=[],
+ post_run=[],
+ )
+ for k in dir(self):
+ try:
+ item = self._getattr(k)
+ except Exception as e:
+ item = None
+ self.warning(
+ "BaseScript collecting decorated methods: "
+ "failure to get attribute {}: {}".format(k, str(e))
+ )
+ if not item:
+ continue
+
+ # We only decorate methods, so ignore other types.
+ if not inspect.ismethod(item):
+ continue
+
+ if hasattr(item, "_pre_run_listener"):
+ self._listeners["pre_run"].append(k)
+
+ if hasattr(item, "_pre_action_listener"):
+ self._listeners["pre_action"].append((k, item._pre_action_listener))
+
+ if hasattr(item, "_post_action_listener"):
+ self._listeners["post_action"].append((k, item._post_action_listener))
+
+ if hasattr(item, "_post_run_listener"):
+ self._listeners["post_run"].append(k)
+
+ def _getattr(self, name):
+ # `getattr(self, k)` will call the method `k` for any property
+ # access. If the property depends upon a module which has not
+ # been imported at the time the BaseScript initializer is
+ # executed, this property access will result in an
+ # Exception. Until Python 3's `inspect.getattr_static` is
+ # available, the simplest approach is to ignore the specific
+ # properties which are known to cause issues. Currently
+ # adb_path and device are ignored since they require the
+        # availability of the mozdevice package which is not guaranteed
+ # when BaseScript is called.
+ property_list = set(["adb_path", "device"])
+ if six.PY2:
+ if name in property_list:
+ item = None
+ else:
+ item = getattr(self, name)
+ else:
+ item = inspect.getattr_static(self, name)
+ if type(item) == property:
+ item = None
+ else:
+ item = getattr(self, name)
+ return item
+
+ def _dump_config_hierarchy(self, cfg_files):
+ """interpret each config file used.
+
+ This will show which keys/values are being added or overwritten by
+ other config files depending on their hierarchy (when they were added).
+ """
+ # go through each config_file. We will start with the lowest and
+ # print its keys/values that are being used in self.config. If any
+ # keys/values are present in a config file with a higher precedence,
+ # ignore those.
+ dirs = self.query_abs_dirs()
+ cfg_files_dump_config = {} # we will dump this to file
+ # keep track of keys that did not come from a config file
+ keys_not_from_file = set(self.config.keys())
+ if not cfg_files:
+ cfg_files = []
+ self.info("Total config files: %d" % (len(cfg_files)))
+ if len(cfg_files):
+ self.info("cfg files used from lowest precedence to highest:")
+ for i, (target_file, target_dict) in enumerate(cfg_files):
+ unique_keys = set(target_dict.keys())
+ unique_dict = {}
+            # iterate through the target_dicts of the remaining 'higher' cfg_files
+            remaining_cfgs = cfg_files[i + 1 :]
+ # where higher == more precedent
+ for ii, (higher_file, higher_dict) in enumerate(remaining_cfgs):
+ # now only keep keys/values that are not overwritten by a
+ # higher config
+ unique_keys = unique_keys.difference(set(higher_dict.keys()))
+            # unique_dict now holds only the keys/values that are unique to
+            # this config file.
+ unique_dict = dict((key, target_dict.get(key)) for key in unique_keys)
+ cfg_files_dump_config[target_file] = unique_dict
+ self.action_message("Config File %d: %s" % (i + 1, target_file))
+ self.info(pprint.pformat(unique_dict))
+ # let's also find out which keys/values from self.config are not
+ # from each target config file dict
+ keys_not_from_file = keys_not_from_file.difference(set(target_dict.keys()))
+ not_from_file_dict = dict(
+ (key, self.config.get(key)) for key in keys_not_from_file
+ )
+ cfg_files_dump_config["not_from_cfg_file"] = not_from_file_dict
+ self.action_message(
+ "Not from any config file (default_config, " "cmd line options, etc)"
+ )
+ self.info(pprint.pformat(not_from_file_dict))
+
+ # finally, let's dump this output as JSON and exit early
+ self.dump_config(
+ os.path.join(dirs["abs_log_dir"], "localconfigfiles.json"),
+ cfg_files_dump_config,
+ console_output=False,
+ exit_on_finish=True,
+ )
+
+ def _pre_config_lock(self, rw_config):
+ """This empty method can allow for config checking and manipulation
+ before the config lock, when overridden in scripts.
+ """
+ pass
+
+ def _config_lock(self):
+ """After this point, the config is locked and should not be
+ manipulated (based on mozharness.base.config.ReadOnlyDict)
+ """
+ self.config.lock()
+
+ def _possibly_run_method(self, method_name, error_if_missing=False):
+ """This is here for run()."""
+ if hasattr(self, method_name) and callable(self._getattr(method_name)):
+ return getattr(self, method_name)()
+ elif error_if_missing:
+ self.error("No such method %s!" % method_name)
+
+ def run_action(self, action):
+ if action not in self.actions:
+ self.action_message("Skipping %s step." % action)
+ return
+
+ method_name = action.replace("-", "_")
+ self.action_message("Running %s step." % action)
+
+ # An exception during a pre action listener should abort execution.
+ for fn, target in self._listeners["pre_action"]:
+ if target is not None and target != action:
+ continue
+
+ try:
+ self.info("Running pre-action listener: %s" % fn)
+ method = getattr(self, fn)
+ method(action)
+ except Exception:
+ self.error(
+ "Exception during pre-action for %s: %s"
+ % (action, traceback.format_exc())
+ )
+
+ for fn, target in self._listeners["post_action"]:
+ if target is not None and target != action:
+ continue
+
+ try:
+ self.info("Running post-action listener: %s" % fn)
+ method = getattr(self, fn)
+ method(action, success=False)
+ except Exception:
+ self.error(
+ "An additional exception occurred during "
+ "post-action for %s: %s" % (action, traceback.format_exc())
+ )
+
+ self.fatal("Aborting due to exception in pre-action listener.")
+
+ # We always run post action listeners, even if the main routine failed.
+ success = False
+ try:
+ self.info("Running main action method: %s" % method_name)
+ self._possibly_run_method("preflight_%s" % method_name)
+ self._possibly_run_method(method_name, error_if_missing=True)
+ self._possibly_run_method("postflight_%s" % method_name)
+ success = True
+ finally:
+ post_success = True
+ for fn, target in self._listeners["post_action"]:
+ if target is not None and target != action:
+ continue
+
+ try:
+ self.info("Running post-action listener: %s" % fn)
+ method = getattr(self, fn)
+ method(action, success=success and self.return_code == 0)
+ except Exception:
+ post_success = False
+ self.error(
+ "Exception during post-action for %s: %s"
+ % (action, traceback.format_exc())
+ )
+
+ step_result = "success" if success else "failed"
+ self.action_message("Finished %s step (%s)" % (action, step_result))
+
+ if not post_success:
+ self.fatal("Aborting due to failure in post-action listener.")
+
+ def run(self):
+ """Default run method.
+ This is the "do everything" method, based on actions and all_actions.
+
+ First run self.dump_config() if it exists.
+ Second, go through the list of all_actions.
+ If they're in the list of self.actions, try to run
+ self.preflight_ACTION(), self.ACTION(), and self.postflight_ACTION().
+
+ Preflight is sanity checking before doing anything time consuming or
+ destructive.
+
+ Postflight is quick testing for success after an action.
+
+ """
+ for fn in self._listeners["pre_run"]:
+ try:
+ self.info("Running pre-run listener: %s" % fn)
+ method = getattr(self, fn)
+ method()
+ except Exception:
+ self.error(
+ "Exception during pre-run listener: %s" % traceback.format_exc()
+ )
+
+ for fn in self._listeners["post_run"]:
+ try:
+ method = getattr(self, fn)
+ method()
+ except Exception:
+ self.error(
+ "An additional exception occurred during a "
+ "post-run listener: %s" % traceback.format_exc()
+ )
+
+ self.fatal("Aborting due to failure in pre-run listener.")
+
+ self.dump_config()
+ try:
+ for action in self.all_actions:
+ self.run_action(action)
+ except Exception:
+ self.fatal("Uncaught exception: %s" % traceback.format_exc())
+ finally:
+ post_success = True
+ for fn in self._listeners["post_run"]:
+ try:
+ self.info("Running post-run listener: %s" % fn)
+ method = getattr(self, fn)
+ method()
+ except Exception:
+ post_success = False
+ self.error(
+ "Exception during post-run listener: %s"
+ % traceback.format_exc()
+ )
+
+ if not post_success:
+ self.fatal("Aborting due to failure in post-run listener.")
+
+ return self.return_code
+
+ def run_and_exit(self):
+ """Runs the script and exits the current interpreter."""
+ rc = self.run()
+ if rc != 0:
+ self.warning("returning nonzero exit status %d" % rc)
+ sys.exit(rc)
+
+ def clobber(self):
+ """
+ Delete the working directory
+ """
+ dirs = self.query_abs_dirs()
+ self.rmtree(dirs["abs_work_dir"], error_level=FATAL)
+
+ def query_abs_dirs(self):
+ """We want to be able to determine where all the important things
+ are. Absolute paths lend themselves well to this, though I wouldn't
+ be surprised if this causes some issues somewhere.
+
+ This should be overridden in any script that has additional dirs
+ to query.
+
+ The query_* methods tend to set self.VAR variables as their
+ runtime cache.
+ """
+ if self.abs_dirs:
+ return self.abs_dirs
+ c = self.config
+ dirs = {}
+ dirs["base_work_dir"] = c["base_work_dir"]
+ dirs["abs_work_dir"] = os.path.join(c["base_work_dir"], c["work_dir"])
+ dirs["abs_log_dir"] = os.path.join(c["base_work_dir"], c.get("log_dir", "logs"))
+ if "GECKO_PATH" in os.environ:
+ dirs["abs_src_dir"] = os.environ["GECKO_PATH"]
+ self.abs_dirs = dirs
+ return self.abs_dirs
+
+ def dump_config(
+ self, file_path=None, config=None, console_output=True, exit_on_finish=False
+ ):
+ """Dump self.config to localconfig.json"""
+ config = config or self.config
+ dirs = self.query_abs_dirs()
+ if not file_path:
+ file_path = os.path.join(dirs["abs_log_dir"], "localconfig.json")
+ self.info("Dumping config to %s." % file_path)
+ self.mkdir_p(os.path.dirname(file_path))
+ json_config = json.dumps(config, sort_keys=True, indent=4)
+ fh = codecs.open(file_path, encoding="utf-8", mode="w+")
+ fh.write(json_config)
+ fh.close()
+ if console_output:
+ self.info(pprint.pformat(config))
+ if exit_on_finish:
+ sys.exit()
+
+ # logging {{{2
+ def new_log_obj(self, default_log_level="info"):
+ c = self.config
+ log_dir = os.path.join(c["base_work_dir"], c.get("log_dir", "logs"))
+ log_config = {
+ "logger_name": "Simple",
+ "log_name": "log",
+ "log_dir": log_dir,
+ "log_level": default_log_level,
+ "log_format": "%(asctime)s %(levelname)8s - %(message)s",
+ "log_to_console": True,
+ "append_to_log": False,
+ }
+ log_type = self.config.get("log_type", "console")
+ for key in log_config.keys():
+ value = self.config.get(key, None)
+ if value is not None:
+ log_config[key] = value
+ if log_type == "multi":
+ self.log_obj = MultiFileLogger(**log_config)
+ elif log_type == "simple":
+ self.log_obj = SimpleFileLogger(**log_config)
+ else:
+ self.log_obj = ConsoleLogger(**log_config)
+
+ def action_message(self, message):
+ self.info(
+ "[mozharness: %sZ] %s"
+ % (datetime.datetime.utcnow().isoformat(" "), message)
+ )
+
+ def summary(self):
+ """Print out all the summary lines added via add_summary()
+ throughout the script.
+
+ I'd like to revisit how to do this in a prettier fashion.
+ """
+ self.action_message("%s summary:" % self.__class__.__name__)
+ if self.summary_list:
+ for item in self.summary_list:
+ try:
+ self.log(item["message"], level=item["level"])
+ except ValueError:
+ """log is closed; print as a default. Ran into this
+ when calling from __del__()"""
+ print("### Log is closed! (%s)" % item["message"])
+
+ def add_summary(self, message, level=INFO):
+ self.summary_list.append({"message": message, "level": level})
+ # TODO write to a summary-only log?
+ # Summaries need a lot more love.
+ self.log(message, level=level)
+
+ def summarize_success_count(
+ self, success_count, total_count, message="%d of %d successful.", level=None
+ ):
+ if level is None:
+ level = INFO
+ if success_count < total_count:
+ level = ERROR
+ self.add_summary(message % (success_count, total_count), level=level)
+
+ def get_hash_for_file(self, file_path, hash_type="sha512"):
+ bs = 65536
+ hasher = hashlib.new(hash_type)
+ with open(file_path, "rb") as fh:
+ buf = fh.read(bs)
+ while len(buf) > 0:
+ hasher.update(buf)
+ buf = fh.read(bs)
+ return hasher.hexdigest()
+
+ @property
+ def return_code(self):
+ return self._return_code
+
+ @return_code.setter
+ def return_code(self, code):
+ old_return_code, self._return_code = self._return_code, code
+ if old_return_code != code:
+ self.warning("setting return code to %d" % code)
diff --git a/testing/mozharness/mozharness/base/transfer.py b/testing/mozharness/mozharness/base/transfer.py
new file mode 100755
index 0000000000..610e93ecc9
--- /dev/null
+++ b/testing/mozharness/mozharness/base/transfer.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Generic ways to upload + download files.
+"""
+
+import pprint
+
+try:
+ from urllib2 import urlopen
+except ImportError:
+ from urllib.request import urlopen
+
+import json
+
+from mozharness.base.log import DEBUG
+
+
+# TransferMixin {{{1
+class TransferMixin(object):
+ """
+ Generic transfer methods.
+
+ Dependent on BaseScript.
+ """
+
+ def load_json_from_url(self, url, timeout=30, log_level=DEBUG):
+ self.log(
+ "Attempting to download %s; timeout=%i" % (url, timeout), level=log_level
+ )
+ try:
+ r = urlopen(url, timeout=timeout)
+ j = json.load(r)
+ self.log(pprint.pformat(j), level=log_level)
+ except BaseException:
+ self.exception(message="Unable to download %s!" % url)
+ raise
+ return j
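+
+    # Example (illustrative URL), mirroring how query_pushinfo in
+    # mozharness.base.vcs.mercurial uses this method:
+    #     data = self.load_json_from_url(
+    #         "https://hg.mozilla.org/mozilla-central/json-pushes?changeset=abc",
+    #         timeout=10,
+    #     )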
diff --git a/testing/mozharness/mozharness/base/vcs/__init__.py b/testing/mozharness/mozharness/base/vcs/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/testing/mozharness/mozharness/base/vcs/__init__.py
diff --git a/testing/mozharness/mozharness/base/vcs/gittool.py b/testing/mozharness/mozharness/base/vcs/gittool.py
new file mode 100644
index 0000000000..e9d0c0e2c9
--- /dev/null
+++ b/testing/mozharness/mozharness/base/vcs/gittool.py
@@ -0,0 +1,107 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import os
+import re
+
+try:
+ import urlparse
+except ImportError:
+ import urllib.parse as urlparse
+
+from mozharness.base.errors import GitErrorList, VCSException
+from mozharness.base.log import LogMixin, OutputParser
+from mozharness.base.script import ScriptMixin
+
+
+class GittoolParser(OutputParser):
+ """
+ A class that extends OutputParser such that it can find the "Got revision"
+ string from gittool.py output
+ """
+
+ got_revision_exp = re.compile(r"Got revision (\w+)")
+ got_revision = None
+
+ def parse_single_line(self, line):
+ m = self.got_revision_exp.match(line)
+ if m:
+ self.got_revision = m.group(1)
+ super(GittoolParser, self).parse_single_line(line)
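+
+    # For instance, a line reading "Got revision 1d0a914ae676" sets
+    # self.got_revision to "1d0a914ae676"; every line is still passed to the
+    # base OutputParser, so the usual error scanning applies.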
+
+
+class GittoolVCS(ScriptMixin, LogMixin):
+ def __init__(self, log_obj=None, config=None, vcs_config=None, script_obj=None):
+ super(GittoolVCS, self).__init__()
+
+ self.log_obj = log_obj
+ self.script_obj = script_obj
+ if config:
+ self.config = config
+ else:
+ self.config = {}
+ # vcs_config = {
+ # repo: repository,
+ # branch: branch,
+ # revision: revision,
+ # ssh_username: ssh_username,
+ # ssh_key: ssh_key,
+ # }
+ self.vcs_config = vcs_config
+ self.gittool = self.query_exe("gittool.py", return_type="list")
+
+ def ensure_repo_and_revision(self):
+ """Makes sure that `dest` is has `revision` or `branch` checked out
+ from `repo`.
+
+ Do what it takes to make that happen, including possibly clobbering
+ dest.
+ """
+ c = self.vcs_config
+ for conf_item in ("dest", "repo"):
+ assert self.vcs_config[conf_item]
+ dest = os.path.abspath(c["dest"])
+ repo = c["repo"]
+ revision = c.get("revision")
+ branch = c.get("branch")
+ clean = c.get("clean")
+ share_base = c.get("vcs_share_base", os.environ.get("GIT_SHARE_BASE_DIR", None))
+ env = {"PATH": os.environ.get("PATH")}
+ env.update(c.get("env", {}))
+ if self._is_windows():
+ # git.exe is not in the PATH by default
+ env["PATH"] = "%s;C:/mozilla-build/Git/bin" % env["PATH"]
+ # SYSTEMROOT is needed for 'import random'
+ if "SYSTEMROOT" not in env:
+ env["SYSTEMROOT"] = os.environ.get("SYSTEMROOT")
+ if share_base is not None:
+ env["GIT_SHARE_BASE_DIR"] = share_base
+
+ cmd = self.gittool[:]
+ if branch:
+ cmd.extend(["-b", branch])
+ if revision:
+ cmd.extend(["-r", revision])
+ if clean:
+ cmd.append("--clean")
+
+ for base_mirror_url in self.config.get(
+ "gittool_base_mirror_urls", self.config.get("vcs_base_mirror_urls", [])
+ ):
+ bits = urlparse.urlparse(repo)
+ mirror_url = urlparse.urljoin(base_mirror_url, bits.path)
+ cmd.extend(["--mirror", mirror_url])
+
+ cmd.extend([repo, dest])
+ parser = GittoolParser(
+ config=self.config, log_obj=self.log_obj, error_list=GitErrorList
+ )
+ retval = self.run_command(
+ cmd, error_list=GitErrorList, env=env, output_parser=parser
+ )
+
+ if retval != 0:
+ raise VCSException("Unable to checkout")
+
+ return parser.got_revision
diff --git a/testing/mozharness/mozharness/base/vcs/mercurial.py b/testing/mozharness/mozharness/base/vcs/mercurial.py
new file mode 100755
index 0000000000..63b0d27c34
--- /dev/null
+++ b/testing/mozharness/mozharness/base/vcs/mercurial.py
@@ -0,0 +1,478 @@
+#!/usr/bin/env python
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Mercurial VCS support.
+"""
+
+import hashlib
+import os
+import re
+import subprocess
+import sys
+from collections import namedtuple
+
+try:
+ from urlparse import urlsplit
+except ImportError:
+ from urllib.parse import urlsplit
+
+import mozharness
+from mozharness.base.errors import HgErrorList, VCSException
+from mozharness.base.log import LogMixin, OutputParser
+from mozharness.base.script import ScriptMixin
+from mozharness.base.transfer import TransferMixin
+
+sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.dirname(sys.path[0]))))
+
+
+external_tools_path = os.path.join(
+ os.path.abspath(os.path.dirname(os.path.dirname(mozharness.__file__))),
+ "external_tools",
+)
+
+
+HG_OPTIONS = ["--config", "ui.merge=internal:merge"]
+
+# MercurialVCS {{{1
+# TODO Make the remaining functions more mozharness-friendly.
+# TODO Add the various tag functionality that are currently in
+# build/tools/scripts to MercurialVCS -- generic tagging logic belongs here.
+REVISION, BRANCH = 0, 1
+
+
+class RepositoryUpdateRevisionParser(OutputParser):
+ """Parse `hg pull` output for "repository unrelated" errors."""
+
+ revision = None
+ RE_UPDATED = re.compile("^updated to ([a-f0-9]{40})$")
+
+ def parse_single_line(self, line):
+ m = self.RE_UPDATED.match(line)
+ if m:
+ self.revision = m.group(1)
+
+ return super(RepositoryUpdateRevisionParser, self).parse_single_line(line)
+
+
+def make_hg_url(hg_host, repo_path, protocol="http", revision=None, filename=None):
+ """Helper function.
+
+    Construct a valid hg URL from an hg host (e.g. hg.mozilla.org), a
+    repo_path, and an optional revision and filename.
+ """
+ base = "%s://%s" % (protocol, hg_host)
+ repo = "/".join(p.strip("/") for p in [base, repo_path])
+ if not filename:
+ if not revision:
+ return repo
+ else:
+ return "/".join([p.strip("/") for p in [repo, "rev", revision]])
+ else:
+ assert revision
+ return "/".join([p.strip("/") for p in [repo, "raw-file", revision, filename]])
+
+
+class MercurialVCS(ScriptMixin, LogMixin, TransferMixin):
+    # Most scripts import this module for clone/pull/update;
+    # tag-release.py additionally imports apply_and_push, update,
+    # get_revision, out, BRANCH, REVISION, get_branches, and
+    # cleanOutgoingRevs.
+
+ def __init__(self, log_obj=None, config=None, vcs_config=None, script_obj=None):
+ super(MercurialVCS, self).__init__()
+ self.can_share = None
+ self.log_obj = log_obj
+ self.script_obj = script_obj
+ if config:
+ self.config = config
+ else:
+ self.config = {}
+ # vcs_config = {
+ # hg_host: hg_host,
+ # repo: repository,
+ # branch: branch,
+ # revision: revision,
+ # ssh_username: ssh_username,
+ # ssh_key: ssh_key,
+ # }
+ self.vcs_config = vcs_config or {}
+ self.hg = self.query_exe("hg", return_type="list") + HG_OPTIONS
+
+ def _make_absolute(self, repo):
+ if repo.startswith("file://"):
+ path = repo[len("file://") :]
+ repo = "file://%s" % os.path.abspath(path)
+ elif "://" not in repo:
+ repo = os.path.abspath(repo)
+ return repo
+
+ def get_repo_name(self, repo):
+ return repo.rstrip("/").split("/")[-1]
+
+ def get_repo_path(self, repo):
+ repo = self._make_absolute(repo)
+ if repo.startswith("/"):
+ return repo.lstrip("/")
+ else:
+ return urlsplit(repo).path.lstrip("/")
+
+ def get_revision_from_path(self, path):
+ """Returns which revision directory `path` currently has checked out."""
+ return self.get_output_from_command(
+ self.hg + ["parent", "--template", "{node}"], cwd=path
+ )
+
+ def get_branch_from_path(self, path):
+ branch = self.get_output_from_command(self.hg + ["branch"], cwd=path)
+ return str(branch).strip()
+
+ def get_branches_from_path(self, path):
+ branches = []
+ for line in self.get_output_from_command(
+ self.hg + ["branches", "-c"], cwd=path
+ ).splitlines():
+ branches.append(line.split()[0])
+ return branches
+
+ def hg_ver(self):
+ """Returns the current version of hg, as a tuple of
+ (major, minor, build)"""
+ ver_string = self.get_output_from_command(self.hg + ["-q", "version"])
+ match = re.search(r"\(version ([0-9.]+)\)", ver_string)
+ if match:
+ bits = match.group(1).split(".")
+            while len(bits) < 3:
+                bits.append(0)
+ ver = tuple(int(b) for b in bits)
+ else:
+ ver = (0, 0, 0)
+ self.debug("Running hg version %s" % str(ver))
+ return ver
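+
+    # e.g. "Mercurial Distributed SCM (version 5.8.1)" yields (5, 8, 1);
+    # "(version 4.9)" is zero-padded to (4, 9, 0).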
+
+ def update(self, dest, branch=None, revision=None):
+ """Updates working copy `dest` to `branch` or `revision`.
+ If revision is set, branch will be ignored.
+ If neither is set then the working copy will be updated to the
+ latest revision on the current branch. Local changes will be
+ discarded.
+ """
+ # If we have a revision, switch to that
+ msg = "Updating %s" % dest
+ if branch:
+ msg += " to branch %s" % branch
+ if revision:
+ msg += " revision %s" % revision
+ self.info("%s." % msg)
+ if revision is not None:
+ cmd = self.hg + ["update", "-C", "-r", revision]
+ if self.run_command(cmd, cwd=dest, error_list=HgErrorList):
+ raise VCSException("Unable to update %s to %s!" % (dest, revision))
+ else:
+ # Check & switch branch
+ local_branch = self.get_branch_from_path(dest)
+
+ cmd = self.hg + ["update", "-C"]
+
+ # If this is different, checkout the other branch
+ if branch and branch != local_branch:
+ cmd.append(branch)
+
+ if self.run_command(cmd, cwd=dest, error_list=HgErrorList):
+ raise VCSException("Unable to update %s!" % dest)
+ return self.get_revision_from_path(dest)
+
+ def clone(self, repo, dest, branch=None, revision=None, update_dest=True):
+ """Clones hg repo and places it at `dest`, replacing whatever else
+        is there. The working copy will be empty unless `update_dest` is set.
+
+ If `revision` is set, only the specified revision and its ancestors
+ will be cloned. If revision is set, branch is ignored.
+
+ If `update_dest` is set, then `dest` will be updated to `revision`
+ if set, otherwise to `branch`, otherwise to the head of default.
+ """
+ msg = "Cloning %s to %s" % (repo, dest)
+ if branch:
+ msg += " on branch %s" % branch
+ if revision:
+ msg += " to revision %s" % revision
+ self.info("%s." % msg)
+ parent_dest = os.path.dirname(dest)
+ if parent_dest and not os.path.exists(parent_dest):
+ self.mkdir_p(parent_dest)
+ if os.path.exists(dest):
+ self.info("Removing %s before clone." % dest)
+ self.rmtree(dest)
+
+ cmd = self.hg + ["clone"]
+ if not update_dest:
+ cmd.append("-U")
+
+ if revision:
+ cmd.extend(["-r", revision])
+ elif branch:
+ # hg >= 1.6 supports -b branch for cloning
+ ver = self.hg_ver()
+ if ver >= (1, 6, 0):
+ cmd.extend(["-b", branch])
+
+ cmd.extend([repo, dest])
+ output_timeout = self.config.get(
+ "vcs_output_timeout", self.vcs_config.get("output_timeout")
+ )
+ if (
+ self.run_command(cmd, error_list=HgErrorList, output_timeout=output_timeout)
+ != 0
+ ):
+ raise VCSException("Unable to clone %s to %s!" % (repo, dest))
+
+ if update_dest:
+ return self.update(dest, branch, revision)
+
+ def common_args(self, revision=None, branch=None, ssh_username=None, ssh_key=None):
+ """Fill in common hg arguments, encapsulating logic checks that
+ depend on mercurial versions and provided arguments
+ """
+ args = []
+ if ssh_username or ssh_key:
+ opt = ["-e", "ssh"]
+ if ssh_username:
+ opt[1] += " -l %s" % ssh_username
+ if ssh_key:
+ opt[1] += " -i %s" % ssh_key
+ args.extend(opt)
+ if revision:
+ args.extend(["-r", revision])
+ elif branch:
+ if self.hg_ver() >= (1, 6, 0):
+ args.extend(["-b", branch])
+ return args
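+
+    # Illustrative values:
+    #     common_args(revision="abc123", ssh_username="cltbld",
+    #                 ssh_key="~/.ssh/id_rsa")
+    # returns ["-e", "ssh -l cltbld -i ~/.ssh/id_rsa", "-r", "abc123"].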
+
+ def pull(self, repo, dest, update_dest=True, **kwargs):
+ """Pulls changes from hg repo and places it in `dest`.
+
+ If `revision` is set, only the specified revision and its ancestors
+ will be pulled.
+
+ If `update_dest` is set, then `dest` will be updated to `revision`
+ if set, otherwise to `branch`, otherwise to the head of default.
+ """
+ msg = "Pulling %s to %s" % (repo, dest)
+ if update_dest:
+ msg += " and updating"
+ self.info("%s." % msg)
+ if not os.path.exists(dest):
+ # Error or clone?
+ # If error, should we have a halt_on_error=False above?
+ self.error("Can't hg pull in nonexistent directory %s." % dest)
+ return -1
+ # Convert repo to an absolute path if it's a local repository
+ repo = self._make_absolute(repo)
+ cmd = self.hg + ["pull"]
+ cmd.extend(self.common_args(**kwargs))
+ cmd.append(repo)
+ output_timeout = self.config.get(
+ "vcs_output_timeout", self.vcs_config.get("output_timeout")
+ )
+ if (
+ self.run_command(
+ cmd, cwd=dest, error_list=HgErrorList, output_timeout=output_timeout
+ )
+ != 0
+ ):
+ raise VCSException("Can't pull in %s!" % dest)
+
+ if update_dest:
+ branch = self.vcs_config.get("branch")
+ revision = self.vcs_config.get("revision")
+ return self.update(dest, branch=branch, revision=revision)
+
+    # REVISION and BRANCH (defined above) give the positions of the
+    # attributes in the tuples returned by `out`.
+
+ def out(self, src, remote, **kwargs):
+ """Check for outgoing changesets present in a repo"""
+ self.info("Checking for outgoing changesets from %s to %s." % (src, remote))
+ cmd = self.hg + ["-q", "out", "--template", "{node} {branches}\n"]
+ cmd.extend(self.common_args(**kwargs))
+ cmd.append(remote)
+ if os.path.exists(src):
+ try:
+ revs = []
+ for line in (
+ self.get_output_from_command(cmd, cwd=src, throw_exception=True)
+ .rstrip()
+ .split("\n")
+ ):
+ try:
+ rev, branch = line.split()
+ # Mercurial displays no branch at all if the revision
+ # is on "default"
+ except ValueError:
+ rev = line.rstrip()
+ branch = "default"
+ revs.append((rev, branch))
+ return revs
+ except subprocess.CalledProcessError as inst:
+ # In some situations, some versions of Mercurial return "1"
+ # if no changes are found, so we need to ignore this return
+ # code
+ if inst.returncode == 1:
+ return []
+ raise
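+
+    # Returns a list of (revision, branch) tuples, e.g.
+    #     [("1d0a914ae676cc5ed203cdc05c16d8e0c22af7e5", "default")]
+    # which can be indexed with the REVISION and BRANCH constants above.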
+
+ def push(self, src, remote, push_new_branches=True, **kwargs):
+ # This doesn't appear to work with hg_ver < (1, 6, 0).
+ # Error out, or let you try?
+ self.info("Pushing new changes from %s to %s." % (src, remote))
+ cmd = self.hg + ["push"]
+ cmd.extend(self.common_args(**kwargs))
+ if push_new_branches and self.hg_ver() >= (1, 6, 0):
+ cmd.append("--new-branch")
+ cmd.append(remote)
+ status = self.run_command(
+ cmd,
+ cwd=src,
+ error_list=HgErrorList,
+ success_codes=(0, 1),
+ return_type="num_errors",
+ )
+ if status:
+ raise VCSException("Can't push %s to %s!" % (src, remote))
+ return status
+
+ @property
+ def robustcheckout_path(self):
+ """Path to the robustcheckout extension."""
+ ext = os.path.join(external_tools_path, "robustcheckout.py")
+ if os.path.exists(ext):
+ return ext
+
+ def ensure_repo_and_revision(self):
+ """Makes sure that `dest` is has `revision` or `branch` checked out
+ from `repo`.
+
+ Do what it takes to make that happen, including possibly clobbering
+ dest.
+ """
+ c = self.vcs_config
+ dest = c["dest"]
+ repo_url = c["repo"]
+ rev = c.get("revision")
+ branch = c.get("branch")
+ purge = c.get("clone_with_purge", False)
+ upstream = c.get("clone_upstream_url")
+
+ # The API here is kind of bad because we're relying on state in
+ # self.vcs_config instead of passing arguments. This confuses
+ # scripts that have multiple repos. This includes the clone_tools()
+ # step :(
+
+ if not rev and not branch:
+ self.warning('did not specify revision or branch; assuming "default"')
+ branch = "default"
+
+ share_base = c.get("vcs_share_base") or os.environ.get("HG_SHARE_BASE_DIR")
+ if share_base and c.get("use_vcs_unique_share"):
+ # Bug 1277041 - update migration scripts to support robustcheckout
+ # fake a share but don't really share
+            # hashlib needs bytes; encode the destination path first.
+            share_base = os.path.join(
+                share_base, hashlib.md5(dest.encode("utf-8")).hexdigest()
+            )
+
+ # We require shared storage is configured because it guarantees we
+ # only have 1 local copy of logical repo stores.
+ if not share_base:
+ raise VCSException(
+ "vcs share base not defined; " "refusing to operate sub-optimally"
+ )
+
+ if not self.robustcheckout_path:
+ raise VCSException("could not find the robustcheckout Mercurial extension")
+
+ # Log HG version and install info to aid debugging.
+ self.run_command(self.hg + ["--version"])
+ self.run_command(self.hg + ["debuginstall", "--config=ui.username=worker"])
+
+ args = self.hg + [
+ "--config",
+ "extensions.robustcheckout=%s" % self.robustcheckout_path,
+ "robustcheckout",
+ repo_url,
+ dest,
+ "--sharebase",
+ share_base,
+ ]
+ if purge:
+ args.append("--purge")
+ if upstream:
+ args.extend(["--upstream", upstream])
+
+ if rev:
+ args.extend(["--revision", rev])
+ if branch:
+ args.extend(["--branch", branch])
+
+ parser = RepositoryUpdateRevisionParser(
+ config=self.config, log_obj=self.log_obj
+ )
+ if self.run_command(args, output_parser=parser):
+ raise VCSException("repo checkout failed!")
+
+ if not parser.revision:
+ raise VCSException("could not identify revision updated to")
+
+ return parser.revision
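+
+    # The assembled command resembles (illustrative values):
+    #     hg --config extensions.robustcheckout=<ext_path> robustcheckout \
+    #         https://hg.mozilla.org/mozilla-central /builds/src \
+    #         --sharebase /builds/hg-shared --revision abc123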
+
+ def cleanOutgoingRevs(self, reponame, remote, username, sshKey):
+ # TODO retry
+ self.info("Wiping outgoing local changes from %s to %s." % (reponame, remote))
+ outgoingRevs = self.out(
+ src=reponame, remote=remote, ssh_username=username, ssh_key=sshKey
+ )
+ for r in reversed(outgoingRevs):
+ self.run_command(
+ self.hg + ["strip", "-n", r[REVISION]],
+ cwd=reponame,
+ error_list=HgErrorList,
+ )
+
+ def query_pushinfo(self, repository, revision):
+ """Query the pushdate and pushid of a repository/revision.
+ This is intended to be used on hg.mozilla.org/mozilla-central and
+ similar. It may or may not work for other hg repositories.
+ """
+ PushInfo = namedtuple("PushInfo", ["pushid", "pushdate"])
+
+ try:
+ url = "%s/json-pushes?changeset=%s" % (repository, revision)
+ self.info("Pushdate URL is: %s" % url)
+ contents = self.retry(self.load_json_from_url, args=(url,))
+
+ # The contents should be something like:
+ # {
+ # "28537": {
+ # "changesets": [
+ # "1d0a914ae676cc5ed203cdc05c16d8e0c22af7e5",
+ # ],
+ # "date": 1428072488,
+ # "user": "user@mozilla.com"
+ # }
+ # }
+ #
+ # So we grab the first element ("28537" in this case) and then pull
+ # out the 'date' field.
+            pushid = next(iter(contents))
+ self.info("Pushid is: %s" % pushid)
+ pushdate = contents[pushid]["date"]
+ self.info("Pushdate is: %s" % pushdate)
+ return PushInfo(pushid, pushdate)
+
+ except Exception:
+ self.exception("Failed to get push info from hg.mozilla.org")
+ raise
+
+
+# __main__ {{{1
+if __name__ == "__main__":
+ pass
diff --git a/testing/mozharness/mozharness/base/vcs/vcsbase.py b/testing/mozharness/mozharness/base/vcs/vcsbase.py
new file mode 100755
index 0000000000..c587a8b1ca
--- /dev/null
+++ b/testing/mozharness/mozharness/base/vcs/vcsbase.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Generic VCS support.
+"""
+
+import os
+import sys
+from copy import deepcopy
+
+from mozharness.base.errors import VCSException
+from mozharness.base.log import FATAL
+from mozharness.base.script import BaseScript
+from mozharness.base.vcs.gittool import GittoolVCS
+from mozharness.base.vcs.mercurial import MercurialVCS
+
+sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.dirname(sys.path[0]))))
+
+
+# Update this with supported VCS name : VCS object
+VCS_DICT = {
+ "hg": MercurialVCS,
+ "gittool": GittoolVCS,
+}
+
+
+# VCSMixin {{{1
+class VCSMixin(object):
+ """Basic VCS methods that are vcs-agnostic.
+ The vcs_class handles all the vcs-specific tasks.
+ """
+
+ def query_dest(self, kwargs):
+ if "dest" in kwargs:
+ return kwargs["dest"]
+ dest = os.path.basename(kwargs["repo"])
+ # Git fun
+ if dest.endswith(".git"):
+ dest = dest.replace(".git", "")
+ return dest
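+
+    # e.g. {"repo": "https://github.com/mozilla/foo.git"} yields "foo";
+    # {"repo": "https://hg.mozilla.org/build/tools"} yields "tools".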
+
+ def _get_revision(self, vcs_obj, dest):
+ try:
+ got_revision = vcs_obj.ensure_repo_and_revision()
+ if got_revision:
+ return got_revision
+ except VCSException:
+ self.rmtree(dest)
+ raise
+
+ def _get_vcs_class(self, vcs):
+ vcs = vcs or self.config.get("default_vcs", getattr(self, "default_vcs", None))
+ vcs_class = VCS_DICT.get(vcs)
+ return vcs_class
+
+ def vcs_checkout(self, vcs=None, error_level=FATAL, **kwargs):
+ """Check out a single repo."""
+ c = self.config
+ vcs_class = self._get_vcs_class(vcs)
+ if not vcs_class:
+ self.error("Running vcs_checkout with kwargs %s" % str(kwargs))
+ raise VCSException("No VCS set!")
+ # need a better way to do this.
+ if "dest" not in kwargs:
+ kwargs["dest"] = self.query_dest(kwargs)
+ if "vcs_share_base" not in kwargs:
+ kwargs["vcs_share_base"] = c.get(
+ "%s_share_base" % vcs, c.get("vcs_share_base")
+ )
+ vcs_obj = vcs_class(
+ log_obj=self.log_obj,
+ config=self.config,
+ vcs_config=kwargs,
+ script_obj=self,
+ )
+ return self.retry(
+ self._get_revision,
+ error_level=error_level,
+ error_message="Automation Error: Can't checkout %s!" % kwargs["repo"],
+ args=(vcs_obj, kwargs["dest"]),
+ )
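+
+    # A typical call from a script (illustrative values):
+    #     rev = self.vcs_checkout(
+    #         vcs="hg",
+    #         repo="https://hg.mozilla.org/mozilla-central",
+    #         branch="default",
+    #     )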
+
+ def vcs_checkout_repos(
+ self, repo_list, parent_dir=None, tag_override=None, **kwargs
+ ):
+ """Check out a list of repos."""
+ orig_dir = os.getcwd()
+ c = self.config
+ if not parent_dir:
+ parent_dir = os.path.join(c["base_work_dir"], c["work_dir"])
+ self.mkdir_p(parent_dir)
+ self.chdir(parent_dir)
+ revision_dict = {}
+ kwargs_orig = deepcopy(kwargs)
+ for repo_dict in repo_list:
+ kwargs = deepcopy(kwargs_orig)
+ kwargs.update(repo_dict)
+ if tag_override:
+ kwargs["branch"] = tag_override
+ dest = self.query_dest(kwargs)
+ revision_dict[dest] = {"repo": kwargs["repo"]}
+ revision_dict[dest]["revision"] = self.vcs_checkout(**kwargs)
+ self.chdir(orig_dir)
+ return revision_dict
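+
+    # Each repo_list entry is a dict merged over **kwargs, e.g.
+    #     [{"repo": "https://hg.mozilla.org/build/tools", "branch": "default"}]
+    # and the returned dict maps each dest to its repo URL and the revision
+    # that was checked out.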
+
+ def vcs_query_pushinfo(self, repository, revision, vcs=None):
+ """Query the pushid/pushdate of a repository/revision
+ Returns a namedtuple with "pushid" and "pushdate" elements
+ """
+ vcs_class = self._get_vcs_class(vcs)
+ if not vcs_class:
+ raise VCSException("No VCS set in vcs_query_pushinfo!")
+ vcs_obj = vcs_class(
+ log_obj=self.log_obj,
+ config=self.config,
+ script_obj=self,
+ )
+ return vcs_obj.query_pushinfo(repository, revision)
+
+
+class VCSScript(VCSMixin, BaseScript):
+ def __init__(self, **kwargs):
+ super(VCSScript, self).__init__(**kwargs)
+
+ def pull(self, repos=None, parent_dir=None):
+ repos = repos or self.config.get("repos")
+ if not repos:
+ self.info("Pull has nothing to do!")
+ return
+ dirs = self.query_abs_dirs()
+ parent_dir = parent_dir or dirs["abs_work_dir"]
+ return self.vcs_checkout_repos(repos, parent_dir=parent_dir)
+
+
+# Specific VCS stubs {{{1
+# For ease of use.
+# This is here instead of mercurial.py because importing MercurialVCS into
+# vcsbase from mercurial, and importing VCSScript into mercurial from
+# vcsbase, was giving me issues.
+class MercurialScript(VCSScript):
+ default_vcs = "hg"
+
+
+# __main__ {{{1
+if __name__ == "__main__":
+ pass