summaryrefslogtreecommitdiffstats
path: root/testing/mozharness/mozharness/base/script.py
diff options
context:
space:
mode:
Diffstat (limited to 'testing/mozharness/mozharness/base/script.py')
-rw-r--r--testing/mozharness/mozharness/base/script.py2551
1 files changed, 2551 insertions, 0 deletions
diff --git a/testing/mozharness/mozharness/base/script.py b/testing/mozharness/mozharness/base/script.py
new file mode 100644
index 0000000000..0a5622440b
--- /dev/null
+++ b/testing/mozharness/mozharness/base/script.py
@@ -0,0 +1,2551 @@
+# ***** BEGIN LICENSE BLOCK *****
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+# ***** END LICENSE BLOCK *****
+"""Generic script objects.
+
+script.py, along with config.py and log.py, represents the core of
+mozharness.
+"""
+
+import codecs
+import datetime
+import errno
+import fnmatch
+import functools
+import gzip
+import hashlib
+import inspect
+import itertools
+import os
+import platform
+import pprint
+import re
+import shutil
+import socket
+import ssl
+import stat
+import subprocess
+import sys
+import tarfile
+import time
+import traceback
+import zipfile
+import zlib
+from contextlib import contextmanager
+from io import BytesIO
+
+import mozinfo
+import six
+from mozprocess import ProcessHandler
+from six import binary_type
+
+from mozharness.base.config import BaseConfig
+from mozharness.base.log import (
+ DEBUG,
+ ERROR,
+ FATAL,
+ INFO,
+ WARNING,
+ ConsoleLogger,
+ LogMixin,
+ MultiFileLogger,
+ OutputParser,
+ SimpleFileLogger,
+)
+
+try:
+ import httplib
+except ImportError:
+ import http.client as httplib
+try:
+ import simplejson as json
+except ImportError:
+ import json
+try:
+ from urllib2 import Request, quote, urlopen
+except ImportError:
+ from urllib.request import Request, quote, urlopen
+try:
+ import urlparse
+except ImportError:
+ import urllib.parse as urlparse
+if os.name == "nt":
+ import locale
+
+ try:
+ import win32api
+ import win32file
+
+ PYWIN32 = True
+ except ImportError:
+ PYWIN32 = False
+
+try:
+ from urllib2 import HTTPError, URLError
+except ImportError:
+ from urllib.error import HTTPError, URLError
+
+
class ContentLengthMismatch(Exception):
    """Signals that a retrieved body's size differs from its declared length."""
+
+
+def _validate_tar_member(member, path):
+ def _is_within_directory(directory, target):
+ real_directory = os.path.realpath(directory)
+ real_target = os.path.realpath(target)
+ prefix = os.path.commonprefix([real_directory, real_target])
+ return prefix == real_directory
+
+ member_path = os.path.join(path, member.name)
+ if not _is_within_directory(path, member_path):
+ raise Exception("Attempted path traversal in tar file: " + member.name)
+ if member.issym():
+ link_path = os.path.join(os.path.dirname(member_path), member.linkname)
+ if not _is_within_directory(path, link_path):
+ raise Exception("Attempted link path traversal in tar file: " + member.name)
+ if member.mode & (stat.S_ISUID | stat.S_ISGID):
+ raise Exception("Attempted setuid or setgid in tar file: " + member.name)
+
+
def _safe_extract(tar, path=".", *, numeric_owner=False):
    """Extract every member of `tar` into `path`, validating each one first.

    Each member is passed through `_validate_tar_member` as it is handed to
    `tar.extractall`, so a rejected member aborts the extraction by raising.

    Args:
        tar (tarfile.TarFile): opened archive to extract.
        path (str, optional): destination directory. Defaults to ".".
        numeric_owner (bool, optional): forwarded to `tar.extractall`.
    """

    def _checked_members():
        for entry in tar:
            _validate_tar_member(entry, path)
            yield entry

    tar.extractall(path, members=_checked_members(), numeric_owner=numeric_owner)
+
+
def platform_name():
    """Return a short name for the platform the script is running on.

    Returns:
        str: one of "linux64", "linux", "macosx", "win64" or "win32".
        None: if the platform is none of Linux, Darwin or Windows.
    """
    pm = PlatformMixin()

    if pm._is_linux():
        return "linux64" if pm._is_64_bit() else "linux"
    if pm._is_darwin():
        return "macosx"
    if pm._is_windows():
        return "win64" if pm._is_64_bit() else "win32"
    return None
+
+
class PlatformMixin(object):
    """Mixin with helpers for detecting the host operating system and bitness."""

    def _is_windows(self):
        """check if the current operating system is Windows.

        Returns:
            bool: True if the current platform is Windows, False otherwise
        """
        system = platform.system()
        return (
            system in ("Windows", "Microsoft")
            or system.startswith("CYGWIN")
            or os.name == "nt"
        )

    def _is_darwin(self):
        """check if the current operating system is Darwin.

        Returns:
            bool: True if the current platform is Darwin, False otherwise
        """
        # Compare for equality: `x in ("Darwin")` is a substring test against
        # the string "Darwin", which an empty platform.system() would satisfy.
        return platform.system() == "Darwin" or sys.platform.startswith("darwin")

    def _is_linux(self):
        """check if the current operating system is a Linux distribution.

        Returns:
            bool: True if the current platform is a Linux distro, False otherwise
        """
        # Equality rather than substring membership, as in _is_darwin().
        return platform.system() == "Linux" or sys.platform.startswith("linux")

    def _is_debian(self):
        """check if the current operating system is explicitly Debian.
        This intentionally doesn't count Debian derivatives like Ubuntu.

        Logs `mozinfo.linux_distro` through `self.info`, so the instance must
        provide that logging method.

        Returns:
            bool: True if the current platform is debian, False otherwise
        """
        if not self._is_linux():
            return False
        self.info(mozinfo.linux_distro)
        re_debian_distro = re.compile("debian")
        return re_debian_distro.match(mozinfo.linux_distro) is not None

    def _is_redhat_based(self):
        """check if the current operating system is a Redhat derived Linux distribution.

        Returns:
            bool: True if the current platform is a Redhat Linux distro, False otherwise
        """
        if not self._is_linux():
            return False
        re_redhat_distro = re.compile("Redhat|Fedora|CentOS|Oracle")
        return re_redhat_distro.match(mozinfo.linux_distro) is not None

    def _is_64_bit(self):
        """check whether the platform is 64-bit.

        Returns:
            bool: True for a 64-bit platform, False otherwise
        """
        if self._is_darwin():
            # osx is a special snowflake and to ensure the arch, it is better to use the following
            return (
                sys.maxsize > 2 ** 32
            )  # context: https://docs.python.org/2/library/platform.html
        else:
            # Using machine() gives you the architecture of the host rather
            # than the build type of the Python binary
            return "64" in platform.machine()
+
+
+# ScriptMixin {{{1
+class ScriptMixin(PlatformMixin):
+ """This mixin contains simple filesystem commands and the like.
+
+ It also contains some very special but very complex methods that,
+ together with logging and config, provide the base for all scripts
+ in this harness.
+
+ WARNING !!!
+ This class depends entirely on `LogMixin` methods, so it only works
+ when a class inherits from both `ScriptMixin` and `LogMixin`
+ simultaneously.
+
+ Depends on self.config of some sort.
+
+ Attributes:
+ env (dict): a mapping object representing the string environment.
+ script_obj (ScriptMixin): reference to a ScriptMixin instance.
+ """
+
+ env = None
+ script_obj = None
+ ssl_context = None
+
+ def query_filesize(self, file_path):
+ self.info("Determining filesize for %s" % file_path)
+ length = os.path.getsize(file_path)
+ self.info(" %s" % str(length))
+ return length
+
+ # TODO this should be parallelized with the to-be-written BaseHelper!
+ def query_sha512sum(self, file_path):
+ self.info("Determining sha512sum for %s" % file_path)
+ m = hashlib.sha512()
+ contents = self.read_from_file(file_path, verbose=False, open_mode="rb")
+ m.update(contents)
+ sha512 = m.hexdigest()
+ self.info(" %s" % sha512)
+ return sha512
+
+ def platform_name(self):
+ """Return the platform name on which the script is running on.
+ Returns:
+ None: for failure to determine the platform.
+ str: The name of the platform (e.g. linux64)
+ """
+ return platform_name()
+
+ # Simple filesystem commands {{{2
+ def mkdir_p(self, path, error_level=ERROR):
+ """Create a directory if it doesn't exists.
+ This method also logs the creation, error or current existence of the
+ directory to be created.
+
+ Args:
+ path (str): path of the directory to be created.
+ error_level (str): log level name to be used in case of error.
+
+ Returns:
+ None: for sucess.
+ int: -1 on error
+ """
+
+ if not os.path.exists(path):
+ self.info("mkdir: %s" % path)
+ try:
+ os.makedirs(path)
+ except OSError:
+ self.log("Can't create directory %s!" % path, level=error_level)
+ return -1
+ else:
+ self.debug("mkdir_p: %s Already exists." % path)
+
+ def rmtree(self, path, log_level=INFO, error_level=ERROR, exit_code=-1):
+ """Delete an entire directory tree and log its result.
+ This method also logs the platform rmtree function, its retries, errors,
+ and current existence of the directory.
+
+ Args:
+ path (str): path to the directory tree root to remove.
+ log_level (str, optional): log level name to for this operation. Defaults
+ to `INFO`.
+ error_level (str, optional): log level name to use in case of error.
+ Defaults to `ERROR`.
+ exit_code (int, optional): useless parameter, not use here.
+ Defaults to -1
+
+ Returns:
+ None: for success
+ """
+
+ self.log("rmtree: %s" % path, level=log_level)
+ error_message = "Unable to remove %s!" % path
+ if self._is_windows():
+ # Call _rmtree_windows() directly, since even checking
+ # os.path.exists(path) will hang if path is longer than MAX_PATH.
+ self.info("Using _rmtree_windows ...")
+ return self.retry(
+ self._rmtree_windows,
+ error_level=error_level,
+ error_message=error_message,
+ args=(path,),
+ log_level=log_level,
+ )
+ if os.path.exists(path):
+ if os.path.isdir(path):
+ return self.retry(
+ shutil.rmtree,
+ error_level=error_level,
+ error_message=error_message,
+ retry_exceptions=(OSError,),
+ args=(path,),
+ log_level=log_level,
+ )
+ else:
+ return self.retry(
+ os.remove,
+ error_level=error_level,
+ error_message=error_message,
+ retry_exceptions=(OSError,),
+ args=(path,),
+ log_level=log_level,
+ )
+ else:
+ self.debug("%s doesn't exist." % path)
+
+ def query_msys_path(self, path):
+ """replaces the Windows harddrive letter path style with a linux
+ path style, e.g. C:// --> /C/
+ Note: method, not used in any script.
+
+ Args:
+ path (str?): path to convert to the linux path style.
+ Returns:
+ str: in case `path` is a string. The result is the path with the new notation.
+ type(path): `path` itself is returned in case `path` is not str type.
+ """
+ if not isinstance(path, six.string_types):
+ return path
+ path = path.replace("\\", "/")
+
+ def repl(m):
+ return "/%s/" % m.group(1)
+
+ path = re.sub(r"""^([a-zA-Z]):/""", repl, path)
+ return path
+
+ def _rmtree_windows(self, path):
+ """Windows-specific rmtree that handles path lengths longer than MAX_PATH.
+ Ported from clobberer.py.
+
+ Args:
+ path (str): directory path to remove.
+
+ Returns:
+ None: if the path doesn't exists.
+ int: the return number of calling `self.run_command`
+ int: in case the path specified is not a directory but a file.
+ 0 on success, non-zero on error. Note: The returned value
+ is the result of calling `win32file.DeleteFile`
+ """
+
+ assert self._is_windows()
+ path = os.path.realpath(path)
+ full_path = "\\\\?\\" + path
+ if not os.path.exists(full_path):
+ return
+ if not PYWIN32:
+ if not os.path.isdir(path):
+ return self.run_command('del /F /Q "%s"' % path)
+ else:
+ return self.run_command('rmdir /S /Q "%s"' % path)
+ # Make sure directory is writable
+ win32file.SetFileAttributesW("\\\\?\\" + path, win32file.FILE_ATTRIBUTE_NORMAL)
+ # Since we call rmtree() with a file, sometimes
+ if not os.path.isdir("\\\\?\\" + path):
+ return win32file.DeleteFile("\\\\?\\" + path)
+
+ for ffrec in win32api.FindFiles("\\\\?\\" + path + "\\*.*"):
+ file_attr = ffrec[0]
+ name = ffrec[8]
+ if name == "." or name == "..":
+ continue
+ full_name = os.path.join(path, name)
+
+ if file_attr & win32file.FILE_ATTRIBUTE_DIRECTORY:
+ self._rmtree_windows(full_name)
+ else:
+ try:
+ win32file.SetFileAttributesW(
+ "\\\\?\\" + full_name, win32file.FILE_ATTRIBUTE_NORMAL
+ )
+ win32file.DeleteFile("\\\\?\\" + full_name)
+ except Exception:
+ # DeleteFile fails on long paths, del /f /q works just fine
+ self.run_command('del /F /Q "%s"' % full_name)
+
+ win32file.RemoveDirectory("\\\\?\\" + path)
+
+ def get_filename_from_url(self, url):
+ """parse a filename base on an url.
+
+ Args:
+ url (str): url to parse for the filename
+
+ Returns:
+ str: filename parsed from the url, or `netloc` network location part
+ of the url.
+ """
+
+ parsed = urlparse.urlsplit(url.rstrip("/"))
+ if parsed.path != "":
+ return parsed.path.rsplit("/", 1)[-1]
+ else:
+ return parsed.netloc
+
+ def _urlopen(self, url, **kwargs):
+ """open the url `url` using `urllib2`.`
+ This method can be overwritten to extend its complexity
+
+ Args:
+ url (str | urllib.request.Request): url to open
+ kwargs: Arbitrary keyword arguments passed to the `urllib.request.urlopen` function.
+
+ Returns:
+ file-like: file-like object with additional methods as defined in
+ `urllib.request.urlopen`_.
+ None: None may be returned if no handler handles the request.
+
+ Raises:
+ urllib2.URLError: on errors
+
+ .. urillib.request.urlopen:
+ https://docs.python.org/2/library/urllib2.html#urllib2.urlopen
+ """
+ # http://bugs.python.org/issue13359 - urllib2 does not automatically quote the URL
+ url_quoted = quote(url, safe="%/:=&?~#+!$,;'@()*[]|")
+ # windows certificates need to be refreshed (https://bugs.python.org/issue36011)
+ if self.platform_name() in ("win64",) and platform.architecture()[0] in (
+ "x64",
+ ):
+ if self.ssl_context is None:
+ self.ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
+ self.ssl_context.load_default_certs()
+ return urlopen(url_quoted, context=self.ssl_context, **kwargs)
+ else:
+ return urlopen(url_quoted, **kwargs)
+
+ def fetch_url_into_memory(self, url):
+ """Downloads a file from a url into memory instead of disk.
+
+ Args:
+ url (str): URL path where the file to be downloaded is located.
+
+ Raises:
+ IOError: When the url points to a file on disk and cannot be found
+ ContentLengthMismatch: When the length of the retrieved content does not match the
+ Content-Length response header.
+ ValueError: When the scheme of a url is not what is expected.
+
+ Returns:
+ BytesIO: contents of url
+ """
+ self.info("Fetch {} into memory".format(url))
+ parsed_url = urlparse.urlparse(url)
+
+ if parsed_url.scheme in ("", "file"):
+ path = parsed_url.path
+ if not os.path.isfile(path):
+ raise IOError("Could not find file to extract: {}".format(url))
+
+ content_length = os.stat(path).st_size
+
+ # In case we're referrencing a file without file://
+ if parsed_url.scheme == "":
+ url = "file://%s" % os.path.abspath(url)
+ parsed_url = urlparse.urlparse(url)
+
+ request = Request(url)
+ # When calling fetch_url_into_memory() you should retry when we raise
+ # one of these exceptions:
+ # * Bug 1300663 - HTTPError: HTTP Error 404: Not Found
+ # * Bug 1300413 - HTTPError: HTTP Error 500: Internal Server Error
+ # * Bug 1300943 - HTTPError: HTTP Error 503: Service Unavailable
+ # * Bug 1300953 - URLError: <urlopen error [Errno -2] Name or service not known>
+ # * Bug 1301594 - URLError: <urlopen error [Errno 10054] An existing connection was ...
+ # * Bug 1301597 - URLError: <urlopen error [Errno 8] _ssl.c:504: EOF occurred in ...
+ # * Bug 1301855 - URLError: <urlopen error [Errno 60] Operation timed out>
+ # * Bug 1302237 - URLError: <urlopen error [Errno 104] Connection reset by peer>
+ # * Bug 1301807 - BadStatusLine: ''
+ #
+ # Bug 1309912 - Adding timeout in hopes to solve blocking on response.read() (bug 1300413)
+ response = urlopen(request, timeout=30)
+
+ if parsed_url.scheme in ("http", "https"):
+ content_length = int(response.headers.get("Content-Length"))
+
+ response_body = response.read()
+ response_body_size = len(response_body)
+
+ self.info("Content-Length response header: {}".format(content_length))
+ self.info("Bytes received: {}".format(response_body_size))
+
+ if response_body_size != content_length:
+ raise ContentLengthMismatch(
+ "The retrieved Content-Length header declares a body length "
+ "of {} bytes, while we actually retrieved {} bytes".format(
+ content_length, response_body_size
+ )
+ )
+
+ if response.info().get("Content-Encoding") == "gzip":
+ self.info('Content-Encoding is "gzip", so decompressing response body')
+ # See http://www.zlib.net/manual.html#Advanced
+ # section "ZEXTERN int ZEXPORT inflateInit2 OF....":
+ # Add 32 to windowBits to enable zlib and gzip decoding with automatic
+ # header detection, or add 16 to decode only the gzip format (the zlib
+ # format will return a Z_DATA_ERROR).
+ # Adding 16 since we only wish to support gzip encoding.
+ file_contents = zlib.decompress(response_body, zlib.MAX_WBITS | 16)
+ else:
+ file_contents = response_body
+
+ # Use BytesIO instead of StringIO
+ # http://stackoverflow.com/questions/34162017/unzip-buffer-with-python/34162395#34162395
+ return BytesIO(file_contents)
+
+ def _download_file(self, url, file_name):
+ """Helper function for download_file()
+ Additionaly this function logs all exceptions as warnings before
+ re-raising them
+
+ Args:
+ url (str): string containing the URL with the file location
+ file_name (str): name of the file where the downloaded file
+ is written.
+
+ Returns:
+ str: filename of the written file on disk
+
+ Raises:
+ urllib2.URLError: on incomplete download.
+ urllib2.HTTPError: on Http error code
+ socket.timeout: on connection timeout
+ socket.error: on socket error
+ """
+ # If our URLs look like files, prefix them with file:// so they can
+ # be loaded like URLs.
+ if not (url.startswith("http") or url.startswith("file://")):
+ if not os.path.isfile(url):
+ self.fatal("The file %s does not exist" % url)
+ url = "file://%s" % os.path.abspath(url)
+
+ try:
+ f_length = None
+ f = self._urlopen(url, timeout=30)
+
+ if f.info().get("content-length") is not None:
+ f_length = int(f.info()["content-length"])
+ got_length = 0
+ if f.info().get("Content-Encoding") == "gzip":
+ # Note, we'll download the full compressed content into its own
+ # file, since that allows the gzip library to seek through it.
+ # Once downloaded, we'll decompress it into the real target
+ # file, and delete the compressed version.
+ local_file = open(file_name + ".gz", "wb")
+ else:
+ local_file = open(file_name, "wb")
+ while True:
+ block = f.read(1024 ** 2)
+ if not block:
+ if f_length is not None and got_length != f_length:
+ raise URLError(
+ "Download incomplete; content-length was %d, "
+ "but only received %d" % (f_length, got_length)
+ )
+ break
+ local_file.write(block)
+ if f_length is not None:
+ got_length += len(block)
+ local_file.close()
+ if f.info().get("Content-Encoding") == "gzip":
+ # Decompress file into target location, then remove compressed version
+ with open(file_name, "wb") as f_out:
+ # On some execution paths, this could be called with python 2.6
+ # whereby gzip.open(...) cannot be used with a 'with' statement.
+ # So let's do this the python 2.6 way...
+ try:
+ f_in = gzip.open(file_name + ".gz", "rb")
+ shutil.copyfileobj(f_in, f_out)
+ finally:
+ f_in.close()
+ os.remove(file_name + ".gz")
+ return file_name
+ except HTTPError as e:
+ self.warning(
+ "Server returned status %s %s for %s" % (str(e.code), str(e), url)
+ )
+ raise
+ except URLError as e:
+ self.warning("URL Error: %s" % url)
+
+ # Failures due to missing local files won't benefit from retry.
+ # Raise the original OSError.
+ if isinstance(e.args[0], OSError) and e.args[0].errno == errno.ENOENT:
+ raise e.args[0]
+
+ raise
+ except socket.timeout as e:
+ self.warning("Timed out accessing %s: %s" % (url, str(e)))
+ raise
+ except socket.error as e:
+ self.warning("Socket error when accessing %s: %s" % (url, str(e)))
+ raise
+
+ def _retry_download(self, url, error_level, file_name=None, retry_config=None):
+ """Helper method to retry download methods.
+
+ This method calls `self.retry` on `self._download_file` using the passed
+ parameters if a file_name is specified. If no file is specified, we will
+ instead call `self._urlopen`, which grabs the contents of a url but does
+ not create a file on disk.
+
+ Args:
+ url (str): URL path where the file is located.
+ file_name (str): file_name where the file will be written to.
+ error_level (str): log level to use in case an error occurs.
+ retry_config (dict, optional): key-value pairs to be passed to
+ `self.retry`. Defaults to `None`
+
+ Returns:
+ str: `self._download_file` return value is returned
+ unknown: `self.retry` `failure_status` is returned on failure, which
+ defaults to -1
+ """
+ retry_args = dict(
+ failure_status=None,
+ retry_exceptions=(
+ HTTPError,
+ URLError,
+ httplib.HTTPException,
+ socket.timeout,
+ socket.error,
+ ),
+ error_message="Can't download from %s to %s!" % (url, file_name),
+ error_level=error_level,
+ )
+
+ if retry_config:
+ retry_args.update(retry_config)
+
+ download_func = self._urlopen
+ kwargs = {"url": url}
+ if file_name:
+ download_func = self._download_file
+ kwargs = {"url": url, "file_name": file_name}
+
+ return self.retry(download_func, kwargs=kwargs, **retry_args)
+
+ def _filter_entries(self, namelist, extract_dirs):
+ """Filter entries of the archive based on the specified list of to extract dirs."""
+ filter_partial = functools.partial(fnmatch.filter, namelist)
+ entries = itertools.chain(*map(filter_partial, extract_dirs or ["*"]))
+
+ for entry in entries:
+ yield entry
+
+ def unzip(self, compressed_file, extract_to, extract_dirs="*", verbose=False):
+ """This method allows to extract a zip file without writing to disk first.
+
+ Args:
+ compressed_file (object): File-like object with the contents of a compressed zip file.
+ extract_to (str): where to extract the compressed file.
+ extract_dirs (list, optional): directories inside the archive file to extract.
+ Defaults to '*'.
+ verbose (bool, optional): whether or not extracted content should be displayed.
+ Defaults to False.
+
+ Raises:
+ zipfile.BadZipfile: on contents of zipfile being invalid
+ """
+ with zipfile.ZipFile(compressed_file) as bundle:
+ entries = self._filter_entries(bundle.namelist(), extract_dirs)
+
+ for entry in entries:
+ if verbose:
+ self.info(" {}".format(entry))
+
+ # Exception to be retried:
+ # Bug 1301645 - BadZipfile: Bad CRC-32 for file ...
+ # http://stackoverflow.com/questions/5624669/strange-badzipfile-bad-crc-32-problem/5626098#5626098
+ # Bug 1301802 - error: Error -3 while decompressing: invalid stored block lengths
+ bundle.extract(entry, path=extract_to)
+
+ # ZipFile doesn't preserve permissions during extraction:
+ # http://bugs.python.org/issue15795
+ fname = os.path.realpath(os.path.join(extract_to, entry))
+ try:
+ # getinfo() can raise KeyError
+ mode = bundle.getinfo(entry).external_attr >> 16 & 0x1FF
+ # Only set permissions if attributes are available. Otherwise all
+ # permissions will be removed eg. on Windows.
+ if mode:
+ os.chmod(fname, mode)
+
+ except KeyError:
+ self.warning("{} was not found in the zip file".format(entry))
+
+ def deflate(self, compressed_file, mode, extract_to=".", *args, **kwargs):
+ """This method allows to extract a compressed file from a tar, tar.bz2 and tar.gz files.
+
+ Args:
+ compressed_file (object): File-like object with the contents of a compressed file.
+ mode (str): string of the form 'filemode[:compression]' (e.g. 'r:gz' or 'r:bz2')
+ extract_to (str, optional): where to extract the compressed file.
+ """
+ with tarfile.open(fileobj=compressed_file, mode=mode) as t:
+ _safe_extract(t, path=extract_to)
+
+ def download_unpack(self, url, extract_to=".", extract_dirs="*", verbose=False):
+ """Generic method to download and extract a compressed file without writing it
+ to disk first.
+
+ Args:
+ url (str): URL where the file to be downloaded is located.
+ extract_to (str, optional): directory where the downloaded file will
+ be extracted to.
+ extract_dirs (list, optional): directories inside the archive to extract.
+ Defaults to `*`. It currently only applies to zip files.
+ verbose (bool, optional): whether or not extracted content should be displayed.
+ Defaults to False.
+
+ """
+
+ def _determine_extraction_method_and_kwargs(url):
+ EXTENSION_TO_MIMETYPE = {
+ "bz2": "application/x-bzip2",
+ "gz": "application/x-gzip",
+ "tar": "application/x-tar",
+ "zip": "application/zip",
+ }
+ MIMETYPES = {
+ "application/x-bzip2": {
+ "function": self.deflate,
+ "kwargs": {"mode": "r:bz2"},
+ },
+ "application/x-gzip": {
+ "function": self.deflate,
+ "kwargs": {"mode": "r:gz"},
+ },
+ "application/x-tar": {
+ "function": self.deflate,
+ "kwargs": {"mode": "r"},
+ },
+ "application/zip": {
+ "function": self.unzip,
+ },
+ "application/x-zip-compressed": {
+ "function": self.unzip,
+ },
+ }
+
+ filename = url.split("/")[-1]
+ # XXX: bz2/gz instead of tar.{bz2/gz}
+ extension = filename[filename.rfind(".") + 1 :]
+ mimetype = EXTENSION_TO_MIMETYPE[extension]
+ self.debug("Mimetype: {}".format(mimetype))
+
+ function = MIMETYPES[mimetype]["function"]
+ kwargs = {
+ "compressed_file": compressed_file,
+ "extract_to": extract_to,
+ "extract_dirs": extract_dirs,
+ "verbose": verbose,
+ }
+ kwargs.update(MIMETYPES[mimetype].get("kwargs", {}))
+
+ return function, kwargs
+
+ # Many scripts overwrite this method and set extract_dirs to None
+ extract_dirs = "*" if extract_dirs is None else extract_dirs
+ self.info(
+ "Downloading and extracting to {} these dirs {} from {}".format(
+ extract_to,
+ ", ".join(extract_dirs),
+ url,
+ )
+ )
+
+ # 1) Let's fetch the file
+ retry_args = dict(
+ retry_exceptions=(
+ HTTPError,
+ URLError,
+ httplib.HTTPException,
+ socket.timeout,
+ socket.error,
+ ContentLengthMismatch,
+ ),
+ sleeptime=30,
+ attempts=5,
+ error_message="Can't download from {}".format(url),
+ error_level=FATAL,
+ )
+ compressed_file = self.retry(
+ self.fetch_url_into_memory, kwargs={"url": url}, **retry_args
+ )
+
+ # 2) We're guaranteed to have download the file with error_level=FATAL
+ # Let's unpack the file
+ function, kwargs = _determine_extraction_method_and_kwargs(url)
+ try:
+ function(**kwargs)
+ except zipfile.BadZipfile:
+ # Dump the exception and exit
+ self.exception(level=FATAL)
+
+ def load_json_url(self, url, error_level=None, *args, **kwargs):
+ """Returns a json object from a url (it retries)."""
+ contents = self._retry_download(
+ url=url, error_level=error_level, *args, **kwargs
+ )
+ return json.loads(contents.read())
+
+ # http://www.techniqal.com/blog/2008/07/31/python-file-read-write-with-urllib2/
+ # TODO thinking about creating a transfer object.
+ def download_file(
+ self,
+ url,
+ file_name=None,
+ parent_dir=None,
+ create_parent_dir=True,
+ error_level=ERROR,
+ exit_code=3,
+ retry_config=None,
+ ):
+ """Python wget.
+ Download the filename at `url` into `file_name` and put it on `parent_dir`.
+ On error log with the specified `error_level`, on fatal exit with `exit_code`.
+ Execute all the above based on `retry_config` parameter.
+
+ Args:
+ url (str): URL path where the file to be downloaded is located.
+ file_name (str, optional): file_name where the file will be written to.
+ Defaults to urls' filename.
+ parent_dir (str, optional): directory where the downloaded file will
+ be written to. Defaults to current working
+ directory
+ create_parent_dir (bool, optional): create the parent directory if it
+ doesn't exist. Defaults to `True`
+ error_level (str, optional): log level to use in case an error occurs.
+ Defaults to `ERROR`
+ retry_config (dict, optional): key-value pairs to be passed to
+ `self.retry`. Defaults to `None`
+
+ Returns:
+ str: filename where the downloaded file was written to.
+ unknown: on failure, `failure_status` is returned.
+ """
+ if not file_name:
+ try:
+ file_name = self.get_filename_from_url(url)
+ except AttributeError:
+ self.log(
+ "Unable to get filename from %s; bad url?" % url,
+ level=error_level,
+ exit_code=exit_code,
+ )
+ return
+ if parent_dir:
+ file_name = os.path.join(parent_dir, file_name)
+ if create_parent_dir:
+ self.mkdir_p(parent_dir, error_level=error_level)
+ self.info("Downloading %s to %s" % (url, file_name))
+ status = self._retry_download(
+ url=url,
+ error_level=error_level,
+ file_name=file_name,
+ retry_config=retry_config,
+ )
+ if status == file_name:
+ self.info("Downloaded %d bytes." % os.path.getsize(file_name))
+ return status
+
+ def move(self, src, dest, log_level=INFO, error_level=ERROR, exit_code=-1):
+ """recursively move a file or directory (src) to another location (dest).
+
+ Args:
+ src (str): file or directory path to move.
+ dest (str): file or directory path where to move the content to.
+ log_level (str): log level to use for normal operation. Defaults to
+ `INFO`
+ error_level (str): log level to use on error. Defaults to `ERROR`
+
+ Returns:
+ int: 0 on success. -1 on error.
+ """
+ self.log("Moving %s to %s" % (src, dest), level=log_level)
+ try:
+ shutil.move(src, dest)
+ # http://docs.python.org/tutorial/errors.html
+ except IOError as e:
+ self.log("IO error: %s" % str(e), level=error_level, exit_code=exit_code)
+ return -1
+ except shutil.Error as e:
+ # ERROR level ends up reporting the failure to treeherder &
+ # pollutes the failure summary list.
+ self.log("shutil error: %s" % str(e), level=WARNING, exit_code=exit_code)
+ return -1
+ return 0
+
+ def chmod(self, path, mode):
+ """change `path` mode to `mode`.
+
+ Args:
+ path (str): path whose mode will be modified.
+ mode (hex): one of the values defined at `stat`_
+
+ .. _stat:
+ https://docs.python.org/2/library/os.html#os.chmod
+ """
+
+ self.info("Chmoding %s to %s" % (path, str(oct(mode))))
+ os.chmod(path, mode)
+
+ def copyfile(
+ self,
+ src,
+ dest,
+ log_level=INFO,
+ error_level=ERROR,
+ copystat=False,
+ compress=False,
+ ):
+ """copy or compress `src` into `dest`.
+
+ Args:
+ src (str): filepath to copy.
+ dest (str): filepath where to move the content to.
+ log_level (str, optional): log level to use for normal operation. Defaults to
+ `INFO`
+ error_level (str, optional): log level to use on error. Defaults to `ERROR`
+ copystat (bool, optional): whether or not to copy the files metadata.
+ Defaults to `False`.
+ compress (bool, optional): whether or not to compress the destination file.
+ Defaults to `False`.
+
+ Returns:
+ int: -1 on error
+ None: on success
+ """
+
+ if compress:
+ self.log("Compressing %s to %s" % (src, dest), level=log_level)
+ try:
+ infile = open(src, "rb")
+ outfile = gzip.open(dest, "wb")
+ outfile.writelines(infile)
+ outfile.close()
+ infile.close()
+ except IOError as e:
+ self.log(
+ "Can't compress %s to %s: %s!" % (src, dest, str(e)),
+ level=error_level,
+ )
+ return -1
+ else:
+ self.log("Copying %s to %s" % (src, dest), level=log_level)
+ try:
+ shutil.copyfile(src, dest)
+ except (IOError, shutil.Error) as e:
+ self.log(
+ "Can't copy %s to %s: %s!" % (src, dest, str(e)), level=error_level
+ )
+ return -1
+
+ if copystat:
+ try:
+ shutil.copystat(src, dest)
+ except (IOError, shutil.Error) as e:
+ self.log(
+ "Can't copy attributes of %s to %s: %s!" % (src, dest, str(e)),
+ level=error_level,
+ )
+ return -1
+
def copytree(
    self, src, dest, overwrite="no_overwrite", log_level=INFO, error_level=ERROR
):
    """Copy the directory tree `src` into `dest`, tolerating an existing `dest`.

    Unlike `shutil.copytree`, the destination may already exist. What happens
    to existing destination paths is controlled by `overwrite`:
        - 'no_overwrite' keeps every path that already exists in `dest`;
          only missing paths are copied over.
        - 'overwrite_if_exists' replaces destination paths that have the same
          relative name as a path in `src`; other destination paths are kept.
        - 'clobber' removes `dest` completely and copies `src` afresh.

    Args:
        src (str): directory path to copy from.
        dest (str): directory path to copy the content to.
        overwrite (str): string specifying the overwrite level.
        log_level (str, optional): accepted for interface compatibility; not
            used by this method. Defaults to `INFO`.
        error_level (str, optional): log level to use on error. Defaults to `ERROR`

    Returns:
        int: -1 on error
        None: on success
    """
    self.info("copying tree: %s to %s" % (src, dest))
    try:
        if overwrite == "clobber" or not os.path.exists(dest):
            # Fresh copy: drop whatever is there and let shutil do the work.
            self.rmtree(dest)
            shutil.copytree(src, dest)
        elif overwrite in ("no_overwrite", "overwrite_if_exists"):
            for entry in os.listdir(src):
                source_path = os.path.join(src, entry)
                target_path = os.path.join(dest, entry)

                if not os.path.exists(target_path):
                    # Nothing to preserve at the destination: plain copy.
                    if os.path.isdir(source_path):
                        self.mkdir_p(target_path)
                        self.copytree(source_path, target_path, overwrite="clobber")
                    else:
                        shutil.copy2(source_path, target_path)
                    continue

                if overwrite == "no_overwrite":
                    # Only descend when both sides are directories; any
                    # other existing destination path is left untouched.
                    if os.path.isdir(source_path) and os.path.isdir(target_path):
                        self.copytree(
                            source_path, target_path, overwrite="no_overwrite"
                        )
                    else:
                        self.debug(
                            "ignoring path: %s as destination: %s exists"
                            % (source_path, target_path)
                        )
                    continue

                # overwrite == 'overwrite_if_exists' and destination exists
                self.debug("overwriting: %s with: %s" % (target_path, source_path))
                self.rmtree(target_path)
                if os.path.isdir(source_path):
                    self.mkdir_p(target_path)
                    self.copytree(
                        source_path, target_path, overwrite="overwrite_if_exists"
                    )
                else:
                    shutil.copy2(source_path, target_path)
        else:
            self.fatal(
                "%s is not a valid argument for param overwrite" % (overwrite)
            )
    except (IOError, shutil.Error):
        self.exception(
            "There was an error while copying %s to %s!" % (src, dest),
            level=error_level,
        )
        return -1
+
def write_to_file(
    self,
    file_path,
    contents,
    verbose=True,
    open_mode="w",
    create_parent_dir=False,
    error_level=ERROR,
):
    """Write `contents` to `file_path`, according to `open_mode`.

    Args:
        file_path (str): filepath where the content will be written to.
        contents (str): content to write to the filepath.
        verbose (bool, optional): whether or not to log `contents` value.
            Defaults to `True`
        open_mode (str, optional): open mode to use for opening the file.
            Defaults to `w`
        create_parent_dir (bool, optional): whether or not to create the
            parent directory of `file_path`
        error_level (str, optional): log level to use on error. Defaults to `ERROR`

    Returns:
        str: `file_path` on success
        None: on error.
    """
    self.info("Writing to file %s" % file_path)
    if verbose:
        self.info("Contents:")
        for line in contents.splitlines():
            self.info(" %s" % line)
    if create_parent_dir:
        parent_dir = os.path.dirname(file_path)
        self.mkdir_p(parent_dir, error_level=error_level)
    try:
        # `with` guarantees the handle is closed even if the write fails.
        with open(file_path, open_mode) as fh:
            try:
                fh.write(contents)
            except UnicodeEncodeError:
                encoded = contents.encode("utf-8", "replace")
                # A text-mode handle rejects bytes on Python 3, so write the
                # fallback through the underlying binary buffer when there
                # is one; otherwise write to the handle itself.
                getattr(fh, "buffer", fh).write(encoded)
        return file_path
    except IOError:
        self.log("%s can't be opened for writing!" % file_path, level=error_level)
        return None
+
@contextmanager
def opened(self, file_path, verbose=True, open_mode="r", error_level=ERROR):
    """Context manager that opens a file and reports failure instead of raising.

    Args:
        file_path (str): filepath of the file to open.
        verbose (bool, optional): unused; kept for interface compatibility.
            Defaults to True.
        open_mode (str, optional): open mode to use for opening the file.
            Defaults to `r`
        error_level (str, optional): log level name to use on error.
            Defaults to `ERROR`

    Yields:
        tuple: (file object, error) pair. When the file cannot be opened the
            pair is `(None, error)`; otherwise it is `(file object, None)`
            and the file is closed when the `with` block exits.
    """
    # See opened_w_error in http://www.python.org/dev/peps/pep-0343/
    self.info("Reading from file %s" % file_path)
    try:
        handle = open(file_path, open_mode)
    except IOError as open_error:
        reason = "unable to open %s: %s" % (file_path, open_error.strerror)
        self.log(reason, level=error_level)
        yield None, open_error
        return

    try:
        yield handle, None
    finally:
        handle.close()
+
def read_from_file(self, file_path, verbose=True, open_mode="r", error_level=ERROR):
    """Read and return the whole content of `file_path` via `self.opened`.

    Args:
        file_path (str): filepath of the file to read.
        verbose (bool, optional): whether or not to log the file content.
            Defaults to True.
        open_mode (str, optional): open mode to use for opening the file.
            Defaults to `r`
        error_level (str, optional): log level name to use on error.
            Defaults to `ERROR`

    Returns:
        None: on error.
        str: file content on success.
    """
    with self.opened(file_path, verbose, open_mode, error_level) as (handle, failure):
        if not failure:
            text = handle.read()
            if verbose:
                self.info("Contents:")
                for text_line in text.splitlines():
                    self.info(" %s" % text_line)
            return text
    # The open failed; `self.opened` has already logged the reason.
    return None
+
def chdir(self, dir_name):
    """Log the directory change, then make `dir_name` the working directory.

    Args:
        dir_name (str): directory to change into.
    """
    message = "Changing directory to %s." % dir_name
    self.log(message)
    os.chdir(dir_name)
+
def is_exe(self, fpath):
    """Tell whether `fpath` is an existing regular file that can be executed.

    Args:
        fpath (str): path to check.

    Returns:
        bool: True when `fpath` is a file and `os.access` grants `os.X_OK`.
    """
    if not os.path.isfile(fpath):
        return False
    return os.access(fpath, os.X_OK)
+
def which(self, program):
    """OS independent implementation of Unix's which command

    On Windows (per `self._is_windows()`), `.exe` is appended to `program`
    when it is missing. A `program` that contains a directory part is only
    checked as given. A bare name is looked up first through
    `self.query_exe` and then in each directory of the `PATH` entry of
    `self.query_env()`.

    Args:
        program (str): name or path to the program whose executable is
            being searched.

    Returns:
        None: if the executable was not found.
        str: filepath of the executable file.
    """
    needs_suffix = self._is_windows() and not program.endswith(".exe")
    if needs_suffix:
        program = program + ".exe"

    directory = os.path.split(program)[0]
    if directory:
        # Explicit path: no searching, it either is executable or it is not.
        return program if self.is_exe(program) else None

    # If the exe file is defined in the configs let's use that
    configured = self.query_exe(program)
    if self.is_exe(configured):
        return configured

    # If not defined, let's look for it in the $PATH
    search_dirs = self.query_env()["PATH"].split(os.pathsep)
    for search_dir in search_dirs:
        candidate = os.path.join(search_dir, program)
        if self.is_exe(candidate):
            return candidate
    return None
+
+ # More complex commands {{{2
def retry(
    self,
    action,
    attempts=None,
    sleeptime=60,
    max_sleeptime=5 * 60,
    retry_exceptions=(Exception,),
    good_statuses=None,
    cleanup=None,
    error_level=ERROR,
    error_message="%(action)s failed after %(attempts)d tries!",
    failure_status=-1,
    log_level=INFO,
    args=(),
    kwargs=None,
):
    """generic retry command. Ported from `util.retry`_

    Args:
        action (func): callable object to retry.
        attempts (int, optional): maximum number of times to call `action`.
            Defaults to `self.config.get('global_retries', 5)`
        sleeptime (int, optional): number of seconds to wait between
            attempts. Defaults to 60 and doubles each retry attempt, to
            a maximum of `max_sleeptime`
        max_sleeptime (int, optional): maximum value of sleeptime. Defaults
            to 5 minutes
        retry_exceptions (tuple, optional): Exceptions that should be caught.
            If exceptions other than those listed in `retry_exceptions` are
            raised from `action`, they will be raised immediately. Defaults
            to (Exception,)
        good_statuses (object, optional): return values which, if specified,
            will result in retrying if the return value isn't listed.
            Defaults to `None`.
        cleanup (func, optional): If `cleanup` is provided and callable
            it will be called after every failed attempt (a caught
            exception or a status not in `good_statuses`).
            No arguments will be passed to it. If your cleanup function
            requires arguments it is recommended that you wrap it in an
            argumentless function.
            Defaults to `None`.
        error_level (str, optional): log level name in case of error.
            Defaults to `ERROR`.
        error_message (str, optional): string format to use in case
            none of the attempts succeed. Defaults to
            '%(action)s failed after %(attempts)d tries!'
        failure_status (int, optional): flag to return in case the retries
            were not successful. Defaults to -1.
        log_level (str, optional): log level name to use for normal activity.
            Defaults to `INFO`.
        args (tuple, optional): positional arguments to pass onto `action`.
        kwargs (dict, optional): key-value arguments to pass onto `action`.
            Defaults to no keyword arguments.

    Returns:
        object: return value of `action`.
        int: failure status in case of failure retries.
    """
    if not callable(action):
        self.fatal("retry() called with an uncallable method %s!" % action)
    if cleanup and not callable(cleanup):
        self.fatal("retry() called with an uncallable cleanup method %s!" % cleanup)
    if not attempts:
        attempts = self.config.get("global_retries", 5)
    if kwargs is None:
        # Avoid a shared mutable default argument.
        kwargs = {}
    if max_sleeptime < sleeptime:
        self.debug(
            "max_sleeptime %d less than sleeptime %d" % (max_sleeptime, sleeptime)
        )
    # functools.partial objects and callable instances have no __name__.
    action_name = getattr(action, "__name__", repr(action))
    n = 0
    while n <= attempts:
        retry = False
        n += 1
        try:
            self.log(
                "retry: Calling %s with args: %s, kwargs: %s, attempt #%d"
                % (action_name, str(args), str(kwargs), n),
                level=log_level,
            )
            status = action(*args, **kwargs)
            if good_statuses and status not in good_statuses:
                retry = True
        except retry_exceptions as e:
            retry = True
            # `error_message` is used as a %-format template below, so any
            # literal '%' in the exception text must be escaped first.
            error_message = "%s\nCaught exception: %s" % (
                error_message,
                str(e).replace("%", "%%"),
            )
            self.log(
                "retry: attempt #%d caught %s exception: %s"
                % (n, type(e).__name__, str(e)),
                level=INFO,
            )

        if not retry:
            return status

        if cleanup:
            cleanup()
        if n == attempts:
            self.log(
                error_message % {"action": action, "attempts": n},
                level=error_level,
            )
            return failure_status
        if sleeptime > 0:
            self.log(
                "retry: Failed, sleeping %d seconds before retrying" % sleeptime,
                level=log_level,
            )
            time.sleep(sleeptime)
            # Exponential back-off, capped at max_sleeptime.
            sleeptime = min(sleeptime * 2, max_sleeptime)
+
def query_env(
    self,
    partial_env=None,
    replace_dict=None,
    purge_env=(),
    set_self_env=None,
    log_level=DEBUG,
    avoid_host_env=False,
):
    """Environment query/generation method.
    The default, self.query_env(), will look for self.config['env']
    and replace any special strings in there ( %(PATH)s ).
    It will then store it as self.env for speeding things up later.

    If you specify partial_env, partial_env will be used instead of
    self.config['env'], and we don't save self.env as it's a one-off.


    Args:
        partial_env (dict, optional): key-value pairs of the name and value
            of different environment variables. Defaults to
            `self.config['env']`, or an empty dictionary if that is unset.
        replace_dict (dict, optional): key-value pairs used to fill the
            `%(name)s` placeholders in the `partial_env` values. Entries
            from `self.query_abs_dirs()` and the host `PATH` are used for
            any key it does not provide. The dictionary is not modified.
        purge_env (list): environment names to delete from the final
            environment dictionary.
        set_self_env (boolean, optional): whether or not the resulting
            environment should be stored as `self.env`. When left as
            `None` it is treated as True only if `partial_env` was not
            given.
        log_level (str, optional): log level name to use on normal operation.
            Defaults to `DEBUG`.
        avoid_host_env (boolean, optional): if set to True, we will not use
            any environment variables set on the host except PATH.
            Defaults to False.

    Returns:
        dict: environment variables names with their values.
    """
    if partial_env is None:
        if self.env is not None:
            return self.env
        partial_env = self.config.get("env", None)
        if partial_env is None:
            partial_env = {}
        if set_self_env is None:
            set_self_env = True

    env = {"PATH": os.environ["PATH"]} if avoid_host_env else os.environ.copy()

    # Work on a copy so that neither the dict returned by query_abs_dirs()
    # nor the caller's replace_dict gets modified.
    replacements = dict(self.query_abs_dirs())
    replacements["PATH"] = os.environ["PATH"]
    if replace_dict:
        # Caller-supplied values take precedence over the defaults.
        replacements.update(replace_dict)

    for key in partial_env:
        env[key] = partial_env[key] % replacements
        self.log("ENV: %s is now %s" % (key, env[key]), level=log_level)
    for k in purge_env:
        if k in env:
            del env[k]
    if os.name == "nt":
        # Imported here because the file's top-level imports do not
        # include `locale`.
        import locale

        pref_encoding = locale.getpreferredencoding()
        for k, v in list(six.iteritems(env)):
            # When run locally on Windows machines, some environment
            # variables may be unicode.
            env[k] = six.ensure_str(v, pref_encoding)
    if set_self_env:
        self.env = env
    return env
+
def query_exe(
    self,
    exe_name,
    exe_dict="exes",
    default=None,
    return_type=None,
    error_level=FATAL,
):
    """One way to work around PATH rewrites.

    By default, return exe_name, and we'll fall through to searching
    os.environ["PATH"].
    However, if self.config[exe_dict][exe_name] exists, return that.
    This lets us override exe paths via config file.

    A configured value may be a string, a list/tuple of strings, or a dict
    whose values are such strings or sequences. For a dict, the first value
    whose path(s) all exist is used. `%(name)s` placeholders are filled
    from `self.script_obj.query_abs_dirs()` when that method exists.

    Args:
        exe_name (str): name of the executable to search for.
        exe_dict(str, optional): name of the dictionary of executables
            present in `self.config`. Defaults to `exes`.
        default (str, optional): default name of the executable to search
            for. Defaults to `exe_name`.
        return_type (str, optional): type to which the original return
            value will be turned into. Only 'list', 'string' and `None` are
            supported. Defaults to `None`.
        error_level (str, optional): log level name to use on error.

    Returns:
        list: in case return_type is 'list'
        str: in case return_type is 'string'
        None: when a searchable dict has no entry whose paths exist.
        Any: the resolved value unchanged otherwise, including a configured
            value that is not a list, tuple, dict, or string.
    """
    if default is None:
        default = exe_name
    exe = self.config.get(exe_dict, {}).get(exe_name, default)

    substitutions = {}
    if hasattr(self.script_obj, "query_abs_dirs"):
        # allow for 'make': '%(abs_work_dir)s/...' etc.
        substitutions.update(self.script_obj.query_abs_dirs())

    if isinstance(exe, dict):
        # allow for searchable paths of the exe
        for label, candidate in six.iteritems(exe):
            located = False
            if isinstance(candidate, (list, tuple)):
                candidate = [part % substitutions for part in candidate]
                located = all(os.path.exists(part) for part in candidate)
            elif isinstance(candidate, str):
                candidate = candidate % substitutions
                located = os.path.exists(candidate)
            else:
                self.log(
                    "a exes %s dict's value is not a string, list, or tuple. Got key "
                    "%s and value %s" % (exe_name, label, str(candidate)),
                    level=error_level,
                )
            if located:
                exe = candidate
                break
        else:
            # Loop finished without a break: no candidate exists on disk.
            self.log(
                "query_exe was a searchable dict but an existing "
                "path could not be determined. Tried searching in "
                "paths: %s" % (str(exe)),
                level=error_level,
            )
            return None
    elif isinstance(exe, (list, tuple)):
        exe = [part % substitutions for part in exe]
    elif isinstance(exe, str):
        exe = exe % substitutions
    else:
        self.log(
            "query_exe: %s is not a list, tuple, dict, or string: "
            "%s!" % (exe_name, str(exe)),
            level=error_level,
        )
        return exe

    if return_type == "list":
        if isinstance(exe, str):
            exe = [exe]
    elif return_type == "string":
        if isinstance(exe, list):
            exe = subprocess.list2cmdline(exe)
    elif return_type is not None:
        self.log(
            "Unknown return_type type %s requested in query_exe!" % return_type,
            level=error_level,
        )
    return exe
+
def run_command(
    self,
    command,
    cwd=None,
    error_list=None,
    halt_on_failure=False,
    success_codes=None,
    env=None,
    partial_env=None,
    return_type="status",
    throw_exception=False,
    output_parser=None,
    output_timeout=None,
    fatal_exit_code=2,
    error_level=ERROR,
    **kwargs
):
    """Run a command, with logging and error parsing.
    TODO: context_lines

    error_list example:
    [{'regex': re.compile('^Error: LOL J/K'), level=IGNORE},
     {'regex': re.compile('^Error:'), level=ERROR, contextLines='5:5'},
     {'substr': 'THE WORLD IS ENDING', level=FATAL, contextLines='20:'}
    ]
    (context_lines isn't written yet)

    Args:
        command (str | list | tuple): command or sequence of commands to
            execute and log. A string is run through the shell; a list or
            tuple is executed directly.
        cwd (str, optional): directory path from where to execute the
            command. Defaults to `None`.
        error_list (list, optional): list of errors to pass to
            `mozharness.base.log.OutputParser`. Defaults to `None`.
        halt_on_failure (bool, optional): whether or not to redefine the
            log level as `FATAL` on errors. Defaults to False.
        success_codes (list, optional): return codes that count as
            success. Defaults to `[0]`.
        env (dict, optional): key-value of environment values to use to
            run the command. Defaults to None.
        partial_env (dict, optional): key-value of environment values to
            replace from the current environment values. Only used when
            `env` is None. Defaults to None.
        return_type (str, optional): if equal to 'num_errors' then the
            amount of errors matched by `error_list` is returned. Defaults
            to 'status'.
        throw_exception (bool, optional): whether or not to raise an
            exception if the return value of the command doesn't match
            any of the `success_codes`. Defaults to False.
        output_parser (OutputParser, optional): lets you provide an
            instance of your own OutputParser subclass. Defaults to `OutputParser`.
        output_timeout (int): amount of seconds to wait for output before
            the process is killed.
        fatal_exit_code (int, optional): exit code passed to `self.fatal`
            when `halt_on_failure` is set and the command failed.
            Defaults to 2.
        error_level (str, optional): log level name to use on error. Defaults
            to `ERROR`.
        **kwargs: Arbitrary keyword arguments; accepted but not used here.

    Returns:
        int: -1 on error.
        Any: `command` return value is returned otherwise.

    Raises:
        subprocess.CalledProcessError: if `throw_exception` is set and the
            return code is not in `success_codes`.
    """
    if success_codes is None:
        success_codes = [0]
    failure_level = FATAL if halt_on_failure else error_level
    if cwd is not None:
        if not os.path.isdir(cwd):
            self.log(
                "Can't run command %s in non-existent directory '%s'!"
                % (command, cwd),
                level=failure_level,
            )
            return -1
        self.info("Running command: %s in %s" % (command, cwd))
    else:
        self.info("Running command: %s" % (command,))

    is_sequence = isinstance(command, (list, tuple))
    if is_sequence:
        self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
    # Strings go through the shell, argument sequences are exec'd directly.
    shell = not is_sequence

    if env is None:
        if partial_env:
            self.info("Using partial env: %s" % pprint.pformat(partial_env))
            env = self.query_env(partial_env=partial_env)
    else:
        if hasattr(self, "previous_env") and env == self.previous_env:
            self.info("Using env: (same as previous command)")
        else:
            self.info("Using env: %s" % pprint.pformat(env))
            self.previous_env = env

    if output_parser is None:
        parser = OutputParser(
            config=self.config, log_obj=self.log_obj, error_list=error_list
        )
    else:
        parser = output_parser

    # Stays None until a process object exists, so the interrupt handler
    # below never touches an unbound name.
    p = None
    try:
        if output_timeout:

            def processOutput(line):
                parser.add_lines(line)

            def onTimeout():
                self.info(
                    "Automation Error: mozprocess timed out after "
                    "%s seconds running %s" % (str(output_timeout), str(command))
                )

            p = ProcessHandler(
                command,
                shell=shell,
                env=env,
                cwd=cwd,
                storeOutput=False,
                onTimeout=(onTimeout,),
                processOutputLine=[processOutput],
            )
            self.info(
                "Calling %s with output_timeout %d" % (command, output_timeout)
            )
            p.run(outputTimeout=output_timeout)
            p.wait()
            if p.timedOut:
                self.log(
                    "timed out after %s seconds of no output" % output_timeout,
                    level=error_level,
                )
            returncode = int(p.proc.returncode)
        else:
            p = subprocess.Popen(
                command,
                shell=shell,
                stdout=subprocess.PIPE,
                cwd=cwd,
                stderr=subprocess.STDOUT,
                env=env,
                bufsize=0,
            )
            # Feed every line to the parser until stdout reaches EOF ...
            while True:
                line = p.stdout.readline()
                if not line:
                    break
                parser.add_lines(line)
            # ... then block until the process exits instead of polling in
            # a tight loop when it closes stdout before terminating.
            p.wait()
            returncode = p.returncode
    except KeyboardInterrupt:
        if p is None:
            self.log(
                "Process interrupted by the user before the process was started",
                level=failure_level,
            )
        else:
            pid = p.pid
            # Kill first: a FATAL-level log presumably halts the script,
            # which would otherwise leave the child running.
            p.kill()
            self.log(
                "Process interrupted by the user, killing process with pid %s" % pid,
                level=failure_level,
            )
        return -1
    except OSError as e:
        self.log(
            "caught OS error %s: %s while running %s"
            % (e.errno, e.strerror, command),
            level=failure_level,
        )
        return -1

    if returncode not in success_codes:
        if throw_exception:
            raise subprocess.CalledProcessError(returncode, command)
    # Force level to be INFO as message is not necessary in Treeherder
    self.log("Return code: %d" % returncode, level=INFO)

    if halt_on_failure:
        _fail = False
        if returncode not in success_codes:
            self.log(
                "%s not in success codes: %s" % (returncode, success_codes),
                level=error_level,
            )
            _fail = True
        if parser.num_errors:
            self.log("failures found while parsing output", level=error_level)
            _fail = True
        if _fail:
            self.return_code = fatal_exit_code
            self.fatal(
                "Halting on failure while running %s" % (command,),
                exit_code=fatal_exit_code,
            )
    if return_type == "num_errors":
        return parser.num_errors
    return returncode
+
def get_output_from_command(
    self,
    command,
    cwd=None,
    halt_on_failure=False,
    env=None,
    silent=False,
    log_level=INFO,
    tmpfile_base_path="tmpfile",
    return_type="output",
    save_tmpfiles=False,
    throw_exception=False,
    fatal_exit_code=2,
    ignore_errors=False,
    success_codes=None,
    output_filter=None,
):
    """Similar to run_command, but where run_command is an
    os.system(command) analog, get_output_from_command is a `command`
    analog.

    Less error checking by design, though if we figure out how to
    do it without borking the output, great.

    TODO: binary mode? silent is kinda like that.
    TODO: since p.wait() can take a long time, optionally log something
    every N seconds?
    TODO: optionally only keep the first or last (N) line(s) of output?
    TODO: optionally only return the tmp_stdout_filename?

    ignore_errors=True is for the case where a command might produce standard
    error output, but you don't particularly care; setting to True will
    cause standard error to be logged at DEBUG rather than ERROR

    Args:
        command (str | list): command or list of commands to
            execute and log. A string is run through the shell.
        cwd (str, optional): directory path from where to execute the
            command. Defaults to `None`.
        halt_on_failure (bool, optional): whether or not to redefine the
            log level as `FATAL` on error. Defaults to False.
        env (dict, optional): key-value of environment values to use to
            run the command. Defaults to None.
        silent (bool, optional): whether or not to suppress logging the
            stdout of the command. Defaults to False.
        log_level (str, optional): log level name to use on normal execution.
            Defaults to `INFO`.
        tmpfile_base_path (str, optional): base path of the files to which
            stdout and stderr are written (`<base>_stdout`, `<base>_stderr`).
            Defaults to 'tmpfile'.
        return_type (str, optional): if equal to 'output' then the complete
            output of the executed command is returned, otherwise the written
            filenames are returned. Defaults to 'output'.
        save_tmpfiles (bool, optional): whether or not to keep the temporary
            files created from the command output. Defaults to False.
        throw_exception (bool, optional): whether or not to raise an
            exception if the return value of the command is not zero.
            Defaults to False.
        fatal_exit_code (int, optional): exit code passed to `self.fatal`
            when halting on failure. Defaults to 2.
        ignore_errors (bool, optional): when True, stderr output is logged
            at `DEBUG` and does not count as a failure. Defaults to False.
        success_codes (list, optional): return codes that count as
            success. Defaults to `[0]`.
        output_filter (func, optional): provide a function to filter output
            so that noise is reduced and lines are sanitized. default: None

    Returns:
        None: if the cwd is not a directory.
        None: on IOError.
        tuple: stdout and stderr filenames.
        str: stdout output.

    Raises:
        subprocess.CalledProcessError: if `throw_exception` is set and the
            command returned a non-zero code.
    """
    failure_level = FATAL if halt_on_failure else ERROR
    if cwd:
        if not os.path.isdir(cwd):
            self.log(
                "Can't run command %s in non-existent directory %s!"
                % (command, cwd),
                level=failure_level,
            )
            return None
        self.info("Getting output from command: %s in %s" % (command, cwd))
    else:
        # Wrapped in a 1-tuple so a tuple `command` cannot be mistaken for
        # the format arguments.
        self.info("Getting output from command: %s" % (command,))
    if isinstance(command, list):
        self.info("Copy/paste: %s" % subprocess.list2cmdline(command))
    # This could potentially return something?
    tmp_stdout_filename = "%s_stdout" % tmpfile_base_path
    tmp_stderr_filename = "%s_stderr" % tmpfile_base_path
    if success_codes is None:
        success_codes = [0]

    # TODO probably some more elegant solution than 2 similar passes
    try:
        tmp_stdout = open(tmp_stdout_filename, "w")
    except IOError:
        # self.exception() is a logging call; pass it the message rather
        # than concatenating its result onto a string.
        self.exception(
            "Can't open %s for writing!" % tmp_stdout_filename,
            level=failure_level,
        )
        return None
    try:
        tmp_stderr = open(tmp_stderr_filename, "w")
    except IOError:
        # Don't leak the stdout handle opened just above.
        tmp_stdout.close()
        self.exception(
            "Can't open %s for writing!" % tmp_stderr_filename,
            level=failure_level,
        )
        return None
    shell = not isinstance(command, list)

    try:
        p = subprocess.Popen(
            command,
            shell=shell,
            stdout=tmp_stdout,
            cwd=cwd,
            stderr=tmp_stderr,
            env=env,
            bufsize=0,
        )
        # XXX: changed from self.debug to self.log due to this error:
        #      TypeError: debug() takes exactly 1 argument (2 given)
        self.log(
            "Temporary files: %s and %s" % (tmp_stdout_filename, tmp_stderr_filename),
            level=DEBUG,
        )
        p.wait()
    finally:
        # Close both files even when the process could not be started.
        tmp_stdout.close()
        tmp_stderr.close()

    return_level = DEBUG
    output = None
    if return_type == "output" or not silent:
        if os.path.exists(tmp_stdout_filename) and os.path.getsize(
            tmp_stdout_filename
        ):
            output = self.read_from_file(tmp_stdout_filename, verbose=False)
            if output_filter:
                output = output_filter(output)
            if not silent:
                self.log("Output received:", level=log_level)
                output_lines = output.rstrip().splitlines()
                for line in output_lines:
                    if not line or line.isspace():
                        continue
                    if isinstance(line, binary_type):
                        line = line.decode("utf-8")
                    self.log(" %s" % line, level=log_level)
                output = "\n".join(output_lines)
    if os.path.exists(tmp_stderr_filename) and os.path.getsize(tmp_stderr_filename):
        errors = self.read_from_file(tmp_stderr_filename, verbose=False)
        if output_filter:
            errors = output_filter(errors)
        if errors:
            if not ignore_errors:
                return_level = ERROR
            self.log("Errors received:", level=return_level)
            for line in errors.rstrip().splitlines():
                if not line or line.isspace():
                    continue
                if isinstance(line, binary_type):
                    line = line.decode("utf-8")
                self.log(" %s" % line, level=return_level)
    elif p.returncode not in success_codes and not ignore_errors:
        # No stderr output at all, but the exit code still signals failure.
        return_level = ERROR
    # Clean up.
    if not save_tmpfiles:
        self.rmtree(tmp_stderr_filename, log_level=DEBUG)
        self.rmtree(tmp_stdout_filename, log_level=DEBUG)
    if p.returncode and throw_exception:
        raise subprocess.CalledProcessError(p.returncode, command)
    # Force level to be INFO as message is not necessary in Treeherder
    self.log("Return code: %d" % p.returncode, level=INFO)
    if halt_on_failure and return_level == ERROR:
        self.return_code = fatal_exit_code
        self.fatal(
            "Halting on failure while running %s" % (command,),
            exit_code=fatal_exit_code,
        )
    # Hm, options on how to return this? I bet often we'll want
    # output_lines[0] with no newline.
    if return_type != "output":
        return (tmp_stdout_filename, tmp_stderr_filename)
    return output
+
def _touch_file(self, file_name, times=None, error_level=FATAL):
    """touch a file.

    Updates the access/modification times of `file_name`. If that fails,
    the file is created (or truncated) and the times are applied afterwards.

    Args:
        file_name (str): name of the file to touch.
        times (tuple, optional): 2-tuple as specified by `os.utime`_
            Defaults to None.
        error_level (str, optional): log level name in case of error.
            Defaults to `FATAL`.

    .. _`os.utime`:
       https://docs.python.org/3.4/library/os.html?highlight=os.utime#os.utime
    """
    self.info("Touching: %s" % file_name)
    try:
        os.utime(file_name, times)
    except OSError:
        try:
            open(file_name, "w").close()
        except IOError as e:
            msg = "I/O error(%s): %s" % (e.errno, e.strerror)
            # `level` is the keyword every other self.log call uses.
            self.log(msg, level=error_level)
            # The file could not be created, so there is nothing to stamp.
            return
        os.utime(file_name, times)
+
def unpack(
    self,
    filename,
    extract_to,
    extract_dirs=None,
    error_level=ERROR,
    fatal_exit_code=2,
    verbose=False,
):
    """Extract a zip or tar archive, detected from its content.

    Args:
        filename (str): filename of the compressed file.
        extract_to (str): where to extract the compressed file.
        extract_dirs (list, optional): directories inside the archive file to
            extract; passed to `self._filter_entries`. Defaults to `None`.
        error_level (str, optional): log level to use when the archive is
            broken or of an unsupported type. Defaults to `ERROR`.
        fatal_exit_code (int, optional): exit code handed to `self.log`
            together with `error_level`. Defaults to 2.
        verbose (bool, optional): whether or not extracted content should be displayed.
            Defaults to False.

    Raises:
        IOError: on `filename` file not found.

    """
    if not os.path.isfile(filename):
        raise IOError("Could not find file to extract: %s" % filename)

    if zipfile.is_zipfile(filename):
        try:
            self.info(
                "Using ZipFile to extract {} to {}".format(filename, extract_to)
            )
            with zipfile.ZipFile(filename) as archive:
                wanted = self._filter_entries(archive.namelist(), extract_dirs)
                for member in wanted:
                    if verbose:
                        self.info(" %s" % member)
                    archive.extract(member, path=extract_to)

                    # ZipFile doesn't preserve permissions during extraction:
                    # http://bugs.python.org/issue15795
                    extracted = os.path.realpath(os.path.join(extract_to, member))
                    permissions = (archive.getinfo(member).external_attr >> 16) & 0x1FF
                    # Only set permissions if attributes are available. Otherwise all
                    # permissions will be removed eg. on Windows.
                    if permissions:
                        os.chmod(extracted, permissions)
        except zipfile.BadZipfile as zip_error:
            self.log(
                "%s (%s)" % (str(zip_error), filename),
                level=error_level,
                exit_code=fatal_exit_code,
            )
        return

    # Bug 1211882 - is_tarfile cannot be trusted for dmg files
    looks_like_tar = tarfile.is_tarfile(filename) and not filename.lower().endswith(
        ".dmg"
    )
    if not looks_like_tar:
        self.log(
            "No extraction method found for: %s" % filename,
            level=error_level,
            exit_code=fatal_exit_code,
        )
        return

    try:
        self.info("Using TarFile to extract {} to {}".format(filename, extract_to))
        with tarfile.open(filename) as archive:
            wanted = self._filter_entries(archive.getnames(), extract_dirs)
            for member in wanted:
                # Check the member against extract_to before writing it out.
                _validate_tar_member(archive.getmember(member), extract_to)
                if verbose:
                    self.info(" %s" % member)
                archive.extract(member, path=extract_to)
    except tarfile.TarError as tar_error:
        self.log(
            "%s (%s)" % (str(tar_error), filename),
            level=error_level,
            exit_code=fatal_exit_code,
        )
+
def is_taskcluster(self):
    """Returns boolean indicating if we're running in TaskCluster."""
    # Detection relies solely on the TASKCLUSTER_WORKER_TYPE environment
    # variable being set; this may need expanding in the future.
    worker_type = os.environ.get("TASKCLUSTER_WORKER_TYPE")
    return worker_type is not None
+
+
def PreScriptRun(func):
    """Decorator for methods that will be called before script execution.

    Marks `func` with a `_pre_run_listener` attribute. Each method on a
    BaseScript having this decorator will be called at the beginning of
    BaseScript.run().

    The return value is ignored. Exceptions will abort execution.
    """
    setattr(func, "_pre_run_listener", True)
    return func
+
+
def PostScriptRun(func):
    """Decorator for methods that will be called after script execution.

    Marks `func` with a `_post_run_listener` attribute. This is similar to
    PreScriptRun except it is called at the end of execution. The method
    will always be fired, even if execution fails.
    """
    setattr(func, "_post_run_listener", True)
    return func
+
+
def PreScriptAction(action=None):
    """Decorator for methods that will be called at the beginning of each action.

    Each method on a BaseScript having this decorator will be called during
    BaseScript.run() before an individual action is executed. The method will
    receive the action's name as an argument.

    If no values are passed to the decorator, it will be applied to every
    action. If a string is passed, the decorated function will only be called
    for the action of that name.

    The decorator works both bare (`@PreScriptAction`) and called
    (`@PreScriptAction()` / `@PreScriptAction('name')`); the chosen action
    name, or `None` for "every action", is stored on the function as
    `_pre_action_listener`.

    The return value of the method is ignored. Exceptions will abort execution.
    """

    def _tag(func, action_name):
        func._pre_action_listener = action_name
        return func

    # Bare usage: `action` is the decorated function itself, not a name.
    if isinstance(action, type(_tag)):
        return _tag(action, None)

    def _decorate(func):
        return _tag(func, action)

    return _decorate
+
+
def PostScriptAction(action=None):
    """Decorator for methods that will be called at the end of each action.

    This behaves similarly to PreScriptAction. It varies in that it is called
    after execution of the action.

    The decorated method will receive the action name as a positional argument.
    It will then receive the following named arguments:

        success - Bool indicating whether the action finished successfully.

    The decorated method will always be called, even if the action threw an
    exception.

    The chosen action name, or `None` for "every action", is stored on the
    function as `_post_action_listener`.

    The return value is ignored.
    """

    def _tag(func, action_name):
        func._post_action_listener = action_name
        return func

    # Bare usage: `action` is the decorated function itself, not a name.
    if isinstance(action, type(_tag)):
        return _tag(action, None)

    def _decorate(func):
        return _tag(func, action)

    return _decorate
+
+
+# BaseScript {{{1
+class BaseScript(ScriptMixin, LogMixin, object):
+ def __init__(
+ self,
+ config_options=None,
+ ConfigClass=BaseConfig,
+ default_log_level="info",
+ **kwargs
+ ):
+ self._return_code = 0
+ super(BaseScript, self).__init__()
+
+ self.log_obj = None
+ self.abs_dirs = None
+ if config_options is None:
+ config_options = []
+ self.summary_list = []
+ self.failures = []
+ rw_config = ConfigClass(config_options=config_options, **kwargs)
+ self.config = rw_config.get_read_only_config()
+ self.actions = tuple(rw_config.actions)
+ self.all_actions = tuple(rw_config.all_actions)
+ self.env = None
+ self.new_log_obj(default_log_level=default_log_level)
+ self.script_obj = self
+
+ # Indicate we're a source checkout if VCS directory is present at the
+ # appropriate place. This code will break if this file is ever moved
+ # to another directory.
+ self.topsrcdir = None
+
+ srcreldir = "testing/mozharness/mozharness/base"
+ here = os.path.normpath(os.path.dirname(__file__))
+ if here.replace("\\", "/").endswith(srcreldir):
+ topsrcdir = os.path.normpath(os.path.join(here, "..", "..", "..", ".."))
+ hg_dir = os.path.join(topsrcdir, ".hg")
+ git_dir = os.path.join(topsrcdir, ".git")
+ if os.path.isdir(hg_dir) or os.path.isdir(git_dir):
+ self.topsrcdir = topsrcdir
+
+ # Set self.config to read-only.
+ #
+ # We can create intermediate config info programmatically from
+ # this in a repeatable way, with logs; this is how we straddle the
+ # ideal-but-not-user-friendly static config and the
+ # easy-to-write-hard-to-debug writable config.
+ #
+ # To allow for other, script-specific configurations
+ # (e.g., props json parsing), before locking,
+ # call self._pre_config_lock(). If needed, this method can
+ # alter self.config.
+ self._pre_config_lock(rw_config)
+ self._config_lock()
+
+ self.info("Run as %s" % rw_config.command_line)
+ if self.config.get("dump_config_hierarchy"):
+ # we only wish to dump and display what self.config is made up of,
+ # against the current script + args, without actually running any
+ # actions
+ self._dump_config_hierarchy(rw_config.all_cfg_files_and_dicts)
+ if self.config.get("dump_config"):
+ self.dump_config(exit_on_finish=True)
+
+ # Collect decorated methods. We simply iterate over the attributes of
+ # the current class instance and look for signatures deposited by
+ # the decorators.
+ self._listeners = dict(
+ pre_run=[],
+ pre_action=[],
+ post_action=[],
+ post_run=[],
+ )
+ for k in dir(self):
+ try:
+ item = self._getattr(k)
+ except Exception as e:
+ item = None
+ self.warning(
+ "BaseScript collecting decorated methods: "
+ "failure to get attribute {}: {}".format(k, str(e))
+ )
+ if not item:
+ continue
+
+ # We only decorate methods, so ignore other types.
+ if not inspect.ismethod(item):
+ continue
+
+ if hasattr(item, "_pre_run_listener"):
+ self._listeners["pre_run"].append(k)
+
+ if hasattr(item, "_pre_action_listener"):
+ self._listeners["pre_action"].append((k, item._pre_action_listener))
+
+ if hasattr(item, "_post_action_listener"):
+ self._listeners["post_action"].append((k, item._post_action_listener))
+
+ if hasattr(item, "_post_run_listener"):
+ self._listeners["post_run"].append(k)
+
+ def _getattr(self, name):
+ # `getattr(self, k)` will call the method `k` for any property
+ # access. If the property depends upon a module which has not
+ # been imported at the time the BaseScript initializer is
+ # executed, this property access will result in an
+ # Exception. Until Python 3's `inspect.getattr_static` is
+ # available, the simplest approach is to ignore the specific
+ # properties which are known to cause issues. Currently
+ # adb_path and device are ignored since they require the
+ # availablity of the mozdevice package which is not guaranteed
+ # when BaseScript is called.
+ property_list = set(["adb_path", "device"])
+ if six.PY2:
+ if name in property_list:
+ item = None
+ else:
+ item = getattr(self, name)
+ else:
+ item = inspect.getattr_static(self, name)
+ if type(item) == property:
+ item = None
+ else:
+ item = getattr(self, name)
+ return item
+
+ def _dump_config_hierarchy(self, cfg_files):
+ """interpret each config file used.
+
+ This will show which keys/values are being added or overwritten by
+ other config files depending on their hierarchy (when they were added).
+ """
+ # go through each config_file. We will start with the lowest and
+ # print its keys/values that are being used in self.config. If any
+ # keys/values are present in a config file with a higher precedence,
+ # ignore those.
+ dirs = self.query_abs_dirs()
+ cfg_files_dump_config = {} # we will dump this to file
+ # keep track of keys that did not come from a config file
+ keys_not_from_file = set(self.config.keys())
+ if not cfg_files:
+ cfg_files = []
+ self.info("Total config files: %d" % (len(cfg_files)))
+ if len(cfg_files):
+ self.info("cfg files used from lowest precedence to highest:")
+ for i, (target_file, target_dict) in enumerate(cfg_files):
+ unique_keys = set(target_dict.keys())
+ unique_dict = {}
+ # iterate through the target_dicts remaining 'higher' cfg_files
+ remaining_cfgs = cfg_files[slice(i + 1, len(cfg_files))]
+ # where higher == more precedent
+ for ii, (higher_file, higher_dict) in enumerate(remaining_cfgs):
+ # now only keep keys/values that are not overwritten by a
+ # higher config
+ unique_keys = unique_keys.difference(set(higher_dict.keys()))
+ # unique_dict we know now has only keys/values that are unique to
+ # this config file.
+ unique_dict = dict((key, target_dict.get(key)) for key in unique_keys)
+ cfg_files_dump_config[target_file] = unique_dict
+ self.action_message("Config File %d: %s" % (i + 1, target_file))
+ self.info(pprint.pformat(unique_dict))
+ # let's also find out which keys/values from self.config are not
+ # from each target config file dict
+ keys_not_from_file = keys_not_from_file.difference(set(target_dict.keys()))
+ not_from_file_dict = dict(
+ (key, self.config.get(key)) for key in keys_not_from_file
+ )
+ cfg_files_dump_config["not_from_cfg_file"] = not_from_file_dict
+ self.action_message(
+ "Not from any config file (default_config, " "cmd line options, etc)"
+ )
+ self.info(pprint.pformat(not_from_file_dict))
+
+ # finally, let's dump this output as JSON and exit early
+ self.dump_config(
+ os.path.join(dirs["abs_log_dir"], "localconfigfiles.json"),
+ cfg_files_dump_config,
+ console_output=False,
+ exit_on_finish=True,
+ )
+
+ def _pre_config_lock(self, rw_config):
+ """This empty method can allow for config checking and manipulation
+ before the config lock, when overridden in scripts.
+ """
+ pass
+
+ def _config_lock(self):
+ """After this point, the config is locked and should not be
+ manipulated (based on mozharness.base.config.ReadOnlyDict)
+ """
+ self.config.lock()
+
+ def _possibly_run_method(self, method_name, error_if_missing=False):
+ """This is here for run()."""
+ if hasattr(self, method_name) and callable(self._getattr(method_name)):
+ return getattr(self, method_name)()
+ elif error_if_missing:
+ self.error("No such method %s!" % method_name)
+
+ def run_action(self, action):
+ if action not in self.actions:
+ self.action_message("Skipping %s step." % action)
+ return
+
+ method_name = action.replace("-", "_")
+ self.action_message("Running %s step." % action)
+
+ # An exception during a pre action listener should abort execution.
+ for fn, target in self._listeners["pre_action"]:
+ if target is not None and target != action:
+ continue
+
+ try:
+ self.info("Running pre-action listener: %s" % fn)
+ method = getattr(self, fn)
+ method(action)
+ except Exception:
+ self.error(
+ "Exception during pre-action for %s: %s"
+ % (action, traceback.format_exc())
+ )
+
+ for fn, target in self._listeners["post_action"]:
+ if target is not None and target != action:
+ continue
+
+ try:
+ self.info("Running post-action listener: %s" % fn)
+ method = getattr(self, fn)
+ method(action, success=False)
+ except Exception:
+ self.error(
+ "An additional exception occurred during "
+ "post-action for %s: %s" % (action, traceback.format_exc())
+ )
+
+ self.fatal("Aborting due to exception in pre-action listener.")
+
+ # We always run post action listeners, even if the main routine failed.
+ success = False
+ try:
+ self.info("Running main action method: %s" % method_name)
+ self._possibly_run_method("preflight_%s" % method_name)
+ self._possibly_run_method(method_name, error_if_missing=True)
+ self._possibly_run_method("postflight_%s" % method_name)
+ success = True
+ finally:
+ post_success = True
+ for fn, target in self._listeners["post_action"]:
+ if target is not None and target != action:
+ continue
+
+ try:
+ self.info("Running post-action listener: %s" % fn)
+ method = getattr(self, fn)
+ method(action, success=success and self.return_code == 0)
+ except Exception:
+ post_success = False
+ self.error(
+ "Exception during post-action for %s: %s"
+ % (action, traceback.format_exc())
+ )
+
+ step_result = "success" if success else "failed"
+ self.action_message("Finished %s step (%s)" % (action, step_result))
+
+ if not post_success:
+ self.fatal("Aborting due to failure in post-action listener.")
+
+ def run(self):
+ """Default run method.
+ This is the "do everything" method, based on actions and all_actions.
+
+ First run self.dump_config() if it exists.
+ Second, go through the list of all_actions.
+ If they're in the list of self.actions, try to run
+ self.preflight_ACTION(), self.ACTION(), and self.postflight_ACTION().
+
+ Preflight is sanity checking before doing anything time consuming or
+ destructive.
+
+ Postflight is quick testing for success after an action.
+
+ """
+ for fn in self._listeners["pre_run"]:
+ try:
+ self.info("Running pre-run listener: %s" % fn)
+ method = getattr(self, fn)
+ method()
+ except Exception:
+ self.error(
+ "Exception during pre-run listener: %s" % traceback.format_exc()
+ )
+
+ for fn in self._listeners["post_run"]:
+ try:
+ method = getattr(self, fn)
+ method()
+ except Exception:
+ self.error(
+ "An additional exception occurred during a "
+ "post-run listener: %s" % traceback.format_exc()
+ )
+
+ self.fatal("Aborting due to failure in pre-run listener.")
+
+ self.dump_config()
+ try:
+ for action in self.all_actions:
+ self.run_action(action)
+ except Exception:
+ self.fatal("Uncaught exception: %s" % traceback.format_exc())
+ finally:
+ post_success = True
+ for fn in self._listeners["post_run"]:
+ try:
+ self.info("Running post-run listener: %s" % fn)
+ method = getattr(self, fn)
+ method()
+ except Exception:
+ post_success = False
+ self.error(
+ "Exception during post-run listener: %s"
+ % traceback.format_exc()
+ )
+
+ if not post_success:
+ self.fatal("Aborting due to failure in post-run listener.")
+
+ return self.return_code
+
+ def run_and_exit(self):
+ """Runs the script and exits the current interpreter."""
+ rc = self.run()
+ if rc != 0:
+ self.warning("returning nonzero exit status %d" % rc)
+ sys.exit(rc)
+
+ def clobber(self):
+ """
+ Delete the working directory
+ """
+ dirs = self.query_abs_dirs()
+ self.rmtree(dirs["abs_work_dir"], error_level=FATAL)
+
+ def query_abs_dirs(self):
+ """We want to be able to determine where all the important things
+ are. Absolute paths lend themselves well to this, though I wouldn't
+ be surprised if this causes some issues somewhere.
+
+ This should be overridden in any script that has additional dirs
+ to query.
+
+ The query_* methods tend to set self.VAR variables as their
+ runtime cache.
+ """
+ if self.abs_dirs:
+ return self.abs_dirs
+ c = self.config
+ dirs = {}
+ dirs["base_work_dir"] = c["base_work_dir"]
+ dirs["abs_work_dir"] = os.path.join(c["base_work_dir"], c["work_dir"])
+ dirs["abs_log_dir"] = os.path.join(c["base_work_dir"], c.get("log_dir", "logs"))
+ if "GECKO_PATH" in os.environ:
+ dirs["abs_src_dir"] = os.environ["GECKO_PATH"]
+ self.abs_dirs = dirs
+ return self.abs_dirs
+
+ def dump_config(
+ self, file_path=None, config=None, console_output=True, exit_on_finish=False
+ ):
+ """Dump self.config to localconfig.json"""
+ config = config or self.config
+ dirs = self.query_abs_dirs()
+ if not file_path:
+ file_path = os.path.join(dirs["abs_log_dir"], "localconfig.json")
+ self.info("Dumping config to %s." % file_path)
+ self.mkdir_p(os.path.dirname(file_path))
+ json_config = json.dumps(config, sort_keys=True, indent=4)
+ fh = codecs.open(file_path, encoding="utf-8", mode="w+")
+ fh.write(json_config)
+ fh.close()
+ if console_output:
+ self.info(pprint.pformat(config))
+ if exit_on_finish:
+ sys.exit()
+
+ # logging {{{2
+ def new_log_obj(self, default_log_level="info"):
+ c = self.config
+ log_dir = os.path.join(c["base_work_dir"], c.get("log_dir", "logs"))
+ log_config = {
+ "logger_name": "Simple",
+ "log_name": "log",
+ "log_dir": log_dir,
+ "log_level": default_log_level,
+ "log_format": "%(asctime)s %(levelname)8s - %(message)s",
+ "log_to_console": True,
+ "append_to_log": False,
+ }
+ log_type = self.config.get("log_type", "console")
+ for key in log_config.keys():
+ value = self.config.get(key, None)
+ if value is not None:
+ log_config[key] = value
+ if log_type == "multi":
+ self.log_obj = MultiFileLogger(**log_config)
+ elif log_type == "simple":
+ self.log_obj = SimpleFileLogger(**log_config)
+ else:
+ self.log_obj = ConsoleLogger(**log_config)
+
+ def action_message(self, message):
+ self.info(
+ "[mozharness: %sZ] %s"
+ % (datetime.datetime.utcnow().isoformat(" "), message)
+ )
+
+ def summary(self):
+ """Print out all the summary lines added via add_summary()
+ throughout the script.
+
+ I'd like to revisit how to do this in a prettier fashion.
+ """
+ self.action_message("%s summary:" % self.__class__.__name__)
+ if self.summary_list:
+ for item in self.summary_list:
+ try:
+ self.log(item["message"], level=item["level"])
+ except ValueError:
+ """log is closed; print as a default. Ran into this
+ when calling from __del__()"""
+ print("### Log is closed! (%s)" % item["message"])
+
+ def add_summary(self, message, level=INFO):
+ self.summary_list.append({"message": message, "level": level})
+ # TODO write to a summary-only log?
+ # Summaries need a lot more love.
+ self.log(message, level=level)
+
+ def summarize_success_count(
+ self, success_count, total_count, message="%d of %d successful.", level=None
+ ):
+ if level is None:
+ level = INFO
+ if success_count < total_count:
+ level = ERROR
+ self.add_summary(message % (success_count, total_count), level=level)
+
+ def get_hash_for_file(self, file_path, hash_type="sha512"):
+ bs = 65536
+ hasher = hashlib.new(hash_type)
+ with open(file_path, "rb") as fh:
+ buf = fh.read(bs)
+ while len(buf) > 0:
+ hasher.update(buf)
+ buf = fh.read(bs)
+ return hasher.hexdigest()
+
+ @property
+ def return_code(self):
+ return self._return_code
+
+ @return_code.setter
+ def return_code(self, code):
+ old_return_code, self._return_code = self._return_code, code
+ if old_return_code != code:
+ self.warning("setting return code to %d" % code)