diff options
Diffstat (limited to 'testing/mozharness/external_tools/extract_and_run_command.py')
-rw-r--r-- | testing/mozharness/external_tools/extract_and_run_command.py | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/testing/mozharness/external_tools/extract_and_run_command.py b/testing/mozharness/external_tools/extract_and_run_command.py new file mode 100644 index 0000000000..1f8f29dcf6 --- /dev/null +++ b/testing/mozharness/external_tools/extract_and_run_command.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Usage: extract_and_run_command.py [-j N] [command to run] -- [files and/or directories] + -j is the number of workers to start, defaulting to 1. + [command to run] must be a command that can accept one or many files + to process as arguments. + +WARNING: This script does NOT respond to SIGINT. You must use SIGQUIT or SIGKILL to + terminate it early. +""" + +### The canonical location for this file is +### https://hg.mozilla.org/build/tools/file/default/stage/extract_and_run_command.py +### +### Please update the copy in puppet to deploy new changes to +### stage.mozilla.org, see +# https://wiki.mozilla.org/ReleaseEngineering/How_To/Modify_scripts_on_stage + +from __future__ import absolute_import +import logging +import os +import shutil +import subprocess +import sys +import tempfile +import time +from os import path +from threading import Thread + +try: + from Queue import Queue +except ImportError: + from queue import Queue + +logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s") +log = logging.getLogger(__name__) + +try: + # the future - https://github.com/mozilla/build-mar via a venv + from mardor.marfile import BZ2MarFile +except: + # the past - http://hg.mozilla.org/build/tools/file/default/buildfarm/utils/mar.py + sys.path.append( + path.join(path.dirname(path.realpath(__file__)), "../buildfarm/utils") + ) + from mar import BZ2MarFile + +SEVENZIP = "7za" + + +def extractMar(filename, tempdir): + m = BZ2MarFile(filename) + m.extractall(path=tempdir) + + +def extractExe(filename, tempdir): + try: + # We don't actually care about output, put we redirect to a tempfile + # to avoid deadlocking in wait() when stdout=PIPE + fd = tempfile.TemporaryFile() + proc = subprocess.Popen( + [SEVENZIP, "x", "-o%s" % tempdir, filename], + stdout=fd, + stderr=subprocess.STDOUT, + ) + proc.wait() + except subprocess.CalledProcessError: + # Not all EXEs are 7-zip files, so we have to ignore extraction errors + pass + + +# The keys here are matched against the last 3 characters of filenames. +# The values are callables that accept two string arguments. +EXTRACTORS = { + ".mar": extractMar, + ".exe": extractExe, +} + + +def find_files(d): + """yields all of the files in `d'""" + for root, _, files in os.walk(d): + for f in files: + yield path.abspath(path.join(root, f)) + + +def rchmod(d, mode=0o755): + """chmods everything in `d' to `mode', including `d' itself""" + os.chmod(d, mode) + for root, dirs, files in os.walk(d): + for item in dirs: + os.chmod(path.join(root, item), mode) + for item in files: + os.chmod(path.join(root, item), mode) + + +def maybe_extract(filename): + """If an extractor is found for `filename', extracts it to a temporary + directory and chmods it. The consumer is responsible for removing + the extracted files, if desired.""" + ext = path.splitext(filename)[1] + if ext not in EXTRACTORS: + return None + # Append the full filepath to the tempdir + tempdir_root = tempfile.mkdtemp() + tempdir = path.join(tempdir_root, filename.lstrip("/")) + os.makedirs(tempdir) + EXTRACTORS[ext](filename, tempdir) + rchmod(tempdir_root) + return tempdir_root + + +def process(item, command): + def format_time(t): + return time.strftime("%H:%M:%S", time.localtime(t)) + + # Buffer output to avoid interleaving of multiple workers' + logs = [] + args = [item] + proc = None + start = time.time() + logs.append("START %s: %s" % (format_time(start), item)) + # If the file was extracted, we need to process all of its files, too. + tempdir = maybe_extract(item) + if tempdir: + for f in find_files(tempdir): + args.append(f) + + try: + fd = tempfile.TemporaryFile() + proc = subprocess.Popen(command + args, stdout=fd) + proc.wait() + if proc.returncode != 0: + raise Exception("returned %s" % proc.returncode) + finally: + if tempdir: + shutil.rmtree(tempdir) + fd.seek(0) + # rstrip() here to avoid an unnecessary newline, if it exists. + logs.append(fd.read().rstrip()) + end = time.time() + elapsed = end - start + logs.append( + "END %s (%d seconds elapsed): %s\n" % (format_time(end), elapsed, item) + ) + # Now that we've got all of our output, print it. It's important that + # the logging module is used for this, because "print" is not + # thread-safe. + log.info("\n".join(logs)) + + +def worker(command, errors): + item = q.get() + while item != None: + try: + process(item, command) + except: + errors.put(item) + item = q.get() + + +if __name__ == "__main__": + # getopt is used in favour of optparse to enable "--" as a separator + # between the command and list of files. optparse doesn't allow that. + from getopt import getopt + + options, args = getopt(sys.argv[1:], "j:h", ["help"]) + + concurrency = 1 + for o, a in options: + if o == "-j": + concurrency = int(a) + elif o in ("-h", "--help"): + log.info(__doc__) + sys.exit(0) + + if len(args) < 3 or "--" not in args: + log.error(__doc__) + sys.exit(1) + + command = [] + while args[0] != "--": + command.append(args.pop(0)) + args.pop(0) + + q = Queue() + errors = Queue() + threads = [] + for i in range(concurrency): + t = Thread(target=worker, args=(command, errors)) + t.start() + threads.append(t) + + # find_files is a generator, so work will begin prior to it finding + # all of the files + for arg in args: + if path.isfile(arg): + q.put(arg) + else: + for f in find_files(arg): + q.put(f) + # Because the workers are started before we start populating the q + # they can't use .empty() to determine whether or not their done. + # We also can't use q.join() or j.task_done(), because we need to + # support Python 2.4. We know that find_files won't yield None, + # so we can detect doneness by having workers die when they get None + # as an item. + for i in range(concurrency): + q.put(None) + + for t in threads: + t.join() + + if not errors.empty(): + log.error("Command failed for the following files:") + while not errors.empty(): + log.error(" %s" % errors.get()) + sys.exit(1) |