summaryrefslogtreecommitdiffstats
path: root/testing/mozharness/external_tools/extract_and_run_command.py
diff options
context:
space:
mode:
Diffstat (limited to 'testing/mozharness/external_tools/extract_and_run_command.py')
-rw-r--r--testing/mozharness/external_tools/extract_and_run_command.py222
1 files changed, 222 insertions, 0 deletions
diff --git a/testing/mozharness/external_tools/extract_and_run_command.py b/testing/mozharness/external_tools/extract_and_run_command.py
new file mode 100644
index 0000000000..1f8f29dcf6
--- /dev/null
+++ b/testing/mozharness/external_tools/extract_and_run_command.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Usage: extract_and_run_command.py [-j N] [command to run] -- [files and/or directories]
+ -j is the number of workers to start, defaulting to 1.
+ [command to run] must be a command that can accept one or many files
+ to process as arguments.
+
+WARNING: This script does NOT respond to SIGINT. You must use SIGQUIT or SIGKILL to
+ terminate it early.
+"""
+
+### The canonical location for this file is
+### https://hg.mozilla.org/build/tools/file/default/stage/extract_and_run_command.py
+###
+### Please update the copy in puppet to deploy new changes to
+### stage.mozilla.org, see
+# https://wiki.mozilla.org/ReleaseEngineering/How_To/Modify_scripts_on_stage
+
+from __future__ import absolute_import
+import logging
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import time
+from os import path
+from threading import Thread
+
+try:
+ from Queue import Queue
+except ImportError:
+ from queue import Queue
+
+logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
+log = logging.getLogger(__name__)
+
+try:
+ # the future - https://github.com/mozilla/build-mar via a venv
+ from mardor.marfile import BZ2MarFile
+except:
+ # the past - http://hg.mozilla.org/build/tools/file/default/buildfarm/utils/mar.py
+ sys.path.append(
+ path.join(path.dirname(path.realpath(__file__)), "../buildfarm/utils")
+ )
+ from mar import BZ2MarFile
+
+SEVENZIP = "7za"
+
+
+def extractMar(filename, tempdir):
+ m = BZ2MarFile(filename)
+ m.extractall(path=tempdir)
+
+
+def extractExe(filename, tempdir):
+ try:
+ # We don't actually care about output, put we redirect to a tempfile
+ # to avoid deadlocking in wait() when stdout=PIPE
+ fd = tempfile.TemporaryFile()
+ proc = subprocess.Popen(
+ [SEVENZIP, "x", "-o%s" % tempdir, filename],
+ stdout=fd,
+ stderr=subprocess.STDOUT,
+ )
+ proc.wait()
+ except subprocess.CalledProcessError:
+ # Not all EXEs are 7-zip files, so we have to ignore extraction errors
+ pass
+
+
+# The keys here are matched against the last 3 characters of filenames.
+# The values are callables that accept two string arguments.
+EXTRACTORS = {
+ ".mar": extractMar,
+ ".exe": extractExe,
+}
+
+
+def find_files(d):
+ """yields all of the files in `d'"""
+ for root, _, files in os.walk(d):
+ for f in files:
+ yield path.abspath(path.join(root, f))
+
+
+def rchmod(d, mode=0o755):
+ """chmods everything in `d' to `mode', including `d' itself"""
+ os.chmod(d, mode)
+ for root, dirs, files in os.walk(d):
+ for item in dirs:
+ os.chmod(path.join(root, item), mode)
+ for item in files:
+ os.chmod(path.join(root, item), mode)
+
+
+def maybe_extract(filename):
+ """If an extractor is found for `filename', extracts it to a temporary
+ directory and chmods it. The consumer is responsible for removing
+ the extracted files, if desired."""
+ ext = path.splitext(filename)[1]
+ if ext not in EXTRACTORS:
+ return None
+ # Append the full filepath to the tempdir
+ tempdir_root = tempfile.mkdtemp()
+ tempdir = path.join(tempdir_root, filename.lstrip("/"))
+ os.makedirs(tempdir)
+ EXTRACTORS[ext](filename, tempdir)
+ rchmod(tempdir_root)
+ return tempdir_root
+
+
+def process(item, command):
+ def format_time(t):
+ return time.strftime("%H:%M:%S", time.localtime(t))
+
+ # Buffer output to avoid interleaving of multiple workers'
+ logs = []
+ args = [item]
+ proc = None
+ start = time.time()
+ logs.append("START %s: %s" % (format_time(start), item))
+ # If the file was extracted, we need to process all of its files, too.
+ tempdir = maybe_extract(item)
+ if tempdir:
+ for f in find_files(tempdir):
+ args.append(f)
+
+ try:
+ fd = tempfile.TemporaryFile()
+ proc = subprocess.Popen(command + args, stdout=fd)
+ proc.wait()
+ if proc.returncode != 0:
+ raise Exception("returned %s" % proc.returncode)
+ finally:
+ if tempdir:
+ shutil.rmtree(tempdir)
+ fd.seek(0)
+ # rstrip() here to avoid an unnecessary newline, if it exists.
+ logs.append(fd.read().rstrip())
+ end = time.time()
+ elapsed = end - start
+ logs.append(
+ "END %s (%d seconds elapsed): %s\n" % (format_time(end), elapsed, item)
+ )
+ # Now that we've got all of our output, print it. It's important that
+ # the logging module is used for this, because "print" is not
+ # thread-safe.
+ log.info("\n".join(logs))
+
+
+def worker(command, errors):
+ item = q.get()
+ while item != None:
+ try:
+ process(item, command)
+ except:
+ errors.put(item)
+ item = q.get()
+
+
+if __name__ == "__main__":
+ # getopt is used in favour of optparse to enable "--" as a separator
+ # between the command and list of files. optparse doesn't allow that.
+ from getopt import getopt
+
+ options, args = getopt(sys.argv[1:], "j:h", ["help"])
+
+ concurrency = 1
+ for o, a in options:
+ if o == "-j":
+ concurrency = int(a)
+ elif o in ("-h", "--help"):
+ log.info(__doc__)
+ sys.exit(0)
+
+ if len(args) < 3 or "--" not in args:
+ log.error(__doc__)
+ sys.exit(1)
+
+ command = []
+ while args[0] != "--":
+ command.append(args.pop(0))
+ args.pop(0)
+
+ q = Queue()
+ errors = Queue()
+ threads = []
+ for i in range(concurrency):
+ t = Thread(target=worker, args=(command, errors))
+ t.start()
+ threads.append(t)
+
+ # find_files is a generator, so work will begin prior to it finding
+ # all of the files
+ for arg in args:
+ if path.isfile(arg):
+ q.put(arg)
+ else:
+ for f in find_files(arg):
+ q.put(f)
+ # Because the workers are started before we start populating the q
+ # they can't use .empty() to determine whether or not their done.
+ # We also can't use q.join() or j.task_done(), because we need to
+ # support Python 2.4. We know that find_files won't yield None,
+ # so we can detect doneness by having workers die when they get None
+ # as an item.
+ for i in range(concurrency):
+ q.put(None)
+
+ for t in threads:
+ t.join()
+
+ if not errors.empty():
+ log.error("Command failed for the following files:")
+ while not errors.empty():
+ log.error(" %s" % errors.get())
+ sys.exit(1)