From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Fri, 19 Apr 2024 02:47:55 +0200
Subject: Adding upstream version 124.0.1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 taskcluster/scripts/misc/fetch-content | 900 +++++++++++++++++++++++++++++++++
 1 file changed, 900 insertions(+)
 create mode 100755 taskcluster/scripts/misc/fetch-content

(limited to 'taskcluster/scripts/misc/fetch-content')

diff --git a/taskcluster/scripts/misc/fetch-content b/taskcluster/scripts/misc/fetch-content
new file mode 100755
index 0000000000..9e6a1f4ef0
--- /dev/null
+++ b/taskcluster/scripts/misc/fetch-content
@@ -0,0 +1,900 @@
+#!/usr/bin/python3 -u
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import argparse
+import bz2
+import concurrent.futures
+import contextlib
+import datetime
+import gzip
+import hashlib
+import io
+import json
+import lzma
+import multiprocessing
+import os
+import pathlib
+import random
+import re
+import stat
+import subprocess
+import sys
+import tarfile
+import tempfile
+import time
+import urllib.parse
+import urllib.request
+import zipfile
+
+try:
+    import zstandard
+except ImportError:
+    zstandard = None
+
+try:
+    import certifi
+except ImportError:
+    certifi = None
+
+
+CONCURRENCY = multiprocessing.cpu_count()
+
+
+def log(msg):
+    print(msg, file=sys.stderr)
+    sys.stderr.flush()
+
+
+class IntegrityError(Exception):
+    """Represents an integrity error when downloading a URL."""
+
+
+def ZstdCompressor(*args, **kwargs):
+    if not zstandard:
+        raise ValueError("zstandard Python package not available")
+    return zstandard.ZstdCompressor(*args, **kwargs)
+
+
+def ZstdDecompressor(*args, **kwargs):
+    if not zstandard:
+        raise ValueError("zstandard Python package not available")
+    return zstandard.ZstdDecompressor(*args, **kwargs)
+
+
+@contextlib.contextmanager
+def rename_after_close(fname, *args, **kwargs):
+    """
+    Context manager that opens a temporary file to use as a writer,
+    and closes the file on context exit, renaming it to the expected
+    file name in case of success, or removing it in case of failure.
+
+    Takes the same options as open(), but must be used as a context
+    manager.
+    """
+    path = pathlib.Path(fname)
+    tmp = path.with_name("%s.tmp" % path.name)
+    try:
+        with tmp.open(*args, **kwargs) as fh:
+            yield fh
+    except Exception:
+        tmp.unlink()
+        raise
+    else:
+        tmp.rename(fname)
+
+
+# The following is copied from
+# https://github.com/mozilla-releng/redo/blob/6d07678a014e0c525e54a860381a165d34db10ff/redo/__init__.py#L15-L85
+def retrier(attempts=5, sleeptime=10, max_sleeptime=300, sleepscale=1.5, jitter=1):
+    """
+    A generator function that sleeps between retries, handles exponential
+    backoff and jitter. The action you are retrying is meant to run after
+    retrier yields.
+
+    At each iteration, we sleep for sleeptime + random.randint(-jitter, jitter).
+    Afterwards sleeptime is multiplied by sleepscale for the next iteration.
+
+    Args:
+        attempts (int): maximum number of times to try; defaults to 5
+        sleeptime (float): how many seconds to sleep between tries; defaults to
+                           60s (one minute)
+        max_sleeptime (float): the longest we'll sleep, in seconds; defaults to
+                               300s (five minutes)
+        sleepscale (float): how much to multiply the sleep time by each
+                            iteration; defaults to 1.5
+        jitter (int): random jitter to introduce to sleep time each iteration.
+                      the amount is chosen at random between [-jitter, +jitter]
+                      defaults to 1
+
+    Yields:
+        None, a maximum of `attempts` number of times
+
+    Example:
+        >>> n = 0
+        >>> for _ in retrier(sleeptime=0, jitter=0):
+        ...     if n == 3:
+        ...         # We did the thing!
+        ...         break
+        ...     n += 1
+        >>> n
+        3
+
+        >>> n = 0
+        >>> for _ in retrier(sleeptime=0, jitter=0):
+        ...     if n == 6:
+        ...         # We did the thing!
+        ...         break
+        ...     n += 1
+        ... else:
+        ...     print("max tries hit")
+        max tries hit
+    """
+    jitter = jitter or 0  # py35 barfs on the next line if jitter is None
+    if jitter > sleeptime:
+        # To prevent negative sleep times
+        raise Exception(
+            "jitter ({}) must be less than sleep time ({})".format(jitter, sleeptime)
+        )
+
+    sleeptime_real = sleeptime
+    for _ in range(attempts):
+        log("attempt %i/%i" % (_ + 1, attempts))
+
+        yield sleeptime_real
+
+        if jitter:
+            sleeptime_real = sleeptime + random.randint(-jitter, jitter)
+            # our jitter should scale along with the sleeptime
+            jitter = int(jitter * sleepscale)
+        else:
+            sleeptime_real = sleeptime
+
+        sleeptime *= sleepscale
+
+        if sleeptime_real > max_sleeptime:
+            sleeptime_real = max_sleeptime
+
+        # Don't need to sleep the last time
+        if _ < attempts - 1:
+            log(
+                "sleeping for %.2fs (attempt %i/%i)" % (sleeptime_real, _ + 1, attempts)
+            )
+            time.sleep(sleeptime_real)
+
+
+def stream_download(url, sha256=None, size=None):
+    """Download a URL to a generator, optionally with content verification.
+
+    If ``sha256`` or ``size`` are defined, the downloaded URL will be
+    validated against those requirements and ``IntegrityError`` will be
+    raised if expectations do not match.
+
+    Because verification cannot occur until the file is completely downloaded
+    it is recommended for consumers to not do anything meaningful with the
+    data if content verification is being used. To securely handle retrieved
+    content, it should be streamed to a file or memory and only operated
+    on after the generator is exhausted without raising.
+    """
+    log("Downloading %s" % url)
+
+    h = hashlib.sha256()
+    length = 0
+
+    t0 = time.time()
+    with urllib.request.urlopen(
+        url, timeout=60, cafile=certifi.where()
+    ) if certifi else urllib.request.urlopen(url, timeout=60) as fh:
+        if not url.endswith(".gz") and fh.info().get("Content-Encoding") == "gzip":
+            fh = gzip.GzipFile(fileobj=fh)
+
+        while True:
+            chunk = fh.read(65536)
+            if not chunk:
+                break
+
+            h.update(chunk)
+            length += len(chunk)
+
+            yield chunk
+
+    duration = time.time() - t0
+    digest = h.hexdigest()
+
+    log(
+        "%s resolved to %d bytes with sha256 %s in %.3fs"
+        % (url, length, digest, duration)
+    )
+
+    if size:
+        if size == length:
+            log("Verified size of %s" % url)
+        else:
+            raise IntegrityError(
+                "size mismatch on %s: wanted %d; got %d" % (url, size, length)
+            )
+
+    if sha256:
+        if digest == sha256:
+            log("Verified sha256 integrity of %s" % url)
+        else:
+            raise IntegrityError(
+                "sha256 mismatch on %s: wanted %s; got %s" % (url, sha256, digest)
+            )
+
+
+def download_to_path(url, path, sha256=None, size=None):
+    """Download a URL to a filesystem path, possibly with verification."""
+
+    # We download to a temporary file and rename at the end so there's
+    # no chance of the final file being partially written or containing
+    # bad data.
+    try:
+        path.unlink()
+    except FileNotFoundError:
+        pass
+
+    for _ in retrier(attempts=5, sleeptime=60):
+        try:
+            log("Downloading %s to %s" % (url, path))
+
+            with rename_after_close(path, "wb") as fh:
+                for chunk in stream_download(url, sha256=sha256, size=size):
+                    fh.write(chunk)
+
+            return
+        except IntegrityError:
+            raise
+        except Exception as e:
+            log("Download failed: {}".format(e))
+            continue
+
+    raise Exception("Download failed, no more retries!")
+
+
+def download_to_memory(url, sha256=None, size=None):
+    """Download a URL to memory, possibly with verification."""
+
+    data = b""
+    for _ in retrier(attempts=5, sleeptime=60):
+        try:
+            log("Downloading %s" % (url))
+
+            for chunk in stream_download(url, sha256=sha256, size=size):
+                data += chunk
+
+            return data
+        except IntegrityError:
+            raise
+        except Exception as e:
+            log("Download failed: {}".format(e))
+            continue
+
+    raise Exception("Download failed, no more retries!")
+
+
+def gpg_verify_path(path: pathlib.Path, public_key_data: bytes, signature_data: bytes):
+    """Verify that a filesystem path verifies using GPG.
+
+    Takes a Path defining a file to verify. ``public_key_data`` contains
+    bytes with GPG public key data. ``signature_data`` contains a signed
+    GPG document to use with ``gpg --verify``.
+    """
+    log("Validating GPG signature of %s" % path)
+    log("GPG key data:\n%s" % public_key_data.decode("ascii"))
+
+    with tempfile.TemporaryDirectory() as td:
+        try:
+            # --batch since we're running unattended.
+            gpg_args = ["gpg", "--homedir", td, "--batch"]
+
+            log("Importing GPG key...")
+            subprocess.run(gpg_args + ["--import"], input=public_key_data, check=True)
+
+            log("Verifying GPG signature...")
+            subprocess.run(
+                gpg_args + ["--verify", "-", "%s" % path],
+                input=signature_data,
+                check=True,
+            )
+
+            log("GPG signature verified!")
+        finally:
+            # There is a race between the agent self-terminating and
+            # shutil.rmtree() from the temporary directory cleanup that can
+            # lead to exceptions. Kill the agent before cleanup to prevent this.
+            env = dict(os.environ)
+            env["GNUPGHOME"] = td
+            subprocess.run(["gpgconf", "--kill", "gpg-agent"], env=env)
+
+
+class ArchiveTypeNotSupported(Exception):
+    def __init__(self, path: pathlib.Path):
+        super(Exception, self).__init__("Archive type not supported for %s" % path)
+
+
+def open_stream(path: pathlib.Path):
+    """Attempt to identify a path as an extractable archive by looking at its
+    content."""
+    fh = path.open(mode="rb")
+    magic = fh.read(6)
+    fh.seek(0)
+    if magic[:2] == b"PK":
+        return "zip", fh
+    if magic[:2] == b"\x1f\x8b":
+        fh = gzip.GzipFile(fileobj=fh)
+    elif magic[:3] == b"BZh":
+        fh = bz2.BZ2File(fh)
+    elif magic == b"\xfd7zXZ\x00":
+        fh = lzma.LZMAFile(fh)
+    elif magic[:4] == b"\x28\xb5\x2f\xfd":
+        fh = ZstdDecompressor().stream_reader(fh)
+    fh = io.BufferedReader(fh)
+    try:
+        # A full tar info header is 512 bytes.
+        headers = fh.peek(512)
+        # 257 is the offset of the ustar magic.
+        magic = headers[257 : 257 + 8]
+        # For older unix tar, rely on TarInfo.frombuf's checksum check
+        if magic in (b"ustar\x0000", b"ustar  \x00") or tarfile.TarInfo.frombuf(
+            headers[:512], tarfile.ENCODING, "surrogateescape"
+        ):
+            return "tar", fh
+    except Exception as e:
+        pass
+    raise ArchiveTypeNotSupported(path)
+
+
+def archive_type(path: pathlib.Path):
+    """Attempt to identify a path as an extractable archive."""
+    if path.suffixes[-2:-1] == [".tar"] or path.suffixes[-1:] == [".tgz"]:
+        return "tar"
+    elif path.suffix == ".zip":
+        return "zip"
+    else:
+        return None
+
+
+def extract_archive(path, dest_dir):
+    """Extract an archive to a destination directory."""
+
+    # Resolve paths to absolute variants.
+    path = path.resolve()
+    dest_dir = dest_dir.resolve()
+
+    log("Extracting %s to %s" % (path, dest_dir))
+    t0 = time.time()
+
+    # We pipe input to the decompressor program so that we can apply
+    # custom decompressors that the program may not know about.
+    typ, ifh = open_stream(path)
+    if typ == "tar":
+        # On Windows, the tar program doesn't support things like symbolic
+        # links, while Windows actually support them. The tarfile module in
+        # python does. So use that. But since it's significantly slower than
+        # the tar program on Linux, only use tarfile on Windows (tarfile is
+        # also not much slower on Windows, presumably because of the
+        # notoriously bad I/O).
+        if sys.platform == "win32":
+            tar = tarfile.open(fileobj=ifh, mode="r|")
+            tar.extractall(str(dest_dir))
+            args = []
+        else:
+            args = ["tar", "xf", "-"]
+            pipe_stdin = True
+    elif typ == "zip":
+        # unzip from stdin has wonky behavior. We don't use a pipe for it.
+        ifh = open(os.devnull, "rb")
+        args = ["unzip", "-o", str(path)]
+        pipe_stdin = False
+    else:
+        raise ValueError("unknown archive format: %s" % path)
+
+    if args:
+        with ifh, subprocess.Popen(
+            args, cwd=str(dest_dir), bufsize=0, stdin=subprocess.PIPE
+        ) as p:
+            while True:
+                if not pipe_stdin:
+                    break
+
+                chunk = ifh.read(131072)
+                if not chunk:
+                    break
+
+                p.stdin.write(chunk)
+
+        if p.returncode:
+            raise Exception("%r exited %d" % (args, p.returncode))
+
+    log("%s extracted in %.3fs" % (path, time.time() - t0))
+
+
+def repack_archive(
+    orig: pathlib.Path, dest: pathlib.Path, strip_components=0, prefix=""
+):
+    assert orig != dest
+    log("Repacking as %s" % dest)
+    orig_typ, ifh = open_stream(orig)
+    typ = archive_type(dest)
+    if not typ:
+        raise Exception("Archive type not supported for %s" % dest.name)
+
+    if dest.suffixes[-2:] != [".tar", ".zst"]:
+        raise Exception("Only producing .tar.zst archives is supported.")
+
+    if strip_components or prefix:
+
+        def filter(name):
+            if strip_components:
+                stripped = "/".join(name.split("/")[strip_components:])
+                if not stripped:
+                    raise Exception(
+                        "Stripping %d components would remove files" % strip_components
+                    )
+                name = stripped
+            return prefix + name
+
+    else:
+        filter = None
+
+    with rename_after_close(dest, "wb") as fh:
+        ctx = ZstdCompressor()
+        if orig_typ == "zip":
+            assert typ == "tar"
+            zip = zipfile.ZipFile(ifh)
+            # Convert the zip stream to a tar on the fly.
+            with ctx.stream_writer(fh) as compressor, tarfile.open(
+                fileobj=compressor, mode="w:"
+            ) as tar:
+                for zipinfo in zip.infolist():
+                    if zipinfo.is_dir():
+                        continue
+                    tarinfo = tarfile.TarInfo()
+                    filename = zipinfo.filename
+                    tarinfo.name = filter(filename) if filter else filename
+                    tarinfo.size = zipinfo.file_size
+                    # Zip files don't have any knowledge of the timezone
+                    # they were created in. Which is not really convenient to
+                    # reliably convert to a timestamp. But we don't really
+                    # care about accuracy, but rather about reproducibility,
+                    # so we pick UTC.
+                    time = datetime.datetime(
+                        *zipinfo.date_time, tzinfo=datetime.timezone.utc
+                    )
+                    tarinfo.mtime = time.timestamp()
+                    # 0 is MS-DOS, 3 is UNIX. Only in the latter case do we
+                    # get anything useful for the tar file mode.
+                    if zipinfo.create_system == 3:
+                        mode = zipinfo.external_attr >> 16
+                    else:
+                        mode = 0o0644
+                    tarinfo.mode = stat.S_IMODE(mode)
+                    if stat.S_ISLNK(mode):
+                        tarinfo.type = tarfile.SYMTYPE
+                        tarinfo.linkname = zip.read(filename).decode()
+                        tar.addfile(tarinfo, zip.open(filename))
+                    elif stat.S_ISREG(mode) or stat.S_IFMT(mode) == 0:
+                        tar.addfile(tarinfo, zip.open(filename))
+                    else:
+                        raise Exception("Unsupported file mode %o" % stat.S_IFMT(mode))
+
+        elif orig_typ == "tar":
+            if typ == "zip":
+                raise Exception("Repacking a tar to zip is not supported")
+            assert typ == "tar"
+
+            if filter:
+                # To apply the filter, we need to open the tar stream and
+                # tweak it.
+                origtar = tarfile.open(fileobj=ifh, mode="r|")
+                with ctx.stream_writer(fh) as compressor, tarfile.open(
+                    fileobj=compressor,
+                    mode="w:",
+                    format=origtar.format,
+                ) as tar:
+                    for tarinfo in origtar:
+                        if tarinfo.isdir():
+                            continue
+                        tarinfo.name = filter(tarinfo.name)
+                        if "path" in tarinfo.pax_headers:
+                            tarinfo.pax_headers["path"] = filter(
+                                tarinfo.pax_headers["path"]
+                            )
+                        if tarinfo.isfile():
+                            tar.addfile(tarinfo, origtar.extractfile(tarinfo))
+                        else:
+                            tar.addfile(tarinfo)
+            else:
+                # We only change compression here. The tar stream is unchanged.
+                ctx.copy_stream(ifh, fh)
+
+
+def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
+    """Fetch a URL and extract it to a destination path.
+
+    If the downloaded URL is an archive, it is extracted automatically
+    and the archive is deleted. Otherwise the file remains in place in
+    the destination directory.
+    """
+
+    basename = urllib.parse.urlparse(url).path.split("/")[-1]
+    dest_path = dest_dir / basename
+
+    download_to_path(url, dest_path, sha256=sha256, size=size)
+
+    if not extract:
+        return
+
+    try:
+        extract_archive(dest_path, dest_dir)
+        log("Removing %s" % dest_path)
+        dest_path.unlink()
+    except ArchiveTypeNotSupported:
+        pass
+
+
+def fetch_urls(downloads):
+    """Fetch URLs pairs to a pathlib.Path."""
+    with concurrent.futures.ThreadPoolExecutor(CONCURRENCY) as e:
+        fs = []
+
+        for download in downloads:
+            fs.append(e.submit(fetch_and_extract, *download))
+
+        for f in fs:
+            f.result()
+
+
+def _git_checkout_github_archive(dest_path: pathlib.Path, repo: str,
+                                 commit: str, prefix: str):
+    'Use github archive generator to speed up github git repo cloning'
+    repo = repo.rstrip('/')
+    github_url = '{repo}/archive/{commit}.tar.gz'.format(**locals())
+
+    with tempfile.TemporaryDirectory() as td:
+        temp_dir = pathlib.Path(td)
+        dl_dest = temp_dir / 'archive.tar.gz'
+        download_to_path(github_url, dl_dest)
+        repack_archive(dl_dest, dest_path,
+                       strip_components=1,
+                       prefix=prefix + '/')
+
+
+def _github_submodule_required(repo: str, commit: str):
+    'Use github API to check if submodules are used'
+    url = '{repo}/blob/{commit}/.gitmodules'.format(**locals())
+    try:
+        status_code = urllib.request.urlopen(url).getcode()
+        return status_code == 200
+    except:
+        return False
+
+
+def git_checkout_archive(
+    dest_path: pathlib.Path,
+    repo: str,
+    commit: str,
+    prefix=None,
+    ssh_key=None,
+    include_dot_git=False,
+):
+    """Produce an archive of the files comprising a Git checkout."""
+    dest_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if not prefix:
+        prefix = repo.rstrip("/").rsplit("/", 1)[-1]
+
+    if dest_path.suffixes[-2:] != [".tar", ".zst"]:
+        raise Exception("Only producing .tar.zst archives is supported.")
+
+    if repo.startswith('https://github.com/'):
+        if not include_dot_git and not _github_submodule_required(repo, commit):
+            log("Using github archive service to speedup archive creation")
+            # Always log sha1 info, either from commit or resolved from repo.
+            if re.match(r"^[a-fA-F0-9]{40}$", commit):
+                revision = commit
+            else:
+                ref_output = subprocess.check_output(["git", "ls-remote", repo,
+                                                      'refs/heads/' + commit])
+                revision, _ = ref_output.decode().split(maxsplit=1)
+            log("Fetching revision {}".format(revision))
+            return _git_checkout_github_archive(dest_path, repo, commit, prefix)
+
+    with tempfile.TemporaryDirectory() as td:
+        temp_dir = pathlib.Path(td)
+
+        git_dir = temp_dir / prefix
+
+        # This could be faster with a shallow clone. However, Git requires a ref
+        # to initiate a clone. Since the commit-ish may not refer to a ref, we
+        # simply perform a full clone followed by a checkout.
+        print("cloning %s to %s" % (repo, git_dir))
+
+        env = os.environ.copy()
+        keypath = ""
+        if ssh_key:
+            taskcluster_secret_url = api(
+                os.environ.get("TASKCLUSTER_PROXY_URL"),
+                "secrets",
+                "v1",
+                "secret/{keypath}".format(keypath=ssh_key),
+            )
+            taskcluster_secret = b"".join(stream_download(taskcluster_secret_url))
+            taskcluster_secret = json.loads(taskcluster_secret)
+            sshkey = taskcluster_secret["secret"]["ssh_privkey"]
+
+            keypath = temp_dir.joinpath("ssh-key")
+            keypath.write_text(sshkey)
+            keypath.chmod(0o600)
+
+            env = {
+                "GIT_SSH_COMMAND": "ssh -o 'StrictHostKeyChecking no' -i {keypath}".format(
+                    keypath=keypath
+                )
+            }
+
+        subprocess.run(["git", "clone", "-n", repo, str(git_dir)], check=True, env=env)
+
+        # Always use a detached head so that git prints out what it checked out.
+        subprocess.run(
+            ["git", "checkout", "--detach", commit], cwd=str(git_dir), check=True
+        )
+
+        # When including the .git, we want --depth 1, but a direct clone would not
+        # necessarily be able to give us the right commit.
+        if include_dot_git:
+            initial_clone = git_dir.with_name(git_dir.name + ".orig")
+            git_dir.rename(initial_clone)
+            subprocess.run(
+                [
+                    "git",
+                    "clone",
+                    "file://" + str(initial_clone),
+                    str(git_dir),
+                    "--depth",
+                    "1",
+                ],
+                check=True,
+            )
+            subprocess.run(
+                ["git", "remote", "set-url", "origin", repo],
+                cwd=str(git_dir),
+                check=True,
+            )
+
+        # --depth 1 can induce more work on the server side, so only use it for
+        # submodule initialization when we want to keep the .git directory.
+        depth = ["--depth", "1"] if include_dot_git else []
+        subprocess.run(
+            ["git", "submodule", "update", "--init"] + depth,
+            cwd=str(git_dir),
+            check=True,
+        )
+
+        if keypath:
+            os.remove(keypath)
+
+        print("creating archive %s of commit %s" % (dest_path, commit))
+        exclude_dot_git = [] if include_dot_git else ["--exclude=.git"]
+        proc = subprocess.Popen(
+            [
+                "tar",
+                "cf",
+                "-",
+            ]
+            + exclude_dot_git
+            + [
+                "-C",
+                str(temp_dir),
+                prefix,
+            ],
+            stdout=subprocess.PIPE,
+        )
+
+        with rename_after_close(dest_path, "wb") as out:
+            ctx = ZstdCompressor()
+            ctx.copy_stream(proc.stdout, out)
+
+        proc.wait()
+
+
+def command_git_checkout_archive(args):
+    dest = pathlib.Path(args.dest)
+
+    try:
+        git_checkout_archive(
+            dest,
+            args.repo,
+            args.commit,
+            prefix=args.path_prefix,
+            ssh_key=args.ssh_key_secret,
+            include_dot_git=args.include_dot_git,
+        )
+    except Exception:
+        try:
+            dest.unlink()
+        except FileNotFoundError:
+            pass
+
+        raise
+
+
+def command_static_url(args):
+    gpg_sig_url = args.gpg_sig_url
+    gpg_env_key = args.gpg_key_env
+
+    if bool(gpg_sig_url) != bool(gpg_env_key):
+        print("--gpg-sig-url and --gpg-key-env must both be defined")
+        return 1
+
+    if gpg_sig_url:
+        gpg_signature = b"".join(stream_download(gpg_sig_url))
+        gpg_key = os.environb[gpg_env_key.encode("ascii")]
+
+    dest = pathlib.Path(args.dest)
+    dest.parent.mkdir(parents=True, exist_ok=True)
+
+    basename = urllib.parse.urlparse(args.url).path.split("/")[-1]
+    if basename.endswith("".join(dest.suffixes)):
+        dl_dest = dest
+    else:
+        dl_dest = dest.parent / basename
+
+    try:
+        download_to_path(args.url, dl_dest, sha256=args.sha256, size=args.size)
+
+        if gpg_sig_url:
+            gpg_verify_path(dl_dest, gpg_key, gpg_signature)
+
+        if dl_dest != dest or args.strip_components or args.add_prefix:
+            repack_archive(dl_dest, dest, args.strip_components, args.add_prefix)
+    except Exception:
+        try:
+            dl_dest.unlink()
+        except FileNotFoundError:
+            pass
+
+        raise
+
+    if dl_dest != dest:
+        log("Removing %s" % dl_dest)
+        dl_dest.unlink()
+
+
+def api(root_url, service, version, path):
+    # taskcluster-lib-urls is not available when this script runs, so
+    # simulate its behavior:
+    return "{root_url}/api/{service}/{version}/{path}".format(
+        root_url=root_url, service=service, version=version, path=path
+    )
+
+
+def get_hash(fetch, root_url):
+    path = "task/{task}/artifacts/{artifact}".format(
+        task=fetch["task"], artifact="public/chain-of-trust.json"
+    )
+    url = api(root_url, "queue", "v1", path)
+    cot = json.loads(download_to_memory(url))
+    return cot["artifacts"][fetch["artifact"]]["sha256"]
+
+
+def command_task_artifacts(args):
+    start = time.monotonic()
+    fetches = json.loads(os.environ["MOZ_FETCHES"])
+    downloads = []
+    for fetch in fetches:
+        extdir = pathlib.Path(args.dest)
+        if "dest" in fetch:
+            # Note: normpath doesn't like pathlib.Path in python 3.5
+            extdir = pathlib.Path(os.path.normpath(str(extdir.joinpath(fetch["dest"]))))
+        extdir.mkdir(parents=True, exist_ok=True)
+        root_url = os.environ["TASKCLUSTER_ROOT_URL"]
+        sha256 = None
+        if fetch.get("verify-hash"):
+            sha256 = get_hash(fetch, root_url)
+        if fetch["artifact"].startswith("public/"):
+            path = "task/{task}/artifacts/{artifact}".format(
+                task=fetch["task"], artifact=fetch["artifact"]
+            )
+            url = api(root_url, "queue", "v1", path)
+        else:
+            url = ("{proxy_url}/api/queue/v1/task/{task}/artifacts/{artifact}").format(
+                proxy_url=os.environ["TASKCLUSTER_PROXY_URL"],
+                task=fetch["task"],
+                artifact=fetch["artifact"],
+            )
+        downloads.append((url, extdir, fetch["extract"], sha256))
+
+    fetch_urls(downloads)
+    end = time.monotonic()
+
+    perfherder_data = {
+        "framework": {"name": "build_metrics"},
+        "suites": [
+            {
+                "name": "fetch_content",
+                "value": end - start,
+                "lowerIsBetter": True,
+                "shouldAlert": False,
+                "subtests": [],
+            }
+        ],
+    }
+    print("PERFHERDER_DATA: {}".format(json.dumps(perfherder_data)), file=sys.stderr)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers(title="sub commands")
+
+    git_checkout = subparsers.add_parser(
+        "git-checkout-archive",
+        help="Obtain an archive of files from a Git repository checkout",
+    )
+    git_checkout.set_defaults(func=command_git_checkout_archive)
+    git_checkout.add_argument(
+        "--path-prefix", help="Prefix for paths in produced archive"
+    )
+    git_checkout.add_argument("repo", help="URL to Git repository to be cloned")
+    git_checkout.add_argument("commit", help="Git commit to check out")
+    git_checkout.add_argument("dest", help="Destination path of archive")
+    git_checkout.add_argument(
+        "--ssh-key-secret", help="The scope path of the ssh key to used for checkout"
+    )
+    git_checkout.add_argument(
+        "--include-dot-git", action="store_true", help="Include the .git directory"
+    )
+
+    url = subparsers.add_parser("static-url", help="Download a static URL")
+    url.set_defaults(func=command_static_url)
+    url.add_argument("--sha256", required=True, help="SHA-256 of downloaded content")
+    url.add_argument(
+        "--size", required=True, type=int, help="Size of downloaded content, in bytes"
+    )
+    url.add_argument(
+        "--gpg-sig-url",
+        help="URL containing signed GPG document validating " "URL to fetch",
+    )
+    url.add_argument(
+        "--gpg-key-env", help="Environment variable containing GPG key to validate"
+    )
+    url.add_argument(
+        "--strip-components",
+        type=int,
+        default=0,
+        help="Number of leading components to strip from file "
+        "names in the downloaded archive",
+    )
+    url.add_argument(
+        "--add-prefix",
+        default="",
+        help="Prefix to add to file names in the downloaded " "archive",
+    )
+    url.add_argument("url", help="URL to fetch")
+    url.add_argument("dest", help="Destination path")
+
+    artifacts = subparsers.add_parser("task-artifacts", help="Fetch task artifacts")
+    artifacts.set_defaults(func=command_task_artifacts)
+    artifacts.add_argument(
+        "-d",
+        "--dest",
+        default=os.environ.get("MOZ_FETCHES_DIR"),
+        help="Destination directory which will contain all "
+        "artifacts (defaults to $MOZ_FETCHES_DIR)",
+    )
+
+    args = parser.parse_args()
+
+    if not args.dest:
+        parser.error(
+            "no destination directory specified, either pass in --dest "
+            "or set $MOZ_FETCHES_DIR"
+        )
+
+    return args.func(args)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
-- 
cgit v1.2.3