author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
commit     36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree       105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/python/taskcluster_taskgraph/taskgraph/util
parent     Initial commit. (diff)
Adding upstream version 115.7.0esr.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/taskcluster_taskgraph/taskgraph/util')
24 files changed, 3100 insertions, 0 deletions
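
For orientation, the archive helpers added below can be exercised roughly as follows. This is an illustrative sketch, not part of the commit; the input paths in `files` are hypothetical placeholders and must point at real files for the snippet to run.

    import io
    from taskgraph.util.archive import create_tar_gz_from_files

    # Hypothetical inputs: archive member names mapped to local file paths.
    files = {
        "docs/README.md": "/tmp/README.md",
        "src/app.py": "/tmp/app.py",
    }

    buf = io.BytesIO()
    # Fixed uid/gid/mtime inside the helper make the output byte-for-byte
    # reproducible for identical inputs.
    create_tar_gz_from_files(buf, files, filename="context.tar.gz")
    print(len(buf.getvalue()), "bytes written")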
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
new file mode 100644
index 0000000000..ee59ba4548
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
@@ -0,0 +1,86 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import gzip
+import os
+import stat
+import tarfile
+
+# 2016-01-01T00:00:00+0000
+DEFAULT_MTIME = 1451606400
+
+
+def create_tar_from_files(fp, files):
+    """Create a tar file deterministically.
+
+    Receives a dict mapping names of files in the archive to local filesystem
+    paths or ``mozpack.files.BaseFile`` instances.
+
+    The files will be archived and written to the passed file handle opened
+    for writing.
+
+    Only regular files can be written.
+
+    FUTURE accept a filename argument (or create APIs to write files)
+    """
+    with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf:
+        for archive_path, f in sorted(files.items()):
+            if isinstance(f, str):
+                mode = os.stat(f).st_mode
+                f = open(f, "rb")
+            else:
+                mode = 0o0644
+
+            ti = tarfile.TarInfo(archive_path)
+            ti.mode = mode
+            ti.type = tarfile.REGTYPE
+
+            if not ti.isreg():
+                raise ValueError("not a regular file: %s" % f)
+
+            # Disallow setuid and setgid bits. This is an arbitrary restriction.
+            # However, since we set uid/gid to root:root, setuid and setgid
+            # would be a glaring security hole if the archive were
+            # uncompressed as root.
+            if ti.mode & (stat.S_ISUID | stat.S_ISGID):
+                raise ValueError("cannot add file with setuid or setgid set: " "%s" % f)
+
+            # Set uid, gid, username, and group as deterministic values.
+            ti.uid = 0
+            ti.gid = 0
+            ti.uname = ""
+            ti.gname = ""
+
+            # Set mtime to a constant value.
+            ti.mtime = DEFAULT_MTIME
+
+            f.seek(0, 2)
+            ti.size = f.tell()
+            f.seek(0, 0)
+            # tarfile wants to pass a size argument to read(). So just
+            # wrap/buffer in a proper file object interface.
+            tf.addfile(ti, f)
+
+
+def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
+    """Create a tar.gz file deterministically from files.
+
+    This is a glorified wrapper around ``create_tar_from_files`` that
+    adds gzip compression.
+
+    The passed file handle should be opened for writing in binary mode.
+    When the function returns, all data has been written to the handle.
+    """
+    # Offset 3-7 in the gzip header contains an mtime. Pin it to a known
+    # value so output is deterministic.
+    gf = gzip.GzipFile(
+        filename=filename or "",
+        mode="wb",
+        fileobj=fp,
+        compresslevel=compresslevel,
+        mtime=DEFAULT_MTIME,
+    )
+    with gf:
+        create_tar_from_files(gf, files)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
new file mode 100644
index 0000000000..cf6f11c573
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
@@ -0,0 +1,84 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0.
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import re + + +def attrmatch(attributes, **kwargs): + """Determine whether the given set of task attributes matches. The + conditions are given as keyword arguments, where each keyword names an + attribute. The keyword value can be a literal, a set, or a callable. A + literal must match the attribute exactly. Given a set, the attribute value + must be in the set. A callable is called with the attribute value. If an + attribute is specified as a keyword argument but not present in the + attributes, the result is False.""" + for kwkey, kwval in kwargs.items(): + if kwkey not in attributes: + return False + attval = attributes[kwkey] + if isinstance(kwval, set): + if attval not in kwval: + return False + elif callable(kwval): + if not kwval(attval): + return False + elif kwval != attributes[kwkey]: + return False + return True + + +def keymatch(attributes, target): + """Determine if any keys in attributes are a match to target, then return + a list of matching values. First exact matches will be checked. Failing + that, regex matches and finally a default key. + """ + # exact match + if target in attributes: + return [attributes[target]] + + # regular expression match + matches = [v for k, v in attributes.items() if re.match(k + "$", target)] + if matches: + return matches + + # default + if "default" in attributes: + return [attributes["default"]] + + return [] + + +def _match_run_on(key, run_on): + """ + Determine whether the given parameter is included in the corresponding `run-on-attribute`. + """ + if "all" in run_on: + return True + return key in run_on + + +match_run_on_projects = _match_run_on +match_run_on_tasks_for = _match_run_on + + +def match_run_on_git_branches(git_branch, run_on_git_branches): + """ + Determine whether the given project is included in the `run-on-git-branches` parameter. + Allows 'all'. + """ + if "all" in run_on_git_branches: + return True + + for expected_git_branch_pattern in run_on_git_branches: + if re.match(expected_git_branch_pattern, git_branch): + return True + + return False + + +def sorted_unique_list(*args): + """Join one or more lists, and return a sorted list of unique members""" + combined = set().union(*args) + return sorted(combined) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py new file mode 100644 index 0000000000..974b114902 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py @@ -0,0 +1,86 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import hashlib +import time + +TARGET_CACHE_INDEX = "{cache_prefix}.cache.level-{level}.{type}.{name}.hash.{digest}" +EXTRA_CACHE_INDEXES = [ + "{cache_prefix}.cache.level-{level}.{type}.{name}.latest", + "{cache_prefix}.cache.level-{level}.{type}.{name}.pushdate.{build_date_long}", +] + + +def add_optimization( + config, taskdesc, cache_type, cache_name, digest=None, digest_data=None +): + """ + Allow the results of this task to be cached. This adds index routes to the + task so it can be looked up for future runs, and optimization hints so that + cached artifacts can be found. Exactly one of `digest` and `digest_data` + must be passed. 
+ + :param TransformConfig config: The configuration for the kind being transformed. + :param dict taskdesc: The description of the current task. + :param str cache_type: The type of task result being cached. + :param str cache_name: The name of the object being cached. + :param digest: A unique string identifying this version of the artifacts + being generated. Typically this will be the hash of inputs to the task. + :type digest: bytes or None + :param digest_data: A list of bytes representing the inputs of this task. + They will be concatenated and hashed to create the digest for this + task. + :type digest_data: list of bytes or None + """ + if (digest is None) == (digest_data is None): + raise Exception("Must pass exactly one of `digest` and `digest_data`.") + if digest is None: + digest = hashlib.sha256("\n".join(digest_data).encode("utf-8")).hexdigest() + + if "cached-task-prefix" in config.graph_config["taskgraph"]: + cache_prefix = config.graph_config["taskgraph"]["cached-task-prefix"] + else: + cache_prefix = config.graph_config["trust-domain"] + + subs = { + "cache_prefix": cache_prefix, + "type": cache_type, + "name": cache_name, + "digest": digest, + } + + # We'll try to find a cached version of the toolchain at levels above and + # including the current level, starting at the highest level. + # Chain-of-trust doesn't handle tasks not built on the tip of a + # pull-request, so don't look for level-1 tasks if building a pull-request. + index_routes = [] + min_level = int(config.params["level"]) + if config.params["tasks_for"] == "github-pull-request": + min_level = max(min_level, 3) + for level in reversed(range(min_level, 4)): + subs["level"] = level + index_routes.append(TARGET_CACHE_INDEX.format(**subs)) + + taskdesc["optimization"] = {"index-search": index_routes} + + # ... and cache at the lowest level. + subs["level"] = config.params["level"] + taskdesc.setdefault("routes", []).append( + f"index.{TARGET_CACHE_INDEX.format(**subs)}" + ) + + # ... and add some extra routes for humans + subs["build_date_long"] = time.strftime( + "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"]) + ) + taskdesc["routes"].extend( + [f"index.{route.format(**subs)}" for route in EXTRA_CACHE_INDEXES] + ) + + taskdesc["attributes"]["cached_task"] = { + "type": cache_type, + "name": cache_name, + "digest": digest, + } diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py new file mode 100644 index 0000000000..d0e1e1079f --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py @@ -0,0 +1,79 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Utilities for generating a decision task from :file:`.taskcluster.yml`. 
+""" + + +import os + +import jsone +import slugid +import yaml + +from .templates import merge +from .time import current_json_time +from .vcs import find_hg_revision_push_info + + +def make_decision_task(params, root, context, head_rev=None): + """Generate a basic decision task, based on the root .taskcluster.yml""" + with open(os.path.join(root, ".taskcluster.yml"), "rb") as f: + taskcluster_yml = yaml.safe_load(f) + + if not head_rev: + head_rev = params["head_rev"] + + if params["repository_type"] == "hg": + pushlog = find_hg_revision_push_info(params["repository_url"], head_rev) + + hg_push_context = { + "pushlog_id": pushlog["pushid"], + "pushdate": pushlog["pushdate"], + "owner": pushlog["user"], + } + else: + hg_push_context = {} + + slugids = {} + + def as_slugid(name): + # https://github.com/taskcluster/json-e/issues/164 + name = name[0] + if name not in slugids: + slugids[name] = slugid.nice() + return slugids[name] + + # provide a similar JSON-e context to what mozilla-taskcluster provides: + # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml + # but with a different tasks_for and an extra `cron` section + context = merge( + { + "repository": { + "url": params["repository_url"], + "project": params["project"], + "level": params["level"], + }, + "push": merge( + { + "revision": params["head_rev"], + # remainder are fake values, but the decision task expects them anyway + "comment": " ", + }, + hg_push_context, + ), + "now": current_json_time(), + "as_slugid": as_slugid, + }, + context, + ) + + rendered = jsone.render(taskcluster_yml, context) + if len(rendered["tasks"]) != 1: + raise Exception("Expected .taskcluster.yml to only produce one cron task") + task = rendered["tasks"][0] + + task_id = task.pop("taskId") + return (task_id, task) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py new file mode 100644 index 0000000000..4b211cc4b3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py @@ -0,0 +1,342 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import hashlib +import io +import json +import os +import re +import sys +import urllib.parse + +import requests_unixsocket + +from taskgraph.util.archive import create_tar_gz_from_files +from taskgraph.util.memoize import memoize + +IMAGE_DIR = os.path.join(".", "taskcluster", "docker") + +from .yaml import load_yaml + + +def docker_url(path, **kwargs): + docker_socket = os.environ.get("DOCKER_SOCKET", "/var/run/docker.sock") + return urllib.parse.urlunparse( + ( + "http+unix", + urllib.parse.quote(docker_socket, safe=""), + path, + "", + urllib.parse.urlencode(kwargs), + "", + ) + ) + + +def post_to_docker(tar, api_path, **kwargs): + """POSTs a tar file to a given docker API path. + + The tar argument can be anything that can be passed to requests.post() + as data (e.g. iterator or file object). + The extra keyword arguments are passed as arguments to the docker API. 
+ """ + req = requests_unixsocket.Session().post( + docker_url(api_path, **kwargs), + data=tar, + stream=True, + headers={"Content-Type": "application/x-tar"}, + ) + if req.status_code != 200: + message = req.json().get("message") + if not message: + message = f"docker API returned HTTP code {req.status_code}" + raise Exception(message) + status_line = {} + + buf = b"" + for content in req.iter_content(chunk_size=None): + if not content: + continue + # Sometimes, a chunk of content is not a complete json, so we cumulate + # with leftovers from previous iterations. + buf += content + try: + data = json.loads(buf) + except Exception: + continue + buf = b"" + # data is sometimes an empty dict. + if not data: + continue + # Mimic how docker itself presents the output. This code was tested + # with API version 1.18 and 1.26. + if "status" in data: + if "id" in data: + if sys.stderr.isatty(): + total_lines = len(status_line) + line = status_line.setdefault(data["id"], total_lines) + n = total_lines - line + if n > 0: + # Move the cursor up n lines. + sys.stderr.write(f"\033[{n}A") + # Clear line and move the cursor to the beginning of it. + sys.stderr.write("\033[2K\r") + sys.stderr.write( + "{}: {} {}\n".format( + data["id"], data["status"], data.get("progress", "") + ) + ) + if n > 1: + # Move the cursor down n - 1 lines, which, considering + # the carriage return on the last write, gets us back + # where we started. + sys.stderr.write(f"\033[{n - 1}B") + else: + status = status_line.get(data["id"]) + # Only print status changes. + if status != data["status"]: + sys.stderr.write("{}: {}\n".format(data["id"], data["status"])) + status_line[data["id"]] = data["status"] + else: + status_line = {} + sys.stderr.write("{}\n".format(data["status"])) + elif "stream" in data: + sys.stderr.write(data["stream"]) + elif "aux" in data: + sys.stderr.write(repr(data["aux"])) + elif "error" in data: + sys.stderr.write("{}\n".format(data["error"])) + # Sadly, docker doesn't give more than a plain string for errors, + # so the best we can do to propagate the error code from the command + # that failed is to parse the error message... + errcode = 1 + m = re.search(r"returned a non-zero code: (\d+)", data["error"]) + if m: + errcode = int(m.group(1)) + sys.exit(errcode) + else: + raise NotImplementedError(repr(data)) + sys.stderr.flush() + + +def docker_image(name, by_tag=False): + """ + Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``, + or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`. 
+ """ + try: + with open(os.path.join(IMAGE_DIR, name, "REGISTRY")) as f: + registry = f.read().strip() + except OSError: + with open(os.path.join(IMAGE_DIR, "REGISTRY")) as f: + registry = f.read().strip() + + if not by_tag: + hashfile = os.path.join(IMAGE_DIR, name, "HASH") + try: + with open(hashfile) as f: + return f"{registry}/{name}@{f.read().strip()}" + except OSError: + raise Exception(f"Failed to read HASH file {hashfile}") + + try: + with open(os.path.join(IMAGE_DIR, name, "VERSION")) as f: + tag = f.read().strip() + except OSError: + tag = "latest" + return f"{registry}/{name}:{tag}" + + +class VoidWriter: + """A file object with write capabilities that does nothing with the written + data.""" + + def write(self, buf): + pass + + +def generate_context_hash(topsrcdir, image_path, args=None): + """Generates a sha256 hash for context directory used to build an image.""" + + return stream_context_tar(topsrcdir, image_path, VoidWriter(), args=args) + + +class HashingWriter: + """A file object with write capabilities that hashes the written data at + the same time it passes down to a real file object.""" + + def __init__(self, writer): + self._hash = hashlib.sha256() + self._writer = writer + + def write(self, buf): + self._hash.update(buf) + self._writer.write(buf) + + def hexdigest(self): + return self._hash.hexdigest() + + +def create_context_tar(topsrcdir, context_dir, out_path, args=None): + """Create a context tarball. + + A directory ``context_dir`` containing a Dockerfile will be assembled into + a gzipped tar file at ``out_path``. + + We also scan the source Dockerfile for special syntax that influences + context generation. + + If a line in the Dockerfile has the form ``# %include <path>``, + the relative path specified on that line will be matched against + files in the source repository and added to the context under the + path ``topsrcdir/``. If an entry is a directory, we add all files + under that directory. + + If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences of + the string ``$<name>`` in subsequent lines are replaced with the value + found in the ``args`` argument. Exception: this doesn't apply to VOLUME + definitions. + + Returns the SHA-256 hex digest of the created archive. 
+ """ + with open(out_path, "wb") as fh: + return stream_context_tar( + topsrcdir, + context_dir, + fh, + image_name=os.path.basename(out_path), + args=args, + ) + + +RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task") +RUN_TASK_FILES = { + f"run-task/{path}": os.path.join(RUN_TASK_ROOT, path) + for path in [ + "run-task", + "fetch-content", + "hgrc", + "robustcheckout.py", + ] +} +RUN_TASK_SNIPPET = [ + "COPY run-task/run-task /usr/local/bin/run-task\n", + "COPY run-task/fetch-content /usr/local/bin/fetch-content\n", + "COPY run-task/robustcheckout.py /usr/local/mercurial/robustcheckout.py\n" + "COPY run-task/hgrc /etc/mercurial/hgrc.d/mozilla.rc\n", +] + + +def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None): + """Like create_context_tar, but streams the tar file to the `out_file` file + object.""" + archive_files = {} + replace = [] + content = [] + + topsrcdir = os.path.abspath(topsrcdir) + context_dir = os.path.join(topsrcdir, context_dir) + + for root, dirs, files in os.walk(context_dir): + for f in files: + source_path = os.path.join(root, f) + archive_path = source_path[len(context_dir) + 1 :] + archive_files[archive_path] = open(source_path, "rb") + + # Parse Dockerfile for special syntax of extra files to include. + content = [] + with open(os.path.join(context_dir, "Dockerfile")) as fh: + for line in fh: + if line.startswith("# %ARG"): + p = line[len("# %ARG ") :].strip() + if not args or p not in args: + raise Exception(f"missing argument: {p}") + replace.append((re.compile(rf"\${p}\b"), args[p])) + continue + + for regexp, s in replace: + line = re.sub(regexp, s, line) + + content.append(line) + + if not line.startswith("# %include"): + continue + + if line.strip() == "# %include-run-task": + content.extend(RUN_TASK_SNIPPET) + archive_files.update(RUN_TASK_FILES) + continue + + p = line[len("# %include ") :].strip() + if os.path.isabs(p): + raise Exception("extra include path cannot be absolute: %s" % p) + + fs_path = os.path.normpath(os.path.join(topsrcdir, p)) + # Check for filesystem traversal exploits. 
+ if not fs_path.startswith(topsrcdir): + raise Exception("extra include path outside topsrcdir: %s" % p) + + if not os.path.exists(fs_path): + raise Exception("extra include path does not exist: %s" % p) + + if os.path.isdir(fs_path): + for root, dirs, files in os.walk(fs_path): + for f in files: + source_path = os.path.join(root, f) + rel = source_path[len(fs_path) + 1 :] + archive_path = os.path.join("topsrcdir", p, rel) + archive_files[archive_path] = source_path + else: + archive_path = os.path.join("topsrcdir", p) + archive_files[archive_path] = fs_path + + archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8")) + + writer = HashingWriter(out_file) + create_tar_gz_from_files(writer, archive_files, image_name) + return writer.hexdigest() + + +@memoize +def image_paths(): + """Return a map of image name to paths containing their Dockerfile.""" + config = load_yaml("taskcluster", "ci", "docker-image", "kind.yml") + return { + k: os.path.join(IMAGE_DIR, v.get("definition", k)) + for k, v in config["tasks"].items() + } + + +def image_path(name): + paths = image_paths() + if name in paths: + return paths[name] + return os.path.join(IMAGE_DIR, name) + + +@memoize +def parse_volumes(image): + """Parse VOLUME entries from a Dockerfile for an image.""" + volumes = set() + + path = image_path(image) + + with open(os.path.join(path, "Dockerfile"), "rb") as fh: + for line in fh: + line = line.strip() + # We assume VOLUME definitions don't use %ARGS. + if not line.startswith(b"VOLUME "): + continue + + v = line.split(None, 1)[1] + if v.startswith(b"["): + raise ValueError( + "cannot parse array syntax for VOLUME; " + "convert to multiple entries" + ) + + volumes |= {volume.decode("utf-8") for volume in v.split()} + + return volumes diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py new file mode 100644 index 0000000000..bf786e92e4 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import hashlib +from pathlib import Path + +from taskgraph.util import path as mozpath +from taskgraph.util.memoize import memoize + + +@memoize +def hash_path(path): + """Hash a single file. + + Returns the SHA-256 hash in hex form. + """ + with open(path, "rb") as fh: + return hashlib.sha256(fh.read()).hexdigest() + + +def _find_files(base_path): + for path in Path(base_path).rglob("*"): + if path.is_file(): + yield str(path) + + +def hash_paths(base_path, patterns): + """ + Give a list of path patterns, return a digest of the contents of all + the corresponding files, similarly to git tree objects or mercurial + manifests. + + Each file is hashed. The list of all hashes and file paths is then + itself hashed to produce the result. 
+ """ + h = hashlib.sha256() + + found = set() + for pattern in patterns: + files = _find_files(base_path) + matches = [path for path in files if mozpath.match(path, pattern)] + if matches: + found.update(matches) + else: + raise Exception("%s did not match anything" % pattern) + for path in sorted(found): + h.update( + "{} {}\n".format( + hash_path(mozpath.abspath(mozpath.join(base_path, path))), + mozpath.normsep(path), + ).encode("utf-8") + ) + return h.hexdigest() diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py new file mode 100644 index 0000000000..9b0c5a44fb --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py @@ -0,0 +1,97 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +from .attributes import keymatch + + +def evaluate_keyed_by( + value, item_name, attributes, defer=None, enforce_single_match=True +): + """ + For values which can either accept a literal value, or be keyed by some + attributes, perform that lookup and return the result. + + For example, given item:: + + by-test-platform: + macosx-10.11/debug: 13 + win.*: 6 + default: 12 + + a call to `evaluate_keyed_by(item, 'thing-name', {'test-platform': 'linux96')` + would return `12`. + + Items can be nested as deeply as desired:: + + by-test-platform: + win.*: + by-project: + ash: .. + cedar: .. + linux: 13 + default: 12 + + Args: + value (str): Name of the value to perform evaluation on. + item_name (str): Used to generate useful error messages. + attributes (dict): Dictionary of attributes used to lookup 'by-<key>' with. + defer (list): + Allows evaluating a by-* entry at a later time. In the example + above it's possible that the project attribute hasn't been set yet, + in which case we'd want to stop before resolving that subkey and + then call this function again later. This can be accomplished by + setting `defer=["project"]` in this example. + enforce_single_match (bool): + If True (default), each task may only match a single arm of the + evaluation. + """ + while True: + if not isinstance(value, dict) or len(value) != 1: + return value + value_key = next(iter(value)) + if not value_key.startswith("by-"): + return value + + keyed_by = value_key[3:] # strip off 'by-' prefix + + if defer and keyed_by in defer: + return value + + key = attributes.get(keyed_by) + alternatives = next(iter(value.values())) + + if len(alternatives) == 1 and "default" in alternatives: + # Error out when only 'default' is specified as only alternatives, + # because we don't need to by-{keyed_by} there. 
+ raise Exception( + "Keyed-by '{}' unnecessary with only value 'default' " + "found, when determining item {}".format(keyed_by, item_name) + ) + + if key is None: + if "default" in alternatives: + value = alternatives["default"] + continue + else: + raise Exception( + "No attribute {} and no value for 'default' found " + "while determining item {}".format(keyed_by, item_name) + ) + + matches = keymatch(alternatives, key) + if enforce_single_match and len(matches) > 1: + raise Exception( + "Multiple matching values for {} {!r} found while " + "determining item {}".format(keyed_by, key, item_name) + ) + elif matches: + value = matches[0] + continue + + raise Exception( + "No {} matching {!r} nor 'default' found while determining item {}".format( + keyed_by, key, item_name + ) + ) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py new file mode 100644 index 0000000000..56b513e74c --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py @@ -0,0 +1,40 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +# Imported from +# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#923-949 + + +import functools + + +class memoize(dict): + """A decorator to memoize the results of function calls depending + on its arguments. + Both functions and instance methods are handled, although in the + instance method case, the results are cache in the instance itself. + """ + + def __init__(self, func): + self.func = func + functools.update_wrapper(self, func) + + def __call__(self, *args): + if args not in self: + self[args] = self.func(*args) + return self[args] + + def method_call(self, instance, *args): + name = "_%s" % self.func.__name__ + if not hasattr(instance, name): + setattr(instance, name, {}) + cache = getattr(instance, name) + if args not in cache: + cache[args] = self.func(instance, *args) + return cache[args] + + def __get__(self, instance, cls): + return functools.update_wrapper( + functools.partial(self.method_call, instance), self.func + ) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py new file mode 100644 index 0000000000..6233a98a40 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py @@ -0,0 +1,97 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +import re + +from taskgraph.util.taskcluster import get_artifact_url +from taskgraph.util.time import json_time_from_now + +TASK_REFERENCE_PATTERN = re.compile("<([^>]+)>") +ARTIFACT_REFERENCE_PATTERN = re.compile("<([^/]+)/([^>]+)>") + + +def _recurse(val, param_fns): + def recurse(val): + if isinstance(val, list): + return [recurse(v) for v in val] + elif isinstance(val, dict): + if len(val) == 1: + for param_key, param_fn in param_fns.items(): + if set(val.keys()) == {param_key}: + return param_fn(val[param_key]) + return {k: recurse(v) for k, v in val.items()} + else: + return val + + return recurse(val) + + +def resolve_timestamps(now, task_def): + """Resolve all instances of `{'relative-datestamp': '..'}` in the given task definition""" + return _recurse( + task_def, + { + "relative-datestamp": lambda v: json_time_from_now(v, now), + }, + ) + + +def resolve_task_references(label, task_def, task_id, decision_task_id, dependencies): + """Resolve all instances of ``{'task-reference': '..<..>..'} `` + and ``{'artifact-reference`: '..<dependency/artifact/path>..'}`` + in the given task definition, using the given dependencies. + """ + + def task_reference(val): + def repl(match): + key = match.group(1) + if key == "self": + return task_id + elif key == "decision": + return decision_task_id + try: + return dependencies[key] + except KeyError: + # handle escaping '<' + if key == "<": + return key + raise KeyError(f"task '{label}' has no dependency named '{key}'") + + return TASK_REFERENCE_PATTERN.sub(repl, val) + + def artifact_reference(val): + def repl(match): + dependency, artifact_name = match.group(1, 2) + + if dependency == "self": + raise KeyError(f"task '{label}' can't reference artifacts of self") + elif dependency == "decision": + task_id = decision_task_id + else: + try: + task_id = dependencies[dependency] + except KeyError: + raise KeyError( + "task '{}' has no dependency named '{}'".format( + label, dependency + ) + ) + + assert artifact_name.startswith( + "public/" + ), "artifact-reference only supports public artifacts, not `{}`".format( + artifact_name + ) + return get_artifact_url(task_id, artifact_name) + + return ARTIFACT_REFERENCE_PATTERN.sub(repl, val) + + return _recurse( + task_def, + { + "task-reference": task_reference, + "artifact-reference": artifact_reference, + }, + ) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py new file mode 100644 index 0000000000..728b648ac1 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py @@ -0,0 +1,172 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Like :py:mod:`os.path`, with a reduced set of functions, and with normalized path +separators (always use forward slashes). +Also contains a few additional utilities not found in :py:mod:`os.path`. +""" + +# Imported from +# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozpack/path.py + + +import os +import posixpath +import re + + +def normsep(path): + """ + Normalize path separators, by using forward slashes instead of whatever + :py:const:`os.sep` is. 
+ """ + if os.sep != "/": + path = path.replace(os.sep, "/") + if os.altsep and os.altsep != "/": + path = path.replace(os.altsep, "/") + return path + + +def relpath(path, start): + rel = normsep(os.path.relpath(path, start)) + return "" if rel == "." else rel + + +def realpath(path): + return normsep(os.path.realpath(path)) + + +def abspath(path): + return normsep(os.path.abspath(path)) + + +def join(*paths): + return normsep(os.path.join(*paths)) + + +def normpath(path): + return posixpath.normpath(normsep(path)) + + +def dirname(path): + return posixpath.dirname(normsep(path)) + + +def commonprefix(paths): + return posixpath.commonprefix([normsep(path) for path in paths]) + + +def basename(path): + return os.path.basename(path) + + +def splitext(path): + return posixpath.splitext(normsep(path)) + + +def split(path): + """ + Return the normalized path as a list of its components. + + ``split('foo/bar/baz')`` returns ``['foo', 'bar', 'baz']`` + """ + return normsep(path).split("/") + + +def basedir(path, bases): + """ + Given a list of directories (`bases`), return which one contains the given + path. If several matches are found, the deepest base directory is returned. + + ``basedir('foo/bar/baz', ['foo', 'baz', 'foo/bar'])`` returns ``'foo/bar'`` + (`'foo'` and `'foo/bar'` both match, but `'foo/bar'` is the deepest match) + """ + path = normsep(path) + bases = [normsep(b) for b in bases] + if path in bases: + return path + for b in sorted(bases, reverse=True): + if b == "" or path.startswith(b + "/"): + return b + + +re_cache = {} +# Python versions < 3.7 return r'\/' for re.escape('/'). +if re.escape("/") == "/": + MATCH_STAR_STAR_RE = re.compile(r"(^|/)\\\*\\\*/") + MATCH_STAR_STAR_END_RE = re.compile(r"(^|/)\\\*\\\*$") +else: + MATCH_STAR_STAR_RE = re.compile(r"(^|\\\/)\\\*\\\*\\\/") + MATCH_STAR_STAR_END_RE = re.compile(r"(^|\\\/)\\\*\\\*$") + + +def match(path, pattern): + """ + Return whether the given path matches the given pattern. + An asterisk can be used to match any string, including the null string, in + one part of the path: + + ``foo`` matches ``*``, ``f*`` or ``fo*o`` + + However, an asterisk matching a subdirectory may not match the null string: + + ``foo/bar`` does *not* match ``foo/*/bar`` + + If the pattern matches one of the ancestor directories of the path, the + patch is considered matching: + + ``foo/bar`` matches ``foo`` + + Two adjacent asterisks can be used to match files and zero or more + directories and subdirectories. + + ``foo/bar`` matches ``foo/**/bar``, or ``**/bar`` + """ + if not pattern: + return True + if pattern not in re_cache: + p = re.escape(pattern) + p = MATCH_STAR_STAR_RE.sub(r"\1(?:.+/)?", p) + p = MATCH_STAR_STAR_END_RE.sub(r"(?:\1.+)?", p) + p = p.replace(r"\*", "[^/]*") + "(?:/.*)?$" + re_cache[pattern] = re.compile(p) + return re_cache[pattern].match(path) is not None + + +def rebase(oldbase, base, relativepath): + """ + Return `relativepath` relative to `base` instead of `oldbase`. + """ + if base == oldbase: + return relativepath + if len(base) < len(oldbase): + assert basedir(oldbase, [base]) == base + relbase = relpath(oldbase, base) + result = join(relbase, relativepath) + else: + assert basedir(base, [oldbase]) == oldbase + relbase = relpath(base, oldbase) + result = relpath(relativepath, relbase) + result = normpath(result) + if relativepath.endswith("/") and not result.endswith("/"): + result += "/" + return result + + +def ancestors(path): + """Emit the parent directories of a path. 
+ + Args: + path (str): Path to emit parents of. + + Yields: + str: Path of parent directory. + """ + while path: + yield path + newpath = os.path.dirname(path) + if newpath == path: + break + path = newpath diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py new file mode 100644 index 0000000000..3eb61dfbf3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py @@ -0,0 +1,52 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import inspect +import os + + +def find_object(path): + """ + Find a Python object given a path of the form <modulepath>:<objectpath>. + Conceptually equivalent to + + def find_object(modulepath, objectpath): + import <modulepath> as mod + return mod.<objectpath> + """ + if path.count(":") != 1: + raise ValueError(f'python path {path!r} does not have the form "module:object"') + + modulepath, objectpath = path.split(":") + obj = __import__(modulepath) + for a in modulepath.split(".")[1:]: + obj = getattr(obj, a) + for a in objectpath.split("."): + obj = getattr(obj, a) + return obj + + +def import_sibling_modules(exceptions=None): + """ + Import all Python modules that are siblings of the calling module. + + Args: + exceptions (list): A list of file names to exclude (caller and + __init__.py are implicitly excluded). + """ + frame = inspect.stack()[1] + mod = inspect.getmodule(frame[0]) + + name = os.path.basename(mod.__file__) + excs = {"__init__.py", name} + if exceptions: + excs.update(exceptions) + + modpath = mod.__name__ + if not name.startswith("__init__.py"): + modpath = modpath.rsplit(".", 1)[0] + + for f in os.listdir(os.path.dirname(mod.__file__)): + if f.endswith(".py") and f not in excs: + __import__(modpath + "." + f[:-3]) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py new file mode 100644 index 0000000000..55d74f479a --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py @@ -0,0 +1,22 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +# Imported from +# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#115-127 + + +class ReadOnlyDict(dict): + """A read-only dictionary.""" + + def __init__(self, *args, **kwargs): + dict.__init__(self, *args, **kwargs) + + def __delitem__(self, key): + raise Exception("Object does not support deletion.") + + def __setitem__(self, key, value): + raise Exception("Object does not support assignment.") + + def update(self, *args, **kwargs): + raise Exception("Object does not support update.") diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py new file mode 100644 index 0000000000..3989f71182 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py @@ -0,0 +1,260 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import collections +import pprint +import re + +import voluptuous + +import taskgraph + +from .keyed_by import evaluate_keyed_by + + +def validate_schema(schema, obj, msg_prefix): + """ + Validate that object satisfies schema. If not, generate a useful exception + beginning with msg_prefix. + """ + if taskgraph.fast: + return + try: + schema(obj) + except voluptuous.MultipleInvalid as exc: + msg = [msg_prefix] + for error in exc.errors: + msg.append(str(error)) + raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) + + +def optionally_keyed_by(*arguments): + """ + Mark a schema value as optionally keyed by any of a number of fields. The + schema is the last argument, and the remaining fields are taken to be the + field names. For example: + + 'some-value': optionally_keyed_by( + 'test-platform', 'build-platform', + Any('a', 'b', 'c')) + + The resulting schema will allow nesting of `by-test-platform` and + `by-build-platform` in either order. + """ + schema = arguments[-1] + fields = arguments[:-1] + + def validator(obj): + if isinstance(obj, dict) and len(obj) == 1: + k, v = list(obj.items())[0] + if k.startswith("by-") and k[len("by-") :] in fields: + res = {} + for kk, vv in v.items(): + try: + res[kk] = validator(vv) + except voluptuous.Invalid as e: + e.prepend([k, kk]) + raise + return res + return Schema(schema)(obj) + + return validator + + +def resolve_keyed_by( + item, field, item_name, defer=None, enforce_single_match=True, **extra_values +): + """ + For values which can either accept a literal value, or be keyed by some + other attribute of the item, perform that lookup and replacement in-place + (modifying `item` directly). The field is specified using dotted notation + to traverse dictionaries. + + For example, given item:: + + job: + test-platform: linux128 + chunks: + by-test-platform: + macosx-10.11/debug: 13 + win.*: 6 + default: 12 + + a call to `resolve_keyed_by(item, 'job.chunks', item['thing-name'])` + would mutate item in-place to:: + + job: + test-platform: linux128 + chunks: 12 + + The `item_name` parameter is used to generate useful error messages. + + If extra_values are supplied, they represent additional values available + for reference from by-<field>. + + Items can be nested as deeply as the schema will allow:: + + chunks: + by-test-platform: + win.*: + by-project: + ash: .. + cedar: .. + linux: 13 + default: 12 + + Args: + item (dict): Object being evaluated. + field (str): Name of the key to perform evaluation on. + item_name (str): Used to generate useful error messages. + defer (list): + Allows evaluating a by-* entry at a later time. In the example + above it's possible that the project attribute hasn't been set yet, + in which case we'd want to stop before resolving that subkey and + then call this function again later. This can be accomplished by + setting `defer=["project"]` in this example. + enforce_single_match (bool): + If True (default), each task may only match a single arm of the + evaluation. + extra_values (kwargs): + If supplied, represent additional values available + for reference from by-<field>. + + Returns: + dict: item which has also been modified in-place. + """ + # find the field, returning the item unchanged if anything goes wrong + container, subfield = item, field + while "." 
in subfield: + f, subfield = subfield.split(".", 1) + if f not in container: + return item + container = container[f] + if not isinstance(container, dict): + return item + + if subfield not in container: + return item + + container[subfield] = evaluate_keyed_by( + value=container[subfield], + item_name=f"`{field}` in `{item_name}`", + defer=defer, + enforce_single_match=enforce_single_match, + attributes=dict(item, **extra_values), + ) + + return item + + +# Schemas for YAML files should use dashed identifiers by default. If there are +# components of the schema for which there is a good reason to use another format, +# they can be excepted here. +EXCEPTED_SCHEMA_IDENTIFIERS = [ + # upstream-artifacts and artifact-map are handed directly to scriptWorker, + # which expects interCaps + "upstream-artifacts", + "artifact-map", +] + + +def check_schema(schema): + identifier_re = re.compile(r"^\$?[a-z][a-z0-9-]*$") + + def excepted(item): + for esi in EXCEPTED_SCHEMA_IDENTIFIERS: + if isinstance(esi, str): + if f"[{esi!r}]" in item: + return True + elif esi(item): + return True + return False + + def iter(path, sch): + def check_identifier(path, k): + if k in (str,) or k in (str, voluptuous.Extra): + pass + elif isinstance(k, voluptuous.NotIn): + pass + elif isinstance(k, str): + if not identifier_re.match(k) and not excepted(path): + raise RuntimeError( + "YAML schemas should use dashed lower-case identifiers, " + "not {!r} @ {}".format(k, path) + ) + elif isinstance(k, (voluptuous.Optional, voluptuous.Required)): + check_identifier(path, k.schema) + elif isinstance(k, (voluptuous.Any, voluptuous.All)): + for v in k.validators: + check_identifier(path, v) + elif not excepted(path): + raise RuntimeError( + "Unexpected type in YAML schema: {} @ {}".format( + type(k).__name__, path + ) + ) + + if isinstance(sch, collections.abc.Mapping): + for k, v in sch.items(): + child = f"{path}[{k!r}]" + check_identifier(child, k) + iter(child, v) + elif isinstance(sch, (list, tuple)): + for i, v in enumerate(sch): + iter(f"{path}[{i}]", v) + elif isinstance(sch, voluptuous.Any): + for v in sch.validators: + iter(path, v) + + iter("schema", schema.schema) + + +class Schema(voluptuous.Schema): + """ + Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks + in the process. + """ + + def __init__(self, *args, check=True, **kwargs): + super().__init__(*args, **kwargs) + + self.check = check + if not taskgraph.fast and self.check: + check_schema(self) + + def extend(self, *args, **kwargs): + schema = super().extend(*args, **kwargs) + + if self.check: + check_schema(schema) + # We want twice extend schema to be checked too. 
+ schema.__class__ = Schema + return schema + + def _compile(self, schema): + if taskgraph.fast: + return + return super()._compile(schema) + + def __getitem__(self, item): + return self.schema[item] + + +OptimizationSchema = voluptuous.Any( + # always run this task (default) + None, + # search the index for the given index namespaces, and replace this task if found + # the search occurs in order, with the first match winning + {"index-search": [str]}, + # skip this task if none of the given file patterns match + {"skip-unless-changed": [str]}, +) + +# shortcut for a string where task references are allowed +taskref_or_string = voluptuous.Any( + str, + {voluptuous.Required("task-reference"): str}, + {voluptuous.Required("artifact-reference"): str}, +) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py new file mode 100644 index 0000000000..d695767f05 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py @@ -0,0 +1,40 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +import re + +SHELL_QUOTE_RE = re.compile(r"[\\\t\r\n \'\"#<>&|`(){}$;\*\?]") + + +def _quote(s): + """Given a string, returns a version that can be used literally on a shell + command line, enclosing it with single quotes if necessary. + + As a special case, if given an int, returns a string containing the int, + not enclosed in quotes. + """ + if type(s) == int: + return "%d" % s + + # Empty strings need to be quoted to have any significance + if s and not SHELL_QUOTE_RE.search(s) and not s.startswith("~"): + return s + + # Single quoted strings can contain any characters unescaped except the + # single quote itself, which can't even be escaped, so the string needs to + # be closed, an escaped single quote added, and reopened. + t = type(s) + return t("'%s'") % s.replace(t("'"), t("'\\''")) + + +def quote(*strings): + """Given one or more strings, returns a quoted string that can be used + literally on a shell command line. + + >>> quote('a', 'b') + "a b" + >>> quote('a b', 'c') + "'a b' c" + """ + return " ".join(_quote(s) for s in strings) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py new file mode 100644 index 0000000000..a830a473b3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py @@ -0,0 +1,373 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import datetime +import functools +import logging +import os + +import requests +import taskcluster_urls as liburls +from requests.packages.urllib3.util.retry import Retry + +from taskgraph.task import Task +from taskgraph.util import yaml +from taskgraph.util.memoize import memoize + +logger = logging.getLogger(__name__) + +# this is set to true for `mach taskgraph action-callback --test` +testing = False + +# Default rootUrl to use if none is given in the environment; this should point +# to the production Taskcluster deployment used for CI. 
+PRODUCTION_TASKCLUSTER_ROOT_URL = None + +# the maximum number of parallel Taskcluster API calls to make +CONCURRENCY = 50 + + +@memoize +def get_root_url(use_proxy): + """Get the current TASKCLUSTER_ROOT_URL. + + When running in a task, this must come from $TASKCLUSTER_ROOT_URL; when run + on the command line, a default may be provided that points to the + production deployment of Taskcluster. If use_proxy is set, this attempts to + get TASKCLUSTER_PROXY_URL instead, failing if it is not set. + """ + if use_proxy: + try: + return liburls.normalize_root_url(os.environ["TASKCLUSTER_PROXY_URL"]) + except KeyError: + if "TASK_ID" not in os.environ: + raise RuntimeError( + "taskcluster-proxy is not available when not executing in a task" + ) + else: + raise RuntimeError("taskcluster-proxy is not enabled for this task") + + if "TASKCLUSTER_ROOT_URL" in os.environ: + logger.debug( + "Running in Taskcluster instance {}{}".format( + os.environ["TASKCLUSTER_ROOT_URL"], + " with taskcluster-proxy" + if "TASKCLUSTER_PROXY_URL" in os.environ + else "", + ) + ) + return liburls.normalize_root_url(os.environ["TASKCLUSTER_ROOT_URL"]) + + if "TASK_ID" in os.environ: + raise RuntimeError("$TASKCLUSTER_ROOT_URL must be set when running in a task") + + if PRODUCTION_TASKCLUSTER_ROOT_URL is None: + raise RuntimeError( + "Could not detect Taskcluster instance, set $TASKCLUSTER_ROOT_URL" + ) + + logger.debug("Using default TASKCLUSTER_ROOT_URL") + return liburls.normalize_root_url(PRODUCTION_TASKCLUSTER_ROOT_URL) + + +def requests_retry_session( + retries, + backoff_factor=0.1, + status_forcelist=(500, 502, 503, 504), + concurrency=CONCURRENCY, + session=None, +): + session = session or requests.Session() + retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + ) + + # Default HTTPAdapter uses 10 connections. Mount custom adapter to increase + # that limit. Connections are established as needed, so using a large value + # should not negatively impact performance. + http_adapter = requests.adapters.HTTPAdapter( + pool_connections=concurrency, + pool_maxsize=concurrency, + max_retries=retry, + ) + session.mount("http://", http_adapter) + session.mount("https://", http_adapter) + + return session + + +@memoize +def get_session(): + return requests_retry_session(retries=5) + + +def _do_request(url, method=None, **kwargs): + if method is None: + method = "post" if kwargs else "get" + + session = get_session() + if method == "get": + kwargs["stream"] = True + + response = getattr(session, method)(url, **kwargs) + + if response.status_code >= 400: + # Consume content before raise_for_status, so that the connection can be + # reused. + response.content + response.raise_for_status() + return response + + +def _handle_artifact(path, response): + if path.endswith(".json"): + return response.json() + if path.endswith(".yml"): + return yaml.load_stream(response.text) + response.raw.read = functools.partial(response.raw.read, decode_content=True) + return response.raw + + +def get_artifact_url(task_id, path, use_proxy=False): + artifact_tmpl = liburls.api( + get_root_url(False), "queue", "v1", "task/{}/artifacts/{}" + ) + data = artifact_tmpl.format(task_id, path) + if use_proxy: + # Until Bug 1405889 is deployed, we can't download directly + # from the taskcluster-proxy. Work around by using the /bewit + # endpoint instead. 
+ # The bewit URL is the body of a 303 redirect, which we don't + # want to follow (which fetches a potentially large resource). + response = _do_request( + os.environ["TASKCLUSTER_PROXY_URL"] + "/bewit", + data=data, + allow_redirects=False, + ) + return response.text + return data + + +def get_artifact(task_id, path, use_proxy=False): + """ + Returns the artifact with the given path for the given task id. + + If the path ends with ".json" or ".yml", the content is deserialized as, + respectively, json or yaml, and the corresponding python data (usually + dict) is returned. + For other types of content, a file-like object is returned. + """ + response = _do_request(get_artifact_url(task_id, path, use_proxy)) + return _handle_artifact(path, response) + + +def list_artifacts(task_id, use_proxy=False): + response = _do_request(get_artifact_url(task_id, "", use_proxy).rstrip("/")) + return response.json()["artifacts"] + + +def get_artifact_prefix(task): + prefix = None + if isinstance(task, dict): + prefix = task.get("attributes", {}).get("artifact_prefix") + elif isinstance(task, Task): + prefix = task.attributes.get("artifact_prefix") + else: + raise Exception(f"Can't find artifact-prefix of non-task: {task}") + return prefix or "public/build" + + +def get_artifact_path(task, path): + return f"{get_artifact_prefix(task)}/{path}" + + +def get_index_url(index_path, use_proxy=False, multiple=False): + index_tmpl = liburls.api(get_root_url(use_proxy), "index", "v1", "task{}/{}") + return index_tmpl.format("s" if multiple else "", index_path) + + +def find_task_id(index_path, use_proxy=False): + try: + response = _do_request(get_index_url(index_path, use_proxy)) + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + raise KeyError(f"index path {index_path} not found") + raise + return response.json()["taskId"] + + +def get_artifact_from_index(index_path, artifact_path, use_proxy=False): + full_path = index_path + "/artifacts/" + artifact_path + response = _do_request(get_index_url(full_path, use_proxy)) + return _handle_artifact(full_path, response) + + +def list_tasks(index_path, use_proxy=False): + """ + Returns a list of task_ids where each task_id is indexed under a path + in the index. Results are sorted by expiration date from oldest to newest. + """ + results = [] + data = {} + while True: + response = _do_request( + get_index_url(index_path, use_proxy, multiple=True), json=data + ) + response = response.json() + results += response["tasks"] + if response.get("continuationToken"): + data = {"continuationToken": response.get("continuationToken")} + else: + break + + # We can sort on expires because in the general case + # all of these tasks should be created with the same expires time so they end up in + # order from earliest to latest action. If more correctness is needed, consider + # fetching each task and sorting on the created date. 
+ results.sort(key=lambda t: parse_time(t["expires"])) + return [t["taskId"] for t in results] + + +def parse_time(timestamp): + """Turn a "JSON timestamp" as used in TC APIs into a datetime""" + return datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ") + + +def get_task_url(task_id, use_proxy=False): + task_tmpl = liburls.api(get_root_url(use_proxy), "queue", "v1", "task/{}") + return task_tmpl.format(task_id) + + +def get_task_definition(task_id, use_proxy=False): + response = _do_request(get_task_url(task_id, use_proxy)) + return response.json() + + +def cancel_task(task_id, use_proxy=False): + """Cancels a task given a task_id. In testing mode, just logs that it would + have cancelled.""" + if testing: + logger.info(f"Would have cancelled {task_id}.") + else: + _do_request(get_task_url(task_id, use_proxy) + "/cancel", json={}) + + +def status_task(task_id, use_proxy=False): + """Gets the status of a task given a task_id. + + In testing mode, just logs that it would have retrieved status. + + Args: + task_id (str): A task id. + use_proxy (bool): Whether to use taskcluster-proxy (default: False) + + Returns: + dict: A dictionary object as defined here: + https://docs.taskcluster.net/docs/reference/platform/queue/api#status + """ + if testing: + logger.info(f"Would have gotten status for {task_id}.") + else: + resp = _do_request(get_task_url(task_id, use_proxy) + "/status") + status = resp.json().get("status", {}) + return status + + +def state_task(task_id, use_proxy=False): + """Gets the state of a task given a task_id. + + In testing mode, just logs that it would have retrieved state. This is a subset of the + data returned by :func:`status_task`. + + Args: + task_id (str): A task id. + use_proxy (bool): Whether to use taskcluster-proxy (default: False) + + Returns: + str: The state of the task, one of + ``pending, running, completed, failed, exception, unknown``. + """ + if testing: + logger.info(f"Would have gotten state for {task_id}.") + else: + status = status_task(task_id, use_proxy=use_proxy).get("state") or "unknown" + return status + + +def rerun_task(task_id): + """Reruns a task given a task_id. In testing mode, just logs that it would + have reran.""" + if testing: + logger.info(f"Would have rerun {task_id}.") + else: + _do_request(get_task_url(task_id, use_proxy=True) + "/rerun", json={}) + + +def get_current_scopes(): + """Get the current scopes. 
This only makes sense in a task with the Taskcluster + proxy enabled, where it returns the actual scopes accorded to the task.""" + auth_url = liburls.api(get_root_url(True), "auth", "v1", "scopes/current") + resp = _do_request(auth_url) + return resp.json().get("scopes", []) + + +def get_purge_cache_url(provisioner_id, worker_type, use_proxy=False): + url_tmpl = liburls.api( + get_root_url(use_proxy), "purge-cache", "v1", "purge-cache/{}/{}" + ) + return url_tmpl.format(provisioner_id, worker_type) + + +def purge_cache(provisioner_id, worker_type, cache_name, use_proxy=False): + """Requests a cache purge from the purge-caches service.""" + if testing: + logger.info( + "Would have purged {}/{}/{}.".format( + provisioner_id, worker_type, cache_name + ) + ) + else: + logger.info(f"Purging {provisioner_id}/{worker_type}/{cache_name}.") + purge_cache_url = get_purge_cache_url(provisioner_id, worker_type, use_proxy) + _do_request(purge_cache_url, json={"cacheName": cache_name}) + + +def send_email(address, subject, content, link, use_proxy=False): + """Sends an email using the notify service""" + logger.info(f"Sending email to {address}.") + url = liburls.api(get_root_url(use_proxy), "notify", "v1", "email") + _do_request( + url, + json={ + "address": address, + "subject": subject, + "content": content, + "link": link, + }, + ) + + +def list_task_group_incomplete_tasks(task_group_id): + """Generate the incomplete tasks in a task group""" + params = {} + while True: + url = liburls.api( + get_root_url(False), + "queue", + "v1", + f"task-group/{task_group_id}/list", + ) + resp = _do_request(url, method="get", params=params).json() + for task in [t["status"] for t in resp["tasks"]]: + if task["state"] in ["running", "pending", "unscheduled"]: + yield task["taskId"] + if resp.get("continuationToken"): + params = {"continuationToken": resp.get("continuationToken")} + else: + break diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py new file mode 100644 index 0000000000..7b545595ef --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Tools for interacting with existing taskgraphs. 
+""" + + +from taskgraph.util.taskcluster import find_task_id, get_artifact + + +def find_decision_task(parameters, graph_config): + """Given the parameters for this action, find the taskId of the decision + task""" + if parameters.get("repository_type", "hg") == "hg": + return find_task_id( + "{}.v2.{}.pushlog-id.{}.decision".format( + graph_config["trust-domain"], + parameters["project"], + parameters["pushlog_id"], + ) + ) + elif parameters["repository_type"] == "git": + return find_task_id( + "{}.v2.{}.revision.{}.taskgraph.decision".format( + graph_config["trust-domain"], + parameters["project"], + parameters["head_rev"], + ) + ) + else: + raise Exception( + "Unknown repository_type {}!".format(parameters["repository_type"]) + ) + + +def find_existing_tasks_from_previous_kinds( + full_task_graph, previous_graph_ids, rebuild_kinds +): + """Given a list of previous decision/action taskIds and kinds to ignore + from the previous graphs, return a dictionary of labels-to-taskids to use + as ``existing_tasks`` in the optimization step.""" + existing_tasks = {} + for previous_graph_id in previous_graph_ids: + label_to_taskid = get_artifact(previous_graph_id, "public/label-to-taskid.json") + kind_labels = { + t.label + for t in full_task_graph.tasks.values() + if t.attributes["kind"] not in rebuild_kinds + } + for label in set(label_to_taskid.keys()).intersection(kind_labels): + existing_tasks[label] = label_to_taskid[label] + return existing_tasks diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py new file mode 100644 index 0000000000..465e4a43de --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py @@ -0,0 +1,50 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import copy + + +def merge_to(source, dest): + """ + Merge dict and arrays (override scalar values) + + Keys from source override keys from dest, and elements from lists in source + are appended to lists in dest. + + :param dict source: to copy from + :param dict dest: to copy to (modified in place) + """ + + for key, value in source.items(): + # Override mismatching or empty types + if type(value) != type(dest.get(key)): # noqa + dest[key] = source[key] + continue + + # Merge dict + if isinstance(value, dict): + merge_to(value, dest[key]) + continue + + if isinstance(value, list): + dest[key] = dest[key] + source[key] + continue + + dest[key] = source[key] + + return dest + + +def merge(*objects): + """ + Merge the given objects, using the semantics described for merge_to, with + objects later in the list taking precedence. From an inheritance + perspective, "parents" should be listed before "children". + + Returns the result without modifying any arguments. + """ + if len(objects) == 1: + return copy.deepcopy(objects[0]) + return merge_to(objects[-1], merge(*objects[:-1])) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py new file mode 100644 index 0000000000..e511978b5f --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py @@ -0,0 +1,115 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Python port of the ms.js node module this is not a direct port some things are +# more complicated or less precise and we lean on time delta here. + + +import datetime +import re + +PATTERN = re.compile(r"((?:\d+)?\.?\d+) *([a-z]+)") + + +def seconds(value): + return datetime.timedelta(seconds=int(value)) + + +def minutes(value): + return datetime.timedelta(minutes=int(value)) + + +def hours(value): + return datetime.timedelta(hours=int(value)) + + +def days(value): + return datetime.timedelta(days=int(value)) + + +def months(value): + # See warning in years(), below + return datetime.timedelta(days=int(value) * 30) + + +def years(value): + # Warning here "years" are vague don't use this for really sensitive date + # computation the idea is to give you a absolute amount of time in the + # future which is not the same thing as "precisely on this date next year" + return datetime.timedelta(days=int(value) * 365) + + +ALIASES = {} +ALIASES["seconds"] = ALIASES["second"] = ALIASES["s"] = seconds +ALIASES["minutes"] = ALIASES["minute"] = ALIASES["min"] = minutes +ALIASES["hours"] = ALIASES["hour"] = ALIASES["h"] = hours +ALIASES["days"] = ALIASES["day"] = ALIASES["d"] = days +ALIASES["months"] = ALIASES["month"] = ALIASES["mo"] = months +ALIASES["years"] = ALIASES["year"] = ALIASES["y"] = years + + +class InvalidString(Exception): + pass + + +class UnknownTimeMeasurement(Exception): + pass + + +def value_of(input_str): + """ + Convert a string to a json date in the future + :param str input_str: (ex: 1d, 2d, 6years, 2 seconds) + :returns: Unit given in seconds + """ + + matches = PATTERN.search(input_str) + + if matches is None or len(matches.groups()) < 2: + raise InvalidString(f"'{input_str}' is invalid string") + + value, unit = matches.groups() + + if unit not in ALIASES: + raise UnknownTimeMeasurement( + "{} is not a valid time measure use one of {}".format( + unit, sorted(ALIASES.keys()) + ) + ) + + return ALIASES[unit](value) + + +def json_time_from_now(input_str, now=None, datetime_format=False): + """ + :param str input_str: Input string (see value of) + :param datetime now: Optionally set the definition of `now` + :param boolean datetime_format: Set `True` to get a `datetime` output + :returns: JSON string representation of time in future. + """ + + if now is None: + now = datetime.datetime.utcnow() + + time = now + value_of(input_str) + + if datetime_format is True: + return time + else: + # Sorta a big hack but the json schema validator for date does not like the + # ISO dates until 'Z' (for timezone) is added... + # Microseconds are excluded (see bug 1381801) + return time.isoformat(timespec="milliseconds") + "Z" + + +def current_json_time(datetime_format=False): + """ + :param boolean datetime_format: Set `True` to get a `datetime` output + :returns: JSON string representation of the current time. 
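These helpers are what allow task definitions to express durations such as "1 year" or "30 minutes" and turn them into queue-friendly timestamps. A small, deterministic sketch:

    import datetime

    from taskgraph.util.time import json_time_from_now, value_of

    assert value_of("2 hours") == datetime.timedelta(hours=2)
    assert value_of("30s") == datetime.timedelta(seconds=30)

    # Pin "now" so the resulting string is reproducible.
    now = datetime.datetime(2023, 1, 1)
    assert json_time_from_now("1 day", now=now) == "2023-01-02T00:00:00.000Z"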
+ """ + if datetime_format is True: + return datetime.datetime.utcnow() + else: + # Microseconds are excluded (see bug 1381801) + return datetime.datetime.utcnow().isoformat(timespec="milliseconds") + "Z" diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py new file mode 100644 index 0000000000..9d0c032a1b --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py @@ -0,0 +1,64 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import re + +_JOINED_SYMBOL_RE = re.compile(r"([^(]*)\(([^)]*)\)$") + + +def split_symbol(treeherder_symbol): + """Split a symbol expressed as grp(sym) into its two parts. If no group is + given, the returned group is '?'""" + groupSymbol = "?" + symbol = treeherder_symbol + if "(" in symbol: + match = _JOINED_SYMBOL_RE.match(symbol) + if match: + groupSymbol, symbol = match.groups() + else: + raise Exception(f"`{symbol}` is not a valid treeherder symbol.") + return groupSymbol, symbol + + +def join_symbol(group, symbol): + """Perform the reverse of split_symbol, combining the given group and + symbol. If the group is '?', then it is omitted.""" + if group == "?": + return symbol + return f"{group}({symbol})" + + +def add_suffix(treeherder_symbol, suffix): + """Add a suffix to a treeherder symbol that may contain a group.""" + group, symbol = split_symbol(treeherder_symbol) + symbol += str(suffix) + return join_symbol(group, symbol) + + +def replace_group(treeherder_symbol, new_group): + """Add a suffix to a treeherder symbol that may contain a group.""" + _, symbol = split_symbol(treeherder_symbol) + return join_symbol(new_group, symbol) + + +def inherit_treeherder_from_dep(job, dep_job): + """Inherit treeherder defaults from dep_job""" + treeherder = job.get("treeherder", {}) + + dep_th_platform = ( + dep_job.task.get("extra", {}) + .get("treeherder", {}) + .get("machine", {}) + .get("platform", "") + ) + dep_th_collection = list( + dep_job.task.get("extra", {}).get("treeherder", {}).get("collection", {}).keys() + )[0] + treeherder.setdefault("platform", f"{dep_th_platform}/{dep_th_collection}") + treeherder.setdefault( + "tier", dep_job.task.get("extra", {}).get("treeherder", {}).get("tier", 1) + ) + # Does not set symbol + treeherder.setdefault("kind", "build") + return treeherder diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py new file mode 100644 index 0000000000..ba1d909019 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py @@ -0,0 +1,539 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import logging +import os +import re +import subprocess +from abc import ABC, abstractmethod, abstractproperty +from shutil import which + +import requests +from redo import retry + +from taskgraph.util.path import ancestors + +PUSHLOG_TMPL = "{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1" + +logger = logging.getLogger(__name__) + + +class Repository(ABC): + # Both mercurial and git use sha1 as revision idenfiers. Luckily, both define + # the same value as the null revision. 
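The treeherder symbol helpers are pure string manipulation, so they can be exercised directly; for example:

    from taskgraph.util.treeherder import add_suffix, join_symbol, split_symbol

    assert split_symbol("B(sym)") == ("B", "sym")
    assert split_symbol("sym") == ("?", "sym")     # no group defaults to '?'
    assert join_symbol("?", "sym") == "sym"        # the '?' group is omitted
    assert add_suffix("B(sym)", "-1") == "B(sym-1)"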
+ # + # https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7 + # https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30 + NULL_REVISION = "0000000000000000000000000000000000000000" + + def __init__(self, path): + self.path = path + self.binary = which(self.tool) + if self.binary is None: + raise OSError(f"{self.tool} not found!") + self._valid_diff_filter = ("m", "a", "d") + + self._env = os.environ.copy() + + def run(self, *args: str, **kwargs): + return_codes = kwargs.pop("return_codes", []) + cmd = (self.binary,) + args + + try: + return subprocess.check_output( + cmd, cwd=self.path, env=self._env, encoding="utf-8", **kwargs + ) + except subprocess.CalledProcessError as e: + if e.returncode in return_codes: + return "" + raise + + @abstractproperty + def tool(self) -> str: + """Version control system being used, either 'hg' or 'git'.""" + + @abstractproperty + def head_rev(self) -> str: + """Hash of HEAD revision.""" + + @abstractproperty + def base_rev(self): + """Hash of revision the current topic branch is based on.""" + + @abstractproperty + def branch(self): + """Current branch or bookmark the checkout has active.""" + + @abstractproperty + def all_remote_names(self): + """Name of all configured remote repositories.""" + + @abstractproperty + def default_remote_name(self): + """Name the VCS defines for the remote repository when cloning + it for the first time. This name may not exist anymore if users + changed the default configuration, for instance.""" + + @abstractproperty + def remote_name(self): + """Name of the remote repository.""" + + def _get_most_suitable_remote(self, remote_instructions): + remotes = self.all_remote_names + if len(remotes) == 1: + return remotes[0] + + if self.default_remote_name in remotes: + return self.default_remote_name + + first_remote = remotes[0] + logger.warning( + f"Unable to determine which remote repository to use between: {remotes}. " + f'Arbitrarily using the first one "{first_remote}". Please set an ' + f"`{self.default_remote_name}` remote if the arbitrarily selected one " + f"is not right. To do so: {remote_instructions}" + ) + + return first_remote + + @abstractproperty + def default_branch(self): + """Name of the default branch.""" + + @abstractmethod + def get_url(self, remote=None): + """Get URL of the upstream repository.""" + + @abstractmethod + def get_commit_message(self, revision=None): + """Commit message of specified revision or current commit.""" + + @abstractmethod + def get_changed_files(self, diff_filter, mode="unstaged", rev=None, base_rev=None): + """Return a list of files that are changed in: + * either this repository's working copy, + * or at a given revision (``rev``) + * or between 2 revisions (``base_rev`` and ``rev``) + + ``diff_filter`` controls which kinds of modifications are returned. + It is a string which may only contain the following characters: + + A - Include files that were added + D - Include files that were deleted + M - Include files that were modified + + By default, all three will be included. + + ``mode`` can be one of 'unstaged', 'staged' or 'all'. Only has an + effect on git. Defaults to 'unstaged'. + + ``rev`` is a specifier for which changesets to consider for + changes. The exact meaning depends on the vcs system being used. + + ``base_rev`` specifies the range of changesets. This parameter cannot + be used without ``rev``. The range includes ``rev`` but excludes + ``base_rev``. 
+ """ + + @abstractmethod + def get_outgoing_files(self, diff_filter, upstream): + """Return a list of changed files compared to upstream. + + ``diff_filter`` works the same as `get_changed_files`. + ``upstream`` is a remote ref to compare against. If unspecified, + this will be determined automatically. If there is no remote ref, + a MissingUpstreamRepo exception will be raised. + """ + + @abstractmethod + def working_directory_clean(self, untracked=False, ignored=False): + """Determine if the working directory is free of modifications. + + Returns True if the working directory does not have any file + modifications. False otherwise. + + By default, untracked and ignored files are not considered. If + ``untracked`` or ``ignored`` are set, they influence the clean check + to factor these file classes into consideration. + """ + + @abstractmethod + def update(self, ref): + """Update the working directory to the specified reference.""" + + @abstractmethod + def find_latest_common_revision(self, base_ref_or_rev, head_rev): + """Find the latest revision that is common to both the given + ``head_rev`` and ``base_ref_or_rev``""" + + @abstractmethod + def does_revision_exist_locally(self, revision): + """Check whether this revision exists in the local repository. + + If this function returns an unexpected value, then make sure + the revision was fetched from the remote repository.""" + + +class HgRepository(Repository): + tool = "hg" + default_remote_name = "default" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._env["HGPLAIN"] = "1" + + @property + def head_rev(self): + return self.run("log", "-r", ".", "-T", "{node}").strip() + + @property + def base_rev(self): + return self.run("log", "-r", "last(ancestors(.) and public())", "-T", "{node}") + + @property + def branch(self): + bookmarks_fn = os.path.join(self.path, ".hg", "bookmarks.current") + if os.path.exists(bookmarks_fn): + with open(bookmarks_fn) as f: + bookmark = f.read() + return bookmark or None + + return None + + @property + def all_remote_names(self): + remotes = self.run("paths", "--quiet").splitlines() + if not remotes: + raise RuntimeError("No remotes defined") + return remotes + + @property + def remote_name(self): + return self._get_most_suitable_remote( + "Edit .hg/hgrc and add:\n\n[paths]\ndefault = $URL", + ) + + @property + def default_branch(self): + # Mercurial recommends keeping "default" + # https://www.mercurial-scm.org/wiki/StandardBranching#Don.27t_use_a_name_other_than_default_for_your_main_development_branch + return "default" + + def get_url(self, remote="default"): + return self.run("path", "-T", "{url}", remote).strip() + + def get_commit_message(self, revision=None): + revision = revision or self.head_rev + return self.run("log", "-r", ".", "-T", "{desc}") + + def _format_diff_filter(self, diff_filter, for_status=False): + df = diff_filter.lower() + assert all(f in self._valid_diff_filter for f in df) + + # When looking at the changes in the working directory, the hg status + # command uses 'd' for files that have been deleted with a non-hg + # command, and 'r' for files that have been `hg rm`ed. Use both. 
+ return df.replace("d", "dr") if for_status else df + + def _files_template(self, diff_filter): + template = "" + df = self._format_diff_filter(diff_filter) + if "a" in df: + template += "{file_adds % '{file}\\n'}" + if "d" in df: + template += "{file_dels % '{file}\\n'}" + if "m" in df: + template += "{file_mods % '{file}\\n'}" + return template + + def get_changed_files( + self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None + ): + if rev is None: + if base_rev is not None: + raise ValueError("Cannot specify `base_rev` without `rev`") + # Use --no-status to print just the filename. + df = self._format_diff_filter(diff_filter, for_status=True) + return self.run("status", "--no-status", f"-{df}").splitlines() + else: + template = self._files_template(diff_filter) + revision_argument = rev if base_rev is None else f"{base_rev}~-1::{rev}" + return self.run("log", "-r", revision_argument, "-T", template).splitlines() + + def get_outgoing_files(self, diff_filter="ADM", upstream=None): + template = self._files_template(diff_filter) + + if not upstream: + return self.run( + "log", "-r", "draft() and ancestors(.)", "--template", template + ).split() + + return self.run( + "outgoing", + "-r", + ".", + "--quiet", + "--template", + template, + upstream, + return_codes=(1,), + ).split() + + def working_directory_clean(self, untracked=False, ignored=False): + args = ["status", "--modified", "--added", "--removed", "--deleted"] + if untracked: + args.append("--unknown") + if ignored: + args.append("--ignored") + + # If output is empty, there are no entries of requested status, which + # means we are clean. + return not len(self.run(*args).strip()) + + def update(self, ref): + return self.run("update", "--check", ref) + + def find_latest_common_revision(self, base_ref_or_rev, head_rev): + return self.run( + "log", + "-r", + f"last(ancestors('{base_ref_or_rev}') and ancestors('{head_rev}'))", + "--template", + "{node}", + ).strip() + + def does_revision_exist_locally(self, revision): + try: + return self.run("log", "-r", revision).strip() != "" + except subprocess.CalledProcessError as e: + # Error code 255 comes with the message: + # "abort: unknown revision $REVISION" + if e.returncode == 255: + return False + raise + + +class GitRepository(Repository): + tool = "git" + default_remote_name = "origin" + + _LS_REMOTE_PATTERN = re.compile(r"ref:\s+refs/heads/(?P<branch_name>\S+)\s+HEAD") + + @property + def head_rev(self): + return self.run("rev-parse", "--verify", "HEAD").strip() + + @property + def base_rev(self): + refs = self.run( + "rev-list", "HEAD", "--topo-order", "--boundary", "--not", "--remotes" + ).splitlines() + if refs: + return refs[-1][1:] # boundary starts with a prefix `-` + return self.head_rev + + @property + def branch(self): + return self.run("branch", "--show-current").strip() or None + + @property + def all_remote_names(self): + remotes = self.run("remote").splitlines() + if not remotes: + raise RuntimeError("No remotes defined") + return remotes + + @property + def remote_name(self): + try: + remote_branch_name = self.run( + "rev-parse", "--verify", "--abbrev-ref", "--symbolic-full-name", "@{u}" + ).strip() + return remote_branch_name.split("/")[0] + except subprocess.CalledProcessError as e: + # Error code 128 comes with the message: + # "fatal: no upstream configured for branch $BRANCH" + if e.returncode != 128: + raise + + return self._get_most_suitable_remote("`git remote add origin $URL`") + + @property + def default_branch(self): + try: + # this one works if 
the current repo was cloned from an existing + # repo elsewhere + return self._get_default_branch_from_cloned_metadata() + except (subprocess.CalledProcessError, RuntimeError): + pass + + try: + # This call works if you have (network) access to the repo + return self._get_default_branch_from_remote_query() + except (subprocess.CalledProcessError, RuntimeError): + pass + + # this one is the last resort in case the remote is not accessible and + # the local repo is where `git init` was made + return self._guess_default_branch() + + def _get_default_branch_from_remote_query(self): + # This function requires network access to the repo + remote_name = self.remote_name + output = self.run("ls-remote", "--symref", remote_name, "HEAD") + matches = self._LS_REMOTE_PATTERN.search(output) + if not matches: + raise RuntimeError( + f'Could not find the default branch of remote repository "{remote_name}". ' + "Got: {output}" + ) + + branch_name = matches.group("branch_name") + return f"{remote_name}/{branch_name}" + + def _get_default_branch_from_cloned_metadata(self): + return self.run("rev-parse", "--abbrev-ref", f"{self.remote_name}/HEAD").strip() + + def _guess_default_branch(self): + branches = [ + line.strip() + for line in self.run( + "branch", "--all", "--no-color", "--format=%(refname)" + ).splitlines() + for candidate_branch in ("main", "master", "branches/default/tip") + if line.strip().endswith(candidate_branch) + ] + + if len(branches) == 1: + return branches[0] + + raise RuntimeError(f"Unable to find default branch. Got: {branches}") + + def get_url(self, remote="origin"): + return self.run("remote", "get-url", remote).strip() + + def get_commit_message(self, revision=None): + revision = revision or self.head_rev + return self.run("log", "-n1", "--format=%B") + + def get_changed_files( + self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None + ): + assert all(f.lower() in self._valid_diff_filter for f in diff_filter) + + if rev is None: + if base_rev is not None: + raise ValueError("Cannot specify `base_rev` without `rev`") + cmd = ["diff"] + if mode == "staged": + cmd.append("--cached") + elif mode == "all": + cmd.append("HEAD") + else: + revision_argument = ( + f"{rev}~1..{rev}" if base_rev is None else f"{base_rev}..{rev}" + ) + cmd = ["log", "--format=format:", revision_argument] + + cmd.append("--name-only") + cmd.append("--diff-filter=" + diff_filter.upper()) + + files = self.run(*cmd).splitlines() + return [f for f in files if f] + + def get_outgoing_files(self, diff_filter="ADM", upstream=None): + assert all(f.lower() in self._valid_diff_filter for f in diff_filter) + + not_condition = upstream if upstream else "--remotes" + + files = self.run( + "log", + "--name-only", + f"--diff-filter={diff_filter.upper()}", + "--oneline", + "--pretty=format:", + "HEAD", + "--not", + not_condition, + ).splitlines() + return [f for f in files if f] + + def working_directory_clean(self, untracked=False, ignored=False): + args = ["status", "--porcelain"] + + # Even in --porcelain mode, behavior is affected by the + # ``status.showUntrackedFiles`` option, which means we need to be + # explicit about how to treat untracked files. + if untracked: + args.append("--untracked-files=all") + else: + args.append("--untracked-files=no") + + if ignored: + args.append("--ignored") + + # If output is empty, there are no entries of requested status, which + # means we are clean. 
+ return not len(self.run(*args).strip()) + + def update(self, ref): + self.run("checkout", ref) + + def find_latest_common_revision(self, base_ref_or_rev, head_rev): + return self.run("merge-base", base_ref_or_rev, head_rev).strip() + + def does_revision_exist_locally(self, revision): + try: + return self.run("cat-file", "-t", revision).strip() == "commit" + except subprocess.CalledProcessError as e: + # Error code 128 comes with the message: + # "git cat-file: could not get object info" + if e.returncode == 128: + return False + raise + + +def get_repository(path): + """Get a repository object for the repository at `path`. + If `path` is not a known VCS repository, raise an exception. + """ + for path in ancestors(path): + if os.path.isdir(os.path.join(path, ".hg")): + return HgRepository(path) + elif os.path.exists(os.path.join(path, ".git")): + return GitRepository(path) + + raise RuntimeError("Current directory is neither a git or hg repository") + + +def find_hg_revision_push_info(repository, revision): + """Given the parameters for this action and a revision, find the + pushlog_id of the revision.""" + pushlog_url = PUSHLOG_TMPL.format(repository, revision) + + def query_pushlog(url): + r = requests.get(pushlog_url, timeout=60) + r.raise_for_status() + return r + + r = retry( + query_pushlog, + args=(pushlog_url,), + attempts=5, + sleeptime=10, + ) + pushes = r.json()["pushes"] + if len(pushes) != 1: + raise RuntimeError( + "Unable to find a single pushlog_id for {} revision {}: {}".format( + repository, revision, pushes + ) + ) + pushid = list(pushes.keys())[0] + return { + "pushdate": pushes[pushid]["date"], + "pushid": pushid, + "user": pushes[pushid]["user"], + } diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py new file mode 100644 index 0000000000..5911914f13 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py @@ -0,0 +1,283 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
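get_repository is the usual entry point to the vcs module: it walks up from the given path until it finds a .hg or .git directory and returns the matching wrapper. A minimal sketch, run from inside any checkout:

    from taskgraph.util.vcs import get_repository

    repo = get_repository(".")
    print(repo.tool)                      # "hg" or "git"
    print(repo.head_rev)                  # hash of the current revision
    print(repo.get_changed_files("ADM"))  # added/deleted/modified, unstaged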
+ + +import logging +import sys +from abc import ABC, abstractmethod + +import attr + +from taskgraph.config import GraphConfig +from taskgraph.parameters import Parameters +from taskgraph.taskgraph import TaskGraph +from taskgraph.util.attributes import match_run_on_projects +from taskgraph.util.treeherder import join_symbol + +logger = logging.getLogger(__name__) + + +@attr.s(frozen=True) +class Verification(ABC): + func = attr.ib() + + @abstractmethod + def verify(self, **kwargs) -> None: + pass + + +@attr.s(frozen=True) +class InitialVerification(Verification): + """Verification that doesn't depend on any generation state.""" + + def verify(self): + self.func() + + +@attr.s(frozen=True) +class GraphVerification(Verification): + """Verification for a TaskGraph object.""" + + run_on_projects = attr.ib(default=None) + + def verify( + self, graph: TaskGraph, graph_config: GraphConfig, parameters: Parameters + ): + if self.run_on_projects and not match_run_on_projects( + parameters["project"], self.run_on_projects + ): + return + + scratch_pad = {} + graph.for_each_task( + self.func, + scratch_pad=scratch_pad, + graph_config=graph_config, + parameters=parameters, + ) + self.func( + None, + graph, + scratch_pad=scratch_pad, + graph_config=graph_config, + parameters=parameters, + ) + + +@attr.s(frozen=True) +class ParametersVerification(Verification): + """Verification for a set of parameters.""" + + def verify(self, parameters: Parameters): + self.func(parameters) + + +@attr.s(frozen=True) +class KindsVerification(Verification): + """Verification for kinds.""" + + def verify(self, kinds: dict): + self.func(kinds) + + +@attr.s(frozen=True) +class VerificationSequence: + """ + Container for a sequence of verifications over a TaskGraph. Each + verification is represented as a callable taking (task, taskgraph, + scratch_pad), called for each task in the taskgraph, and one more + time with no task but with the taskgraph and the same scratch_pad + that was passed for each task. + """ + + _verifications = attr.ib(factory=dict) + _verification_types = { + "graph": GraphVerification, + "initial": InitialVerification, + "kinds": KindsVerification, + "parameters": ParametersVerification, + } + + def __call__(self, name, *args, **kwargs): + for verification in self._verifications.get(name, []): + verification.verify(*args, **kwargs) + + def add(self, name, **kwargs): + cls = self._verification_types.get(name, GraphVerification) + + def wrap(func): + self._verifications.setdefault(name, []).append(cls(func, **kwargs)) + return func + + return wrap + + +verifications = VerificationSequence() + + +@verifications.add("full_task_graph") +def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function verifies that tuple + (collection.keys(), machine.platform, groupSymbol, symbol) is unique + for a target task graph. 
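Checks are registered with the decorator shown above and later driven by calling the sequence with the name of the verification point. A self-contained sketch using the simplest variant, an "initial" verification that takes no generation state (the check itself is a placeholder):

    from taskgraph.util.verify import verifications

    @verifications.add("initial")
    def check_prerequisites():
        # Hypothetical check; runs before any graph state exists.
        pass

    # The generation code would drive it with something like:
    verifications("initial")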
+ """ + if task is None: + return + task_dict = task.task + if "extra" in task_dict: + extra = task_dict["extra"] + if "treeherder" in extra: + treeherder = extra["treeherder"] + + collection_keys = tuple(sorted(treeherder.get("collection", {}).keys())) + if len(collection_keys) != 1: + raise Exception( + "Task {} can't be in multiple treeherder collections " + "(the part of the platform after `/`): {}".format( + task.label, collection_keys + ) + ) + platform = treeherder.get("machine", {}).get("platform") + group_symbol = treeherder.get("groupSymbol") + symbol = treeherder.get("symbol") + + key = (platform, collection_keys[0], group_symbol, symbol) + if key in scratch_pad: + raise Exception( + "Duplicate treeherder platform and symbol in tasks " + "`{}`and `{}`: {} {}".format( + task.label, + scratch_pad[key], + f"{platform}/{collection_keys[0]}", + join_symbol(group_symbol, symbol), + ) + ) + else: + scratch_pad[key] = task.label + + +@verifications.add("full_task_graph") +def verify_trust_domain_v2_routes( + task, taskgraph, scratch_pad, graph_config, parameters +): + """ + This function ensures that any two tasks have distinct ``index.{trust-domain}.v2`` routes. + """ + if task is None: + return + route_prefix = "index.{}.v2".format(graph_config["trust-domain"]) + task_dict = task.task + routes = task_dict.get("routes", []) + + for route in routes: + if route.startswith(route_prefix): + if route in scratch_pad: + raise Exception( + "conflict between {}:{} for route: {}".format( + task.label, scratch_pad[route], route + ) + ) + else: + scratch_pad[route] = task.label + + +@verifications.add("full_task_graph") +def verify_routes_notification_filters( + task, taskgraph, scratch_pad, graph_config, parameters +): + """ + This function ensures that only understood filters for notifications are + specified. + + See: https://docs.taskcluster.net/reference/core/taskcluster-notify/docs/usage + """ + if task is None: + return + route_prefix = "notify." + valid_filters = ("on-any", "on-completed", "on-failed", "on-exception") + task_dict = task.task + routes = task_dict.get("routes", []) + + for route in routes: + if route.startswith(route_prefix): + # Get the filter of the route + route_filter = route.split(".")[-1] + if route_filter not in valid_filters: + raise Exception( + "{} has invalid notification filter ({})".format( + task.label, route_filter + ) + ) + + +@verifications.add("full_task_graph") +def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config, parameters): + tiers = scratch_pad + if task is not None: + tiers[task.label] = ( + task.task.get("extra", {}).get("treeherder", {}).get("tier", sys.maxsize) + ) + else: + + def printable_tier(tier): + if tier == sys.maxsize: + return "unknown" + return tier + + for task in taskgraph.tasks.values(): + tier = tiers[task.label] + for d in task.dependencies.values(): + if taskgraph[d].task.get("workerType") == "always-optimized": + continue + if "dummy" in taskgraph[d].kind: + continue + if tier < tiers[d]: + raise Exception( + "{} (tier {}) cannot depend on {} (tier {})".format( + task.label, + printable_tier(tier), + d, + printable_tier(tiers[d]), + ) + ) + + +@verifications.add("full_task_graph") +def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function verifies that toolchain aliases are not reused. 
+ """ + if task is None: + return + attributes = task.attributes + if "toolchain-alias" in attributes: + keys = attributes["toolchain-alias"] + if not keys: + keys = [] + elif isinstance(keys, str): + keys = [keys] + for key in keys: + if key in scratch_pad: + raise Exception( + "Duplicate toolchain-alias in tasks " + "`{}`and `{}`: {}".format( + task.label, + scratch_pad[key], + key, + ) + ) + else: + scratch_pad[key] = task.label + + +@verifications.add("optimized_task_graph") +def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function ensures that always-optimized tasks have been optimized. + """ + if task is None: + return + if task.task.get("workerType") == "always-optimized": + raise Exception(f"Could not optimize the task {task.label!r}") diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py new file mode 100644 index 0000000000..d71f7e06a3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py @@ -0,0 +1,75 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import attr + +from .keyed_by import evaluate_keyed_by +from .memoize import memoize + + +@attr.s +class _BuiltinWorkerType: + provisioner = attr.ib(str) + worker_type = attr.ib(str) + + @property + def implementation(self): + """ + Since the list of built-in worker-types is small and fixed, we can get + away with punning the implementation name (in + `taskgraph.transforms.task`) and the worker_type. + """ + return self.worker_type + + +_BUILTIN_TYPES = { + "always-optimized": _BuiltinWorkerType("invalid", "always-optimized"), + "succeed": _BuiltinWorkerType("built-in", "succeed"), +} + + +@memoize +def worker_type_implementation(graph_config, worker_type): + """Get the worker implementation and OS for the given workerType, where the + OS represents the host system, not the target OS, in the case of + cross-compiles.""" + if worker_type in _BUILTIN_TYPES: + # For the built-in worker-types, we use an `implementation that matches + # the worker-type. + return _BUILTIN_TYPES[worker_type].implementation, None + worker_config = evaluate_keyed_by( + {"by-worker-type": graph_config["workers"]["aliases"]}, + "worker-types.yml", + {"worker-type": worker_type}, + ) + return worker_config["implementation"], worker_config.get("os") + + +@memoize +def get_worker_type(graph_config, alias, level): + """ + Get the worker type based, evaluating aliases from the graph config. 
+ """ + if alias in _BUILTIN_TYPES: + builtin_type = _BUILTIN_TYPES[alias] + return builtin_type.provisioner, builtin_type.worker_type + + level = str(level) + worker_config = evaluate_keyed_by( + {"by-alias": graph_config["workers"]["aliases"]}, + "graph_config.workers.aliases", + {"alias": alias}, + ) + provisioner = evaluate_keyed_by( + worker_config["provisioner"], + alias, + {"level": level}, + ).format(level=level) + worker_type = evaluate_keyed_by( + worker_config["worker-type"], + alias, + {"level": level}, + ).format(level=level, alias=alias) + return provisioner, worker_type diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py new file mode 100644 index 0000000000..141c7a16d3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py @@ -0,0 +1,36 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import os + +from yaml.loader import SafeLoader + + +class UnicodeLoader(SafeLoader): + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + +UnicodeLoader.add_constructor("tag:yaml.org,2002:str", UnicodeLoader.construct_yaml_str) + + +def load_stream(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = UnicodeLoader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + + +def load_yaml(*parts): + """Convenience function to load a YAML file in the given path. This is + useful for loading kind configuration files from the kind path.""" + filename = os.path.join(*parts) + with open(filename, "rb") as f: + return load_stream(f) |
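load_yaml is a thin convenience wrapper: the path segments are joined with os.path.join, the file is opened in binary mode, and only the first YAML document is returned, parsed with the safe loader defined above. A small sketch with a made-up kind file path:

    from taskgraph.util.yaml import load_yaml

    # Hypothetical kind configuration; any readable YAML file works here.
    kind_config = load_yaml("taskcluster", "ci", "build", "kind.yml")
    print(kind_config.get("kind-dependencies", []))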