74 files changed, 15455 insertions, 0 deletions
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py
new file mode 100644
index 0000000000..81cc763230
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py
@@ -0,0 +1,16 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+__version__ = "6.3.0"
+
+# Maximum number of dependencies a single task can have.
+# https://docs.taskcluster.net/reference/platform/taskcluster-queue/references/api#createTask
+# specifies 100, but taskgraph.create may also add the decision task id as a
+# dependency, so one slot is reserved and the limit is set to 99.
+MAX_DEPENDENCIES = 99
+
+# Enable fast task generation for local debugging.
+# This is normally switched on via the --fast/-F flag to `mach taskgraph`.
+# Currently this skips toolchain task optimizations and schema validation.
+fast = False
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/__init__.py
new file mode 100644
index 0000000000..590a957282
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/__init__.py
@@ -0,0 +1,16 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from .registry import (
+    register_callback_action,
+    render_actions_json,
+    trigger_action_callback,
+)
+
+__all__ = [
+    "register_callback_action",
+    "render_actions_json",
+    "trigger_action_callback",
+]
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py
new file mode 100644
index 0000000000..c5e1821546
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py
@@ -0,0 +1,64 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from taskgraph.actions.registry import register_callback_action
+from taskgraph.actions.util import (
+    combine_task_graph_files,
+    create_tasks,
+    fetch_graph_and_labels,
+)
+
+
+@register_callback_action(
+    name="add-new-jobs",
+    title="Add new jobs",
+    generic=True,
+    symbol="add-new",
+    description="Add new jobs using task labels.",
+    order=100,
+    context=[],
+    schema={
+        "type": "object",
+        "properties": {
+            "tasks": {
+                "type": "array",
+                "description": "An array of task labels",
+                "items": {"type": "string"},
+            },
+            "times": {
+                "type": "integer",
+                "default": 1,
+                "minimum": 1,
+                "maximum": 100,
+                "title": "Times",
+                "description": "How many times to run each task.",
+            },
+        },
+    },
+)
+def add_new_jobs_action(parameters, graph_config, input, task_group_id, task_id):
+    """Schedule each requested label ``times`` times; unknown labels raise."""
+    decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
+        parameters, graph_config
+    )
+
+    labels = []
+    for label in input["tasks"]:
+        if label not in full_task_graph.tasks:
+            raise Exception(f"{label} was not found in the task-graph")
+        labels.append(label)
+
+    times = input.get("times", 1)
+    for run in range(times):
+        create_tasks(
+            graph_config,
+            labels,
+            full_task_graph,
+            label_to_taskid,
+            parameters,
+            decision_task_id,
+            f"{run}",
+        )
+    combine_task_graph_files(list(range(times)))
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py
new file mode 100644
index 0000000000..03788c6538
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py
@@ -0,0 +1,42 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+
+import requests
+
+from taskgraph.util.taskcluster import cancel_task
+
+from .registry import register_callback_action
+
+logger = logging.getLogger(__name__)
+
+
+@register_callback_action(
+    title="Cancel Task",
+    name="cancel",
+    symbol="cx",
+    generic=True,
+    description=("Cancel the given task"),
+    order=350,
+    context=[{}],
+)
+def cancel_action(parameters, graph_config, input, task_group_id, task_id):
+    """Cancel the task this action was triggered on.
+
+    An HTTP 409 from the cancel call is logged and otherwise ignored.
+    """
+    # Note that this is limited by the scopes afforded to generic actions to
+    # only cancel tasks with the level-specific schedulerId.
+    try:
+        cancel_task(task_id, use_proxy=True)
+    except requests.HTTPError as err:
+        if err.response.status_code != 409:
+            raise
+        # A 409 response indicates that this task is past its deadline.  It
+        # cannot be cancelled at this time, but it's also not running
+        # anymore, so we can ignore this error.
+        message = 'Task "{}" is past its deadline and cannot be cancelled.'
+        logger.info(message.format(task_id))
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py
new file mode 100644
index 0000000000..d3e0440839
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py
@@ -0,0 +1,61 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import os
+from concurrent import futures
+
+import requests
+
+from taskgraph.util.taskcluster import (
+    CONCURRENCY,
+    cancel_task,
+    list_task_group_incomplete_tasks,
+)
+
+from .registry import register_callback_action
+
+logger = logging.getLogger(__name__)
+
+
+@register_callback_action(
+    title="Cancel All",
+    name="cancel-all",
+    generic=True,
+    symbol="cAll",
+    description=(
+        "Cancel all running and pending tasks created by the decision task "
+        "this action task is associated with."
+    ),
+    order=400,
+    context=[],
+)
+def cancel_all_action(parameters, graph_config, input, task_group_id, task_id):
+    """Cancel the incomplete tasks of ``task_group_id``, skipping $TASK_ID."""
+
+    def do_cancel_task(task_id):
+        logger.info(f"Cancelling task {task_id}")
+        try:
+            cancel_task(task_id, use_proxy=True)
+        except requests.HTTPError as err:
+            if err.response.status_code != 409:
+                raise
+            # A 409 response indicates that this task is past its deadline.  It
+            # cannot be cancelled at this time, but it's also not running
+            # anymore, so we can ignore this error.
+            logger.info(f"Task {task_id} is past its deadline and cannot be cancelled.")
+
+    # Leave out the task named by $TASK_ID (presumably this action task itself).
+    own_task_id = os.environ.get("TASK_ID", "")
+    to_cancel = []
+    for candidate in list_task_group_incomplete_tasks(task_group_id):
+        if candidate != own_task_id:
+            to_cancel.append(candidate)
+    logger.info(f"Cancelling {len(to_cancel)} tasks")
+    with futures.ThreadPoolExecutor(CONCURRENCY) as pool:
+        pending = [pool.submit(do_cancel_task, candidate) for candidate in to_cancel]
+        # Calling result() re-raises any exception from the worker thread.
+        for done in futures.as_completed(pending):
+            done.result()
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/rebuild_cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/rebuild_cached_tasks.py
new file mode 100644
index 0000000000..2b88e6a698
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/rebuild_cached_tasks.py
@@ -0,0 +1,36 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from .registry import register_callback_action
+from .util import create_tasks, fetch_graph_and_labels
+
+
+@register_callback_action(
+    name="rebuild-cached-tasks",
+    title="Rebuild Cached Tasks",
+    symbol="rebuild-cached",
+    description="Rebuild cached tasks.",
+    order=1000,
+    context=[],
+)
+def rebuild_cached_tasks_action(
+    parameters, graph_config, input, task_group_id, task_id
+):
+    """Re-create every task whose ``cached_task`` attribute is truthy."""
+    decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
+        parameters, graph_config
+    )
+    cached_tasks = []
+    for label, task in full_task_graph.tasks.items():
+        if task.attributes.get("cached_task", False):
+            cached_tasks.append(label)
+    if cached_tasks:
+        create_tasks(
+            graph_config,
+            cached_tasks,
+            full_task_graph,
+            label_to_taskid,
+            parameters,
+            decision_task_id,
+        )
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py
new file mode 100644
index 0000000000..1e909d30c7
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py
@@ -0,0 +1,352 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import json
+from collections import namedtuple
+from types import FunctionType
+
+from mozilla_repo_urls import parse
+
+from taskgraph import create
+from taskgraph.config import load_graph_config
+from taskgraph.parameters import Parameters
+from taskgraph.util import hash, taskcluster, yaml
+from taskgraph.util.memoize import memoize
+from taskgraph.util.python_path import import_sibling_modules
+
+actions = []  # Action tuples appended by register_callback_action
+callbacks = {}  # cb_name -> registered callback function
+
+Action = namedtuple("Action", ["order", "cb_name", "generic", "action_builder"])
+
+
+def is_json(data):
+    """Return ``True`` if ``data`` can be serialized by ``json.dumps``."""
+    try:
+        json.dumps(data)
+    except (TypeError, ValueError):  # TypeError: unsupported type; ValueError: cycle
+        return False
+    return True
+
+
+@memoize
+def read_taskcluster_yml(filename):
+    """Load and parse the .taskcluster.yml at ``filename``; memoized to save time."""
+    return yaml.load_yaml(filename)
+
+
+@memoize
+def hash_taskcluster_yml(filename):
+    """
+    Return the first 10 characters of the sha256 of the file's content, which
+    administrative scripts use to create a hook based on this content.
+    """
+    digest = hash.hash_path(filename)
+    return digest[:10]
+
+
+def register_callback_action(
+    name,
+    title,
+    symbol,
+    description,
+    order=10000,
+    context=[],
+    available=lambda parameters: True,
+    schema=None,
+    generic=True,
+    cb_name=None,
+):
+    """
+    Register an action callback that can be triggered from supporting
+    user interfaces, such as Treeherder.
+
+    This function is to be used as a decorator for a callback that takes
+    parameters as follows:
+
+    ``parameters``:
+        Decision task :class:`parameters <taskgraph.parameters.Parameters>`.
+    ``graph_config``:
+        The graph configuration, as returned by ``load_graph_config``.
+    ``input``:
+        Input matching the JSON schema; ``None`` if no ``schema`` was given.
+    ``task_group_id``:
+        The id of the task-group this was triggered for.
+    ``task_id``:
+        Identifier of the task the action was triggered for, ``None`` if no
+        ``context`` parameter was given to ``register_callback_action``.
+
+    Args:
+        name (str):
+            An identifier for this action, used by UIs to find the action.
+        title (str):
+            A human readable title for the action to be used as label on a button
+            or text on a link for triggering the action.
+        symbol (str):
+            Treeherder symbol for the action callback, this is the symbol that the
+            task calling your callback will be displayed as. This is usually 1-3
+            letters abbreviating the action title.
+        description (str):
+            A human readable description of the action in **markdown**.
+            This will be displayed as a tooltip and in a dialog window when the
+            action is triggered. This is a good place to describe how to use it.
+        order (int):
+            Order of the action in menus, this is relative to the ``order`` of
+            other actions declared.
+        context (list of dict):
+            List of tag-sets specifying which tasks the action can take as input.
+            If no tag-sets are specified the action is related to the entire
+            task-group, and won't be triggered with a given task.
+
+            Otherwise, with ``context = [{'k': 'b', 'p': 'l'}, {'k': 't'}]`` it will
+            only be displayed in the context menu for tasks that have
+            ``task.tags.k == 'b' && task.tags.p == 'l'`` or ``task.tags.k == 't'``.
+            Essentially, this allows filtering on ``task.tags``.
+
+            If this is a function, it is given the decision parameters and must return
+            a value of the form described above.
+        available (function):
+            An optional function that given decision parameters decides if the
+            action is available. Defaults to a function that always returns ``True``.
+        schema (dict or function):
+            JSON schema specifying input accepted by the action, or a function that
+            returns one when called with ``graph_config``.  Optional; may be ``None``.
+        generic (bool):
+            Whether this is a generic action or has its own permissions.
+        cb_name (str):
+            The name under which this function should be registered, defaulting to
+            `name`.  This is used to generate actionPerm for non-generic hook
+            actions, and thus appears in ci-configuration and various role and hook
+            names.  Unlike `name`, which can appear multiple times, cb_name must be
+            unique among all registered callbacks.
+
+    Returns:
+        function: Decorator to be used for the callback function.
+    """
+    mem = {"registered": False}  # workaround nonlocal missing in 2.x
+
+    assert isinstance(title, str), "title must be a string"
+    assert isinstance(description, str), "description must be a string"
+    title = title.strip()
+    description = description.strip()
+
+    # ensure that context is callable
+    if not callable(context):
+        context_value = context
+        context = lambda params: context_value  # noqa
+
+    def register_callback(cb, cb_name=cb_name):
+        assert isinstance(name, str), "name must be a string"
+        assert isinstance(order, int), "order must be an integer"
+        assert callable(schema) or is_json(
+            schema
+        ), "schema must be a JSON compatible object"
+        assert isinstance(cb, FunctionType), "callback must be a function"
+        assert isinstance(symbol, str), "symbol must be a string"  # before len()
+        # Allow for json-e > 25 chars in the symbol.
+        if "$" not in symbol:
+            assert 1 <= len(symbol) <= 25, "symbol must be between 1 and 25 characters"
+
+        assert not mem[
+            "registered"
+        ], "register_callback_action must be used as decorator"
+        if not cb_name:
+            cb_name = name
+        assert cb_name not in callbacks, "callback name {} is not unique".format(
+            cb_name
+        )
+
+        def action_builder(parameters, graph_config, decision_task_id):
+            if not available(parameters):
+                return None
+
+            actionPerm = "generic" if generic else cb_name
+
+            # gather up the common decision-task-supplied data for this action
+            repo_param = "head_repository"
+            repository = {
+                "url": parameters[repo_param],
+                "project": parameters["project"],
+                "level": parameters["level"],
+            }
+
+            revision = parameters["head_rev"]
+            push = {
+                "owner": "mozilla-taskcluster-maintenance@mozilla.com",
+                "pushlog_id": parameters["pushlog_id"],
+                "revision": revision,
+            }
+            branch = parameters.get("head_ref")
+            if branch:
+                push["branch"] = branch
+
+            action = {
+                "name": name,
+                "title": title,
+                "description": description,
+                # target taskGroupId (the task group this decision task is creating)
+                "taskGroupId": decision_task_id,
+                "cb_name": cb_name,
+                "symbol": symbol,
+            }
+
+            rv = {
+                "name": name,
+                "title": title,
+                "description": description,
+                "context": context(parameters),
+            }
+            if schema:
+                rv["schema"] = (
+                    schema(graph_config=graph_config) if callable(schema) else schema
+                )
+
+            trustDomain = graph_config["trust-domain"]
+            level = parameters["level"]
+            tcyml_hash = hash_taskcluster_yml(graph_config.taskcluster_yml)
+
+            # the tcyml_hash is prefixed with `/` in the hookId, so users will be granted
+            # hooks:trigger-hook:project-gecko/in-tree-action-3-myaction/*; if another
+            # action was named `myaction/release`, then the `*` in the scope would also
+            # match that action.  To prevent such an accident, we prohibit `/` in hook
+            # names.
+            if "/" in actionPerm:
+                raise Exception("`/` is not allowed in action names; use `-`")
+
+            rv.update(
+                {
+                    "kind": "hook",
+                    "hookGroupId": f"project-{trustDomain}",
+                    "hookId": "in-tree-action-{}-{}/{}".format(
+                        level, actionPerm, tcyml_hash
+                    ),
+                    "hookPayload": {
+                        # provide the decision-task parameters as context for triggerHook
+                        "decision": {
+                            "action": action,
+                            "repository": repository,
+                            "push": push,
+                        },
+                        # and pass everything else through from our own context
+                        "user": {
+                            "input": {"$eval": "input"},
+                            "taskId": {"$eval": "taskId"},  # target taskId (or null)
+                            "taskGroupId": {
+                                "$eval": "taskGroupId"
+                            },  # target task group
+                        },
+                    },
+                    "extra": {
+                        "actionPerm": actionPerm,
+                    },
+                }
+            )
+
+            return rv
+
+        actions.append(Action(order, cb_name, generic, action_builder))
+
+        mem["registered"] = True
+        callbacks[cb_name] = cb
+        return cb
+
+    return register_callback
+
+
+def render_actions_json(parameters, graph_config, decision_task_id):
+    """
+    Build the content of the ``public/actions.json`` artifact.
+
+    Args:
+        parameters (:class:`~taskgraph.parameters.Parameters`):
+            Decision task parameters.
+        graph_config: Passed to the registry lookup and to each action builder.
+        decision_task_id: Passed to each action builder.
+
+    Returns:
+        dict:
+            Version 1 actions document listing every rendered action.
+    """
+    assert isinstance(parameters, Parameters), "requires instance of Parameters"
+    rendered = []
+    registered = sorted(_get_actions(graph_config), key=lambda entry: entry.order)
+    for entry in registered:
+        built = entry.action_builder(parameters, graph_config, decision_task_id)
+        # Builders return None when the action is not available.
+        if not built:
+            continue
+        assert is_json(built), "action must be a JSON compatible object"
+        rendered.append(built)
+    return {"version": 1, "variables": {}, "actions": rendered}
+
+
+def sanity_check_task_scope(callback, parameters, graph_config):
+    """
+    Verify that this task holds the scope required to run ``callback``.
+    Generic actions need the ``generic`` action scope, all others need a scope
+    named after their cb_name; this is a backstop against running non-generic
+    actions through generic hooks.
+    """
+    # Locate the registered action whose callback name matches.
+    matching = (a for a in _get_actions(graph_config) if a.cb_name == callback)
+    action = next(matching, None)
+    if action is None:
+        raise ValueError(f"No action with cb_name {callback}")
+
+    # Generic actions share one permission; the rest are keyed by cb_name.
+    action_perm = "generic" if action.generic else action.cb_name
+    parsed_url = parse(parameters["head_repository"])
+    role_prefix = parsed_url.taskcluster_role_prefix
+    expected_scope = f"assume:{role_prefix}:action:{action_perm}"
+
+    # the scope should appear literally; no need for a satisfaction check. The use of
+    # get_current_scopes here calls the auth service through the Taskcluster Proxy, giving
+    # the precise scopes available to this task.
+    granted = taskcluster.get_current_scopes()
+    if expected_scope not in granted:
+        raise ValueError(f"Expected task scope {expected_scope} for this action")
+
+
+def trigger_action_callback(
+    task_group_id, task_id, input, callback, parameters, root, test=False
+):
+    """
+    Run the registered action callback named ``callback`` with the given inputs.
+    If `test` is true, the callback runs in testing mode, without actually
+    creating tasks; otherwise the task's scopes are checked first.
+    """
+    graph_config = load_graph_config(root)
+    graph_config.register()
+    # Look the callback up under its registered name.
+    known = _get_callbacks(graph_config)
+    cb = known.get(callback)
+    if not cb:
+        names = ", ".join(known)
+        raise Exception(f"Unknown callback: {callback}. Known callbacks: {names}")
+
+    if test:
+        # Testing mode flips module-level flags rather than checking scopes.
+        create.testing = True
+        taskcluster.testing = True
+    else:
+        sanity_check_task_scope(callback, parameters, graph_config)
+
+    # The callback receives a Parameters instance built from ``parameters``.
+    cb(Parameters(**parameters), graph_config, input, task_group_id, task_id)
+
+
+def _load(graph_config):
+    # Load all modules from this folder, relying on the side-effects of register_
+    # functions to populate the action registry.  ``graph_config`` is unused here.
+    import_sibling_modules(exceptions=("util.py",))
+    return callbacks, actions
+
+
+def _get_callbacks(graph_config):
+    return _load(graph_config)[0]  # the cb_name -> callback mapping
+
+
+def _get_actions(graph_config):
+    return _load(graph_config)[1]  # the list of registered Action tuples
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py
new file mode 100644
index 0000000000..fd488b35fc
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py
@@ -0,0 +1,301 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import sys
+import textwrap
+
+from slugid import nice as slugid
+
+from taskgraph.util import taskcluster
+
+from .registry import register_callback_action
+from .util import (
+    combine_task_graph_files,
+    create_task_from_def,
+    create_tasks,
+    fetch_graph_and_labels,
+    relativize_datestamps,
+)
+
+logger = logging.getLogger(__name__)
+
+RERUN_STATES = ("exception", "failed")  # task states that _rerun_task will rerun
+
+
+def _should_retrigger(task_graph, label):
+    """
+    Tell whether the task ``label`` in ``task_graph`` is flagged for retriggering.
+
+    A label missing from the graph is treated as not retriggerable.
+    """
+    if label in task_graph:
+        return task_graph[label].attributes.get("retrigger", False)
+
+    # Unknown label: report it and err on the side of not retriggering.
+    logger.info(
+        f"Task {label} not in full taskgraph, assuming task should not be retriggered."
+    )
+    return False
+
+
+@register_callback_action(
+    title="Retrigger",
+    name="retrigger",
+    symbol="rt",
+    cb_name="retrigger-decision",
+    description=textwrap.dedent(
+        """\
+        Create a clone of the task (retriggering decision, action, and cron tasks requires
+        special scopes)."""
+    ),
+    order=11,
+    context=[
+        {"kind": "decision-task"},
+        {"kind": "action-callback"},
+        {"kind": "cron-task"},
+    ],
+)
+def retrigger_decision_action(parameters, graph_config, input, task_group_id, task_id):
+    """Best-effort clone of a single task: run exactly the same definition
+    once more. We may well lack the scopes to do so (especially for an
+    action)."""
+
+    # make all of the timestamps relative; they will then be turned back into
+    # absolute timestamps relative to the current time.
+    definition = relativize_datestamps(taskcluster.get_task_definition(task_id))
+    new_task_id = slugid()
+    create_task_from_def(new_task_id, definition, parameters["level"])
+
+
+@register_callback_action(
+    title="Retrigger",
+    name="retrigger",
+    symbol="rt",
+    generic=True,
+    description=("Create a clone of the task."),
+    order=19,  # must be greater than other orders in this file, as this is the fallback version
+    context=[{"retrigger": "true"}],
+    schema={
+        "type": "object",
+        "properties": {
+            "downstream": {
+                "type": "boolean",
+                "description": (
+                    "If true, downstream tasks from this one will be cloned as well. "
+                    "The dependencies will be updated to work with the new task at the root."
+                ),
+                "default": False,
+            },
+            "times": {
+                "type": "integer",
+                "default": 1,
+                "minimum": 1,
+                "maximum": 100,
+                "title": "Times",
+                "description": "How many times to run each task.",
+            },
+        },
+    },
+)
+@register_callback_action(
+    title="Retrigger (disabled)",
+    name="retrigger",
+    cb_name="retrigger-disabled",
+    symbol="rt",
+    generic=True,
+    description=(
+        "Create a clone of the task.\n\n"
+        "This type of task should typically be re-run instead of re-triggered."
+    ),
+    order=20,  # must be greater than other orders in this file, as this is the fallback version
+    context=[{}],
+    schema={
+        "type": "object",
+        "properties": {
+            "downstream": {
+                "type": "boolean",
+                "description": (
+                    "If true, downstream tasks from this one will be cloned as well. "
+                    "The dependencies will be updated to work with the new task at the root."
+                ),
+                "default": False,
+            },
+            "times": {
+                "type": "integer",
+                "default": 1,
+                "minimum": 1,
+                "maximum": 100,
+                "title": "Times",
+                "description": "How many times to run each task.",
+            },
+            "force": {
+                "type": "boolean",
+                "default": False,
+                "description": (
+                    "This task should not be re-triggered. "
+                    "This can be overridden by passing `true` here."
+                ),
+            },
+        },
+    },
+)
+def retrigger_action(parameters, graph_config, input, task_group_id, task_id):
+    """Clone the task ``task_id``, optionally together with its downstream tasks."""
+    decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
+        parameters, graph_config
+    )
+
+    label = taskcluster.get_task_definition(task_id)["metadata"]["name"]
+    forced = input.get("force", None)
+    if not (forced or _should_retrigger(full_task_graph, label)):
+        logger.info(
+            f"Not retriggering task {label}, task should not be retrigged "
+            "and force not specified."
+        )
+        sys.exit(1)
+
+    if input.get("downstream"):
+        # Reverse closure from ``label``, limited to labels in label_to_taskid.
+        dependents = full_task_graph.graph.transitive_closure(
+            {label}, reverse=True
+        ).nodes
+        to_run = dependents & set(label_to_taskid.keys())
+        with_downstream = " (with downstream) "
+    else:
+        to_run = [label]
+        with_downstream = " "
+
+    times = input.get("times", 1)
+    for attempt in range(times):
+        create_tasks(
+            graph_config,
+            to_run,
+            full_task_graph,
+            label_to_taskid,
+            parameters,
+            decision_task_id,
+            f"{attempt}",
+        )
+        logger.info(f"Scheduled {label}{with_downstream}(time {attempt + 1}/{times})")
+    combine_task_graph_files(list(range(times)))
+
+
+@register_callback_action(
+    title="Rerun",
+    name="rerun",
+    generic=True,
+    symbol="rr",
+    description=(
+        "Rerun a task.\n\n"
+        "This only works on failed or exception tasks in the original taskgraph,"
+        " and is CoT friendly."
+    ),
+    order=300,
+    context=[{}],
+    schema={"type": "object", "properties": {}},
+)
+def rerun_action(parameters, graph_config, input, task_group_id, task_id):
+    """Rerun ``task_id``, but only if it is listed in label_to_taskid."""
+    task = taskcluster.get_task_definition(task_id)
+    parameters = dict(parameters)
+    decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
+        parameters, graph_config
+    )
+    label = task["metadata"]["name"]
+    if task_id not in label_to_taskid.values():
+        logger.error(
+            f"Refusing to rerun {label}: taskId {task_id} not in decision task "
+            f"{decision_task_id} label_to_taskid!"
+        )
+        return  # honour the refusal instead of falling through to the rerun
+    _rerun_task(task_id, label)
+
+
+def _rerun_task(task_id, label):
+    """Rerun ``task_id`` if its state is in RERUN_STATES; otherwise warn."""
+    state = taskcluster.state_task(task_id)
+    if state in RERUN_STATES:
+        taskcluster.rerun_task(task_id)
+        logger.info(f"Reran {label}")
+        return
+    # Tasks in any other state are left alone.
+    logger.warning(
+        f"No need to rerun {label}: state '{state}' not in {RERUN_STATES}!"
+    )
+
+
+@register_callback_action(
+    title="Retrigger",
+    name="retrigger-multiple",
+    symbol="rt",
+    generic=True,
+    description=("Create a clone of the task."),
+    context=[],
+    schema={
+        "type": "object",
+        "properties": {
+            "requests": {
+                "type": "array",
+                "items": {
+                    "tasks": {
+                        "type": "array",
+                        "description": "An array of task labels",
+                        "items": {"type": "string"},
+                    },
+                    "times": {
+                        "type": "integer",
+                        "minimum": 1,
+                        "maximum": 100,
+                        "title": "Times",
+                        "description": "How many times to run each task.",
+                    },
+                    "additionalProperties": False,
+                },
+            },
+            "additionalProperties": False,
+        },
+    },
+)
+def retrigger_multiple(parameters, graph_config, input, task_group_id, task_id):
+    """Rerun or clone, per request, each label listed in ``input["requests"]``."""
+    decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
+        parameters, graph_config
+    )
+
+    suffixes = []
+    for i, request in enumerate(input.get("requests", [])):
+        times = request.get("times", 1)
+        # Classify each label exactly once so a label missing from the graph is
+        # looked up (and logged) a single time; a request without "tasks" is
+        # treated as naming no labels.
+        rerun_tasks, retrigger_tasks = [], []
+        for label in request.get("tasks") or []:
+            if _should_retrigger(full_task_graph, label):
+                retrigger_tasks.append(label)
+            else:
+                rerun_tasks.append(label)
+
+        for label in rerun_tasks:
+            # XXX we should not re-run tasks pulled in from other pushes
+            # In practice, this shouldn't matter, as only completed tasks
+            # are pulled in from other pushes and treeherder won't pass
+            # those labels.
+            _rerun_task(label_to_taskid[label], label)
+
+        for j in range(times):
+            suffix = f"{i}-{j}"
+            suffixes.append(suffix)
+            create_tasks(
+                graph_config,
+                retrigger_tasks,
+                full_task_graph,
+                label_to_taskid,
+                parameters,
+                decision_task_id,
+                suffix,
+            )
+
+    combine_task_graph_files(suffixes)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py
new file mode 100644
index 0000000000..cf81029da2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py
@@ -0,0 +1,282 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import copy
+import logging
+import os
+import re
+from concurrent import futures
+from functools import reduce
+
+from requests.exceptions import HTTPError
+
+from taskgraph import create
+from taskgraph.decision import read_artifact, rename_artifact, write_artifact
+from taskgraph.optimize.base import optimize_task_graph
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.util.taskcluster import (
+    CONCURRENCY,
+    get_artifact,
+    get_session,
+    list_tasks,
+    parse_time,
+)
+from taskgraph.util.taskgraph import find_decision_task
+
+# Module-level logger named after this module's import path.
+logger = logging.getLogger(__name__)
+
+
+def get_parameters(decision_task_id):
+    """Return the ``public/parameters.yml`` artifact of the given decision task."""
+    artifact_name = "public/parameters.yml"
+    return get_artifact(decision_task_id, artifact_name)
+
+
+def fetch_graph_and_labels(parameters, graph_config):
+    """Load the full task graph and the label-to-taskid map for a push.
+
+    The decision task's ``full-task-graph.json`` and ``label-to-taskid.json``
+    artifacts form the baseline.  Every action task and cron task indexed for
+    the same push may have published its own ``label-to-taskid.json``; those
+    are fetched in parallel and merged on top of the baseline so that newer
+    task ids replace the original ones.
+
+    Args:
+        parameters: mapping providing ``project``, ``pushlog_id`` and
+            ``head_rev``.
+        graph_config: mapping providing ``trust-domain``.
+
+    Returns:
+        A ``(decision_task_id, full_task_graph, label_to_taskid)`` tuple.
+
+    Raises:
+        requests.exceptions.HTTPError: if fetching an action/cron artifact
+            fails with anything other than a 404.
+    """
+    decision_task_id = find_decision_task(parameters, graph_config)
+
+    # First grab the graph and labels generated during the initial decision task
+    full_task_graph = get_artifact(decision_task_id, "public/full-task-graph.json")
+    _, full_task_graph = TaskGraph.from_json(full_task_graph)
+    label_to_taskid = get_artifact(decision_task_id, "public/label-to-taskid.json")
+
+    def fetch_overrides(kind, task_id):
+        # Merge the label-to-taskid map published by one action/cron task.
+        # A 404 only means that task created no tasks, so it is not an error.
+        logger.info(f"fetching label-to-taskid.json for {kind} task {task_id}")
+        try:
+            run_label_to_id = get_artifact(task_id, "public/label-to-taskid.json")
+        except HTTPError as err:
+            if err.response.status_code != 404:
+                raise
+            logger.debug(f"No label-to-taskid.json found for {task_id}: {err}")
+        else:
+            # Runs on a worker thread, so when several tasks define the same
+            # label the winner depends on completion order.
+            label_to_taskid.update(run_label_to_id)
+
+    trust_domain = graph_config["trust-domain"]
+    project = parameters["project"]
+    # (kind used in the log message, index namespace listing those tasks)
+    namespaces = (
+        (
+            "action",
+            "{}.v2.{}.pushlog-id.{}.actions".format(
+                trust_domain, project, parameters["pushlog_id"]
+            ),
+        ),
+        (
+            "cron",
+            "{}.v2.{}.revision.{}.cron".format(
+                trust_domain, project, parameters["head_rev"]
+            ),
+        ),
+    )
+
+    # fetch everything in parallel; this avoids serializing any delay in downloading
+    # each artifact (such as waiting for the artifact to be mirrored locally)
+    with futures.ThreadPoolExecutor(CONCURRENCY) as executor:
+        fetches = [
+            executor.submit(fetch_overrides, kind, task_id)
+            for kind, namespace in namespaces
+            for task_id in list_tasks(namespace)
+        ]
+
+        # now wait for each fetch to complete, raising an exception if there
+        # were any issues
+        for f in futures.as_completed(fetches):
+            f.result()
+
+    return (decision_task_id, full_task_graph, label_to_taskid)
+
+
+def create_task_from_def(task_id, task_def, level, trust_domain="gecko"):
+    """Create a new task from a definition rather than from a label
+    that is already in the full-task-graph. The task definition will
+    have {'relative-datestamp': '..'} rendered just like in a decision task.
+    Use this for entirely new tasks or ones that change internals of the task.
+    It is useful if you want to "edit" the full_task_graph and then hand
+    it to this function. No dependencies will be scheduled. You must handle
+    this yourself. Seeing how create_tasks handles it might prove helpful.
+
+    Args:
+        task_id: the taskId to create the task under.
+        task_def: the task definition; its ``schedulerId`` is overwritten in
+            place and ``metadata.name`` is used as the label.
+        level: SCM level used to build the scheduler id.
+        trust_domain: prefix of the scheduler id.  Defaults to ``"gecko"``,
+            the value that used to be hard-coded; pass the project's
+            ``trust-domain`` for other projects.
+    """
+    task_def["schedulerId"] = f"{trust_domain}-level-{level}"
+    label = task_def["metadata"]["name"]
+    session = get_session()
+    create.create_task(session, task_id, label, task_def)
+
+
+def update_parent(task, graph):
+    """Record the running task as ``extra.parent`` of *task* and return it.
+
+    The parent id is read from the ``TASK_ID`` environment variable and is the
+    empty string when that variable is unset.  *graph* is accepted but not
+    used by this function.
+    """
+    parent_id = os.environ.get("TASK_ID", "")
+    extra = task.task.setdefault("extra", {})
+    extra["parent"] = parent_id
+    return task
+
+
+def update_dependencies(task, graph):
+    """Make *task* depend on the running task, if there is one, and return it.
+
+    When the ``TASK_ID`` environment variable is set and non-empty, its value
+    is appended to the task definition's ``dependencies`` list (created on
+    demand).  *graph* is accepted but not used by this function.
+    """
+    current_task_id = os.environ.get("TASK_ID")
+    if not current_task_id:
+        return task
+    dependencies = task.task.setdefault("dependencies", [])
+    dependencies.append(current_task_id)
+    return task
+
+
+def create_tasks(
+    graph_config,
+    to_run,
+    full_task_graph,
+    label_to_taskid,
+    params,
+    decision_task_id=None,
+    suffix="",
+    modifier=lambda t: t,
+):
+    """Create the tasks in `to_run` together with everything they require.
+
+    The task definitions get {'relative-datestamp': '..'} rendered just like
+    in a decision task.  Action callbacks should create tasks through this
+    function, which allows easy debugging with
+    `mach taskgraph action-callback --test`.
+
+    `modifier`, when given, is called with every task of the resulting graph
+    (the transitive closure of `to_run`, not only `to_run` itself) before the
+    task is placed into the new graph, and must return a valid task.  Skip
+    tasks outside your `to_run` list inside the modifier if you only want to
+    change the requested ones.
+
+    `suffix`, when given, is appended (after a dash) to the names of the
+    `task-graph`, `label-to-taskid` and `to-run` artifacts.  Pass a different
+    suffix for every call made from the same action so the artifacts do not
+    overwrite each other.
+
+    Leave out `decision_task_id` to create the tasks in a new group.
+
+    Neither `full_task_graph` nor `label_to_taskid` is modified; the updated
+    label-to-taskid mapping containing the new tasks is returned.
+    """
+    artifact_suffix = f"-{suffix}" if suffix else suffix
+    requested = set(to_run)
+
+    # Work on private copies so the caller's objects are left untouched.
+    graph_copy = copy.deepcopy(full_task_graph)
+    known_taskids = label_to_taskid.copy()
+
+    closure = graph_copy.graph.transitive_closure(requested)
+    selected_tasks = {}
+    for label in closure.nodes:
+        selected_tasks[label] = modifier(graph_copy[label])
+    target_task_graph = TaskGraph(selected_tasks, closure)
+
+    target_task_graph.for_each_task(update_parent)
+    # Only add the explicit dependency when a decision task id was given and
+    # it is not the task we are currently running in.
+    runs_in_other_task = decision_task_id != os.environ.get("TASK_ID")
+    if decision_task_id and runs_in_other_task:
+        target_task_graph.for_each_task(update_dependencies)
+
+    optimized_task_graph, new_label_to_taskid = optimize_task_graph(
+        target_task_graph,
+        requested,
+        params,
+        requested,
+        decision_task_id,
+        existing_tasks=known_taskids,
+    )
+
+    artifacts = (
+        ("task-graph", optimized_task_graph.to_json()),
+        ("label-to-taskid", new_label_to_taskid),
+        ("to-run", list(requested)),
+    )
+    for name, payload in artifacts:
+        write_artifact(f"{name}{artifact_suffix}.json", payload)
+
+    create.create_tasks(
+        graph_config,
+        optimized_task_graph,
+        new_label_to_taskid,
+        params,
+        decision_task_id,
+    )
+    return new_label_to_taskid
+
+
+def _update_reducer(accumulator, new_value):
+    """Merge `new_value` into `accumulator` in place and hand it back.
+
+    Works like the `update` method of a set or dict, except that the modified
+    object is returned, which makes it usable as a `reduce` step.
+    """
+    merge_into = accumulator.update
+    merge_into(new_value)
+    return accumulator
+
+
+def combine_task_graph_files(suffixes):
+    """Merge the per-suffix artifacts into un-suffixed ones.
+
+    Chain of Trust verification requires a single task-graph.json listing all
+    children tasks, so the task-graph-{suffix}.json files written by repeated
+    `create_tasks` calls are folded into one task-graph.json.  Actions also
+    look for the label-to-taskid and to-run artifacts, so those are combined
+    the same way.
+
+    With exactly one suffix the three files are simply renamed, which avoids
+    the cost of uploading two copies of the same data.
+    """
+    if len(suffixes) == 1:
+        only_suffix = suffixes[0]
+        for name in ["task-graph", "label-to-taskid", "to-run"]:
+            rename_artifact(f"{name}-{only_suffix}.json", f"{name}.json")
+        return
+
+    def merged(name, base):
+        # Read every `{name}-{suffix}.json` first, then fold them into `base`
+        # in suffix order so later files win on conflicting keys.
+        contents = [read_artifact(f"{name}-{suffix}.json") for suffix in suffixes]
+        for content in contents:
+            base.update(content)
+        return base
+
+    write_artifact("task-graph.json", merged("task-graph", dict()))
+    write_artifact("label-to-taskid.json", merged("label-to-taskid", dict()))
+    # to-run files hold lists of labels; de-duplicate through a set.
+    write_artifact("to-run.json", list(merged("to-run", set())))
+
+
+def relativize_datestamps(task_def):
+    """
+    Return a copy of a task definition (as received from the queue) in which
+    every timestamp string is replaced by ``{"relative-datestamp": "N seconds"}``,
+    where N is the whole-second offset from the task's ``created`` time.
+
+    Lists and dicts are rebuilt recursively; all other values are passed
+    through unchanged.  The result is useful for handing to ``create_task``.
+    """
+    created = parse_time(task_def["created"])
+    # borrowed from https://github.com/epoberezkin/ajv/blob/master/lib/compile/formats.js
+    timestamp_re = re.compile(
+        r"^\d\d\d\d-[0-1]\d-[0-3]\d[t\s]"
+        r"(?:[0-2]\d:[0-5]\d:[0-5]\d|23:59:60)(?:\.\d+)?"
+        r"(?:z|[+-]\d\d:\d\d)$",
+        re.I,
+    )
+
+    def convert(node):
+        if isinstance(node, dict):
+            return {key: convert(child) for key, child in node.items()}
+        if isinstance(node, list):
+            return [convert(child) for child in node]
+        if isinstance(node, str) and timestamp_re.match(node):
+            offset = parse_time(node) - created
+            return {"relative-datestamp": f"{int(offset.total_seconds())} seconds"}
+        # Non-timestamp strings and every other type are kept as they are.
+        return node
+
+    return convert(task_def)
+
+
+def add_args_to_command(cmd_parts, extra_args=None):
+    """
+    Add custom command line args to a given command.
+
+    The command may come in one of four shapes, and the result is returned in
+    the same shape:
+
+      * a plain list of arguments,
+      * a one-element list holding a ``{"task-reference": "<command line>"}``
+        dict (windows),
+      * a one-element list holding the whole command line as one string
+        (windows),
+      * a one-element list holding the argument list (osx).
+
+    String command lines are split and re-joined on single spaces.  For the
+    two list shapes the incoming argument list is extended in place.
+
+    Args:
+      cmd_parts: the raw command as seen by taskcluster
+      extra_args: array of args we want to add (defaults to no extra args)
+
+    Returns:
+      The command with ``extra_args`` appended, in the shape it came in.
+    """
+    # Avoid a shared mutable default argument.
+    extra_args = [] if extra_args is None else extra_args
+
+    cmd_type = "default"
+    if len(cmd_parts) == 1:
+        first = cmd_parts[0]
+        if isinstance(first, dict):
+            # windows has single cmd part as dict: 'task-reference', with long string
+            cmd_parts = first["task-reference"].split(" ")
+            cmd_type = "dict"
+        elif isinstance(first, str):
+            # windows has single cmd part as a long string
+            cmd_parts = first.split(" ")
+            cmd_type = "unicode"
+        elif isinstance(first, list):
+            # osx has an single value array with an array inside
+            cmd_parts = first
+            cmd_type = "subarray"
+
+    cmd_parts.extend(extra_args)
+
+    # Wrap the extended arguments back into the shape they arrived in.
+    if cmd_type == "dict":
+        cmd_parts = [{"task-reference": " ".join(cmd_parts)}]
+    elif cmd_type == "unicode":
+        cmd_parts = [" ".join(cmd_parts)]
+    elif cmd_type == "subarray":
+        cmd_parts = [cmd_parts]
+    return cmd_parts
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/config.py b/third_party/python/taskcluster_taskgraph/taskgraph/config.py
new file mode 100644
index 0000000000..7ea7dc7b33
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/config.py
@@ -0,0 +1,146 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import os
+import sys
+from dataclasses import dataclass
+from typing import Dict
+
+from voluptuous import All, Any, Extra, Length, Optional, Required
+
+from .util import path
+from .util.python_path import find_object
+from .util.schema import Schema, optionally_keyed_by, validate_schema
+from .util.yaml import load_yaml
+
+# Module-level logger named after this module's import path.
+logger = logging.getLogger(__name__)
+
+# Schema describing the contents of a project's `config.yml`.
+# `optionally_keyed_by(<field>, <schema>)` comes from `.util.schema`; presumably
+# it lets the value either match <schema> directly or be keyed by <field> --
+# confirm against that helper.
+graph_config_schema = Schema(
+    {
+        # The trust-domain for this graph.
+        # (See https://firefox-source-docs.mozilla.org/taskcluster/taskcluster/taskgraph.html#taskgraph-trust-domain)  # noqa
+        Required("trust-domain"): str,
+        # Priority for tasks: must be one of the names listed below.
+        Required("task-priority"): optionally_keyed_by(
+            "project",
+            Any(
+                "highest",
+                "very-high",
+                "high",
+                "medium",
+                "low",
+                "very-low",
+                "lowest",
+            ),
+        ),
+        Optional(
+            "task-deadline-after",
+            description="Default 'deadline' for tasks, in relative date format. "
+            "Eg: '1 week'",
+        ): optionally_keyed_by("project", str),
+        Required("workers"): {
+            # Maps each worker alias name to the settings of that worker.
+            Required("aliases"): {
+                str: {
+                    Required("provisioner"): optionally_keyed_by("level", str),
+                    Required("implementation"): str,
+                    Required("os"): str,
+                    Required("worker-type"): optionally_keyed_by("level", str),
+                }
+            },
+        },
+        Required("taskgraph"): {
+            Optional(
+                "register",
+                description="Python function to call to register extensions.",
+            ): str,
+            # Dotted path of a callable; when present it is looked up and
+            # called while the decision parameters are being built.
+            Optional("decision-parameters"): str,
+            Optional(
+                "cached-task-prefix",
+                description="The taskcluster index prefix to use for caching tasks. "
+                "Defaults to `trust-domain`.",
+            ): str,
+            Optional(
+                "index-path-regexes",
+                description="Regular expressions matching index paths to be summarized.",
+            ): [str],
+            # At least one repository entry is required (`Length(min=1)`).
+            Required("repositories"): All(
+                {
+                    str: {
+                        Required("name"): str,
+                        Optional("project-regex"): str,
+                        Optional("ssh-secret-name"): str,
+                        # FIXME
+                        # Any further keys are accepted as long as their
+                        # values are strings.
+                        Extra: str,
+                    }
+                },
+                Length(min=1),
+            ),
+        },
+        # Additional top-level keys of any type are allowed.
+        Extra: object,
+    }
+)
+"""Schema for GraphConfig"""
+
+
+@dataclass(frozen=True, eq=False)
+class GraphConfig:
+    """Read-only wrapper around a loaded graph configuration.
+
+    ``_config`` holds the parsed configuration mapping and ``root_dir`` the
+    directory it was loaded from.  Items of the configuration are reachable
+    through ``config[name]`` and ``name in config``.
+    """
+
+    _config: Dict
+    root_dir: str
+
+    # Shared across all instances (not a dataclass field): ``False`` until
+    # ``register`` has run, afterwards the directory it added to ``sys.path``.
+    _PATH_MODIFIED = False
+
+    def __getitem__(self, name):
+        """Return the configuration entry called ``name``."""
+        return self._config[name]
+
+    def __contains__(self, name):
+        """Tell whether the configuration has an entry called ``name``."""
+        return name in self._config
+
+    def register(self):
+        """
+        Put the parent of ``root_dir`` at the front of the python path and
+        call the project's ``taskgraph.register`` hook, if one is configured.
+
+        Calling this again for the same directory is a no-op; calling it for a
+        different directory raises an ``Exception``.
+        """
+        parent_dir = os.path.dirname(self.root_dir)
+        registered_dir = GraphConfig._PATH_MODIFIED
+        if registered_dir:
+            if registered_dir != parent_dir:
+                raise Exception("Can't register multiple directories on python path.")
+            # Already modified path with the same root_dir.
+            # We currently need to do this to enable actions to call
+            # taskgraph_decision, e.g. relpro.
+            return
+
+        GraphConfig._PATH_MODIFIED = parent_dir
+        sys.path.insert(0, parent_dir)
+        hook_path = self["taskgraph"].get("register")
+        if hook_path:
+            hook = find_object(hook_path)
+            hook(self)
+
+    @property
+    def vcs_root(self):
+        """Directory two levels above ``root_dir``.
+
+        Only available when ``root_dir`` ends in ``taskcluster/ci``; any other
+        layout raises an ``Exception`` instead of guessing.
+        """
+        standard_layout = path.split(self.root_dir)[-2:] == ["taskcluster", "ci"]
+        if not standard_layout:
+            raise Exception(
+                "Not guessing path to vcs root. "
+                "Graph config in non-standard location."
+            )
+        taskcluster_dir = os.path.dirname(self.root_dir)
+        return os.path.dirname(taskcluster_dir)
+
+    @property
+    def taskcluster_yml(self):
+        """Path of ``.taskcluster.yml`` inside ``vcs_root``."""
+        return os.path.join(self.vcs_root, ".taskcluster.yml")
+
+
+def validate_graph_config(config):
+    """Validate ``config`` against ``graph_config_schema``."""
+    error_prefix = "Invalid graph configuration:"
+    validate_schema(graph_config_schema, config, error_prefix)
+
+
+def load_graph_config(root_dir):
+    """Load and validate ``config.yml`` from ``root_dir``.
+
+    Returns a ``GraphConfig`` wrapping the parsed configuration.  Raises an
+    ``Exception`` when ``root_dir`` contains no ``config.yml``.
+    """
+    config_path = os.path.join(root_dir, "config.yml")
+    if os.path.exists(config_path):
+        logger.debug(f"loading config from `{config_path}`")
+        raw_config = load_yaml(config_path)
+
+        validate_graph_config(raw_config)
+        return GraphConfig(raw_config, root_dir=root_dir)
+
+    raise Exception(f"Couldn't find taskgraph configuration: {config_path}")
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/create.py b/third_party/python/taskcluster_taskgraph/taskgraph/create.py
new file mode 100644
index 0000000000..deb1ac5348
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/create.py
@@ -0,0 +1,132 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import json
+import logging
+import sys
+from concurrent import futures
+
+from slugid import nice as slugid
+
+from taskgraph.util.parameterization import resolve_timestamps
+from taskgraph.util.taskcluster import CONCURRENCY, get_session
+from taskgraph.util.time import current_json_time
+
+# Module-level logger named after this module's import path.
+logger = logging.getLogger(__name__)
+
+# this is set to true for `mach taskgraph action-callback --test`
+# While true, `create_task` dumps each task definition to stdout instead of
+# submitting it, and `create_tasks` runs with a single worker thread.
+testing = False
+
+
+def create_tasks(graph_config, taskgraph, label_to_taskid, params, decision_task_id):
+    """Submit every task of `taskgraph`, dependencies first.
+
+    Each task definition is updated in place before submission: its
+    `taskGroupId` is set to `decision_task_id`, its `schedulerId` to
+    `<trust-domain>-level-<level>`, and tasks without any dependency inside
+    this graph get `decision_task_id` appended to their `dependencies`.
+
+    Tasks are submitted through `create_task` on a thread pool (a single
+    thread while `testing` is set).  A task is only submitted once the
+    submissions of all its in-graph dependencies have finished.
+
+    Args:
+        graph_config: mapping providing `trust-domain`.
+        taskgraph: graph whose nodes are looked up in `taskgraph.tasks`
+            (presumably keyed by taskId here -- confirm against the caller).
+        label_to_taskid: mapping of task label to taskId; inverted to find
+            the label of each submitted task.
+        params: mapping providing `level`.
+        decision_task_id: used as task group id and as fallback dependency.
+
+    Raises:
+        Whatever exception a `create_task` call raised; it is re-raised when
+        the futures' results are collected at the end.
+    """
+    taskid_to_label = {t: l for l, t in label_to_taskid.items()}
+
+    # when running as an actual decision task, we use the decision task's
+    # taskId as the taskGroupId.  The process that created the decision task
+    # helpfully placed it in this same taskGroup.
+    scheduler_id = "{}-level-{}".format(graph_config["trust-domain"], params["level"])
+
+    # Add the taskGroupId, schedulerId and optionally the decision task
+    # dependency
+    for task_id in taskgraph.graph.nodes:
+        task_def = taskgraph.tasks[task_id].task
+
+        # if this task has no dependencies *within* this taskgraph, make it
+        # depend on this decision task. If it has another dependency within
+        # the taskgraph, then it already implicitly depends on the decision
+        # task.  The result is that tasks do not start immediately. if this
+        # loop fails halfway through, none of the already-created tasks run.
+        if not any(t in taskgraph.tasks for t in task_def.get("dependencies", [])):
+            task_def.setdefault("dependencies", []).append(decision_task_id)
+
+        task_def["taskGroupId"] = decision_task_id
+        task_def["schedulerId"] = scheduler_id
+
+    # If `testing` is True, then run without parallelization
+    concurrency = CONCURRENCY if not testing else 1
+    session = get_session()
+    with futures.ThreadPoolExecutor(concurrency) as e:
+        # taskId -> future of its `create_task` call
+        fs = {}
+
+        # We can't submit a task until its dependencies have been submitted.
+        # So our strategy is to walk the graph and submit tasks once all
+        # their dependencies have been submitted.
+        tasklist = set(taskgraph.graph.visit_postorder())
+        # `tasklist` shrinks as tasks are submitted; `alltasks` keeps the
+        # complete set for filtering out-of-graph dependencies.
+        alltasks = tasklist.copy()
+
+        def schedule_tasks():
+            # bail out early if any futures have failed
+            if any(f.done() and f.exception() for f in fs.values()):
+                return
+
+            to_remove = set()
+            # futures created by this invocation only
+            new = set()
+
+            def submit(task_id, label, task_def):
+                fut = e.submit(create_task, session, task_id, label, task_def)
+                new.add(fut)
+                fs[task_id] = fut
+
+            for task_id in tasklist:
+                task_def = taskgraph.tasks[task_id].task
+                # If we haven't finished submitting all our dependencies yet,
+                # come back to this later.
+                # Some dependencies aren't in our graph, so make sure to filter
+                # those out
+                deps = set(task_def.get("dependencies", [])) & alltasks
+                if any((d not in fs or not fs[d].done()) for d in deps):
+                    continue
+
+                submit(task_id, taskid_to_label[task_id], task_def)
+                to_remove.add(task_id)
+
+                # Schedule tasks as many times as task_duplicates indicates
+                attributes = taskgraph.tasks[task_id].attributes
+                for i in range(1, attributes.get("task_duplicates", 1)):
+                    # We use slugid() since we want a distinct task id
+                    submit(slugid(), taskid_to_label[task_id], task_def)
+            # Removal is deferred so the set is not changed while iterating.
+            tasklist.difference_update(to_remove)
+
+            # as each of those futures complete, try to schedule more tasks
+            for f in futures.as_completed(new):
+                schedule_tasks()
+
+        # start scheduling tasks and run until everything is scheduled
+        schedule_tasks()
+
+        # check the result of each future, raising an exception if it failed
+        for f in futures.as_completed(fs.values()):
+            f.result()
+
+
+def create_task(session, task_id, label, task_def):
+    """Submit a single task definition to the queue.
+
+    Timestamps in `task_def` are resolved against the current time first.
+    While the module-level `testing` flag is set, the task id and resolved
+    definition are dumped to stdout as JSON and nothing is submitted.
+    Otherwise the definition is PUT to the queue; on any status other than
+    200 the error is logged and `raise_for_status()` is called on the
+    response.
+    """
+    # create the task using 'http://taskcluster/queue', which is proxied to the queue service
+    # with credentials appropriate to this job.
+
+    # Resolve timestamps
+    resolved_def = resolve_timestamps(
+        current_json_time(datetime_format=True), task_def
+    )
+
+    if testing:
+        dump_options = {"sort_keys": True, "indent": 4, "separators": (",", ": ")}
+        json.dump([task_id, resolved_def], sys.stdout, **dump_options)
+        # add a newline
+        print("")
+        return
+
+    logger.info(f"Creating task with taskId {task_id} for {label}")
+    response = session.put(
+        f"http://taskcluster/queue/v1/task/{task_id}", json=resolved_def
+    )
+    if response.status_code == 200:
+        return
+
+    # Prefer the queue's structured error message; fall back to the raw body.
+    try:
+        logger.error(response.json()["message"])
+    except Exception:
+        logger.error(response.text)
+    response.raise_for_status()
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/decision.py
new file mode 100644
index 0000000000..ed412f4473
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/decision.py
@@ -0,0 +1,379 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import json
+import logging
+import os
+import pathlib
+import shutil
+import time
+from pathlib import Path
+
+import yaml
+from voluptuous import Optional
+
+from taskgraph.actions import render_actions_json
+from taskgraph.create import create_tasks
+from taskgraph.generator import TaskGraphGenerator
+from taskgraph.parameters import Parameters, get_version
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.util.python_path import find_object
+from taskgraph.util.schema import Schema, validate_schema
+from taskgraph.util.vcs import Repository, get_repository
+from taskgraph.util.yaml import load_yaml
+
+# Module-level logger named after this module's import path.
+logger = logging.getLogger(__name__)
+
+# Directory that artifact files are written to and read from.  The path is
+# relative, so it resolves against the current working directory.
+ARTIFACTS_DIR = Path("artifacts")
+
+
+# For each project, this gives a set of parameters specific to the project.
+# See `taskcluster/docs/parameters.rst` for information on parameters.
+PER_PROJECT_PARAMETERS = {
+    # the default parameters are used for projects that do not match above.
+    "default": {
+        "target_tasks_method": "default",
+    }
+}
+
+
+# Schema for version 2 of `try_task_config.json`, checked after the `version`
+# key has been removed: an optional `parameters` mapping with string keys.
+try_task_config_schema_v2 = Schema(
+    {
+        Optional("parameters"): {str: object},
+    }
+)
+
+
+def full_task_graph_to_runnable_jobs(full_task_json):
+    """Extract the treeherder summary of every task in a full task graph.
+
+    Tasks without ``task.extra.treeherder`` are left out.  For the others the
+    result maps the label to a dict with the treeherder ``symbol``, whichever
+    of ``groupName``, ``groupSymbol`` and ``collection`` are present, and
+    ``platform`` when ``machine.platform`` is set to a truthy value.
+    """
+    optional_keys = ("groupName", "groupSymbol", "collection")
+    runnable_jobs = {}
+    for label, node in full_task_json.items():
+        task = node["task"]
+        if "extra" not in task or "treeherder" not in task["extra"]:
+            continue
+
+        th = task["extra"]["treeherder"]
+        job = {"symbol": th["symbol"]}
+        job.update((key, th[key]) for key in optional_keys if key in th)
+
+        platform = th.get("machine", {}).get("platform")
+        if platform:
+            job["platform"] = platform
+        runnable_jobs[label] = job
+    return runnable_jobs
+
+
+def taskgraph_decision(options, parameters=None):
+    """
+    Run the decision task.  This function implements `mach taskgraph decision`,
+    and is responsible for
+
+     * processing decision task command-line options into parameters
+     * running task-graph generation exactly the same way the other `mach
+       taskgraph` commands do
+     * generating a set of artifacts to memorialize the graph
+     * calling TaskCluster APIs to create the graph
+
+    `parameters`, when given, is handed to the generator as is; otherwise the
+    parameters are derived from `options` through `get_decision_parameters`.
+    The decision task's id is read from the `TASK_ID` environment variable,
+    which must be set.
+    """
+    if not parameters:
+
+        def parameters(graph_config):
+            return get_decision_parameters(graph_config, options)
+
+    decision_task_id = os.environ["TASK_ID"]
+
+    generator = TaskGraphGenerator(
+        root_dir=options.get("root"),
+        parameters=parameters,
+        decision_task_id=decision_task_id,
+        write_artifacts=True,
+    )
+
+    # the parameters used to generate this graph
+    write_artifact("parameters.yml", dict(**generator.parameters))
+
+    # the public/actions.json file
+    actions_json = render_actions_json(
+        generator.parameters, generator.graph_config, decision_task_id
+    )
+    write_artifact("actions.json", actions_json)
+
+    # the full graph, for reference
+    full_task_json = generator.full_task_graph.to_json()
+    write_artifact("full-task-graph.json", full_task_json)
+
+    # the public/runnable-jobs.json file
+    runnable_jobs = full_task_graph_to_runnable_jobs(full_task_json)
+    write_artifact("runnable-jobs.json", runnable_jobs)
+
+    # this is just a test to check whether the from_json() function is working
+    _tasks, _graph = TaskGraph.from_json(full_task_json)
+
+    # the target task set, to allow reproducing this as input
+    target_labels = list(generator.target_task_set.tasks.keys())
+    write_artifact("target-tasks.json", target_labels)
+
+    # the optimized task graph describing what will actually happen, and the
+    # map of labels to taskids
+    write_artifact("task-graph.json", generator.morphed_task_graph.to_json())
+    write_artifact("label-to-taskid.json", generator.label_to_taskid)
+
+    # current run-task and fetch-content scripts
+    script_dir = pathlib.Path(__file__).parent / "run-task"
+    for script_name in ("run-task", "fetch-content"):
+        shutil.copy2(script_dir / script_name, ARTIFACTS_DIR)
+
+    # actually create the graph
+    create_tasks(
+        generator.graph_config,
+        generator.morphed_task_graph,
+        generator.label_to_taskid,
+        generator.parameters,
+        decision_task_id=decision_task_id,
+    )
+
+
+def get_decision_parameters(graph_config, options):
+    """
+    Load parameters from the command-line options for 'taskgraph decision'.
+    This also applies per-project parameters, based on the given project.
+
+    Args:
+        graph_config: the graph configuration; its `taskgraph` section is
+            consulted for `repositories` and the optional
+            `decision-parameters` hook.
+        options: mapping of decision task options.  `owner`, `pushdate`,
+            `project` and `tasks_for` are indexed unconditionally, so a
+            missing one raises `KeyError`.
+
+    Returns:
+        A `Parameters` instance on which `check()` has been called.
+    """
+    # Copy over only those of the known options that were actually supplied.
+    parameters = {
+        n: options[n]
+        for n in [
+            "base_repository",
+            "base_ref",
+            "base_rev",
+            "head_repository",
+            "head_rev",
+            "head_ref",
+            "head_tag",
+            "project",
+            "pushlog_id",
+            "pushdate",
+            "repository_type",
+            "owner",
+            "level",
+            "target_tasks_method",
+            "tasks_for",
+        ]
+        if n in options
+    }
+
+    # The repository is taken from the current working directory.
+    repo_path = os.getcwd()
+    repo = get_repository(repo_path)
+    try:
+        commit_message = repo.get_commit_message()
+    except UnicodeDecodeError:
+        # An undecodable commit message is treated as empty.
+        commit_message = ""
+
+    parameters["base_ref"] = _determine_more_accurate_base_ref(
+        repo,
+        candidate_base_ref=options.get("base_ref"),
+        head_ref=options.get("head_ref"),
+        base_rev=options.get("base_rev"),
+    )
+
+    # Uses the base_ref computed just above, not the raw option.
+    parameters["base_rev"] = _determine_more_accurate_base_rev(
+        repo,
+        base_ref=parameters["base_ref"],
+        candidate_base_rev=options.get("base_rev"),
+        head_rev=options.get("head_rev"),
+        env_prefix=_get_env_prefix(graph_config),
+    )
+
+    # Define default filter list, as most configurations shouldn't need
+    # custom filters.
+    parameters["filters"] = [
+        "target_tasks_method",
+    ]
+    parameters["optimize_strategies"] = None
+    parameters["optimize_target_tasks"] = True
+    parameters["existing_tasks"] = {}
+    parameters["do_not_optimize"] = []
+    parameters["enable_always_target"] = True
+    parameters["build_number"] = 1
+    parameters["version"] = get_version(repo_path)
+    parameters["next_version"] = None
+
+    # owner must be an email, but sometimes (e.g., for ffxbld) it is not, in which
+    # case, fake it
+    if "@" not in parameters["owner"]:
+        parameters["owner"] += "@noreply.mozilla.org"
+
+    # use the pushdate as build_date if given, else use current time
+    parameters["build_date"] = parameters["pushdate"] or int(time.time())
+    # moz_build_date is the build identifier based on build_date
+    parameters["moz_build_date"] = time.strftime(
+        "%Y%m%d%H%M%S", time.gmtime(parameters["build_date"])
+    )
+
+    project = parameters["project"]
+    try:
+        parameters.update(PER_PROJECT_PARAMETERS[project])
+    except KeyError:
+        logger.warning(
+            "using default project parameters; add {} to "
+            "PER_PROJECT_PARAMETERS in {} to customize behavior "
+            "for this project".format(project, __file__)
+        )
+        parameters.update(PER_PROJECT_PARAMETERS["default"])
+
+    # `target_tasks_method` has higher precedence than `project` parameters
+    if options.get("target_tasks_method"):
+        parameters["target_tasks_method"] = options["target_tasks_method"]
+
+    # ..but can be overridden by the commit message: if it contains the special
+    # string "DONTBUILD" and this is an on-push decision task, then use the
+    # special 'nothing' target task method.
+    if "DONTBUILD" in commit_message and (
+        options["tasks_for"] in ("hg-push", "github-push")
+    ):
+        parameters["target_tasks_method"] = "nothing"
+
+    # An explicit option (including False) replaces the default of True.
+    if options.get("optimize_target_tasks") is not None:
+        parameters["optimize_target_tasks"] = options["optimize_target_tasks"]
+
+    # Project hook: called with the graph config and the parameters dict built
+    # so far.  Its return value is ignored, so it presumably edits the dict in
+    # place -- confirm against the hook's contract.
+    if "decision-parameters" in graph_config["taskgraph"]:
+        find_object(graph_config["taskgraph"]["decision-parameters"])(
+            graph_config, parameters
+        )
+
+    if options.get("try_task_config_file"):
+        task_config_file = os.path.abspath(options.get("try_task_config_file"))
+    else:
+        # if try_task_config.json is present, load it
+        task_config_file = os.path.join(os.getcwd(), "try_task_config.json")
+
+    # load try settings
+    # Applied for hg pushes to a project whose name contains "try", and for
+    # every github pull request.
+    if ("try" in project and options["tasks_for"] == "hg-push") or options[
+        "tasks_for"
+    ] == "github-pull-request":
+        set_try_config(parameters, task_config_file)
+
+    result = Parameters(**parameters)
+    result.check()
+    return result
+
+
+def _determine_more_accurate_base_ref(repo, candidate_base_ref, head_ref, base_rev):
+    """Pick the base ref to use, replacing the candidate where it is unusable.
+
+    The repository's default branch is used instead of `candidate_base_ref`
+    when the candidate is empty, or when it equals `head_ref` while `base_rev`
+    is the null revision (a new branch whose base was not identified).  Any
+    replacement is logged.
+    """
+    if not candidate_base_ref:
+        base_ref = repo.default_branch
+    elif candidate_base_ref == head_ref and base_rev == Repository.NULL_REVISION:
+        logger.info(
+            "base_ref and head_ref are identical but base_rev equals the null revision. "
+            "This is a new branch but Github didn't identify its actual base."
+        )
+        base_ref = repo.default_branch
+    else:
+        base_ref = candidate_base_ref
+
+    was_reset = base_ref != candidate_base_ref
+    if was_reset:
+        logger.info(
+            f'base_ref has been reset from "{candidate_base_ref}" to "{base_ref}".'
+        )
+
+    return base_ref
+
+
+def _determine_more_accurate_base_rev(
+    repo, base_ref, candidate_base_rev, head_rev, env_prefix
+):
+    """Compute the base revision as the latest common revision with `head_rev`.
+
+    `candidate_base_rev` is the comparison point when it is set, is not the
+    null revision and exists locally; otherwise `base_ref` is used and the
+    reason is logged.  The result of `repo.find_latest_common_revision` is
+    returned, and a change from the candidate is logged.  `env_prefix` only
+    appears in the warning issued for a revision that is missing locally.
+    """
+    candidate_usable = False
+    if not candidate_base_rev:
+        logger.info("base_rev is not set.")
+    elif candidate_base_rev == Repository.NULL_REVISION:
+        logger.info("base_rev equals the null revision. This branch is a new one.")
+    elif not repo.does_revision_exist_locally(candidate_base_rev):
+        logger.warning(
+            "base_rev does not exist locally. It is likely because the branch was force-pushed. "
+            "taskgraph is not able to assess how many commits were changed and assumes it is only "
+            f"the last one. Please set the {env_prefix.upper()}_BASE_REV environment variable "
+            "in the decision task and provide `--base-rev` to taskgraph."
+        )
+    else:
+        candidate_usable = True
+
+    base_ref_or_rev = candidate_base_rev if candidate_usable else base_ref
+
+    if base_ref_or_rev == base_ref:
+        logger.info(
+            f'Using base_ref "{base_ref}" to determine latest common revision...'
+        )
+
+    base_rev = repo.find_latest_common_revision(base_ref_or_rev, head_rev)
+    if base_rev == candidate_base_rev:
+        return base_rev
+
+    # The candidate was used for the lookup but a different revision came
+    # back, so the candidate cannot be an ancestor of head_rev.
+    if base_ref_or_rev == candidate_base_rev:
+        logger.info("base_rev is not an ancestor of head_rev.")
+
+    logger.info(
+        f'base_rev has been reset from "{candidate_base_rev}" to "{base_rev}".'
+    )
+    return base_rev
+
+
+def _get_env_prefix(graph_config):
+    """Return the first key of `taskgraph.repositories`, or "" if there is none."""
+    repositories = graph_config["taskgraph"].get("repositories", {})
+    return next(iter(repositories.keys()), "")
+
+
+def set_try_config(parameters, task_config_file):
+    """Merge the parameters of a `try_task_config.json` file into `parameters`.
+
+    Nothing happens when `task_config_file` does not exist.  Otherwise the
+    file is loaded as JSON, its `version` key is removed and checked, the
+    rest is validated against the version 2 schema, and the entries of its
+    optional `parameters` mapping are merged into `parameters` in place.
+
+    Args:
+        parameters: dict of decision parameters to update.
+        task_config_file: path of the try task configuration file.
+
+    Raises:
+        Exception: when the file's `version` is missing or is not 2.
+    """
+    if not os.path.isfile(task_config_file):
+        return
+
+    logger.info(f"using try tasks from {task_config_file}")
+    with open(task_config_file) as fh:
+        task_config = json.load(fh)
+
+    # A missing version is reported like any other unsupported version rather
+    # than as a bare KeyError.
+    task_config_version = task_config.pop("version", None)
+    if task_config_version != 2:
+        raise Exception(
+            f"Unknown `try_task_config.json` version: {task_config_version}"
+        )
+
+    validate_schema(
+        try_task_config_schema_v2,
+        task_config,
+        "Invalid v2 `try_task_config.json`.",
+    )
+    # `parameters` is optional in the schema, so it may legitimately be absent.
+    parameters.update(task_config.get("parameters", {}))
+
+
+def write_artifact(filename, data):
+    """Serialize `data` into the file `filename` inside `ARTIFACTS_DIR`.
+
+    The format is chosen by the file name's suffix: `.yml` is written as
+    YAML, `.json` as indented JSON with sorted keys, and `.gz` as
+    gzip-compressed JSON.  The artifacts directory is created if needed.
+
+    Raises:
+        TypeError: when the suffix is none of the above.
+    """
+    logger.info(f"writing artifact file `{filename}`")
+    # exist_ok avoids failing when the directory appears between a check and
+    # the creation (e.g. with concurrent writers).
+    os.makedirs(ARTIFACTS_DIR, exist_ok=True)
+    path = ARTIFACTS_DIR / filename
+    if filename.endswith(".yml"):
+        with open(path, "w") as f:
+            yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
+    elif filename.endswith(".json"):
+        with open(path, "w") as f:
+            json.dump(data, f, sort_keys=True, indent=2, separators=(",", ": "))
+    elif filename.endswith(".gz"):
+        import gzip
+
+        with gzip.open(path, "wb") as f:
+            # The stream is binary, so the JSON text has to be encoded first;
+            # writing the str directly raises TypeError.
+            f.write(json.dumps(data).encode("utf-8"))
+    else:
+        raise TypeError(f"Don't know how to write to {filename}")
+
+
+def read_artifact(filename):
+    """Load and return the contents of `filename` from `ARTIFACTS_DIR`.
+
+    The format is chosen by the file name's suffix: `.yml` is parsed as YAML,
+    `.json` as JSON, and `.gz` as gzip-compressed JSON.
+
+    Raises:
+        TypeError: when the suffix is none of the above.
+    """
+    path = ARTIFACTS_DIR / filename
+    if filename.endswith(".yml"):
+        # `path` already ends in `filename`.  NOTE(review): load_yaml is
+        # understood to join its positional arguments into one path (see
+        # taskgraph.util.yaml), so passing `filename` again would point at
+        # `<artifacts>/<filename>/<filename>`.
+        return load_yaml(path)
+    elif filename.endswith(".json"):
+        with open(path) as f:
+            return json.load(f)
+    elif filename.endswith(".gz"):
+        import gzip
+
+        with gzip.open(path, "rb") as f:
+            return json.load(f)
+    else:
+        raise TypeError(f"Don't know how to read {filename}")
+
+
+def rename_artifact(src, dest):
+    """Rename the artifact file `src` to `dest`, both inside `ARTIFACTS_DIR`."""
+    source_path = ARTIFACTS_DIR / src
+    target_path = ARTIFACTS_DIR / dest
+    os.rename(source_path, target_path)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/docker.py
new file mode 100644
index 0000000000..23897cbbee
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/docker.py
@@ -0,0 +1,219 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import json
+import os
+import subprocess
+import tarfile
+from io import BytesIO
+from textwrap import dedent
+
+try:
+    import zstandard as zstd
+except ImportError as e:
+    zstd = e
+
+from taskgraph.util import docker
+from taskgraph.util.taskcluster import get_artifact_url, get_session
+
+
+def get_image_digest(image_name):
+    """Return the `cached_task` digest of the docker-image task for `image_name`."""
+    # Imported lazily, at call time rather than at module import.
+    from taskgraph.generator import load_tasks_for_kind
+    from taskgraph.parameters import Parameters
+
+    scm_level = os.environ.get("MOZ_SCM_LEVEL", "3")
+    params = Parameters(level=scm_level, strict=False)
+    label = f"build-docker-image-{image_name}"
+    image_task = load_tasks_for_kind(params, "docker-image")[label]
+    return image_task.attributes["cached_task"]["digest"]
+
+
+def load_image_by_name(image_name, tag=None):
+    """Load the indexed build of docker image `image_name`; False if none is found."""
+    from taskgraph.generator import load_tasks_for_kind
+    from taskgraph.optimize import IndexSearch
+    from taskgraph.parameters import Parameters
+
+    scm_level = os.environ.get("MOZ_SCM_LEVEL", "3")
+    params = Parameters(level=scm_level, strict=False)
+    label = f"build-docker-image-{image_name}"
+    image_task = load_tasks_for_kind(params, "docker-image")[label]
+    index_paths = image_task.optimization.get("index-search", [])
+    task_id = IndexSearch().should_replace_task(image_task, {}, index_paths)
+
+    # A bare boolean, rather than a task id, means nothing usable was found.
+    if task_id in (True, False):
+        message = (
+            "Could not find artifacts for a docker image "
+            "named `{image_name}`. Local commits and other changes "
+            "in your checkout may cause this error. Try "
+            "updating to a fresh checkout of mozilla-central "
+            "to download image."
+        )
+        print(message.format(image_name=image_name))
+        return False
+
+    return load_image_by_task_id(task_id, tag)
+
+
+def load_image_by_task_id(task_id, tag=None):
+    """Download and `docker load` the image built by task `task_id`; returns True."""
+    info = load_image(get_artifact_url(task_id, "public/image.tar.zst"), tag)
+    print("Found docker image: {}:{}".format(info["image"], info["tag"]))
+    if not tag:
+        tag = "{}:{}".format(info["image"], info["tag"])
+    else:
+        print(f"Re-tagged as: {tag}")
+    print(f"Try: docker run -ti --rm {tag} bash")
+    return True
+
+
+def build_context(name, outputFile, args=None):
+    """Build a context.tar for the named Docker image via `create_context_tar`."""
+    if not name:
+        raise ValueError("must provide a Docker image name")
+    elif not outputFile:
+        raise ValueError("must provide a outputFile")
+
+    context_dir = docker.image_path(name)
+    if os.path.isdir(context_dir):
+        docker.create_context_tar(".", context_dir, outputFile, args)
+    else:
+        raise Exception("image directory does not exist: %s" % context_dir)
+
+
+def build_image(name, tag, args=None):
+    """Build a Docker image of specified name.
+
+    Output from image building process will be printed to stdout.
+    Raises `subprocess.CalledProcessError` if `docker image build` fails.
+    """
+    if not name:
+        raise ValueError("must provide a Docker image name")
+
+    image_dir = docker.image_path(name)
+    if not os.path.isdir(image_dir):
+        raise Exception("image directory does not exist: %s" % image_dir)
+
+    tag = tag or docker.docker_image(name, by_tag=True)
+    buf = BytesIO()
+    docker.stream_context_tar(".", image_dir, buf, "", args)
+    # check=True: do not report success below when the build actually failed.
+    cmd = ["docker", "image", "build", "--no-cache", "-t", tag, "-"]
+    subprocess.run(cmd, input=buf.getvalue(), check=True)
+
+    print(f"Successfully built {name} and tagged with {tag}")
+
+    if tag.endswith(":latest"):
+        print("*" * 50)
+        print("WARNING: no VERSION file found in image directory.")
+        print("Image is not suitable for deploying/pushing.")
+        print("Create an image suitable for deploying/pushing by creating")
+        print("a VERSION file in the image directory.")
+        print("*" * 50)
+
+
+def load_image(url, imageName=None, imageTag=None):
+    """
+    Load docker image from URL as imageName:tag, if no imageName or tag is given
+    it will use whatever is inside the zstd compressed tarball.
+
+    Returns a dict with keys 'image', 'tag' and 'layer', as read from the tarball.
+    """
+    if isinstance(zstd, ImportError):
+        raise ImportError(
+            dedent(
+                """
+                zstandard is not installed! Use `pip install taskcluster-taskgraph[load-image]`
+                to use this feature.
+                """
+            )
+        ) from zstd
+
+    # If imageName is given and we don't have an imageTag
+    # we parse out the imageTag from imageName, or default it to 'latest'
+    # if no imageName and no imageTag is given, 'repositories' won't be rewritten
+    if imageName and not imageTag:
+        if ":" in imageName:
+            imageName, imageTag = imageName.split(":", 1)
+        else:
+            imageTag = "latest"
+
+    info = {}
+
+    def download_and_modify_image():
+        # This function downloads and edits the downloaded tar file on the fly.
+        # It emits chunked buffers of the edited tar file, as a generator.
+        print(f"Downloading from {url}")
+        # get_session() gets us a requests.Session set to retry several times.
+        req = get_session().get(url, stream=True)
+        req.raise_for_status()
+
+        with zstd.ZstdDecompressor().stream_reader(req.raw) as ifh:
+            tarin = tarfile.open(
+                mode="r|",
+                fileobj=ifh,
+                bufsize=zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
+            )
+
+            # Stream through each member of the downloaded tar file individually.
+            for member in tarin:
+                # Non-file members only need a tar header. Emit one.
+                if not member.isfile():
+                    yield member.tobuf(tarfile.GNU_FORMAT)
+                    continue
+
+                # Open stream reader for the member
+                reader = tarin.extractfile(member)
+
+                # If member is `repositories`, we parse and possibly rewrite the
+                # image tags.
+                if member.name == "repositories":
+                    # Read and parse repositories
+                    repos = json.loads(reader.read())
+                    reader.close()
+
+                    # If there is more than one image or tag, we can't handle it
+                    # here.
+                    if len(repos.keys()) > 1:
+                        raise Exception("file contains more than one image")
+                    info["image"] = image = list(repos.keys())[0]
+                    if len(repos[image].keys()) > 1:
+                        raise Exception("file contains more than one tag")
+                    info["tag"] = tag = list(repos[image].keys())[0]
+                    info["layer"] = layer = repos[image][tag]
+
+                    # Rewrite the repositories file
+                    data = json.dumps({imageName or image: {imageTag or tag: layer}})
+                    reader = BytesIO(data.encode("utf-8"))
+                    member.size = len(data)  # json.dumps output is ASCII-only
+
+                # Emit the tar header for this member.
+                yield member.tobuf(tarfile.GNU_FORMAT)
+                # Then emit its content.
+                remaining = member.size
+                while remaining:
+                    length = min(remaining, zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
+                    buf = reader.read(length)
+                    remaining -= len(buf)
+                    yield buf
+                # Pad to fill a 512 bytes block, per tar format.
+                remainder = member.size % 512
+                if remainder:
+                    yield ("\0" * (512 - remainder)).encode("utf-8")
+
+                reader.close()
+
+    # check=True: a failed `docker image load` must not be reported as a success.
+    image_stream = b"".join(download_and_modify_image())
+    subprocess.run(["docker", "image", "load"], input=image_stream, check=True)
+
+    # Check that we found a repositories file
+    if not info.get("image") or not info.get("tag") or not info.get("layer"):
+        raise Exception("No repositories file found!")
+
+    return info
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py b/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py
new file mode 100644
index 0000000000..6be6e5eeee
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py
@@ -0,0 +1,91 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Support for optimizing tasks based on the set of files that have changed.
+"""
+
+
+import logging
+import os
+
+import requests
+from redo import retry
+
+from .util.memoize import memoize
+from .util.path import match as match_path
+from .util.vcs import get_repository
+
+logger = logging.getLogger(__name__)
+
+
+@memoize
+def get_changed_files(head_repository_url, head_rev, base_rev=None):
+    """
+    Return the set of files changed between revisions.
+
+    Mercurial checkouts are answered from the repository's
+    json-automationrelevance endpoint; anything else asks the local VCS.
+    Responses are cached, so repeated calls with the same arguments are OK.
+    """
+    repository = get_repository(os.getcwd())
+    if repository.tool != "hg":
+        return repository.get_changed_files(rev=head_rev, base_rev=base_rev)
+
+    # TODO Use VCS version once tested enough
+    fetch = _get_changed_files_json_automationrelevance
+    return fetch(head_repository_url, head_rev)
+
+
+def _get_changed_files_json_automationrelevance(head_repository_url, head_rev):
+    """
+    Get the set of files changed in the push headed by the given revision.
+    """
+    url = f"{head_repository_url.rstrip('/')}/json-automationrelevance/{head_rev}"
+    logger.debug("Querying version control for metadata: %s", url)
+
+    def get_automationrelevance():
+        response = requests.get(url, timeout=30)
+        # Raise on HTTP errors so that `retry` tries again instead of parsing them.
+        response.raise_for_status()
+        return response.json()
+
+    contents = retry(get_automationrelevance, attempts=10, sleeptime=10)
+
+    changesets = contents["changesets"]
+    logger.debug("%d commits influencing task scheduling:", len(changesets))
+    changed_files = set()
+    for c in changesets:
+        desc = ""  # Support empty desc
+        if c["desc"]:
+            # Keep `desc` a str: formatting bytes would log it as b'...'.
+            desc = c["desc"].splitlines()[0].encode("ascii", "ignore").decode("ascii")
+        logger.debug(" %s %s", c["node"][0:12], desc)
+        changed_files |= set(c["files"])
+
+    return changed_files
+
+
+def check(params, file_patterns):
+    """Return True if any changed file matches any of `file_patterns`.
+
+    When `head_repository` or `head_rev` is missing from `params`, every file
+    is assumed to have changed and True is returned.
+    """
+
+    repo_url, head_rev = params.get("head_repository"), params.get("head_rev")
+    if not (repo_url and head_rev):
+        logger.warning(
+            "Missing `head_repository` or `head_rev` parameters; "
+            "assuming all files have changed"
+        )
+        return True
+
+    changed_files = get_changed_files(repo_url, head_rev, params.get("base_rev"))
+    # Patterns drive the outer loop, changed files the inner one.
+    return any(
+        match_path(path, pattern)
+        for pattern in file_patterns
+        for path in changed_files
+    )
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/filter_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/filter_tasks.py
new file mode 100644
index 0000000000..63bd2874d6
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/filter_tasks.py
@@ -0,0 +1,34 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+
+from . import target_tasks
+
+logger = logging.getLogger(__name__)
+
+filter_task_functions = {}
+
+
+def filter_task(name):
+    """Decorator that registers a task filter function under `name`."""
+
+    def register(filter_func):
+        filter_task_functions[name] = filter_func
+        return filter_func
+
+    return register
+
+
+@filter_task("target_tasks_method")
+def filter_target_tasks(graph, parameters, graph_config):
+    """Bridge filter that delegates to the legacy target-tasks methods.
+
+    The method named by the `target_tasks_method` parameter (default
+    `all_tasks`) is looked up and applied. This should go away once
+    target_tasks are converted to filters.
+    """
+    method_name = parameters.get("target_tasks_method", "all_tasks")
+    return target_tasks.get_method(method_name)(graph, parameters, graph_config)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/generator.py b/third_party/python/taskcluster_taskgraph/taskgraph/generator.py
new file mode 100644
index 0000000000..4ed2a41520
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/generator.py
@@ -0,0 +1,451 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import copy
+import logging
+import os
+from dataclasses import dataclass
+from typing import Dict
+
+from . import filter_tasks
+from .config import GraphConfig, load_graph_config
+from .graph import Graph
+from .morph import morph
+from .optimize.base import optimize_task_graph
+from .parameters import parameters_loader
+from .task import Task
+from .taskgraph import TaskGraph
+from .transforms.base import TransformConfig, TransformSequence
+from .util.python_path import find_object
+from .util.verify import verifications
+from .util.yaml import load_yaml
+
+logger = logging.getLogger(__name__)
+
+
+class KindNotFound(Exception):
+    """
+    Raised by `Kind.load` for a directory without a kind.yml; carries that path.
+    """
+
+
+@dataclass(frozen=True)
+class Kind:
+    """A task kind: a named directory whose `kind.yml` holds its configuration."""
+
+    name: str
+    path: str
+    config: Dict
+    graph_config: GraphConfig
+
+    def _get_loader(self):
+        """Resolve the configured loader, falling back to the default one."""
+        loader_path = self.config.get("loader", "taskgraph.loader.default:loader")
+        return find_object(loader_path)
+
+    def load_tasks(self, parameters, loaded_tasks, write_artifacts):
+        """Run this kind's loader and transforms; return the resulting Tasks."""
+        loader = self._get_loader()
+        config = copy.deepcopy(self.config)
+
+        dependency_kinds = config.get("kind-dependencies", [])
+        kind_dependencies_tasks = {
+            t.label: t for t in loaded_tasks if t.kind in dependency_kinds
+        }
+
+        inputs = loader(self.name, self.path, config, parameters, loaded_tasks)
+
+        # "transforms" is read only now, after the loader was handed `config`.
+        transforms = TransformSequence()
+        for ref in config["transforms"]:
+            # A bare module path refers to that module's `transforms` object.
+            transforms.add(find_object(ref if ":" in ref else f"{ref}:transforms"))
+
+        # perform the transformations on the loaded inputs
+        trans_config = TransformConfig(
+            self.name,
+            self.path,
+            config,
+            parameters,
+            kind_dependencies_tasks,
+            self.graph_config,
+            write_artifacts=write_artifacts,
+        )
+
+        tasks = []
+        for task_dict in transforms(trans_config, inputs):
+            task = Task(
+                self.name,
+                label=task_dict["label"],
+                description=task_dict["description"],
+                attributes=task_dict["attributes"],
+                task=task_dict["task"],
+                optimization=task_dict.get("optimization"),
+                dependencies=task_dict.get("dependencies"),
+                soft_dependencies=task_dict.get("soft-dependencies"),
+                if_dependencies=task_dict.get("if-dependencies"),
+            )
+            tasks.append(task)
+        return tasks
+
+    @classmethod
+    def load(cls, root_dir, graph_config, kind_name):
+        """Load `<root_dir>/<kind_name>/kind.yml`, or raise KindNotFound."""
+        path = os.path.join(root_dir, kind_name)
+        kind_yml = os.path.join(path, "kind.yml")
+        if not os.path.exists(kind_yml):
+            raise KindNotFound(kind_yml)
+
+        logger.debug(f"loading kind `{kind_name}` from `{path}`")
+
+        return cls(kind_name, path, load_yaml(kind_yml), graph_config)
+
+
+class TaskGraphGenerator:
+    """
+    The central controller for taskgraph.  This handles all phases of graph
+    generation.  The task graph is generated from all of the kinds defined
+    in subdirectories of the generator's root directory.
+
+    Access to the results of this generation, as well as intermediate values at
+    various phases of generation, is available via properties.  This encourages
+    the provision of all generation inputs at instance construction time.
+    """
+
+    # Task-graph generation is implemented as a Python generator that yields
+    # each "phase" of generation.  This allows some mach subcommands to short-
+    # circuit generation of the entire graph by never completing the generator.
+
+    def __init__(
+        self,
+        root_dir,
+        parameters,
+        decision_task_id="DECISION-TASK",
+        write_artifacts=False,
+    ):
+        """
+        @param root_dir: root directory, with subdirectories for each kind
+        @param parameters: parameters for this task-graph generation, or callable
+            taking a `GraphConfig` and returning parameters
+        @type parameters: Union[Parameters, Callable[[GraphConfig], Parameters]]
+        """
+        if root_dir is None:
+            root_dir = "taskcluster/ci"  # default used when no root is given
+        self.root_dir = root_dir
+        self._parameters = parameters
+        self._decision_task_id = decision_task_id
+        self._write_artifacts = write_artifacts
+
+        # start the generator
+        self._run = self._run()  # NOTE: this attribute now shadows the method
+        self._run_results = {}
+
+    @property
+    def parameters(self):
+        """
+        The parameters used for this graph.
+
+        @type: Parameters
+        """
+        return self._run_until("parameters")
+
+    @property
+    def full_task_set(self):
+        """
+        The full task set: all tasks defined by any kind (a graph without edges)
+
+        @type: TaskGraph
+        """
+        return self._run_until("full_task_set")
+
+    @property
+    def full_task_graph(self):
+        """
+        The full task graph: the full task set, with edges representing
+        dependencies.
+
+        @type: TaskGraph
+        """
+        return self._run_until("full_task_graph")
+
+    @property
+    def target_task_set(self):
+        """
+        The set of targeted tasks (a graph without edges)
+
+        @type: TaskGraph
+        """
+        return self._run_until("target_task_set")
+
+    @property
+    def target_task_graph(self):
+        """
+        The set of targeted tasks and all of their dependencies
+
+        @type: TaskGraph
+        """
+        return self._run_until("target_task_graph")
+
+    @property
+    def optimized_task_graph(self):
+        """
+        The set of targeted tasks and all of their dependencies; tasks that
+        have been optimized out are either omitted or replaced with a Task
+        instance containing only a task_id.
+
+        @type: TaskGraph
+        """
+        return self._run_until("optimized_task_graph")
+
+    @property
+    def label_to_taskid(self):
+        """
+        A dictionary mapping task label to assigned taskId.  This property helps
+        in interpreting `optimized_task_graph`.
+
+        @type: dictionary
+        """
+        return self._run_until("label_to_taskid")
+
+    @property
+    def morphed_task_graph(self):
+        """
+        The optimized task graph, with any subsequent morphs applied. This graph
+        will have the same meaning as the optimized task graph, but be in a form
+        more palatable to TaskCluster.
+
+        @type: TaskGraph
+        """
+        return self._run_until("morphed_task_graph")
+
+    @property
+    def graph_config(self):
+        """
+        The configuration for this graph.
+
+        @type: GraphConfig
+        """
+        return self._run_until("graph_config")
+
+    def _load_kinds(self, graph_config, target_kinds=None):
+        if target_kinds:
+            # docker-image is an implicit dependency that never appears in
+            # kind-dependencies.
+            queue = target_kinds + ["docker-image"]
+            seen_kinds = set()
+            while queue:
+                kind_name = queue.pop()  # LIFO: most recently queued kind first
+                if kind_name in seen_kinds:
+                    continue
+                seen_kinds.add(kind_name)
+                kind = Kind.load(self.root_dir, graph_config, kind_name)
+                yield kind
+                queue.extend(kind.config.get("kind-dependencies", []))
+        else:
+            for kind_name in os.listdir(self.root_dir):
+                try:
+                    yield Kind.load(self.root_dir, graph_config, kind_name)
+                except KindNotFound:  # entry has no kind.yml, so it is not a kind
+                    continue
+
+    def _run(self):
+        logger.info("Loading graph configuration.")
+        graph_config = load_graph_config(self.root_dir)
+
+        yield ("graph_config", graph_config)
+
+        graph_config.register()
+
+        # Initial verifications that don't depend on any generation state.
+        verifications("initial")
+
+        if callable(self._parameters):
+            parameters = self._parameters(graph_config)
+        else:
+            parameters = self._parameters
+
+        logger.info(f"Using {parameters}")
+        logger.debug(f"Dumping parameters:\n{repr(parameters)}")
+
+        filters = parameters.get("filters", [])
+        # Always add legacy target tasks method until we deprecate that API.
+        if "target_tasks_method" not in filters:
+            filters.insert(0, "target_tasks_method")
+        filters = [filter_tasks.filter_task_functions[f] for f in filters]
+
+        yield self.verify("parameters", parameters)
+
+        logger.info("Loading kinds")
+        # put the kinds into a graph and sort topologically so that kinds are loaded
+        # in post-order
+        target_kinds = sorted(parameters.get("target-kinds", []))
+        if target_kinds:
+            logger.info(
+                "Limiting kinds to following kinds and dependencies: {}".format(
+                    ", ".join(target_kinds)
+                )
+            )
+        kinds = {
+            kind.name: kind for kind in self._load_kinds(graph_config, target_kinds)
+        }
+        verifications("kinds", kinds)
+
+        edges = set()
+        for kind in kinds.values():
+            for dep in kind.config.get("kind-dependencies", []):
+                edges.add((kind.name, dep, "kind-dependency"))
+        kind_graph = Graph(set(kinds), edges)
+
+        if target_kinds:
+            kind_graph = kind_graph.transitive_closure(
+                set(target_kinds) | {"docker-image"}
+            )
+
+        logger.info("Generating full task set")
+        all_tasks = {}
+        for kind_name in kind_graph.visit_postorder():
+            logger.debug(f"Loading tasks for kind {kind_name}")
+            kind = kinds[kind_name]
+            try:
+                new_tasks = kind.load_tasks(
+                    parameters,
+                    list(all_tasks.values()),
+                    self._write_artifacts,
+                )
+            except Exception:
+                logger.exception(f"Error loading tasks for kind {kind_name}:")
+                raise
+            for task in new_tasks:
+                if task.label in all_tasks:
+                    raise Exception("duplicate tasks with label " + task.label)
+                all_tasks[task.label] = task
+            logger.info(f"Generated {len(new_tasks)} tasks for kind {kind_name}")
+        full_task_set = TaskGraph(all_tasks, Graph(set(all_tasks), set()))
+        yield self.verify("full_task_set", full_task_set, graph_config, parameters)
+
+        logger.info("Generating full task graph")
+        edges = set()
+        for t in full_task_set:
+            for depname, dep in t.dependencies.items():
+                if dep not in all_tasks.keys():
+                    raise Exception(
+                        f"Task '{t.label}' lists a dependency that does not exist: '{dep}'"
+                    )
+                edges.add((t.label, dep, depname))
+
+        full_task_graph = TaskGraph(all_tasks, Graph(full_task_set.graph.nodes, edges))
+        logger.info(
+            "Full task graph contains %d tasks and %d dependencies"
+            % (len(full_task_set.graph.nodes), len(edges))
+        )
+        yield self.verify("full_task_graph", full_task_graph, graph_config, parameters)
+
+        logger.info("Generating target task set")
+        target_task_set = TaskGraph(
+            dict(all_tasks), Graph(set(all_tasks.keys()), set())
+        )
+        for fltr in filters:
+            old_len = len(target_task_set.graph.nodes)
+            target_tasks = set(fltr(target_task_set, parameters, graph_config))
+            target_task_set = TaskGraph(
+                {l: all_tasks[l] for l in target_tasks}, Graph(target_tasks, set())
+            )
+            logger.info(
+                "Filter %s pruned %d tasks (%d remain)"
+                % (fltr.__name__, old_len - len(target_tasks), len(target_tasks))
+            )
+
+        yield self.verify("target_task_set", target_task_set, graph_config, parameters)
+
+        logger.info("Generating target task graph")
+        # include all tasks with `always_target` set
+        if parameters["enable_always_target"]:
+            always_target_tasks = {
+                t.label
+                for t in full_task_graph.tasks.values()
+                if t.attributes.get("always_target")
+                if parameters["enable_always_target"] is True
+                or t.kind in parameters["enable_always_target"]
+            }
+        else:
+            always_target_tasks = set()
+        logger.info(
+            "Adding %d tasks with `always_target` attribute"
+            % (len(always_target_tasks) - len(always_target_tasks & target_tasks))
+        )
+        requested_tasks = target_tasks | always_target_tasks
+        target_graph = full_task_graph.graph.transitive_closure(requested_tasks)
+        target_task_graph = TaskGraph(
+            {l: all_tasks[l] for l in target_graph.nodes}, target_graph
+        )
+        yield self.verify(
+            "target_task_graph", target_task_graph, graph_config, parameters
+        )
+
+        logger.info("Generating optimized task graph")
+        existing_tasks = parameters.get("existing_tasks")
+        do_not_optimize = set(parameters.get("do_not_optimize", []))
+        if not parameters.get("optimize_target_tasks", True):
+            do_not_optimize = set(target_task_set.graph.nodes).union(do_not_optimize)
+
+        # this is used for testing experimental optimization strategies
+        strategies = os.environ.get(
+            "TASKGRAPH_OPTIMIZE_STRATEGIES", parameters.get("optimize_strategies")
+        )
+        if strategies:
+            strategies = find_object(strategies)
+
+        optimized_task_graph, label_to_taskid = optimize_task_graph(
+            target_task_graph,
+            requested_tasks,
+            parameters,
+            do_not_optimize,
+            self._decision_task_id,
+            existing_tasks=existing_tasks,
+            strategy_override=strategies,
+        )
+
+        yield self.verify(
+            "optimized_task_graph", optimized_task_graph, graph_config, parameters
+        )
+
+        morphed_task_graph, label_to_taskid = morph(
+            optimized_task_graph, label_to_taskid, parameters, graph_config
+        )
+
+        yield "label_to_taskid", label_to_taskid
+        yield self.verify(
+            "morphed_task_graph", morphed_task_graph, graph_config, parameters
+        )
+
+    def _run_until(self, name):
+        while name not in self._run_results:
+            try:
+                k, v = next(self._run)  # advance generation by one phase
+            except StopIteration:  # generator finished without yielding `name`
+                raise AttributeError(f"No such run result {name}")
+            self._run_results[k] = v
+        return self._run_results[name]
+
+    def verify(self, name, obj, *args, **kwargs):
+        verifications(name, obj, *args, **kwargs)
+        return name, obj  # the (name, value) pair that `_run` yields
+
+
+def load_tasks_for_kind(parameters, kind, root_dir=None):
+    """
+    Get all the tasks of a given kind, keyed by their `metadata.name`.
+
+    This function is designed to be called from outside of taskgraph.
+    """
+    # make a read-write copy of the parameters
+    overrides = dict(parameters)
+    overrides["target-kinds"] = [kind]
+    params = parameters_loader(spec=None, strict=False, overrides=overrides)
+    generator = TaskGraphGenerator(root_dir=root_dir, parameters=params)
+    tasks_by_name = {}
+    for task in generator.full_task_set:
+        if task.kind == kind:
+            tasks_by_name[task.task["metadata"]["name"]] = task
+    return tasks_by_name
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/graph.py b/third_party/python/taskcluster_taskgraph/taskgraph/graph.py
new file mode 100644
index 0000000000..36b7f14984
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/graph.py
@@ -0,0 +1,134 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import collections
+from dataclasses import dataclass
+from typing import FrozenSet
+
+
+@dataclass(frozen=True)
+class Graph:
+    """Generic representation of a directed acyclic graph with labeled edges
+    connecting the nodes. Graph operations are implemented in a functional
+    manner, so the data structure is immutable.
+
+    It permits at most one edge of a given name between any set of nodes.  The
+    graph is not checked for cycles, and methods may hang or otherwise fail if
+    given a cyclic graph.
+
+    The `nodes` and `edges` attributes may be accessed in a read-only fashion.
+    The `nodes` attribute is a set of node names, while `edges` is a set of
+    `(left, right, name)` tuples representing an edge named `name` going from
+    node `left` to node `right`.
+    """
+
+    nodes: FrozenSet
+    edges: FrozenSet
+
+    def transitive_closure(self, nodes, reverse=False):
+        """Return the transitive closure of <nodes>: the graph containing all
+        specified nodes as well as any nodes reachable from them, and any
+        intervening edges.
+
+        If `reverse` is true, the "reachability" will be reversed and this
+        will return the set of nodes that can reach the specified nodes.
+
+        Example, where an arrow `x --> y` denotes the edge `(y, x, name)`:
+
+        .. code-block::
+
+            a ------> b ------> c
+                      |
+                      `-------> d
+
+        transitive_closure([b]).nodes == set([a, b])
+        transitive_closure([c]).nodes == set([c, b, a])
+        transitive_closure([c], reverse=True).nodes == set([c])
+        transitive_closure([b], reverse=True).nodes == set([b, c, d])
+        """
+        assert isinstance(nodes, set)
+        if not (nodes <= self.nodes):
+            raise Exception(
+                f"Unknown nodes in transitive closure: {nodes - self.nodes}"
+            )
+
+        # generate a new graph by expanding along edges until reaching a fixed
+        # point
+        new_nodes, new_edges = nodes, set()
+        nodes, edges = set(), set()
+        while (new_nodes, new_edges) != (nodes, edges):
+            nodes, edges = new_nodes, new_edges
+            add_edges = {
+                (left, right, name)
+                for (left, right, name) in self.edges
+                if (right if reverse else left) in nodes
+            }
+            add_nodes = {(left if reverse else right) for (left, right, _) in add_edges}
+            new_nodes = nodes | add_nodes
+            new_edges = edges | add_edges
+        return Graph(new_nodes, new_edges)
+
+    def _visit(self, reverse):
+        queue = collections.deque(sorted(self.nodes))  # sorted: stable starting order
+        links_by_node = self.reverse_links_dict() if reverse else self.links_dict()
+        seen = set()
+        while queue:
+            node = queue.popleft()
+            if node in seen:
+                continue
+            links = links_by_node[node]
+            if all((n in seen) for n in links):
+                seen.add(node)
+                yield node
+            else:
+                queue.extend(n for n in links if n not in seen)
+                queue.append(node)  # retry once its unseen links have been queued
+
+    def visit_postorder(self):
+        """
+        Generate a sequence of nodes in postorder, such that every node is
+        visited *after* any nodes it links to.
+
+        Behavior is undefined (read: it will hang) if the graph contains a
+        cycle.
+        """
+        return self._visit(False)
+
+    def visit_preorder(self):
+        """
+        Like visit_postorder, but in reverse: every node is visited *before*
+        any nodes it links to.
+        """
+        return self._visit(True)
+
+    def links_dict(self):
+        """
+        Return a dictionary mapping each node to a set of the nodes it links to
+        (omitting edge names)
+        """
+        links = collections.defaultdict(set)
+        for left, right, _ in self.edges:
+            links[left].add(right)
+        return links
+
+    def named_links_dict(self):
+        """
+        Return a two-level dictionary mapping each node to a dictionary mapping
+        edge names to labels.
+        """
+        links = collections.defaultdict(dict)
+        for left, right, name in self.edges:
+            links[left][name] = right
+        return links
+
+    def reverse_links_dict(self):
+        """
+        Return a dictionary mapping each node to a set of the nodes linking to
+        it (omitting edge names)
+        """
+        links = collections.defaultdict(set)
+        for left, right, _ in self.edges:
+            links[right].add(left)
+        return links
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/loader/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/loader/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/loader/__init__.py
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/loader/default.py b/third_party/python/taskcluster_taskgraph/taskgraph/loader/default.py
new file mode 100644
index 0000000000..5b2c258917
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/loader/default.py
@@ -0,0 +1,33 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+
+from .transform import loader as transform_loader
+
+logger = logging.getLogger(__name__)
+
+
+DEFAULT_TRANSFORMS = [  # appended to every kind's transforms by loader() below
+    "taskgraph.transforms.job:transforms",
+    "taskgraph.transforms.task:transforms",
+]
+
+
+def loader(kind, path, config, params, loaded_tasks):
+    """
+    Default loader: a thin layer over the `transform` loader that appends the
+    `job` and `task` transforms (DEFAULT_TRANSFORMS) to the end of the kind's
+    own `transforms` list before delegating. A kind that already lists one of
+    those defaults is rejected with a KeyError.
+    """
+    kind_transforms = config.setdefault("transforms", [])
+    for default_ref in DEFAULT_TRANSFORMS:
+        if default_ref in kind_transforms:
+            raise KeyError(
+                f"Transform {default_ref} is already present in the loader's default transforms; it must not be defined in the kind"
+            )
+    kind_transforms.extend(DEFAULT_TRANSFORMS)
+    return transform_loader(kind, path, config, params, loaded_tasks)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/loader/transform.py b/third_party/python/taskcluster_taskgraph/taskgraph/loader/transform.py
new file mode 100644
index 0000000000..a134ffd127
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/loader/transform.py
@@ -0,0 +1,58 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+
+from taskgraph.util.templates import merge
+from taskgraph.util.yaml import load_yaml
+
+logger = logging.getLogger(__name__)
+
+
+def loader(kind, path, config, params, loaded_tasks):
+    """
+    Get the input elements that will be transformed into tasks in a generic
+    way.  The elements themselves are free-form, and become the input to the
+    first transform.
+
+    By default, this reads tasks from the `tasks` key, or from yaml files
+    named by `tasks-from`. The entities are read from mappings, and the
+    keys to those mappings are added in the `name` key of each entity.
+
+    If there is a `task-defaults` config, then every task is merged with it.
+    This provides a simple way to set default values for all tasks of a kind.
+    The `task-defaults` key can also be specified in a yaml file pointed to by
+    `tasks-from`. In this case it will only apply to tasks defined in the same
+    file.
+
+    Other kind implementations can use a different loader function to
+    produce inputs and hand them to `transform_inputs`.
+    """
+
+    def generate_tasks():
+        defaults = config.get("task-defaults")
+        for name, task in config.get("tasks", {}).items():
+            if defaults:
+                task = merge(defaults, task)
+            task["task-from"] = "kind.yml"  # without defaults this writes into config
+            yield name, task
+
+        for filename in config.get("tasks-from", []):
+            tasks = load_yaml(path, filename)
+
+            file_defaults = tasks.pop("task-defaults", None)  # not itself a task
+            if defaults:
+                file_defaults = merge(defaults, file_defaults or {})
+
+            for name, task in tasks.items():
+                if file_defaults:
+                    task = merge(file_defaults, task)
+                task["task-from"] = filename  # records which file defined the task
+                yield name, task
+
+    for name, task in generate_tasks():
+        task["name"] = name  # the mapping key becomes the task's `name`
+        logger.debug(f"Generating tasks for {kind} {name}")
+        yield task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/main.py b/third_party/python/taskcluster_taskgraph/taskgraph/main.py
new file mode 100644
index 0000000000..88a4e2539b
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/main.py
@@ -0,0 +1,875 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import argparse
+import atexit
+import json
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import traceback
+from collections import namedtuple
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from pathlib import Path
+from textwrap import dedent
+from typing import Any, List, Union
+
+import appdirs
+import yaml
+
+Command = namedtuple("Command", ["func", "args", "kwargs", "defaults"])
+commands = {}  # registry filled by @command: first positional arg -> Command
+
+
+def command(*args, **kwargs):  # decorator factory: register func under args[0]
+    defaults = kwargs.pop("defaults", {})  # kept apart from the remaining kwargs
+
+    def decorator(func):
+        commands[args[0]] = Command(func, args, kwargs, defaults)
+        return func
+
+    return decorator
+
+
+def argument(*args, **kwargs):  # decorator factory: record one (args, kwargs) spec
+    def decorator(func):
+        if not hasattr(func, "args"):
+            func.args = []  # first @argument applied to func creates the list
+        func.args.append((args, kwargs))
+        return func
+
+    return decorator
+
+
+def format_taskgraph_labels(taskgraph):
+    """Return every task label in `taskgraph`, sorted, one per line."""
+    tasks = taskgraph.tasks
+    labels = [tasks[key].label for key in taskgraph.graph.visit_postorder()]
+    labels.sort()
+    return "\n".join(labels)
+
+
+def format_taskgraph_json(taskgraph):
+    """Serialize `taskgraph.to_json()` as key-sorted, 2-space-indented JSON."""
+    data = taskgraph.to_json()
+    return json.dumps(data, sort_keys=True, indent=2, separators=(",", ": "))
+
+
+def format_taskgraph_yaml(taskgraph):  # block-style YAML of taskgraph.to_json()
+    return yaml.safe_dump(taskgraph.to_json(), default_flow_style=False)
+
+
+def get_filtered_taskgraph(taskgraph, tasksregex, exclude_keys):
+    """
+    Keep only tasks whose label matches `tasksregex` (re.match, so anchored at
+    the start), then strip each dotted `exclude_keys` path from task bodies.
+    """
+    from taskgraph.graph import Graph
+    from taskgraph.task import Task
+    from taskgraph.taskgraph import TaskGraph
+
+    if tasksregex:
+        named_links_dict = taskgraph.graph.named_links_dict()
+        filteredtasks = {}
+        filterededges = set()
+        regexprogram = re.compile(tasksregex)
+
+        for key in taskgraph.graph.visit_postorder():
+            task = taskgraph.tasks[key]
+            if regexprogram.match(task.label):
+                filteredtasks[key] = task
+                for depname, dep in named_links_dict[key].items():
+                    if regexprogram.match(dep):  # keep edges only between kept tasks
+                        filterededges.add((key, dep, depname))
+
+        taskgraph = TaskGraph(filteredtasks, Graph(set(filteredtasks), filterededges))
+
+    if exclude_keys:
+        for label, task in taskgraph.tasks.items():
+            task = task.to_json()
+            for key in exclude_keys:
+                obj = task
+                attrs = key.split(".")
+                while attrs[0] in obj:  # walk the dotted path; stop if a part is absent
+                    if len(attrs) == 1:
+                        del obj[attrs[0]]
+                        break
+                    obj = obj[attrs[0]]
+                    attrs = attrs[1:]
+            taskgraph.tasks[label] = Task.from_json(task)  # replaces the entry in place
+
+    return taskgraph
+
+
+FORMAT_METHODS = {  # output format name -> formatter, looked up in format_taskgraph
+    "labels": format_taskgraph_labels,
+    "json": format_taskgraph_json,
+    "yaml": format_taskgraph_yaml,
+}
+
+
+def get_taskgraph_generator(root, parameters):
+    """Return a TaskGraphGenerator for `root`/`parameters`; a helper to ease testing."""
+    from taskgraph.generator import TaskGraphGenerator
+
+    return TaskGraphGenerator(root_dir=root, parameters=parameters)
+
+
+def format_taskgraph(options, parameters, logfile=None):  # -> formatted graph string
+    import taskgraph
+    from taskgraph.parameters import parameters_loader
+
+    if logfile:
+        handler = logging.FileHandler(logfile, mode="w")
+        if logging.root.handlers:
+            # swap out the last root handler for the file one, keeping its formatter
+            oldhandler = logging.root.handlers[-1]
+            logging.root.removeHandler(oldhandler)
+            handler.setFormatter(oldhandler.formatter)
+        logging.root.addHandler(handler)
+
+    if options["fast"]:
+        taskgraph.fast = True  # module-level flag, so it stays set for this process
+
+    if isinstance(parameters, str):  # a spec string that still has to be loaded
+        parameters = parameters_loader(
+            parameters,
+            overrides={"target-kinds": options.get("target_kinds")},
+            strict=False,
+        )
+
+    tgg = get_taskgraph_generator(options.get("root"), parameters)
+
+    tg = getattr(tgg, options["graph_attr"])  # e.g. "full_task_graph"
+    tg = get_filtered_taskgraph(tg, options["tasks_regex"], options["exclude_keys"])
+    format_method = FORMAT_METHODS[options["format"] or "labels"]
+    return format_method(tg)
+
+
+def dump_output(out, path=None, params_spec=None):
+    """Print `out` to the file at `path`, or to stdout when no path is given."""
+    from taskgraph.parameters import Parameters
+
+    params_name = Parameters.format_spec(params_spec)
+    if not path:
+        print(f"Dumping result with parameters from {params_name}:", file=sys.stderr)
+        print(out + "\n")
+        return
+
+    # Substitute params name into file path if necessary
+    if params_spec and "{params}" not in path:
+        name, ext = os.path.splitext(path)
+        path = f"{name}_{{params}}{ext}"
+
+    path = path.format(params=params_name)
+    # Use a context manager so the handle is flushed and closed; the diff step
+    # reads these files back right after they are written.
+    with open(path, "w") as fh:
+        print(out + "\n", file=fh)
+
+
+def generate_taskgraph(options, parameters, logdir):  # returns a process exit code
+    from taskgraph.parameters import Parameters
+
+    def logfile(spec):
+        """Determine logfile given a parameters specification."""
+        if logdir is None:
+            return None
+        return os.path.join(
+            logdir,
+            "{}_{}.log".format(options["graph_attr"], Parameters.format_spec(spec)),
+        )
+
+    # Don't bother using futures if there's only one parameter. This can make
+    # tracebacks a little more readable and avoids additional process overhead.
+    if len(parameters) == 1:
+        spec = parameters[0]
+        out = format_taskgraph(options, spec, logfile(spec))
+        dump_output(out, options["output_file"])
+        return 0
+
+    futures = {}
+    with ProcessPoolExecutor(max_workers=options["max_workers"]) as executor:
+        for spec in parameters:
+            f = executor.submit(format_taskgraph, options, spec, logfile(spec))
+            futures[f] = spec  # remember which spec each future belongs to
+
+    returncode = 0
+    for future in as_completed(futures):
+        output_file = options["output_file"]
+        spec = futures[future]
+        e = future.exception()
+        if e:
+            returncode = 1  # one failing spec fails the run; the others still dump
+            out = "".join(traceback.format_exception(type(e), e, e.__traceback__))
+            if options["diff"]:
+                # Dump to console so we don't accidentally diff the tracebacks.
+                output_file = None
+        else:
+            out = future.result()
+
+        dump_output(
+            out,
+            path=output_file,
+            params_spec=spec if len(parameters) > 1 else None,
+        )
+
+    return returncode
+
+
+@command(
+    "tasks",
+    help="Show all tasks in the taskgraph.",
+    defaults={"graph_attr": "full_task_set"},
+)
+@command(
+    "full", help="Show the full taskgraph.", defaults={"graph_attr": "full_task_graph"}
+)
+@command(
+    "target",
+    help="Show the set of target tasks.",
+    defaults={"graph_attr": "target_task_set"},
+)
+@command(
+    "target-graph",
+    help="Show the target graph.",
+    defaults={"graph_attr": "target_task_graph"},
+)
+@command(
+    "optimized",
+    help="Show the optimized graph.",
+    defaults={"graph_attr": "optimized_task_graph"},
+)
+@command(
+    "morphed",
+    help="Show the morphed graph.",
+    defaults={"graph_attr": "morphed_task_graph"},
+)
+@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
+@argument("--quiet", "-q", action="store_true", help="suppress all logging output")
+@argument(
+    "--verbose", "-v", action="store_true", help="include debug-level logging output"
+)
+@argument(
+    "--json",
+    "-J",
+    action="store_const",
+    dest="format",
+    const="json",
+    help="Output task graph as a JSON object",
+)
+@argument(
+    "--yaml",
+    "-Y",
+    action="store_const",
+    dest="format",
+    const="yaml",
+    help="Output task graph as a YAML object",
+)
+@argument(
+    "--labels",
+    "-L",
+    action="store_const",
+    dest="format",
+    const="labels",
+    help="Output the label for each task in the task graph (default)",
+)
+@argument(
+    "--parameters",
+    "-p",
+    default=None,
+    action="append",
+    help="Parameters to use for the generation. Can be a path to file (.yml or "
+    ".json; see `taskcluster/docs/parameters.rst`), a directory (containing "
+    "parameters files), a url, of the form `project=mozilla-central` to download "
+    "latest parameters file for the specified project from CI, or of the form "
+    "`task-id=<decision task id>` to download parameters from the specified "
+    "decision task. Can be specified multiple times, in which case multiple "
+    "generations will happen from the same invocation (one per parameters "
+    "specified).",
+)
+@argument(
+    "--no-optimize",
+    dest="optimize",
+    action="store_false",
+    default="true",  # NOTE(review): a string, not True; truthy either way
+    help="do not remove tasks from the graph that are found in the "
+    "index (a.k.a. optimize the graph)",
+)
+@argument(
+    "-o",
+    "--output-file",
+    default=None,
+    help="file path to store generated output.",
+)
+@argument(
+    "--tasks-regex",
+    "--tasks",
+    default=None,
+    help="only return tasks with labels matching this regular " "expression.",
+)
+@argument(
+    "--exclude-key",
+    default=None,
+    dest="exclude_keys",
+    action="append",
+    help="Exclude the specified key (using dot notation) from the final result. "
+    "This is mainly useful with '--diff' to filter out expected differences. Can be "
+    "used multiple times.",
+)
+@argument(
+    "-k",
+    "--target-kind",
+    dest="target_kinds",
+    action="append",
+    default=[],
+    help="only return tasks that are of the given kind, or their dependencies.",
+)
+@argument(
+    "-F",
+    "--fast",
+    default=False,
+    action="store_true",
+    help="enable fast task generation for local debugging.",
+)
+@argument(
+    "--diff",
+    const="default",
+    nargs="?",
+    default=None,
+    help="Generate and diff the current taskgraph against another revision. "
+    "Without args the base revision will be used. A revision specifier such as "
+    "the hash or `.~1` (hg) or `HEAD~1` (git) can be used as well.",
+)
+@argument(
+    "-j",
+    "--max-workers",
+    dest="max_workers",
+    default=None,
+    type=int,
+    help="The maximum number of workers to use for parallel operations such as "
+    "when multiple parameters files are passed.",
+)
+def show_taskgraph(options):  # shared entry point for the graph-printing subcommands
+    from taskgraph.parameters import Parameters, parameters_loader
+    from taskgraph.util.vcs import get_repository
+
+    if options.pop("verbose", False):
+        logging.root.setLevel(logging.DEBUG)
+
+    repo = None
+    cur_rev = None
+    diffdir = None
+    output_file = options["output_file"]  # saved: options["output_file"] is redirected
+
+    if options["diff"]:
+        repo = get_repository(os.getcwd())
+
+        if not repo.working_directory_clean():
+            print(
+                "abort: can't diff taskgraph with dirty working directory",
+                file=sys.stderr,
+            )
+            return 1
+
+        # We want to return the working directory to the current state
+        # as best we can after we're done. In all known cases, using
+        # branch or bookmark (which are both available on the VCS object)
+        # as `branch` is preferable to a specific revision.
+        cur_rev = repo.branch or repo.head_rev[:12]
+        cur_rev_file = cur_rev.replace("/", "_")
+
+        diffdir = tempfile.mkdtemp()
+        atexit.register(
+            shutil.rmtree, diffdir
+        )  # make sure the directory gets cleaned up
+        options["output_file"] = os.path.join(
+            diffdir, f"{options['graph_attr']}_{cur_rev_file}"
+        )
+        print(f"Generating {options['graph_attr']} @ {cur_rev}", file=sys.stderr)
+
+    parameters: List[Union[str, Parameters]] = options.pop("parameters")
+    if not parameters:
+        overrides = {
+            "target-kinds": options.get("target_kinds"),
+        }
+        parameters = [
+            parameters_loader(None, strict=False, overrides=overrides)
+        ]  # will use default values
+
+    for param in parameters[:]:  # iterate a copy: the list is edited in the loop
+        if isinstance(param, str) and os.path.isdir(param):
+            parameters.remove(param)
+            parameters.extend(
+                [
+                    p.as_posix()
+                    for p in Path(param).iterdir()
+                    if p.suffix in (".yml", ".json")
+                ]
+            )
+
+    logdir = None
+    if len(parameters) > 1:
+        # Log to separate files for each process instead of stderr to
+        # avoid interleaving.
+        basename = os.path.basename(os.getcwd())
+        logdir = os.path.join(appdirs.user_log_dir("taskgraph"), basename)
+        if not os.path.isdir(logdir):
+            os.makedirs(logdir)
+    else:
+        # Only setup logging if we have a single parameter spec. Otherwise
+        # logging will go to files. This is also used as a hook for Gecko
+        # to setup its `mach` based logging.
+        setup_logging()
+
+    ret = generate_taskgraph(options, parameters, logdir)
+
+    if options["diff"]:
+        assert diffdir is not None
+        assert repo is not None
+
+        # Reload taskgraph modules to pick up changes and clear global state.
+        for mod in sys.modules.copy():
+            if mod != __name__ and mod.split(".", 1)[0].endswith("taskgraph"):
+                del sys.modules[mod]
+
+        if options["diff"] == "default":
+            base_rev = repo.base_rev
+        else:
+            base_rev = options["diff"]
+        base_rev_file = base_rev.replace("/", "_")
+
+        try:
+            repo.update(base_rev)
+            base_rev = repo.head_rev[:12]
+            options["output_file"] = os.path.join(
+                diffdir, f"{options['graph_attr']}_{base_rev_file}"
+            )
+            print(f"Generating {options['graph_attr']} @ {base_rev}", file=sys.stderr)
+            ret |= generate_taskgraph(options, parameters, logdir)
+        finally:
+            repo.update(cur_rev)
+
+        # Generate diff(s)
+        diffcmd = [
+            "diff",
+            "-U20",
+            "--report-identical-files",
+            f"--label={options['graph_attr']}@{base_rev}",
+            f"--label={options['graph_attr']}@{cur_rev}",
+        ]
+
+        for spec in parameters:
+            base_path = os.path.join(
+                diffdir, f"{options['graph_attr']}_{base_rev_file}"
+            )
+            cur_path = os.path.join(diffdir, f"{options['graph_attr']}_{cur_rev_file}")
+
+            params_name = None
+            if len(parameters) > 1:
+                params_name = Parameters.format_spec(spec)
+                base_path += f"_{params_name}"
+                cur_path += f"_{params_name}"
+
+            try:
+                proc = subprocess.run(
+                    diffcmd + [base_path, cur_path],
+                    capture_output=True,
+                    text=True,
+                    check=True,
+                )
+                diff_output = proc.stdout
+                returncode = 0
+            except subprocess.CalledProcessError as e:
+                # returncode 1 simply means diffs were found
+                if e.returncode != 1:
+                    print(e.stderr, file=sys.stderr)
+                    raise
+                diff_output = e.output
+                returncode = e.returncode
+
+            dump_output(
+                diff_output,
+                # Don't bother saving file if no diffs were found. Log to
+                # console in this case instead.
+                path=None if returncode == 0 else output_file,
+                params_spec=spec if len(parameters) > 1 else None,
+            )
+
+        if options["format"] != "json":
+            print(
+                "If you were expecting differences in task bodies "
+                'you should pass "-J"\n',
+                file=sys.stderr,
+            )
+
+    if len(parameters) > 1:
+        print(f"See '{logdir}' for logs", file=sys.stderr)
+
+    return ret
+
+
+@command("build-image", help="Build a Docker image")
+@argument("image_name", help="Name of the image to build")
+@argument(
+    "-t", "--tag", help="tag that the image should be built as.", metavar="name:tag"
+)
+@argument(
+    "--context-only",
+    help="File name the context tarball should be written to. "
+    "With this option it will only build the context.tar.",
+    metavar="context.tar",
+)
+def build_image(args):  # builds the image, or only its context with --context-only
+    from taskgraph.docker import build_context, build_image  # shadows this name locally
+
+    validate_docker()
+    if args["context_only"] is None:
+        build_image(args["image_name"], args["tag"], os.environ)
+    else:
+        build_context(args["image_name"], args["context_only"], os.environ)
+
+
+@command(
+    "load-image",
+    help="Load a pre-built Docker image. Note that you need to "
+    "have docker installed and running for this to work.",
+)
+@argument(
+    "--task-id",
+    help="Load the image at public/image.tar.zst in this task, "
+    "rather than searching the index",
+)
+@argument(
+    "-t",
+    "--tag",
+    help="tag that the image should be loaded as. If not given, the "
+    "image will be loaded with the tag from the tarball",
+    metavar="name:tag",
+)
+@argument(
+    "image_name",
+    nargs="?",
+    help="Load the image of this name based on the current "
+    "contents of the tree (as built for mozilla-central "
+    "or mozilla-inbound)",
+)
+def load_image(args):  # exits with status 1 on bad arguments or a failed load
+    from taskgraph.docker import load_image_by_name, load_image_by_task_id
+
+    if not args.get("image_name") and not args.get("task_id"):
+        print("Specify either IMAGE-NAME or TASK-ID")
+        sys.exit(1)
+    validate_docker()
+    try:
+        if args["task_id"]:  # an explicit task id takes precedence over the name
+            ok = load_image_by_task_id(args["task_id"], args.get("tag"))
+        else:
+            ok = load_image_by_name(args["image_name"], args.get("tag"))
+        if not ok:
+            sys.exit(1)  # SystemExit is not an Exception, so it passes the handler
+    except Exception:
+        traceback.print_exc()
+        sys.exit(1)
+
+
+def validate_docker():  # exit(1) if `docker ps` fails; text=True decodes stderr
+    p = subprocess.run(["docker", "ps"], capture_output=True, text=True)
+    if p.returncode != 0:
+        print("Error connecting to Docker:", p.stderr)
+        sys.exit(1)
+
+
+@command("image-digest", help="Print the digest of a docker image.")
+@argument(
+    "image_name",
+    help="Print the digest of the image of this name based on the current "
+    "contents of the tree.",
+)
+def image_digest(args):  # prints the digest; on any error prints traceback, exits 1
+    from taskgraph.docker import get_image_digest
+
+    try:
+        digest = get_image_digest(args["image_name"])
+        print(digest)
+    except Exception:
+        traceback.print_exc()
+        sys.exit(1)
+
+
+@command("decision", help="Run the decision task")
+@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
+@argument(
+    "--message",
+    required=False,
+    help=argparse.SUPPRESS,
+)
+@argument(
+    "--project",
+    required=True,
+    help="Project to use for creating task graph. Example: --project=try",
+)
+@argument("--pushlog-id", dest="pushlog_id", required=True, default="0")
+@argument("--pushdate", dest="pushdate", required=True, type=int, default=0)
+@argument("--owner", required=True, help="email address of who owns this graph")
+@argument("--level", required=True, help="SCM level of this repository")
+@argument(
+    "--target-tasks-method", help="method for selecting the target tasks to generate"
+)
+@argument(
+    "--repository-type",
+    required=True,
+    help='Type of repository, either "hg" or "git"',
+)
+@argument("--base-repository", required=True, help='URL for "base" repository to clone')
+@argument(
+    "--base-ref", default="", help='Reference of the revision in the "base" repository'
+)
+@argument(
+    "--base-rev",
+    default="",
+    help="Taskgraph decides what to do based on the revision range between "
+    "`--base-rev` and `--head-rev`. Value is determined automatically if not provided",
+)
+@argument(
+    "--head-repository",
+    required=True,
+    help='URL for "head" repository to fetch revision from',
+)
+@argument(
+    "--head-ref", required=True, help="Reference (this is same as rev usually for hg)"
+)
+@argument(
+    "--head-rev", required=True, help="Commit revision to use from head repository"
+)
+@argument("--head-tag", help="Tag attached to the revision", default="")
+@argument(
+    "--tasks-for", required=True, help="the tasks_for value used to generate this task"
+)
+@argument("--try-task-config-file", help="path to try task configuration file")
+def decision(options):  # passes the options dict straight to taskgraph_decision
+    from taskgraph.decision import taskgraph_decision
+
+    taskgraph_decision(options)
+
+
+@command("action-callback", description="Run action callback used by action tasks")
+@argument(
+    "--root",
+    "-r",
+    default="taskcluster/ci",
+    help="root of the taskgraph definition relative to topsrcdir",
+)
+def action_callback(options):  # inputs come from ACTION_* environment variables
+    from taskgraph.actions import trigger_action_callback
+    from taskgraph.actions.util import get_parameters
+
+    try:
+        # the target task for this action (or null if it's a group action)
+        task_id = json.loads(os.environ.get("ACTION_TASK_ID", "null"))
+        # the target task group for this action
+        task_group_id = os.environ.get("ACTION_TASK_GROUP_ID", None)
+        input = json.loads(os.environ.get("ACTION_INPUT", "null"))  # JSON; unset -> None
+        callback = os.environ.get("ACTION_CALLBACK", None)
+        root = options["root"]
+
+        parameters = get_parameters(task_group_id)
+
+        return trigger_action_callback(
+            task_group_id=task_group_id,
+            task_id=task_id,
+            input=input,
+            callback=callback,
+            parameters=parameters,
+            root=root,
+            test=False,
+        )
+    except Exception:
+        traceback.print_exc()
+        sys.exit(1)
+
+
+@command("test-action-callback", description="Run an action callback in a testing mode")
+@argument(
+    "--root",
+    "-r",
+    default="taskcluster/ci",
+    help="root of the taskgraph definition relative to topsrcdir",
+)
+@argument(
+    "--parameters",
+    "-p",
+    default="",
+    help="parameters file (.yml or .json; see " "`taskcluster/docs/parameters.rst`)",
+)
+@argument("--task-id", default=None, help="TaskId to which the action applies")
+@argument(
+    "--task-group-id", default=None, help="TaskGroupId to which the action applies"
+)
+@argument("--input", default=None, help="Action input (.yml or .json)")
+@argument("callback", default=None, help="Action callback name (Python function name)")
+def test_action_callback(options):  # like action_callback, but driven by CLI options
+    import taskgraph.actions
+    import taskgraph.parameters
+    from taskgraph.config import load_graph_config
+    from taskgraph.util import yaml
+
+    def load_data(filename):
+        with open(filename) as f:
+            if filename.endswith(".yml"):
+                return yaml.load_stream(f)
+            elif filename.endswith(".json"):
+                return json.load(f)
+            else:
+                raise Exception(f"unknown filename {filename}")
+
+    try:
+        task_id = options["task_id"]
+
+        if options["input"]:
+            input = load_data(options["input"])
+        else:
+            input = None
+
+        root = options["root"]
+        graph_config = load_graph_config(root)
+        trust_domain = graph_config["trust-domain"]
+        graph_config.register()
+
+        parameters = taskgraph.parameters.load_parameters_file(
+            options["parameters"], strict=False, trust_domain=trust_domain
+        )
+        parameters.check()
+
+        return taskgraph.actions.trigger_action_callback(
+            task_group_id=options["task_group_id"],
+            task_id=task_id,
+            input=input,
+            callback=options["callback"],
+            parameters=parameters,
+            root=root,
+            test=True,  # the only call-site difference from action_callback's test=False
+        )
+    except Exception:
+        traceback.print_exc()
+        sys.exit(1)
+
+
+@command(
+    "init", description="Initialize a new Taskgraph setup in a new or existing project."
+)
+@argument(
+    "-f",
+    "--force",
+    action="store_true",
+    default=False,
+    help="Bypass safety checks.",
+)
+@argument(
+    "--prompt",
+    dest="no_input",
+    action="store_false",
+    default=True,
+    help="Prompt for input rather than using default values (advanced).",
+)
+@argument(
+    "--template",
+    default="gh:taskcluster/taskgraph",
+    help=argparse.SUPPRESS,  # used for testing
+)
+def init_taskgraph(options):
+    """Generate a Taskgraph setup for the repository containing the cwd.
+
+    If ``.taskcluster.yml`` already exists at the repository root the user is
+    asked for confirmation (skipped with ``--force``); on confirmation the file
+    and the ``taskcluster`` directory are deleted.  The repository host is then
+    derived from the remote URL and the cookiecutter template is rendered into
+    the repository's parent directory with ``overwrite_if_exists=True``.
+
+    Returns 1 when the repository host is not supported, ``None`` on success.
+    Exits with status 1 when the user declines to overwrite an existing setup.
+    """
+    from cookiecutter.main import cookiecutter
+
+    import taskgraph
+    from taskgraph.util.vcs import get_repository
+
+    repo = get_repository(os.getcwd())
+    root = Path(repo.path)
+
+    # Clean up existing installations if necessary.
+    tc_yml = root.joinpath(".taskcluster.yml")
+    if tc_yml.is_file():
+        if not options["force"]:
+            prompt = (
+                "A Taskcluster setup already exists in this repository, "
+                "would you like to overwrite it? [y/N]: "
+            )
+            while True:
+                # Normalise every answer (the retry prompt used to skip the
+                # lower-casing, so "Y" could never be accepted on retry) and
+                # treat an empty reply as the advertised default, "N".
+                proceed = input(prompt).strip().lower() or "n"
+                if proceed in ("y", "yes", "n", "no"):
+                    break
+                prompt = f"Invalid option '{proceed}'! Try again: "
+
+            if proceed[0] == "n":
+                sys.exit(1)
+
+        tc_yml.unlink()
+        tg_dir = root.joinpath("taskcluster")
+        if tg_dir.is_dir():
+            shutil.rmtree(tg_dir)
+
+    # Populate some defaults from the current repository.
+    context = {"project_name": root.name}
+
+    try:
+        repo_url = repo.get_url(remote=repo.remote_name)
+    except RuntimeError:
+        # No usable remote; falls through to the "not supported" branch below.
+        repo_url = ""
+
+    if repo.tool == "git" and "github.com" in repo_url:
+        context["repo_host"] = "github"
+    elif repo.tool == "hg" and "hg.mozilla.org" in repo_url:
+        context["repo_host"] = "hgmo"
+    else:
+        print(
+            dedent(
+                """\
+            Repository not supported!
+
+            Taskgraph only supports repositories hosted on Github or hg.mozilla.org.
+            Ensure you have a remote that points to one of these locations.
+            """
+            ),
+            file=sys.stderr,
+        )
+        return 1
+
+    # Generate the project.
+    cookiecutter(
+        options["template"],
+        checkout=taskgraph.__version__,
+        directory="template",
+        extra_context=context,
+        no_input=options["no_input"],
+        output_dir=root.parent,
+        overwrite_if_exists=True,
+    )
+
+
+def create_parser():
+    """Build the top-level argument parser with one sub-parser per command.
+
+    Each entry of the module-level ``commands`` mapping supplies the command
+    function, the ``add_parser`` arguments and extra defaults; the function's
+    ``args`` attribute lists the ``add_argument`` calls to replay.  The chosen
+    function is stored on the parsed namespace as ``command``.
+    """
+    top_parser = argparse.ArgumentParser(description="Interact with taskgraph")
+    command_parsers = top_parser.add_subparsers()
+    for func, parser_args, parser_kwargs, defaults in commands.values():
+        command_parser = command_parsers.add_parser(*parser_args, **parser_kwargs)
+        for arg_spec in func.args:
+            command_parser.add_argument(*arg_spec[0], **arg_spec[1])
+        command_parser.set_defaults(command=func, **defaults)
+    return top_parser
+
+
+def setup_logging():
+    """Configure root logging at INFO level with a timestamped message format."""
+    log_format = "%(asctime)s - %(levelname)s - %(message)s"
+    logging.basicConfig(level=logging.INFO, format=log_format)
+
+
+def main(args=None):
+    """Command-line entry point: parse ``args`` and run the selected command.
+
+    ``args`` is the list of command-line arguments without the program name.
+    When omitted, ``sys.argv[1:]`` is read at call time (``parse_args(None)``
+    does this) rather than being frozen when the module is imported.
+
+    Returns whatever the command function returns.  If no sub-command was
+    given, the help text is written to stderr and the process exits with
+    status 1.  An exception raised by the command is printed as a traceback
+    and the process exits with status 1.
+    """
+    setup_logging()
+    parser = create_parser()
+    options = parser.parse_args(args)
+
+    # Sub-commands are optional for argparse, so ``command`` is only set when
+    # one was actually named; show usage instead of an AttributeError traceback.
+    if not hasattr(options, "command"):
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+
+    try:
+        return options.command(vars(options))
+    except Exception:
+        traceback.print_exc()
+        sys.exit(1)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/morph.py b/third_party/python/taskcluster_taskgraph/taskgraph/morph.py
new file mode 100644
index 0000000000..bfa1560270
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/morph.py
@@ -0,0 +1,261 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Graph morphs are modifications to task-graphs that take place *after* the
+optimization phase.
+
+These graph morphs are largely invisible to developers running `./mach`
+locally, so they should be limited to changes that do not modify the meaning of
+the graph.
+"""
+
+# Note that the translation of `{'task-reference': '..'}` and
+# `artifact-reference` is handled in the optimization phase (since
+# optimization involves dealing with taskIds directly).  Similarly,
+# `{'relative-datestamp': '..'}` is handled at the last possible moment during
+# task creation.
+
+
+import logging
+import os
+import re
+
+from slugid import nice as slugid
+
+from .graph import Graph
+from .task import Task
+from .taskgraph import TaskGraph
+from .util.workertypes import get_worker_type
+
+# Absolute path of the directory containing this module.
+here = os.path.abspath(os.path.dirname(__file__))
+logger = logging.getLogger(__name__)
+# Tasks with more routes than this get a separate index task (see
+# add_index_tasks below).
+MAX_ROUTES = 10
+
+registered_morphs = []
+
+
+def register_morph(func):
+    """Append ``func`` to ``registered_morphs`` and return it unchanged.
+
+    Returning the function matters because this is applied as a decorator:
+    without it the decorated module-level name would be rebound to ``None``.
+    """
+    registered_morphs.append(func)
+    return func
+
+
+def amend_taskgraph(taskgraph, label_to_taskid, to_add):
+    """Return a new taskgraph extended with the tasks in ``to_add``.
+
+    Each added task is keyed by its ``task_id`` and contributes one edge per
+    entry of its ``dependencies`` mapping.  ``label_to_taskid`` is updated in
+    place with the new labels (which must not already be present) and is
+    returned alongside the new graph.
+    """
+    tasks_by_id = taskgraph.tasks.copy()
+    edges = set(taskgraph.graph.edges)
+    for new_task in to_add:
+        new_id = new_task.task_id
+        tasks_by_id[new_id] = new_task
+        assert new_task.label not in label_to_taskid
+        label_to_taskid[new_task.label] = new_id
+        edges.update(
+            (new_id, dep_id, dep_name)
+            for dep_name, dep_id in new_task.dependencies.items()
+        )
+
+    amended = TaskGraph(tasks_by_id, Graph(set(tasks_by_id), edges))
+    return amended, label_to_taskid
+
+
+def derive_index_task(task, taskgraph, label_to_taskid, parameters, graph_config):
+    """Create the shell of an index task that depends on `task`.
+
+    The new task runs on the "misc" worker type for ``parameters["level"]``,
+    copies the parent's deadline (also used as its expiry) and its owner and
+    source metadata, and is given a fresh slugid.  ``taskgraph`` and
+    ``label_to_taskid`` are returned untouched.
+    """
+    purpose = "index-task"
+    label = f"{purpose}-{task.label}"
+    provisioner_id, worker_type = get_worker_type(
+        graph_config, "misc", parameters["level"]
+    )
+
+    parent_def = task.task
+    # The index task has no reason to outlive its parent's deadline, so the
+    # same value serves as both deadline and expiry.
+    parent_deadline = parent_def["deadline"]
+    parent_metadata = parent_def["metadata"]
+    metadata = {
+        "name": label,
+        "description": "{} for {}".format(purpose, parent_metadata["description"]),
+        "owner": parent_metadata["owner"],
+        "source": parent_metadata["source"],
+    }
+    payload = {
+        "image": {
+            "path": "public/image.tar.zst",
+            "namespace": "taskgraph.cache.level-3.docker-images.v2.index-task.latest",
+            "type": "indexed-image",
+        },
+        "features": {
+            "taskclusterProxy": True,
+        },
+        "maxRunTime": 600,
+    }
+
+    task_def = {
+        "provisionerId": provisioner_id,
+        "workerType": worker_type,
+        "dependencies": [task.task_id],
+        "created": {"relative-datestamp": "0 seconds"},
+        "deadline": parent_deadline,
+        "expires": parent_deadline,
+        "metadata": metadata,
+        "scopes": [],
+        "payload": payload,
+    }
+
+    # The parent is the only graph-level dependency of the index task.
+    index_task = Task(
+        kind="misc",
+        label=label,
+        attributes={},
+        task=task_def,
+        dependencies={"parent": task.task_id},
+    )
+    index_task.task_id = slugid()
+    return index_task, taskgraph, label_to_taskid
+
+
+def make_index_task(parent_task, taskgraph, label_to_taskid, parameters, graph_config):
+    """Move the ``index.*`` routes of ``parent_task`` onto a new index task.
+
+    The parent's route list is rewritten in place without its ``index.``
+    routes.  The derived task receives summarized ``index:insert-task:``
+    scopes, an ``insert-indexes.js`` command listing every index path, and an
+    environment naming the parent task id and its index rank (default 0).
+    """
+    index_paths = []
+    other_routes = []
+    for route in parent_task.task["routes"]:
+        if route.startswith("index."):
+            index_paths.append(route.split(".", 1)[1])
+        else:
+            other_routes.append(route)
+    parent_task.task["routes"] = other_routes
+
+    index_task, taskgraph, label_to_taskid = derive_index_task(
+        parent_task, taskgraph, label_to_taskid, parameters, graph_config
+    )
+
+    # Scopes are "summarized" (collapsed to a wildcard prefix) because a task
+    # with many namespaces could otherwise need more scopes than fit in a
+    # temporary credential.
+    summarizers = [
+        re.compile(
+            r"({trust_domain}\.v2\.[^.]*\.).*".format(
+                trust_domain=re.escape(graph_config["trust-domain"])
+            )
+        )
+    ]
+    summarizers.extend(
+        re.compile(pattern)
+        for pattern in graph_config["taskgraph"].get("index-path-regexes", ())
+    )
+
+    def summarize(path):
+        """Return the wildcard prefix from the first matching regex, else ``path``."""
+        for regex in summarizers:
+            found = regex.match(path)
+            if found:
+                return found.group(1) + "*"
+        return path
+
+    index_task.task["scopes"] = sorted(
+        {f"index:insert-task:{summarize(path)}" for path in index_paths}
+    )
+
+    payload = index_task.task["payload"]
+    payload["command"] = ["insert-indexes.js"] + index_paths
+    payload["env"] = {
+        "TARGET_TASKID": parent_task.task_id,
+        "INDEX_RANK": parent_task.task.get("extra", {}).get("index", {}).get("rank", 0),
+    }
+    return index_task, taskgraph, label_to_taskid
+
+
+@register_morph
+def add_index_tasks(taskgraph, label_to_taskid, parameters, graph_config):
+    """
+    Add an "index task" for every task that carries more than MAX_ROUTES routes.
+
+    The TaskCluster queue only allows 10 routes on a task, but some tasks need
+    many more for indexing purposes.  The added tasks depend on such tasks and
+    perform the index insertions directly, avoiding the limit on task.routes.
+    """
+    logger.debug("Morphing: adding index tasks")
+
+    index_tasks = []
+    for _label, candidate in taskgraph.tasks.items():
+        if len(candidate.task.get("routes", [])) > MAX_ROUTES:
+            index_task, taskgraph, label_to_taskid = make_index_task(
+                candidate, taskgraph, label_to_taskid, parameters, graph_config
+            )
+            index_tasks.append(index_task)
+
+    if not index_tasks:
+        return taskgraph, label_to_taskid
+
+    taskgraph, label_to_taskid = amend_taskgraph(
+        taskgraph, label_to_taskid, index_tasks
+    )
+    logger.info(f"Added {len(index_tasks)} index tasks")
+    return taskgraph, label_to_taskid
+
+
+def _get_morph_url():
+    """
+    Guess a URL for this file, used as source metadata on created tasks.
+
+    The repository and revision come from TASKGRAPH_HEAD_REPOSITORY and
+    TASKGRAPH_HEAD_REV (set when the taskgraph code was checked out with
+    run-task in the decision task), falling back to the upstream GitHub
+    repository and the "default" revision.
+    """
+    env = os.environ
+    repo = env.get(
+        "TASKGRAPH_HEAD_REPOSITORY", "https://github.com/taskcluster/taskgraph"
+    )
+    rev = env.get("TASKGRAPH_HEAD_REV", "default")
+    return f"{repo}/raw-file/{rev}/src/taskgraph/morph.py"
+
+
+@register_morph
+def add_code_review_task(taskgraph, label_to_taskid, parameters, graph_config):
+    """Add one "code-review" task depending on every task flagged for review.
+
+    Nothing is added unless ``parameters`` has a truthy "code-review" entry
+    and at least one task carries a truthy "code-review" attribute.  The new
+    task uses the built-in "succeed" worker and ``requires: all-resolved``.
+    """
+    logger.debug("Morphing: adding code review task")
+
+    review_config = parameters.get("code-review")
+    if not review_config:
+        return taskgraph, label_to_taskid
+
+    code_review_tasks = {
+        task.label: task.task_id
+        for _label, task in taskgraph.tasks.items()
+        if task.attributes.get("code-review")
+    }
+    if not code_review_tasks:
+        return taskgraph, label_to_taskid
+
+    code_review_task_def = {
+        "provisionerId": "built-in",
+        "workerType": "succeed",
+        "dependencies": sorted(code_review_tasks.values()),
+        # "all-resolved" lets this task run whatever the exit status of its
+        # dependencies, since their failures are exactly what is of interest.
+        "requires": "all-resolved",
+        "created": {"relative-datestamp": "0 seconds"},
+        "deadline": {"relative-datestamp": "1 day"},
+        # Expiry is set to the same relative date as the deadline.
+        "expires": {"relative-datestamp": "1 day"},
+        "metadata": {
+            "name": "code-review",
+            "description": "List all issues found in static analysis and linting tasks",
+            "owner": parameters["owner"],
+            "source": _get_morph_url(),
+        },
+        "scopes": [],
+        "payload": {},
+        "routes": ["project.relman.codereview.v1.try_ending"],
+        "extra": {
+            "code-review": {
+                "phabricator-build-target": review_config["phabricator-build-target"],
+                "repository": parameters["head_repository"],
+                "revision": parameters["head_rev"],
+            }
+        },
+    }
+    review_task = Task(
+        kind="misc",
+        label="code-review",
+        attributes={},
+        task=code_review_task_def,
+        dependencies=code_review_tasks,
+    )
+    review_task.task_id = slugid()
+    taskgraph, label_to_taskid = amend_taskgraph(
+        taskgraph, label_to_taskid, [review_task]
+    )
+    logger.info("Added code review task.")
+
+    return taskgraph, label_to_taskid
+
+
+def morph(taskgraph, label_to_taskid, parameters, graph_config):
+    """Run every registered morph in registration order.
+
+    Each morph receives the graph and label map produced by the previous one;
+    the final pair is returned.
+    """
+    for apply_morph in registered_morphs:
+        morphed = apply_morph(taskgraph, label_to_taskid, parameters, graph_config)
+        taskgraph, label_to_taskid = morphed
+    return taskgraph, label_to_taskid
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/__init__.py
new file mode 100644
index 0000000000..06287d877d
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/__init__.py
@@ -0,0 +1,8 @@
+from .base import (  # noqa: F401
+    Alias,
+    All,
+    Any,
+    Not,
+    OptimizationStrategy,
+    register_strategy,
+)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py
new file mode 100644
index 0000000000..367b94e1de
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py
@@ -0,0 +1,551 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+The objective of optimization is to remove as many tasks from the graph as
+possible, as efficiently as possible, thereby delivering useful results as
+quickly as possible.  For example, ideally if only a test script is modified in
+a push, then the resulting graph contains only the corresponding test suite
+task.
+
+See ``taskcluster/docs/optimization.rst`` for more information.
+"""
+
+import datetime
+import logging
+from abc import ABCMeta, abstractmethod, abstractproperty
+from collections import defaultdict
+
+from slugid import nice as slugid
+
+from taskgraph.graph import Graph
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.util.parameterization import resolve_task_references, resolve_timestamps
+from taskgraph.util.python_path import import_sibling_modules
+
+logger = logging.getLogger(__name__)
+registry = {}
+
+
+def register_strategy(name, args=()):
+    """Class decorator registering an instance of the class under ``name``.
+
+    The class is instantiated with ``*args``.  If ``name`` is already
+    registered the existing entry is kept and no instance is created.  An
+    instance without a ``description`` attribute gets ``name`` as its
+    description.  The decorated class itself is returned unchanged.
+    """
+
+    def decorator(strategy_cls):
+        if name in registry:
+            return strategy_cls
+
+        registry[name] = strategy_cls(*args)
+        instance = registry[name]
+        if not hasattr(instance, "description"):
+            instance.description = name
+        return strategy_cls
+
+    return decorator
+
+
+def optimize_task_graph(
+    target_task_graph,
+    requested_tasks,
+    params,
+    do_not_optimize,
+    decision_task_id,
+    existing_tasks=None,
+    strategy_override=None,
+):
+    """
+    Optimize the target task graph.
+
+    Runs the removal phase, then the replacement phase, and finally builds the
+    resulting subgraph.  Returns that taskgraph together with a map from label
+    to assigned taskId, including replacement tasks.
+    """
+    label_to_taskid = {}
+    existing_tasks = existing_tasks or {}
+
+    # Start from the globally registered strategies and layer any overrides
+    # on top for this optimization run only.
+    strategies = dict(registry)
+    if strategy_override:
+        strategies.update(strategy_override)
+
+    optimizations = _get_optimizations(target_task_graph, strategies)
+
+    removed_tasks = remove_tasks(
+        target_task_graph=target_task_graph,
+        requested_tasks=requested_tasks,
+        optimizations=optimizations,
+        params=params,
+        do_not_optimize=do_not_optimize,
+    )
+
+    replaced_tasks = replace_tasks(
+        target_task_graph=target_task_graph,
+        optimizations=optimizations,
+        params=params,
+        do_not_optimize=do_not_optimize,
+        label_to_taskid=label_to_taskid,
+        existing_tasks=existing_tasks,
+        removed_tasks=removed_tasks,
+    )
+
+    optimized_graph = get_subgraph(
+        target_task_graph,
+        removed_tasks,
+        replaced_tasks,
+        label_to_taskid,
+        decision_task_id,
+    )
+    return optimized_graph, label_to_taskid
+
+
+def _get_optimizations(target_task_graph, strategies):
+    """Return a function mapping a task label to ``(opt_by, strategy, arg)``.
+
+    The first entry of the task's ``optimization`` mapping selects the
+    strategy; ``opt_by`` is suffixed with the strategy's description when it
+    has one.  Tasks without an optimization use the "never" strategy.
+    """
+
+    def optimizations(label):
+        optimization = target_task_graph.tasks[label].optimization
+        if not optimization:
+            return ("never", strategies["never"], None)
+
+        opt_by, arg = next(iter(optimization.items()))
+        strategy = strategies[opt_by]
+        if hasattr(strategy, "description"):
+            opt_by += f" ({strategy.description})"
+        return (opt_by, strategy, arg)
+
+    return optimizations
+
+
+def _log_optimization(verb, opt_counts, opt_reasons=None):
+    """Log the outcome of one optimization phase.
+
+    Every entry of ``opt_reasons`` (label -> (action, reason)) is logged at
+    debug level.  ``opt_counts`` (reason -> count) is summarized in a single
+    info-level line, or a "No tasks ..." line when it is empty.
+    """
+    for label, (action, reason) in (opt_reasons or {}).items():
+        logger.debug(f"optimize: {label} {action} because of {reason}")
+
+    if not opt_counts:
+        logger.info(f"No tasks {verb} during optimization")
+        return
+
+    summary = ", ".join(
+        f"{count} tasks by {reason}" for reason, count in sorted(opt_counts.items())
+    )
+    logger.info(f"{verb.title()} {summary} during optimization.")
+
+
+def remove_tasks(
+    target_task_graph, requested_tasks, params, optimizations, do_not_optimize
+):
+    """
+    Implement the "Removing Tasks" phase, returning a set of task labels of all removed tasks.
+
+    For each label, in the order produced by ``visit_preorder()``, the first
+    matching rule decides:
+
+    1. labels in ``do_not_optimize`` are kept;
+    2. labels with a dependent that is neither removed nor prunable are kept;
+    3. labels not in ``requested_tasks`` are removed ("dependents optimized");
+    4. otherwise the task's strategy decides via ``should_remove_task``.
+
+    Tasks with ``if_dependencies`` that would otherwise be kept are parked in
+    ``prune_candidates`` and are only removed at the end if nothing that stays
+    in the graph depends on them.
+
+    ``optimizations`` is a callable mapping a label to
+    ``(opt_by, strategy, arg)``; ``params`` is passed through to the strategy.
+    The per-task decisions and per-reason counts are logged before returning.
+    """
+    opt_counts = defaultdict(int)
+    opt_reasons = {}
+    removed = set()
+    dependents_of = target_task_graph.graph.reverse_links_dict()
+    tasks = target_task_graph.tasks
+    # Labels whose fate is undecided until their dependencies were processed.
+    prune_candidates = set()
+
+    # Traverse graph so dependents (child nodes) are guaranteed to be processed
+    # first.
+    for label in target_task_graph.graph.visit_preorder():
+        # Dependents that can be pruned away (shouldn't cause this task to run).
+        # Only dependents that either:
+        #   A) Explicitly reference this task in their 'if_dependencies' list, or
+        #   B) Don't have an 'if_dependencies' attribute (i.e are in 'prune_candidates'
+        #      because they should be removed but have prune_deps themselves)
+        # should be considered.
+        prune_deps = {
+            l
+            for l in dependents_of[label]
+            if l in prune_candidates
+            if not tasks[l].if_dependencies or label in tasks[l].if_dependencies
+        }
+
+        # NOTE: both helpers below close over the current iteration's `label`
+        # and `prune_deps`; they are redefined on every pass of the loop.
+        def _keep(reason):
+            """Mark a task as being kept in the graph. Also recursively removes
+            any dependents from `prune_candidates`, assuming they should be
+            kept because of this task.
+            """
+            opt_reasons[label] = ("kept", reason)
+
+            # Removes dependents that were in 'prune_candidates' from a task
+            # that ended up being kept (and therefore the dependents should
+            # also be kept).
+            queue = list(prune_deps)
+            while queue:
+                l = queue.pop()
+
+                # If l is a prune_dep of multiple tasks it could be queued up
+                # multiple times. Guard against it being already removed.
+                if l not in prune_candidates:
+                    continue
+
+                # If a task doesn't set 'if_dependencies' itself (rather it was
+                # added to 'prune_candidates' due to one of its dependents),
+                # then we shouldn't remove it.
+                if not tasks[l].if_dependencies:
+                    continue
+
+                prune_candidates.remove(l)
+                queue.extend([r for r in dependents_of[l] if r in prune_candidates])
+
+        def _remove(reason):
+            """Potentially mark a task as being removed from the graph. If the
+            task has dependents that can be pruned, add this task to
+            `prune_candidates` rather than removing it.
+            """
+            if prune_deps:
+                # If there are prune_deps, unsure if we can remove this task yet.
+                prune_candidates.add(label)
+            else:
+                opt_reasons[label] = ("removed", reason)
+                opt_counts[reason] += 1
+                removed.add(label)
+
+        # if we're not allowed to optimize, that's easy..
+        if label in do_not_optimize:
+            _keep("do not optimize")
+            continue
+
+        # If there are remaining tasks depending on this one, do not remove.
+        if any(
+            l for l in dependents_of[label] if l not in removed and l not in prune_deps
+        ):
+            _keep("dependent tasks")
+            continue
+
+        # Some tasks in the task graph only exist because they were required
+        # by a task that has just been optimized away. They can now be removed.
+        if label not in requested_tasks:
+            _remove("dependents optimized")
+            continue
+
+        # Call the optimization strategy.
+        task = tasks[label]
+        opt_by, opt, arg = optimizations(label)
+        if opt.should_remove_task(task, params, arg):
+            _remove(opt_by)
+            continue
+
+        # Some tasks should only run if their dependency was also run. Since we
+        # haven't processed dependencies yet, we add them to a list of
+        # candidate tasks for pruning.
+        if task.if_dependencies:
+            # Provisionally recorded as kept; overwritten below if pruned.
+            opt_reasons[label] = ("kept", opt_by)
+            prune_candidates.add(label)
+        else:
+            _keep(opt_by)
+
+    # Whatever is still a candidate after the traversal gets pruned, unless a
+    # surviving task depends on it.
+    if prune_candidates:
+        reason = "if-dependencies pruning"
+        for label in prune_candidates:
+            # There's an edge case where a triangle graph can cause a
+            # dependency to stay in 'prune_candidates' when the dependent
+            # remains. Do a final check to ensure we don't create any bad
+            # edges.
+            dependents = any(
+                d
+                for d in dependents_of[label]
+                if d not in prune_candidates
+                if d not in removed
+            )
+            if dependents:
+                opt_reasons[label] = ("kept", "dependent tasks")
+                continue
+            removed.add(label)
+            opt_counts[reason] += 1
+            opt_reasons[label] = ("removed", reason)
+
+    _log_optimization("removed", opt_counts, opt_reasons)
+    return removed
+
+
+def replace_tasks(
+    target_task_graph,
+    params,
+    optimizations,
+    do_not_optimize,
+    label_to_taskid,
+    removed_tasks,
+    existing_tasks,
+):
+    """
+    Implement the "Replacing Tasks" phase, returning a set of task labels of
+    all replaced tasks. The replacement taskIds are added to label_to_taskid as
+    a side-effect.
+
+    ``removed_tasks`` is also mutated: a label is added to it when its strategy
+    returns ``True`` from ``should_replace_task`` (replace with nothing).
+
+    Labels are visited in ``visit_postorder()`` order.  A task is only
+    considered when it is not in ``do_not_optimize`` and every one of its
+    dependencies has already been replaced or removed.  A label present in
+    ``existing_tasks`` is replaced by that taskId without consulting the
+    strategy.
+    """
+    opt_counts = defaultdict(int)
+    replaced = set()
+    dependents_of = target_task_graph.graph.reverse_links_dict()
+    dependencies_of = target_task_graph.graph.links_dict()
+
+    for label in target_task_graph.graph.visit_postorder():
+        # if we're not allowed to optimize, that's easy..
+        if label in do_not_optimize:
+            continue
+
+        # if this task depends on un-replaced, un-removed tasks, do not replace
+        if any(
+            l not in replaced and l not in removed_tasks for l in dependencies_of[label]
+        ):
+            continue
+
+        # if the task already exists, that's an easy replacement
+        repl = existing_tasks.get(label)
+        if repl:
+            label_to_taskid[label] = repl
+            replaced.add(label)
+            opt_counts["existing_tasks"] += 1
+            continue
+
+        # call the optimization strategy
+        task = target_task_graph.tasks[label]
+        opt_by, opt, arg = optimizations(label)
+
+        # compute latest deadline of dependents (if any)
+        dependents = [target_task_graph.tasks[l] for l in dependents_of[label]]
+        # Stays None when nothing depends on this task.
+        deadline = None
+        if dependents:
+            now = datetime.datetime.utcnow()
+            # The generator's `task` is scoped to the generator expression and
+            # does not rebind the outer `task` passed to the strategy below.
+            deadline = max(
+                resolve_timestamps(now, task.task["deadline"]) for task in dependents
+            )
+        repl = opt.should_replace_task(task, params, deadline, arg)
+        if repl:
+            if repl is True:
+                # True means remove this task; get_subgraph will catch any
+                # problems with removed tasks being depended on
+                removed_tasks.add(label)
+            else:
+                # Any other truthy value is stored as the replacement taskId.
+                label_to_taskid[label] = repl
+                replaced.add(label)
+            opt_counts[opt_by] += 1
+            continue
+
+    _log_optimization("replaced", opt_counts)
+    return replaced
+
+
+def get_subgraph(
+    target_task_graph,
+    removed_tasks,
+    replaced_tasks,
+    label_to_taskid,
+    decision_task_id,
+):
+    """
+    Return the subgraph of target_task_graph consisting only of
+    non-optimized tasks and edges between them.
+
+    To avoid losing track of taskIds for tasks optimized away, this method
+    simultaneously substitutes real taskIds for task labels in the graph, and
+    populates each task definition's `dependencies` key with the appropriate
+    taskIds.  Task references are resolved in the process.
+
+    Side effects: ``label_to_taskid`` gains a fresh slugid for every label
+    that is neither removed nor already mapped, and each retained task object
+    has its ``task_id`` and ``task`` attributes reassigned.
+
+    Raises ``Exception`` if a task that is not removed depends on a removed
+    task.
+    """
+
+    # check for any dependency edges from included to removed tasks
+    bad_edges = [
+        (l, r, n)
+        for l, r, n in target_task_graph.graph.edges
+        if l not in removed_tasks and r in removed_tasks
+    ]
+    if bad_edges:
+        probs = ", ".join(
+            f"{l} depends on {r} as {n} but it has been removed"
+            for l, r, n in bad_edges
+        )
+        raise Exception("Optimization error: " + probs)
+
+    # fill in label_to_taskid for anything not removed or replaced
+    assert replaced_tasks <= set(label_to_taskid)
+    for label in sorted(
+        target_task_graph.graph.nodes - removed_tasks - set(label_to_taskid)
+    ):
+        label_to_taskid[label] = slugid()
+
+    # resolve labels to taskIds and populate task['dependencies']
+    tasks_by_taskid = {}
+    named_links_dict = target_task_graph.graph.named_links_dict()
+    # Removed and replaced tasks do not appear in the resulting graph.
+    omit = removed_tasks | replaced_tasks
+    for label, task in target_task_graph.tasks.items():
+        if label in omit:
+            continue
+        task.task_id = label_to_taskid[label]
+        # Dependency name -> taskId; replaced dependencies resolve to their
+        # replacement taskId through label_to_taskid.
+        named_task_dependencies = {
+            name: label_to_taskid[label]
+            for name, label in named_links_dict.get(label, {}).items()
+        }
+
+        # Add remaining soft dependencies
+        if task.soft_dependencies:
+            named_task_dependencies.update(
+                {
+                    label: label_to_taskid[label]
+                    for label in task.soft_dependencies
+                    if label in label_to_taskid and label not in omit
+                }
+            )
+
+        task.task = resolve_task_references(
+            task.label,
+            task.task,
+            task_id=task.task_id,
+            decision_task_id=decision_task_id,
+            dependencies=named_task_dependencies,
+        )
+        # Appends to any dependency list already present in the definition.
+        deps = task.task.setdefault("dependencies", [])
+        deps.extend(sorted(named_task_dependencies.values()))
+        tasks_by_taskid[task.task_id] = task
+
+    # resolve edges to taskIds
+    edges_by_taskid = (
+        (label_to_taskid.get(left), label_to_taskid.get(right), name)
+        for (left, right, name) in target_task_graph.graph.edges
+    )
+    # ..and drop edges that are no longer entirely in the task graph
+    #   (note that this omits edges to replaced tasks, but they are still in task.dependencies)
+    edges_by_taskid = {
+        (left, right, name)
+        for (left, right, name) in edges_by_taskid
+        if left in tasks_by_taskid and right in tasks_by_taskid
+    }
+
+    return TaskGraph(tasks_by_taskid, Graph(set(tasks_by_taskid), edges_by_taskid))
+
+
+@register_strategy("never")
+class OptimizationStrategy:
+    """Base class for optimization strategies.
+
+    Registered as "never": on its own it neither removes nor replaces tasks.
+    """
+
+    def should_remove_task(self, task, params, arg):
+        """Decide whether ``task`` is optimized away by removal.
+
+        Returns True to remove the task; this base implementation always
+        returns False."""
+        return False
+
+    def should_replace_task(self, task, params, deadline, arg):
+        """Decide whether ``task`` is optimized away by replacement.
+
+        Returns a taskId to replace the task with, True to replace it with
+        nothing, or False to keep it; this base implementation always
+        returns False."""
+        return False
+
+
+@register_strategy("always")
+class Always(OptimizationStrategy):
+    """Strategy registered as "always": every task it is asked about is removed."""
+
+    def should_remove_task(self, task, params, arg):
+        """Return True unconditionally."""
+        return True
+
+
+class CompositeStrategy(OptimizationStrategy, metaclass=ABCMeta):
+    """Abstract strategy that combines the answers of several substrategies.
+
+    Subclasses provide ``description`` and ``reduce``; the latter folds the
+    lazily generated substrategy results into a single answer.
+    """
+
+    def __init__(self, *substrategies, **kwargs):
+        """Resolve ``substrategies`` and store the optional ``split_args``.
+
+        String entries are looked up in the strategy registry; a ``TypeError``
+        is raised if any of them is unknown, or if a keyword other than
+        ``split_args`` is passed.  ``split_args(arg, substrategies)`` must
+        return one argument per substrategy; by default every substrategy
+        receives the same ``arg``.
+        """
+        resolved = []
+        unknown = set()
+        for entry in substrategies:
+            if not isinstance(entry, str):
+                resolved.append(entry)
+            elif entry in registry:
+                resolved.append(registry[entry])
+            else:
+                unknown.add(entry)
+        self.substrategies = resolved
+
+        if unknown:
+            raise TypeError(
+                "substrategies aren't registered: {}".format(
+                    ",  ".join(sorted(unknown))
+                )
+            )
+
+        def _same_arg_for_all(arg, substrategies):
+            return [arg] * len(substrategies)
+
+        self.split_args = kwargs.pop("split_args", None) or _same_arg_for_all
+        if kwargs:
+            raise TypeError("unexpected keyword args")
+
+    @abstractproperty
+    def description(self):
+        """A textual description of the combined substrategies."""
+
+    @abstractmethod
+    def reduce(self, results):
+        """Given all substrategy results as a generator, return the overall
+        result."""
+
+    def _generate_results(self, fname, *args):
+        """Yield ``getattr(sub, fname)(...)`` for each substrategy in order.
+
+        The last positional argument is split per substrategy through
+        ``split_args``; the preceding ones are passed through unchanged.
+        """
+        *passthru, combined_arg = args
+        per_strategy_args = self.split_args(combined_arg, self.substrategies)
+        for strategy, strategy_arg in zip(self.substrategies, per_strategy_args):
+            yield getattr(strategy, fname)(*passthru, strategy_arg)
+
+    def should_remove_task(self, *args):
+        """Reduce the substrategies' ``should_remove_task`` answers."""
+        return self.reduce(self._generate_results("should_remove_task", *args))
+
+    def should_replace_task(self, *args):
+        """Reduce the substrategies' ``should_replace_task`` answers."""
+        return self.reduce(self._generate_results("should_replace_task", *args))
+
+
+class Any(CompositeStrategy):
+    """Remove or replace a task as soon as one of the given strategies says to.
+
+    For replacement, the value of the first strategy that asks for a
+    replacement is used.
+    """
+
+    @property
+    def description(self):
+        """Descriptions of the substrategies joined with "-or-"."""
+        return "-or-".join(sub.description for sub in self.substrategies)
+
+    @classmethod
+    def reduce(cls, results):
+        """Return the first truthy result, or False if there is none."""
+        return next((rv for rv in results if rv), False)
+
+
+class All(CompositeStrategy):
+    """Given one or more optimization strategies, remove or replace a task if all of them
+    says to.
+
+    Replacement will use the value returned by the first strategy passed in.
+    Note the values used for replacement need not be the same, as long as they
+    all say to replace.
+    """
+
+    @property
+    def description(self):
+        """Descriptions of the substrategies joined with "-and-"."""
+        return "-and-".join([s.description for s in self.substrategies])
+
+    @classmethod
+    def reduce(cls, results):
+        """Combine substrategy results with "all" semantics.
+
+        Returns the first falsy result as soon as one is seen (later results
+        are not evaluated).  When every result is truthy, the result of the
+        first strategy is returned so that a replacement taskId is not
+        collapsed into a bare ``True`` (which callers treat as "replace with
+        nothing").  With no results at all, ``True`` is returned.
+        """
+        chosen = True
+        for index, rv in enumerate(results):
+            if not rv:
+                return rv
+            if index == 0:
+                chosen = rv
+        return chosen
+
+
+class Alias(CompositeStrategy):
+    """Expose an existing strategy under another name.
+
+    Handy for swapping strategies in and out without touching the task
+    transforms that reference them.
+    """
+
+    def __init__(self, strategy):
+        """Wrap the single ``strategy`` (an instance or a registered name)."""
+        super().__init__(strategy)
+
+    @property
+    def description(self):
+        """The description of the aliased strategy."""
+        aliased = self.substrategies[0]
+        return aliased.description
+
+    def reduce(self, results):
+        """Return the aliased strategy's result unchanged."""
+        return next(results)
+
+
+class Not(CompositeStrategy):
+    """Invert the answer of the wrapped strategy."""
+
+    def __init__(self, strategy):
+        """Wrap the single ``strategy`` (an instance or a registered name)."""
+        super().__init__(strategy)
+
+    @property
+    def description(self):
+        """The wrapped strategy's description prefixed with "not-"."""
+        wrapped = self.substrategies[0]
+        return "not-" + wrapped.description
+
+    def reduce(self, results):
+        """Return the boolean negation of the wrapped strategy's result."""
+        wrapped_result = next(results)
+        return not wrapped_result
+
+
+# Trigger registration in sibling modules.
+import_sibling_modules()
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py
new file mode 100644
index 0000000000..973b550632
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py
@@ -0,0 +1,64 @@
+import logging
+from datetime import datetime
+
+from taskgraph import files_changed
+from taskgraph.optimize.base import OptimizationStrategy, register_strategy
+from taskgraph.util.taskcluster import find_task_id, status_task
+
+logger = logging.getLogger(__name__)
+
+
+@register_strategy("index-search")
+class IndexSearch(OptimizationStrategy):
+    # A task with no dependencies remaining after optimization will be replaced
+    # if artifacts exist for the corresponding index_paths.
+    # Otherwise, we're in one of the following cases:
+    # - the task has un-optimized dependencies
+    # - the artifacts have expired
+    # - some changes altered the index_paths and new artifacts need to be
+    # created.
+    # In every of those cases, we need to run the task to create or refresh
+    # artifacts.
+
+    fmt = "%Y-%m-%dT%H:%M:%S.%fZ"
+
+    def should_replace_task(self, task, params, deadline, index_paths):
+        "Look for a task with one of the given index paths"
+        for index_path in index_paths:
+            try:
+                task_id = find_task_id(index_path)
+                status = status_task(task_id)
+                # status can be `None` if we're in `testing` mode
+                # (e.g. test-action-callback)
+                if not status or status.get("state") in ("exception", "failed"):
+                    continue
+
+                if deadline and datetime.strptime(
+                    status["expires"], self.fmt
+                ) < datetime.strptime(deadline, self.fmt):
+                    continue
+
+                return task_id
+            except KeyError:
+                # 404 will end up here and go on to the next index path
+                pass
+
+        return False
+
+
+@register_strategy("skip-unless-changed")
+class SkipUnlessChanged(OptimizationStrategy):
+    """Remove a task unless the push changed a file matching its patterns."""
+
+    def should_remove_task(self, task, params, file_patterns):
+        # pushlog_id == -1 - this is the case when run from a cron.yml job or on a git
+        # repository. The schema declares pushlog_id as a str, so compare its string form.
+        if params.get("repository_type") == "hg" and str(params.get("pushlog_id")) == "-1":
+            return False
+
+        if files_changed.check(params, file_patterns):
+            return False
+        logger.debug(
+            'no files found matching a pattern in `skip-unless-changed` for "%s"', task.label
+        )
+        return True
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py b/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py
new file mode 100644
index 0000000000..48571d97ad
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py
@@ -0,0 +1,376 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import gzip
+import hashlib
+import json
+import os
+import time
+from datetime import datetime
+from io import BytesIO
+from pprint import pformat
+from subprocess import CalledProcessError
+from unittest.mock import Mock
+from urllib.parse import urlparse
+from urllib.request import urlopen
+
+import mozilla_repo_urls
+from voluptuous import ALLOW_EXTRA, Any, Optional, Required, Schema
+
+from taskgraph.util import yaml
+from taskgraph.util.readonlydict import ReadOnlyDict
+from taskgraph.util.schema import validate_schema
+from taskgraph.util.taskcluster import find_task_id, get_artifact_url
+from taskgraph.util.vcs import get_repository
+
+
+class ParameterMismatch(Exception):
+    """Raised when a parameters.yml has extra or missing parameters."""
+
+
+# Please keep this list sorted and in sync with docs/reference/parameters.rst
+base_schema = Schema(
+    {
+        Required("base_repository"): str,
+        Required("base_ref"): str,
+        Required("base_rev"): str,
+        Required("build_date"): int,
+        Required("build_number"): int,
+        Required("do_not_optimize"): [str],
+        Required("enable_always_target"): Any(bool, [str]),
+        Required("existing_tasks"): {str: str},
+        Required("filters"): [str],
+        Required("head_ref"): str,
+        Required("head_repository"): str,
+        Required("head_rev"): str,
+        Required("head_tag"): str,
+        Required("level"): str,
+        Required("moz_build_date"): str,
+        Required("next_version"): Any(str, None),
+        Required("optimize_strategies"): Any(str, None),
+        Required("optimize_target_tasks"): bool,
+        Required("owner"): str,
+        Required("project"): str,
+        Required("pushdate"): int,
+        Required("pushlog_id"): str,
+        Required("repository_type"): str,
+        # target-kinds is not included, since it should never be
+        # used at run-time
+        Required("target_tasks_method"): str,
+        Required("tasks_for"): str,
+        Required("version"): Any(str, None),
+        Optional("code-review"): {
+            Required("phabricator-build-target"): str,
+        },
+    }
+)
+
+
+def get_contents(path):
+    """Return the first line of ``path`` with trailing whitespace removed."""
+    with open(path) as fh:
+        return fh.readline().rstrip()
+
+
+def get_version(repo_path):
+    version_path = os.path.join(repo_path, "version.txt")
+    return get_contents(version_path) if os.path.isfile(version_path) else None
+
+
+def _get_defaults(repo_root=None):
+    repo_path = repo_root or os.getcwd()
+    try:
+        repo = get_repository(repo_path)
+    except RuntimeError:
+        # Use fake values if no repo is detected.
+        repo = Mock(branch="", head_rev="", tool="git")
+        repo.get_url.return_value = ""
+
+    try:
+        repo_url = repo.get_url()
+        parsed_url = mozilla_repo_urls.parse(repo_url)
+        project = parsed_url.repo_name
+    except (
+        CalledProcessError,
+        mozilla_repo_urls.errors.InvalidRepoUrlError,
+        mozilla_repo_urls.errors.UnsupportedPlatformError,
+    ):
+        repo_url = ""
+        project = ""
+
+    return {
+        "base_repository": repo_url,
+        "base_ref": "",
+        "base_rev": "",
+        "build_date": int(time.time()),
+        "build_number": 1,
+        "do_not_optimize": [],
+        "enable_always_target": True,
+        "existing_tasks": {},
+        "filters": ["target_tasks_method"],
+        "head_ref": repo.branch or repo.head_rev,
+        "head_repository": repo_url,
+        "head_rev": repo.head_rev,
+        "head_tag": "",
+        "level": "3",
+        "moz_build_date": datetime.now().strftime("%Y%m%d%H%M%S"),
+        "next_version": None,
+        "optimize_strategies": None,
+        "optimize_target_tasks": True,
+        "owner": "nobody@mozilla.com",
+        "project": project,
+        "pushdate": int(time.time()),
+        "pushlog_id": "0",
+        "repository_type": repo.tool,
+        "target_tasks_method": "default",
+        "tasks_for": "",
+        "version": get_version(repo_path),
+    }
+
+
+defaults_functions = [_get_defaults]
+
+
+def extend_parameters_schema(schema, defaults_fn=None):
+    """
+    Extend the schema for parameters to include per-project configuration.
+
+    This should be called by the `taskgraph.register` function in the
+    graph-configuration.
+
+    Args:
+        schema (Schema): The voluptuous.Schema object used to describe extended
+            parameters.
+        defaults_fn (function): A function which takes no arguments and returns a
+            dict mapping parameter name to default value in the
+            event strict=False (optional).
+    """
+    global base_schema
+    global defaults_functions
+    base_schema = base_schema.extend(schema)
+    if defaults_fn:
+        defaults_functions.append(defaults_fn)
+
+
+class Parameters(ReadOnlyDict):
+    """An immutable dictionary with nicer KeyError messages on failure"""
+
+    def __init__(self, strict=True, repo_root=None, **kwargs):
+        self.strict = strict
+        self.spec = kwargs.pop("spec", None)
+        self._id = None
+
+        if not self.strict:
+            # apply defaults to missing parameters
+            kwargs = Parameters._fill_defaults(repo_root=repo_root, **kwargs)
+
+        ReadOnlyDict.__init__(self, **kwargs)
+
+    @property
+    def id(self):
+        """12-character SHA-256 prefix of the JSON-serialized parameters (cached)."""
+        if not self._id:
+            self._id = hashlib.sha256(
+                json.dumps(self, sort_keys=True).encode("utf-8")
+            ).hexdigest()[:12]
+
+        return self._id
+
+    @staticmethod
+    def format_spec(spec):
+        """
+        Get a friendly identifier from a parameters specifier.
+
+        Args:
+            spec (str): Parameters specifier.
+
+        Returns:
+            str: Name to identify parameters by.
+        """
+        if spec is None:
+            return "defaults"
+
+        if any(spec.startswith(s) for s in ("task-id=", "project=")):
+            return spec
+
+        result = urlparse(spec)
+        if result.scheme in ("http", "https"):
+            spec = result.path
+
+        return os.path.splitext(os.path.basename(spec))[0]
+
+    @staticmethod
+    def _fill_defaults(repo_root=None, **kwargs):
+        """Return ``kwargs`` with missing names filled from `defaults_functions`."""
+        defaults = {}
+        for fn in defaults_functions:
+            defaults.update(fn(repo_root))
+
+        for name, default in defaults.items():
+            if name not in kwargs:
+                kwargs[name] = default
+        return kwargs
+
+    def check(self):
+        """Validate against the schema, raising ParameterMismatch on failure."""
+        schema = (
+            base_schema if self.strict else base_schema.extend({}, extra=ALLOW_EXTRA)
+        )
+        try:
+            validate_schema(schema, self.copy(), "Invalid parameters:")
+        except Exception as e:
+            raise ParameterMismatch(str(e)) from e
+
+    def __getitem__(self, k):
+        try:
+            return super().__getitem__(k)
+        except KeyError:
+            raise KeyError(f"taskgraph parameter {k!r} not found")
+
+    def is_try(self):
+        """
+        Determine whether this graph is being built on a try project or for
+        `mach try fuzzy`.
+        """
+        return "try" in self["project"] or self["tasks_for"] == "github-pull-request"
+
+    @property
+    def moz_build_date(self):
+        # XXX self["moz_build_date"] is left as a string because:
+        #  * of backward compatibility
+        #  * parameters are output in a YAML file
+        return datetime.strptime(self["moz_build_date"], "%Y%m%d%H%M%S")
+
+    def file_url(self, path, pretty=False):
+        """
+        Determine the VCS URL for viewing a file in the tree, suitable for
+        viewing by a human.
+
+        :param str path: The path, relative to the root of the repository.
+        :param bool pretty: Whether to return a link to a formatted version of the
+            file, or the raw file version.
+
+        :return str: The URL displaying the given path.
+        """
+        if self["repository_type"] == "hg":
+            if path.startswith("comm/"):
+                path = path[len("comm/") :]
+                repo = self["comm_head_repository"]
+                rev = self["comm_head_rev"]
+            else:
+                repo = self["head_repository"]
+                rev = self["head_rev"]
+            endpoint = "file" if pretty else "raw-file"
+            return f"{repo}/{endpoint}/{rev}/{path}"
+        elif self["repository_type"] == "git":
+            # For getting the file URL for git repositories, we only support a Github HTTPS remote
+            repo = self["head_repository"]
+            if repo.startswith("https://github.com/"):
+                if repo.endswith("/"):
+                    repo = repo[:-1]
+
+                rev = self["head_rev"]
+                endpoint = "blob" if pretty else "raw"
+                return f"{repo}/{endpoint}/{rev}/{path}"
+            elif repo.startswith("git@github.com:"):
+                if repo.endswith(".git"):
+                    repo = repo[:-4]
+                rev = self["head_rev"]
+                endpoint = "blob" if pretty else "raw"
+                # Rewrite the SSH remote to its HTTPS equivalent.
+                repo = repo.replace("git@github.com:", "https://github.com/")
+                return f"{repo}/{endpoint}/{rev}/{path}"
+            else:
+                raise ParameterMismatch(
+                    "Don't know how to determine file URL for non-github "
+                    "repo: {}".format(repo)
+                )
+        else:
+            raise RuntimeError(
+                'Only the "git" and "hg" repository types are supported for using file_url()'
+            )
+
+    def __str__(self):
+        return f"Parameters(id={self.id}) (from {self.format_spec(self.spec)})"
+
+    def __repr__(self):
+        return pformat(dict(self), indent=2)
+
+
+def load_parameters_file(
+    spec, strict=True, overrides=None, trust_domain=None, repo_root=None
+):
+    """
+    Load parameters from a path, url, decision task-id or project.
+
+    Examples:
+        task-id=fdtgsD5DQUmAQZEaGMvQ4Q
+        project=mozilla-central
+    """
+
+    # Work on a copy so the caller's `overrides` dict is never mutated.
+    overrides = dict(overrides or {})
+    overrides["spec"] = spec
+
+    if not spec:
+        return Parameters(strict=strict, repo_root=repo_root, **overrides)
+
+    try:
+        # reading parameters from a local parameters.yml file
+        f = open(spec)
+    except OSError:
+        # fetching parameters.yml using task task-id, project or supplied url
+        task_id = None
+        if spec.startswith("task-id="):
+            task_id = spec.split("=", 1)[1]
+        elif spec.startswith("project="):
+            if trust_domain is None:
+                raise ValueError(
+                    "Can't specify parameters by project "
+                    "if trust domain isn't supplied.",
+                )
+            project = spec.split("=", 1)[1]
+            index = f"{trust_domain}.v2.{project}.latest.taskgraph.decision"
+            task_id = find_task_id(index)
+
+        if task_id:
+            spec = get_artifact_url(task_id, "public/parameters.yml")
+        f = urlopen(spec)
+
+        # Decompress gzipped parameters.
+        if f.info().get("Content-Encoding") == "gzip":
+            with f:
+                buf = BytesIO(f.read())
+            f = gzip.GzipFile(fileobj=buf)
+
+    with f:  # always release the local file or HTTP response, even on error
+        if spec.endswith(".yml"):
+            kwargs = yaml.load_stream(f)
+        elif spec.endswith(".json"):
+            kwargs = json.load(f)
+        else:
+            raise TypeError(f"Parameters file `{spec}` is not JSON or YAML")
+
+    kwargs.update(overrides)
+    return Parameters(strict=strict, repo_root=repo_root, **kwargs)
+
+
+def parameters_loader(spec, strict=True, overrides=None):
+    def get_parameters(graph_config):
+        try:
+            repo_root = graph_config.vcs_root
+        except Exception:
+            repo_root = None
+
+        parameters = load_parameters_file(
+            spec,
+            strict=strict,
+            overrides=overrides,
+            repo_root=repo_root,
+            trust_domain=graph_config["trust-domain"],
+        )
+        parameters.check()
+        return parameters
+
+    return get_parameters
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/fetch-content b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/fetch-content
new file mode 100755
index 0000000000..0af923d01d
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/fetch-content
@@ -0,0 +1,899 @@
+#!/usr/bin/python3 -u
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import argparse
+import bz2
+import concurrent.futures
+import contextlib
+import datetime
+import gzip
+import hashlib
+import json
+import lzma
+import multiprocessing
+import os
+import pathlib
+import random
+import re
+import stat
+import subprocess
+import sys
+import tarfile
+import tempfile
+import time
+import urllib.parse
+import urllib.request
+import zipfile
+
+try:
+    import zstandard
+except ImportError:
+    zstandard = None
+
+try:
+    import certifi
+except ImportError:
+    certifi = None
+
+
+CONCURRENCY = multiprocessing.cpu_count()
+
+
+def log(msg):
+    print(msg, file=sys.stderr)
+    sys.stderr.flush()
+
+
+class IntegrityError(Exception):
+    """Represents an integrity error when downloading a URL."""
+
+
+def ZstdCompressor(*args, **kwargs):
+    if not zstandard:
+        raise ValueError("zstandard Python package not available")
+    return zstandard.ZstdCompressor(*args, **kwargs)
+
+
+def ZstdDecompressor(*args, **kwargs):
+    if not zstandard:
+        raise ValueError("zstandard Python package not available")
+    return zstandard.ZstdDecompressor(*args, **kwargs)
+
+
+@contextlib.contextmanager
+def rename_after_close(fname, *args, **kwargs):
+    """
+    Context manager that opens a temporary file to use as a writer,
+    and closes the file on context exit, renaming it to the expected
+    file name in case of success, or removing it in case of failure.
+
+    Takes the same options as open(), but must be used as a context
+    manager.
+    """
+    path = pathlib.Path(fname)
+    tmp = path.with_name("%s.tmp" % path.name)
+    try:
+        with tmp.open(*args, **kwargs) as fh:
+            yield fh
+    except Exception:
+        tmp.unlink()
+        raise
+    else:
+        tmp.rename(fname)
+
+
+# The following is copied from
+# https://github.com/mozilla-releng/redo/blob/6d07678a014e0c525e54a860381a165d34db10ff/redo/__init__.py#L15-L85
+def retrier(attempts=5, sleeptime=10, max_sleeptime=300, sleepscale=1.5, jitter=1):
+    """
+    A generator function that sleeps between retries, handles exponential
+    backoff and jitter. The action you are retrying is meant to run after
+    retrier yields.
+
+    At each iteration, we sleep for sleeptime + random.randint(-jitter, jitter).
+    Afterwards sleeptime is multiplied by sleepscale for the next iteration.
+
+    Args:
+        attempts (int): maximum number of times to try; defaults to 5
+        sleeptime (float): how many seconds to sleep between tries; defaults to
+                           10s
+        max_sleeptime (float): the longest we'll sleep, in seconds; defaults to
+                               300s (five minutes)
+        sleepscale (float): how much to multiply the sleep time by each
+                            iteration; defaults to 1.5
+        jitter (int): random jitter to introduce to sleep time each iteration.
+                      the amount is chosen at random between [-jitter, +jitter]
+                      defaults to 1
+
+    Yields:
+        the current sleep time, a maximum of `attempts` number of times
+
+    Example:
+        >>> n = 0
+        >>> for _ in retrier(sleeptime=0, jitter=0):
+        ...     if n == 3:
+        ...         # We did the thing!
+        ...         break
+        ...     n += 1
+        >>> n
+        3
+
+        >>> n = 0
+        >>> for _ in retrier(sleeptime=0, jitter=0):
+        ...     if n == 6:
+        ...         # We did the thing!
+        ...         break
+        ...     n += 1
+        ... else:
+        ...     print("max tries hit")
+        max tries hit
+    """
+    jitter = jitter or 0  # py35 barfs on the next line if jitter is None
+    if jitter > sleeptime:
+        # To prevent negative sleep times
+        raise Exception(
+            "jitter ({}) must be less than sleep time ({})".format(jitter, sleeptime)
+        )
+
+    sleeptime_real = sleeptime
+    for _ in range(attempts):
+        log("attempt %i/%i" % (_ + 1, attempts))
+
+        yield sleeptime_real
+
+        if jitter:
+            sleeptime_real = sleeptime + random.randint(-jitter, jitter)
+            # our jitter should scale along with the sleeptime
+            jitter = int(jitter * sleepscale)
+        else:
+            sleeptime_real = sleeptime
+
+        sleeptime *= sleepscale
+
+        if sleeptime_real > max_sleeptime:
+            sleeptime_real = max_sleeptime
+
+        # Don't need to sleep the last time
+        if _ < attempts - 1:
+            log(
+                "sleeping for %.2fs (attempt %i/%i)" % (sleeptime_real, _ + 1, attempts)
+            )
+            time.sleep(sleeptime_real)
+
+
+def stream_download(url, sha256=None, size=None, headers=None):
+    """Download a URL to a generator, optionally with content verification.
+
+    If ``sha256`` or ``size`` are defined, the downloaded URL will be
+    validated against those requirements and ``IntegrityError`` will be
+    raised if expectations do not match.
+
+    Because verification cannot occur until the file is completely downloaded
+    it is recommended for consumers to not do anything meaningful with the
+    data if content verification is being used. To securely handle retrieved
+    content, it should be streamed to a file or memory and only operated
+    on after the generator is exhausted without raising.
+    """
+    log("Downloading %s" % url)
+
+    h = hashlib.sha256()
+    length = 0
+
+    t0 = time.time()
+    req_headers = {}
+    for header in headers or []:
+        # Split on the first colon only: values (URLs, tokens) may contain colons.
+        key, val = header.split(":", 1)
+        req_headers[key.strip()] = val.strip()
+
+    req = urllib.request.Request(url, None, req_headers)
+    with urllib.request.urlopen(
+        req, timeout=60, cafile=certifi.where()
+    ) if certifi else urllib.request.urlopen(req, timeout=60) as fh:
+        if not url.endswith(".gz") and fh.info().get("Content-Encoding") == "gzip":
+            fh = gzip.GzipFile(fileobj=fh)
+
+        while True:
+            chunk = fh.read(65536)
+            if not chunk:
+                break
+
+            h.update(chunk)
+            length += len(chunk)
+
+            yield chunk
+
+    duration = time.time() - t0
+    digest = h.hexdigest()
+
+    log(
+        "%s resolved to %d bytes with sha256 %s in %.3fs"
+        % (url, length, digest, duration)
+    )
+
+    if size:
+        if size == length:
+            log("Verified size of %s" % url)
+        else:
+            raise IntegrityError(
+                "size mismatch on %s: wanted %d; got %d" % (url, size, length)
+            )
+
+    if sha256:
+        if digest == sha256:
+            log("Verified sha256 integrity of %s" % url)
+        else:
+            raise IntegrityError(
+                "sha256 mismatch on %s: wanted %s; got %s" % (url, sha256, digest)
+            )
+
+
+def download_to_path(url, path, sha256=None, size=None, headers=None):
+    """Download a URL to a filesystem path, possibly with verification."""
+
+    # We download to a temporary file and rename at the end so there's
+    # no chance of the final file being partially written or containing
+    # bad data.
+    try:
+        path.unlink()
+    except FileNotFoundError:
+        pass
+
+    for _ in retrier(attempts=5, sleeptime=60):
+        try:
+            log("Downloading %s to %s" % (url, path))
+
+            with rename_after_close(path, "wb") as fh:
+                for chunk in stream_download(
+                    url, sha256=sha256, size=size, headers=headers
+                ):
+                    fh.write(chunk)
+
+            return
+        except IntegrityError:
+            raise
+        except Exception as e:
+            log("Download failed: {}".format(e))
+            continue
+
+    raise Exception("Download failed, no more retries!")
+
+
+def download_to_memory(url, sha256=None, size=None):
+    """Download a URL to memory, possibly with verification."""
+
+    for _ in retrier(attempts=5, sleeptime=60):
+        try:
+            log("Downloading %s" % (url))
+
+            # Build the buffer afresh on every attempt so bytes from a failed,
+            # partial download never leak into the data a later retry returns.
+            data = b"".join(stream_download(url, sha256=sha256, size=size))
+
+            return data
+        except IntegrityError:
+            raise
+        except Exception as e:
+            log("Download failed: {}".format(e))
+            continue
+
+    raise Exception("Download failed, no more retries!")
+
+
+def gpg_verify_path(path: pathlib.Path, public_key_data: bytes, signature_data: bytes):
+    """Verify that a filesystem path verifies using GPG.
+
+    Takes a Path defining a file to verify. ``public_key_data`` contains
+    bytes with GPG public key data. ``signature_data`` contains a signed
+    GPG document to use with ``gpg --verify``.
+    """
+    log("Validating GPG signature of %s" % path)
+    log("GPG key data:\n%s" % public_key_data.decode("ascii"))
+
+    with tempfile.TemporaryDirectory() as td:
+        try:
+            # --batch since we're running unattended.
+            gpg_args = ["gpg", "--homedir", td, "--batch"]
+
+            log("Importing GPG key...")
+            subprocess.run(gpg_args + ["--import"], input=public_key_data, check=True)
+
+            log("Verifying GPG signature...")
+            subprocess.run(
+                gpg_args + ["--verify", "-", "%s" % path],
+                input=signature_data,
+                check=True,
+            )
+
+            log("GPG signature verified!")
+        finally:
+            # There is a race between the agent self-terminating and
+            # shutil.rmtree() from the temporary directory cleanup that can
+            # lead to exceptions. Kill the agent before cleanup to prevent this.
+            env = dict(os.environ)
+            env["GNUPGHOME"] = td
+            subprocess.run(["gpgconf", "--kill", "gpg-agent"], env=env)
+
+
+def open_tar_stream(path: pathlib.Path):
+    """Return a binary reader for ``path``, decompressing according to its suffix."""
+    if path.suffix == ".bz2":
+        return bz2.open(str(path), "rb")
+    elif path.suffix in (".gz", ".tgz") :
+        return gzip.open(str(path), "rb")
+    elif path.suffix == ".xz":
+        return lzma.open(str(path), "rb")
+    elif path.suffix == ".zst":
+        dctx = ZstdDecompressor()
+        return dctx.stream_reader(path.open("rb"))
+    elif path.suffix == ".tar":
+        return path.open("rb")
+    else:
+        raise ValueError("unknown archive format for tar file: %s" % path)
+
+
+def archive_type(path: pathlib.Path):
+    """Classify ``path`` as a "tar" or "zip" archive by suffix, else None."""
+    suffixes = path.suffixes
+    if suffixes[-2:-1] == [".tar"] or suffixes[-1:] == [".tgz"]:
+        return "tar"
+    if path.suffix == ".zip":
+        return "zip"
+    return None
+
+
+def extract_archive(path, dest_dir, typ):
+    """Extract an archive to a destination directory."""
+
+    # Resolve paths to absolute variants.
+    path = path.resolve()
+    dest_dir = dest_dir.resolve()
+
+    log("Extracting %s to %s" % (path, dest_dir))
+    t0 = time.time()
+
+    # We pipe input to the decompressor program so that we can apply
+    # custom decompressors that the program may not know about.
+    if typ == "tar":
+        ifh = open_tar_stream(path)
+        # On Windows, the tar program doesn't support things like symbolic
+        # links, while Windows actually support them. The tarfile module in
+        # python does, so use that there; elsewhere the tar program is
+        # significantly faster. The `with` closes both the decompressed
+        # stream and the TarFile once extraction is done instead of leaking
+        # them until garbage collection.
+        if sys.platform == "win32":
+            with ifh, tarfile.open(fileobj=ifh, mode="r|") as tar:
+                tar.extractall(str(dest_dir))
+            args = []
+        else:
+            args = ["tar", "xf", "-"]
+            pipe_stdin = True
+    elif typ == "zip":
+        # unzip from stdin has wonky behavior. We don't use a pipe for it.
+        ifh = open(os.devnull, "rb")
+        args = ["unzip", "-q", "-o", str(path)]
+        pipe_stdin = False
+    else:
+        raise ValueError("unknown archive format: %s" % path)
+
+    if args:
+        with ifh, subprocess.Popen(
+            args, cwd=str(dest_dir), bufsize=0, stdin=subprocess.PIPE
+        ) as p:
+            while True:
+                if not pipe_stdin:
+                    break
+
+                chunk = ifh.read(131072)
+                if not chunk:
+                    break
+
+                p.stdin.write(chunk)
+
+        if p.returncode:
+            raise Exception("%r exited %d" % (args, p.returncode))
+
+    log("%s extracted in %.3fs" % (path, time.time() - t0))
+
+
+def repack_archive(
+    orig: pathlib.Path, dest: pathlib.Path, strip_components=0, prefix=""
+):
+    assert orig != dest
+    log("Repacking as %s" % dest)
+    orig_typ = archive_type(orig)
+    typ = archive_type(dest)
+    if not orig_typ:
+        raise Exception("Archive type not supported for %s" % orig.name)
+    if not typ:
+        raise Exception("Archive type not supported for %s" % dest.name)
+
+    if dest.suffixes[-2:] != [".tar", ".zst"]:
+        raise Exception("Only producing .tar.zst archives is supported.")
+
+    if strip_components or prefix:
+
+        def filter(name):
+            if strip_components:
+                stripped = "/".join(name.split("/")[strip_components:])
+                if not stripped:
+                    raise Exception(
+                        "Stripping %d components would remove files" % strip_components
+                    )
+                name = stripped
+            return prefix + name
+
+    else:
+        filter = None
+
+    with rename_after_close(dest, "wb") as fh:
+        ctx = ZstdCompressor()
+        if orig_typ == "zip":
+            assert typ == "tar"
+            zip = zipfile.ZipFile(orig)
+            # Convert the zip stream to a tar on the fly.
+            with ctx.stream_writer(fh) as compressor, tarfile.open(
+                fileobj=compressor, mode="w:"
+            ) as tar:
+                for zipinfo in zip.infolist():
+                    if zipinfo.is_dir():
+                        continue
+                    tarinfo = tarfile.TarInfo()
+                    filename = zipinfo.filename
+                    tarinfo.name = filter(filename) if filter else filename
+                    tarinfo.size = zipinfo.file_size
+                    # Zip files don't have any knowledge of the timezone
+                    # they were created in. Which is not really convenient to
+                    # reliably convert to a timestamp. But we don't really
+                    # care about accuracy, but rather about reproducibility,
+                    # so we pick UTC.
+                    time = datetime.datetime(
+                        *zipinfo.date_time, tzinfo=datetime.timezone.utc
+                    )
+                    tarinfo.mtime = time.timestamp()
+                    # 0 is MS-DOS, 3 is UNIX. Only in the latter case do we
+                    # get anything useful for the tar file mode.
+                    if zipinfo.create_system == 3:
+                        mode = zipinfo.external_attr >> 16
+                    else:
+                        mode = 0o0644
+                    tarinfo.mode = stat.S_IMODE(mode)
+                    if stat.S_ISLNK(mode):
+                        tarinfo.type = tarfile.SYMTYPE
+                        tarinfo.linkname = zip.read(filename).decode()
+                        tar.addfile(tarinfo, zip.open(filename))
+                    elif stat.S_ISREG(mode) or stat.S_IFMT(mode) == 0:
+                        tar.addfile(tarinfo, zip.open(filename))
+                    else:
+                        raise Exception("Unsupported file mode %o" % stat.S_IFMT(mode))
+
+        elif orig_typ == "tar":
+            if typ == "zip":
+                raise Exception("Repacking a tar to zip is not supported")
+            assert typ == "tar"
+
+            ifh = open_tar_stream(orig)
+            if filter:
+                # To apply the filter, we need to open the tar stream and
+                # tweak it.
+                origtar = tarfile.open(fileobj=ifh, mode="r|")
+                with ctx.stream_writer(fh) as compressor, tarfile.open(
+                    fileobj=compressor,
+                    mode="w:",
+                    format=origtar.format,
+                ) as tar:
+                    for tarinfo in origtar:
+                        if tarinfo.isdir():
+                            continue
+                        tarinfo.name = filter(tarinfo.name)
+                        if "path" in tarinfo.pax_headers:
+                            tarinfo.pax_headers["path"] = filter(
+                                tarinfo.pax_headers["path"]
+                            )
+                        if tarinfo.isfile():
+                            tar.addfile(tarinfo, origtar.extractfile(tarinfo))
+                        else:
+                            tar.addfile(tarinfo)
+            else:
+                # We only change compression here. The tar stream is unchanged.
+                ctx.copy_stream(ifh, fh)
+
+
+def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
+    """Fetch a URL and extract it to a destination path.
+
+    If the downloaded URL is an archive, it is extracted automatically
+    and the archive is deleted. Otherwise the file remains in place in
+    the destination directory.
+    """
+
+    basename = urllib.parse.urlparse(url).path.split("/")[-1]
+    dest_path = dest_dir / basename
+
+    download_to_path(url, dest_path, sha256=sha256, size=size)
+
+    if not extract:
+        return
+
+    typ = archive_type(dest_path)
+    if typ:
+        extract_archive(dest_path, dest_dir, typ)
+        log("Removing %s" % dest_path)
+        dest_path.unlink()
+
+
+def fetch_urls(downloads):
+    """Fetch URLs pairs to a pathlib.Path."""
+    with concurrent.futures.ThreadPoolExecutor(CONCURRENCY) as e:
+        fs = []
+
+        for download in downloads:
+            fs.append(e.submit(fetch_and_extract, *download))
+
+        for f in fs:
+            f.result()
+
+
+def _git_checkout_github_archive(
+    dest_path: pathlib.Path, repo: str, commit: str, prefix: str
+):
+    "Use github archive generator to speed up github git repo cloning"
+    repo = repo.rstrip("/")
+    github_url = "{repo}/archive/{commit}.tar.gz".format(**locals())
+
+    with tempfile.TemporaryDirectory() as td:
+        temp_dir = pathlib.Path(td)
+        dl_dest = temp_dir / "archive.tar.gz"
+        download_to_path(github_url, dl_dest)
+        repack_archive(dl_dest, dest_path, strip_components=1, prefix=prefix + "/")
+
+
+def _github_submodule_required(repo: str, commit: str):
+    "Use github API to check if submodules are used"
+    url = "{repo}/blob/{commit}/.gitmodules".format(**locals())
+    try:
+        status_code = urllib.request.urlopen(url).getcode()
+        return status_code == 200
+    except:
+        return False
+
+
+def git_checkout_archive(
+    dest_path: pathlib.Path,
+    repo: str,
+    commit: str,
+    prefix=None,
+    ssh_key=None,
+    include_dot_git=False,
+):
+    """Produce an archive of the files comprising a Git checkout."""
+    dest_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if not prefix:
+        prefix = repo.rstrip("/").rsplit("/", 1)[-1]
+
+    if dest_path.suffixes[-2:] != [".tar", ".zst"]:
+        raise Exception("Only producing .tar.zst archives is supported.")
+
+    if repo.startswith("https://github.com/"):
+        if not include_dot_git and not _github_submodule_required(repo, commit):
+            log("Using github archive service to speedup archive creation")
+            # Always log sha1 info, either from commit or resolved from repo.
+            if re.match(r"^[a-fA-F0-9]{40}$", commit):
+                revision = commit
+            else:
+                ref_output = subprocess.check_output(["git", "ls-remote", repo,
+                                                      'refs/heads/' + commit])
+                revision, _ = ref_output.decode().split(maxsplit=1)
+            log("Fetching revision {}".format(revision))
+            return _git_checkout_github_archive(dest_path, repo, commit, prefix)
+
+    with tempfile.TemporaryDirectory() as td:
+        temp_dir = pathlib.Path(td)
+
+        git_dir = temp_dir / prefix
+
+        # This could be faster with a shallow clone. However, Git requires a ref
+        # to initiate a clone. Since the commit-ish may not refer to a ref, we
+        # simply perform a full clone followed by a checkout.
+        print("cloning %s to %s" % (repo, git_dir))
+
+        env = os.environ.copy()
+        keypath = ""
+        if ssh_key:
+            taskcluster_secret_url = api(
+                os.environ.get("TASKCLUSTER_PROXY_URL"),
+                "secrets",
+                "v1",
+                "secret/{keypath}".format(keypath=ssh_key),
+            )
+            taskcluster_secret = b"".join(stream_download(taskcluster_secret_url))
+            taskcluster_secret = json.loads(taskcluster_secret)
+            sshkey = taskcluster_secret["secret"]["ssh_privkey"]
+
+            keypath = temp_dir.joinpath("ssh-key")
+            keypath.write_text(sshkey)
+            keypath.chmod(0o600)
+
+            env = {
+                "GIT_SSH_COMMAND": "ssh -o 'StrictHostKeyChecking no' -i {keypath}".format(
+                    keypath=keypath
+                )
+            }
+
+        subprocess.run(["git", "clone", "-n", repo, str(git_dir)], check=True, env=env)
+
+        # Always use a detached head so that git prints out what it checked out.
+        subprocess.run(
+            ["git", "checkout", "--detach", commit], cwd=str(git_dir), check=True
+        )
+
+        # When including the .git, we want --depth 1, but a direct clone would not
+        # necessarily be able to give us the right commit.
+        if include_dot_git:
+            initial_clone = git_dir.with_name(git_dir.name + ".orig")
+            git_dir.rename(initial_clone)
+            subprocess.run(
+                [
+                    "git",
+                    "clone",
+                    "file://" + str(initial_clone),
+                    str(git_dir),
+                    "--depth",
+                    "1",
+                ],
+                check=True,
+            )
+            subprocess.run(
+                ["git", "remote", "set-url", "origin", repo],
+                cwd=str(git_dir),
+                check=True,
+            )
+
+        # --depth 1 can induce more work on the server side, so only use it for
+        # submodule initialization when we want to keep the .git directory.
+        depth = ["--depth", "1"] if include_dot_git else []
+        subprocess.run(
+            ["git", "submodule", "update", "--init"] + depth,
+            cwd=str(git_dir),
+            check=True,
+        )
+
+        if keypath:
+            os.remove(keypath)
+
+        print("creating archive %s of commit %s" % (dest_path, commit))
+        exclude_dot_git = [] if include_dot_git else ["--exclude=.git"]
+        proc = subprocess.Popen(
+            [
+                "tar",
+                "cf",
+                "-",
+            ]
+            + exclude_dot_git
+            + [
+                "-C",
+                str(temp_dir),
+                prefix,
+            ],
+            stdout=subprocess.PIPE,
+        )
+
+        with rename_after_close(dest_path, "wb") as out:
+            ctx = ZstdCompressor()
+            ctx.copy_stream(proc.stdout, out)
+
+        proc.wait()
+
+
+def command_git_checkout_archive(args):
+    dest = pathlib.Path(args.dest)
+
+    try:
+        git_checkout_archive(
+            dest,
+            args.repo,
+            args.commit,
+            prefix=args.path_prefix,
+            ssh_key=args.ssh_key_secret,
+            include_dot_git=args.include_dot_git,
+        )
+    except Exception:
+        try:
+            dest.unlink()
+        except FileNotFoundError:
+            pass
+
+        raise
+
+
+def command_static_url(args):
+    gpg_sig_url = args.gpg_sig_url
+    gpg_env_key = args.gpg_key_env
+
+    if bool(gpg_sig_url) != bool(gpg_env_key):
+        print("--gpg-sig-url and --gpg-key-env must both be defined")
+        return 1
+
+    if gpg_sig_url:
+        gpg_signature = b"".join(stream_download(gpg_sig_url))
+        gpg_key = os.environb[gpg_env_key.encode("ascii")]
+
+    dest = pathlib.Path(args.dest)
+    dest.parent.mkdir(parents=True, exist_ok=True)
+
+    basename = urllib.parse.urlparse(args.url).path.split("/")[-1]
+    if basename.endswith("".join(dest.suffixes)):
+        dl_dest = dest
+    else:
+        dl_dest = dest.parent / basename
+
+    try:
+        download_to_path(
+            args.url, dl_dest, sha256=args.sha256, size=args.size, headers=args.headers
+        )
+
+        if gpg_sig_url:
+            gpg_verify_path(dl_dest, gpg_key, gpg_signature)
+
+        if dl_dest != dest or args.strip_components or args.add_prefix:
+            repack_archive(dl_dest, dest, args.strip_components, args.add_prefix)
+    except Exception:
+        try:
+            dl_dest.unlink()
+        except FileNotFoundError:
+            pass
+
+        raise
+
+    if dl_dest != dest:
+        log("Removing %s" % dl_dest)
+        dl_dest.unlink()
+
+
+def api(root_url, service, version, path):
+    # taskcluster-lib-urls is not available when this script runs, so
+    # simulate its behavior:
+    return "{root_url}/api/{service}/{version}/{path}".format(
+        root_url=root_url, service=service, version=version, path=path
+    )
+
+
+def get_hash(fetch, root_url):
+    path = "task/{task}/artifacts/{artifact}".format(
+        task=fetch["task"], artifact="public/chain-of-trust.json"
+    )
+    url = api(root_url, "queue", "v1", path)
+    cot = json.loads(download_to_memory(url))
+    return cot["artifacts"][fetch["artifact"]]["sha256"]
+
+
+def command_task_artifacts(args):
+    start = time.monotonic()
+    fetches = json.loads(os.environ["MOZ_FETCHES"])
+    downloads = []
+    for fetch in fetches:
+        extdir = pathlib.Path(args.dest)
+        if "dest" in fetch:
+            # Note: normpath doesn't like pathlib.Path in python 3.5
+            extdir = pathlib.Path(os.path.normpath(str(extdir.joinpath(fetch["dest"]))))
+        extdir.mkdir(parents=True, exist_ok=True)
+        root_url = os.environ["TASKCLUSTER_ROOT_URL"]
+        sha256 = None
+        if fetch.get("verify-hash"):
+            sha256 = get_hash(fetch, root_url)
+        if fetch["artifact"].startswith("public/"):
+            path = "task/{task}/artifacts/{artifact}".format(
+                task=fetch["task"], artifact=fetch["artifact"]
+            )
+            url = api(root_url, "queue", "v1", path)
+        else:
+            url = ("{proxy_url}/api/queue/v1/task/{task}/artifacts/{artifact}").format(
+                proxy_url=os.environ["TASKCLUSTER_PROXY_URL"],
+                task=fetch["task"],
+                artifact=fetch["artifact"],
+            )
+        downloads.append((url, extdir, fetch["extract"], sha256))
+
+    fetch_urls(downloads)
+    end = time.monotonic()
+
+    perfherder_data = {
+        "framework": {"name": "build_metrics"},
+        "suites": [
+            {
+                "name": "fetch_content",
+                "value": end - start,
+                "lowerIsBetter": True,
+                "shouldAlert": False,
+                "subtests": [],
+            }
+        ],
+    }
+    print("PERFHERDER_DATA: {}".format(json.dumps(perfherder_data)), file=sys.stderr)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    subparsers = parser.add_subparsers(title="sub commands")
+
+    git_checkout = subparsers.add_parser(
+        "git-checkout-archive",
+        help="Obtain an archive of files from a Git repository checkout",
+    )
+    git_checkout.set_defaults(func=command_git_checkout_archive)
+    git_checkout.add_argument(
+        "--path-prefix", help="Prefix for paths in produced archive"
+    )
+    git_checkout.add_argument("repo", help="URL to Git repository to be cloned")
+    git_checkout.add_argument("commit", help="Git commit to check out")
+    git_checkout.add_argument("dest", help="Destination path of archive")
+    git_checkout.add_argument(
+        "--ssh-key-secret", help="The scope path of the ssh key to used for checkout"
+    )
+    git_checkout.add_argument(
+        "--include-dot-git", action="store_true", help="Include the .git directory"
+    )
+
+    url = subparsers.add_parser("static-url", help="Download a static URL")
+    url.set_defaults(func=command_static_url)
+    url.add_argument("--sha256", required=True, help="SHA-256 of downloaded content")
+    url.add_argument(
+        "--size", required=True, type=int, help="Size of downloaded content, in bytes"
+    )
+    url.add_argument(
+        "--gpg-sig-url",
+        help="URL containing signed GPG document validating " "URL to fetch",
+    )
+    url.add_argument(
+        "--gpg-key-env", help="Environment variable containing GPG key to validate"
+    )
+    url.add_argument(
+        "--strip-components",
+        type=int,
+        default=0,
+        help="Number of leading components to strip from file "
+        "names in the downloaded archive",
+    )
+    url.add_argument(
+        "--add-prefix",
+        default="",
+        help="Prefix to add to file names in the downloaded " "archive",
+    )
+    url.add_argument(
+        "-H",
+        "--header",
+        default=[],
+        action="append",
+        dest="headers",
+        help="Header to send as part of the request, can be passed " "multiple times",
+    )
+    url.add_argument("url", help="URL to fetch")
+    url.add_argument("dest", help="Destination path")
+
+    artifacts = subparsers.add_parser("task-artifacts", help="Fetch task artifacts")
+    artifacts.set_defaults(func=command_task_artifacts)
+    artifacts.add_argument(
+        "-d",
+        "--dest",
+        default=os.environ.get("MOZ_FETCHES_DIR"),
+        help="Destination directory which will contain all "
+        "artifacts (defaults to $MOZ_FETCHES_DIR)",
+    )
+
+    args = parser.parse_args()
+
+    if not args.dest:
+        parser.error(
+            "no destination directory specified, either pass in --dest "
+            "or set $MOZ_FETCHES_DIR"
+        )
+
+    return args.func(args)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/hgrc b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/hgrc
new file mode 100755
index 0000000000..f6a2f6643c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/hgrc
@@ -0,0 +1,33 @@
+# By default the progress bar starts after 3s and updates every 0.1s. We
+# change this so it shows and updates every 1.0s.
+# We also tell progress to assume a TTY is present so updates are printed
+# even if there is no known TTY.
+[progress]
+delay = 1.0
+refresh = 1.0
+assume-tty = true
+
+[extensions]
+share =
+sparse =
+robustcheckout = /usr/local/mercurial/robustcheckout.py
+
+[hostsecurity]
+# When running a modern Python, Mercurial will default to TLS 1.1+.
+# When running on a legacy Python, Mercurial will default to TLS 1.0+.
+# There is no good reason we shouldn't be running a modern Python
+# capable of speaking TLS 1.2. And the only Mercurial servers we care
+# about should be running TLS 1.2. So make TLS 1.2 the minimum.
+minimumprotocol = tls1.2
+
+# Settings to make 1-click loaners more useful.
+[extensions]
+histedit =
+rebase =
+
+[diff]
+git = 1
+showfunc = 1
+
+[pager]
+pager = LESS=FRSXQ less
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/robustcheckout.py b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/robustcheckout.py
new file mode 100644
index 0000000000..b5d2230211
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/robustcheckout.py
@@ -0,0 +1,860 @@
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+"""Robustly perform a checkout.
+
+This extension provides the ``hg robustcheckout`` command for
+ensuring a working directory is updated to the specified revision
+from a source repo using best practices to ensure optimal clone
+times and storage efficiency.
+"""
+
+from __future__ import absolute_import
+
+import contextlib
+import json
+import os
+import random
+import re
+import socket
+import ssl
+import time
+
+from mercurial.i18n import _
+from mercurial.node import hex, nullid
+from mercurial import (
+    commands,
+    configitems,
+    error,
+    exchange,
+    extensions,
+    hg,
+    match as matchmod,
+    pycompat,
+    registrar,
+    scmutil,
+    urllibcompat,
+    util,
+    vfs,
+)
+
+# Causes worker to purge caches on process exit and for task to retry.
+EXIT_PURGE_CACHE = 72
+
+# Space-separated list of Mercurial versions this extension declares itself
+# tested with, and the oldest version it declares support for.
+testedwith = (
+    b"4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6.0 6.1 6.2 6.3 6.4"
+)
+minimumhgversion = b"4.5"
+
+# Registry for the commands defined in this file; ``command`` is the decorator
+# that adds entries to it.
+cmdtable = {}
+command = registrar.command(cmdtable)
+
+# Registry for the config options declared in this file.
+configtable = {}
+configitem = registrar.configitem(configtable)
+
+# Bounds of the random retry jitter. They are declared with a dynamic default
+# because the actual default values are passed where the options are read.
+configitem(b"robustcheckout", b"retryjittermin", default=configitems.dynamicdefault)
+configitem(b"robustcheckout", b"retryjittermax", default=configitems.dynamicdefault)
+
+
+def getsparse():
+    from mercurial import sparse
+
+    return sparse
+
+
+def peerlookup(remote, v):
+    with remote.commandexecutor() as e:
+        return e.callcommand(b"lookup", {b"key": v}).result()
+
+
+@command(
+    b"robustcheckout",
+    [
+        (b"", b"upstream", b"", b"URL of upstream repo to clone from"),
+        (b"r", b"revision", b"", b"Revision to check out"),
+        (b"b", b"branch", b"", b"Branch to check out"),
+        (b"", b"purge", False, b"Whether to purge the working directory"),
+        (b"", b"sharebase", b"", b"Directory where shared repos should be placed"),
+        (
+            b"",
+            b"networkattempts",
+            3,
+            b"Maximum number of attempts for network " b"operations",
+        ),
+        (b"", b"sparseprofile", b"", b"Sparse checkout profile to use (path in repo)"),
+        (
+            b"U",
+            b"noupdate",
+            False,
+            b"the clone will include an empty working directory\n"
+            b"(only a repository)",
+        ),
+    ],
+    b"[OPTION]... URL DEST",
+    norepo=True,
+)
+def robustcheckout(
+    ui,
+    url,
+    dest,
+    upstream=None,
+    revision=None,
+    branch=None,
+    purge=False,
+    sharebase=None,
+    networkattempts=None,
+    sparseprofile=None,
+    noupdate=False,
+):
+    """Ensure a working copy has the specified revision checked out.
+
+    Repository data is automatically pooled into the common directory
+    specified by ``--sharebase``, which is a required argument. It is required
+    because pooling storage prevents excessive cloning, which makes operations
+    complete faster.
+
+    One of ``--revision`` or ``--branch`` must be specified. ``--revision``
+    is preferred, as it is deterministic and there is no ambiguity as to which
+    revision will actually be checked out.
+
+    If ``--upstream`` is used, the repo at that URL is used to perform the
+    initial clone instead of cloning from the repo where the desired revision
+    is located.
+
+    ``--purge`` controls whether to remove untracked and ignored files from
+    the working directory. If used, the end state of the working directory
+    should only contain files explicitly under version control for the requested
+    revision.
+
+    ``--sparseprofile`` can be used to specify a sparse checkout profile to use.
+    The sparse checkout profile corresponds to a file in the revision to be
+    checked out. If a previous sparse profile or config is present, it will be
+    replaced by this sparse profile. We choose not to "widen" the sparse config
+    so operations are as deterministic as possible. If an existing checkout
+    is present and it isn't using a sparse checkout, we error. This is to
+    prevent accidentally enabling sparse on a repository that may have
+    clients that aren't sparse aware. Sparse checkout support requires Mercurial
+    4.3 or newer and the ``sparse`` extension must be enabled.
+    """
+    # Exactly one of --revision / --branch must be given.
+    if not revision and not branch:
+        raise error.Abort(b"must specify one of --revision or --branch")
+
+    if revision and branch:
+        raise error.Abort(b"cannot specify both --revision and --branch")
+
+    # Require revision to look like a SHA-1.
+    if revision:
+        if (
+            len(revision) < 12
+            or len(revision) > 40
+            or not re.match(b"^[a-f0-9]+$", revision)
+        ):
+            raise error.Abort(
+                b"--revision must be a SHA-1 fragment 12-40 " b"characters long"
+            )
+
+    # Fall back to the ``share.pool`` config option when --sharebase is absent.
+    sharebase = sharebase or ui.config(b"share", b"pool")
+    if not sharebase:
+        raise error.Abort(
+            b"share base directory not defined; refusing to operate",
+            hint=b"define share.pool config option or pass --sharebase",
+        )
+
+    # Sparse profile support was added in Mercurial 4.3, where it was highly
+    # experimental. Because of the fragility of it, we only support sparse
+    # profiles on 4.3. When 4.4 is released, we'll need to opt in to sparse
+    # support. We /could/ silently fall back to non-sparse when not supported.
+    # However, given that sparse has performance implications, we want to fail
+    # fast if we can't satisfy the desired checkout request.
+    # NOTE(review): the version remarks above look stale (this file declares
+    # a minimum Mercurial version of 4.5); the check below only verifies that
+    # the ``sparse`` extension is loaded.
+    if sparseprofile:
+        try:
+            extensions.find(b"sparse")
+        except KeyError:
+            raise error.Abort(
+                b"sparse extension must be enabled to use " b"--sparseprofile"
+            )
+
+    ui.warn(b"(using Mercurial %s)\n" % util.version())
+
+    # worker.backgroundclose only makes things faster if running anti-virus,
+    # which our automation doesn't. Disable it.
+    ui.setconfig(b"worker", b"backgroundclose", False)
+    # Don't wait forever if the connection hangs
+    ui.setconfig(b"http", b"timeout", 600)
+
+    # By default the progress bar starts after 3s and updates every 0.1s. We
+    # change this so it shows and updates every 1.0s.
+    # We also tell progress to assume a TTY is present so updates are printed
+    # even if there is no known TTY.
+    # We make the config change here instead of in a config file because
+    # otherwise we're at the whim of whatever configs are used in automation.
+    ui.setconfig(b"progress", b"delay", 1.0)
+    ui.setconfig(b"progress", b"refresh", 1.0)
+    ui.setconfig(b"progress", b"assume-tty", True)
+
+    sharebase = os.path.realpath(sharebase)
+
+    # ``optimes`` collects (operation name, seconds) pairs and ``behaviors``
+    # collects tags describing what happened. Both are handed to _docheckout
+    # to fill in and are reported from the ``finally`` block below, so the
+    # metrics are emitted whether the checkout succeeds or fails.
+    optimes = []
+    behaviors = set()
+    start = time.time()
+
+    try:
+        return _docheckout(
+            ui,
+            url,
+            dest,
+            upstream,
+            revision,
+            branch,
+            purge,
+            sharebase,
+            optimes,
+            behaviors,
+            networkattempts,
+            sparse_profile=sparseprofile,
+            noupdate=noupdate,
+        )
+    finally:
+        overall = time.time() - start
+
+        # We store the overall time multiple ways in order to help differentiate
+        # the various "flavors" of operations.
+
+        # ``overall`` is always the total operation time.
+        optimes.append(("overall", overall))
+
+        def record_op(name):
+            # Record the overall duration a second time under ``name``.
+            # If special behaviors due to "corrupt" storage occur, we vary the
+            # name to convey that.
+            if "remove-store" in behaviors:
+                name += "_rmstore"
+            if "remove-wdir" in behaviors:
+                name += "_rmwdir"
+
+            optimes.append((name, overall))
+
+        # We break out overall operations primarily by their network interaction
+        # We have variants within for working directory operations.
+        if "clone" in behaviors and "create-store" in behaviors:
+            record_op("overall_clone")
+
+            if "sparse-update" in behaviors:
+                record_op("overall_clone_sparsecheckout")
+            else:
+                record_op("overall_clone_fullcheckout")
+
+        elif "pull" in behaviors or "clone" in behaviors:
+            record_op("overall_pull")
+
+            if "sparse-update" in behaviors:
+                record_op("overall_pull_sparsecheckout")
+            else:
+                record_op("overall_pull_fullcheckout")
+
+            if "empty-wdir" in behaviors:
+                record_op("overall_pull_emptywdir")
+            else:
+                record_op("overall_pull_populatedwdir")
+
+        else:
+            record_op("overall_nopull")
+
+            if "sparse-update" in behaviors:
+                record_op("overall_nopull_sparsecheckout")
+            else:
+                record_op("overall_nopull_fullcheckout")
+
+            if "empty-wdir" in behaviors:
+                record_op("overall_nopull_emptywdir")
+            else:
+                record_op("overall_nopull_populatedwdir")
+
+        server_url = urllibcompat.urlreq.urlparse(url).netloc
+
+        # Perfherder data is only written when TASKCLUSTER_INSTANCE_TYPE is
+        # set; its value is attached to every suite as an extra option.
+        if "TASKCLUSTER_INSTANCE_TYPE" in os.environ:
+            perfherder = {
+                "framework": {
+                    "name": "vcs",
+                },
+                "suites": [],
+            }
+            # One suite per recorded (operation, duration) pair.
+            for op, duration in optimes:
+                perfherder["suites"].append(
+                    {
+                        "name": op,
+                        "value": duration,
+                        "lowerIsBetter": True,
+                        "shouldAlert": False,
+                        "serverUrl": server_url.decode("utf-8"),
+                        "hgVersion": util.version().decode("utf-8"),
+                        "extraOptions": [os.environ["TASKCLUSTER_INSTANCE_TYPE"]],
+                        "subtests": [],
+                    }
+                )
+            ui.write(
+                b"PERFHERDER_DATA: %s\n"
+                % pycompat.bytestr(json.dumps(perfherder, sort_keys=True))
+            )
+
+
+def _docheckout(
+    ui,
+    url,
+    dest,
+    upstream,
+    revision,
+    branch,
+    purge,
+    sharebase,
+    optimes,
+    behaviors,
+    networkattemptlimit,
+    networkattempts=None,
+    sparse_profile=None,
+    noupdate=False,
+):
+    if not networkattempts:
+        networkattempts = [1]
+
+    def callself():
+        return _docheckout(
+            ui,
+            url,
+            dest,
+            upstream,
+            revision,
+            branch,
+            purge,
+            sharebase,
+            optimes,
+            behaviors,
+            networkattemptlimit,
+            networkattempts=networkattempts,
+            sparse_profile=sparse_profile,
+            noupdate=noupdate,
+        )
+
+    @contextlib.contextmanager
+    def timeit(op, behavior):
+        behaviors.add(behavior)
+        errored = False
+        try:
+            start = time.time()
+            yield
+        except Exception:
+            errored = True
+            raise
+        finally:
+            elapsed = time.time() - start
+
+            if errored:
+                op += "_errored"
+
+            optimes.append((op, elapsed))
+
+    ui.write(b"ensuring %s@%s is available at %s\n" % (url, revision or branch, dest))
+
+    # We assume that we're the only process on the machine touching the
+    # repository paths that we were told to use. This means our recovery
+    # scenario when things aren't "right" is to just nuke things and start
+    # from scratch. This is easier to implement than verifying the state
+    # of the data and attempting recovery. And in some scenarios (such as
+    # potential repo corruption), it is probably faster, since verifying
+    # repos can take a while.
+
+    destvfs = vfs.vfs(dest, audit=False, realpath=True)
+
+    def deletesharedstore(path=None):
+        storepath = path or destvfs.read(b".hg/sharedpath").strip()
+        if storepath.endswith(b".hg"):
+            storepath = os.path.dirname(storepath)
+
+        storevfs = vfs.vfs(storepath, audit=False)
+        storevfs.rmtree(forcibly=True)
+
+    if destvfs.exists() and not destvfs.exists(b".hg"):
+        raise error.Abort(b"destination exists but no .hg directory")
+
+    # Refuse to enable sparse checkouts on existing checkouts. The reasoning
+    # here is that another consumer of this repo may not be sparse aware. If we
+    # enabled sparse, we would lock them out.
+    if destvfs.exists() and sparse_profile and not destvfs.exists(b".hg/sparse"):
+        raise error.Abort(
+            b"cannot enable sparse profile on existing " b"non-sparse checkout",
+            hint=b"use a separate working directory to use sparse",
+        )
+
+    # And the other direction for symmetry.
+    if not sparse_profile and destvfs.exists(b".hg/sparse"):
+        raise error.Abort(
+            b"cannot use non-sparse checkout on existing sparse " b"checkout",
+            hint=b"use a separate working directory to use sparse",
+        )
+
+    # Require checkouts to be tied to shared storage because efficiency.
+    if destvfs.exists(b".hg") and not destvfs.exists(b".hg/sharedpath"):
+        ui.warn(b"(destination is not shared; deleting)\n")
+        with timeit("remove_unshared_dest", "remove-wdir"):
+            destvfs.rmtree(forcibly=True)
+
+    # Verify the shared path exists and is using modern pooled storage.
+    if destvfs.exists(b".hg/sharedpath"):
+        storepath = destvfs.read(b".hg/sharedpath").strip()
+
+        ui.write(b"(existing repository shared store: %s)\n" % storepath)
+
+        if not os.path.exists(storepath):
+            ui.warn(b"(shared store does not exist; deleting destination)\n")
+            with timeit("removed_missing_shared_store", "remove-wdir"):
+                destvfs.rmtree(forcibly=True)
+        elif not re.search(b"[a-f0-9]{40}/\.hg$", storepath.replace(b"\\", b"/")):
+            ui.warn(
+                b"(shared store does not belong to pooled storage; "
+                b"deleting destination to improve efficiency)\n"
+            )
+            with timeit("remove_unpooled_store", "remove-wdir"):
+                destvfs.rmtree(forcibly=True)
+
+    if destvfs.isfileorlink(b".hg/wlock"):
+        ui.warn(
+            b"(dest has an active working directory lock; assuming it is "
+            b"left over from a previous process and that the destination "
+            b"is corrupt; deleting it just to be sure)\n"
+        )
+        with timeit("remove_locked_wdir", "remove-wdir"):
+            destvfs.rmtree(forcibly=True)
+
+    def handlerepoerror(e):
+        if pycompat.bytestr(e) == _(b"abandoned transaction found"):
+            ui.warn(b"(abandoned transaction found; trying to recover)\n")
+            repo = hg.repository(ui, dest)
+            if not repo.recover():
+                ui.warn(b"(could not recover repo state; " b"deleting shared store)\n")
+                with timeit("remove_unrecovered_shared_store", "remove-store"):
+                    deletesharedstore()
+
+            ui.warn(b"(attempting checkout from beginning)\n")
+            return callself()
+
+        raise
+
+    # At this point we either have an existing working directory using
+    # shared, pooled storage or we have nothing.
+
+    def handlenetworkfailure():
+        if networkattempts[0] >= networkattemptlimit:
+            raise error.Abort(
+                b"reached maximum number of network attempts; " b"giving up\n"
+            )
+
+        ui.warn(
+            b"(retrying after network failure on attempt %d of %d)\n"
+            % (networkattempts[0], networkattemptlimit)
+        )
+
+        # Do a backoff on retries to mitigate the thundering herd
+        # problem. This is an exponential backoff with a multiplier
+        # plus random jitter thrown in for good measure.
+        # With the default settings, backoffs will be:
+        # 1) 2.5 - 6.5
+        # 2) 5.5 - 9.5
+        # 3) 11.5 - 15.5
+        backoff = (2 ** networkattempts[0] - 1) * 1.5
+        jittermin = ui.configint(b"robustcheckout", b"retryjittermin", 1000)
+        jittermax = ui.configint(b"robustcheckout", b"retryjittermax", 5000)
+        backoff += float(random.randint(jittermin, jittermax)) / 1000.0
+        ui.warn(b"(waiting %.2fs before retry)\n" % backoff)
+        time.sleep(backoff)
+
+        networkattempts[0] += 1
+
+    def handlepullerror(e):
+        """Handle an exception raised during a clone, pull or peer lookup.
+
+        Returns True if caller should call ``callself()`` to retry, else False.
+        """
+        if isinstance(e, error.Abort):
+            if e.args[0] == _(b"repository is unrelated"):
+                ui.warn(b"(repository is unrelated; deleting)\n")
+                destvfs.rmtree(forcibly=True)
+                return True
+            elif e.args[0].startswith(_(b"stream ended unexpectedly")):
+                ui.warn(b"%s\n" % e.args[0])
+                # Will raise if failure limit reached.
+                handlenetworkfailure()
+                return True
+        # TODO test this branch
+        elif isinstance(e, error.ResponseError):
+            if e.args[0].startswith(_(b"unexpected response from remote server:")):
+                ui.warn(b"(unexpected response from remote server; retrying)\n")
+                destvfs.rmtree(forcibly=True)
+                # Will raise if failure limit reached.
+                handlenetworkfailure()
+                return True
+        elif isinstance(e, ssl.SSLError):
+            # Assume all SSL errors are due to the network, as Mercurial
+            # should convert non-transport errors like cert validation failures
+            # to error.Abort.
+            ui.warn(b"ssl error: %s\n" % pycompat.bytestr(str(e)))
+            handlenetworkfailure()
+            return True
+        elif isinstance(e, urllibcompat.urlerr.httperror) and e.code >= 500:
+            ui.warn(b"http error: %s\n" % pycompat.bytestr(str(e.reason)))
+            handlenetworkfailure()
+            return True
+        elif isinstance(e, urllibcompat.urlerr.urlerror):
+            if isinstance(e.reason, socket.error):
+                ui.warn(b"socket error: %s\n" % pycompat.bytestr(str(e.reason)))
+                handlenetworkfailure()
+                return True
+            else:  # only warn; falls through to ``return False`` below
+                ui.warn(
+                    b"unhandled URLError; reason type: %s; value: %s\n"
+                    % (
+                        pycompat.bytestr(e.reason.__class__.__name__),
+                        pycompat.bytestr(str(e.reason)),
+                    )
+                )
+        elif isinstance(e, socket.timeout):
+            ui.warn(b"socket timeout\n")
+            handlenetworkfailure()
+            return True
+        else:
+            ui.warn(
+                b"unhandled exception during network operation; type: %s; "
+                b"value: %s\n"
+                % (pycompat.bytestr(e.__class__.__name__), pycompat.bytestr(str(e)))
+            )
+
+        return False  # not retryable; the visible callers re-raise
+
+    # Perform sanity checking of store. We may or may not know the path to the
+    # local store. It depends if we have an existing destvfs pointing to a
+    # share. To ensure we always find a local store, perform the same logic
+    # that Mercurial's pooled storage does to resolve the local store path.
+    cloneurl = upstream or url
+
+    try:
+        clonepeer = hg.peer(ui, {}, cloneurl)
+        rootnode = peerlookup(clonepeer, b"0")
+    except error.RepoLookupError:
+        raise error.Abort(b"unable to resolve root revision from clone " b"source")
+    except (
+        error.Abort,
+        ssl.SSLError,
+        urllibcompat.urlerr.urlerror,
+        socket.timeout,
+    ) as e:
+        if handlepullerror(e):
+            return callself()
+        raise
+
+    if rootnode == nullid:
+        raise error.Abort(b"source repo appears to be empty")
+
+    storepath = os.path.join(sharebase, hex(rootnode))
+    storevfs = vfs.vfs(storepath, audit=False)
+
+    if storevfs.isfileorlink(b".hg/store/lock"):
+        ui.warn(
+            b"(shared store has an active lock; assuming it is left "
+            b"over from a previous process and that the store is "
+            b"corrupt; deleting store and destination just to be "
+            b"sure)\n"
+        )
+        if destvfs.exists():
+            with timeit("remove_dest_active_lock", "remove-wdir"):
+                destvfs.rmtree(forcibly=True)
+
+        with timeit("remove_shared_store_active_lock", "remove-store"):
+            storevfs.rmtree(forcibly=True)
+
+    if storevfs.exists() and not storevfs.exists(b".hg/requires"):
+        ui.warn(
+            b"(shared store missing requires file; this is a really "
+            b"odd failure; deleting store and destination)\n"
+        )
+        if destvfs.exists():
+            with timeit("remove_dest_no_requires", "remove-wdir"):
+                destvfs.rmtree(forcibly=True)
+
+        with timeit("remove_shared_store_no_requires", "remove-store"):
+            storevfs.rmtree(forcibly=True)
+
+    if storevfs.exists(b".hg/requires"):
+        requires = set(storevfs.read(b".hg/requires").splitlines())
+        # "share-safe" (enabled by default as of hg 6.1) moved most
+        # requirements to a new file, so we need to look there as well to avoid
+        # deleting and re-cloning each time
+        if b"share-safe" in requires:
+            requires |= set(storevfs.read(b".hg/store/requires").splitlines())
+        # FUTURE when we require generaldelta, this is where we can check
+        # for that.
+        required = {b"dotencode", b"fncache"}
+
+        missing = required - requires
+        if missing:
+            ui.warn(
+                b"(shared store missing requirements: %s; deleting "
+                b"store and destination to ensure optimal behavior)\n"
+                % b", ".join(sorted(missing))
+            )
+            if destvfs.exists():
+                with timeit("remove_dest_missing_requires", "remove-wdir"):
+                    destvfs.rmtree(forcibly=True)
+
+            with timeit("remove_shared_store_missing_requires", "remove-store"):
+                storevfs.rmtree(forcibly=True)
+
+    created = False
+
+    if not destvfs.exists():
+        # Ensure parent directories of destination exist.
+        # Mercurial 3.8 removed ensuredirs and made makedirs race safe.
+        if util.safehasattr(util, "ensuredirs"):
+            makedirs = util.ensuredirs
+        else:
+            makedirs = util.makedirs
+
+        makedirs(os.path.dirname(destvfs.base), notindexed=True)
+        makedirs(sharebase, notindexed=True)
+
+        if upstream:
+            ui.write(b"(cloning from upstream repo %s)\n" % upstream)
+
+        if not storevfs.exists():
+            behaviors.add(b"create-store")
+
+        try:
+            with timeit("clone", "clone"):
+                shareopts = {b"pool": sharebase, b"mode": b"identity"}
+                res = hg.clone(
+                    ui,
+                    {},
+                    clonepeer,
+                    dest=dest,
+                    update=False,
+                    shareopts=shareopts,
+                    stream=True,
+                )
+        except (
+            error.Abort,
+            ssl.SSLError,
+            urllibcompat.urlerr.urlerror,
+            socket.timeout,
+        ) as e:
+            if handlepullerror(e):
+                return callself()
+            raise
+        except error.RepoError as e:
+            return handlerepoerror(e)
+        except error.RevlogError as e:
+            ui.warn(b"(repo corruption: %s; deleting shared store)\n" % e)
+            with timeit("remove_shared_store_revlogerror", "remote-store"):
+                deletesharedstore()
+            return callself()
+
+        # TODO retry here.
+        if res is None:
+            raise error.Abort(b"clone failed")
+
+        # Verify it is using shared pool storage.
+        if not destvfs.exists(b".hg/sharedpath"):
+            raise error.Abort(b"clone did not create a shared repo")
+
+        created = True
+
+    # The destination .hg directory should exist. Now make sure we have the
+    # wanted revision.
+
+    repo = hg.repository(ui, dest)
+
+    # We only pull if we are using symbolic names or the requested revision
+    # doesn't exist.
+    havewantedrev = False
+
+    if revision:
+        try:
+            ctx = scmutil.revsingle(repo, revision)
+        except error.RepoLookupError:
+            ctx = None
+
+        if ctx:
+            if not ctx.hex().startswith(revision):
+                raise error.Abort(
+                    b"--revision argument is ambiguous",
+                    hint=b"must be the first 12+ characters of a " b"SHA-1 fragment",
+                )
+
+            checkoutrevision = ctx.hex()
+            havewantedrev = True
+
+    if not havewantedrev:
+        ui.write(b"(pulling to obtain %s)\n" % (revision or branch,))
+
+        remote = None
+        try:
+            remote = hg.peer(repo, {}, url)
+            pullrevs = [peerlookup(remote, revision or branch)]
+            checkoutrevision = hex(pullrevs[0])
+            if branch:
+                ui.warn(
+                    b"(remote resolved %s to %s; "
+                    b"result is not deterministic)\n" % (branch, checkoutrevision)
+                )
+
+            if checkoutrevision in repo:
+                ui.warn(b"(revision already present locally; not pulling)\n")
+            else:
+                with timeit("pull", "pull"):
+                    pullop = exchange.pull(repo, remote, heads=pullrevs)
+                    if not pullop.rheads:
+                        raise error.Abort(b"unable to pull requested revision")
+        except (
+            error.Abort,
+            ssl.SSLError,
+            urllibcompat.urlerr.urlerror,
+            socket.timeout,
+        ) as e:
+            if handlepullerror(e):
+                return callself()
+            raise
+        except error.RepoError as e:
+            return handlerepoerror(e)
+        except error.RevlogError as e:
+            ui.warn(b"(repo corruption: %s; deleting shared store)\n" % e)
+            deletesharedstore()
+            return callself()
+        finally:
+            if remote:
+                remote.close()
+
+    # Now we should have the wanted revision in the store. Perform
+    # working directory manipulation.
+
+    # Avoid any working directory manipulations if `-U`/`--noupdate` was passed
+    if noupdate:
+        ui.write(b"(skipping update since `-U` was passed)\n")
+        return None
+
+    # Purge if requested. We purge before update because this way we're
+    # guaranteed to not have conflicts on `hg update`.
+    if purge and not created:
+        ui.write(b"(purging working directory)\n")
+        purge = getattr(commands, "purge", None)
+        if not purge:
+            purge = extensions.find(b"purge").purge
+
+        # Mercurial 4.3 doesn't purge files outside the sparse checkout.
+        # See https://bz.mercurial-scm.org/show_bug.cgi?id=5626. Force
+        # purging by monkeypatching the sparse matcher.
+        try:
+            old_sparse_fn = getattr(repo.dirstate, "_sparsematchfn", None)
+            if old_sparse_fn is not None:
+                repo.dirstate._sparsematchfn = lambda: matchmod.always()
+
+            with timeit("purge", "purge"):
+                if purge(
+                    ui,
+                    repo,
+                    all=True,
+                    abort_on_err=True,
+                    # The function expects all arguments to be
+                    # defined.
+                    **{"print": None, "print0": None, "dirs": None, "files": None}
+                ):
+                    raise error.Abort(b"error purging")
+        finally:
+            if old_sparse_fn is not None:
+                repo.dirstate._sparsematchfn = old_sparse_fn
+
+    # Update the working directory.
+
+    if repo[b"."].node() == nullid:
+        behaviors.add("empty-wdir")
+    else:
+        behaviors.add("populated-wdir")
+
+    if sparse_profile:
+        sparsemod = getsparse()
+
+        # By default, Mercurial will ignore unknown sparse profiles. This could
+        # lead to a full checkout. Be more strict.
+        try:
+            repo.filectx(sparse_profile, changeid=checkoutrevision).data()
+        except error.ManifestLookupError:
+            raise error.Abort(
+                b"sparse profile %s does not exist at revision "
+                b"%s" % (sparse_profile, checkoutrevision)
+            )
+
+        old_config = sparsemod.parseconfig(
+            repo.ui, repo.vfs.tryread(b"sparse"), b"sparse"
+        )
+
+        old_includes, old_excludes, old_profiles = old_config
+
+        if old_profiles == {sparse_profile} and not old_includes and not old_excludes:
+            ui.write(
+                b"(sparse profile %s already set; no need to update "
+                b"sparse config)\n" % sparse_profile
+            )
+        else:
+            if old_includes or old_excludes or old_profiles:
+                ui.write(
+                    b"(replacing existing sparse config with profile "
+                    b"%s)\n" % sparse_profile
+                )
+            else:
+                ui.write(b"(setting sparse config to profile %s)\n" % sparse_profile)
+
+            # If doing an incremental update, this will perform two updates:
+            # one to change the sparse profile and another to update to the new
+            # revision. This is not desired. But there's not a good API in
+            # Mercurial to do this as one operation.
+            # TRACKING hg64 - Mercurial 6.4 and later require call to
+            # dirstate.changing_parents(repo)
+            def parentchange(repo):
+                if util.safehasattr(repo.dirstate, "changing_parents"):
+                    return repo.dirstate.changing_parents(repo)
+                return repo.dirstate.parentchange()
+
+            with repo.wlock(), parentchange(repo), timeit(
+                "sparse_update_config", "sparse-update-config"
+            ):
+                # pylint --py3k: W1636
+                fcounts = list(
+                    map(
+                        len,
+                        sparsemod._updateconfigandrefreshwdir(
+                            repo, [], [], [sparse_profile], force=True
+                        ),
+                    )
+                )
+
+                repo.ui.status(
+                    b"%d files added, %d files dropped, "
+                    b"%d files conflicting\n" % tuple(fcounts)
+                )
+
+            ui.write(b"(sparse refresh complete)\n")
+
+    op = "update_sparse" if sparse_profile else "update"
+    behavior = "update-sparse" if sparse_profile else "update"
+
+    with timeit(op, behavior):
+        if commands.update(ui, repo, rev=checkoutrevision, clean=True):
+            raise error.Abort(b"error updating")
+
+    ui.write(b"updated to %s\n" % checkoutrevision)
+
+    return None
+
+
+def extsetup(ui):
+    """Ensure the ``purge`` and ``share`` extensions are loaded."""
+    for name in (b"purge", b"share"):
+        try:
+            extensions.find(name)
+        except KeyError:  # lookup failed, so load the extension ourselves
+            extensions.load(ui, name, None)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task
new file mode 100755
index 0000000000..267b5283ea
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task
@@ -0,0 +1,1348 @@
+#!/usr/bin/python3 -u
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Run a task after performing common actions.
+
+This script is meant to be the "driver" for TaskCluster based tasks.
+It receives some common arguments to control the run-time environment.
+
+It performs actions as requested from the arguments. Then it executes
+the requested process and prints its output, prefixing it with the
+current time to improve log usefulness.
+"""
+
+import sys
+
+if sys.version_info[0:2] < (3, 5):
+    print("run-task requires Python 3.5+")
+    sys.exit(1)
+
+import argparse
+import datetime
+import errno
+import io
+import json
+import os
+import platform
+import re
+import shutil
+import signal
+import socket
+import stat
+import subprocess
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+from threading import Thread
+from typing import Optional
+
+SECRET_BASEURL_TPL = "http://taskcluster/secrets/v1/secret/{}"
+
+GITHUB_SSH_FINGERPRINT = (
+    b"github.com ssh-ed25519 "
+    b"AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\n"
+    b"github.com ecdsa-sha2-nistp256 "
+    b"AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB"
+    b"9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\n"
+    b"github.com ssh-rsa "
+    b"AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY"
+    b"4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDP"
+    b"gVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyR"
+    b"kQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWO"
+    b"WRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZ"
+    b"yaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+"
+    b"2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=\n"
+)
+
+
+CACHE_UID_GID_MISMATCH = """
+There is a UID/GID mismatch on the cache. This likely means:
+
+a) different tasks are running as a different user/group
+b) different Docker images have different UID/GID for the same user/group
+
+Our cache policy is that the UID/GID for ALL tasks must be consistent
+for the lifetime of the cache. This eliminates permissions problems due
+to file/directory user/group ownership.
+
+To make this error go away, ensure that all Docker images use
+a consistent UID/GID and that all tasks using this cache are running as
+the same user/group.
+"""  # printed by configure_cache_posix() when uid=/gid= requirements differ
+
+
+NON_EMPTY_VOLUME = """
+error: volume %s is not empty
+
+Our Docker image policy requires volumes to be empty.
+
+The volume was likely populated as part of building the Docker image.
+Change the Dockerfile and anything run from it to not create files in
+any VOLUME.
+
+A lesser possibility is that you stumbled upon a TaskCluster platform bug
+where it fails to use new volumes for tasks.
+"""
+
+
+FETCH_CONTENT_NOT_FOUND = """
+error: fetch-content script not found
+
+The script at `taskcluster/scripts/misc/fetch-content` could not be
+detected in the current environment.
+"""
+
+# The exit code to use when caches should be purged and the task retried.
+# This is EX_OSFILE (from sysexits.h):
+#     Some system file  does not exist, cannot be opened, or has some
+#     sort of error (e.g., syntax error).
+EXIT_PURGE_CACHE = 72
+
+
+IS_MACOSX = sys.platform == "darwin"
+IS_POSIX = os.name == "posix"
+IS_WINDOWS = os.name == "nt"
+
+# Both mercurial and git use sha1 as revision identifiers. Luckily, both define
+# the same value as the null revision.
+#
+# https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7
+# https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30
+NULL_REVISION = "0000000000000000000000000000000000000000"
+
+
+def print_line(prefix, m):
+    """Write ``m`` to stdout as ``[prefix UTC-timestampZ] m`` (bytes) and flush."""
+    stamp = datetime.datetime.utcnow().isoformat()
+    stamp = (stamp[:-3] if "." in stamp else stamp).encode("utf-8")  # 3 decimals
+    sys.stdout.buffer.write(b"[%s %sZ] %s" % (prefix, stamp, m))
+    sys.stdout.buffer.flush()
+
+
+def _call_windows_retry(func, args=(), retry_max=5, retry_delay=0.5):
+    """Call ``func(*args)``, retrying on errors that are often spurious.
+
+    It's possible to see spurious errors on Windows due to various things
+    keeping a handle to the directory open (explorer, virus scanners, etc).
+    An ``OSError`` with errno EACCES, ENOTEMPTY or ENOENT is therefore
+    retried up to ``retry_max`` times. ``retry_delay`` is multiplied by the
+    number of failed attempts to increase the likelihood of success in
+    subsequent attempts. Any other error, and the error that follows the
+    last retry, is raised to the caller. The result of ``func`` is discarded.
+    """
+    # Error codes are defined in:
+    # https://docs.python.org/3/library/errno.html#module-errno
+    retryable = (errno.EACCES, errno.ENOTEMPTY, errno.ENOENT)
+    failures = 0
+
+    while True:
+        try:
+            func(*args)
+        except OSError as exc:
+            if exc.errno not in retryable or failures == retry_max:
+                raise
+
+            failures += 1
+            print(
+                '%s() failed for "%s". Reason: %s (%s). Retrying...'
+                % (func.__name__, args, exc.strerror, exc.errno)
+            )
+            time.sleep(failures * retry_delay)
+        else:
+            return
+
+
+def remove(path):
+    """Delete the file, symlink or directory tree at ``path``.
+
+    This is a replacement for shutil.rmtree that works better under
+    windows. It does the following things:
+
+     - make each entry writable (and each directory traversable) for the
+       current user before trying to remove it
+     - run every chmod/remove call through ``_call_windows_retry`` so that
+       errors caused by other processes keeping a handle on file paths -
+       like explorer, virus scanners, etc - are retried (by default up to
+       5 times, with a delay of (failed_attempts * 0.5) seconds between
+       each attempt)
+
+    Note that no error will be raised if the given path does not exist;
+    only a warning is printed.
+
+    :param path: path to be removed
+    """
+
+    def _make_deletable(target):
+        """Add the owner permission bits required to delete ``target``."""
+        if os.path.islink(target):
+            # A symlink needs no change: it should already have all the
+            # needed permissions.
+            return
+
+        current = os.stat(target).st_mode
+        if os.path.isfile(target):
+            wanted = current | stat.S_IWUSR
+        elif os.path.isdir(target):
+            wanted = current | stat.S_IWUSR | stat.S_IXUSR
+        else:
+            # Not supported type
+            return
+
+        _call_windows_retry(os.chmod, (target, wanted))
+
+    if not os.path.lexists(path):
+        print_line(b"remove", b"WARNING: %s does not exists!\n" % path.encode("utf-8"))
+        return
+
+    # On Windows, prepend '\\?\' to paths shaped like 'X:\...' so that files
+    # or directories that exceed the MAX_PATH(260) limitation, or that end
+    # with a period, can be accessed.
+    on_windows = sys.platform in ("win32", "cygwin")
+    if on_windows and len(path) >= 3 and path[1] == ":" and path[2] == "\\":
+        path = "\\\\?\\%s" % path
+
+    if os.path.isfile(path) or os.path.islink(path):
+        # Make sure the file or link is writable for the current user
+        _make_deletable(path)
+        _call_windows_retry(os.remove, (path,))
+        return
+
+    if not os.path.isdir(path):
+        return
+
+    # Make sure the directory is writable and traversable for the current user
+    _make_deletable(path)
+
+    # ... and that every nested item has writable permission.
+    # Top-down walk: a subdirectory is chmod-ed before os.walk descends into it.
+    for parent, subdirs, filenames in os.walk(path):
+        for entry in subdirs + filenames:
+            _make_deletable(os.path.join(parent, entry))
+
+    _call_windows_retry(shutil.rmtree, (path,))
+
+
+def run_required_command(prefix, args, *, extra_env=None, cwd=None):
+    """Run a command once; a non-zero exit code ends this process with it."""
+    # With retries=0 the first failure goes straight to sys.exit().
+    retry_required_command(prefix, args, extra_env=extra_env, cwd=cwd, retries=0)
+
+
+def retry_required_command(prefix, args, *, extra_env=None, cwd=None, retries=2):
+    """Run a command until it succeeds; exit with its code once out of retries."""
+    delay = 2
+    status = run_command(prefix, args, extra_env=extra_env, cwd=cwd)
+    while status:
+        if not retries:
+            sys.exit(status)
+        retries -= 1
+        time.sleep(delay)  # 2s, 4s, 8s, ... between attempts
+        delay *= 2
+        status = run_command(prefix, args, extra_env=extra_env, cwd=cwd)
+
+
+def run_command(prefix, args, *, extra_env=None, cwd=None):
+    """Runs a process and prefixes its output with the time.
+
+    ``prefix`` is the bytes tag handed to ``print_line`` for each line,
+    ``args`` the command for ``subprocess.Popen``; ``extra_env`` is merged
+    over a copy of ``os.environ`` and ``cwd`` is the working directory.
+
+    Returns the process exit code.
+    """
+    # Wrap args in a 1-tuple: a bare tuple operand would be unpacked as
+    # several format arguments instead of being shown as one command.
+    print_line(prefix, b"executing %r\n" % (args,))
+
+    env = dict(os.environ)
+    env.update(extra_env or {})
+
+    # Note: TaskCluster's stdin is a TTY. This attribute is lost
+    # when we pass sys.stdin to the invoked process. If we cared
+    # to preserve stdin as a TTY, we could make this work. But until
+    # someone needs it, don't bother.
+
+    # We want stdout to be bytes. That means we can't use
+    # universal_newlines=True (because it implies text mode). So we
+    # manually install a latin1 stream wrapper: it allows us to readline()
+    # and to encode each line back to bytes.
+    p = subprocess.Popen(
+        args,
+        # Disable buffering because we want to receive output
+        # as it is generated so timestamps in logs are
+        # accurate.
+        bufsize=0,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        stdin=sys.stdin.fileno(),
+        cwd=cwd,
+        env=env,
+    )
+
+    stdout = io.TextIOWrapper(p.stdout, encoding="latin1")
+
+    # readline() returns "" only at end of stream.
+    for line in iter(stdout.readline, ""):
+        print_line(prefix, line.encode("latin1"))
+
+    return p.wait()
+
+
+def get_posix_user_group(user, group):
+    """Look up the passwd and group records for ``user`` and ``group``.
+
+    Exits with status 1 if a name is unknown or ``worker`` is not id 1000.
+    Returns (user record, group record, ids of groups having ``user`` as member).
+    """
+    import grp
+    import pwd
+
+    try:
+        user_record = pwd.getpwnam(user)
+    except KeyError:
+        print("could not find user %s; specify a valid user with --user" % user)
+        sys.exit(1)
+    try:
+        group_record = grp.getgrnam(group)
+    except KeyError:
+        print("could not find group %s; specify a valid group with --group" % group)
+        sys.exit(1)
+
+    # Most tasks use worker:worker. Its numeric ID must be fixed: otherwise files
+    # written to caches easily get mismatched IDs, causing permissions errors.
+    if user_record.pw_name == "worker" and user_record.pw_uid != 1000:
+        print("user `worker` must have uid=1000; got %d" % user_record.pw_uid)
+        sys.exit(1)
+    if group_record.gr_name == "worker" and group_record.gr_gid != 1000:
+        print("group `worker` must have gid=1000; got %d" % group_record.gr_gid)
+        sys.exit(1)
+    # gr_mem holds member *user* names, so match on ``user`` (not ``group``).
+    gids = [g.gr_gid for g in grp.getgrall() if user in g.gr_mem]
+    return user_record, group_record, gids
+
+
+def write_audit_entry(path, msg):  # append one "[timeZ TASK_ID] msg" line to path
+    now = datetime.datetime.utcnow().isoformat().encode("utf-8")  # UTC, ISO 8601
+    with open(path, "ab") as fh:  # "ab": append, binary
+        fh.write(b"[%sZ %s] %s\n" % (now, os.environb.get(b"TASK_ID", b"UNKNOWN"), msg))
+
+
+WANTED_DIR_MODE = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR
+
+
+def set_dir_permissions(path, uid, gid):
+    """Give ``path`` the owner ``uid:gid`` and owner rwx bits where missing."""
+    info = os.lstat(path)
+    if (info.st_uid, info.st_gid) != (uid, gid):
+        os.chown(path, uid, gid)
+
+    # Add any missing owner read/write/execute bit so that the directory
+    # can be deleted later if needed.
+    if WANTED_DIR_MODE & ~info.st_mode:
+        os.chmod(path, info.st_mode | WANTED_DIR_MODE)
+
+
+def chown_recursive(path, user, group, uid, gid):
+    """Give ``path`` and everything below it the owner ``uid:gid``.
+
+    ``user`` and ``group`` are only used for the log message.
+    """
+    print_line(
+        b"chown",
+        b"recursively changing ownership of %s to %s:%s\n"
+        % tuple(s.encode("utf-8") for s in (path, user, group)),
+    )
+    set_dir_permissions(path, uid, gid)
+
+    for parent, subdirs, filenames in os.walk(path):
+        for name in subdirs:
+            set_dir_permissions(os.path.join(parent, name), uid, gid)
+        # A file may be a symlink that points to nowhere, which os.chown()
+        # would fail on because it follows symlinks. We only care about the
+        # directory entry itself, so os.lchown() is sufficient.
+        for name in filenames:
+            os.lchown(os.path.join(parent, name), uid, gid)
+
+
+def configure_cache_posix(cache, user, group, untrusted_caches, running_as_root):
+    """Configure a cache path on POSIX platforms.
+
+    For each cache, we write out a special file denoting attributes and
+    capabilities of run-task and the task being executed. These attributes
+    are used by subsequent run-task invocations to validate that use of
+    the cache is acceptable.
+
+    We /could/ blow away the cache data on requirements mismatch.
+    While this would be convenient, this could result in "competing" tasks
+    effectively undoing the other's work. This would slow down task
+    execution in aggregate. Without monitoring for this, people may not notice
+    the problem and tasks would be slower than they could be. We follow the
+    principle of "fail fast" to ensure optimal task execution.
+
+    We also write an audit log of who used the caches. This log is printed
+    during failures to help aid debugging.
+    """
+
+    our_requirements = {
+        # Include a version string that we can bump whenever to trigger
+        # fresh caches. The actual value is not relevant and doesn't need
+        # to follow any explicit order. Since taskgraph bakes this file's
+        # hash into cache names, any change to this file/version is sufficient
+        # to force the use of a new cache.
+        b"version=1",
+        # Include the UID and GID the task will run as to ensure that tasks
+        # with different UID and GID don't share the same cache.
+        b"uid=%d" % user.pw_uid,
+        b"gid=%d" % group.gr_gid,
+    }
+
+    requires_path = os.path.join(cache, ".cacherequires")
+    audit_path = os.path.join(cache, ".cachelog")
+
+    # The cache is empty. Configure it.
+    if not os.listdir(cache):
+        print_line(
+            b"cache",
+            b"cache %s is empty; writing requirements: "
+            b"%s\n" % (cache.encode("utf-8"), b" ".join(sorted(our_requirements))),
+        )
+
+        # We write a requirements file so future invocations know what the
+        # requirements are.
+        with open(requires_path, "wb") as fh:
+            fh.write(b"\n".join(sorted(our_requirements)))
+
+        # And make it read-only as a precaution against deletion.
+        os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
+
+        write_audit_entry(
+            audit_path,
+            b"created; requirements: %s" % b", ".join(sorted(our_requirements)),
+        )
+
+        set_dir_permissions(cache, user.pw_uid, group.gr_gid)
+        return
+
+    # The cache has content and we have a requirements file. Validate
+    # requirements alignment.
+    if os.path.exists(requires_path):
+        with open(requires_path, "rb") as fh:
+            wanted_requirements = set(fh.read().splitlines())
+
+        print_line(
+            b"cache",
+            b"cache %s exists; requirements: %s\n"
+            % (cache.encode("utf-8"), b" ".join(sorted(wanted_requirements))),
+        )
+
+        missing = wanted_requirements - our_requirements
+
+        # Allow requirements mismatch for uid/gid if and only if caches
+        # are untrusted. This allows cache behavior on Try to be
+        # reasonable. Otherwise, random tasks could "poison" cache
+        # usability by introducing uid/gid mismatches. For untrusted
+        # environments like Try, this is a perfectly reasonable thing to
+        # allow.
+        if (
+            missing
+            and untrusted_caches
+            and running_as_root
+            and all(s.startswith((b"uid=", b"gid=")) for s in missing)
+        ):
+            print_line(
+                b"cache",
+                b"cache %s uid/gid mismatch; this is acceptable "
+                b"because caches for this task are untrusted; "
+                b"changing ownership to facilitate cache use\n" % cache.encode("utf-8"),
+            )
+            chown_recursive(
+                cache, user.pw_name, group.gr_name, user.pw_uid, group.gr_gid
+            )
+
+            # And write out the updated reality.
+            with open(requires_path, "wb") as fh:
+                fh.write(b"\n".join(sorted(our_requirements)))
+
+            write_audit_entry(
+                audit_path,
+                b"chown; requirements: %s" % b", ".join(sorted(our_requirements)),
+            )
+
+        elif missing:
+            print(
+                "error: requirements for populated cache %s differ from "
+                "this task" % cache
+            )
+            print(
+                "cache requirements: %s"
+                % " ".join(sorted(s.decode("utf-8") for s in wanted_requirements))
+            )
+            print(
+                "our requirements:   %s"
+                % " ".join(sorted(s.decode("utf-8") for s in our_requirements))
+            )
+            if any(s.startswith((b"uid=", b"gid=")) for s in missing):
+                print(CACHE_UID_GID_MISMATCH)
+
+            write_audit_entry(
+                audit_path,
+                b"requirements mismatch; wanted: %s"
+                % b", ".join(sorted(our_requirements)),
+            )
+
+            print("")
+            print("audit log:")
+            with open(audit_path, "r") as fh:
+                print(fh.read())
+
+            return True
+        else:
+            write_audit_entry(audit_path, b"used")
+
+        # We don't need to adjust permissions here because the cache is
+        # associated with a uid/gid and the first task should have set
+        # a proper owner/group.
+
+        return
+
+    # The cache has content and no requirements file. This shouldn't
+    # happen because run-task should be the first thing that touches a
+    # cache.
+    print(
+        "error: cache %s is not empty and is missing a "
+        ".cacherequires file; the cache names for this task are "
+        "likely mis-configured or TASKCLUSTER_CACHES is not set "
+        "properly" % cache
+    )
+
+    write_audit_entry(audit_path, b"missing .cacherequires")
+    return True
+
+
+def configure_volume_posix(volume, user, group, running_as_root):
+    """Require that ``volume`` is empty; as root, hand it to user/group.
+
+    Files can only be present if the Docker image build put them there.
+    The policy is that volumes must be empty, which also makes an empty
+    volume look like an empty cache, so caches can be swapped in and out
+    on any volume without a noticeable change of behavior.
+
+    Exits the process with status 1 when the volume has entries.
+    """
+    entries = os.listdir(volume)
+    if len(entries) > 0:
+        print(NON_EMPTY_VOLUME % volume)
+        print("entries in root directory: %s" % " ".join(sorted(entries)))
+        sys.exit(1)
+
+    if not running_as_root:
+        return
+
+    # The volume is almost certainly owned by root:root. Chown it so it
+    # is writable.
+    uid, gid = user.pw_uid, group.gr_gid
+    message = b"changing ownership of volume %s to %d:%d\n"
+    print_line(b"volume", message % (volume.encode("utf-8"), uid, gid))
+    set_dir_permissions(volume, uid, gid)
+
+
+def _clean_git_checkout(destination_path):
+    """Delete untracked files (i.e. build products) from a git checkout.
+
+    Runs a dry-run ``git clean`` in ``destination_path`` and deletes every
+    path it reports with ``remove``. Exits with git's status on failure.
+    """
+    print_line(b"vcs", b"cleaning git checkout...\n")
+    # Two -f`s causes subdirectories with `.git` directories to be cleaned
+    # as well; -n makes this a dry run that only lists the paths.
+    args = ["git", "clean", "-nxdff"]
+    print_line(b"vcs", b"executing %r\n" % args)
+    p = subprocess.Popen(
+        args,
+        # Unbuffered, so output is received as soon as it is generated.
+        bufsize=0,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        stdin=sys.stdin.fileno(),
+        cwd=destination_path,
+        env=os.environ,
+    )
+    # Drain the pipe *before* waiting: waiting first deadlocks once git has
+    # written more than the OS pipe buffer holds and blocks on write.
+    data = io.TextIOWrapper(p.stdout, encoding="latin1").read()
+    ret = p.wait()
+    if ret:
+        sys.exit(ret)
+    prefix = "Would remove "
+    filenames = [
+        os.path.join(destination_path, line[len(prefix) :])
+        for line in data.splitlines()
+        if line.startswith(prefix)  # ignore any other output line
+    ]
+    print_line(b"vcs", b"removing %r\n" % filenames)
+    for filename in filenames:
+        remove(filename)
+    print_line(b"vcs", b"successfully cleaned git checkout!\n")
+
+
+def git_checkout(
+    destination_path: str,
+    head_repo: str,
+    base_repo: Optional[str],
+    base_ref: Optional[str],
+    base_rev: Optional[str],
+    ref: Optional[str],
+    commit: Optional[str],
+    ssh_key_file: Optional[Path],
+    ssh_known_hosts_file: Optional[Path],
+):
+    """Clone if needed, then fetch ``head_repo`` and check out the target.
+
+    Checks out ``commit`` (else ``FETCH_HEAD``), updates submodules when a
+    .gitmodules file exists, cleans the checkout and returns the HEAD hash.
+    Raises ``RuntimeError`` if only one ssh file is given or one is missing."""
+    env = {
+        # abort if transfer speed is lower than 1kB/s for 1 minute
+        "GIT_HTTP_LOW_SPEED_LIMIT": "1024",
+        "GIT_HTTP_LOW_SPEED_TIME": "60",
+        "PYTHONUNBUFFERED": "1",
+    }
+
+    if ssh_key_file and ssh_known_hosts_file:
+        if not ssh_key_file.exists():
+            raise RuntimeError("Can't find specified ssh_key file.")
+        if not ssh_known_hosts_file.exists():
+            raise RuntimeError("Can't find specified known_hosts file.")
+        env["GIT_SSH_COMMAND"] = " ".join(
+            [
+                "ssh",
+                "-oIdentityFile={}".format(ssh_key_file.as_posix()),
+                "-oStrictHostKeyChecking=yes",
+                "-oUserKnownHostsFile={}".format(ssh_known_hosts_file.as_posix()),
+            ]
+        )
+    elif ssh_key_file or ssh_known_hosts_file:
+        raise RuntimeError(
+            "Must specify both ssh_key_file and ssh_known_hosts_file, if either are specified",
+        )
+
+    if not os.path.exists(destination_path):
+        # Repository doesn't already exist, needs to be cloned
+        args = [
+            "git",
+            "clone",
+            base_repo if base_repo else head_repo,
+            destination_path,
+        ]
+
+        retry_required_command(b"vcs", args, extra_env=env)
+
+    if base_ref:
+        args = ["git", "fetch", "origin", base_ref]
+
+        retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
+
+        # Create a local branch so taskgraph can diff head against base if needed
+        args = ["git", "checkout", base_ref]
+
+        retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
+
+    # After a force-push (e.g. on a testing branch) base_rev no longer exists
+    # on base_ref. Fetching it explicitly lets taskgraph diff the pre-push
+    # state against the current one. Unlike base_ref above, no checkout is
+    # needed: the revision is available right after the fetch.
+    if base_rev and base_rev != NULL_REVISION:
+        args = ["git", "fetch", "origin", base_rev]
+
+        retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
+
+    # A provided ref might be a tag, so fetch tags as well. Only done when base
+    # and head repo match: the only scenario where tags could be present (PRs,
+    # for example, always include an explicit rev). Skipping this could leave
+    # us without a tag or with an outdated one; `--force` lets tags be updated.
+    if ref and base_repo == head_repo:
+        args = [
+            "git",
+            "fetch",
+            "--tags",
+            "--force",
+            base_repo,
+            ref,
+        ]
+
+        retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
+
+    # If a ref isn't provided, we fetch all refs from head_repo, which may be slow
+    args = [
+        "git",
+        "fetch",
+        "--no-tags",
+        head_repo,
+        ref if ref else "+refs/heads/*:refs/remotes/work/*",
+    ]
+
+    retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
+
+    args = [
+        "git",
+        "checkout",
+        "-f",
+    ]
+
+    if ref:
+        args.extend(["-B", ref])
+
+    # `git fetch` set `FETCH_HEAD` reference to the last commit of the desired branch
+    args.append(commit if commit else "FETCH_HEAD")
+
+    run_required_command(b"vcs", args, cwd=destination_path)
+
+    if os.path.exists(os.path.join(destination_path, ".gitmodules")):
+        args = [
+            "git",
+            "submodule",
+            "init",
+        ]
+
+        run_required_command(b"vcs", args, cwd=destination_path)
+
+        args = [
+            "git",
+            "submodule",
+            "update",
+            "--force",  # Overrides any potential local changes
+        ]
+
+        run_required_command(b"vcs", args, cwd=destination_path)
+
+    _clean_git_checkout(destination_path)
+
+    args = ["git", "rev-parse", "--verify", "HEAD"]
+
+    commit_hash = subprocess.check_output(
+        args, cwd=destination_path, universal_newlines=True
+    ).strip()
+    assert re.match("^[a-f0-9]{40}$", commit_hash)
+
+    if head_repo.startswith("https://github.com"):
+        if head_repo.endswith("/"):
+            head_repo = head_repo[:-1]
+
+        tinderbox_link = "{}/commit/{}".format(head_repo, commit_hash)
+        repo_name = head_repo.split("/")[-1]
+    else:
+        tinderbox_link = head_repo
+        repo_name = head_repo
+
+    msg = (
+        "TinderboxPrint:<a href='{link}' "
+        "title='Built from {name} commit {commit_hash}'>"
+        "{commit_hash}</a>\n".format(
+            commit_hash=commit_hash, link=tinderbox_link, name=repo_name
+        )
+    )
+
+    print_line(b"vcs", msg.encode("utf-8"))
+
+    return commit_hash
+
+
+def fetch_ssh_secret(secret_name):
+    """Fetch the secret ``secret_name`` and return its ``ssh_privkey`` value.
+
+    Exits with status 1 if the download fails or the body is not JSON."""
+    secret_url = SECRET_BASEURL_TPL.format(secret_name)
+    fetching = b"fetching secret %s from %s\n"
+    print_line(b"vcs", fetching % (secret_name.encode(), secret_url.encode()))
+    try:
+        # Close the HTTP response deterministically instead of leaking it.
+        with urllib.request.urlopen(secret_url, timeout=10) as res:
+            body = res.read()
+    except (urllib.error.URLError, socket.timeout):
+        print_line(b"vcs", b"Unable to retrieve ssh secret. aborting...\n")
+        sys.exit(1)
+    try:
+        secret = json.loads(body.decode("utf-8"))
+    except ValueError:
+        print_line(b"vcs", b"invalid JSON in secret\n")
+        sys.exit(1)
+
+    return secret["secret"]["ssh_privkey"]
+
+
+def hg_checkout(
+    destination_path: str,
+    head_repo: str,
+    base_repo: Optional[str],
+    store_path: str,
+    sparse_profile: Optional[str],
+    branch: Optional[str],
+    revision: Optional[str],
+):
+    """Check out ``head_repo`` with ``hg robustcheckout``; return the hash.
+
+    ``branch`` takes precedence over ``revision`` when both are given. The
+    return value is the ``{node}`` that hg reports for ``.`` afterwards.
+    Exits with status 1 if the Windows hg.exe is missing and raises
+    ``RuntimeError`` on a platform that is none of mac, posix or windows.
+    """
+    if not (IS_MACOSX or IS_POSIX or IS_WINDOWS):
+        raise RuntimeError("Must be running on mac, posix or windows")
+
+    if IS_MACOSX:
+        hg_bin = "/tools/python27-mercurial/bin/hg"
+    elif IS_POSIX:
+        hg_bin = "hg"
+    else:
+        # This is where OCC installs it in the AMIs.
+        hg_bin = r"C:\Program Files\Mercurial\hg.exe"
+        if not os.path.exists(hg_bin):
+            print("could not find Mercurial executable: %s" % hg_bin)
+            sys.exit(1)
+
+    command = [hg_bin, "robustcheckout", "--sharebase", store_path, "--purge"]
+    if base_repo:
+        command += ["--upstream", base_repo]
+    if sparse_profile:
+        command += ["--sparseprofile", sparse_profile]
+
+    # Specify method to checkout a revision. This defaults to revisions as
+    # SHA-1 strings, but also supports symbolic revisions like `tip` via the
+    # branch flag.
+    if branch:
+        command += ["--branch", branch]
+    else:
+        command += ["--revision", revision]
+    command += [head_repo, destination_path]
+
+    unbuffered = {"PYTHONUNBUFFERED": "1"}
+    run_required_command(b"vcs", command, extra_env=unbuffered)
+
+    # Ask hg for the hash that is now checked out and ensure that it is
+    # well formed.
+    node = subprocess.check_output(
+        [hg_bin, "log", "--rev", ".", "--template", "{node}"],
+        cwd=destination_path,
+        # Triggers text mode on Python 3.
+        universal_newlines=True,
+    )
+    assert re.match("^[a-f0-9]{40}$", node)
+
+    repo_name = head_repo.split("/")[-1]
+    template = (
+        "TinderboxPrint:<a href={head_repo}/rev/{revision} "
+        "title='Built from {repo_name} revision {revision}'>"
+        "{revision}</a>\n"
+    )
+    msg = template.format(
+        revision=node, head_repo=head_repo, repo_name=repo_name
+    )
+    print_line(b"vcs", msg.encode("utf-8"))
+
+    return node
+
+
+def fetch_artifacts():
+    """Run ``fetch-content task-artifacts``; exit 1 if the script is missing."""
+    print_line(b"fetches", b"fetching artifacts\n")
+
+    # Prefer a fetch-content found on PATH, else the copy next to this file.
+    script = shutil.which("fetch-content")
+    if not (script and os.path.isfile(script)):
+        script = os.path.join(os.path.dirname(__file__), "fetch-content")
+        if not os.path.isfile(script):
+            print(FETCH_CONTENT_NOT_FOUND)
+            sys.exit(1)
+
+    argv = [sys.executable, "-u", script, "task-artifacts"]
+    print_line(b"fetches", b"executing %r\n" % argv)
+    subprocess.run(argv, check=True, env=os.environ)
+    print_line(b"fetches", b"finished fetching artifacts\n")
+
+
+def add_vcs_arguments(parser, project, name):
+    """Register ``--<project>-checkout`` and ``--<project>-sparse-profile``.
+
+    ``name`` only appears in the help text of the two options."""
+    option_specs = (
+        ("--%s-checkout", "Directory where %s checkout should be created"),
+        ("--%s-sparse-profile", "Path to sparse profile for %s checkout"),
+    )
+    for flag_template, help_template in option_specs:
+        flag, help_text = flag_template % project, help_template % name
+        parser.add_argument(flag, help=help_text)
+
+
+def collect_vcs_options(args, project, name):
+    """Gather the VCS settings of ``project`` from ``args`` and the environment.
+
+    ``checkout`` and ``sparse-profile`` come from ``args``; everything else
+    is read from ``<PROJECT>_*`` variables and ``HG_STORE_PATH``. Returns a
+    dict keyed by dashed option names.
+    """
+    env_prefix = project.upper()
+
+    def from_env(suffix):
+        # Per-project variables are named <PROJECT>_<SUFFIX>.
+        return os.environ.get("%s_%s" % (env_prefix, suffix))
+
+    checkout = getattr(args, "%s_checkout" % project)
+    sparse_profile = getattr(args, "%s_sparse_profile" % project)
+    if checkout:
+        checkout = os.path.abspath(os.path.expanduser(checkout))
+
+    store_path = os.environ.get("HG_STORE_PATH")
+    if store_path:
+        store_path = os.path.abspath(os.path.expanduser(store_path))
+
+    pip_requirements = from_env("PIP_REQUIREMENTS")
+    if pip_requirements:
+        pip_requirements = os.path.join(checkout, pip_requirements)
+
+    # Some callers set the base repository to mozilla-central for historical
+    # reasons. Switch to mozilla-unified because robustcheckout works best
+    # with it.
+    base_repo = from_env("BASE_REPOSITORY")
+    if base_repo == "https://hg.mozilla.org/mozilla-central":
+        base_repo = "https://hg.mozilla.org/mozilla-unified"
+
+    return {
+        "store-path": store_path,
+        "project": project,
+        "name": name,
+        "env-prefix": env_prefix,
+        "checkout": checkout,
+        "sparse-profile": sparse_profile,
+        "base-repo": base_repo,
+        "base-ref": from_env("BASE_REF"),
+        "base-rev": from_env("BASE_REV"),
+        "head-repo": from_env("HEAD_REPOSITORY"),
+        "revision": from_env("HEAD_REV"),
+        "ref": from_env("HEAD_REF"),
+        "repo-type": from_env("REPOSITORY_TYPE"),
+        "ssh-secret-name": from_env("SSH_SECRET_NAME"),
+        "pip-requirements": pip_requirements,
+    }
+
+
+def vcs_checkout_from_args(options):
+    """Check out the repository described by ``options`` (git or hg).
+
+    On success ``<env-prefix>_HEAD_REV`` is set to the checked out revision.
+    Without a checkout directory only the ref/revision sanity check runs.
+    """
+    if not options["checkout"]:
+        if options["ref"] and not options["revision"]:
+            print("task should be defined in terms of non-symbolic revision")
+            sys.exit(1)
+        return
+
+    revision, ref = options["revision"], options["ref"]
+    ssh_dir = ssh_key_file = ssh_known_hosts_file = None
+
+    try:
+        secret_name = options.get("ssh-secret-name")
+        if secret_name:
+            ssh_dir = Path("~/.ssh-run-task").expanduser()
+            os.makedirs(ssh_dir, 0o700)
+            ssh_key_file = ssh_dir / "private_ssh_key"
+            # We don't use write_text here, to avoid \n -> \r\n on windows
+            private_key = fetch_ssh_secret(secret_name).encode("ascii")
+            ssh_key_file.write_bytes(private_key)
+            ssh_key_file.chmod(0o600)
+            # TODO: We should pull this from a secret, so it can be updated on old trees
+            ssh_known_hosts_file = ssh_dir / "known_hosts"
+            ssh_known_hosts_file.write_bytes(GITHUB_SSH_FINGERPRINT)
+
+        repo_type = options["repo-type"]
+        if repo_type not in ("git", "hg"):
+            raise RuntimeError('Type of VCS must be either "git" or "hg"')
+        if repo_type == "git":
+            if not (revision or ref):
+                message = "Git requires that either a ref, a revision, or both are provided"
+                raise RuntimeError(message)
+            if not ref:
+                print("Providing a ref will improve the performance of this checkout")
+
+            revision = git_checkout(
+                options["checkout"],
+                options["head-repo"],
+                options["base-repo"],
+                options["base-ref"],
+                options["base-rev"],
+                ref,
+                revision,
+                ssh_key_file,
+                ssh_known_hosts_file,
+            )
+        else:
+            if not (revision or ref):
+                message = "Hg requires that at least one of a ref or revision is provided"
+                raise RuntimeError(message)
+
+            revision = hg_checkout(
+                options["checkout"],
+                options["head-repo"],
+                options["base-repo"],
+                options["store-path"],
+                options["sparse-profile"],
+                ref,
+                revision,
+            )
+    finally:
+        if ssh_dir is not None:
+            shutil.rmtree(ssh_dir, ignore_errors=True)
+
+    os.environ["%s_HEAD_REV" % options["env-prefix"]] = revision
+
+
+def install_pip_requirements(repositories):
+    """Run one ``pip install`` covering every repository's requirements file.
+
+    Does nothing when no repository has a ``pip-requirements`` value. Hash
+    checking is required unless ``PIP_DISABLE_REQUIRE_HASHES`` is ``"1"``.
+    """
+    paths = list(filter(None, (repo["pip-requirements"] for repo in repositories)))
+    if len(paths) == 0:
+        return
+
+    hashes_disabled = os.environ.get("PIP_DISABLE_REQUIRE_HASHES") == "1"
+    pip_command = [sys.executable, "-mpip", "install"]
+    pip_command += [] if hashes_disabled else ["--require-hashes"]
+    pip_command += [token for path in paths for token in ("-r", path)]
+
+    run_required_command(b"pip-install", pip_command)
+
+
+def maybe_run_resource_monitoring():
+    """Start the resource monitor in the background, if it is available.
+
+    Needs ``MOZ_FETCHES`` and ``RESOURCE_MONITOR_OUTPUT`` in the environment
+    and an executable resource-monitor binary below ``MOZ_FETCHES_DIR``.
+    Returns the ``Popen`` object, or ``None`` if nothing was started.
+
+    Discussion in https://github.com/taskcluster/taskcluster-rfcs/pull/160
+    and https://bugzil.la/1648051
+    """
+    required = ("MOZ_FETCHES", "RESOURCE_MONITOR_OUTPUT")
+    if any(variable not in os.environ for variable in required):
+        return
+
+    label = b"resource_monitor"
+    suffix = ".exe" if IS_WINDOWS else ""
+    binary = "{}/resource-monitor/resource-monitor{}".format(
+        os.environ.get("MOZ_FETCHES_DIR"), suffix
+    )
+    runnable = os.path.exists(binary) and os.access(binary, os.X_OK)
+    if not runnable:
+        print_line(label, b"%s not executable\n" % binary.encode("utf-8"))
+        return
+
+    output_path = os.environ["RESOURCE_MONITOR_OUTPUT"]
+    command = [binary, "-process", str(os.getpid()), "-output", output_path]
+    starting = b"Resource monitor starting: %s\n"
+    print_line(label, starting % str(command).encode("utf-8"))
+    # Avoid environment variables the payload doesn't need.
+    del os.environ["RESOURCE_MONITOR_OUTPUT"]
+
+    # Without CREATE_NEW_PROCESS_GROUP Windows signals will attempt to kill
+    # run-task, too.
+    flags = subprocess.CREATE_NEW_PROCESS_GROUP if IS_WINDOWS else 0
+    monitor = subprocess.Popen(
+        command,
+        # Disable buffering because we want to receive output
+        # as it is generated so timestamps in logs are
+        # accurate.
+        bufsize=0,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        creationflags=flags,
+        cwd=os.getcwd(),
+    )
+
+    def forward_output():
+        # Relay the monitor's output line by line; readline() returns ""
+        # only at end of file, which ends the loop.
+        reader = io.TextIOWrapper(monitor.stdout, encoding="latin1")
+        for text in iter(reader.readline, ""):
+            print_line(label, text.encode("latin1"))
+
+    reader_thread = Thread(target=forward_output)
+    reader_thread.start()
+    return monitor
+
+
+def _display_python_version():
+    # Log the running interpreter's version under the b"setup" prefix.
+    version = platform.python_version().encode("utf-8")
+    print_line(b"setup", b"Python version: %s\n" % version)
+
+
+def main(args):
+    """Set up caches, volumes and checkouts, drop privileges, run the task.
+
+    ``args`` is the command line without the program name. Returns 1 on a
+    cache/volume sanity failure, ``EXIT_PURGE_CACHE`` when a cache must be
+    purged, else the result of ``run_command`` for the task."""
+    os.environ["TASK_WORKDIR"] = os.getcwd()
+    print_line(
+        b"setup",
+        b"run-task started in %s\n" % os.environ["TASK_WORKDIR"].encode("utf-8"),
+    )
+    print_line(
+        b"setup",
+        b"Invoked by command: %s\n" % " ".join(args).encode("utf-8"),
+    )
+    _display_python_version()
+    running_as_root = IS_POSIX and os.getuid() == 0
+
+    # Arguments up to '--' are ours; the rest is the task command to execute.
+    try:
+        i = args.index("--")
+        our_args = args[0:i]
+        task_args = args[i + 1 :]
+    except ValueError:
+        our_args = args
+        task_args = []
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--user", default="worker", help="user to run as")
+    parser.add_argument("--group", default="worker", help="group to run as")
+    parser.add_argument("--task-cwd", help="directory to run the provided command in")
+
+    repositories = os.environ.get("REPOSITORIES")
+    if repositories:
+        repositories = json.loads(repositories)
+    else:
+        repositories = {"vcs": "repository"}
+
+    for repository, name in repositories.items():
+        add_vcs_arguments(parser, repository, name)
+
+    parser.add_argument(
+        "--fetch-hgfingerprint", action="store_true", help=argparse.SUPPRESS
+    )
+
+    args = parser.parse_args(our_args)
+
+    repositories = [
+        collect_vcs_options(args, repository, name)
+        for (repository, name) in repositories.items()
+    ]
+    # Sort repositories so that parent checkout paths come before children
+    repositories.sort(key=lambda repo: Path(repo["checkout"] or "/").parts)
+
+    uid = gid = gids = user = group = None
+    if IS_POSIX and running_as_root:
+        user, group, gids = get_posix_user_group(args.user, args.group)
+        uid = user.pw_uid
+        gid = group.gr_gid
+
+    if running_as_root and os.path.exists("/dev/kvm"):
+        # Ensure kvm permissions for worker, required for Android x86
+        st = os.stat("/dev/kvm")
+        os.chmod("/dev/kvm", st.st_mode | 0o666)
+
+    # Validate caches. Taskgraph should pass in a ";"-separated list of cache
+    # paths via TASKCLUSTER_CACHES, which is removed here so that it is not
+    # passed down to child processes.
+
+    if "TASKCLUSTER_CACHES" in os.environ:
+        caches = os.environ["TASKCLUSTER_CACHES"].split(";")
+        del os.environ["TASKCLUSTER_CACHES"]
+    else:
+        caches = []
+
+    if "TASKCLUSTER_UNTRUSTED_CACHES" in os.environ:
+        untrusted_caches = True
+        del os.environ["TASKCLUSTER_UNTRUSTED_CACHES"]
+    else:
+        untrusted_caches = False
+
+    for cache in caches:
+        if not os.path.isdir(cache):
+            print(
+                "error: cache %s is not a directory; this should never "
+                "happen" % cache
+            )
+            return 1
+
+        purge = configure_cache_posix(
+            cache, user, group, untrusted_caches, running_as_root
+        )
+
+        if purge:
+            return EXIT_PURGE_CACHE
+
+    if "TASKCLUSTER_VOLUMES" in os.environ:
+        volumes = os.environ["TASKCLUSTER_VOLUMES"].split(";")
+        del os.environ["TASKCLUSTER_VOLUMES"]
+    else:
+        volumes = []
+
+    if volumes and not IS_POSIX:
+        print("assertion failed: volumes not expected on Windows")
+        return 1
+
+    # Sanitize volumes.
+    for volume in volumes:
+        # If a volume is a cache, it was dealt with above.
+        if volume in caches:
+            print_line(b"volume", b"volume %s is a cache\n" % volume.encode("utf-8"))
+            continue
+
+        configure_volume_posix(volume, user, group, running_as_root)
+
+    all_caches_and_volumes = set(map(os.path.normpath, caches))
+    all_caches_and_volumes |= set(map(os.path.normpath, volumes))
+
+    def path_in_cache_or_volume(path):
+        path = os.path.normpath(path)
+
+        while path:
+            if path in all_caches_and_volumes:
+                return True
+
+            path, child = os.path.split(path)
+            if not child:
+                break
+
+        return False
+
+    def prepare_checkout_dir(checkout):
+        if not checkout:
+            return
+
+        # The checkout path becomes the working directory, and purging it could
+        # blow away the special files in a cache's root: disallow this scenario.
+        if os.path.exists(os.path.join(checkout, ".cacherequires")):
+            print("error: cannot perform vcs checkout into cache root: %s" % checkout)
+            sys.exit(1)
+
+        # TODO given the performance implications, consider making this a fatal
+        # error.
+        if not path_in_cache_or_volume(checkout):
+            print_line(
+                b"vcs",
+                b"WARNING: vcs checkout path (%s) not in cache "
+                b"or volume; performance will likely suffer\n"
+                % checkout.encode("utf-8"),
+            )
+
+        # Ensure the directory for the source checkout exists.
+        try:
+            os.makedirs(os.path.dirname(checkout))
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
+
+        # And that it is owned by the appropriate user/group.
+        if running_as_root:
+            os.chown(os.path.dirname(checkout), uid, gid)
+
+    def prepare_hg_store_path():
+        # And ensure the shared store path exists and has proper permissions.
+        if "HG_STORE_PATH" not in os.environ:
+            print("error: HG_STORE_PATH environment variable not set")
+            sys.exit(1)
+
+        store_path = os.environ["HG_STORE_PATH"]
+
+        if not path_in_cache_or_volume(store_path):
+            print_line(
+                b"vcs",
+                b"WARNING: HG_STORE_PATH (%s) not in cache or "
+                b"volume; performance will likely suffer\n"
+                % store_path.encode("utf-8"),
+            )
+
+        try:
+            os.makedirs(store_path)
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
+
+        if running_as_root:
+            os.chown(store_path, uid, gid)
+
+    repository_paths = [
+        Path(repo["checkout"]) for repo in repositories if repo["checkout"]
+    ]
+    for repo in repositories:
+        if not repo["checkout"]:
+            continue
+        parents = Path(repo["checkout"]).parents
+        if any((path in repository_paths) for path in parents):
+            # Skip creating any checkouts that are inside other checkouts
+            continue
+        prepare_checkout_dir(repo["checkout"])
+
+    if any(repo["checkout"] and repo["repo-type"] == "hg" for repo in repositories):
+        prepare_hg_store_path()
+
+    if IS_POSIX and running_as_root:
+        # Drop permissions to requested user. This is modeled after what `sudo`
+        # was observed to do in a Docker container. We do not bother calling
+        # setrlimit() because containers have their own limits.
+        print_line(
+            b"setup",
+            b"running as %s:%s\n"
+            % (args.user.encode("utf-8"), args.group.encode("utf-8")),
+        )
+
+        os.setgroups(gids)
+        os.umask(0o22)
+        os.setresgid(gid, gid, gid)
+        os.setresuid(uid, uid, uid)
+
+    for repo in repositories:
+        vcs_checkout_from_args(repo)
+
+    resource_process = None
+
+    try:
+        for k in ["MOZ_FETCHES_DIR", "UPLOAD_DIR"] + [
+            "{}_PATH".format(repository["project"].upper())
+            for repository in repositories
+        ]:
+            if k in os.environ:
+                os.environ[k] = os.path.abspath(os.environ[k])
+                print_line(
+                    b"setup",
+                    b"%s is %s\n" % (k.encode("utf-8"), os.environ[k].encode("utf-8")),
+                )
+
+        if "MOZ_FETCHES" in os.environ:
+            fetch_artifacts()
+
+        # Install Python requirements after fetches in case tasks want to use
+        # fetches to grab dependencies.
+        install_pip_requirements(repositories)
+
+        resource_process = maybe_run_resource_monitoring()
+
+        return run_command(b"task", task_args, cwd=args.task_cwd)
+    finally:
+        if resource_process:
+            print_line(b"resource_monitor", b"terminating\n")
+            if IS_WINDOWS:
+                # .terminate() on Windows is not a graceful shutdown, due to
+                # differences in signals. CTRL_BREAK_EVENT will work provided
+                # the subprocess is in a different process group, so this script
+                # isn't also killed.
+                os.kill(resource_process.pid, signal.CTRL_BREAK_EVENT)
+            else:
+                resource_process.terminate()
+            resource_process.wait()
+        fetches_dir = os.environ.get("MOZ_FETCHES_DIR")
+        if fetches_dir and os.path.isdir(fetches_dir):
+            print_line(b"fetches", b"removing %s\n" % fetches_dir.encode("utf-8"))
+            remove(fetches_dir)
+            print_line(b"fetches", b"finished\n")
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))  # main()'s return value becomes the exit status
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py
new file mode 100644
index 0000000000..1119a1c960
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py
@@ -0,0 +1,107 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from taskgraph.util.attributes import (
+    match_run_on_git_branches,
+    match_run_on_projects,
+    match_run_on_tasks_for,
+)
+
+_target_task_methods = {}  # name -> function, filled in by the `_target_task` decorator
+# Prefix that `filter_for_git_branch` strips from `head_ref` to get the branch.
+_GIT_REFS_HEADS_PREFIX = "refs/heads/"
+
+
+def _target_task(name):
+    # Decorator factory: the decorated function is stored under `name`.
+    def register(func):
+        _target_task_methods[name] = func
+        return func
+
+    return register
+
+
+def get_method(method):
+    """Get the target_task_method registered as `method` (KeyError if unknown)."""
+    return _target_task_methods[method]
+
+
+def filter_out_cron(task, parameters):
+    """Filter out tasks that run via cron.
+
+    Returns False when the `cron` attribute is truthy; `parameters` is unused."""
+    return not task.attributes.get("cron")
+
+
+def filter_for_project(task, parameters):
+    """Filter tasks by project, using the `run_on_projects` attribute (default [])."""
+    run_on_projects = set(task.attributes.get("run_on_projects", []))
+    return match_run_on_projects(parameters["project"], run_on_projects)
+
+
+def filter_for_tasks_for(task, parameters):  # `run_on_tasks_for` defaults to ["all"]
+    run_on_tasks_for = set(task.attributes.get("run_on_tasks_for", ["all"]))
+    return match_run_on_tasks_for(parameters["tasks_for"], run_on_tasks_for)
+
+
+def filter_for_git_branch(task, parameters):
+    """Filter tasks by git branch.
+
+    Tasks that do not define `run_on_git_branches` default to `["all"]`."""
+    # Branch filtering needs a git repository, and pull requests usually have
+    # arbitrary branch names, so both situations let every task through.
+    not_git = parameters.get("repository_type") != "git"
+    if not_git or parameters["tasks_for"] == "github-pull-request":
+        return True
+
+    allowed_branches = set(task.attributes.get("run_on_git_branches", ["all"]))
+    branch = parameters["head_ref"]
+    # Reduce a full ref such as "refs/heads/main" to the bare branch name.
+    if branch.startswith(_GIT_REFS_HEADS_PREFIX):
+        branch = branch[len(_GIT_REFS_HEADS_PREFIX) :]
+
+    return match_run_on_git_branches(branch, allowed_branches)
+
+
+def filter_out_shipping_phase(task, parameters):  # keep only unset or "build" phases
+    return task.attributes.get("shipping_phase") in (None, "build")
+
+
+def standard_filter(task, parameters):
+    """Return True only if the task passes every standard filter listed below."""
+    checks = (
+        filter_out_cron,
+        filter_out_shipping_phase,
+        filter_for_project,
+        filter_for_tasks_for,
+        filter_for_git_branch,
+    )
+    # all() short-circuits: later filters are skipped after the first rejection.
+    return all(check(task, parameters) for check in checks)
+
+
+@_target_task("default")
+def target_tasks_default(full_task_graph, parameters, graph_config):
+    """Return the labels of every task accepted by `standard_filter`, i.e. the
+    tasks whose attributes indicate they should run for these parameters."""
+    tasks = full_task_graph.tasks.items()
+    # Keep only the labels; the Task objects are needed just for filtering.
+    return [label for label, task in tasks if standard_filter(task, parameters)]
+
+
+@_target_task("codereview")
+def target_tasks_codereview(full_task_graph, parameters, graph_config):
+    """Return the labels of the tasks that pass `standard_filter` and also
+    carry a truthy `code-review` attribute."""
+    selected = []
+    for label, task in full_task_graph.tasks.items():
+        if standard_filter(task, parameters) and task.attributes.get("code-review"):
+            selected.append(label)
+    return selected
+
+
+@_target_task("nothing")
+def target_tasks_nothing(full_task_graph, parameters, graph_config):
+    """Select nothing, for DONTBUILD pushes: always returns an empty list."""
+    return []
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/task.py b/third_party/python/taskcluster_taskgraph/taskgraph/task.py
new file mode 100644
index 0000000000..45427ac4f7
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/task.py
@@ -0,0 +1,84 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Union
+
+
+@dataclass
+class Task:
+    """Representation of a task in a TaskGraph.  Each Task has, at creation:
+
+    - kind: the name of the task kind
+    - label: the label for this task
+    - description: a description string for this task (defaults to "")
+    - attributes: a dictionary of attributes for this task (used for filtering)
+    - task: the task definition (JSON-able dictionary)
+    - optimization: optimization to apply to the task (see taskgraph.optimize)
+    - dependencies: tasks this one depends on, in the form {name: label}, for example
+      {'build': 'build-linux64/opt', 'docker-image': 'build-docker-image-desktop-test'}
+    - soft_dependencies: tasks this one may depend on if they are available post
+      optimisation. They are set as a list of task labels.
+    - if_dependencies: only run this task if at least one of these dependencies
+      is present.
+
+    And later, as the task-graph processing proceeds:
+
+    - task_id -- TaskCluster taskId under which this task will be created
+
+    This class is just a convenience wrapper for the data type and managing
+    display, comparison, serialization, etc. It has no functionality of its own.
+    """
+
+    kind: str
+    label: str
+    attributes: Dict
+    task: Dict
+    description: str = ""
+    task_id: Union[str, None] = field(default=None, init=False)
+    optimization: Union[Dict[str, Any], None] = field(default=None)
+    dependencies: Dict = field(default_factory=dict)
+    soft_dependencies: List = field(default_factory=list)
+    if_dependencies: List = field(default_factory=list)
+
+    def __post_init__(self):
+        self.attributes["kind"] = self.kind  # note: mutates the dict passed in
+
+    def to_json(self):
+        rv = {
+            "kind": self.kind,
+            "label": self.label,
+            "description": self.description,
+            "attributes": self.attributes,
+            "dependencies": self.dependencies,
+            "soft_dependencies": self.soft_dependencies,
+            "if_dependencies": self.if_dependencies,
+            "optimization": self.optimization,
+            "task": self.task,
+        }
+        if self.task_id:
+            rv["task_id"] = self.task_id
+        return rv
+
+    @classmethod
+    def from_json(cls, task_dict):
+        """Re-construct a Task from a data structure as produced by
+        taskgraph.to_json.  This is used to "resume" the task-graph generation
+        process, for example in Action tasks.  Absent `dependencies`,
+        `soft_dependencies` and `if_dependencies` keys become empty containers.
+        """
+        rv = cls(
+            kind=task_dict["kind"],
+            label=task_dict["label"],
+            description=task_dict.get("description", ""),
+            attributes=task_dict["attributes"],
+            task=task_dict["task"],
+            optimization=task_dict["optimization"],
+            dependencies=task_dict.get("dependencies", {}),
+            soft_dependencies=task_dict.get("soft_dependencies", []),
+            if_dependencies=task_dict.get("if_dependencies", []),
+        )
+        if "task_id" in task_dict:
+            rv.task_id = task_dict["task_id"]
+        return rv
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/taskgraph.py b/third_party/python/taskcluster_taskgraph/taskgraph/taskgraph.py
new file mode 100644
index 0000000000..e479a7cf15
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/taskgraph.py
@@ -0,0 +1,72 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from dataclasses import dataclass
+from typing import Dict, List
+
+from .graph import Graph
+from .task import Task
+
+
+@dataclass(frozen=True)
+class TaskGraph:
+    """
+    Representation of a task graph.
+
+    A task graph is a combination of a Graph and a dictionary of tasks indexed
+    by label. TaskGraph instances should be treated as immutable.
+
+    In the graph, tasks are said to "link to" their dependencies. Whereas
+    tasks are "linked from" their dependents.
+    """
+
+    tasks: Dict[str, Task]
+    graph: Graph
+
+    def __post_init__(self):
+        assert set(self.tasks) == self.graph.nodes
+
+    def for_each_task(self, f, *args, **kwargs):
+        for task_label in self.graph.visit_postorder():
+            task = self.tasks[task_label]
+            f(task, self, *args, **kwargs)
+
+    def __getitem__(self, label):
+        "Get a task by label"
+        return self.tasks[label]
+
+    def __contains__(self, label):
+        return label in self.tasks
+
+    def __iter__(self):
+        "Iterate over tasks in undefined order"
+        return iter(self.tasks.values())
+
+    def to_json(self):
+        "Return a JSON-able object representing the task graph, as documented"
+        named_links_dict = self.graph.named_links_dict()
+        # this dictionary may be keyed by label or by taskid, so let's just call it 'key'
+        tasks = {}
+        for key in self.graph.visit_postorder():
+            tasks[key] = self.tasks[key].to_json()
+            # overwrite dependencies with the information in the taskgraph's edges.
+            tasks[key]["dependencies"] = named_links_dict.get(key, {})
+        return tasks
+
+    @classmethod
+    def from_json(cls, tasks_dict):
+        """
+        Build a TaskGraph from a dictionary, as produced by `to_json`, which is
+        representative of the TaskGraph.  Returns a `(tasks, task_graph)` tuple,
+        where `tasks` is the dictionary of Task objects by key.
+        """
+        tasks = {}
+        edges = set()
+        for key, value in tasks_dict.items():
+            # Task.from_json already restores `task_id` when it is present.
+            tasks[key] = Task.from_json(value)
+            for depname, dep in value["dependencies"].items():
+                edges.add((key, dep, depname))
+        task_graph = cls(tasks, Graph(set(tasks), edges))
+        return tasks, task_graph
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py
new file mode 100644
index 0000000000..4fa7b5fc0c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py
@@ -0,0 +1,3 @@
+from taskgraph.transforms import (  # noqa: Added for backwards compat
+    notify as release_notifications,
+)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py
new file mode 100644
index 0000000000..e6fcd2400c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py
@@ -0,0 +1,158 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import re
+from dataclasses import dataclass, field
+from typing import Dict, List, Union
+
+from taskgraph.task import Task
+
+from ..config import GraphConfig
+from ..parameters import Parameters
+from ..util.memoize import memoize
+from ..util.schema import Schema, validate_schema
+
+
+@dataclass(frozen=True)
+class RepoConfig:  # immutable description of one repository (see repo_configs)
+    prefix: str  # key of the repository in graph_config["taskgraph"]["repositories"]
+    name: str
+    base_repository: str
+    head_repository: str
+    head_ref: str
+    type: str
+    path: str = ""
+    head_rev: Union[str, None] = None  # repo_configs sets it for the current repo only
+    ssh_secret_name: Union[str, None] = None  # optional "ssh-secret-name" setting
+
+
+@dataclass(frozen=True, eq=False)
+class TransformConfig:
+    """
+    A container for configuration affecting transforms.  The `config` argument
+    to transforms is an instance of this class.
+    """
+
+    # the name of the current kind
+    kind: str
+
+    # the path to the kind configuration directory
+    path: str
+
+    # the parsed contents of kind.yml
+    config: Dict
+
+    # the parameters for this task-graph generation run
+    params: Parameters
+
+    # a dict of all the tasks associated with the kind dependencies of the
+    # current kind
+    kind_dependencies_tasks: Dict[str, Task]
+
+    # Global configuration of the taskgraph
+    graph_config: GraphConfig
+
+    # whether to write out artifacts for the decision task
+    write_artifacts: bool
+
+    @property
+    @memoize
+    def repo_configs(self):
+        """Map every configured repository prefix to its `RepoConfig`.  The current
+        repository (the only one, or the single `project-regex` match) is built
+        from the run's parameters; all others use their configured defaults."""
+        repositories = self.graph_config["taskgraph"]["repositories"]
+        if len(repositories) == 1:
+            current_prefix = next(iter(repositories))
+        else:
+            project = self.params["project"]
+            candidates = [
+                prefix
+                for prefix, repo in repositories.items()
+                if re.match(repo["project-regex"], project)
+            ]
+            if len(candidates) != 1:
+                raise Exception(
+                    f"Couldn't find repository matching project `{project}`"
+                )
+            current_prefix = candidates[0]
+
+        current = repositories[current_prefix]
+        configs = {
+            current_prefix: RepoConfig(
+                prefix=current_prefix,
+                name=current["name"],
+                base_repository=self.params["base_repository"],
+                head_repository=self.params["head_repository"],
+                head_ref=self.params["head_ref"],
+                head_rev=self.params["head_rev"],
+                type=self.params["repository_type"],
+                ssh_secret_name=current.get("ssh-secret-name"),
+            ),
+        }
+        for prefix, repo in repositories.items():
+            if prefix == current_prefix:
+                continue
+            configs[prefix] = RepoConfig(
+                prefix=prefix,
+                name=repo["name"],
+                base_repository=repo["default-repository"],
+                head_repository=repo["default-repository"],
+                head_ref=repo["default-ref"],
+                type=repo["type"],
+                ssh_secret_name=repo.get("ssh-secret-name"),
+            )
+        return configs
+
+
+@dataclass()
+class TransformSequence:
+    """An ordered pipeline of transforms that is itself usable as a transform.
+
+    Every transform is a callable taking (config, items) and returning a
+    generator of transformed items.  Use `@transforms.add` as a decorator to
+    append a function to the pipeline.
+    """
+
+    _transforms: List = field(default_factory=list)
+
+    def __call__(self, config, items):
+        for transform in self._transforms:
+            items = transform(config, items)
+            # e.g. a transform written without `yield` returns None.
+            if items is None:
+                raise Exception(f"Transform {transform} is not a generator")
+        return items
+
+    def add(self, func):
+        """Append `func` and return it unchanged, so it works as a decorator."""
+        self._transforms.append(func)
+        return func
+
+    def add_validate(self, schema):
+        # Validation is just another transform, appended at this position.
+        self.add(ValidateSchema(schema))
+
+
+@dataclass
+class ValidateSchema:
+    """Transform that validates each task against `schema` before yielding it."""
+
+    schema: Schema
+
+    def __call__(self, config, tasks):
+        for task in tasks:
+            # Build the error prefix from the best identifier the task offers.
+            if "name" in task:
+                prefix = f"In {config.kind} kind task {task['name']!r}:"
+            elif "label" in task:
+                prefix = f"In job {task['label']!r}:"
+            elif "primary-dependency" in task:
+                dependency = task["primary-dependency"].label
+                prefix = f"In {config.kind} kind task for {dependency!r}:"
+            else:
+                prefix = "In unknown task:"
+            validate_schema(self.schema, task, prefix)
+            yield task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py
new file mode 100644
index 0000000000..57a55dffb3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py
@@ -0,0 +1,90 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from collections import deque
+
+import taskgraph
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.cached_tasks import add_optimization
+
+transforms = TransformSequence()  # populated below through @transforms.add
+
+
+def order_tasks(config, tasks):
+    """Iterate image tasks in an order where parent tasks come first; raise an
+    Exception instead of looping forever when the parent links form a cycle."""
+    if config.kind == "docker-image":
+        kind_prefix = "build-docker-image-"
+    else:
+        kind_prefix = config.kind + "-"
+
+    pending = deque(tasks)
+    task_labels = {task["label"] for task in pending}
+    emitted = set()
+    deferred = 0  # tasks re-queued in a row since the last one was emitted
+    while pending:
+        task = pending.popleft()
+        deps = task.get("dependencies", {}).values()
+        parents = {dep for dep in deps if dep.startswith(kind_prefix)}
+        if not emitted.issuperset(parents & task_labels):
+            pending.append(task)
+            deferred += 1
+            if deferred >= len(pending):  # a whole pass without any progress
+                raise Exception(f"Cyclic parent dependencies in kind {config.kind}")
+            continue
+        deferred = 0
+        emitted.add(task["label"])
+        yield task
+
+
+def format_task_digest(cached_task):
+    """Build the digest string "<type>/<name>/<digest>" for a cached task.
+
+    `cached_task` is a mapping providing the "type", "name" and "digest" keys,
+    each holding a string.
+    """
+    parts = [cached_task[key] for key in ("type", "name", "digest")]
+    return "/".join(parts)
+
+
+@transforms.add
+def cache_task(config, tasks):
+    """Attach a cache optimization to every task that carries a `cache` entry.
+
+    A cached task's digest also covers the digests of all of its dependencies,
+    so each dependency must itself be cached; other tasks pass through as-is."""
+    # Fast mode skips the digest computation entirely.
+    if taskgraph.fast:
+        yield from tasks
+        return
+
+    # Digests known so far, by label: first those of the kind dependencies,
+    # then extended below with every cached task of this kind.
+    digests = {
+        kind_dep.label: format_task_digest(kind_dep.attributes["cached_task"])
+        for kind_dep in config.kind_dependencies_tasks.values()
+        if "cached_task" in kind_dep.attributes
+    }
+
+    for task in order_tasks(config, tasks):
+        cache = task.pop("cache", None)
+        if cache is not None:
+            parent_digests = []
+            for dep in task.get("dependencies", {}).values():
+                if dep not in digests:
+                    raise Exception(
+                        f"Cached task {task['label']} has uncached parent task: {dep}"
+                    )
+                parent_digests.append(digests[dep])
+            add_optimization(
+                config,
+                task,
+                digest_data=cache["digest-data"] + sorted(parent_digests),
+                cache_type=cache["type"],
+                cache_name=cache["name"],
+            )
+            cached = task["attributes"]["cached_task"]
+            digests[task["label"]] = format_task_digest(cached)
+        yield task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/chunking.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/chunking.py
new file mode 100644
index 0000000000..31d7eff82c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/chunking.py
@@ -0,0 +1,82 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+import copy
+from textwrap import dedent
+
+from voluptuous import ALLOW_EXTRA, Optional, Required
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.schema import Schema
+from taskgraph.util.templates import substitute
+
+CHUNK_SCHEMA = Schema(
+    {
+        # `chunk` is optional, so it can be used for a subset of tasks in a kind.
+        Optional(
+            "chunk",
+            description=dedent(
+                """
+            `chunk` can be used to split one task into `total-chunks`
+            tasks, substituting `this_chunk` and `total_chunks` into any
+            fields in `substitution-fields`.
+            """.lstrip()
+            ),
+        ): {
+            Required(
+                "total-chunks",
+                description=dedent(
+                    """
+                The total number of chunks to split the task into.
+                """.lstrip()
+                ),
+            ): int,
+            Optional(
+                "substitution-fields",
+                description=dedent(
+                    """
+                A list of fields that need to have `{this_chunk}` and/or
+                `{total_chunks}` replaced in them.
+                """.lstrip()
+                ),
+            ): [str],
+        }
+    },
+    extra=ALLOW_EXTRA,  # keys other than `chunk` are accepted without validation
+)
+
+transforms = TransformSequence()
+transforms.add_validate(CHUNK_SCHEMA)  # schema validation is the first transform
+
+
+@transforms.add
+def chunk_tasks(config, tasks):
+    """Split each task that has a `chunk` entry into `total-chunks` subtasks.
+
+    Every subtask gets `this_chunk`/`total_chunks` attributes, and the same
+    values are substituted into each of the optional `substitution-fields`.
+    Tasks without a `chunk` entry are yielded unchanged.
+    """
+    for task in tasks:
+        chunk_config = task.pop("chunk", None)
+        if not chunk_config:
+            yield task
+            continue
+
+        total_chunks = chunk_config["total-chunks"]
+        # `substitution-fields` is Optional in CHUNK_SCHEMA: default to no fields.
+        fields = chunk_config.get("substitution-fields", [])
+        for this_chunk in range(1, total_chunks + 1):
+            subtask = copy.deepcopy(task)
+            subs = {"this_chunk": this_chunk, "total_chunks": total_chunks}
+            subtask.setdefault("attributes", {}).update(subs)
+            for field in fields:
+                # Follow a dotted path ("a.b.c") down to the innermost container.
+                container, subfield = subtask, field
+                while "." in subfield:
+                    f, subfield = subfield.split(".", 1)
+                    container = container[f]
+                subcontainer = copy.deepcopy(container[subfield])
+                subfield = substitute(subfield, **subs)
+                container[subfield] = substitute(subcontainer, **subs)
+            yield subtask
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py
new file mode 100644
index 0000000000..bdb655b97d
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py
@@ -0,0 +1,23 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Add soft dependencies and configuration to code-review tasks.
+"""
+
+
+from taskgraph.transforms.base import TransformSequence
+
+transforms = TransformSequence()  # populated below through @transforms.add
+
+
+@transforms.add
+def add_dependencies(config, jobs):
+    """Add every kind dependency flagged `code-review: True` as a soft dependency."""
+    for job in jobs:
+        soft_deps = job.setdefault("soft-dependencies", [])
+        # The list returned by setdefault is the job's own, so appends are in place.
+        for candidate in config.kind_dependencies_tasks.values():
+            if candidate.attributes.get("code-review") is True:
+                soft_deps.append(candidate.label)
+        yield job
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py
new file mode 100644
index 0000000000..d0c5b9c97b
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py
@@ -0,0 +1,214 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import json
+import logging
+import os
+import re
+
+from voluptuous import Optional, Required
+
+import taskgraph
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.docker import create_context_tar, generate_context_hash
+from taskgraph.util.schema import Schema
+
+from .task import task_description_schema
+
+logger = logging.getLogger(__name__)
+# Directory that docker context tarballs are written to by `fill_template`.
+CONTEXTS_DIR = "docker-contexts"
+# Pattern for a 64-character lowercase hexadecimal string.
+DIGEST_RE = re.compile("^[0-9a-f]{64}$")
+# Image used as the `docker-image` of the generated image build tasks.
+IMAGE_BUILDER_IMAGE = (
+    "mozillareleases/image_builder:5.0.0"
+    "@sha256:"
+    "e510a9a9b80385f71c112d61b2f2053da625aff2b6d430411ac42e424c58953f"
+)
+
+transforms = TransformSequence()
+
+docker_image_schema = Schema(
+    {
+        # Name of the docker image.
+        Required("name"): str,
+        # Name of the parent docker image, which becomes the `parent` dependency.
+        Optional("parent"): str,
+        # Treeherder symbol; without it the task gets no `treeherder` section.
+        Optional("symbol"): str,
+        # Relative path (from config.path) to the file the docker image was
+        # defined in.
+        Optional("task-from"): str,
+        # Arguments to use for the Dockerfile.
+        Optional("args"): {str: str},
+        # Name of the docker image definition under taskcluster/docker, when
+        # different from the docker image name.
+        Optional("definition"): str,
+        # List of package tasks this docker image depends on (no `packages-` prefix).
+        Optional("packages"): [str],
+        Optional(
+            "index",
+            description="information for indexing this build so its artifacts can be discovered",
+        ): task_description_schema["index"],
+        Optional(
+            "cache",
+            description="Whether this image should be cached based on inputs.",
+        ): bool,
+    }
+)
+
+
+transforms.add_validate(docker_image_schema)  # validation is the first transform
+
+
+@transforms.add
+def fill_template(config, tasks):
+    """Turn each docker image definition into an image build task description;
+    raises if an image needs a package that no `packages` kind dependency
+    provides, or if artifacts are requested while `taskgraph.fast` is set."""
+    available_packages = set()
+    for task in config.kind_dependencies_tasks.values():
+        if task.kind != "packages":
+            continue
+        # Strip only the leading kind prefix: a package name may contain it too.
+        name = task.label.replace("packages-", "", 1)
+        available_packages.add(name)
+
+    tasks = list(tasks)
+
+    if not taskgraph.fast and config.write_artifacts:
+        os.makedirs(CONTEXTS_DIR, exist_ok=True)
+
+    for task in tasks:
+        image_name = task.pop("name")
+        job_symbol = task.pop("symbol", None)
+        args = task.pop("args", {})
+        definition = task.pop("definition", image_name)
+        packages = task.pop("packages", [])
+        parent = task.pop("parent", None)
+
+        for p in packages:
+            if p not in available_packages:
+                raise Exception(
+                    "Missing package job for {}-{}: {}".format(
+                        config.kind, image_name, p
+                    )
+                )
+
+        if not taskgraph.fast:
+            context_path = os.path.join("taskcluster", "docker", definition)
+            topsrcdir = os.path.dirname(config.graph_config.taskcluster_yml)
+            if config.write_artifacts:
+                context_file = os.path.join(CONTEXTS_DIR, f"{image_name}.tar.gz")
+                logger.info(f"Writing {context_file} for docker image {image_name}")
+                context_hash = create_context_tar(
+                    topsrcdir,
+                    context_path,
+                    context_file,
+                    args,
+                )
+            else:
+                context_hash = generate_context_hash(topsrcdir, context_path, args)
+        else:
+            if config.write_artifacts:
+                raise Exception("Can't write artifacts if `taskgraph.fast` is set.")
+            context_hash = "0" * 40
+        digest_data = [context_hash]
+        digest_data += [json.dumps(args, sort_keys=True)]
+
+        description = "Build the docker image {} for use by dependent tasks".format(
+            image_name
+        )
+
+        args["DOCKER_IMAGE_PACKAGES"] = " ".join(f"<{p}>" for p in packages)
+
+        # Adjust the zstandard compression level based on the execution level.
+        # We use faster compression for level 1 because we care more about
+        # end-to-end times. We use slower/better compression for other levels
+        # because images are read more often and it is worth the trade-off to
+        # burn more CPU once to reduce image size.
+        zstd_level = "3" if int(config.params["level"]) == 1 else "10"
+
+        # include some information that is useful in reconstructing this task
+        # from JSON
+        taskdesc = {
+            "label": "build-docker-image-" + image_name,
+            "description": description,
+            "attributes": {
+                "image_name": image_name,
+                "artifact_prefix": "public",
+            },
+            "always-target": True,
+            "expires-after": "28 days" if config.params.is_try() else "1 year",
+            "scopes": [],
+            "run-on-projects": [],
+            "worker-type": "images",
+            "worker": {
+                "implementation": "docker-worker",
+                "os": "linux",
+                "artifacts": [
+                    {
+                        "type": "file",
+                        "path": "/workspace/image.tar.zst",
+                        "name": "public/image.tar.zst",
+                    }
+                ],
+                "env": {
+                    "CONTEXT_TASK_ID": {"task-reference": "<decision>"},
+                    "CONTEXT_PATH": "public/docker-contexts/{}.tar.gz".format(
+                        image_name
+                    ),
+                    "HASH": context_hash,
+                    "PROJECT": config.params["project"],
+                    "IMAGE_NAME": image_name,
+                    "DOCKER_IMAGE_ZSTD_LEVEL": zstd_level,
+                    "DOCKER_BUILD_ARGS": {
+                        "task-reference": json.dumps(args),
+                    },
+                    "VCS_BASE_REPOSITORY": config.params["base_repository"],
+                    "VCS_HEAD_REPOSITORY": config.params["head_repository"],
+                    "VCS_HEAD_REV": config.params["head_rev"],
+                    "VCS_REPOSITORY_TYPE": config.params["repository_type"],
+                },
+                "chain-of-trust": True,
+                "max-run-time": 7200,
+            },
+        }
+        if "index" in task:
+            taskdesc["index"] = task["index"]
+        if job_symbol:
+            taskdesc["treeherder"] = {
+                "symbol": job_symbol,
+                "platform": "taskcluster-images/opt",
+                "kind": "other",
+                "tier": 1,
+            }
+
+        worker = taskdesc["worker"]
+
+        worker["docker-image"] = IMAGE_BUILDER_IMAGE
+        digest_data.append(f"image-builder-image:{IMAGE_BUILDER_IMAGE}")
+
+        if packages:
+            deps = taskdesc.setdefault("dependencies", {})
+            for p in sorted(packages):
+                deps[p] = f"packages-{p}"
+
+        if parent:
+            deps = taskdesc.setdefault("dependencies", {})
+            deps["parent"] = f"build-docker-image-{parent}"
+            worker["env"]["PARENT_TASK_ID"] = {
+                "task-reference": "<parent>",
+            }
+
+        if task.get("cache", True) and not taskgraph.fast:
+            taskdesc["cache"] = {
+                "type": "docker-images.v2",
+                "name": image_name,
+                "digest-data": digest_data,
+            }
+
+        yield taskdesc
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py
new file mode 100644
index 0000000000..bcb8ff38a6
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py
@@ -0,0 +1,336 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Support for running tasks that download remote content and re-export
+# it as task artifacts.
+
+
+import os
+import re
+from dataclasses import dataclass
+from typing import Callable
+
+from voluptuous import Extra, Optional, Required
+
+import taskgraph
+
+from ..util import path
+from ..util.cached_tasks import add_optimization
+from ..util.schema import Schema, validate_schema
+from ..util.treeherder import join_symbol
+from .base import TransformSequence
+
+CACHE_TYPE = "content.v1"  # cache_type handed to add_optimization() in make_task
+
+FETCH_SCHEMA = Schema(
+    {
+        # Name of the task.
+        Required("name"): str,
+        # Relative path (from config.path) to the file the task was defined
+        # in.
+        Optional("task-from"): str,
+        # Description of the task.
+        Required("description"): str,
+        Optional("docker-image"): object,  # make_task falls back to {"in-tree": "fetch"}
+        Optional(
+            "fetch-alias",
+            description="An alias that can be used instead of the real fetch job name in "
+            "fetch stanzas for jobs.",
+        ): str,
+        Optional(
+            "artifact-prefix",
+            description="The prefix of the taskcluster artifact being uploaded. "
+            "Defaults to `public/`; if it starts with something other than "
+            "`public/` the artifact will require scopes to access.",
+        ): str,
+        Optional("attributes"): {str: object},
+        Required("fetch"): {
+            Required("type"): str,  # must name a builder registered via fetch_builder()
+            Extra: object,  # checked against that builder's schema in process_fetch_job
+        },
+    }
+)
+
+
+# Registry of fetch builders, keyed by fetch ``type`` name (filled by ``fetch_builder``).
+fetch_builders = {}
+
+
+@dataclass(frozen=True)
+class FetchBuilder:  # Immutable (schema, builder) pair stored in ``fetch_builders``.
+    schema: Schema  # Passed to validate_schema() for a job's ``fetch`` stanza.
+    builder: Callable  # Invoked as builder(config, name, fetch) by configure_fetch().
+
+
+def fetch_builder(name, schema):
+    schema = Schema({Required("type"): name}).extend(schema)
+
+    def wrap(func):
+        fetch_builders[name] = FetchBuilder(schema, func)
+        return func
+
+    return wrap
+
+
+transforms = TransformSequence()
+transforms.add_validate(FETCH_SCHEMA)
+
+
+@transforms.add
+def process_fetch_job(config, jobs):
+    # Converts fetch-url entries to the job schema.
+    for job in jobs:
+        typ = job["fetch"]["type"]
+        name = job["name"]
+        fetch = job.pop("fetch")
+
+        if typ not in fetch_builders:
+            raise Exception(f"Unknown fetch type {typ} in fetch {name}")
+        validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")
+
+        job.update(configure_fetch(config, typ, name, fetch))
+
+        yield job
+
+
+def configure_fetch(config, typ, name, fetch):
+    """Validate `fetch` against the schema registered for `typ`, then run its builder."""
+    if typ not in fetch_builders:
+        raise Exception(f"No fetch type {typ} in fetch {name}")
+    validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")
+    return fetch_builders[typ].builder(config, name, fetch)
+
+
+@transforms.add
+def make_task(config, jobs):
+    # Fetch tasks are idempotent and immutable. Have them live for
+    # essentially forever.
+    if config.params["level"] == "3":
+        expires = "1000 years"
+    else:
+        expires = "28 days"
+
+    for job in jobs:
+        name = job["name"]
+        artifact_prefix = job.get("artifact-prefix", "public")
+        env = job.get("env", {})
+        env.update({"UPLOAD_DIR": "/builds/worker/artifacts"})
+        attributes = job.get("attributes", {})
+        attributes["fetch-artifact"] = path.join(artifact_prefix, job["artifact_name"])
+        alias = job.get("fetch-alias")
+        if alias:
+            attributes["fetch-alias"] = alias
+
+        task = {
+            "attributes": attributes,
+            "name": name,
+            "description": job["description"],
+            "expires-after": expires,
+            "label": "fetch-%s" % name,
+            "run-on-projects": [],
+            "run": {
+                "using": "run-task",
+                "checkout": False,
+                "command": job["command"],
+            },
+            "worker-type": "images",
+            "worker": {
+                "chain-of-trust": True,
+                "docker-image": job.get("docker-image", {"in-tree": "fetch"}),
+                "env": env,
+                "max-run-time": 900,
+                "artifacts": [
+                    {
+                        "type": "directory",
+                        "name": artifact_prefix,
+                        "path": "/builds/worker/artifacts",
+                    }
+                ],
+            },
+        }
+
+        if "treeherder" in config.graph_config:
+            task["treeherder"] = {
+                "symbol": join_symbol("Fetch", name),
+                "kind": "build",
+                "platform": "fetch/opt",
+                "tier": 1,
+            }
+
+        if job.get("secret", None):
+            task["scopes"] = ["secrets:get:" + job.get("secret")]
+            task["worker"]["taskcluster-proxy"] = True
+
+        if not taskgraph.fast:
+            cache_name = task["label"].replace(f"{config.kind}-", "", 1)
+
+            # This adds the level to the index path automatically.
+            add_optimization(
+                config,
+                task,
+                cache_type=CACHE_TYPE,
+                cache_name=cache_name,
+                digest_data=job["digest_data"],
+            )
+        yield task
+
+
+@fetch_builder(
+    "static-url",
+    schema={
+        # The URL to download.
+        Required("url"): str,
+        # The SHA-256 of the downloaded content.
+        Required("sha256"): str,
+        # Size of the downloaded entity, in bytes.
+        Required("size"): int,
+        # GPG signature verification.
+        Optional("gpg-signature"): {
+            # URL where GPG signature document can be obtained. Can contain the
+            # value ``{url}``, which will be substituted with the value from
+            # ``url``.
+            Required("sig-url"): str,
+            # Path to file containing GPG public key(s) used to validate
+            # download.
+            Required("key-path"): str,
+        },
+        # The name to give to the generated artifact. Defaults to the file
+        # portion of the URL. Using a different extension converts the
+        # archive to the given type. Only conversion to .tar.zst is
+        # supported.
+        Optional("artifact-name"): str,
+        # Strip the given number of path components at the beginning of
+        # each file entry in the archive.
+        # Requires an artifact-name ending with .tar.zst.
+        Optional("strip-components"): int,
+        # Add the given prefix to each file entry in the archive.
+        # Requires an artifact-name ending with .tar.zst.
+        Optional("add-prefix"): str,
+        # Headers to pass alongside the request.
+        Optional("headers"): {
+            str: str,
+        },
+        # IMPORTANT: when adding anything that changes the behavior of the task,
+        # it is important to update the digest data used to compute cache hits.
+    },
+)
+def create_fetch_url_task(config, name, fetch):
+    artifact_name = fetch.get("artifact-name")
+    if not artifact_name:
+        artifact_name = fetch["url"].split("/")[-1]
+
+    command = [
+        "fetch-content",
+        "static-url",
+    ]
+
+    # Arguments that matter to the cache digest
+    args = [
+        "--sha256",
+        fetch["sha256"],
+        "--size",
+        "%d" % fetch["size"],
+    ]
+
+    if fetch.get("strip-components"):
+        args.extend(["--strip-components", "%d" % fetch["strip-components"]])
+
+    if fetch.get("add-prefix"):
+        args.extend(["--add-prefix", fetch["add-prefix"]])
+
+    command.extend(args)
+
+    env = {}
+
+    if "gpg-signature" in fetch:
+        sig_url = fetch["gpg-signature"]["sig-url"].format(url=fetch["url"])
+        key_path = os.path.join(taskgraph.GECKO, fetch["gpg-signature"]["key-path"])
+
+        with open(key_path) as fh:
+            gpg_key = fh.read()
+
+        env["FETCH_GPG_KEY"] = gpg_key
+        command.extend(
+            [
+                "--gpg-sig-url",
+                sig_url,
+                "--gpg-key-env",
+                "FETCH_GPG_KEY",
+            ]
+        )
+
+    if "headers" in fetch:
+        for k, v in fetch["headers"].items():
+            command.extend(["-H", f"{k}:{v}"])
+
+    command.extend(
+        [
+            fetch["url"],
+            "/builds/worker/artifacts/%s" % artifact_name,
+        ]
+    )
+
+    return {
+        "command": command,
+        "artifact_name": artifact_name,
+        "env": env,
+        # We don't include the GPG signature in the digest because it isn't
+        # materially important for caching: GPG signatures are supplemental
+        # trust checking beyond what the shasum already provides.
+        "digest_data": args + [artifact_name],
+    }
+
+
+@fetch_builder(
+    "git",
+    schema={
+        Required("repo"): str,
+        Required("revision"): str,
+        Optional("include-dot-git"): bool,
+        Optional("artifact-name"): str,
+        Optional("path-prefix"): str,
+        # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key)
+        # In the secret dictionary, the key should be specified as
+        #  "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..."
+        # n.b. The OpenSSH private key file format requires a newline at the end of the file.
+        Optional("ssh-key"): str,
+    },
+)
+def create_git_fetch_task(config, name, fetch):
+    path_prefix = fetch.get("path-prefix")
+    if not path_prefix:
+        path_prefix = fetch["repo"].rstrip("/").rsplit("/", 1)[-1]
+    artifact_name = fetch.get("artifact-name")
+    if not artifact_name:
+        artifact_name = f"{path_prefix}.tar.zst"
+
+    if not re.match(r"[0-9a-fA-F]{40}", fetch["revision"]):
+        raise Exception(f'Revision is not a sha1 in fetch task "{name}"')
+
+    args = [
+        "fetch-content",
+        "git-checkout-archive",
+        "--path-prefix",
+        path_prefix,
+        fetch["repo"],
+        fetch["revision"],
+        "/builds/worker/artifacts/%s" % artifact_name,
+    ]
+
+    ssh_key = fetch.get("ssh-key")
+    if ssh_key:
+        args.append("--ssh-key-secret")
+        args.append(ssh_key)
+
+    digest_data = [fetch["revision"], path_prefix, artifact_name]
+    if fetch.get("include-dot-git", False):
+        args.append("--include-dot-git")
+        digest_data.append(".git")
+
+    return {
+        "command": args,
+        "artifact_name": artifact_name,
+        "digest_data": digest_data,
+        "secret": ssh_key,
+    }
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/from_deps.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/from_deps.py
new file mode 100644
index 0000000000..337d68e4ba
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/from_deps.py
@@ -0,0 +1,242 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Transforms used to create tasks based on the kind dependencies, filtering on
+common attributes like the ``build-type``.
+
+These transforms are useful when follow-up tasks are needed for some
+indeterminate subset of existing tasks. For example, running a signing task
+after each build task, whatever builds may exist.
+"""
+from copy import deepcopy
+from textwrap import dedent
+
+from voluptuous import Any, Extra, Optional, Required
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.transforms.job import fetches_schema
+from taskgraph.util.attributes import attrmatch
+from taskgraph.util.dependencies import GROUP_BY_MAP, get_dependencies
+from taskgraph.util.schema import Schema, validate_schema
+
+FROM_DEPS_SCHEMA = Schema(
+    {
+        Required("from-deps"): {
+            Optional(
+                "kinds",
+                description=dedent(
+                    """
+                Limit dependencies to specified kinds (defaults to all kinds in
+                `kind-dependencies`).
+
+                The first kind in the list is the "primary" kind. The
+                dependency of this kind will be used to derive the label
+                and copy attributes (if `copy-attributes` is True).
+                """.lstrip()
+                ),
+            ): list,
+            Optional(
+                "set-name",
+                description=dedent(
+                    """
+                When True, `from_deps` will derive a name for the generated
+                tasks from the name of the primary dependency. Defaults to
+                True.
+                """.lstrip()
+                ),
+            ): bool,
+            Optional(
+                "with-attributes",
+                description=dedent(
+                    """
+                Limit dependencies to tasks whose attributes match
+                using :func:`~taskgraph.util.attributes.attrmatch`.
+                """.lstrip()
+                ),
+            ): {str: Any(list, str)},
+            Optional(
+                "group-by",
+                description=dedent(
+                    """
+                Group cross-kind dependencies using the given group-by
+                function. One task will be created for each group. If not
+                specified, the 'single' function will be used which creates
+                a new task for each individual dependency.
+                """.lstrip()
+                ),
+            ): Any(
+                None,
+                *GROUP_BY_MAP,
+                {Any(*GROUP_BY_MAP): object},  # {function-name: argument} form
+            ),
+            Optional(
+                "copy-attributes",
+                description=dedent(
+                    """
+                If True, copy attributes from the dependency matching the
+                first kind in the `kinds` list (whether specified explicitly
+                or taken from `kind-dependencies`).
+                """.lstrip()
+                ),
+            ): bool,  # from_deps() treats a missing value as False
+            Optional(
+                "unique-kinds",
+                description=dedent(
+                    """
+                If true (the default), there must be only a single unique task
+                for each kind in a dependency group. Setting this to false
+                disables that requirement.
+                """.lstrip()
+                ),
+            ): bool,
+            Optional(
+                "fetches",
+                description=dedent(
+                    """
+                If present, a `fetches` entry will be added for each task
+                dependency. Attributes of the upstream task may be used as
+                substitution values in the `artifact` or `dest` values of the
+                `fetches` entry.
+                """.lstrip()
+                ),
+            ): {str: [fetches_schema]},  # keyed by the dependency's kind
+        },
+        Extra: object,
+    },
+)
+"""Schema for from_deps transforms."""
+
+transforms = TransformSequence()
+transforms.add_validate(FROM_DEPS_SCHEMA)
+
+
+@transforms.add
+def from_deps(config, tasks):
+    for task in tasks:
+        # Setup and error handling.
+        from_deps = task.pop("from-deps")
+        kind_deps = config.config.get("kind-dependencies", [])
+        kinds = from_deps.get("kinds", kind_deps)
+
+        invalid = set(kinds) - set(kind_deps)
+        if invalid:
+            invalid = "\n".join(sorted(invalid))
+            raise Exception(
+                dedent(
+                    f"""
+                    The `from-deps.kinds` key contains the following kinds
+                    that are not defined in `kind-dependencies`:
+                    {invalid}
+                """.lstrip()
+                )
+            )
+
+        if not kinds:
+            raise Exception(
+                dedent(
+                    """
+                The `from_deps` transforms require at least one kind defined
+                in `kind-dependencies`!
+                """.lstrip()
+                )
+            )
+
+        # Resolve desired dependencies.
+        with_attributes = from_deps.get("with-attributes")
+        deps = [
+            task
+            for task in config.kind_dependencies_tasks.values()
+            if task.kind in kinds
+            if not with_attributes or attrmatch(task.attributes, **with_attributes)
+        ]
+
+        # Resolve groups.
+        group_by = from_deps.get("group-by", "single")
+        groups = set()
+
+        if isinstance(group_by, dict):
+            assert len(group_by) == 1
+            group_by, arg = group_by.popitem()
+            func = GROUP_BY_MAP[group_by]
+            if func.schema:
+                validate_schema(
+                    func.schema, arg, f"Invalid group-by {group_by} argument"
+                )
+            groups = func(config, deps, arg)
+        else:
+            func = GROUP_BY_MAP[group_by]
+            groups = func(config, deps)
+
+        # Split the task, one per group.
+        set_name = from_deps.get("set-name", True)
+        copy_attributes = from_deps.get("copy-attributes", False)
+        unique_kinds = from_deps.get("unique-kinds", True)
+        fetches = from_deps.get("fetches", [])
+        for group in groups:
+            # Verify there is only one task per kind in each group.
+            group_kinds = {t.kind for t in group}
+            if unique_kinds and len(group_kinds) < len(group):
+                raise Exception(
+                    "The from_deps transforms only allow a single task per kind in a group!"
+                )
+
+            new_task = deepcopy(task)
+            new_task.setdefault("dependencies", {})
+            new_task["dependencies"].update(
+                {dep.kind if unique_kinds else dep.label: dep.label for dep in group}
+            )
+
+            # Set name and copy attributes from the primary kind.
+            for kind in kinds:
+                if kind in group_kinds:
+                    primary_kind = kind
+                    break
+            else:
+                raise Exception("Could not detect primary kind!")
+
+            new_task.setdefault("attributes", {})[
+                "primary-kind-dependency"
+            ] = primary_kind
+
+            primary_dep = [dep for dep in group if dep.kind == primary_kind][0]
+
+            if set_name:
+                if primary_dep.label.startswith(primary_kind):
+                    new_task["name"] = primary_dep.label[len(primary_kind) + 1 :]
+                else:
+                    new_task["name"] = primary_dep.label
+
+            if copy_attributes:
+                attrs = new_task.setdefault("attributes", {})
+                new_task["attributes"] = primary_dep.attributes.copy()
+                new_task["attributes"].update(attrs)
+
+            if fetches:
+                task_fetches = new_task.setdefault("fetches", {})
+
+                for dep_task in get_dependencies(config, new_task):
+                    # Nothing to do if this kind has no fetches listed
+                    if dep_task.kind not in fetches:
+                        continue
+
+                    fetches_from_dep = []
+                    for kind, kind_fetches in fetches.items():
+                        if kind != dep_task.kind:
+                            continue
+
+                        for fetch in kind_fetches:
+                            entry = fetch.copy()
+                            entry["artifact"] = entry["artifact"].format(
+                                **dep_task.attributes
+                            )
+                            if "dest" in entry:
+                                entry["dest"] = entry["dest"].format(
+                                    **dep_task.attributes
+                                )
+                            fetches_from_dep.append(entry)
+
+                    task_fetches[dep_task.label] = fetches_from_dep
+
+            yield new_task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py
new file mode 100644
index 0000000000..06978ff46d
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py
@@ -0,0 +1,453 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Convert a job description into a task description.
+
+Job descriptions are similar to task descriptions, but they specify how to run
+the job at a higher level, using a "run" field that can be interpreted by
+run-using handlers in `taskgraph/transforms/job`.
+"""
+
+
+import copy
+import json
+import logging
+
+from voluptuous import Any, Exclusive, Extra, Optional, Required
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.transforms.cached_tasks import order_tasks
+from taskgraph.transforms.task import task_description_schema
+from taskgraph.util import path as mozpath
+from taskgraph.util.python_path import import_sibling_modules
+from taskgraph.util.schema import Schema, validate_schema
+from taskgraph.util.taskcluster import get_artifact_prefix
+from taskgraph.util.workertypes import worker_type_implementation
+
+logger = logging.getLogger(__name__)
+
+# Fetches may be accepted in other transforms and eventually passed along
+# to a `job` (eg: from_deps). Defining this here allows them to re-use
+# the schema and avoid duplication.
+fetches_schema = {
+    Required("artifact"): str,
+    Optional("dest"): str,
+    Optional("extract"): bool,
+    Optional("verify-hash"): bool,
+}
+
+# Schema for a job description
+job_description_schema = Schema(
+    {
+        # The name of the job and the job's label.  At least one must be specified,
+        # and the label will be generated from the name if necessary, by prepending
+        # the kind.
+        Optional("name"): str,
+        Optional("label"): str,
+        # the following fields are passed directly through to the task description,
+        # possibly modified by the run implementation.  See task_description_schema
+        # in taskgraph/transforms/task.py for the schema details.
+        Required("description"): task_description_schema["description"],
+        Optional("attributes"): task_description_schema["attributes"],
+        Optional("task-from"): task_description_schema["task-from"],
+        Optional("dependencies"): task_description_schema["dependencies"],
+        Optional("soft-dependencies"): task_description_schema["soft-dependencies"],
+        Optional("if-dependencies"): task_description_schema["if-dependencies"],
+        Optional("requires"): task_description_schema["requires"],
+        Optional("expires-after"): task_description_schema["expires-after"],
+        Optional("routes"): task_description_schema["routes"],
+        Optional("scopes"): task_description_schema["scopes"],
+        Optional("tags"): task_description_schema["tags"],
+        Optional("extra"): task_description_schema["extra"],
+        Optional("treeherder"): task_description_schema["treeherder"],
+        Optional("index"): task_description_schema["index"],
+        Optional("run-on-projects"): task_description_schema["run-on-projects"],
+        Optional("run-on-tasks-for"): task_description_schema["run-on-tasks-for"],
+        Optional("run-on-git-branches"): task_description_schema["run-on-git-branches"],
+        Optional("shipping-phase"): task_description_schema["shipping-phase"],
+        Optional("always-target"): task_description_schema["always-target"],
+        Exclusive("optimization", "optimization"): task_description_schema[
+            "optimization"
+        ],
+        Optional("needs-sccache"): task_description_schema["needs-sccache"],
+        # The "when" section contains descriptions of the circumstances under which
+        # this task should be included in the task graph.  This will be converted
+        # into an optimization, so it cannot be specified in a job description that
+        # also gives 'optimization'.
+        Exclusive("when", "optimization"): {
+            # This task only needs to be run if a file matching one of the given
+            # patterns has changed in the push.  The patterns use the mozpack
+            # match function (python/mozbuild/mozpack/path.py).
+            Optional("files-changed"): [str],
+        },
+        # A list of artifacts to install from 'fetch' tasks.
+        Optional("fetches"): {
+            Any("toolchain", "fetch"): [str],
+            str: [
+                str,
+                fetches_schema,
+            ],
+        },
+        # A description of how to run this job.
+        "run": {
+            # The key to a job implementation in a peer module to this one
+            "using": str,
+            # Base work directory used to set up the task.
+            Optional("workdir"): str,
+            # Any remaining content is verified against that job implementation's
+            # own schema.
+            Extra: object,
+        },
+        Required("worker-type"): task_description_schema["worker-type"],
+        # This object will be passed through to the task description, with additions
+        # provided by the job's run-using function
+        Optional("worker"): dict,
+    }
+)
+
+transforms = TransformSequence()
+transforms.add_validate(job_description_schema)
+
+
+@transforms.add
+def rewrite_when_to_optimization(config, jobs):
+    for job in jobs:
+        when = job.pop("when", {})
+        if not when:
+            yield job
+            continue
+
+        files_changed = when.get("files-changed")
+
+        # implicitly add task config directory.
+        files_changed.append(f"{config.path}/**")
+
+        # "only when files changed" implies "skip if files have not changed"
+        job["optimization"] = {"skip-unless-changed": files_changed}
+
+        assert "when" not in job
+        yield job
+
+
+@transforms.add
+def set_implementation(config, jobs):
+    for job in jobs:
+        impl, os = worker_type_implementation(config.graph_config, job["worker-type"])
+        if os:
+            job.setdefault("tags", {})["os"] = os
+        if impl:
+            job.setdefault("tags", {})["worker-implementation"] = impl
+        worker = job.setdefault("worker", {})
+        assert "implementation" not in worker
+        worker["implementation"] = impl
+        if os:
+            worker["os"] = os
+        yield job
+
+
+@transforms.add
+def set_label(config, jobs):
+    for job in jobs:
+        if "label" not in job:
+            if "name" not in job:
+                raise Exception("job has neither a name nor a label")
+            job["label"] = "{}-{}".format(config.kind, job["name"])
+        if job.get("name"):
+            del job["name"]
+        yield job
+
+
+@transforms.add
+def add_resource_monitor(config, jobs):
+    for job in jobs:
+        if job.get("attributes", {}).get("resource-monitor"):
+            worker_implementation, worker_os = worker_type_implementation(
+                config.graph_config, job["worker-type"]
+            )
+            # Normalise worker os so that linux-bitbar and similar use linux tools.
+            worker_os = worker_os.split("-")[0]
+            if "win7" in job["worker-type"]:
+                arch = "32"
+            else:
+                arch = "64"
+            job.setdefault("fetches", {})
+            job["fetches"].setdefault("toolchain", [])
+            job["fetches"]["toolchain"].append(f"{worker_os}{arch}-resource-monitor")
+
+            if worker_implementation == "docker-worker":
+                artifact_source = "/builds/worker/monitoring/resource-monitor.json"
+            else:
+                artifact_source = "monitoring/resource-monitor.json"
+            job["worker"].setdefault("artifacts", [])
+            job["worker"]["artifacts"].append(
+                {
+                    "name": "public/monitoring/resource-monitor.json",
+                    "type": "file",
+                    "path": artifact_source,
+                }
+            )
+            # Set env for output file
+            job["worker"].setdefault("env", {})
+            job["worker"]["env"]["RESOURCE_MONITOR_OUTPUT"] = artifact_source
+
+        yield job
+
+
+def get_attribute(dict, key, attributes, attribute_name):
+    """Get `attribute_name` from the given `attributes` dict, and if there
+    is a corresponding value, set `key` in `dict` to that value."""
+    value = attributes.get(attribute_name)
+    if value:
+        dict[key] = value
+
+
+@transforms.add
+def use_fetches(config, jobs):
+    artifact_names = {}
+    aliases = {}
+    extra_env = {}
+
+    if config.kind in ("toolchain", "fetch"):
+        jobs = list(jobs)
+        for job in jobs:
+            run = job.get("run", {})
+            label = job["label"]
+            get_attribute(artifact_names, label, run, "toolchain-artifact")
+            value = run.get(f"{config.kind}-alias")
+            if value:
+                aliases[f"{config.kind}-{value}"] = label
+
+    for task in config.kind_dependencies_tasks.values():
+        if task.kind in ("fetch", "toolchain"):
+            get_attribute(
+                artifact_names,
+                task.label,
+                task.attributes,
+                f"{task.kind}-artifact",
+            )
+            get_attribute(extra_env, task.label, task.attributes, f"{task.kind}-env")
+            value = task.attributes.get(f"{task.kind}-alias")
+            if value:
+                aliases[f"{task.kind}-{value}"] = task.label
+
+    artifact_prefixes = {}
+    for job in order_tasks(config, jobs):
+        artifact_prefixes[job["label"]] = get_artifact_prefix(job)
+
+        fetches = job.pop("fetches", None)
+        if not fetches:
+            yield job
+            continue
+
+        job_fetches = []
+        name = job.get("name", job.get("label"))
+        dependencies = job.setdefault("dependencies", {})
+        worker = job.setdefault("worker", {})
+        env = worker.setdefault("env", {})
+        prefix = get_artifact_prefix(job)
+        for kind in sorted(fetches):
+            artifacts = fetches[kind]
+            if kind in ("fetch", "toolchain"):
+                for fetch_name in sorted(artifacts):
+                    label = f"{kind}-{fetch_name}"
+                    label = aliases.get(label, label)
+                    if label not in artifact_names:
+                        raise Exception(
+                            "Missing fetch job for {kind}-{name}: {fetch}".format(
+                                kind=config.kind, name=name, fetch=fetch_name
+                            )
+                        )
+                    if label in extra_env:
+                        env.update(extra_env[label])
+
+                    path = artifact_names[label]
+
+                    dependencies[label] = label
+                    job_fetches.append(
+                        {
+                            "artifact": path,
+                            "task": f"<{label}>",
+                            "extract": True,
+                        }
+                    )
+            else:
+                if kind not in dependencies:
+                    raise Exception(
+                        "{name} can't fetch {kind} artifacts because "
+                        "it has no {kind} dependencies!".format(name=name, kind=kind)
+                    )
+                dep_label = dependencies[kind]
+                if dep_label in artifact_prefixes:
+                    prefix = artifact_prefixes[dep_label]
+                else:
+                    dep_tasks = [
+                        task
+                        for label, task in config.kind_dependencies_tasks.items()
+                        if label == dep_label
+                    ]
+                    if len(dep_tasks) != 1:
+                        raise Exception(
+                            "{name} can't fetch {kind} artifacts because "
+                            "there are {tasks} with label {label} in kind dependencies!".format(
+                                name=name,
+                                kind=kind,
+                                label=dependencies[kind],
+                                tasks="no tasks"
+                                if len(dep_tasks) == 0
+                                else "multiple tasks",
+                            )
+                        )
+
+                    prefix = get_artifact_prefix(dep_tasks[0])
+
+                def cmp_artifacts(a):
+                    if isinstance(a, str):
+                        return a
+                    else:
+                        return a["artifact"]
+
+                for artifact in sorted(artifacts, key=cmp_artifacts):
+                    if isinstance(artifact, str):
+                        path = artifact
+                        dest = None
+                        extract = True
+                        verify_hash = False
+                    else:
+                        path = artifact["artifact"]
+                        dest = artifact.get("dest")
+                        extract = artifact.get("extract", True)
+                        verify_hash = artifact.get("verify-hash", False)
+
+                    fetch = {
+                        "artifact": f"{prefix}/{path}",
+                        "task": f"<{kind}>",
+                        "extract": extract,
+                    }
+                    if dest is not None:
+                        fetch["dest"] = dest
+                    if verify_hash:
+                        fetch["verify-hash"] = verify_hash
+                    job_fetches.append(fetch)
+
+        job_artifact_prefixes = {
+            mozpath.dirname(fetch["artifact"])
+            for fetch in job_fetches
+            if not fetch["artifact"].startswith("public/")
+        }
+        if job_artifact_prefixes:
+            # Use taskcluster-proxy and request appropriate scope.  For example, add
+            # 'scopes: [queue:get-artifact:path/to/*]' for 'path/to/artifact.tar.xz'.
+            worker["taskcluster-proxy"] = True
+            for prefix in sorted(job_artifact_prefixes):
+                scope = f"queue:get-artifact:{prefix}/*"
+                if scope not in job.setdefault("scopes", []):
+                    job["scopes"].append(scope)
+
+        env["MOZ_FETCHES"] = {"task-reference": json.dumps(job_fetches, sort_keys=True)}
+
+        env.setdefault("MOZ_FETCHES_DIR", "fetches")
+
+        yield job
+
+
+@transforms.add
+def make_task_description(config, jobs):
+    """Turn each job description into a task description.
+
+    For every job a deep copy is taken as the skeleton of the task
+    description, the containers that run implementations rely on are filled
+    in, and the function registered for the job's ``run.using`` on its worker
+    implementation is given a chance to complete it.  The ``run`` section is
+    removed from the task description before it is yielded.
+    """
+    # import plugin modules first, before iterating over jobs
+    import_sibling_modules(exceptions=("common.py",))
+
+    # Factories rather than shared literals: each task description gets its
+    # own fresh container for every missing key.
+    skeleton = (
+        ("attributes", dict),
+        ("dependencies", dict),
+        ("soft-dependencies", list),
+        ("routes", list),
+        ("scopes", list),
+        ("extra", dict),
+    )
+
+    for job in jobs:
+        implementation = job["worker"]["implementation"]
+
+        # always-optimized tasks never execute, so have no workdir.  The
+        # default is applied to the job itself, before it is copied.
+        if implementation in ("docker-worker", "generic-worker"):
+            job["run"].setdefault("workdir", "/builds/worker")
+
+        taskdesc = copy.deepcopy(job)
+
+        # fill in some empty defaults to make run implementations easier
+        for key, factory in skeleton:
+            taskdesc.setdefault(key, factory())
+
+        # give the function for job.run.using on this worker implementation a
+        # chance to set up the task description.
+        configure_taskdesc_for_run(config, job, taskdesc, implementation)
+
+        # yield only the task description, discarding the job description
+        del taskdesc["run"]
+        yield taskdesc
+
+
+# A registry of all functions decorated with run_job_using, keyed first by
+# `run.using` and then by worker implementation.  Each entry is a
+# `(func, schema, defaults)` tuple.
+registry = {}
+
+
+def run_job_using(worker_implementation, run_using, schema=None, defaults=None):
+    """Register the decorated function as able to set up a task description for
+    jobs with the given worker implementation and `run.using` property.  If
+    `schema` is given, the job's run field will be verified to match it.
+
+    The decorated function should have the signature `using_foo(config, job, taskdesc)`
+    and should modify the task description in-place.  The skeleton of
+    the task description is already set up, but without a payload.
+
+    Args:
+        worker_implementation (str): Worker implementation the function handles.
+        run_using (str): Value of `run.using` the function handles.
+        schema: Optional schema stored alongside the function.
+        defaults (dict): Optional default values for the job's run section,
+            stored alongside the function.  Omitting it (or passing `None`)
+            registers an empty dict.
+
+    Returns:
+        A decorator that records the function in `registry` and returns it
+        unchanged.
+
+    Raises:
+        Exception: at decoration time, if a function is already registered
+            for the same worker implementation and `run.using`.
+    """
+    # Use a fresh dict per registration rather than one mutable default object
+    # shared by every registration that omits `defaults`.
+    if defaults is None:
+        defaults = {}
+
+    def wrap(func):
+        for_run_using = registry.setdefault(run_using, {})
+        if worker_implementation in for_run_using:
+            # Report the arguments in the order the decorator takes them.
+            raise Exception(
+                "run_job_using({!r}, {!r}) already exists: {!r}".format(
+                    worker_implementation,
+                    run_using,
+                    for_run_using[worker_implementation],
+                )
+            )
+        for_run_using[worker_implementation] = (func, schema, defaults)
+        return func
+
+    return wrap
+
+
+@run_job_using(
+    "always-optimized", "always-optimized", Schema({"using": "always-optimized"})
+)
+def always_optimized(config, job, taskdesc):
+    """Handler for `run.using: always-optimized`; leaves the task description untouched."""
+    pass
+
+
+def configure_taskdesc_for_run(config, job, taskdesc, worker_implementation):
+    """
+    Look up the function registered for this job's `run.using` on the given
+    worker implementation and run it against the task description.
+
+    Before the function is called, the registered defaults are applied to
+    `job["run"]` for any keys it does not already have, and, if a schema was
+    registered, `job["run"]` is validated against it.
+
+    An exception is raised if nothing is registered for the `run.using`
+    value, or for that value on this worker implementation.
+    """
+    run = job["run"]
+    run_using = run["using"]
+
+    implementations = registry.get(run_using)
+    if implementations is None:
+        raise Exception(f"no functions for run.using {run_using!r}")
+
+    registration = implementations.get(worker_implementation)
+    if registration is None:
+        raise Exception(
+            "no functions for run.using {!r} on {!r}".format(
+                run_using, worker_implementation
+            )
+        )
+    func, schema, defaults = registration
+
+    # Values the job set explicitly take precedence over the defaults.
+    for key, value in defaults.items():
+        run.setdefault(key, value)
+
+    if schema:
+        context = "In job.run using {!r}/{!r} for job {!r}:".format(
+            run["using"], worker_implementation, job["label"]
+        )
+        validate_schema(schema, run, context)
+
+    func(config, job, taskdesc)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py
new file mode 100644
index 0000000000..04708daf81
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py
@@ -0,0 +1,171 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Common support for various job types.  These functions are all named after the
+worker implementation they operate on, and take the same three parameters, for
+consistency.
+"""
+
+
+import hashlib
+import json
+
+from taskgraph.util.taskcluster import get_artifact_prefix
+
+
+def get_vcsdir_name(os):
+    """Return the name of the VCS checkout directory for the given worker OS.
+
+    ``"windows"`` maps to ``"src"``; any other value maps to ``"vcs"``.
+    """
+    return "src" if os == "windows" else "vcs"
+
+
+def add_cache(job, taskdesc, name, mount_point, skip_untrusted=False):
+    """Register a cache on the task in the form its worker implementation uses.
+
+    docker-worker tasks get a persistent entry appended to the worker's
+    ``caches`` list; generic-worker tasks get an entry appended to the
+    worker's ``mounts`` list.  Nothing happens for any other implementation,
+    or when the job's ``run`` section sets ``use-caches`` to a false value.
+
+    Args:
+        job (dict): Task's job description.
+        taskdesc (dict): Target task description to modify.
+        name (str): Name of the cache.
+        mount_point (path): Path on the host to mount the cache.
+        skip_untrusted (bool): Value recorded as the cache's
+            ``skip-untrusted`` field (default: False). Only applies to
+            docker-worker.
+    """
+    if not job["run"].get("use-caches", True):
+        return
+
+    implementation = job["worker"]["implementation"]
+
+    if implementation == "docker-worker":
+        caches = taskdesc["worker"].setdefault("caches", [])
+        caches.append(
+            {
+                "type": "persistent",
+                "name": name,
+                "mount-point": mount_point,
+                "skip-untrusted": skip_untrusted,
+            }
+        )
+        return
+
+    if implementation == "generic-worker":
+        mounts = taskdesc["worker"].setdefault("mounts", [])
+        mounts.append({"cache-name": name, "directory": mount_point})
+
+    # Caches are not implemented for any other worker implementation.
+
+
+def add_artifacts(config, job, taskdesc, path):
+    """Append a directory artifact for `path` to the task's worker artifacts.
+
+    The artifact is named with the task's artifact prefix, as returned by
+    `get_artifact_prefix(taskdesc)`.
+    """
+    artifacts = taskdesc["worker"].setdefault("artifacts", [])
+    artifacts.append(
+        {
+            "name": get_artifact_prefix(taskdesc),
+            "path": path,
+            "type": "directory",
+        }
+    )
+
+
+def docker_worker_add_artifacts(config, job, taskdesc):
+    """Add the `artifacts/` directory under the job's workdir as a task artifact.
+
+    The same directory is exported to the task as ``UPLOAD_DIR``; the worker
+    section of the task description must already have an ``env`` dict.
+    """
+    upload_dir = "{workdir}/artifacts/".format(**job["run"])
+    worker_env = taskdesc["worker"]["env"]
+    worker_env["UPLOAD_DIR"] = upload_dir
+    add_artifacts(config, job, taskdesc, upload_dir)
+
+
+def generic_worker_add_artifacts(config, job, taskdesc):
+    """Add an artifact directory to the task, using the artifact prefix as its path."""
+    # The path only says where the artifacts live on disk.  Whether they end
+    # up public or private is decided by the artifact name, which
+    # add_artifacts derives on its own.
+    artifact_dir = get_artifact_prefix(taskdesc)
+    add_artifacts(config, job, taskdesc, path=artifact_dir)
+
+
+def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False):
+    """Update a job/task with parameters to enable a VCS checkout.
+
+    This can only be used with ``run-task`` tasks, as the cache name is
+    reserved for ``run-task`` tasks.
+
+    A checkout cache is requested through ``add_cache`` and the worker
+    environment receives ``HG_STORE_PATH``, ``REPOSITORIES``, ``VCS_PATH`` and
+    a set of ``<PREFIX>_*`` variables per repository.  A ``secrets:get:``
+    scope is appended for every repository that has an SSH secret name, and
+    taskcluster-proxy is switched on for docker-worker.
+
+    Args:
+        config: Transform config; not read by this function.
+        job (dict): Job description; its ``worker`` and ``run`` sections are read.
+        taskdesc (dict): Task description, modified in place.  Its ``worker``
+            section must exist, as must ``scopes`` when any repository has
+            an SSH secret name.
+        repo_configs (dict): Mapping whose values describe the repositories
+            to check out (``path``, ``prefix``, ``name``, ``type``,
+            ``base_repository``, ``head_repository``, ``head_rev``,
+            ``head_ref`` and ``ssh_secret_name`` are read).
+        sparse (bool): Whether the checkout is sparse; sparse checkouts get
+            a separate cache.
+    """
+    worker = job["worker"]
+    is_mac = worker["os"] == "macosx"
+    is_win = worker["os"] == "windows"
+    is_linux = worker["os"] == "linux"
+    is_docker = worker["implementation"] == "docker-worker"
+    assert is_mac or is_win or is_linux
+
+    if is_win:
+        checkoutdir = "./build"
+        hgstore = "y:/hg-shared"
+    elif is_docker:
+        checkoutdir = "{workdir}/checkouts".format(**job["run"])
+        hgstore = f"{checkoutdir}/hg-store"
+    else:
+        checkoutdir = "./checkouts"
+        hgstore = f"{checkoutdir}/hg-shared"
+
+    vcsdir = checkoutdir + "/" + get_vcsdir_name(worker["os"])
+    cache_name = "checkouts"
+
+    # Robust checkout does not clean up subrepositories, so ensure that tasks
+    # that checkout different sets of paths have separate caches.
+    # See https://bugzilla.mozilla.org/show_bug.cgi?id=1631610
+    if len(repo_configs) > 1:
+        # Sort the (de-duplicated) entries before hashing.  Joining a set
+        # directly would make the digest, and therefore the cache name,
+        # depend on per-process string hash ordering.
+        checkout_paths = sorted(
+            {
+                "\t".join([repo_config.path, repo_config.prefix])
+                for repo_config in repo_configs.values()
+            }
+        )
+        checkout_paths_str = "\n".join(checkout_paths).encode("utf-8")
+        digest = hashlib.sha256(checkout_paths_str).hexdigest()
+        cache_name += f"-repos-{digest}"
+
+    # Sparse checkouts need their own cache because they can interfere
+    # with clients that aren't sparse aware.
+    if sparse:
+        cache_name += "-sparse"
+
+    # Workers using Mercurial >= 5.8 will enable revlog-compression-zstd, which
+    # workers using older versions can't understand, so they can't share cache.
+    # At the moment, only docker workers use the newer version.
+    if is_docker:
+        cache_name += "-hg58"
+
+    add_cache(job, taskdesc, cache_name, checkoutdir)
+
+    env = taskdesc["worker"].setdefault("env", {})
+    env.update(
+        {
+            "HG_STORE_PATH": hgstore,
+            "REPOSITORIES": json.dumps(
+                {repo.prefix: repo.name for repo in repo_configs.values()}
+            ),
+            "VCS_PATH": vcsdir,
+        }
+    )
+    for repo_config in repo_configs.values():
+        # Only export the settings that are actually defined for this repo.
+        env.update(
+            {
+                f"{repo_config.prefix.upper()}_{key}": value
+                for key, value in {
+                    "BASE_REPOSITORY": repo_config.base_repository,
+                    "HEAD_REPOSITORY": repo_config.head_repository,
+                    "HEAD_REV": repo_config.head_rev,
+                    "HEAD_REF": repo_config.head_ref,
+                    "REPOSITORY_TYPE": repo_config.type,
+                    "SSH_SECRET_NAME": repo_config.ssh_secret_name,
+                }.items()
+                if value is not None
+            }
+        )
+        if repo_config.ssh_secret_name:
+            taskdesc["scopes"].append(f"secrets:get:{repo_config.ssh_secret_name}")
+
+    # only some worker platforms have taskcluster-proxy enabled
+    if is_docker:
+        taskdesc["worker"]["taskcluster-proxy"] = True
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py
new file mode 100644
index 0000000000..09b48fe594
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py
@@ -0,0 +1,37 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+This transform allows including indexed tasks from other projects in the
+current taskgraph.  The transform takes a list of indexes, and the optimization
+phase will replace the task with the task from the other graph.
+"""
+
+
+from voluptuous import Required
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.transforms.job import run_job_using
+from taskgraph.util.schema import Schema
+
+transforms = TransformSequence()
+
+# Schema for the `run` section of jobs that use `index-search`.
+run_task_schema = Schema(
+    {
+        Required("using"): "index-search",
+        # The text is documentation, so pass it as `description`; the second
+        # positional argument of `Required` is the validation error message.
+        Required(
+            "index-search",
+            description="A list of indexes in decreasing order of priority at which to lookup for this "
+            "task. This is interpolated with the graph parameters.",
+        ): [str],
+    }
+)
+
+
+@run_job_using("always-optimized", "index-search", schema=run_task_schema)
+def fill_template(config, job, taskdesc):
+    """Set the task's optimization to an index search.
+
+    Each entry of the job's `run.index-search` list is interpolated with the
+    graph parameters, keeping the order given in the job.
+    """
+    index_paths = []
+    for template in job["run"]["index-search"]:
+        index_paths.append(template.format(**config.params))
+
+    taskdesc["optimization"] = {"index-search": index_paths}
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py
new file mode 100644
index 0000000000..6337673611
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py
@@ -0,0 +1,231 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Support for running jobs that are invoked via the `run-task` script.
+"""
+
+import dataclasses
+import os
+
+from voluptuous import Any, Optional, Required
+
+from taskgraph.transforms.job import run_job_using
+from taskgraph.transforms.job.common import support_vcs_checkout
+from taskgraph.transforms.task import taskref_or_string
+from taskgraph.util import path, taskcluster
+from taskgraph.util.schema import Schema
+
+# Command prefix used to execute a `command` that is given as a single string
+# rather than a list; selected with the `exec-with` key of the run section.
+EXEC_COMMANDS = {
+    "bash": ["bash", "-cx"],
+    "powershell": ["powershell.exe", "-ExecutionPolicy", "Bypass"],
+}
+
+# Schema for the `run` section of jobs that use `run-task`.
+run_task_schema = Schema(
+    {
+        Required("using"): "run-task",
+        # if true, add a cache at ~worker/.cache, which is where things like pip
+        # tend to hide their caches.  This cache is never added for level-1 jobs.
+        # TODO Once bug 1526028 is fixed, this and 'use-caches' should be merged.
+        Required("cache-dotcache"): bool,
+        # Whether or not to use caches.
+        Optional("use-caches"): bool,
+        # if true (the default), perform a checkout on the worker.  A dict
+        # maps repository names to per-repository overrides of the repository
+        # configuration and selects exactly those repositories.
+        Required("checkout"): Any(bool, {str: dict}),
+        Optional(
+            "cwd",
+            description="Path to run command in. If a checkout is present, the path "
+            "to the checkout will be interpolated with the key `checkout`",
+        ): str,
+        # The sparse checkout profile to use. Value is the filename relative to the
+        # directory where sparse profiles are defined (build/sparse-profiles/).
+        Required("sparse-profile"): Any(str, None),
+        # The command arguments to pass to the `run-task` script, after the
+        # checkout arguments.  If a list, it will be passed directly; otherwise
+        # it will be included in a single argument to the command specified by
+        # `exec-with`.
+        Required("command"): Any([taskref_or_string], taskref_or_string),
+        # What to execute the command with in the event command is a string.
+        # Must be one of the keys of EXEC_COMMANDS.
+        Optional("exec-with"): Any(*list(EXEC_COMMANDS)),
+        # Command used to invoke the `run-task` script. Can be used if the script
+        # or Python installation is in a non-standard location on the workers.
+        Optional("run-task-command"): list,
+        # Base work directory used to set up the task.
+        Required("workdir"): str,
+        # Whether to run as root. (defaults to False)
+        Optional("run-as-root"): bool,
+    }
+)
+
+
+def common_setup(config, job, taskdesc, command):
+    """Apply the run-task setup shared by all worker implementations.
+
+    When the job asks for a checkout, VCS support is configured on the task
+    and the matching checkout (and sparse-profile) arguments are appended to
+    `command`.  A `cwd` in the run section has `{checkout}` interpolated and
+    is passed on via `--task-cwd`.  `MOZ_SCM_LEVEL` is always exported to the
+    worker environment.
+
+    Raises an exception when several repositories are configured but the
+    checkout is not specified explicitly, or when `cwd` refers to
+    `{checkout}` while the task has no checkout.
+    """
+    run = job["run"]
+    checkout = run["checkout"]
+    has_cwd = "cwd" in run
+
+    if checkout:
+        repo_configs = config.repo_configs
+        if checkout is True:
+            if len(repo_configs) > 1:
+                raise Exception(
+                    "Must explicitly specify checkouts with multiple repos."
+                )
+        else:
+            # An explicit checkout maps repository names to overrides of the
+            # corresponding repository configuration.
+            repo_configs = {
+                repo: dataclasses.replace(repo_configs[repo], **overrides)
+                for repo, overrides in checkout.items()
+            }
+
+        sparse_profile = run["sparse-profile"]
+        support_vcs_checkout(
+            config,
+            job,
+            taskdesc,
+            repo_configs=repo_configs,
+            sparse=bool(sparse_profile),
+        )
+
+        vcs_path = taskdesc["worker"]["env"]["VCS_PATH"]
+        for repo_config in repo_configs.values():
+            checkout_path = path.join(vcs_path, repo_config.path)
+            command.append(f"--{repo_config.prefix}-checkout={checkout_path}")
+
+        if sparse_profile:
+            # NOTE: `repo_config` is the last repository visited by the loop
+            # above, so only that repository receives the sparse profile.
+            command.append(
+                "--{}-sparse-profile=build/sparse-profiles/{}".format(
+                    repo_config.prefix,
+                    sparse_profile,
+                )
+            )
+
+        if has_cwd:
+            run["cwd"] = path.normpath(run["cwd"].format(checkout=vcs_path))
+    elif has_cwd and "{checkout}" in run["cwd"]:
+        raise Exception(
+            "Found `{{checkout}}` interpolation in `cwd` for task {name} "
+            "but the task doesn't have a checkout: {cwd}".format(
+                cwd=run["cwd"], name=job.get("name", job.get("label"))
+            )
+        )
+
+    if has_cwd:
+        command.extend(("--task-cwd", run["cwd"]))
+
+    worker_env = taskdesc["worker"].setdefault("env", {})
+    worker_env["MOZ_SCM_LEVEL"] = config.params["level"]
+
+
+# Default values for the `run` section of run-task jobs; registered as the
+# `defaults` of both the docker-worker and generic-worker implementations.
+worker_defaults = {
+    "cache-dotcache": False,
+    "checkout": True,
+    "sparse-profile": None,
+    "run-as-root": False,
+}
+
+
+def script_url(config, script):
+    """Return the URL of the public artifact `script` of the current task.
+
+    The task id comes from the ``TASK_ID`` environment variable.  When it is
+    unset, the placeholder ``<TASK_ID>`` is used, unless ``MOZ_AUTOMATION``
+    is set, in which case an exception is raised.  `config` is not used.
+    """
+    task_id = os.environ.get("TASK_ID")
+    if task_id is None:
+        if "MOZ_AUTOMATION" in os.environ:
+            raise Exception("TASK_ID must be defined to use run-task on generic-worker")
+        task_id = "<TASK_ID>"
+
+    # use_proxy = False to avoid having all generic-workers turn on proxy
+    # Assumes the cluster allows anonymous downloads of public artifacts
+    root_url = taskcluster.get_root_url(False)
+
+    # TODO: Use util/taskcluster.py:get_artifact_url once hack for Bug 1405889 is removed
+    return f"{root_url}/api/queue/v1/task/{task_id}/artifacts/public/{script}"
+
+
+@run_job_using(
+    "docker-worker", "run-task", schema=run_task_schema, defaults=worker_defaults
+)
+def docker_worker_run_task(config, job, taskdesc):
+    """Set up a docker-worker task that runs its command through `run-task`.
+
+    The job's worker section becomes the task's worker section, the common
+    run-task setup is applied, an optional dotcache is added, and the final
+    command line is stored in `worker["command"]`.
+
+    Args:
+        config: Transform config; `params` is read.
+        job (dict): Job description; `run` and `worker` are read, and the
+            `run-task-command` / `exec-with` keys are popped from `run`.
+        taskdesc (dict): Task description, modified in place.
+    """
+    run = job["run"]
+    worker = taskdesc["worker"] = job["worker"]
+    command = run.pop("run-task-command", ["/usr/local/bin/run-task"])
+    common_setup(config, job, taskdesc, command)
+
+    if run.get("cache-dotcache"):
+        # The cache list may not exist yet (e.g. when no checkout cache was
+        # added), so create it on demand instead of assuming it is there.
+        worker.setdefault("caches", []).append(
+            {
+                "type": "persistent",
+                "name": "{project}-dotcache".format(**config.params),
+                "mount-point": "{workdir}/.cache".format(**run),
+                "skip-untrusted": True,
+            }
+        )
+
+    run_command = run["command"]
+
+    # A single string (or a `{'task-reference': str}` dict) is handed to the
+    # interpreter selected by `exec-with`; a list is passed through as is.
+    if isinstance(run_command, (str, dict)):
+        exec_cmd = EXEC_COMMANDS[run.pop("exec-with", "bash")]
+        run_command = exec_cmd + [run_command]
+    if run["run-as-root"]:
+        command.extend(("--user", "root", "--group", "root"))
+    # Everything after `--` is the command run-task should execute.
+    command.append("--")
+    command.extend(run_command)
+    worker["command"] = command
+
+
+@run_job_using(
+    "generic-worker", "run-task", schema=run_task_schema, defaults=worker_defaults
+)
+def generic_worker_run_task(config, job, taskdesc):
+    """Set up a generic-worker task that runs its command through `run-task`.
+
+    The job's worker section becomes the task's worker section.  The
+    `run-task` script (and `fetch-content`, when the task has fetches) is
+    mounted from the URLs given by `script_url`, and the final command is
+    stored in `worker["command"]`.
+    """
+    run = job["run"]
+    worker = taskdesc["worker"] = job["worker"]
+    worker_os = worker["os"]
+    on_windows = worker_os == "windows"
+
+    command = run.pop("run-task-command", None)
+    if not command:
+        # Per-OS way of invoking the mounted run-task script.
+        default_commands = {
+            "windows": ["C:/mozilla-build/python3/python3.exe", "run-task"],
+            "macosx": ["/tools/python36/bin/python3", "run-task"],
+        }
+        command = default_commands.get(worker_os, ["./run-task"])
+
+    common_setup(config, job, taskdesc, command)
+
+    mounts = worker.setdefault("mounts", [])
+    if run.get("cache-dotcache"):
+        mounts.append(
+            {
+                "cache-name": "{project}-dotcache".format(**config.params),
+                "directory": "{workdir}/.cache".format(**run),
+            }
+        )
+    mounts.append(
+        {
+            "content": {"url": script_url(config, "run-task")},
+            "file": "./run-task",
+        }
+    )
+    # fetch-content is only mounted for tasks that have fetches.
+    if worker.get("env", {}).get("MOZ_FETCHES"):
+        mounts.append(
+            {
+                "content": {"url": script_url(config, "fetch-content")},
+                "file": "./fetch-content",
+            }
+        )
+
+    run_command = run["command"]
+    if isinstance(run_command, str):
+        # On Windows the string is wrapped in double quotes before being
+        # handed to the interpreter selected by `exec-with`.
+        script = f'"{run_command}"' if on_windows else run_command
+        run_command = EXEC_COMMANDS[run.pop("exec-with", "bash")] + [script]
+
+    if run["run-as-root"]:
+        command.extend(("--user", "root", "--group", "root"))
+    command.append("--")
+    if worker_os == "linux-bitbar":
+        # Use the bitbar wrapper script which sets up the device and adb
+        # environment variables
+        command.append("/builds/taskcluster/script.py")
+    command.extend(run_command)
+
+    if on_windows:
+        # Windows gets the whole invocation as one space-joined string.
+        worker["command"] = [" ".join(command)]
+    else:
+        worker["command"] = [["chmod", "+x", "run-task"], command]
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py
new file mode 100644
index 0000000000..c9c09542ff
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py
@@ -0,0 +1,175 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Support for running toolchain-building jobs via dedicated scripts
+"""
+
+from voluptuous import ALLOW_EXTRA, Any, Optional, Required
+
+import taskgraph
+from taskgraph.transforms.job import configure_taskdesc_for_run, run_job_using
+from taskgraph.transforms.job.common import (
+    docker_worker_add_artifacts,
+    generic_worker_add_artifacts,
+    get_vcsdir_name,
+)
+from taskgraph.util.hash import hash_paths
+from taskgraph.util.schema import Schema
+from taskgraph.util.shell import quote as shell_quote
+
+# Value used as the `type` of the cache entry set on toolchain tasks.
+CACHE_TYPE = "toolchains.v3"
+
+# Schema for the `run` section of jobs that use `toolchain-script`.  Extra
+# keys are allowed.
+toolchain_run_schema = Schema(
+    {
+        Required("using"): "toolchain-script",
+        # The script (in taskcluster/scripts/toolchain) to run.
+        Required("script"): str,
+        # Arguments to pass to the script.
+        Optional("arguments"): [str],
+        # Sparse profile to give to checkout using `run-task`.  If given,
+        # a filename in `build/sparse-profiles`.  Defaults to
+        # "toolchain-build", i.e., to
+        # `build/sparse-profiles/toolchain-build`.  If `None`, instructs
+        # `run-task` to not use a sparse profile at all.
+        Required("sparse-profile"): Any(str, None),
+        # Paths/patterns pointing to files that influence the outcome of a
+        # toolchain build.
+        Optional("resources"): [str],
+        # Path to the artifact produced by the toolchain job
+        Required("toolchain-artifact"): str,
+        Optional(
+            "toolchain-alias",
+            description="An alias that can be used instead of the real toolchain job name in "
+            "fetch stanzas for jobs.",
+        ): Any(str, [str]),
+        Optional(
+            "toolchain-env",
+            description="Additional env variables to add to the worker when using this toolchain",
+        ): {str: object},
+        # Base work directory used to set up the task.
+        Required("workdir"): str,
+    },
+    extra=ALLOW_EXTRA,
+)
+
+
+def get_digest_data(config, run, taskdesc):
+    """Collect the values that feed the toolchain task's cache digest.
+
+    The result is a list made of, in order: a hash over the job's resources
+    and its script, the toolchain artifact path, the in-tree docker image
+    name (if any) and the script arguments (if any).  The `resources` key is
+    removed from `run` as a side effect.
+    """
+    # Accumulate dependency hashes for index generation: the declared
+    # resources plus the script itself.
+    files = [
+        *run.pop("resources", []),
+        "taskcluster/scripts/toolchain/{}".format(run["script"]),
+    ]
+    digest_data = [
+        hash_paths(config.graph_config.vcs_root, files),
+        taskdesc["attributes"]["toolchain-artifact"],
+    ]
+
+    # If the task uses an in-tree docker image, we want it to influence
+    # the index path as well. Ideally, the content of the docker image itself
+    # should have an influence, but at the moment, we can't get that
+    # information here. So use the docker image name as a proxy. Not a lot of
+    # changes to docker images actually have an impact on the resulting
+    # toolchain artifact, so we'll just rely on such important changes to be
+    # accompanied with a docker image name change.
+    in_tree_image = taskdesc["worker"].get("docker-image", {}).get("in-tree")
+    if in_tree_image:
+        digest_data.append(in_tree_image)
+
+    # Likewise script arguments should influence the index.
+    arguments = run.get("arguments")
+    if arguments:
+        digest_data.extend(arguments)
+
+    return digest_data
+
+
+def common_toolchain(config, job, taskdesc, is_docker):
+    """Set up a toolchain task and hand it over to the `run-task` implementation.
+
+    The toolchain-specific keys of the job's run section are moved into task
+    attributes, a cache entry is computed (unless `taskgraph.fast` is set),
+    and the run section is rewritten in place into a `run-task` one before
+    `configure_taskdesc_for_run` is called again for it.
+
+    Args:
+        config: Transform config; `params`, `kind` and (through
+            `get_digest_data`) `graph_config` are read.
+        job (dict): Job description; its `run` section is modified in place.
+        taskdesc (dict): Task description, modified in place.
+        is_docker (bool): True for docker-worker, False for generic-worker.
+    """
+    run = job["run"]
+
+    worker = taskdesc["worker"] = job["worker"]
+    worker["chain-of-trust"] = True
+
+    srcdir = get_vcsdir_name(worker["os"])
+
+    if is_docker:
+        # If the task doesn't have a docker-image, set a default
+        worker.setdefault("docker-image", {"in-tree": "toolchain-build"})
+
+    # Allow the job to specify where artifacts come from, but add
+    # public/build if it's not there already.
+    artifacts = worker.setdefault("artifacts", [])
+    if not any(artifact.get("name") == "public/build" for artifact in artifacts):
+        if is_docker:
+            docker_worker_add_artifacts(config, job, taskdesc)
+        else:
+            generic_worker_add_artifacts(config, job, taskdesc)
+
+    # The worker section is expected to already contain an `env` dict.
+    env = worker["env"]
+    env.update(
+        {
+            "MOZ_BUILD_DATE": config.params["moz_build_date"],
+            "MOZ_SCM_LEVEL": config.params["level"],
+        }
+    )
+
+    # Move the toolchain-specific run keys into task attributes; they are
+    # popped so they do not remain in the rewritten run section.
+    attributes = taskdesc.setdefault("attributes", {})
+    attributes["toolchain-artifact"] = run.pop("toolchain-artifact")
+    if "toolchain-alias" in run:
+        attributes["toolchain-alias"] = run.pop("toolchain-alias")
+    if "toolchain-env" in run:
+        attributes["toolchain-env"] = run.pop("toolchain-env")
+
+    # The digest is computed before `script` and `arguments` are popped
+    # below, since get_digest_data reads both from `run`.
+    if not taskgraph.fast:
+        name = taskdesc["label"].replace(f"{config.kind}-", "", 1)
+        taskdesc["cache"] = {
+            "type": CACHE_TYPE,
+            "name": name,
+            "digest-data": get_digest_data(config, run, taskdesc),
+        }
+
+    # From here on the run section is turned into a `run-task` one.
+    script = run.pop("script")
+    run["using"] = "run-task"
+    run["cwd"] = "{checkout}/.."
+
+    if script.endswith(".ps1"):
+        run["exec-with"] = "powershell"
+
+    command = [f"{srcdir}/taskcluster/scripts/toolchain/{script}"] + run.pop(
+        "arguments", []
+    )
+
+    if not is_docker:
+        # Don't quote the first item in the command because it purposely contains
+        # an environment variable that is not meant to be quoted.
+        if len(command) > 1:
+            command = command[0] + " " + shell_quote(*command[1:])
+        else:
+            command = command[0]
+
+    run["command"] = command
+
+    # Dispatch again, now on the rewritten `run-task` run section.
+    configure_taskdesc_for_run(config, job, taskdesc, worker["implementation"])
+
+
+# Default values for the `run` section of toolchain-script jobs; registered
+# as the `defaults` of both toolchain implementations below.
+toolchain_defaults = {
+    "sparse-profile": "toolchain-build",
+}
+
+
+@run_job_using(
+    "docker-worker",
+    "toolchain-script",
+    schema=toolchain_run_schema,
+    defaults=toolchain_defaults,
+)
+def docker_worker_toolchain(config, job, taskdesc):
+    """Set up a `toolchain-script` job on docker-worker via `common_toolchain`."""
+    common_toolchain(config, job, taskdesc, is_docker=True)
+
+
+@run_job_using(
+    "generic-worker",
+    "toolchain-script",
+    schema=toolchain_run_schema,
+    defaults=toolchain_defaults,
+)
+def generic_worker_toolchain(config, job, taskdesc):
+    """Set up a `toolchain-script` job on generic-worker via `common_toolchain`."""
+    common_toolchain(config, job, taskdesc, is_docker=False)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/notify.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/notify.py
new file mode 100644
index 0000000000..a61e7999c1
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/notify.py
@@ -0,0 +1,195 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Add notifications to tasks via Taskcluster's notify service.
+
+See https://docs.taskcluster.net/docs/reference/core/notify/usage for
+more information.
+"""
+from voluptuous import ALLOW_EXTRA, Any, Exclusive, Optional, Required
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.schema import Schema, optionally_keyed_by, resolve_keyed_by
+
+_status_type = Any(  # statuses a recipient can subscribe to; used as the route suffix
+    "on-completed",
+    "on-defined",
+    "on-exception",
+    "on-failed",
+    "on-pending",
+    "on-resolved",
+    "on-running",
+)
+
+_recipients = [  # one schema per recipient type; each type has its own id field
+    {
+        Required("type"): "email",
+        Required("address"): optionally_keyed_by("project", "level", str),
+        Optional("status-type"): _status_type,
+    },
+    {
+        Required("type"): "matrix-room",
+        Required("room-id"): str,
+        Optional("status-type"): _status_type,
+    },
+    {
+        Required("type"): "pulse",
+        Required("routing-key"): str,
+        Optional("status-type"): _status_type,
+    },
+    {
+        Required("type"): "slack-channel",
+        Required("channel-id"): str,
+        Optional("status-type"): _status_type,
+    },
+]
+
+_route_keys = {
+    "email": "address",
+    "matrix-room": "room-id",
+    "pulse": "routing-key",
+    "slack-channel": "channel-id",
+}
+"""Map each type to its primary key that will be used in the route."""
+
+NOTIFY_SCHEMA = Schema(
+    {
+        Exclusive("notify", "config"): {  # same exclusion group as "notifications"
+            Required("recipients"): [Any(*_recipients)],
+            Optional("content"): {
+                Optional("email"): {
+                    Optional("subject"): str,
+                    Optional("content"): str,
+                    Optional("link"): {
+                        Required("text"): str,
+                        Required("href"): str,
+                    },
+                },
+                Optional("matrix"): {
+                    Optional("body"): str,
+                    Optional("formatted-body"): str,
+                    Optional("format"): str,
+                    Optional("msg-type"): str,
+                },
+                Optional("slack"): {
+                    Optional("text"): str,
+                    Optional("blocks"): list,
+                    Optional("attachments"): list,
+                },
+            },
+        },
+        # Continue supporting the legacy, email-only schema for backwards compat.
+        Exclusive("notifications", "config"): {
+            Required("emails"): optionally_keyed_by("project", "level", [str]),
+            Required("subject"): str,
+            Optional("message"): str,
+            Optional("status-types"): [_status_type],
+        },
+    },
+    extra=ALLOW_EXTRA,
+)
+"""Notify schema."""
+
+transforms = TransformSequence()
+transforms.add_validate(NOTIFY_SCHEMA)
+
+
+def _convert_legacy(config, legacy, label):
+    """Convert the legacy format to the new one."""
+    notify = {
+        "recipients": [],
+        "content": {"email": {"subject": legacy["subject"]}},
+    }
+    resolve_keyed_by(
+        legacy,
+        "emails",
+        label,
+        **{
+            "level": config.params["level"],
+            "project": config.params["project"],
+        },
+    )
+
+    status_types = legacy.get("status-types", ["on-completed"])
+    for email in legacy["emails"]:
+        for status_type in status_types:
+            notify["recipients"].append(
+                {"type": "email", "address": email, "status-type": status_type}
+            )
+
+    notify["content"]["email"]["content"] = legacy.get("message", legacy["subject"])
+    return notify
+
+
+def _convert_content(content):
+    """Convert the notify content to Taskcluster's format.
+
+    The Taskcluster notification format is described here:
+    https://docs.taskcluster.net/docs/reference/core/notify/usage
+    """
+    tc = {}
+    if "email" in content:
+        tc["email"] = content.pop("email")
+
+    for key, obj in content.items():
+        for name in obj.keys():
+            tc_name = "".join(part.capitalize() for part in name.split("-"))
+            tc[f"{key}{tc_name}"] = obj[name]
+    return tc
+
+
+@transforms.add
+def add_notifications(config, tasks):
+    for task in tasks:
+        label = "{}-{}".format(config.kind, task["name"])
+        if "notifications" in task:
+            notify = _convert_legacy(config, task.pop("notifications"), label)
+        else:
+            notify = task.pop("notify", None)
+
+        if not notify:
+            yield task
+            continue
+
+        format_kwargs = dict(
+            task=task,
+            config=config.__dict__,
+        )
+
+        def substitute(ctx):
+            """Recursively find all strings in a simple nested dict (no lists),
+            and format them in-place using `format_kwargs`."""
+            for key, val in ctx.items():
+                if isinstance(val, str):
+                    ctx[key] = val.format(**format_kwargs)
+                elif isinstance(val, dict):
+                    ctx[key] = substitute(val)
+            return ctx
+
+        task.setdefault("routes", [])
+        for recipient in notify["recipients"]:
+            type = recipient["type"]
+            recipient.setdefault("status-type", "on-completed")
+            substitute(recipient)
+
+            if type == "email":
+                resolve_keyed_by(
+                    recipient,
+                    "address",
+                    label,
+                    **{
+                        "level": config.params["level"],
+                        "project": config.params["project"],
+                    },
+                )
+
+            task["routes"].append(
+                f"notify.{type}.{recipient[_route_keys[type]]}.{recipient['status-type']}"
+            )
+
+        if "content" in notify:
+            task.setdefault("extra", {}).update(
+                {"notify": _convert_content(substitute(notify["content"]))}
+            )
+        yield task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py
new file mode 100644
index 0000000000..c55de78513
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py
@@ -0,0 +1,1375 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+These transformations take a task description and turn it into a TaskCluster
+task definition (along with attributes, label, etc.).  The input to these
+transformations is generic to any kind of task, but abstracts away some of the
+complexities of worker implementations, scopes, and treeherder annotations.
+"""
+
+
+import hashlib
+import os
+import re
+import time
+from copy import deepcopy
+from dataclasses import dataclass
+from typing import Callable
+
+from voluptuous import All, Any, Extra, NotIn, Optional, Required
+
+from taskgraph import MAX_DEPENDENCIES
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.hash import hash_path
+from taskgraph.util.keyed_by import evaluate_keyed_by
+from taskgraph.util.memoize import memoize
+from taskgraph.util.schema import (
+    OptimizationSchema,
+    Schema,
+    optionally_keyed_by,
+    resolve_keyed_by,
+    taskref_or_string,
+    validate_schema,
+)
+from taskgraph.util.treeherder import split_symbol, treeherder_defaults
+from taskgraph.util.workertypes import worker_type_implementation
+
+from ..util import docker as dockerutil
+from ..util.workertypes import get_worker_type
+
+RUN_TASK = os.path.join(  # <parent of this file's directory>/run-task/run-task
+    os.path.dirname(os.path.dirname(__file__)), "run-task", "run-task"
+)
+
+
+@memoize
+def _run_task_suffix():
+    """String to append to cache names under control of run-task."""
+    return hash_path(RUN_TASK)[0:20]
+
+
+# A task description is a general description of a TaskCluster task
+task_description_schema = Schema(
+    {
+        # the label for this task
+        Required("label"): str,
+        # description of the task (for metadata)
+        Required("description"): str,
+        # attributes for this task
+        Optional("attributes"): {str: object},
+        # relative path (from config.path) to the file task was defined in
+        Optional("task-from"): str,
+        # dependencies of this task, keyed by name; these are passed through
+        # verbatim and subject to the interpretation of the Task's get_dependencies
+        # method.
+        Optional("dependencies"): {
+            All(
+                str,
+                NotIn(
+                    ["self", "decision"],
+                    "Can't use 'self` or 'decision' as dependency names.",
+                ),
+            ): object,
+        },
+        # Soft dependencies of this task, as a list of task labels
+        Optional("soft-dependencies"): [str],
+        # Dependencies that must be scheduled in order for this task to run.
+        Optional("if-dependencies"): [str],
+        Optional("requires"): Any("all-completed", "all-resolved"),
+        # expiration and deadline times, relative to task creation, with units
+        # (e.g., "14 days").  Defaults are set based on the project.
+        Optional("expires-after"): str,
+        Optional("deadline-after"): str,
+        # custom routes for this task; the default treeherder routes will be added
+        # automatically
+        Optional("routes"): [str],
+        # custom scopes for this task; any scopes required for the worker will be
+        # added automatically. The following parameters will be substituted in each
+        # scope:
+        #  {level} -- the scm level of this push
+        #  {project} -- the project of this push
+        Optional("scopes"): [str],
+        # Tags
+        Optional("tags"): {str: str},
+        # custom "task.extra" content
+        Optional("extra"): {str: object},
+        # treeherder-related information; see
+        # https://schemas.taskcluster.net/taskcluster-treeherder/v1/task-treeherder-config.json
+        # This may be provided in one of two ways:
+        # 1) A simple `true` will cause taskgraph to generate the required information
+        # 2) A dictionary with one or more of the required keys. Any key not present
+        #    will use a default as described below.
+        # If not specified, no treeherder extra information or routes will be
+        # added to the task
+        Optional("treeherder"): Any(
+            True,
+            {
+                # either a bare symbol, or "grp(sym)".
+                # The default symbol is the uppercased first letter of each
+                # section of the kind (delimited by "-") all smooshed together.
+                # Eg: "test" becomes "T", "docker-image" becomes "DI", etc.
+                "symbol": Optional(str),
+                # the job kind
+                # If "build" or "test" is found in the kind name, this defaults
+                # to the appropriate value. Otherwise, defaults to "other"
+                "kind": Optional(Any("build", "test", "other")),
+                # tier for this task
+                # Defaults to 1
+                "tier": Optional(int),
+                # task platform, in the form platform/collection, used to set
+                # treeherder.machine.platform and treeherder.collection or
+                # treeherder.labels
+                # Defaults to "default/opt"
+                "platform": Optional(str),
+            },
+        ),
+        # information for indexing this build so its artifacts can be discovered;
+        # if omitted, the build will not be indexed.
+        Optional("index"): {
+            # the name of the product this build produces
+            "product": str,
+            # the name to use for this job in the TaskCluster index
+            "job-name": str,
+            # Type of gecko v2 index to use
+            "type": str,
+            # The rank that the task will receive in the TaskCluster
+            # index.  A newly completed task supersedes the currently
+            # indexed task iff it has a higher rank.  If unspecified,
+            # 'by-tier' behavior will be used.
+            "rank": Any(
+                # Rank is equal to the timestamp of the build_date for tier-1
+                # tasks, and zero for non-tier-1.  This sorts tier-{2,3}
+                # builds below tier-1 in the index.
+                "by-tier",
+                # Rank is given as an integer constant (e.g. zero to make
+                # sure a task is last in the index).
+                int,
+                # Rank is equal to the timestamp of the build_date.  This
+                # option can be used to override the 'by-tier' behavior
+                # for non-tier-1 tasks.
+                "build_date",
+            ),
+        },
+        # The `run_on_projects` attribute, defaulting to "all".  This dictates the
+        # projects on which this task should be included in the target task set.
+        # See the attributes documentation for details.
+        Optional("run-on-projects"): optionally_keyed_by("build-platform", [str]),
+        Optional("run-on-tasks-for"): [str],
+        Optional("run-on-git-branches"): [str],
+        # The `shipping_phase` attribute, defaulting to None. This specifies the
+        # release promotion phase that this task belongs to.
+        Optional("shipping-phase"): Any(
+            None,
+            "build",
+            "promote",
+            "push",
+            "ship",
+        ),
+        # The `always-target` attribute will cause the task to be included in the
+        # target_task_graph regardless of filtering. Tasks included in this manner
+        # will be candidates for optimization even when `optimize_target_tasks` is
+        # False, unless the task was also explicitly chosen by the target_tasks
+        # method.
+        Required("always-target"): bool,
+        # Optimization to perform on this task during the optimization phase.
+        # Optimizations are defined in taskcluster/taskgraph/optimize.py.
+        Required("optimization"): OptimizationSchema,
+        # the provisioner-id/worker-type for the task.  The following parameters will
+        # be substituted in this string:
+        #  {level} -- the scm level of this push
+        "worker-type": str,
+        # Whether the job should use sccache compiler caching.
+        Required("needs-sccache"): bool,
+        # information specific to the worker implementation that will run this task
+        Optional("worker"): {
+            Required("implementation"): str,
+            Extra: object,
+        },
+    }
+)
+
+TC_TREEHERDER_SCHEMA_URL = (
+    "https://github.com/taskcluster/taskcluster-treeherder/"
+    "blob/master/schemas/task-treeherder-config.yml"
+)
+
+
+UNKNOWN_GROUP_NAME = (  # message template with two positional {} fields
+    "Treeherder group {} (from {}) has no name; " "add it to taskcluster/ci/config.yml"
+)
+
+V2_ROUTE_TEMPLATES = [  # index route templates with named {placeholder} fields
+    "index.{trust-domain}.v2.{project}.latest.{product}.{job-name}",
+    "index.{trust-domain}.v2.{project}.pushdate.{build_date_long}.{product}.{job-name}",
+    "index.{trust-domain}.v2.{project}.pushlog-id.{pushlog_id}.{product}.{job-name}",
+    "index.{trust-domain}.v2.{project}.revision.{branch_rev}.{product}.{job-name}",
+]
+
+# the root of the treeherder routes
+TREEHERDER_ROUTE_ROOT = "tc-treeherder"
+
+
+def get_branch_rev(config):  # head revision taken from the config's parameters
+    return config.params["head_rev"]
+
+
+@memoize
+def get_default_priority(graph_config, project):
+    return evaluate_keyed_by(
+        graph_config["task-priority"], "Graph Config", {"project": project}
+    )
+
+
+@memoize
+def get_default_deadline(graph_config, project):
+    return evaluate_keyed_by(
+        graph_config["task-deadline-after"], "Graph Config", {"project": project}
+    )
+
+
+# registry of PayloadBuilder entries, keyed by worker implementation name
+payload_builders = {}
+
+
+@dataclass(frozen=True)
+class PayloadBuilder:
+    schema: Schema  # base implementation/os schema extended with the builder's own
+    builder: Callable  # the function decorated with @payload_builder
+
+
+def payload_builder(name, schema):
+    schema = Schema({Required("implementation"): name, Optional("os"): str}).extend(
+        schema
+    )
+
+    def wrap(func):
+        assert name not in payload_builders, f"duplicate payload builder name {name}"
+        payload_builders[name] = PayloadBuilder(schema, func)
+        return func
+
+    return wrap
+
+
+# registry of index builder functions, keyed by the name given to index_builder
+index_builders = {}
+
+
+def index_builder(name):  # decorator factory; stores the function in index_builders
+    def wrap(func):
+        assert name not in index_builders, f"duplicate index builder name {name}"
+        index_builders[name] = func
+        return func  # returned unchanged so the decorated name stays usable
+
+    return wrap
+
+
+UNSUPPORTED_INDEX_PRODUCT_ERROR = """\
+The index product {product} is not in the list of configured products in
+`taskcluster/ci/config.yml'.
+"""
+
+
+def verify_index(config, index):  # raises Exception for an unconfigured product
+    product = index["product"]
+    if product not in config.graph_config["index"]["products"]:
+        raise Exception(UNSUPPORTED_INDEX_PRODUCT_ERROR.format(product=product))
+
+
+@payload_builder(
+    "docker-worker",
+    schema={
+        Required("os"): "linux",
+        # For tasks that will run in docker-worker, this is the name of the docker
+        # image or in-tree docker image to run the task in.  If in-tree, then a
+        # dependency will be created automatically.  This is generally
+        # `desktop-test`, or an image that acts an awful lot like it.
+        Required("docker-image"): Any(
+            # a raw Docker image path (repo/image:tag)
+            str,
+            # an in-tree generated docker image (from `taskcluster/docker/<name>`)
+            {"in-tree": str},
+            # an indexed docker image
+            {"indexed": str},
+        ),
+        # worker features that should be enabled
+        Required("relengapi-proxy"): bool,
+        Required("chain-of-trust"): bool,
+        Required("taskcluster-proxy"): bool,
+        Required("allow-ptrace"): bool,
+        Required("loopback-video"): bool,
+        Required("loopback-audio"): bool,
+        Required("docker-in-docker"): bool,  # (aka 'dind')
+        Required("privileged"): bool,
+        # Paths to Docker volumes.
+        #
+        # For in-tree Docker images, volumes can be parsed from Dockerfile.
+        # This only works for the Dockerfile itself: if a volume is defined in
+        # a base image, it will need to be declared here. Out-of-tree Docker
+        # images will also require explicit volume annotation.
+        #
+        # Caches are often mounted to the same path as Docker volumes. In this
+        # case, they take precedence over a Docker volume. But a volume still
+        # needs to be declared for the path.
+        Optional("volumes"): [str],
+        # caches to set up for the task
+        Optional("caches"): [
+            {
+                # only one type is supported by any of the workers right now
+                "type": "persistent",
+                # name of the cache, allowing re-use by subsequent tasks naming the
+                # same cache
+                "name": str,
+                # location in the task image where the cache will be mounted
+                "mount-point": str,
+                # Whether the cache is not used in untrusted environments
+                # (like the Try repo).
+                Optional("skip-untrusted"): bool,
+            }
+        ],
+        # artifacts to extract from the task image after completion
+        Optional("artifacts"): [
+            {
+                # type of artifact -- simple file, or recursive directory
+                "type": Any("file", "directory"),
+                # task image path from which to read artifact
+                "path": str,
+                # name of the produced artifact (root of the names for
+                # type=directory)
+                "name": str,
+            }
+        ],
+        # environment variables
+        Required("env"): {str: taskref_or_string},
+        # the command to run; if not given, docker-worker will default to the
+        # command in the docker image
+        Optional("command"): [taskref_or_string],
+        # the maximum time to run, in seconds
+        Required("max-run-time"): int,
+        # the exit status code(s) that indicates the task should be retried
+        Optional("retry-exit-status"): [int],
+        # the exit status code(s) that indicates the caches used by the task
+        # should be purged
+        Optional("purge-caches-exit-status"): [int],
+        # Whether any artifacts are assigned to this worker
+        Optional("skip-artifacts"): bool,
+    },
+)
+def build_docker_worker_payload(config, task, task_def):  # fills task_def["payload"]
+    worker = task["worker"]
+    level = int(config.params["level"])
+
+    image = worker["docker-image"]
+    if isinstance(image, dict):
+        if "in-tree" in image:
+            name = image["in-tree"]
+            docker_image_task = "build-docker-image-" + image["in-tree"]
+            task.setdefault("dependencies", {})["docker-image"] = docker_image_task
+
+            image = {
+                "path": "public/image.tar.zst",
+                "taskId": {"task-reference": "<docker-image>"},
+                "type": "task-image",
+            }
+
+            # Find VOLUME in Dockerfile.
+            volumes = dockerutil.parse_volumes(name)
+            for v in sorted(volumes):
+                if v in worker["volumes"]:  # assumes "volumes" was defaulted earlier
+                    raise Exception(
+                        "volume %s already defined; "
+                        "if it is defined in a Dockerfile, "
+                        "it does not need to be specified in the "
+                        "worker definition" % v
+                    )
+
+                worker["volumes"].append(v)
+
+        elif "indexed" in image:
+            image = {
+                "path": "public/image.tar.zst",
+                "namespace": image["indexed"],
+                "type": "indexed-image",
+            }
+        else:
+            raise Exception("unknown docker image type")
+
+    features = {}
+
+    if worker.get("relengapi-proxy"):
+        features["relengAPIProxy"] = True
+
+    if worker.get("taskcluster-proxy"):
+        features["taskclusterProxy"] = True
+
+    if worker.get("allow-ptrace"):
+        features["allowPtrace"] = True
+        task_def["scopes"].append("docker-worker:feature:allowPtrace")
+
+    if worker.get("chain-of-trust"):
+        features["chainOfTrust"] = True
+
+    if worker.get("docker-in-docker"):
+        features["dind"] = True
+
+    if task.get("needs-sccache"):
+        features["taskclusterProxy"] = True
+        task_def["scopes"].append(
+            "assume:project:taskcluster:{trust_domain}:level-{level}-sccache-buckets".format(
+                trust_domain=config.graph_config["trust-domain"],
+                level=config.params["level"],
+            )
+        )
+        worker["env"]["USE_SCCACHE"] = "1"
+        # Disable sccache idle shutdown.
+        worker["env"]["SCCACHE_IDLE_TIMEOUT"] = "0"
+    else:
+        worker["env"]["SCCACHE_DISABLE"] = "1"
+
+    capabilities = {}
+
+    for lo in "audio", "video":
+        if worker.get("loopback-" + lo):
+            capitalized = "loopback" + lo.capitalize()
+            devices = capabilities.setdefault("devices", {})
+            devices[capitalized] = True
+            task_def["scopes"].append("docker-worker:capability:device:" + capitalized)
+
+    if worker.get("privileged"):
+        capabilities["privileged"] = True
+        task_def["scopes"].append("docker-worker:capability:privileged")
+
+    task_def["payload"] = payload = {
+        "image": image,
+        "env": worker["env"],
+    }
+    if "command" in worker:
+        payload["command"] = worker["command"]
+
+    if "max-run-time" in worker:
+        payload["maxRunTime"] = worker["max-run-time"]
+
+    run_task = payload.get("command", [""])[0].endswith("run-task")
+
+    # run-task exits EXIT_PURGE_CACHES (72) if there is a problem with caches.
+    # Automatically retry the tasks and purge caches if we see this exit
+    # code.
+    # TODO move this closer to code adding run-task once bug 1469697 is
+    # addressed.
+    if run_task:
+        worker.setdefault("retry-exit-status", []).append(72)
+        worker.setdefault("purge-caches-exit-status", []).append(72)
+
+    payload["onExitStatus"] = {}
+    if "retry-exit-status" in worker:
+        payload["onExitStatus"]["retry"] = worker["retry-exit-status"]
+    if "purge-caches-exit-status" in worker:
+        payload["onExitStatus"]["purgeCaches"] = worker["purge-caches-exit-status"]
+
+    if "artifacts" in worker:
+        artifacts = {}
+        for artifact in worker["artifacts"]:
+            artifacts[artifact["name"]] = {
+                "path": artifact["path"],
+                "type": artifact["type"],
+                "expires": task_def["expires"],  # always expire with the task
+            }
+        payload["artifacts"] = artifacts
+
+    if isinstance(worker.get("docker-image"), str):
+        out_of_tree_image = worker["docker-image"]
+    else:
+        out_of_tree_image = None
+        image = worker.get("docker-image", {}).get("in-tree")  # NOTE(review): unused below
+
+    if "caches" in worker:
+        caches = {}
+
+        # run-task knows how to validate caches.
+        #
+        # To help ensure new run-task features and bug fixes don't interfere
+        # with existing caches, we seed the hash of run-task into cache names.
+        # So, any time run-task changes, we should get a fresh set of caches.
+        # This means run-task can make changes to cache interaction at any time
+        # without regards for backwards or future compatibility.
+        #
+        # But this mechanism only works for in-tree Docker images that are built
+        # with the current run-task! For out-of-tree Docker images, we have no
+        # way of knowing their content of run-task. So, in addition to varying
+        # cache names by the contents of run-task, we also take the Docker image
+        # name into consideration. This means that different Docker images will
+        # never share the same cache. This is a bit unfortunate. But it is the
+        # safest thing to do. Fortunately, most images are defined in-tree.
+        #
+        # For out-of-tree Docker images, we don't strictly need to incorporate
+        # the run-task content into the cache name. However, doing so preserves
+        # the mechanism whereby changing run-task results in new caches
+        # everywhere.
+
+        # As an additional mechanism to force the use of different caches, the
+        # string literal in the variable below can be changed. This is
+        # preferred to changing run-task because it doesn't require images
+        # to be rebuilt.
+        cache_version = "v3"
+
+        if run_task:
+            suffix = f"{cache_version}-{_run_task_suffix()}"
+
+            if out_of_tree_image:
+                name_hash = hashlib.sha256(
+                    out_of_tree_image.encode("utf-8")
+                ).hexdigest()
+                suffix += name_hash[0:12]
+
+        else:
+            suffix = cache_version
+
+        skip_untrusted = config.params.is_try() or level == 1
+
+        for cache in worker["caches"]:
+            # Some caches aren't enabled in environments where we can't
+            # guarantee certain behavior. Filter those out.
+            if cache.get("skip-untrusted") and skip_untrusted:
+                continue
+
+            name = "{trust_domain}-level-{level}-{name}-{suffix}".format(
+                trust_domain=config.graph_config["trust-domain"],
+                level=config.params["level"],
+                name=cache["name"],
+                suffix=suffix,
+            )
+            caches[name] = cache["mount-point"]
+            task_def["scopes"].append("docker-worker:cache:%s" % name)
+
+        # Assertion: only run-task is interested in this.
+        if run_task:
+            payload["env"]["TASKCLUSTER_CACHES"] = ";".join(sorted(caches.values()))
+
+        payload["cache"] = caches
+
+    # And send down volumes information to run-task as well.
+    if run_task and worker.get("volumes"):
+        payload["env"]["TASKCLUSTER_VOLUMES"] = ";".join(sorted(worker["volumes"]))
+
+    # Safe: "cache" is only set in the branch above that also binds skip_untrusted.
+    if payload.get("cache") and skip_untrusted:
+        payload["env"]["TASKCLUSTER_UNTRUSTED_CACHES"] = "1"
+
+    if features:
+        payload["features"] = features
+    if capabilities:
+        payload["capabilities"] = capabilities
+    # check_caches_are_volumes is defined elsewhere in this file.
+    check_caches_are_volumes(task)
+
+
+@payload_builder(
+    "generic-worker",
+    schema={
+        Required("os"): Any("windows", "macosx", "linux", "linux-bitbar"),
+        # see http://schemas.taskcluster.net/generic-worker/v1/payload.json
+        # and https://docs.taskcluster.net/reference/workers/generic-worker/payload
+        # command is a list of commands to run, sequentially
+        # on Windows, each command is a string, on OS X and Linux, each command is
+        # a string array
+        Required("command"): Any(
+            [taskref_or_string], [[taskref_or_string]]  # Windows  # Linux / OS X
+        ),
+        # artifacts to extract from the task image after completion; note that artifacts
+        # for the generic worker cannot have names
+        Optional("artifacts"): [
+            {
+                # type of artifact -- simple file, or recursive directory
+                "type": Any("file", "directory"),
+                # filesystem path from which to read artifact
+                "path": str,
+                # if not specified, path is used for artifact name
+                Optional("name"): str,
+            }
+        ],
+        # Directories and/or files to be mounted.
+        # The actual allowed combinations are stricter than the model below,
+        # but this provides a simple starting point.
+        # See https://docs.taskcluster.net/reference/workers/generic-worker/payload
+        Optional("mounts"): [
+            {
+                # A unique name for the cache volume, implies writable cache directory
+                # (otherwise mount is a read-only file or directory).
+                Optional("cache-name"): str,
+                # Optional content for pre-loading cache, or mandatory content for
+                # read-only file or directory. Pre-loaded content can come from either
+                # a task artifact or from a URL.
+                Optional("content"): {
+                    # *** Either (artifact and task-id) or url must be specified. ***
+                    # Artifact name that contains the content.
+                    Optional("artifact"): str,
+                    # Task ID that has the artifact that contains the content.
+                    Optional("task-id"): taskref_or_string,
+                    # URL that supplies the content in response to an unauthenticated
+                    # GET request.
+                    Optional("url"): str,
+                },
+                # *** Either file or directory must be specified. ***
+                # If mounting a cache or read-only directory, the filesystem location of
+                # the directory should be specified as a relative path to the task
+                # directory here.
+                Optional("directory"): str,
+                # If mounting a file, specify the relative path within the task
+                # directory to mount the file (the file will be read only).
+                Optional("file"): str,
+                # Required if and only if `content` is specified and mounting a
+                # directory (not a file). This should be the archive format of the
+                # content (either pre-loaded cache or read-only directory).
+                Optional("format"): Any("rar", "tar.bz2", "tar.gz", "zip"),
+            }
+        ],
+        # environment variables
+        Required("env"): {str: taskref_or_string},
+        # the maximum time to run, in seconds
+        Required("max-run-time"): int,
+        # the exit status code(s) that indicates the task should be retried
+        Optional("retry-exit-status"): [int],
+        # the exit status code(s) that indicates the caches used by the task
+        # should be purged
+        Optional("purge-caches-exit-status"): [int],
+        # os user groups for test task workers
+        Optional("os-groups"): [str],
+        # feature for test task to run as administrator
+        Optional("run-as-administrator"): bool,
+        # optional features
+        Required("chain-of-trust"): bool,
+        Optional("taskcluster-proxy"): bool,
+        # Whether any artifacts are assigned to this worker
+        Optional("skip-artifacts"): bool,
+    },
+)
+def build_generic_worker_payload(config, task, task_def):
+    """Fill in task_def["payload"] (and extra scopes) for a generic-worker task."""
+    worker = task["worker"]
+    task_def["payload"] = {
+        "command": worker["command"],
+        "maxRunTime": worker["max-run-time"],
+    }
+
+    on_exit_status = {}
+    if "retry-exit-status" in worker:
+        # Copy, so the Windows codes appended below never end up in `worker`.
+        on_exit_status["retry"] = list(worker["retry-exit-status"])
+    if "purge-caches-exit-status" in worker:
+        on_exit_status["purgeCaches"] = worker["purge-caches-exit-status"]
+    if worker["os"] == "windows":
+        on_exit_status.setdefault("retry", []).extend(
+            [
+                # These codes (on windows) indicate a process interruption,
+                # rather than a task run failure. See bug 1544403.
+                1073807364,  # process force-killed due to system shutdown
+                3221225786,  # sigint (any interrupt)
+            ]
+        )
+    if on_exit_status:
+        task_def["payload"]["onExitStatus"] = on_exit_status
+
+    env = worker.get("env", {})
+    # Every task either enables sccache or explicitly disables it.
+    if task.get("needs-sccache"):
+        env["USE_SCCACHE"] = "1"
+        # Disable sccache idle shutdown.
+        env["SCCACHE_IDLE_TIMEOUT"] = "0"
+    else:
+        env["SCCACHE_DISABLE"] = "1"
+
+    if env:
+        task_def["payload"]["env"] = env
+
+    artifacts = []
+    for artifact in worker.get("artifacts", []):
+        a = {
+            "path": artifact["path"],
+            "type": artifact["type"],
+        }
+        if "name" in artifact:
+            a["name"] = artifact["name"]
+        artifacts.append(a)
+
+    if artifacts:
+        task_def["payload"]["artifacts"] = artifacts
+
+    # Need to copy over mounts, but rename keys to respect naming convention
+    #   * 'cache-name' -> 'cacheName'
+    #   * 'task-id'    -> 'taskId'
+    # All other key names are already suitable, and don't need renaming.
+    mounts = deepcopy(worker.get("mounts", []))
+    for mount in mounts:
+        if "cache-name" in mount:
+            mount["cacheName"] = "{trust_domain}-level-{level}-{name}".format(
+                trust_domain=config.graph_config["trust-domain"],
+                level=config.params["level"],
+                name=mount.pop("cache-name"),
+            )
+            task_def["scopes"].append(
+                "generic-worker:cache:{}".format(mount["cacheName"])
+            )
+        if "content" in mount:
+            if "task-id" in mount["content"]:
+                mount["content"]["taskId"] = mount["content"].pop("task-id")
+            if "artifact" in mount["content"]:
+                if not mount["content"]["artifact"].startswith("public/"):
+                    task_def["scopes"].append(
+                        "queue:get-artifact:{}".format(mount["content"]["artifact"])
+                    )
+
+    if mounts:
+        task_def["payload"]["mounts"] = mounts
+
+    if worker.get("os-groups"):
+        task_def["payload"]["osGroups"] = worker["os-groups"]
+        task_def["scopes"].extend(
+            [
+                "generic-worker:os-group:{}/{}".format(task["worker-type"], group)
+                for group in worker["os-groups"]
+            ]
+        )
+
+    features = {}
+
+    if worker.get("chain-of-trust"):
+        features["chainOfTrust"] = True
+
+    if worker.get("taskcluster-proxy"):
+        features["taskclusterProxy"] = True
+
+    if worker.get("run-as-administrator", False):
+        features["runAsAdministrator"] = True
+        task_def["scopes"].append(
+            "generic-worker:run-as-administrator:{}".format(task["worker-type"]),
+        )
+
+    if features:
+        task_def["payload"]["features"] = features
+
+
+@payload_builder(
+    "beetmover",
+    schema={
+        # the maximum time to run, in seconds
+        Required("max-run-time"): int,
+        # locale key, if this is a locale beetmover job
+        Optional("locale"): str,
+        Optional("partner-public"): bool,
+        Required("release-properties"): {
+            "app-name": str,
+            "app-version": str,
+            "branch": str,
+            "build-id": str,
+            "hash-type": str,
+            "platform": str,
+        },
+        # list of artifact URLs for the artifacts that should be beetmoved
+        Required("upstream-artifacts"): [
+            {
+                # taskId of the task with the artifact
+                Required("taskId"): taskref_or_string,
+                # type of signing task (for CoT)
+                Required("taskType"): str,
+                # Paths to the artifacts to sign
+                Required("paths"): [str],
+                # locale is used to map upload path and allow for duplicate simple names
+                Required("locale"): str,
+            }
+        ],
+        Optional("artifact-map"): object,
+    },
+)
+def build_beetmover_payload(config, task, task_def):
+    """Translate the beetmover worker settings into the task payload."""
+    worker = task["worker"]
+    props = worker["release-properties"]
+    payload = task_def["payload"] = {
+        "maxRunTime": worker["max-run-time"],
+        "releaseProperties": {
+            "appName": props["app-name"],
+            "appVersion": props["app-version"],
+            "branch": props["branch"],
+            "buildid": props["build-id"],
+            "hashType": props["hash-type"],
+            "platform": props["platform"],
+        },
+        "upload_date": config.params["build_date"],
+        "upstreamArtifacts": worker["upstream-artifacts"],
+    }
+    # Optional settings are only forwarded when present and truthy.
+    optional = {"locale": "locale", "artifact-map": "artifactMap"}
+    optional["partner-public"] = "is_partner_repack_public"
+    for source_key, payload_key in optional.items():
+        if worker.get(source_key):
+            payload[payload_key] = worker[source_key]
+
+
+@payload_builder(
+    "invalid",
+    schema={
+        # An invalid task is one which should never actually be created; this is
+        # used in release automation on branches where the task makes no sense.
+        Extra: object,  # any worker keys are accepted
+    },
+)
+def build_invalid_payload(config, task, task_def):
+    task_def["payload"] = "invalid task - should never be created"
+
+
+@payload_builder(
+    "always-optimized",
+    schema={
+        Extra: object,  # any worker keys are accepted
+    },
+)
+@payload_builder("succeed", schema={})
+def build_dummy_payload(config, task, task_def):
+    task_def["payload"] = {}  # both implementations share an empty payload
+
+
+transforms = TransformSequence()  # the @transforms.add functions below register here
+
+
+@transforms.add
+def set_implementation(config, tasks):
+    """
+    Fill in ``worker.implementation`` (and ``worker.os``) from the worker-type
+    alias, tagging the task to match. Tasks that already name an
+    implementation are passed through untouched.
+    """
+    for task in tasks:
+        worker = task.setdefault("worker", {})
+        if "implementation" not in worker:
+            implementation, worker_os = worker_type_implementation(
+                config.graph_config, task["worker-type"]
+            )
+            tags = task.setdefault("tags", {})
+            tags["worker-implementation"] = implementation
+            worker["implementation"] = implementation
+            # The OS is optional; record it in both places only when known.
+            if worker_os:
+                tags["os"] = worker_os
+                worker["os"] = worker_os
+
+        yield task
+
+
+@transforms.add
+def set_defaults(config, tasks):
+    """Apply task-level and per-implementation worker defaults to each task."""
+    docker_flags = (
+        "relengapi-proxy chain-of-trust taskcluster-proxy allow-ptrace "
+        "loopback-video loopback-audio docker-in-docker privileged"
+    ).split()
+    scriptworker_impls = (
+        "scriptworker-signing",
+        "beetmover",
+        "beetmover-push-to-release",
+        "beetmover-maven",
+    )
+    for task in tasks:
+        task.setdefault("always-target", False)
+        task.setdefault("optimization", None)
+        task.setdefault("needs-sccache", False)
+
+        worker = task["worker"]
+        implementation = worker["implementation"]
+        if implementation == "docker-worker":
+            for flag in docker_flags:
+                worker.setdefault(flag, False)
+            worker.setdefault("volumes", [])
+            worker.setdefault("env", {})
+            for cache in worker.get("caches", ()):
+                cache.setdefault("skip-untrusted", False)
+        elif implementation == "generic-worker":
+            worker.setdefault("env", {})
+            # os-groups may only be non-empty for Windows workers.
+            if worker.setdefault("os-groups", []) and worker["os"] != "windows":
+                raise Exception(
+                    "os-groups feature of generic-worker is only supported on "
+                    "Windows, not on {}".format(worker["os"])
+                )
+            worker.setdefault("chain-of-trust", False)
+        elif implementation in scriptworker_impls:
+            worker.setdefault("max-run-time", 600)
+        elif implementation == "push-apk":
+            worker.setdefault("commit", False)
+
+        yield task
+
+
+@transforms.add
+def task_name_from_label(config, tasks):
+    for task in tasks:
+        if "label" not in task and "name" not in task:
+            raise Exception("task has neither a name nor a label")
+        if "label" not in task:  # the label defaults to "<kind>-<name>"
+            task["label"] = "{}-{}".format(config.kind, task["name"])
+        if task.get("name"):  # a falsy name ("" or None) is left in place
+            task.pop("name")
+        yield task
+
+
+@transforms.add
+def validate(config, tasks):
+    """Validate each task description and its implementation-specific worker."""
+    for task in tasks:
+        task_label = task.get("label", "?no-label?")
+        validate_schema(
+            task_description_schema, task, "In task {!r}:".format(task_label)
+        )
+        worker_schema = payload_builders[task["worker"]["implementation"]].schema
+        worker_label = task.get("label", "?no-label?")
+        validate_schema(
+            worker_schema, task["worker"], "In task.run {!r}:".format(worker_label)
+        )
+        yield task
+
+
+@index_builder("generic")
+def add_generic_index_routes(config, task):
+    """Append one route per V2 route template for the task's index; return it."""
+    index = task.get("index")
+    routes = task.setdefault("routes", [])
+
+    verify_index(config, index)
+
+    # Template substitutions: a copy of the parameters plus index-specific values.
+    subs = config.params.copy()
+    subs["job-name"] = index["job-name"]
+    build_time = time.gmtime(config.params["build_date"])
+    subs["build_date_long"] = time.strftime("%Y.%m.%d.%Y%m%d%H%M%S", build_time)
+    subs["product"] = index["product"]
+    subs["trust-domain"] = config.graph_config["trust-domain"]
+    subs["branch_rev"] = get_branch_rev(config)
+
+    routes.extend(template.format(**subs) for template in V2_ROUTE_TEMPLATES)
+
+    return task
+
+
+@transforms.add
+def process_treeherder_metadata(config, tasks):
+    """Fill in extra.treeherder and add a Treeherder route for tasks that opt in."""
+    for task in tasks:
+        routes = task.get("routes", [])
+        extra = task.get("extra", {})
+        task_th = task.get("treeherder")
+        if task_th:
+            # This `merged_th` object is just an intermediary that combines
+            # the defaults and whatever is in the task. Ultimately, the task
+            # transforms this data a bit in the `treeherder` object that is
+            # eventually set in the task.
+            merged_th = treeherder_defaults(config.kind, task["label"])
+            if isinstance(task_th, dict):
+                merged_th.update(task_th)
+
+            treeherder = extra.setdefault("treeherder", {})
+            extra.setdefault("treeherder-platform", merged_th["platform"])
+            # Platform is "<machine platform>/<collection>", split at the first slash.
+            machine_platform, collection = merged_th["platform"].split("/", 1)
+            treeherder["machine"] = {"platform": machine_platform}
+            treeherder["collection"] = {collection: True}
+            # Any group symbol other than "?" must be declared in the graph config.
+            group_names = config.graph_config["treeherder"]["group-names"]
+            groupSymbol, symbol = split_symbol(merged_th["symbol"])
+            if groupSymbol != "?":
+                treeherder["groupSymbol"] = groupSymbol
+                if groupSymbol not in group_names:
+                    path = os.path.join(config.path, task.get("task-from", ""))
+                    raise Exception(UNKNOWN_GROUP_NAME.format(groupSymbol, path))
+                treeherder["groupName"] = group_names[groupSymbol]
+            treeherder["symbol"] = symbol
+            if len(symbol) > 25 or len(groupSymbol) > 25:
+                raise RuntimeError(
+                    "Treeherder group and symbol names must not be longer than "
+                    "25 characters: {} (see {})".format(
+                        treeherder["symbol"],
+                        TC_TREEHERDER_SCHEMA_URL,
+                    )
+                )
+            treeherder["jobKind"] = merged_th["kind"]
+            treeherder["tier"] = merged_th["tier"]
+
+            branch_rev = get_branch_rev(config)
+
+            if config.params["tasks_for"].startswith("github-pull-request"):
+                # In the past we used `project` for this, but that ends up being
+                # set to the repository name of the _head_ repo, which is not correct
+                # (and causes scope issues) if it doesn't match the name of the
+                # base repo
+                base_project = config.params["base_repository"].split("/")[-1]
+                if base_project.endswith(".git"):
+                    base_project = base_project[:-4]
+                th_project_suffix = "-pr"
+            else:
+                base_project = config.params["project"]
+                th_project_suffix = ""
+
+            routes.append(
+                "{}.v2.{}.{}.{}".format(
+                    TREEHERDER_ROUTE_ROOT,
+                    base_project + th_project_suffix,
+                    branch_rev,
+                    config.params["pushlog_id"],
+                )
+            )
+        # Store both back: the defaults fetched above may not be attached to the task.
+        task["routes"] = routes
+        task["extra"] = extra
+        yield task
+
+
+@transforms.add
+def add_index_routes(config, tasks):
+    """Set extra.index.rank on every task and add index routes where requested."""
+    for task in tasks:
+        index = task.get("index", {})
+        extra = task.setdefault("extra", {})
+        extra_index = extra.setdefault("index", {})
+
+        # Tasks are ranked by tier unless the index says otherwise.
+        rank = index.get("rank", "by-tier")
+        if rank == "by-tier":
+            # Tier 1 tasks rank by build date; every other tier ranks zero so
+            # that tier-{2,3} builds sort below tier-1 in the index.
+            tier = extra.get("treeherder", {}).get("tier", 3)
+            rank = 0 if tier > 1 else int(config.params["build_date"])
+        elif rank == "build_date":
+            rank = int(config.params["build_date"])
+        extra_index["rank"] = rank
+
+        # Tasks without a (non-empty) index section get a rank but no index
+        # routes and are passed straight through.
+        if index:
+            index_type = index.get("type", "generic")
+            if index_type not in index_builders:
+                raise ValueError(f"Unknown index-type {index_type}")
+            task = index_builders[index_type](config, task)
+
+            del task["index"]
+
+        yield task
+
+
+@transforms.add
+def build_task(config, tasks):
+    """Convert each task description into a task definition plus graph metadata."""
+    for task in tasks:
+        level = str(config.params["level"])
+        provisioner_id, worker_type = get_worker_type(
+            config.graph_config,
+            task["worker-type"],
+            level,
+        )
+        task["worker-type"] = "/".join([provisioner_id, worker_type])
+        project = config.params["project"]
+
+        routes = task.get("routes", [])
+        scopes = [
+            s.format(level=level, project=project) for s in task.get("scopes", [])
+        ]
+
+        # set up extra
+        extra = task.get("extra", {})
+        extra["parent"] = os.environ.get("TASK_ID", "")
+
+        if "expires-after" not in task:
+            task["expires-after"] = "28 days" if config.params.is_try() else "1 year"
+
+        if "deadline-after" not in task:
+            if "task-deadline-after" in config.graph_config:
+                task["deadline-after"] = get_default_deadline(
+                    config.graph_config, config.params["project"]
+                )
+            else:
+                task["deadline-after"] = "1 day"
+
+        if "priority" not in task:
+            task["priority"] = get_default_priority(
+                config.graph_config, config.params["project"]
+            )
+
+        tags = task.get("tags", {})
+        tags.update(
+            {
+                "createdForUser": config.params["owner"],
+                "kind": config.kind,
+                "label": task["label"],
+            }
+        )
+
+        task_def = {
+            "provisionerId": provisioner_id,
+            "workerType": worker_type,
+            "routes": routes,
+            "created": {"relative-datestamp": "0 seconds"},
+            "deadline": {"relative-datestamp": task["deadline-after"]},
+            "expires": {"relative-datestamp": task["expires-after"]},
+            "scopes": scopes,
+            "metadata": {
+                "description": task["description"],
+                "name": task["label"],
+                "owner": config.params["owner"],
+                "source": config.params.file_url(config.path, pretty=True),
+            },
+            "extra": extra,
+            "tags": tags,
+            "priority": task["priority"],
+        }
+
+        if task.get("requires", None):
+            task_def["requires"] = task["requires"]
+
+        if task.get("extra", {}).get("treeherder"):
+            branch_rev = get_branch_rev(config)
+            if config.params["tasks_for"].startswith("github-pull-request"):
+                # In the past we used `project` for this, but that ends up being
+                # set to the repository name of the _head_ repo, which is not correct
+                # (and causes scope issues) if it doesn't match the name of the
+                # base repo
+                base_project = config.params["base_repository"].split("/")[-1]
+                if base_project.endswith(".git"):
+                    base_project = base_project[:-4]
+                th_project_suffix = "-pr"
+            else:
+                base_project = config.params["project"]
+                th_project_suffix = ""
+
+            # link back to treeherder in description
+            th_push_link = (
+                "https://treeherder.mozilla.org/#/jobs?repo={}&revision={}".format(
+                    base_project + th_project_suffix, branch_rev
+                )
+            )
+            task_def["metadata"]["description"] += " ([Treeherder push]({}))".format(
+                th_push_link
+            )
+
+        # add the payload and adjust anything else as required (e.g., scopes)
+        payload_builders[task["worker"]["implementation"]].builder(
+            config, task, task_def
+        )
+
+        attributes = task.get("attributes", {})
+        # Resolve run-on-projects
+        build_platform = attributes.get("build_platform")
+        resolve_keyed_by(
+            task,
+            "run-on-projects",
+            item_name=task["label"],
+            **{"build-platform": build_platform},
+        )
+        attributes["run_on_projects"] = task.get("run-on-projects", ["all"])
+        attributes["run_on_tasks_for"] = task.get("run-on-tasks-for", ["all"])
+        # We don't want to pollute non git repos with this attribute. Moreover, target_tasks
+        # already assumes the default value is ['all']
+        if task.get("run-on-git-branches"):
+            attributes["run_on_git_branches"] = task["run-on-git-branches"]
+
+        attributes["always_target"] = task["always-target"]
+        # This logic is here since downstream tasks don't always match their
+        # upstream dependency's shipping_phase.
+        # A non-None task['shipping-phase'] takes precedence, then
+        # an existing attributes['shipping_phase'], then fall back to None.
+        if task.get("shipping-phase") is not None:
+            attributes["shipping_phase"] = task["shipping-phase"]
+        else:
+            attributes.setdefault("shipping_phase", None)
+
+        # Set MOZ_AUTOMATION on docker-/generic-worker jobs with a non-empty payload.
+        if task["worker"]["implementation"] in (
+            "generic-worker",
+            "docker-worker",
+        ):
+            payload = task_def.get("payload")
+            if payload:
+                env = payload.setdefault("env", {})
+                env["MOZ_AUTOMATION"] = "1"
+        # if-dependencies name dependency keys; swap each for the value it maps to.
+        dependencies = task.get("dependencies", {})
+        if_dependencies = task.get("if-dependencies", [])
+        if if_dependencies:
+            for i, dep in enumerate(if_dependencies):
+                if dep in dependencies:
+                    if_dependencies[i] = dependencies[dep]
+                    continue
+
+                raise Exception(
+                    "{label} specifies '{dep}' in if-dependencies, "
+                    "but {dep} is not a dependency!".format(
+                        label=task["label"], dep=dep
+                    )
+                )
+
+        yield {
+            "label": task["label"],
+            "description": task["description"],
+            "task": task_def,
+            "dependencies": dependencies,
+            "if-dependencies": if_dependencies,
+            "soft-dependencies": task.get("soft-dependencies", []),
+            "attributes": attributes,
+            "optimization": task.get("optimization", None),
+        }
+
+
+@transforms.add
+def add_github_checks(config, tasks):
+    """
+    For git repositories, add checks route to all tasks.
+
+    This will be replaced by a configurable option in the future.
+    """
+    # Decide once; non-git tasks must pass through exactly once and unchanged
+    # (falling through to a second loop re-yields them when `tasks` is a list).
+    is_git = config.params["repository_type"] == "git"
+    for task in tasks:
+        if is_git:
+            task["task"]["routes"].append("checks")
+        yield task
+
+
+@transforms.add
+def chain_of_trust(config, tasks):
+    """Record the docker-image dependency as a chain-of-trust input."""
+    for task in tasks:
+        features = task["task"].get("payload", {}).get("features", {})
+        image = None
+        if features.get("chainOfTrust"):
+            image = task.get("dependencies", {}).get("docker-image")
+        if image:
+            extra = task["task"].setdefault("extra", {})
+            inputs = extra.setdefault("chainOfTrust", {}).setdefault("inputs", {})
+            inputs["docker-image"] = {"task-reference": "<docker-image>"}
+        yield task
+
+
+@transforms.add
+def check_task_identifiers(config, tasks):
+    """Ensures that all tasks have well defined identifiers:
+    ``^[a-zA-Z0-9_-]{1,38}$``
+    """
+    is_valid = re.compile("^[a-zA-Z0-9_-]{1,38}$").fullmatch  # bare `$` allows "\n"
+    for task in tasks:
+        for attrib in ("workerType", "provisionerId"):
+            if not is_valid(task["task"][attrib]):
+                raise Exception(
+                    "task {}.{} is not a valid identifier: {}".format(
+                        task["label"], attrib, task["task"][attrib]
+                    )
+                )
+        yield task
+
+
+@transforms.add
+def check_task_dependencies(config, tasks):
+    """Ensures that tasks don't have more than MAX_DEPENDENCIES dependencies.
+
+    Regular, if- and soft-dependencies are all counted towards the limit.
+    """
+    dependency_keys = ("dependencies", "if-dependencies", "soft-dependencies")
+    for task in tasks:
+        number_of_dependencies = sum(len(task[key]) for key in dependency_keys)
+        if number_of_dependencies > MAX_DEPENDENCIES:
+            raise Exception(
+                "task {}/{} has too many dependencies ({} > {})".format(
+                    config.kind,
+                    task["label"],
+                    number_of_dependencies,
+                    MAX_DEPENDENCIES,
+                )
+            )
+        yield task
+
+
+def check_caches_are_volumes(task):
+    """Raise if any cache mount point is not also declared as a Docker volume.
+
+    Caches and volumes are the only filesystem locations whose content is
+    not defined by the Docker image itself, and some caches are optional
+    depending on the job environment. A path that may be a cache should
+    behave as similarly as possible whether or not a cache is used, so every
+    cache path must be declared as a Docker volume. This check won't catch
+    all offenders, but it is better than nothing.
+    """
+    worker = task["worker"]
+    declared = set(worker["volumes"])
+    # Sorted so the error message lists the paths in a stable order.
+    undeclared = sorted(
+        {cache["mount-point"] for cache in worker.get("caches", [])} - declared
+    )
+
+    if undeclared:
+        raise Exception(
+            "task {} (image {}) has caches that are not declared as "
+            "Docker volumes: {} "
+            "(have you added them as VOLUMEs in the Dockerfile?)".format(
+                task["label"], worker["docker-image"], ", ".join(undeclared)
+            )
+        )
+
+
+@transforms.add
+def check_run_task_caches(config, tasks):
+    """Audit for caches requiring run-task.
+
+    run-task manages caches in certain ways. If a cache managed by run-task
+    is used by a non run-task task, it could cause problems. So we audit for
+    that and make sure certain cache names are exclusive to run-task.
+
+    IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
+    CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
+    THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.
+    """
+    re_reserved_caches = re.compile(
+        """^
+        (checkouts|tooltool-cache)
+    """,
+        re.VERBOSE,
+    )
+
+    cache_prefix = "{trust_domain}-level-{level}-".format(
+        trust_domain=config.graph_config["trust-domain"],
+        level=config.params["level"],
+    )
+
+    suffix = _run_task_suffix()
+
+    for task in tasks:
+        payload = task["task"].get("payload", {})
+        command = payload.get("command") or [""]
+        # Only a string first argument that ends in "run-task" marks a run-task task.
+        main_command = command[0] if isinstance(command[0], str) else ""
+        run_task = main_command.endswith("run-task")
+
+        for cache in payload.get("cache", {}):
+            if not cache.startswith(cache_prefix):
+                raise Exception(
+                    "{} is using a cache ({}) which is not appropriate "
+                    "for its trust-domain and level. It should start with {}.".format(
+                        task["label"], cache, cache_prefix
+                    )
+                )
+            # Drop the prefix; the checks below look at the bare cache name.
+            cache = cache[len(cache_prefix) :]
+
+            if not re_reserved_caches.match(cache):
+                continue
+
+            if not run_task:
+                raise Exception(
+                    f"{task['label']} is using a cache ({cache}) reserved for run-task"
+                    "; change the task to use run-task or use a different "
+                    "cache name"
+                )
+
+            if not cache.endswith(suffix):
+                raise Exception(
+                    f"{task['label']} is using a cache ({cache}) reserved for run-task "
+                    "but the cache name is not dependent on the contents "
+                    "of run-task; change the cache name to conform to the "
+                    "naming requirements"
+                )
+
+        yield task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task_context.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task_context.py
new file mode 100644
index 0000000000..5c7ed6af80
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task_context.py
@@ -0,0 +1,121 @@
+from textwrap import dedent
+
+from voluptuous import ALLOW_EXTRA, Any, Optional, Required
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.schema import Schema
+from taskgraph.util.templates import deep_get, substitute
+from taskgraph.util.yaml import load_yaml
+
+SCHEMA = Schema(
+    {
+        Required(
+            "task-context",
+            description=dedent(
+                """
+            `task-context` can be used to substitute values into any field in a
+            task with data that is not known until `taskgraph` runs.
+
+            This data can be provided via `from-parameters` or `from-file`,
+            which can pull in values from parameters and a defined yml file
+            respectively.
+
+            Data may also be provided directly in the `from-object` section of
+            `task-context`. This can be useful in `kinds` that define most of
+            their contents in `task-defaults`, but have some values that may
+            differ for various concrete `tasks` in the `kind`.
+
+            If the same key is found in multiple places the order of precedence
+            is as follows:
+              - Parameters
+              - `from-object` keys
+              - File
+
+            That is to say: parameters will always override anything else.
+
+            """.lstrip(),
+            ),
+        ): {
+            Optional(
+                "from-parameters",
+                description=dedent(
+                    """
+                Retrieve task context values from parameters. A single
+                parameter may be provided or a list of parameters in
+                priority order. The latter can be useful in implementing a
+                "default" value if some other parameter is not provided.
+                """.lstrip()
+                ),
+            ): {str: Any([str], str)},  # context key -> parameter path(s)
+            Optional(
+                "from-file",
+                description=dedent(
+                    """
+                Retrieve task context values from a yaml file. The provided
+                file should usually only contain top level keys and values
+                (eg: nested objects will not be interpolated - they will be
+                substituted as text representations of the object).
+                """.lstrip()
+                ),
+            ): str,  # path of the yaml file
+            Optional(
+                "from-object",
+                description="Key/value pairs to be used as task context",
+            ): object,
+            Required(
+                "substitution-fields",
+                description=dedent(
+                    """
+                A list of fields in the task to substitute the provided values
+                into.
+                """.lstrip()
+                ),
+            ): [str],  # a "." in an entry descends into nested fields
+        },
+    },
+    extra=ALLOW_EXTRA,  # keys besides "task-context" are not checked here
+)
+
+transforms = TransformSequence()
+transforms.add_validate(SCHEMA)  # presumably validates each task against SCHEMA
+
+
+@transforms.add
+def render_task(config, jobs):
+    """Substitute `task-context` values into each job's `substitution-fields`.
+
+    Later sources win: file values, then `from-object`, then parameters.
+    """
+    for job in jobs:
+        sub_config = job.pop("task-context")
+        params_context = {}
+        for var, path in sub_config.pop("from-parameters", {}).items():
+            # A single path is used as-is (even if it resolves to None); a
+            # list contributes its first non-None candidate, if any.
+            if isinstance(path, str):
+                params_context[var] = deep_get(config.params, path)
+                continue
+            candidates = (deep_get(config.params, choice) for choice in path)
+            value = next((c for c in candidates if c is not None), None)
+            if value is not None:
+                params_context[var] = value
+
+        from_file = sub_config.pop("from-file", None)
+        file_context = load_yaml(from_file) if from_file else {}
+        fields = sub_config.pop("substitution-fields")
+
+        # Merge in increasing order of precedence.
+        subs = {}
+        subs.update(file_context)
+        subs.update(sub_config.pop("from-object", {}))
+        subs.update(params_context)
+
+        for field in fields:
+            # Walk dotted paths down to the dict that holds the final key.
+            *parents, leaf = field.split(".")
+            container = job
+            for key in parents:
+                container = container[key]
+            container[leaf] = substitute(container[leaf], **subs)
+
+        yield job
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
new file mode 100644
index 0000000000..ee59ba4548
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
@@ -0,0 +1,86 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import gzip
+import os
+import stat
+import tarfile
+
+# 2016-01-01T00:00:00+0000
+DEFAULT_MTIME = 1451606400
+
+
+def create_tar_from_files(fp, files):
+    """Create a tar file deterministically.
+
+    Receives a dict mapping names of files in the archive to local filesystem
+    paths or binary file objects (anything supporting ``seek``, ``tell`` and
+    ``read``). Entries are written to ``fp``, a handle opened for binary
+    writing, in sorted name order with root ownership and a fixed mtime.
+
+    Files given as paths are opened here and closed once archived; file
+    objects passed in by the caller are left open and stored with mode 0644.
+
+    Raises:
+        ValueError: if a file has its setuid or setgid bit set.
+    """
+    with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf:
+        for archive_path, f in sorted(files.items()):
+            opened_here = isinstance(f, str)
+            if opened_here:
+                source, mode = f, os.stat(f).st_mode
+                f = open(f, "rb")
+            else:
+                source, mode = f, 0o0644
+
+            try:
+                ti = tarfile.TarInfo(archive_path)
+                ti.mode = mode
+                ti.type = tarfile.REGTYPE
+
+                # Disallow setuid and setgid bits. Since uid/gid are forced to
+                # root:root, they would be a glaring security hole if the
+                # archive were extracted as root.
+                if ti.mode & (stat.S_ISUID | stat.S_ISGID):
+                    raise ValueError(
+                        "cannot add file with setuid or setgid set: %s" % source
+                    )
+
+                # Deterministic ownership and timestamp.
+                ti.uid = ti.gid = 0
+                ti.uname = ti.gname = ""
+                ti.mtime = DEFAULT_MTIME
+
+                # The size is taken from the stream itself: seek to the end,
+                # record the offset, then rewind for addfile() to read.
+                f.seek(0, 2)
+                ti.size = f.tell()
+                f.seek(0, 0)
+                tf.addfile(ti, f)
+            finally:
+                if opened_here:
+                    f.close()
+
+
+def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
+    """Write a deterministic gzip-compressed tar archive of ``files`` to ``fp``.
+
+    Thin wrapper around ``create_tar_from_files`` that layers gzip on top.
+    ``filename``, when given, is recorded in the gzip header (empty
+    otherwise) and ``compresslevel`` is handed to ``gzip.GzipFile``.
+
+    ``fp`` should be opened for writing in binary mode. All data has been
+    written to it by the time this function returns.
+    """
+    # The gzip header embeds an mtime; pin it to a fixed value so that the
+    # same inputs always produce identical bytes.
+    with gzip.GzipFile(
+        filename=filename or "",
+        mode="wb",
+        fileobj=fp,
+        compresslevel=compresslevel,
+        mtime=DEFAULT_MTIME,
+    ) as compressed:
+        create_tar_from_files(compressed, files)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
new file mode 100644
index 0000000000..74d6996629
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
@@ -0,0 +1,96 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import re
+
+
+def attrmatch(attributes, **kwargs):
+    """Check a task's attributes against a set of conditions.
+
+    Every keyword argument names an attribute and gives the condition that
+    attribute has to meet. A condition is interpreted by its type:
+
+        * set or list: the attribute value must be a member of it.
+        * callable: called with the attribute value; its result must be truthy.
+        * anything else: a literal the attribute value must equal.
+
+    A condition naming an attribute the task does not have never matches.
+    With no conditions at all, everything matches.
+
+    Args:
+        attributes (dict): The task's attributes object.
+        kwargs (dict): The conditions the task's attributes must satisfy in
+                       order to match.
+    Returns:
+        bool: Whether the task's attributes match the conditions or not.
+    """
+
+    def satisfied(name, condition):
+        if name not in attributes:
+            return False
+        actual = attributes[name]
+        if isinstance(condition, (set, list)):
+            return actual in condition
+        if callable(condition):
+            return bool(condition(actual))
+        return not (condition != actual)
+
+    # all() short-circuits, so evaluation stops at the first failed condition.
+    return all(satisfied(name, cond) for name, cond in kwargs.items())
+
+
+def keymatch(attributes, target):
+    """Return the values in ``attributes`` whose keys match ``target``.
+
+    Three strategies are tried in order and the first to succeed wins: an
+    exact key, then every key that matches as a regex, then a "default" key.
+    """
+    if target in attributes:
+        # exact match
+        return [attributes[target]]
+
+    # regular expression match; each key is a pattern, anchored by re.match
+    # at the start and by the appended "$" at the end
+    pattern_hits = []
+    for pattern, value in attributes.items():
+        if re.match(pattern + "$", target):
+            pattern_hits.append(value)
+
+    fallback = [attributes["default"]] if "default" in attributes else []
+    return pattern_hits or fallback
+
+
+def _match_run_on(key, run_on):
+    """
+    Tell whether `key` is selected by the `run-on-*` collection `run_on`.
+
+    The entry "all" acts as a wildcard that selects every key.
+    """
+    return "all" in run_on or key in run_on
+
+
+match_run_on_projects = _match_run_on
+match_run_on_tasks_for = _match_run_on
+
+
+def match_run_on_git_branches(git_branch, run_on_git_branches):
+    """
+    Tell whether `git_branch` is selected by `run-on-git-branches`.
+
+    Each entry is a regular expression tried against the start of the branch
+    name with `re.match`; the literal entry 'all' selects every branch.
+    """
+    if "all" in run_on_git_branches:
+        return True
+
+    return any(
+        re.match(branch_pattern, git_branch) for branch_pattern in run_on_git_branches
+    )
+
+
+def sorted_unique_list(*args):
+    """Merge any number of iterables into one sorted list without duplicates."""
+    unique_members = {member for iterable in args for member in iterable}
+    return sorted(unique_members)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py
new file mode 100644
index 0000000000..974b114902
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py
@@ -0,0 +1,86 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import hashlib
+import time
+
+TARGET_CACHE_INDEX = "{cache_prefix}.cache.level-{level}.{type}.{name}.hash.{digest}"
+EXTRA_CACHE_INDEXES = [
+    "{cache_prefix}.cache.level-{level}.{type}.{name}.latest",
+    "{cache_prefix}.cache.level-{level}.{type}.{name}.pushdate.{build_date_long}",
+]
+
+
+def add_optimization(
+    config, taskdesc, cache_type, cache_name, digest=None, digest_data=None
+):
+    """
+    Allow the results of this task to be cached. This adds index routes to the
+    task so it can be looked up for future runs, and optimization hints so that
+    cached artifacts can be found. Exactly one of `digest` and `digest_data`
+    must be passed, otherwise an Exception is raised.
+
+    :param TransformConfig config: The configuration for the kind being transformed.
+    :param dict taskdesc: The description of the current task; updated in place.
+    :param str cache_type: The type of task result being cached.
+    :param str cache_name: The name of the object being cached.
+    :param digest: A unique string identifying this version of the artifacts
+        being generated. Typically this will be the hash of inputs to the task.
+    :type digest: str or None
+    :param digest_data: A list of strings representing the inputs of this
+        task. They are joined with newlines, encoded as UTF-8 and hashed with
+        SHA-256 to create the digest for this task.
+    :type digest_data: list of str or None
+    """
+    if (digest is None) == (digest_data is None):
+        raise Exception("Must pass exactly one of `digest` and `digest_data`.")
+    if digest is None:
+        digest = hashlib.sha256("\n".join(digest_data).encode("utf-8")).hexdigest()
+
+    if "cached-task-prefix" in config.graph_config["taskgraph"]:
+        cache_prefix = config.graph_config["taskgraph"]["cached-task-prefix"]
+    else:
+        cache_prefix = config.graph_config["trust-domain"]
+
+    subs = {
+        "cache_prefix": cache_prefix,
+        "type": cache_type,
+        "name": cache_name,
+        "digest": digest,
+    }
+
+    # We'll try to find a cached version of the toolchain at levels above and
+    # including the current level, starting at the highest level.
+    # Chain-of-trust doesn't handle tasks not built on the tip of a
+    # pull-request, so only look at level 3 when building a pull-request.
+    min_level = int(config.params["level"])
+    if config.params["tasks_for"] == "github-pull-request":
+        min_level = max(min_level, 3)
+    index_routes = [
+        TARGET_CACHE_INDEX.format(**subs, level=level)
+        for level in reversed(range(min_level, 4))
+    ]
+    if index_routes:
+        taskdesc["optimization"] = {"index-search": index_routes}
+
+    # ... and cache at the lowest level.
+    subs["level"] = config.params["level"]
+    taskdesc.setdefault("routes", []).append(
+        f"index.{TARGET_CACHE_INDEX.format(**subs)}"
+    )
+
+    # ... and add some extra routes for humans
+    subs["build_date_long"] = time.strftime(
+        "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"])
+    )
+    taskdesc["routes"].extend(
+        f"index.{route.format(**subs)}" for route in EXTRA_CACHE_INDEXES
+    )
+
+    taskdesc["attributes"]["cached_task"] = {
+        "type": cache_type,
+        "name": cache_name,
+        "digest": digest,
+    }
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py
new file mode 100644
index 0000000000..d0e1e1079f
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py
@@ -0,0 +1,79 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Utilities for generating a decision task from :file:`.taskcluster.yml`.
+"""
+
+
+import os
+
+import jsone
+import slugid
+import yaml
+
+from .templates import merge
+from .time import current_json_time
+from .vcs import find_hg_revision_push_info
+
+
+def make_decision_task(params, root, context, head_rev=None):
+    """Render the root .taskcluster.yml into a basic decision task.
+
+    ``head_rev`` defaults to ``params["head_rev"]`` and is only used to look
+    up hg push info. ``context`` is combined with the generated JSON-e
+    context via ``merge``. Returns ``(task_id, task)``; raises if rendering
+    does not yield exactly one task.
+    """
+    with open(os.path.join(root, ".taskcluster.yml"), "rb") as f:
+        taskcluster_yml = yaml.safe_load(f)
+
+    head_rev = head_rev or params["head_rev"]
+
+    hg_push_context = {}
+    if params["repository_type"] == "hg":
+        pushlog = find_hg_revision_push_info(params["repository_url"], head_rev)
+        hg_push_context = {
+            "pushlog_id": pushlog["pushid"],
+            "pushdate": pushlog["pushdate"],
+            "owner": pushlog["user"],
+        }
+
+    slugids = {}
+
+    def as_slugid(name):
+        # https://github.com/taskcluster/json-e/issues/164
+        label = name[0]
+        if label not in slugids:
+            slugids[label] = slugid.nice()
+        return slugids[label]
+
+    # provide a similar JSON-e context to what mozilla-taskcluster provides:
+    # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
+    # but with a different tasks_for and an extra `cron` section
+    repository = {
+        "url": params["repository_url"],
+        "project": params["project"],
+        "level": params["level"],
+    }
+    push = merge(
+        # everything but the revision is fake, yet the decision task expects it
+        {"revision": params["head_rev"], "comment": " "},
+        hg_push_context,
+    )
+    context = merge(
+        {
+            "repository": repository,
+            "push": push,
+            "now": current_json_time(),
+            "as_slugid": as_slugid,
+        },
+        context,
+    )
+
+    rendered = jsone.render(taskcluster_yml, context)
+    if len(rendered["tasks"]) != 1:
+        raise Exception("Expected .taskcluster.yml to only produce one cron task")
+    task = rendered["tasks"][0]
+    return (task.pop("taskId"), task)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/dependencies.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/dependencies.py
new file mode 100644
index 0000000000..d33aa3d7f2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/dependencies.py
@@ -0,0 +1,92 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from typing import Dict, Iterator, Optional
+
+from taskgraph.task import Task
+from taskgraph.transforms.base import TransformConfig
+from taskgraph.util.schema import Schema
+
+# Define a collection of group_by functions
+GROUP_BY_MAP = {}
+
+
+def group_by(name, schema=None):
+    """Return a decorator registering a function in GROUP_BY_MAP as `name`."""
+    def register(func):
+        assert name not in GROUP_BY_MAP, (
+            f"duplicate group_by function name {name} ({func} and {GROUP_BY_MAP[name]})"
+        )
+        func.schema = schema
+        GROUP_BY_MAP[name] = func
+        return func
+    return register
+
+
+@group_by("single")
+def group_by_single(config, tasks):
+    """Place every task in a group of its own."""
+    yield from ([task] for task in tasks)
+
+
+@group_by("all")
+def group_by_all(config, tasks):
+    return [list(tasks)]  # one single group containing every task
+
+
+@group_by("attribute", schema=Schema(str))
+def group_by_attribute(config, tasks, attr):
+    """Group tasks by the value of attribute `attr`; tasks for which it is
+    missing or falsy are dropped."""
+    groups = {}
+    for task in tasks:
+        group_key = task.attributes.get(attr)
+        if group_key:
+            groups.setdefault(group_key, []).append(task)
+    return groups.values()
+
+
+def get_dependencies(config: TransformConfig, task: Dict) -> Iterator[Task]:
+    """Yield the ``Task`` object of every dependency of ``task``.
+
+    Args:
+        config (TransformConfig): The ``TransformConfig`` object associated
+            with the kind; its ``kind_dependencies_tasks`` mapping of label
+            to ``Task`` is searched.
+        task (Dict): The task dictionary to retrieve dependencies from.
+
+    Returns:
+        Iterator[Task]: A generator over the ``Task`` objects whose label is
+        one of the values of ``task["dependencies"]``. It yields nothing if
+        the task has no "dependencies" key.
+    """
+    if "dependencies" in task:
+        for label, dep in config.kind_dependencies_tasks.items():
+            if label in task["dependencies"].values():
+                yield dep
+
+
+def get_primary_dependency(config: TransformConfig, task: Dict) -> Optional[Task]:
+    """Return the ``Task`` object associated with the primary dependency.
+
+    The primary dependency is the first dependency whose kind equals the
+    task's ``primary-kind-dependency`` attribute.
+
+    Args:
+        config (TransformConfig): The ``TransformConfig`` object associated
+            with the kind.
+        task (Dict): The task dictionary to retrieve the primary dependency from.
+
+    Returns:
+        Optional[Task]: The matching ``Task``, or ``None`` when the attribute
+            (or the whole "attributes" key) is missing or nothing matches.
+    """
+    try:
+        primary_kind = task["attributes"]["primary-kind-dependency"]
+    except KeyError:
+        return None
+
+    # get_dependencies is lazy, so iteration stops at the first match.
+    candidates = get_dependencies(config, task)
+    return next((dep for dep in candidates if dep.kind == primary_kind), None)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py
new file mode 100644
index 0000000000..c37a69f98f
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py
@@ -0,0 +1,237 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import hashlib
+import io
+import os
+import re
+
+from taskgraph.util.archive import create_tar_gz_from_files
+from taskgraph.util.memoize import memoize
+
+IMAGE_DIR = os.path.join(".", "taskcluster", "docker")
+
+from .yaml import load_yaml
+
+
+def docker_image(name, by_tag=False):
+    """
+    Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``,
+    or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`.
+    """
+    def read_stripped(*parts):
+        with open(os.path.join(IMAGE_DIR, *parts)) as f:
+            return f.read().strip()
+
+    try:
+        registry = read_stripped(name, "REGISTRY")
+    except OSError:
+        registry = read_stripped("REGISTRY")
+
+    if by_tag:
+        try:
+            tag = read_stripped(name, "VERSION")
+        except OSError:
+            tag = "latest"
+        return f"{registry}/{name}:{tag}"
+
+    hashfile = os.path.join(IMAGE_DIR, name, "HASH")
+    try:
+        return f"{registry}/{name}@{read_stripped(name, 'HASH')}"
+    except OSError:
+        raise Exception(f"Failed to read HASH file {hashfile}")
+
+
+class VoidWriter:
+    """A write-only file-like object that silently discards whatever it is
+    given."""
+
+    def write(self, buf):
+        return None
+
+
+def generate_context_hash(topsrcdir, image_path, args=None):
+    """Return the sha256 hex digest of the context archive built for an image."""
+    discard = VoidWriter()  # only the digest matters; the bytes are dropped
+    return stream_context_tar(topsrcdir, image_path, discard, args=args)
+
+
+class HashingWriter:
+    """File-like wrapper that forwards every write to an underlying writer
+    while feeding the same bytes into a running SHA-256 hash."""
+
+    def __init__(self, writer):
+        self._sink = writer
+        self._digest = hashlib.sha256()
+
+    def write(self, buf):
+        self._digest.update(buf)  # hash first, then pass the data through
+        self._sink.write(buf)
+
+    def hexdigest(self):
+        return self._digest.hexdigest()
+
+
+def create_context_tar(topsrcdir, context_dir, out_path, args=None):
+    """Create a context tarball.
+
+    The directory ``context_dir``, which must contain a Dockerfile, is packed
+    into a gzipped tar file written to ``out_path``.
+
+    The Dockerfile is also scanned for special comment lines that influence
+    context generation:
+
+    ``# %include <path>`` adds ``<path>``, resolved against ``topsrcdir``, to
+    the context under the ``topsrcdir/`` prefix. If the path is a directory,
+    all files under that directory are added.
+
+    ``# %ARG <name>`` causes occurrences of the string ``$<name>`` in
+    subsequent lines to be replaced with the value found in the ``args``
+    argument. Exception: this doesn't apply to VOLUME definitions.
+
+    Args:
+        topsrcdir: root of the source repository.
+        context_dir: image directory; it is joined onto ``topsrcdir``.
+        out_path: path of the archive file to write.
+        args: mapping from ``%ARG`` names to their replacement values.
+
+    Returns the SHA-256 hex digest of the created archive.
+    """
+    archive_name = os.path.basename(out_path)
+    with open(out_path, "wb") as out_file:
+        return stream_context_tar(
+            topsrcdir, context_dir, out_file, image_name=archive_name, args=args
+        )
+
+
+# The `run-task` directory one level above this module's directory.
+RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task")
+# Archive path -> filesystem path of each support file added to the context.
+RUN_TASK_FILES = {
+    f"run-task/{path}": os.path.join(RUN_TASK_ROOT, path)
+    for path in ["run-task", "fetch-content", "hgrc", "robustcheckout.py"]
+}
+# Dockerfile lines inserted after a `# %include-run-task` marker. Each entry
+# is exactly one line; keep the trailing commas so that adjacent literals
+# are not silently concatenated into a single element.
+RUN_TASK_SNIPPET = [
+    "COPY run-task/run-task /usr/local/bin/run-task\n",
+    "COPY run-task/fetch-content /usr/local/bin/fetch-content\n",
+    "COPY run-task/robustcheckout.py /usr/local/mercurial/robustcheckout.py\n",
+    "COPY run-task/hgrc /etc/mercurial/hgrc.d/mozilla.rc\n",
+]
+
+
+def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None):
+    """Like create_context_tar, but streams the tar file to the `out_file` file
+    object. Every context file opened here is closed again before returning."""
+    archive_files, replace, content, opened = {}, [], [], []
+    topsrcdir = os.path.abspath(topsrcdir)
+    context_dir = os.path.join(topsrcdir, context_dir)
+    # topsrcdir with a trailing separator, for whole-component prefix checks.
+    topsrcdir_prefix = os.path.join(topsrcdir, "")
+
+    try:
+        # Open handles, unlike paths, are archived with a fixed default mode.
+        for root, dirs, files in os.walk(context_dir):
+            for f in files:
+                source_path = os.path.join(root, f)
+                archive_path = source_path[len(context_dir) + 1 :]
+                opened.append(open(source_path, "rb"))
+                archive_files[archive_path] = opened[-1]
+
+        # Parse Dockerfile for special syntax of extra files to include.
+        with open(os.path.join(context_dir, "Dockerfile")) as fh:
+            for line in fh:
+                if line.startswith("# %ARG"):
+                    p = line[len("# %ARG ") :].strip()
+                    if not args or p not in args:
+                        raise Exception(f"missing argument: {p}")
+                    replace.append((re.compile(rf"\${p}\b"), args[p]))
+                    continue
+
+                for regexp, s in replace:
+                    line = re.sub(regexp, s, line)
+                content.append(line)
+
+                if not line.startswith("# %include"):
+                    continue
+                if line.strip() == "# %include-run-task":
+                    content.extend(RUN_TASK_SNIPPET)
+                    archive_files.update(RUN_TASK_FILES)
+                    continue
+
+                p = line[len("# %include ") :].strip()
+                if os.path.isabs(p):
+                    raise Exception("extra include path cannot be absolute: %s" % p)
+
+                fs_path = os.path.normpath(os.path.join(topsrcdir, p))
+                # Check for filesystem traversal exploits. A bare startswith()
+                # on topsrcdir would also accept siblings like "<topsrcdir>-x".
+                if fs_path != topsrcdir and not fs_path.startswith(topsrcdir_prefix):
+                    raise Exception("extra include path outside topsrcdir: %s" % p)
+                if not os.path.exists(fs_path):
+                    raise Exception("extra include path does not exist: %s" % p)
+
+                if os.path.isdir(fs_path):
+                    for root, dirs, files in os.walk(fs_path):
+                        for f in files:
+                            source_path = os.path.join(root, f)
+                            rel = source_path[len(fs_path) + 1 :]
+                            archive_path = os.path.join("topsrcdir", p, rel)
+                            archive_files[archive_path] = source_path
+                else:
+                    archive_files[os.path.join("topsrcdir", p)] = fs_path
+
+        archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8"))
+        writer = HashingWriter(out_file)
+        create_tar_gz_from_files(writer, archive_files, image_name)
+        return writer.hexdigest()
+    finally:
+        for handle in opened:
+            handle.close()
+
+
+@memoize
+def image_paths():
+    """Map each docker image name to the directory holding its Dockerfile."""
+    kind_config = load_yaml("taskcluster", "ci", "docker-image", "kind.yml")
+    paths = {}
+    for image, definition in kind_config["tasks"].items():
+        paths[image] = os.path.join(IMAGE_DIR, definition.get("definition", image))
+    return paths
+
+
+def image_path(name):
+    """Return the Dockerfile directory of image ``name``."""
+    known = image_paths()
+    # Images unknown to image_paths() default to IMAGE_DIR/<name>.
+    return known[name] if name in known else os.path.join(IMAGE_DIR, name)
+
+
+@memoize
+def parse_volumes(image):
+    """Parse VOLUME entries from a Dockerfile for an image.
+
+    Returns the set of volume paths, decoded as UTF-8 strings. Raises
+    ``ValueError`` if a VOLUME line uses the array ("[...]") syntax.
+    """
+    dockerfile = os.path.join(image_path(image), "Dockerfile")
+    volumes = set()
+    with open(dockerfile, "rb") as fh:
+        for raw_line in fh:
+            stripped = raw_line.strip()
+            # We assume VOLUME definitions don't use %ARGS.
+            if not stripped.startswith(b"VOLUME "):
+                continue
+            paths = stripped.split(None, 1)[1]
+            if paths.startswith(b"["):
+                raise ValueError(
+                    "cannot parse array syntax for VOLUME; "
+                    "convert to multiple entries"
+                )
+            volumes.update(path.decode("utf-8") for path in paths.split())
+
+    return volumes
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py
new file mode 100644
index 0000000000..5d884fc318
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py
@@ -0,0 +1,58 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import hashlib
+from pathlib import Path
+
+from taskgraph.util import path as mozpath
+from taskgraph.util.memoize import memoize
+
+
+@memoize
+def hash_path(path):
+    """Return the SHA-256 digest, in hex form, of the file at ``path``."""
+    # The entire file is read into memory before hashing.
+    with open(path, "rb") as stream:
+        content = stream.read()
+    digest = hashlib.sha256(content)
+    return digest.hexdigest()
+
+
+def hash_paths(base_path, patterns):
+    """
+    Give a list of path patterns, return a digest of the contents of all
+    the corresponding files, similarly to git tree objects or mercurial
+    manifests.
+
+    Each file is hashed. One "<hash> <path>" line per file, in sorted path
+    order, is then itself hashed to produce the result. An exception is
+    raised for any pattern that matches no file.
+    """
+    # Collect every file selected by at least one pattern, without duplicates.
+    found = set()
+    for pattern in patterns:
+        matches = _find_matching_files(base_path, pattern)
+        if not matches:
+            raise Exception("%s did not match anything" % pattern)
+        found.update(matches)
+
+    # Sorting keeps the digest independent of the order files were found in.
+    h = hashlib.sha256()
+    for path in sorted(found):
+        file_hash = hash_path(mozpath.abspath(mozpath.join(base_path, path)))
+        entry = "{} {}\n".format(file_hash, mozpath.normsep(path))
+        h.update(entry.encode("utf-8"))
+
+    return h.hexdigest()
+
+
+@memoize
+def _find_matching_files(base_path, pattern):
+    """Return the files under base_path that mozpath.match accepts for pattern."""
+    return [f for f in _get_all_files(base_path) if mozpath.match(f, pattern)]
+
+
+@memoize
+def _get_all_files(base_path):  # every file found below base_path, as str
+    return list(map(str, filter(Path.is_file, Path(base_path).rglob("*"))))
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py
new file mode 100644
index 0000000000..9b0c5a44fb
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py
@@ -0,0 +1,97 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from .attributes import keymatch
+
+
+def evaluate_keyed_by(
+    value, item_name, attributes, defer=None, enforce_single_match=True
+):
+    """
+    For values which can either accept a literal value, or be keyed by some
+    attributes, perform that lookup and return the result.
+
+    For example, given item::
+
+        by-test-platform:
+            macosx-10.11/debug: 13
+            win.*: 6
+            default: 12
+
+    a call to `evaluate_keyed_by(item, 'thing-name', {'test-platform': 'linux96'})`
+    would return `12`.
+
+    Items can be nested as deeply as desired::
+
+        by-test-platform:
+            win.*:
+                by-project:
+                    ash: ..
+                    cedar: ..
+            linux: 13
+            default: 12
+
+    Args:
+        value: The literal value, or single-key ``by-<key>`` dict, to evaluate.
+        item_name (str): Used to generate useful error messages.
+        attributes (dict): Dictionary of attributes used to lookup 'by-<key>' with.
+        defer (list):
+            Allows evaluating a by-* entry at a later time. In the example
+            above it's possible that the project attribute hasn't been set yet,
+            in which case we'd want to stop before resolving that subkey and
+            then call this function again later. This can be accomplished by
+            setting `defer=["project"]` in this example.
+        enforce_single_match (bool):
+            If True (default), each task may only match a single arm of the
+            evaluation.
+    """
+    # Peel off one `by-<key>` layer per iteration until what remains is a
+    # literal, a non-keyed dict, or a layer whose key is deferred.
+    while isinstance(value, dict) and len(value) == 1:
+        ((value_key, alternatives),) = value.items()
+        if not value_key.startswith("by-"):
+            break
+
+        keyed_by = value_key[3:]  # strip off 'by-' prefix
+        if defer and keyed_by in defer:
+            break
+
+        key = attributes.get(keyed_by)
+
+        if len(alternatives) == 1 and "default" in alternatives:
+            # Error out when only 'default' is specified as only alternatives,
+            # because we don't need to by-{keyed_by} there.
+            raise Exception(
+                "Keyed-by '{}' unnecessary with only value 'default' "
+                "found, when determining item {}".format(keyed_by, item_name)
+            )
+
+        if key is None:
+            # The attribute is unset: only a 'default' arm can apply.
+            if "default" not in alternatives:
+                raise Exception(
+                    "No attribute {} and no value for 'default' found "
+                    "while determining item {}".format(keyed_by, item_name)
+                )
+            value = alternatives["default"]
+            continue
+
+        matches = keymatch(alternatives, key)
+        if enforce_single_match and len(matches) > 1:
+            raise Exception(
+                "Multiple matching values for {} {!r} found while "
+                "determining item {}".format(keyed_by, key, item_name)
+            )
+        if not matches:
+            raise Exception(
+                "No {} matching {!r} nor 'default' found while determining item {}".format(
+                    keyed_by, key, item_name
+                )
+            )
+
+        # With enforce_single_match disabled, the first of several matches wins.
+        value = matches[0]
+
+    return value
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py
new file mode 100644
index 0000000000..56b513e74c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py
@@ -0,0 +1,40 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Imported from
+# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#923-949
+
+
+import functools
+
+
+class memoize(dict):
+    """Decorator caching the results of calls by their positional arguments.
+    Plain functions are cached in this dict subclass itself; instance
+    methods are cached per instance, in a ``_<function name>`` attribute.
+    """
+
+    def __init__(self, func):
+        self.func = func
+        functools.update_wrapper(self, func)
+
+    def __call__(self, *args):
+        if args in self:
+            return self[args]
+        result = self[args] = self.func(*args)
+        return result
+
+    def method_call(self, instance, *args):
+        cache_attr = "_%s" % self.func.__name__
+        if not hasattr(instance, cache_attr):
+            setattr(instance, cache_attr, {})
+        cache = getattr(instance, cache_attr)
+        if args in cache:
+            return cache[args]
+        result = cache[args] = self.func(instance, *args)
+        return result
+
+    def __get__(self, instance, cls):
+        bound = functools.partial(self.method_call, instance)
+        return functools.update_wrapper(bound, self.func)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py
new file mode 100644
index 0000000000..6233a98a40
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py
@@ -0,0 +1,97 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import re
+
+from taskgraph.util.taskcluster import get_artifact_url
+from taskgraph.util.time import json_time_from_now
+
+# Matches a ``<name>`` placeholder; group 1 is the text between the brackets.
+TASK_REFERENCE_PATTERN = re.compile("<([^>]+)>")
+# Matches a ``<name/path>`` placeholder; group 1 is the text before the first
+# slash, group 2 is everything after it up to the closing bracket.
+ARTIFACT_REFERENCE_PATTERN = re.compile("<([^/]+)/([^>]+)>")
+
+
+def _recurse(val, param_fns):
+    """Return a copy of ``val`` with parameterized dicts replaced.
+
+    Lists and dicts are walked recursively. A dict with exactly one key that
+    is also a key of ``param_fns`` is replaced by the result of calling the
+    matching function on that key's value (the result is not walked again).
+    Any other value is returned as-is.
+    """
+
+    def visit(node):
+        if isinstance(node, list):
+            return [visit(entry) for entry in node]
+        if not isinstance(node, dict):
+            return node
+        if len(node) == 1:
+            ((key, payload),) = node.items()
+            if key in param_fns:
+                return param_fns[key](payload)
+        return {key: visit(child) for key, child in node.items()}
+
+    return visit(val)
+
+
+def resolve_timestamps(now, task_def):
+    """Resolve all instances of `{'relative-datestamp': '..'}` in the given task definition.
+
+    Each such dict is replaced by ``json_time_from_now(<value>, now)``.
+    """
+
+    def to_timestamp(offset):
+        return json_time_from_now(offset, now)
+
+    return _recurse(task_def, {"relative-datestamp": to_timestamp})
+
+
+def resolve_task_references(label, task_def, task_id, decision_task_id, dependencies):
+    """Resolve all instances of ``{'task-reference': '..<..>..'}``
+    and ``{'artifact-reference': '..<dependency/artifact/path>..'}``
+    in the given task definition, using the given dependencies.
+
+    In a task-reference, ``<self>`` becomes ``task_id``, ``<decision>`` becomes
+    ``decision_task_id`` and any other ``<name>`` is looked up in
+    ``dependencies``. In an artifact-reference the placeholder is replaced by
+    the result of ``get_artifact_url`` for the referenced task and artifact.
+
+    Raises:
+        KeyError: if a referenced dependency is unknown, or an
+            artifact-reference names ``self``.
+    """
+
+    def substitute_task_id(match):
+        name = match.group(1)
+        if name == "self":
+            return task_id
+        if name == "decision":
+            return decision_task_id
+        try:
+            return dependencies[name]
+        except KeyError:
+            # handle escaping '<'
+            if name != "<":
+                raise KeyError(f"task '{label}' has no dependency named '{name}'")
+            return name
+
+    def substitute_artifact_url(match):
+        dependency = match.group(1)
+        artifact_name = match.group(2)
+
+        if dependency == "self":
+            raise KeyError(f"task '{label}' can't reference artifacts of self")
+        if dependency == "decision":
+            source_task_id = decision_task_id
+        else:
+            try:
+                source_task_id = dependencies[dependency]
+            except KeyError:
+                raise KeyError(
+                    f"task '{label}' has no dependency named '{dependency}'"
+                )
+
+        assert artifact_name.startswith("public/"), (
+            f"artifact-reference only supports public artifacts, not `{artifact_name}`"
+        )
+        return get_artifact_url(source_task_id, artifact_name)
+
+    def task_reference(text):
+        return TASK_REFERENCE_PATTERN.sub(substitute_task_id, text)
+
+    def artifact_reference(text):
+        return ARTIFACT_REFERENCE_PATTERN.sub(substitute_artifact_url, text)
+
+    replacers = {
+        "task-reference": task_reference,
+        "artifact-reference": artifact_reference,
+    }
+    return _recurse(task_def, replacers)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py
new file mode 100644
index 0000000000..c725140b12
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py
@@ -0,0 +1,167 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Like :py:mod:`os.path`, with a reduced set of functions, and with normalized path
+separators (always use forward slashes).
+Also contains a few additional utilities not found in :py:mod:`os.path`.
+"""
+
+# Imported from
+# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozpack/path.py
+
+
+import os
+import posixpath
+import re
+
+
+def normsep(path):
+    """
+    Return ``path`` with :py:const:`os.sep` (and :py:const:`os.altsep`, when
+    the platform defines one) replaced by forward slashes.
+    """
+    for separator in (os.sep, os.altsep):
+        if separator and separator != "/":
+            path = path.replace(separator, "/")
+    return path
+
+
+def relpath(path, start):
+    """Return ``path`` relative to ``start``, using forward slashes.
+
+    The empty string is returned where :py:func:`os.path.relpath` gives ``.``.
+    """
+    relative = normsep(os.path.relpath(path, start))
+    if relative == ".":
+        return ""
+    return relative
+
+
+def realpath(path):
+    """Return :py:func:`os.path.realpath` of ``path`` with forward slashes."""
+    resolved = os.path.realpath(path)
+    return normsep(resolved)
+
+
+def abspath(path):
+    """Return :py:func:`os.path.abspath` of ``path`` with forward slashes."""
+    absolute = os.path.abspath(path)
+    return normsep(absolute)
+
+
+def join(*paths):
+    """Join ``paths`` with :py:func:`os.path.join` and normalize separators."""
+    joined = os.path.join(*paths)
+    return normsep(joined)
+
+
+def normpath(path):
+    """Normalize separators, then apply :py:func:`posixpath.normpath`."""
+    slashed = normsep(path)
+    return posixpath.normpath(slashed)
+
+
+def dirname(path):
+    """Normalize separators, then apply :py:func:`posixpath.dirname`."""
+    slashed = normsep(path)
+    return posixpath.dirname(slashed)
+
+
+def commonprefix(paths):
+    """Return :py:func:`posixpath.commonprefix` of the separator-normalized ``paths``."""
+    slashed = list(map(normsep, paths))
+    return posixpath.commonprefix(slashed)
+
+
+def basename(path):
+    """Return the final component of ``path`` (the tail of :py:func:`os.path.split`)."""
+    _, tail = os.path.split(path)
+    return tail
+
+
+def splitext(path):
+    """Normalize separators, then apply :py:func:`posixpath.splitext`."""
+    slashed = normsep(path)
+    return posixpath.splitext(slashed)
+
+
+def split(path):
+    """
+    Split the separator-normalized path on ``/`` and return the list of parts.
+
+        ``split('foo/bar/baz')`` returns ``['foo', 'bar', 'baz']``
+    """
+    slashed = normsep(path)
+    return slashed.split("/")
+
+
+def basedir(path, bases):
+    """
+    Given a list of directories (`bases`), return which one contains the given
+    path. If several matches are found, the deepest base directory is returned.
+
+        ``basedir('foo/bar/baz', ['foo', 'baz', 'foo/bar'])`` returns ``'foo/bar'``
+        (`'foo'` and `'foo/bar'` both match, but `'foo/bar'` is the deepest match)
+
+    A path that is itself one of the bases is returned directly, an empty base
+    matches every path, and ``None`` is returned when nothing matches.
+    """
+    path = normsep(path)
+    candidates = [normsep(base) for base in bases]
+    if path in candidates:
+        return path
+    # Reverse-sorted order visits a longer base before any of its own prefixes.
+    matching = (
+        candidate
+        for candidate in sorted(candidates, reverse=True)
+        if not candidate or path.startswith(candidate + "/")
+    )
+    return next(matching, None)
+
+
+re_cache = {}
+MATCH_STAR_STAR_RE = re.compile(r"(^|/)\\\*\\\*/")
+MATCH_STAR_STAR_END_RE = re.compile(r"(^|/)\\\*\\\*$")
+
+
+def match(path, pattern):
+    """
+    Return whether the given path matches the given pattern.
+    An asterisk can be used to match any string, including the null string, in
+    one part of the path:
+
+        ``foo`` matches ``*``, ``f*`` or ``fo*o``
+
+    However, an asterisk matching a subdirectory may not match the null string:
+
+        ``foo/bar`` does *not* match ``foo/*/bar``
+
+    If the pattern matches one of the ancestor directories of the path, the
+    path is considered matching:
+
+        ``foo/bar`` matches ``foo``
+
+    Two adjacent asterisks can be used to match files and zero or more
+    directories and subdirectories.
+
+        ``foo/bar`` matches ``foo/**/bar``, or ``**/bar``
+
+    An empty pattern matches everything. Compiled patterns are kept in
+    ``re_cache``.
+    """
+    if not pattern:
+        return True
+    try:
+        compiled = re_cache[pattern]
+    except KeyError:
+        # Translate on the escaped pattern, so that only the (escaped)
+        # asterisks carry any special meaning.
+        regex = re.escape(pattern)
+        regex = MATCH_STAR_STAR_RE.sub(r"\1(?:.+/)?", regex)
+        regex = MATCH_STAR_STAR_END_RE.sub(r"(?:\1.+)?", regex)
+        regex = regex.replace(r"\*", "[^/]*")
+        # the optional tail lets a pattern match any path underneath it
+        compiled = re_cache[pattern] = re.compile(regex + "(?:/.*)?$")
+    return compiled.match(path) is not None
+
+
+def rebase(oldbase, base, relativepath):
+    """
+    Return `relativepath` relative to `base` instead of `oldbase`.
+
+    One of the two bases is asserted to contain the other. A trailing slash on
+    `relativepath` is kept on the result.
+    """
+    if base == oldbase:
+        return relativepath
+    moving_up = len(base) < len(oldbase)
+    if moving_up:
+        assert basedir(oldbase, [base]) == base
+        moved = join(relpath(oldbase, base), relativepath)
+    else:
+        assert basedir(base, [oldbase]) == oldbase
+        moved = relpath(relativepath, relpath(base, oldbase))
+    moved = normpath(moved)
+    wants_trailing_slash = relativepath.endswith("/")
+    if wants_trailing_slash and not moved.endswith("/"):
+        return moved + "/"
+    return moved
+
+
+def ancestors(path):
+    """Emit a path followed by each of its parent directories.
+
+    Iteration stops once :py:func:`os.path.dirname` returns an empty string or
+    no longer changes the path.
+
+    Args:
+        path (str): Path to emit parents of.
+
+    Yields:
+        str: ``path`` itself first, then each successive parent directory.
+    """
+    current = path
+    while current:
+        yield current
+        parent = os.path.dirname(current)
+        if parent == current:
+            return
+        current = parent
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py
new file mode 100644
index 0000000000..3eb61dfbf3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py
@@ -0,0 +1,52 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import inspect
+import os
+
+
+def find_object(path):
+    """
+    Find a Python object given a path of the form <modulepath>:<objectpath>.
+    Conceptually equivalent to
+
+        def find_object(modulepath, objectpath):
+            import <modulepath> as mod
+            return mod.<objectpath>
+
+    Raises:
+        ValueError: if ``path`` does not contain exactly one ``:``.
+    """
+    modulepath, separator, objectpath = path.partition(":")
+    if not separator or ":" in objectpath:
+        raise ValueError(f'python path {path!r} does not have the form "module:object"')
+
+    # ``__import__`` returns the top-level package, so walk down through the
+    # remaining module components before following the object path.
+    obj = __import__(modulepath)
+    attribute_chain = modulepath.split(".")[1:] + objectpath.split(".")
+    for attribute in attribute_chain:
+        obj = getattr(obj, attribute)
+    return obj
+
+
+def import_sibling_modules(exceptions=None):
+    """
+    Import every ``.py`` file that lives in the same directory as the module
+    calling this function.
+
+    Args:
+        exceptions (list): A list of file names to exclude (caller and
+            __init__.py are implicitly excluded).
+    """
+    # index 1 is the frame of whoever called this function
+    caller = inspect.stack()[1]
+    module = inspect.getmodule(caller[0])
+
+    own_file = os.path.basename(module.__file__)
+    skipped = {"__init__.py", own_file} | set(exceptions or ())
+
+    package = module.__name__
+    if not own_file.startswith("__init__.py"):
+        # called from a regular module: strip its own name to get the package
+        package = package.rsplit(".", 1)[0]
+
+    for entry in os.listdir(os.path.dirname(module.__file__)):
+        if entry.endswith(".py") and entry not in skipped:
+            __import__(package + "." + entry[:-3])
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py
new file mode 100644
index 0000000000..55d74f479a
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py
@@ -0,0 +1,22 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Imported from
+# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#115-127
+
+
+class ReadOnlyDict(dict):
+    """A read-only dictionary.
+
+    The contents are fixed at construction time. Every operation that would
+    add, replace or remove an entry raises ``Exception``.
+    """
+
+    def __init__(self, *args, **kwargs):
+        dict.__init__(self, *args, **kwargs)
+
+    def __delitem__(self, key):
+        raise Exception("Object does not support deletion.")
+
+    def __setitem__(self, key, value):
+        raise Exception("Object does not support assignment.")
+
+    def update(self, *args, **kwargs):
+        raise Exception("Object does not support update.")
+
+    # The built-in implementations of the mutators below do not go through
+    # __setitem__/__delitem__/update, so each has to be blocked explicitly.
+
+    def __ior__(self, other):
+        raise Exception("Object does not support update.")
+
+    def setdefault(self, key, default=None):
+        # Reading an existing key is harmless; only the insertion is refused.
+        if key in self:
+            return self[key]
+        raise Exception("Object does not support assignment.")
+
+    def pop(self, *args):
+        raise Exception("Object does not support deletion.")
+
+    def popitem(self):
+        raise Exception("Object does not support deletion.")
+
+    def clear(self):
+        raise Exception("Object does not support deletion.")
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py
new file mode 100644
index 0000000000..3989f71182
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py
@@ -0,0 +1,260 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import collections
+import pprint
+import re
+
+import voluptuous
+
+import taskgraph
+
+from .keyed_by import evaluate_keyed_by
+
+
+def validate_schema(schema, obj, msg_prefix):
+    """
+    Check ``obj`` against ``schema``. On failure, raise an ``Exception`` whose
+    message starts with ``msg_prefix``, lists every validation error on its own
+    line and ends with a pretty-printed dump of ``obj``.
+
+    Nothing is validated when ``taskgraph.fast`` is set.
+    """
+    if taskgraph.fast:
+        return
+    try:
+        schema(obj)
+    except voluptuous.MultipleInvalid as exc:
+        lines = [msg_prefix, *map(str, exc.errors)]
+        raise Exception("\n".join(lines) + "\n" + pprint.pformat(obj))
+
+
+def optionally_keyed_by(*arguments):
+    """
+    Build a validator for a value that may optionally be keyed by any of a
+    number of fields.  The last argument is the schema for the value itself;
+    all preceding arguments are the field names.  For example:
+
+        'some-value': optionally_keyed_by(
+            'test-platform', 'build-platform',
+            Any('a', 'b', 'c'))
+
+    The resulting schema will allow nesting of `by-test-platform` and
+    `by-build-platform` in either order.
+    """
+    fields, schema = arguments[:-1], arguments[-1]
+
+    def validator(obj):
+        single_key_dict = isinstance(obj, dict) and len(obj) == 1
+        if single_key_dict:
+            by_key, alternatives = next(iter(obj.items()))
+            if by_key.startswith("by-") and by_key[len("by-") :] in fields:
+                validated = {}
+                for alternative, value in alternatives.items():
+                    try:
+                        validated[alternative] = validator(value)
+                    except voluptuous.Invalid as error:
+                        # record where in the nesting the failure happened
+                        error.prepend([by_key, alternative])
+                        raise
+                return validated
+        return Schema(schema)(obj)
+
+    return validator
+
+
+def resolve_keyed_by(
+    item, field, item_name, defer=None, enforce_single_match=True, **extra_values
+):
+    """
+    Replace a field that is either a literal value or keyed by some other
+    attribute of the item with its evaluated value, in-place (modifying `item`
+    directly).  The field is specified using dotted notation to traverse
+    dictionaries.
+
+    For example, given item::
+
+        job:
+            test-platform: linux128
+            chunks:
+                by-test-platform:
+                    macosx-10.11/debug: 13
+                    win.*: 6
+                    default: 12
+
+    a call to `resolve_keyed_by(item, 'job.chunks', item['thing-name'])`
+    would mutate item in-place to::
+
+        job:
+            test-platform: linux128
+            chunks: 12
+
+    Items can be nested as deeply as the schema will allow::
+
+        chunks:
+            by-test-platform:
+                win.*:
+                    by-project:
+                        ash: ..
+                        cedar: ..
+                linux: 13
+                default: 12
+
+    If any component of `field` is missing, or an intermediate component is not
+    a dict, `item` is returned unchanged.
+
+    Args:
+        item (dict): Object being evaluated.
+        field (str): Name of the key to perform evaluation on.
+        item_name (str): Used to generate useful error messages.
+        defer (list):
+            Allows evaluating a by-* entry at a later time. In the example
+            above it's possible that the project attribute hasn't been set yet,
+            in which case we'd want to stop before resolving that subkey and
+            then call this function again later. This can be accomplished by
+            setting `defer=["project"]` in this example.
+        enforce_single_match (bool):
+            If True (default), each task may only match a single arm of the
+            evaluation.
+        extra_values (kwargs):
+            If supplied, represent additional values available
+            for reference from by-<field>.
+
+    Returns:
+        dict: item which has also been modified in-place.
+    """
+    # walk down to the dict holding the last component of the dotted field
+    *parents, leaf = field.split(".")
+    container = item
+    for name in parents:
+        if name not in container:
+            return item
+        container = container[name]
+        if not isinstance(container, dict):
+            return item
+
+    if leaf not in container:
+        return item
+
+    attributes = dict(item, **extra_values)
+    container[leaf] = evaluate_keyed_by(
+        value=container[leaf],
+        item_name=f"`{field}` in `{item_name}`",
+        defer=defer,
+        enforce_single_match=enforce_single_match,
+        attributes=attributes,
+    )
+
+    return item
+
+
+# Identifiers in YAML schemas are expected to be dashed and lower-case.  Parts
+# of a schema that have a good reason to use another format can be exempted by
+# listing them here.
+EXCEPTED_SCHEMA_IDENTIFIERS = [
+    # upstream-artifacts and artifact-map are handed directly to scriptWorker,
+    # which expects interCaps
+    "upstream-artifacts",
+    "artifact-map",
+]
+
+
+def check_schema(schema):
+    """Check that the keys of ``schema`` are dashed lower-case identifiers.
+
+    ``schema.schema`` is walked recursively through mappings, lists/tuples and
+    ``voluptuous.Any`` validators. Locations covered by
+    ``EXCEPTED_SCHEMA_IDENTIFIERS`` are exempt.
+
+    Raises:
+        RuntimeError: if a key is not a valid identifier, or is of a type this
+            check does not know about.
+    """
+    identifier_re = re.compile(r"^\$?[a-z][a-z0-9-]*$")
+
+    def excepted(location):
+        # an exception is either a key name to look for in the location string
+        # or a predicate called with the location
+        return any(
+            f"[{exception!r}]" in location
+            if isinstance(exception, str)
+            else exception(location)
+            for exception in EXCEPTED_SCHEMA_IDENTIFIERS
+        )
+
+    def check_identifier(location, key):
+        if key in (str, voluptuous.Extra) or isinstance(key, voluptuous.NotIn):
+            return
+        if isinstance(key, str):
+            if not identifier_re.match(key) and not excepted(location):
+                raise RuntimeError(
+                    "YAML schemas should use dashed lower-case identifiers, "
+                    f"not {key!r} @ {location}"
+                )
+            return
+        if isinstance(key, (voluptuous.Optional, voluptuous.Required)):
+            check_identifier(location, key.schema)
+            return
+        if isinstance(key, (voluptuous.Any, voluptuous.All)):
+            for validator in key.validators:
+                check_identifier(location, validator)
+            return
+        if not excepted(location):
+            raise RuntimeError(
+                f"Unexpected type in YAML schema: {type(key).__name__} @ {location}"
+            )
+
+    def walk(location, node):
+        if isinstance(node, collections.abc.Mapping):
+            for key, value in node.items():
+                child = f"{location}[{key!r}]"
+                check_identifier(child, key)
+                walk(child, value)
+        elif isinstance(node, (list, tuple)):
+            for index, value in enumerate(node):
+                walk(f"{location}[{index}]", value)
+        elif isinstance(node, voluptuous.Any):
+            for validator in node.validators:
+                walk(location, validator)
+
+    walk("schema", schema.schema)
+
+
+class Schema(voluptuous.Schema):
+    """
+    Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks
+    in the process.
+
+    Args:
+        check (bool): Whether to run ``check_schema`` on this schema and on
+            schemas derived from it with ``extend``. Defaults to True.
+    """
+
+    def __init__(self, *args, check=True, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.check = check
+        if not taskgraph.fast and self.check:
+            check_schema(self)
+
+    def extend(self, *args, **kwargs):
+        schema = super().extend(*args, **kwargs)
+
+        # We want twice extend schema to be checked too, so make sure the
+        # result is one of ours and carries the same ``check`` setting;
+        # otherwise a later ``extend`` on it could not rely on ``self.check``.
+        schema.__class__ = Schema
+        schema.check = self.check
+        # same conditions as in __init__: no checks in fast mode
+        if not taskgraph.fast and self.check:
+            check_schema(schema)
+        return schema
+
+    def _compile(self, schema):
+        # fast mode skips compilation; such a schema is never used to validate
+        if taskgraph.fast:
+            return
+        return super()._compile(schema)
+
+    def __getitem__(self, item):
+        return self.schema[item]
+
+
+# Accepted forms of an optimization specification: ``None`` or a single-key
+# dict naming the optimization together with its arguments.
+OptimizationSchema = voluptuous.Any(
+    # always run this task (default)
+    None,
+    # search the index for the given index namespaces, and replace this task if found
+    # the search occurs in order, with the first match winning
+    {"index-search": [str]},
+    # skip this task if none of the given file patterns match
+    {"skip-unless-changed": [str]},
+)
+
+# shortcut for a string where task references are allowed: either a plain
+# string, or a dict with a single `task-reference` or `artifact-reference` key
+taskref_or_string = voluptuous.Any(
+    str,
+    {voluptuous.Required("task-reference"): str},
+    {voluptuous.Required("artifact-reference"): str},
+)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py
new file mode 100644
index 0000000000..d695767f05
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py
@@ -0,0 +1,40 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+SHELL_QUOTE_RE = re.compile(r"[\\\t\r\n \'\"#<>&|`(){}$;\*\?]")
+
+
+def _quote(s):
+    """Return a version of the given string that can be used literally on a
+    shell command line, wrapped in single quotes when that is necessary.
+
+    As a special case, an int is returned as its decimal string, unquoted.
+    """
+    if type(s) is int:
+        return "%d" % s
+
+    # Empty strings need to be quoted to have any significance
+    needs_quoting = not s or SHELL_QUOTE_RE.search(s) or s.startswith("~")
+    if not needs_quoting:
+        return s
+
+    # Inside single quotes everything is literal except the single quote
+    # itself, which cannot be escaped there: close the quoted string, emit an
+    # escaped single quote, then reopen it.
+    text_type = type(s)
+    template = text_type("'%s'")
+    return template % s.replace(text_type("'"), text_type("'\\''"))
+
+
+def quote(*strings):
+    """Quote each of the given strings for literal use on a shell command
+    line and return them joined by single spaces.
+
+        >>> quote('a', 'b')
+        "a b"
+        >>> quote('a b', 'c')
+        "'a b' c"
+    """
+    return " ".join(map(_quote, strings))
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py
new file mode 100644
index 0000000000..a830a473b3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py
@@ -0,0 +1,373 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import datetime
+import functools
+import logging
+import os
+
+import requests
+import taskcluster_urls as liburls
+from requests.packages.urllib3.util.retry import Retry
+
+from taskgraph.task import Task
+from taskgraph.util import yaml
+from taskgraph.util.memoize import memoize
+
+logger = logging.getLogger(__name__)
+
+# this is set to true for `mach taskgraph action-callback --test`
+# When true, helpers in this module such as cancel_task, rerun_task and
+# purge_cache only log what they would have done.
+testing = False
+
+# Default rootUrl to use if none is given in the environment; this should point
+# to the production Taskcluster deployment used for CI.
+# While this is None, get_root_url raises when $TASKCLUSTER_ROOT_URL is unset.
+PRODUCTION_TASKCLUSTER_ROOT_URL = None
+
+# the maximum number of parallel Taskcluster API calls to make
+# (used as the default connection pool size in requests_retry_session)
+CONCURRENCY = 50
+
+
+@memoize
+def get_root_url(use_proxy):
+    """Get the current TASKCLUSTER_ROOT_URL.
+
+    When running in a task, this must come from $TASKCLUSTER_ROOT_URL; when run
+    on the command line, a default may be provided that points to the
+    production deployment of Taskcluster. If use_proxy is set, this attempts to
+    get TASKCLUSTER_PROXY_URL instead, failing if it is not set.
+
+    Raises:
+        RuntimeError: if no suitable URL can be determined.
+    """
+    if use_proxy:
+        try:
+            return liburls.normalize_root_url(os.environ["TASKCLUSTER_PROXY_URL"])
+        except KeyError:
+            if "TASK_ID" in os.environ:
+                reason = "taskcluster-proxy is not enabled for this task"
+            else:
+                reason = (
+                    "taskcluster-proxy is not available when not executing in a task"
+                )
+            raise RuntimeError(reason)
+
+    root_url = os.environ.get("TASKCLUSTER_ROOT_URL")
+    if root_url is not None:
+        proxy_note = ""
+        if "TASKCLUSTER_PROXY_URL" in os.environ:
+            proxy_note = " with taskcluster-proxy"
+        logger.debug(
+            "Running in Taskcluster instance {}{}".format(root_url, proxy_note)
+        )
+        return liburls.normalize_root_url(root_url)
+
+    if "TASK_ID" in os.environ:
+        raise RuntimeError("$TASKCLUSTER_ROOT_URL must be set when running in a task")
+
+    if PRODUCTION_TASKCLUSTER_ROOT_URL is None:
+        raise RuntimeError(
+            "Could not detect Taskcluster instance, set $TASKCLUSTER_ROOT_URL"
+        )
+
+    logger.debug("Using default TASKCLUSTER_ROOT_URL")
+    return liburls.normalize_root_url(PRODUCTION_TASKCLUSTER_ROOT_URL)
+
+
+def requests_retry_session(
+    retries,
+    backoff_factor=0.1,
+    status_forcelist=(500, 502, 503, 504),
+    concurrency=CONCURRENCY,
+    session=None,
+):
+    """Return a requests session whose HTTP(S) adapter retries requests.
+
+    Args:
+        retries (int): Used for the total, read and connect retry limits.
+        backoff_factor (float): Passed through to ``Retry``.
+        status_forcelist (tuple): Passed through to ``Retry``.
+        concurrency (int): Size of the adapter's connection pool.
+        session: An existing session to configure; a new ``requests.Session``
+            is created when omitted.
+    """
+    if not session:
+        session = requests.Session()
+
+    retry_policy = Retry(
+        total=retries,
+        read=retries,
+        connect=retries,
+        backoff_factor=backoff_factor,
+        status_forcelist=status_forcelist,
+    )
+
+    # Default HTTPAdapter uses 10 connections. Mount custom adapter to increase
+    # that limit. Connections are established as needed, so using a large value
+    # should not negatively impact performance.
+    adapter = requests.adapters.HTTPAdapter(
+        pool_connections=concurrency,
+        pool_maxsize=concurrency,
+        max_retries=retry_policy,
+    )
+    for scheme in ("http://", "https://"):
+        session.mount(scheme, adapter)
+
+    return session
+
+
+@memoize
+def get_session():
+    """Return the memoized requests session, configured with 5 retries."""
+    session = requests_retry_session(retries=5)
+    return session
+
+
+def _do_request(url, method=None, **kwargs):
+    """Send a request through the shared session and return the response.
+
+    When ``method`` is omitted it is ``post`` if any keyword arguments were
+    given and ``get`` otherwise. GET requests are made with ``stream=True``.
+    ``response.raise_for_status()`` is called before returning.
+    """
+    verb = method
+    if verb is None:
+        verb = "get" if not kwargs else "post"
+
+    session = get_session()
+    if verb == "get":
+        kwargs["stream"] = True
+
+    send = getattr(session, verb)
+    response = send(url, **kwargs)
+
+    failed = response.status_code >= 400
+    if failed:
+        # Consume content before raise_for_status, so that the connection can be
+        # reused.
+        response.content
+    response.raise_for_status()
+    return response
+
+
+def _handle_artifact(path, response):
+    """Turn an artifact response into data, based on the suffix of ``path``.
+
+    ``.json`` yields ``response.json()`` and ``.yml`` yields the response text
+    loaded with ``yaml.load_stream``. For anything else ``response.raw`` is
+    returned, with its ``read`` wrapped so that ``decode_content=True`` is
+    passed.
+    """
+    if path.endswith(".json"):
+        return response.json()
+    if path.endswith(".yml"):
+        return yaml.load_stream(response.text)
+    stream = response.raw
+    stream.read = functools.partial(stream.read, decode_content=True)
+    return stream
+
+
+def get_artifact_url(task_id, path, use_proxy=False):
+    """Return a URL for the artifact ``path`` of task ``task_id``.
+
+    Without ``use_proxy`` this is the queue's artifact URL under the regular
+    (non-proxy) root URL. With ``use_proxy`` that URL is posted to the
+    taskcluster-proxy ``/bewit`` endpoint and the response text is returned.
+    """
+    template = liburls.api(
+        get_root_url(False), "queue", "v1", "task/{}/artifacts/{}"
+    )
+    artifact_url = template.format(task_id, path)
+    if not use_proxy:
+        return artifact_url
+
+    # Until Bug 1405889 is deployed, we can't download directly
+    # from the taskcluster-proxy.  Work around by using the /bewit
+    # endpoint instead.
+    # The bewit URL is the body of a 303 redirect, which we don't
+    # want to follow (which fetches a potentially large resource).
+    bewit_endpoint = os.environ["TASKCLUSTER_PROXY_URL"] + "/bewit"
+    response = _do_request(
+        bewit_endpoint,
+        data=artifact_url,
+        allow_redirects=False,
+    )
+    return response.text
+
+
+def get_artifact(task_id, path, use_proxy=False):
+    """
+    Fetch the artifact with the given path from the given task id.
+
+    A path ending in ".json" or ".yml" is deserialized as json or yaml
+    respectively, and the resulting python data (usually a dict) is returned.
+    For any other path a file-like object is returned.
+    """
+    url = get_artifact_url(task_id, path, use_proxy)
+    return _handle_artifact(path, _do_request(url))
+
+
+def list_artifacts(task_id, use_proxy=False):
+    """Return the ``artifacts`` list from the task's artifact listing."""
+    # an empty artifact path leaves a trailing slash on the URL; drop it
+    listing_url = get_artifact_url(task_id, "", use_proxy).rstrip("/")
+    listing = _do_request(listing_url).json()
+    return listing["artifacts"]
+
+
+def get_artifact_prefix(task):
+    """Return the ``artifact_prefix`` attribute of a task, or ``public/build``
+    when it is missing or empty.
+
+    ``task`` is either a task dict (attributes under the ``attributes`` key) or
+    a ``Task`` instance; anything else raises ``Exception``.
+    """
+    if isinstance(task, dict):
+        attributes = task.get("attributes", {})
+    elif isinstance(task, Task):
+        attributes = task.attributes
+    else:
+        raise Exception(f"Can't find artifact-prefix of non-task: {task}")
+    return attributes.get("artifact_prefix") or "public/build"
+
+
+def get_artifact_path(task, path):
+    """Return ``path`` prefixed with the task's artifact prefix and a slash."""
+    prefix = get_artifact_prefix(task)
+    return "{}/{}".format(prefix, path)
+
+
+def get_index_url(index_path, use_proxy=False, multiple=False):
+    """Return the index service URL for ``index_path``.
+
+    The ``task/`` endpoint is used by default and ``tasks/`` when ``multiple``
+    is set.
+    """
+    plural = "s" if multiple else ""
+    template = liburls.api(get_root_url(use_proxy), "index", "v1", "task{}/{}")
+    return template.format(plural, index_path)
+
+
+def find_task_id(index_path, use_proxy=False):
+    """Return the ``taskId`` indexed under ``index_path``.
+
+    Raises:
+        KeyError: if the index responds with a 404 for that path.
+    """
+    try:
+        response = _do_request(get_index_url(index_path, use_proxy))
+    except requests.exceptions.HTTPError as error:
+        if error.response.status_code != 404:
+            raise
+        raise KeyError(f"index path {index_path} not found")
+    return response.json()["taskId"]
+
+
+def get_artifact_from_index(index_path, artifact_path, use_proxy=False):
+    """Fetch ``artifact_path`` from the task indexed under ``index_path``.
+
+    The response is decoded the same way as in ``_handle_artifact``.
+    """
+    full_path = "/".join((index_path, "artifacts", artifact_path))
+    url = get_index_url(full_path, use_proxy)
+    return _handle_artifact(full_path, _do_request(url))
+
+
+def list_tasks(index_path, use_proxy=False):
+    """
+    Return the task ids of all tasks indexed under ``index_path``, following
+    continuation tokens until the listing is exhausted. Results are sorted by
+    expiration date from oldest to newest.
+    """
+    url = get_index_url(index_path, use_proxy, multiple=True)
+    tasks = []
+    payload = {}
+    while True:
+        page = _do_request(url, json=payload).json()
+        tasks.extend(page["tasks"])
+        token = page.get("continuationToken")
+        if not token:
+            break
+        payload = {"continuationToken": token}
+
+    # We can sort on expires because in the general case
+    # all of these tasks should be created with the same expires time so they end up in
+    # order from earliest to latest action. If more correctness is needed, consider
+    # fetching each task and sorting on the created date.
+    tasks.sort(key=lambda task: parse_time(task["expires"]))
+    return [task["taskId"] for task in tasks]
+
+
+def parse_time(timestamp):
+    """Turn a "JSON timestamp" as used in TC APIs into a (naive) datetime"""
+    json_time_format = "%Y-%m-%dT%H:%M:%S.%fZ"
+    return datetime.datetime.strptime(timestamp, json_time_format)
+
+
+def get_task_url(task_id, use_proxy=False):
+    """Return the queue service URL of the task with the given id."""
+    template = liburls.api(get_root_url(use_proxy), "queue", "v1", "task/{}")
+    task_url = template.format(task_id)
+    return task_url
+
+
+def get_task_definition(task_id, use_proxy=False):
+    """Fetch the task URL for ``task_id`` and return the decoded JSON body."""
+    task_url = get_task_url(task_id, use_proxy)
+    return _do_request(task_url).json()
+
+
+def cancel_task(task_id, use_proxy=False):
+    """Cancel the task with the given task_id. In testing mode, only log that
+    it would have been cancelled."""
+    if testing:
+        logger.info(f"Would have cancelled {task_id}.")
+        return
+    cancel_url = get_task_url(task_id, use_proxy) + "/cancel"
+    _do_request(cancel_url, json={})
+
+
+def status_task(task_id, use_proxy=False):
+    """Gets the status of a task given a task_id.
+
+    In testing mode, only logs that the status would have been retrieved and
+    returns ``None``.
+
+    Args:
+        task_id (str): A task id.
+        use_proxy (bool): Whether to use taskcluster-proxy (default: False)
+
+    Returns:
+        dict: A dictionary object as defined here:
+          https://docs.taskcluster.net/docs/reference/platform/queue/api#status
+    """
+    if testing:
+        logger.info(f"Would have gotten status for {task_id}.")
+        return None
+    status_url = get_task_url(task_id, use_proxy) + "/status"
+    body = _do_request(status_url).json()
+    return body.get("status", {})
+
+
+def state_task(task_id, use_proxy=False):
+    """Gets the state of a task given a task_id.
+
+    In testing mode, only logs that the state would have been retrieved and
+    returns ``None``. This is a subset of the data returned by
+    :func:`status_task`.
+
+    Args:
+        task_id (str): A task id.
+        use_proxy (bool): Whether to use taskcluster-proxy (default: False)
+
+    Returns:
+        str: The state of the task, one of
+          ``pending, running, completed, failed, exception, unknown``.
+    """
+    if testing:
+        logger.info(f"Would have gotten state for {task_id}.")
+        return None
+    status = status_task(task_id, use_proxy=use_proxy)
+    # a missing or empty state is reported as "unknown"
+    return status.get("state") or "unknown"
+
+
+def rerun_task(task_id):
+    """Rerun the task with the given task_id; the request always uses the
+    proxy root URL. In testing mode, only log that it would have been rerun."""
+    if testing:
+        logger.info(f"Would have rerun {task_id}.")
+        return
+    rerun_url = get_task_url(task_id, use_proxy=True) + "/rerun"
+    _do_request(rerun_url, json={})
+
+
+def get_current_scopes():
+    """Get the current scopes.  This only makes sense in a task with the Taskcluster
+    proxy enabled, where it returns the actual scopes accorded to the task.
+
+    An empty list is returned when the response carries no ``scopes`` entry."""
+    scopes_url = liburls.api(get_root_url(True), "auth", "v1", "scopes/current")
+    body = _do_request(scopes_url).json()
+    return body.get("scopes", [])
+
+
+def get_purge_cache_url(provisioner_id, worker_type, use_proxy=False):
+    """Return the purge-cache service URL for the given provisioner and worker type."""
+    root_url = get_root_url(use_proxy)
+    template = liburls.api(root_url, "purge-cache", "v1", "purge-cache/{}/{}")
+    return template.format(provisioner_id, worker_type)
+
+
+def purge_cache(provisioner_id, worker_type, cache_name, use_proxy=False):
+    """Requests a cache purge from the purge-caches service.
+
+    In testing mode, only logs what would have been purged."""
+    target = f"{provisioner_id}/{worker_type}/{cache_name}"
+    if testing:
+        logger.info(f"Would have purged {target}.")
+        return
+    logger.info(f"Purging {target}.")
+    purge_cache_url = get_purge_cache_url(provisioner_id, worker_type, use_proxy)
+    _do_request(purge_cache_url, json={"cacheName": cache_name})
+
+
+def send_email(address, subject, content, link, use_proxy=False):
+    """Sends an email using the notify service"""
+    logger.info(f"Sending email to {address}.")
+    message = {
+        "address": address,
+        "subject": subject,
+        "content": content,
+        "link": link,
+    }
+    notify_url = liburls.api(get_root_url(use_proxy), "notify", "v1", "email")
+    _do_request(notify_url, json=message)
+
+
+def list_task_group_incomplete_tasks(task_group_id):
+    """Generate the ids of the incomplete tasks in a task group.
+
+    A task counts as incomplete while its state is ``running``, ``pending`` or
+    ``unscheduled``. Continuation tokens are followed until the listing ends.
+    """
+    url = liburls.api(
+        get_root_url(False),
+        "queue",
+        "v1",
+        f"task-group/{task_group_id}/list",
+    )
+    incomplete_states = ["running", "pending", "unscheduled"]
+    params = {}
+    while True:
+        page = _do_request(url, method="get", params=params).json()
+        statuses = [entry["status"] for entry in page["tasks"]]
+        for status in statuses:
+            if status["state"] in incomplete_states:
+                yield status["taskId"]
+        token = page.get("continuationToken")
+        if not token:
+            break
+        params = {"continuationToken": token}
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py
new file mode 100644
index 0000000000..7b545595ef
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py
@@ -0,0 +1,54 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Tools for interacting with existing taskgraphs.
+"""
+
+
+from taskgraph.util.taskcluster import find_task_id, get_artifact
+
+
+def find_decision_task(parameters, graph_config):
+    """Look up the taskId of the decision task for this action's parameters.
+
+    The index path depends on ``parameters["repository_type"]`` (``hg`` when
+    absent): hg pushes are looked up by pushlog id, git pushes by head
+    revision. Any other repository type raises an ``Exception``.
+    """
+    repo_type = parameters.get("repository_type", "hg")
+    if repo_type not in ("hg", "git"):
+        raise Exception(f"Unknown repository_type {repo_type}!")
+
+    # Both index paths start with the same trust-domain and project.
+    prefix = f"{graph_config['trust-domain']}.v2.{parameters['project']}"
+    if repo_type == "hg":
+        # hg: <prefix>.pushlog-id.<pushlog_id>.decision
+        return find_task_id(
+            f"{prefix}.pushlog-id.{parameters['pushlog_id']}.decision"
+        )
+
+    # git: <prefix>.revision.<head_rev>.taskgraph.decision
+    return find_task_id(
+        f"{prefix}.revision.{parameters['head_rev']}.taskgraph.decision"
+    )
+
+
+def find_existing_tasks_from_previous_kinds(
+    full_task_graph, previous_graph_ids, rebuild_kinds
+):
+    """Given a list of previous decision/action taskIds and kinds to ignore
+    from the previous graphs, return a dictionary of labels-to-taskids to use
+    as ``existing_tasks`` in the optimization step."""
+    # Reusable labels depend only on the current graph: compute them once.
+    reusable = {
+        t.label
+        for t in full_task_graph.tasks.values()
+        if t.attributes["kind"] not in rebuild_kinds
+    }
+    existing_tasks = {}
+    for previous_graph_id in previous_graph_ids:
+        previous = get_artifact(previous_graph_id, "public/label-to-taskid.json")
+        existing_tasks.update((k, v) for k, v in previous.items() if k in reusable)
+    return existing_tasks
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py
new file mode 100644
index 0000000000..23cd5f8d68
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py
@@ -0,0 +1,80 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import copy
+
+
+def merge_to(source, dest):
+    """
+    Merge dict and arrays (override scalar values)
+
+    Keys from source override keys from dest, and elements from lists in source
+    are appended to lists in dest.
+
+    Values are stored in dest by reference, not copied; use ``merge`` when the
+    inputs must stay untouched.
+
+    :param dict source: to copy from
+    :param dict dest: to copy to (modified in place)
+    :returns: dest
+    """
+    for key, value in source.items():
+        current = dest.get(key)
+        # Override mismatching or empty types
+        if type(value) != type(current):  # noqa
+            dest[key] = value
+        elif isinstance(value, dict):
+            merge_to(value, current)
+        elif isinstance(value, list):
+            dest[key] = current + value
+        else:
+            dest[key] = value
+    return dest
+
+
+def merge(*objects):
+    """
+    Merge the given objects, using the semantics described for merge_to, with
+    objects later in the list taking precedence.  From an inheritance
+    perspective, "parents" should be listed before "children".
+
+    Returns the result without modifying any arguments.
+    """
+    # Deep-copy every object first: merge_to stores source values by reference,
+    # so later merges would otherwise write through into earlier arguments.
+    result = copy.deepcopy(objects[0])
+    for obj in objects[1:]:
+        merge_to(copy.deepcopy(obj), result)
+    return result
+
+
+def deep_get(dict_, field):
+    """Return the value at the dot-separated path ``field`` in nested dicts,
+    or None when any component of the path is missing."""
+    *parents, leaf = field.split(".")
+    container = dict_
+    for name in parents:
+        if name not in container:
+            return None
+        container = container[name]
+    return container.get(leaf)
+
+
+def substitute(item, **subs):
+    """Recursively apply ``str.format(**subs)`` to every string in ``item``.
+
+    Dicts are rebuilt (their keys are formatted too) and returned as new
+    objects; anything that is not a list, dict or str is returned unchanged.
+    """
+    if isinstance(item, str):
+        return item.format(**subs)
+    if isinstance(item, dict):
+        return {k.format(**subs): substitute(v, **subs) for k, v in item.items()}
+    if isinstance(item, list):
+        # Lists are the only container updated in place, so the caller's own
+        # list object ends up holding the substituted values.
+        for index, element in enumerate(item):
+            item[index] = substitute(element, **subs)
+    return item
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py
new file mode 100644
index 0000000000..e511978b5f
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py
@@ -0,0 +1,115 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Python port of the ms.js node module this is not a direct port some things are
+# more complicated or less precise and we lean on time delta here.
+
+
+import datetime
+import re
+
+PATTERN = re.compile(r"((?:\d+)?\.?\d+) *([a-z]+)")
+
+
+def seconds(value):
+    return datetime.timedelta(seconds=int(value))
+
+
+def minutes(value):
+    return datetime.timedelta(minutes=int(value))
+
+
+def hours(value):
+    return datetime.timedelta(hours=int(value))
+
+
+def days(value):
+    return datetime.timedelta(days=int(value))
+
+
+def months(value):
+    # See warning in years(), below
+    return datetime.timedelta(days=int(value) * 30)
+
+
+def years(value):
+    # Warning here "years" are vague don't use this for really sensitive date
+    # computation the idea is to give you a absolute amount of time in the
+    # future which is not the same thing as "precisely on this date next year"
+    return datetime.timedelta(days=int(value) * 365)
+
+
+ALIASES = {}
+ALIASES["seconds"] = ALIASES["second"] = ALIASES["s"] = seconds
+ALIASES["minutes"] = ALIASES["minute"] = ALIASES["min"] = minutes
+ALIASES["hours"] = ALIASES["hour"] = ALIASES["h"] = hours
+ALIASES["days"] = ALIASES["day"] = ALIASES["d"] = days
+ALIASES["months"] = ALIASES["month"] = ALIASES["mo"] = months
+ALIASES["years"] = ALIASES["year"] = ALIASES["y"] = years
+
+
+class InvalidString(Exception):
+    pass
+
+
+class UnknownTimeMeasurement(Exception):
+    pass
+
+
+def value_of(input_str):
+    """
+    Convert a duration string into a ``datetime.timedelta``.
+
+    :param str input_str: (ex: 1d, 2d, 6years, 2 seconds)
+    :returns: ``datetime.timedelta`` for the given amount of the given unit
+    :raises InvalidString: if the string holds no "<number><unit>" pair
+    :raises UnknownTimeMeasurement: if the unit is not a key of ``ALIASES``
+    :raises ValueError: for fractional amounts (the converters use ``int``)
+    """
+
+    matches = PATTERN.search(input_str)
+    # PATTERN has exactly two groups, so a match always yields both of them.
+    if matches is None:
+        raise InvalidString(f"'{input_str}' is invalid string")
+
+    value, unit = matches.groups()
+    if unit not in ALIASES:
+        raise UnknownTimeMeasurement(
+            f"{unit} is not a valid time measure use one of {sorted(ALIASES)}"
+        )
+    return ALIASES[unit](value)
+
+
+def json_time_from_now(input_str, now=None, datetime_format=False):
+    """
+    :param str input_str: Input string (see value of)
+    :param datetime now: Optionally set the definition of `now`
+    :param boolean datetime_format: Set `True` to get a `datetime` output
+    :returns: JSON string representation of time in future.
+    """
+
+    if now is None:
+        now = datetime.datetime.utcnow()
+
+    time = now + value_of(input_str)
+
+    if datetime_format is True:
+        return time
+    else:
+        # Sorta a big hack but the json schema validator for date does not like the
+        # ISO dates until 'Z' (for timezone) is added...
+        # Microseconds are excluded (see bug 1381801)
+        return time.isoformat(timespec="milliseconds") + "Z"
+
+
+def current_json_time(datetime_format=False):
+    """
+    :param boolean datetime_format: Set `True` to get a `datetime` output
+    :returns: JSON string representation of the current time.
+    """
+    if datetime_format is True:
+        return datetime.datetime.utcnow()
+    else:
+        # Microseconds are excluded (see bug 1381801)
+        return datetime.datetime.utcnow().isoformat(timespec="milliseconds") + "Z"
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py
new file mode 100644
index 0000000000..cff5f286cc
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py
@@ -0,0 +1,84 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+_JOINED_SYMBOL_RE = re.compile(r"([^(]*)\(([^)]*)\)$")
+
+
+def split_symbol(treeherder_symbol):
+    """Split a symbol expressed as grp(sym) into its two parts.  If no group is
+    given, the returned group is '?'"""
+    groupSymbol = "?"
+    symbol = treeherder_symbol
+    if "(" in symbol:
+        match = _JOINED_SYMBOL_RE.match(symbol)
+        if match:
+            groupSymbol, symbol = match.groups()
+        else:
+            raise Exception(f"`{symbol}` is not a valid treeherder symbol.")
+    return groupSymbol, symbol
+
+
+def join_symbol(group, symbol):
+    """Perform the reverse of split_symbol, combining the given group and
+    symbol.  If the group is '?', then it is omitted."""
+    if group == "?":
+        return symbol
+    return f"{group}({symbol})"
+
+
+def add_suffix(treeherder_symbol, suffix):
+    """Add a suffix to a treeherder symbol that may contain a group."""
+    group, symbol = split_symbol(treeherder_symbol)
+    symbol += str(suffix)
+    return join_symbol(group, symbol)
+
+
+def replace_group(treeherder_symbol, new_group):
+    """Replace the group of a treeherder symbol, keeping its symbol part."""
+    _, symbol = split_symbol(treeherder_symbol)
+    return join_symbol(new_group, symbol)
+
+
+def inherit_treeherder_from_dep(job, dep_job):
+    """Fill in missing treeherder settings of ``job`` from ``dep_job``.
+
+    Returns the job's ``treeherder`` dict (a new dict if the job has none)
+    with ``platform``, ``tier`` and ``kind`` defaulted; values the job already
+    sets are kept. The symbol is never set here.
+    """
+    treeherder = job.get("treeherder", {})
+    dep_th = dep_job.task.get("extra", {}).get("treeherder", {})
+    platform = dep_th.get("machine", {}).get("platform", "")
+    # The first collection key of the dependency is used as the collection.
+    collection = list(dep_th.get("collection", {}).keys())[0]
+    defaults = (
+        ("platform", f"{platform}/{collection}"),
+        ("tier", dep_th.get("tier", 1)),
+        ("kind", "build"),
+    )
+    for key, value in defaults:
+        treeherder.setdefault(key, value)
+    return treeherder
+
+
+def treeherder_defaults(kind, label):
+    """Build the default treeherder settings for a task of the given kind.
+
+    ``label`` is currently unused.
+    """
+    # "build" takes precedence over "test" when the kind name contains both.
+    th_kind = next((k for k in ("build", "test") if k in kind), "other")
+
+    return {
+        # Despite its name, this is expected to be a platform+collection
+        "platform": "default/opt",
+        "tier": 1,
+        "kind": th_kind,
+        # Uppercased first letter of each part of the kind name, eg:
+        # apple-banana -> AB. Empty parts (as in "a--b") are skipped rather
+        # than raising IndexError.
+        "symbol": "".join(p[0] for p in kind.split("-") if p).upper(),
+    }
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py
new file mode 100644
index 0000000000..2d967d2645
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py
@@ -0,0 +1,552 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import os
+import re
+import subprocess
+from abc import ABC, abstractmethod, abstractproperty
+from shutil import which
+
+import requests
+from redo import retry
+
+from taskgraph.util.path import ancestors
+
+PUSHLOG_TMPL = "{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1"
+
+logger = logging.getLogger(__name__)
+
+
+class Repository(ABC):
+    # Both mercurial and git use sha1 as revision identifiers. Luckily, both define
+    # the same value as the null revision.
+    #
+    # https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7
+    # https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30
+    NULL_REVISION = "0000000000000000000000000000000000000000"
+
+    def __init__(self, path):
+        self.path = path
+        self.binary = which(self.tool)
+        if self.binary is None:
+            raise OSError(f"{self.tool} not found!")
+        self._valid_diff_filter = ("m", "a", "d")
+
+        self._env = os.environ.copy()
+
+    def run(self, *args: str, **kwargs):
+        return_codes = kwargs.pop("return_codes", [])
+        cmd = (self.binary,) + args
+
+        try:
+            return subprocess.check_output(
+                cmd, cwd=self.path, env=self._env, encoding="utf-8", **kwargs
+            )
+        except subprocess.CalledProcessError as e:
+            if e.returncode in return_codes:
+                return ""
+            raise
+
+    @abstractproperty
+    def tool(self) -> str:
+        """Version control system being used, either 'hg' or 'git'."""
+
+    @abstractproperty
+    def head_rev(self) -> str:
+        """Hash of HEAD revision."""
+
+    @abstractproperty
+    def base_rev(self):
+        """Hash of revision the current topic branch is based on."""
+
+    @abstractproperty
+    def branch(self):
+        """Current branch or bookmark the checkout has active."""
+
+    @abstractproperty
+    def all_remote_names(self):
+        """Name of all configured remote repositories."""
+
+    @abstractproperty
+    def default_remote_name(self):
+        """Name the VCS defines for the remote repository when cloning
+        it for the first time. This name may not exist anymore if users
+        changed the default configuration, for instance."""
+
+    @abstractproperty
+    def remote_name(self):
+        """Name of the remote repository."""
+
+    def _get_most_suitable_remote(self, remote_instructions):
+        remotes = self.all_remote_names
+        if len(remotes) == 1:
+            return remotes[0]
+
+        if self.default_remote_name in remotes:
+            return self.default_remote_name
+
+        first_remote = remotes[0]
+        logger.warning(
+            f"Unable to determine which remote repository to use between: {remotes}. "
+            f'Arbitrarily using the first one "{first_remote}". Please set an '
+            f"`{self.default_remote_name}` remote if the arbitrarily selected one "
+            f"is not right. To do so: {remote_instructions}"
+        )
+
+        return first_remote
+
+    @abstractproperty
+    def default_branch(self):
+        """Name of the default branch."""
+
+    @abstractmethod
+    def get_url(self, remote=None):
+        """Get URL of the upstream repository."""
+
+    @abstractmethod
+    def get_commit_message(self, revision=None):
+        """Commit message of specified revision or current commit."""
+
+    @abstractmethod
+    def get_changed_files(self, diff_filter, mode="unstaged", rev=None, base_rev=None):
+        """Return a list of files that are changed in:
+         * either this repository's working copy,
+         * or at a given revision (``rev``)
+         * or between 2 revisions (``base_rev`` and ``rev``)
+
+        ``diff_filter`` controls which kinds of modifications are returned.
+        It is a string which may only contain the following characters:
+
+            A - Include files that were added
+            D - Include files that were deleted
+            M - Include files that were modified
+
+        By default, all three will be included.
+
+        ``mode`` can be one of 'unstaged', 'staged' or 'all'. Only has an
+        effect on git. Defaults to 'unstaged'.
+
+        ``rev`` is a specifier for which changesets to consider for
+        changes. The exact meaning depends on the vcs system being used.
+
+        ``base_rev`` specifies the range of changesets. This parameter cannot
+        be used without ``rev``. The range includes ``rev`` but excludes
+        ``base_rev``.
+        """
+
+    @abstractmethod
+    def get_outgoing_files(self, diff_filter, upstream):
+        """Return a list of changed files compared to upstream.
+
+        ``diff_filter`` works the same as `get_changed_files`.
+        ``upstream`` is a remote ref to compare against. If unspecified,
+        this will be determined automatically. If there is no remote ref,
+        a MissingUpstreamRepo exception will be raised.
+        """
+
+    @abstractmethod
+    def working_directory_clean(self, untracked=False, ignored=False):
+        """Determine if the working directory is free of modifications.
+
+        Returns True if the working directory does not have any file
+        modifications. False otherwise.
+
+        By default, untracked and ignored files are not considered. If
+        ``untracked`` or ``ignored`` are set, they influence the clean check
+        to factor these file classes into consideration.
+        """
+
+    @abstractmethod
+    def update(self, ref):
+        """Update the working directory to the specified reference."""
+
+    @abstractmethod
+    def find_latest_common_revision(self, base_ref_or_rev, head_rev):
+        """Find the latest revision that is common to both the given
+        ``head_rev`` and ``base_ref_or_rev``.
+
+        If no common revision exists, ``Repository.NULL_REVISION`` will
+        be returned."""
+
+    @abstractmethod
+    def does_revision_exist_locally(self, revision):
+        """Check whether this revision exists in the local repository.
+
+        If this function returns an unexpected value, then make sure
+        the revision was fetched from the remote repository."""
+
+
+class HgRepository(Repository):
+    tool = "hg"
+    default_remote_name = "default"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._env["HGPLAIN"] = "1"
+
+    @property
+    def head_rev(self):
+        return self.run("log", "-r", ".", "-T", "{node}").strip()
+
+    @property
+    def base_rev(self):
+        return self.run("log", "-r", "last(ancestors(.) and public())", "-T", "{node}")
+
+    @property
+    def branch(self):
+        bookmarks_fn = os.path.join(self.path, ".hg", "bookmarks.current")
+        if os.path.exists(bookmarks_fn):
+            with open(bookmarks_fn) as f:
+                bookmark = f.read()
+                return bookmark or None
+
+        return None
+
+    @property
+    def all_remote_names(self):
+        remotes = self.run("paths", "--quiet").splitlines()
+        if not remotes:
+            raise RuntimeError("No remotes defined")
+        return remotes
+
+    @property
+    def remote_name(self):
+        return self._get_most_suitable_remote(
+            "Edit .hg/hgrc and add:\n\n[paths]\ndefault = $URL",
+        )
+
+    @property
+    def default_branch(self):
+        # Mercurial recommends keeping "default"
+        # https://www.mercurial-scm.org/wiki/StandardBranching#Don.27t_use_a_name_other_than_default_for_your_main_development_branch
+        return "default"
+
+    def get_url(self, remote="default"):
+        return self.run("path", "-T", "{url}", remote).strip()
+
+    def get_commit_message(self, revision=None):
+        revision = revision or "."
+        return self.run("log", "-r", revision, "-T", "{desc}")
+
+    def _format_diff_filter(self, diff_filter, for_status=False):
+        df = diff_filter.lower()
+        assert all(f in self._valid_diff_filter for f in df)
+
+        # When looking at the changes in the working directory, the hg status
+        # command uses 'd' for files that have been deleted with a non-hg
+        # command, and 'r' for files that have been `hg rm`ed. Use both.
+        return df.replace("d", "dr") if for_status else df
+
+    def _files_template(self, diff_filter):
+        template = ""
+        df = self._format_diff_filter(diff_filter)
+        if "a" in df:
+            template += "{file_adds % '{file}\\n'}"
+        if "d" in df:
+            template += "{file_dels % '{file}\\n'}"
+        if "m" in df:
+            template += "{file_mods % '{file}\\n'}"
+        return template
+
+    def get_changed_files(
+        self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None
+    ):
+        if rev is None:
+            if base_rev is not None:
+                raise ValueError("Cannot specify `base_rev` without `rev`")
+            # Use --no-status to print just the filename.
+            df = self._format_diff_filter(diff_filter, for_status=True)
+            return self.run("status", "--no-status", f"-{df}").splitlines()
+        else:
+            template = self._files_template(diff_filter)
+            revision_argument = rev if base_rev is None else f"{base_rev}~-1::{rev}"
+            return self.run("log", "-r", revision_argument, "-T", template).splitlines()
+
+    def get_outgoing_files(self, diff_filter="ADM", upstream=None):
+        """Return the files changed by outgoing changesets, one per entry.
+
+        Without ``upstream`` the ``draft() and ancestors(.)`` changesets are
+        listed; otherwise ``hg outgoing`` is run against ``upstream``.
+        """
+        template = self._files_template(diff_filter)
+        if upstream:
+            # Exit code 1 from `hg outgoing` is mapped to empty output by
+            # run() instead of being raised as an error.
+            cmd = ("outgoing", "-r", ".", "--quiet", "--template", template, upstream)
+            output = self.run(*cmd, return_codes=(1,))
+        else:
+            revset = "draft() and ancestors(.)"
+            output = self.run("log", "-r", revset, "--template", template)
+        # The template emits one file per line; split on line boundaries only
+        # so that file names containing spaces stay intact.
+        return output.splitlines()
+
+    def working_directory_clean(self, untracked=False, ignored=False):
+        args = ["status", "--modified", "--added", "--removed", "--deleted"]
+        if untracked:
+            args.append("--unknown")
+        if ignored:
+            args.append("--ignored")
+
+        # If output is empty, there are no entries of requested status, which
+        # means we are clean.
+        return not len(self.run(*args).strip())
+
+    def update(self, ref):
+        return self.run("update", "--check", ref)
+
+    def find_latest_common_revision(self, base_ref_or_rev, head_rev):
+        ancestor = self.run(
+            "log",
+            "-r",
+            f"last(ancestors('{base_ref_or_rev}') and ancestors('{head_rev}'))",
+            "--template",
+            "{node}",
+        ).strip()
+        return ancestor or self.NULL_REVISION
+
+    def does_revision_exist_locally(self, revision):
+        try:
+            return bool(self.run("log", "-r", revision).strip())
+        except subprocess.CalledProcessError as e:
+            # Error code 255 comes with the message:
+            # "abort: unknown revision $REVISION"
+            if e.returncode == 255:
+                return False
+            raise
+
+
+class GitRepository(Repository):
+    tool = "git"
+    default_remote_name = "origin"
+
+    _LS_REMOTE_PATTERN = re.compile(r"ref:\s+refs/heads/(?P<branch_name>\S+)\s+HEAD")
+
+    @property
+    def head_rev(self):
+        return self.run("rev-parse", "--verify", "HEAD").strip()
+
+    @property
+    def base_rev(self):
+        refs = self.run(
+            "rev-list", "HEAD", "--topo-order", "--boundary", "--not", "--remotes"
+        ).splitlines()
+        if refs:
+            return refs[-1][1:]  # boundary starts with a prefix `-`
+        return self.head_rev
+
+    @property
+    def branch(self):
+        return self.run("branch", "--show-current").strip() or None
+
+    @property
+    def all_remote_names(self):
+        remotes = self.run("remote").splitlines()
+        if not remotes:
+            raise RuntimeError("No remotes defined")
+        return remotes
+
+    @property
+    def remote_name(self):
+        try:
+            remote_branch_name = self.run(
+                "rev-parse",
+                "--verify",
+                "--abbrev-ref",
+                "--symbolic-full-name",
+                "@{u}",
+                stderr=subprocess.PIPE,
+            ).strip()
+            return remote_branch_name.split("/")[0]
+        except subprocess.CalledProcessError as e:
+            # Error code 128 comes with the message:
+            # "fatal: no upstream configured for branch $BRANCH"
+            if e.returncode != 128:
+                print(e.stderr)
+                raise
+
+        return self._get_most_suitable_remote("`git remote add origin $URL`")
+
+    @property
+    def default_branch(self):
+        try:
+            # this one works if the current repo was cloned from an existing
+            # repo elsewhere
+            return self._get_default_branch_from_cloned_metadata()
+        except (subprocess.CalledProcessError, RuntimeError):
+            pass
+
+        try:
+            # This call works if you have (network) access to the repo
+            return self._get_default_branch_from_remote_query()
+        except (subprocess.CalledProcessError, RuntimeError):
+            pass
+
+        # this one is the last resort in case the remote is not accessible and
+        # the local repo is where `git init` was made
+        return self._guess_default_branch()
+
+    def _get_default_branch_from_remote_query(self):
+        """Ask the remote for its default branch, as ``<remote>/<branch>``.
+
+        Needs network access; raises RuntimeError on unparsable output."""
+        remote_name = self.remote_name
+        output = self.run("ls-remote", "--symref", remote_name, "HEAD")
+        matches = self._LS_REMOTE_PATTERN.search(output)
+        if matches:
+            return f"{remote_name}/{matches.group('branch_name')}"
+        raise RuntimeError(
+            f'Could not find the default branch of remote repository "{remote_name}". '
+            f"Got: {output}"
+        )
+
+    def _get_default_branch_from_cloned_metadata(self):
+        return self.run("rev-parse", "--abbrev-ref", f"{self.remote_name}/HEAD").strip()
+
+    def _guess_default_branch(self):
+        branches = [
+            line.strip()
+            for line in self.run(
+                "branch", "--all", "--no-color", "--format=%(refname)"
+            ).splitlines()
+            for candidate_branch in ("main", "master", "branches/default/tip")
+            if line.strip().endswith(candidate_branch)
+        ]
+
+        if len(branches) == 1:
+            return branches[0]
+
+        raise RuntimeError(f"Unable to find default branch. Got: {branches}")
+
+    def get_url(self, remote="origin"):
+        return self.run("remote", "get-url", remote).strip()
+
+    def get_commit_message(self, revision=None):
+        revision = revision or "HEAD"
+        return self.run("log", "-n1", "--format=%B", revision)
+
+    def get_changed_files(
+        self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None
+    ):
+        assert all(f.lower() in self._valid_diff_filter for f in diff_filter)
+
+        if rev is None:
+            if base_rev is not None:
+                raise ValueError("Cannot specify `base_rev` without `rev`")
+            cmd = ["diff"]
+            if mode == "staged":
+                cmd.append("--cached")
+            elif mode == "all":
+                cmd.append("HEAD")
+        else:
+            revision_argument = (
+                f"{rev}~1..{rev}" if base_rev is None else f"{base_rev}..{rev}"
+            )
+            cmd = ["log", "--format=format:", revision_argument]
+
+        cmd.append("--name-only")
+        cmd.append("--diff-filter=" + diff_filter.upper())
+
+        files = self.run(*cmd).splitlines()
+        return [f for f in files if f]
+
+    def get_outgoing_files(self, diff_filter="ADM", upstream=None):
+        assert all(f.lower() in self._valid_diff_filter for f in diff_filter)
+
+        not_condition = upstream if upstream else "--remotes"
+
+        files = self.run(
+            "log",
+            "--name-only",
+            f"--diff-filter={diff_filter.upper()}",
+            "--oneline",
+            "--pretty=format:",
+            "HEAD",
+            "--not",
+            not_condition,
+        ).splitlines()
+        return [f for f in files if f]
+
+    def working_directory_clean(self, untracked=False, ignored=False):
+        args = ["status", "--porcelain"]
+
+        # Even in --porcelain mode, behavior is affected by the
+        # ``status.showUntrackedFiles`` option, which means we need to be
+        # explicit about how to treat untracked files.
+        if untracked:
+            args.append("--untracked-files=all")
+        else:
+            args.append("--untracked-files=no")
+
+        if ignored:
+            args.append("--ignored")
+
+        # If output is empty, there are no entries of requested status, which
+        # means we are clean.
+        return not len(self.run(*args).strip())
+
+    def update(self, ref):
+        self.run("checkout", ref)
+
+    def find_latest_common_revision(self, base_ref_or_rev, head_rev):
+        try:
+            return self.run("merge-base", base_ref_or_rev, head_rev).strip()
+        except subprocess.CalledProcessError:
+            return self.NULL_REVISION
+
+    def does_revision_exist_locally(self, revision):
+        try:
+            return self.run("cat-file", "-t", revision).strip() == "commit"
+        except subprocess.CalledProcessError as e:
+            # Error code 128 comes with the message:
+            # "git cat-file: could not get object info"
+            if e.returncode == 128:
+                return False
+            raise
+
+
+def get_repository(path):
+    """Get a repository object for the repository at `path`.
+    If `path` is not a known VCS repository, raise an exception.
+    """
+    for path in ancestors(path):
+        if os.path.isdir(os.path.join(path, ".hg")):
+            return HgRepository(path)
+        elif os.path.exists(os.path.join(path, ".git")):
+            return GitRepository(path)
+
+    raise RuntimeError("Current directory is neither a git or hg repository")
+
+
+def find_hg_revision_push_info(repository, revision):
+    """Look up the push that contains ``revision`` in an hg pushlog.
+
+    :param str repository: base URL the pushlog URL is built from
+    :param str revision: changeset to look up
+    :returns: dict with the ``pushdate``, ``pushid`` and ``user`` of the push
+    :raises RuntimeError: if the pushlog does not report exactly one push
+    """
+    pushlog_url = PUSHLOG_TMPL.format(repository, revision)
+
+    def query_pushlog(url):
+        # Use the URL handed in by retry(), not the enclosing variable.
+        r = requests.get(url, timeout=60)
+        r.raise_for_status()
+        return r
+
+    # Failed requests are retried according to the retry() arguments below.
+    r = retry(query_pushlog, args=(pushlog_url,), attempts=5, sleeptime=10)
+    pushes = r.json()["pushes"]
+    if len(pushes) != 1:
+        raise RuntimeError(
+            "Unable to find a single pushlog_id for {} revision {}: {}".format(
+                repository, revision, pushes
+            )
+        )
+
+    pushid = list(pushes.keys())[0]
+    push = pushes[pushid]
+    return {"pushdate": push["date"], "pushid": pushid, "user": push["user"]}
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py
new file mode 100644
index 0000000000..e6705c16cf
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py
@@ -0,0 +1,283 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import sys
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Callable, Dict, List, Union
+
+from taskgraph.config import GraphConfig
+from taskgraph.parameters import Parameters
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.util.attributes import match_run_on_projects
+from taskgraph.util.treeherder import join_symbol
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class Verification(ABC):
+    func: Callable  # callable invoked by the concrete `verify` implementations
+
+    @abstractmethod
+    def verify(self, **kwargs) -> None:
+        """Run the verification; each subclass defines the arguments it takes."""
+
+
+@dataclass(frozen=True)
+class InitialVerification(Verification):
+    """Verification that doesn't depend on any generation state."""
+
+    def verify(self):
+        self.func()  # called without any arguments
+
+
+@dataclass(frozen=True)
+class GraphVerification(Verification):
+    """Verification that runs `func` over the tasks of a TaskGraph.
+
+    `func` is handed to `graph.for_each_task` and afterwards called once more
+    with `None` in place of a task; both phases share one scratch_pad dict.
+    When `run_on_projects` is set, verification is skipped for projects that
+    `match_run_on_projects` rejects.
+    """
+
+    run_on_projects: Union[List, None] = None
+
+    def verify(
+        self, graph: TaskGraph, graph_config: GraphConfig, parameters: Parameters
+    ):
+        projects = self.run_on_projects
+        if projects and not match_run_on_projects(parameters["project"], projects):
+            return
+
+        # One dict instance is reused so state gathered per task is still
+        # visible in the final graph-level call.
+        shared = {
+            "scratch_pad": {},
+            "graph_config": graph_config,
+            "parameters": parameters,
+        }
+        graph.for_each_task(self.func, **shared)
+        self.func(None, graph, **shared)
+
+
+@dataclass(frozen=True)
+class ParametersVerification(Verification):
+    """Verification for a set of parameters; `func` receives them unchanged."""
+
+    def verify(self, parameters: Parameters):
+        self.func(parameters)
+
+
+@dataclass(frozen=True)
+class KindsVerification(Verification):
+    """Verification for kinds; `func` receives the `kinds` dict unchanged."""
+
+    def verify(self, kinds: dict):
+        self.func(kinds)
+
+
+@dataclass(frozen=True)
+class VerificationSequence:
+    """
+    Registry of verifications, grouped by name. Names found in
+    `_verification_types` pick the matching Verification class; every other
+    name registers a GraphVerification, whose callable runs once per task and
+    one more time with no task but the same scratch_pad.
+    """
+
+    _verifications: Dict = field(default_factory=dict)
+    _verification_types = {
+        "graph": GraphVerification,
+        "initial": InitialVerification,
+        "kinds": KindsVerification,
+        "parameters": ParametersVerification,
+    }
+
+    def __call__(self, name, *args, **kwargs):
+        for check in self._verifications.get(name, []):
+            check.verify(*args, **kwargs)
+
+    def add(self, name, **kwargs):
+        factory = self._verification_types.get(name, GraphVerification)
+
+        def register(func):
+            registered = self._verifications.setdefault(name, [])
+            registered.append(factory(func, **kwargs))
+            return func
+
+        return register
+
+
+verifications = VerificationSequence()  # registry filled by @verifications.add
+
+
+@verifications.add("full_task_graph")
+def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, parameters):
+    """
+    Check that no two tasks share the same treeherder (platform, collection,
+    groupSymbol, symbol) tuple, and that each task with treeherder data has
+    exactly one collection. Tasks without `extra.treeherder` are ignored.
+    """
+    if task is None:
+        return
+    task_dict = task.task
+    if "extra" not in task_dict or "treeherder" not in task_dict["extra"]:
+        return
+    treeherder = task_dict["extra"]["treeherder"]
+
+    collections = tuple(sorted(treeherder.get("collection", {}).keys()))
+    if len(collections) != 1:
+        raise Exception(
+            "Task {} can't be in multiple treeherder collections "
+            "(the part of the platform after `/`): {}".format(
+                task.label, collections
+            )
+        )
+    (collection,) = collections
+    platform = treeherder.get("machine", {}).get("platform")
+    group_symbol = treeherder.get("groupSymbol")
+    symbol = treeherder.get("symbol")
+
+    key = (platform, collection, group_symbol, symbol)
+    if key not in scratch_pad:
+        scratch_pad[key] = task.label
+        return
+    raise Exception(
+        "Duplicate treeherder platform and symbol in tasks "
+        "`{}`and `{}`: {} {}".format(
+            task.label,
+            scratch_pad[key],
+            f"{platform}/{collection}",
+            join_symbol(group_symbol, symbol),
+        )
+    )
+
+
+@verifications.add("full_task_graph")
+def verify_trust_domain_v2_routes(
+    task, taskgraph, scratch_pad, graph_config, parameters
+):
+    """
+    Ensure every ``index.{trust-domain}.v2`` route is claimed by one task only;
+    a route seen a second time raises an Exception naming both tasks.
+    """
+    if task is None:
+        return
+    prefix = "index.{}.v2".format(graph_config["trust-domain"])
+
+    for route in task.task.get("routes", []):
+        if not route.startswith(prefix):
+            continue
+        if route in scratch_pad:
+            raise Exception(
+                "conflict between {}:{} for route: {}".format(
+                    task.label, scratch_pad[route], route
+                )
+            )
+        # `scratch_pad` remembers which task first claimed the route.
+        scratch_pad[route] = task.label
+
+
+@verifications.add("full_task_graph")
+def verify_routes_notification_filters(
+    task, taskgraph, scratch_pad, graph_config, parameters
+):
+    """
+    Reject ``notify.`` routes whose last dot-separated component is not one of
+    the understood notification filters.
+
+    See: https://docs.taskcluster.net/reference/core/taskcluster-notify/docs/usage
+    """
+    if task is None:
+        return
+    route_prefix = "notify."
+    valid_filters = ("on-any", "on-completed", "on-failed", "on-exception")
+
+    for route in task.task.get("routes", []):
+        if not route.startswith(route_prefix):
+            continue
+        # The filter is whatever follows the final dot of the route.
+        route_filter = route.rsplit(".", 1)[-1]
+        if route_filter in valid_filters:
+            continue
+        raise Exception(
+            "{} has invalid notification filter ({})".format(
+                task.label, route_filter
+            )
+        )
+
+
+@verifications.add("full_task_graph")
+def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config, parameters):
+    """Ensure no task has a lower treeherder tier number than a dependency.
+    Per-task calls record tiers (sys.maxsize when unset); the final call checks
+    them, skipping always-optimized dependencies and "dummy" kinds."""
+    tiers = scratch_pad
+    if task is not None:
+        treeherder = task.task.get("extra", {}).get("treeherder", {})
+        tiers[task.label] = treeherder.get("tier", sys.maxsize)
+        return
+
+    def printable_tier(tier):
+        return "unknown" if tier == sys.maxsize else tier
+
+    for dependent in taskgraph.tasks.values():
+        tier = tiers[dependent.label]
+        for dep_label in dependent.dependencies.values():
+            dependency = taskgraph[dep_label]
+            worker_type = dependency.task.get("workerType")
+            if worker_type == "always-optimized" or "dummy" in dependency.kind:
+                continue
+            if tier < tiers[dep_label]:
+                raise Exception(
+                    "{} (tier {}) cannot depend on {} (tier {})".format(
+                        dependent.label,
+                        printable_tier(tier),
+                        dep_label,
+                        printable_tier(tiers[dep_label]),
+                    )
+                )
+
+
+@verifications.add("full_task_graph")
+def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameters):
+    """
+    Ensure each toolchain alias is declared by a single task only. The
+    `toolchain-alias` attribute may be empty, a string or several aliases.
+    """
+    if task is None:
+        return
+    attributes = task.attributes
+    if "toolchain-alias" not in attributes:
+        return
+    # Empty values become [], a lone string becomes a one-item list.
+    aliases = attributes["toolchain-alias"] or []
+    if isinstance(aliases, str):
+        aliases = [aliases]
+    for alias in aliases:
+        if alias in scratch_pad:
+            raise Exception(
+                "Duplicate toolchain-alias in tasks "
+                "`{}`and `{}`: {}".format(
+                    task.label,
+                    scratch_pad[alias],
+                    alias,
+                )
+            )
+        scratch_pad[alias] = task.label
+
+
+@verifications.add("optimized_task_graph")
+def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters):
+    """
+    Ensure always-optimized tasks have been optimized away: any task whose
+    workerType is still "always-optimized" raises an Exception. The final call
+    with no task is a no-op.
+    """
+    if task is not None and task.task.get("workerType") == "always-optimized":
+        raise Exception(f"Could not optimize the task {task.label!r}")
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py
new file mode 100644
index 0000000000..da39654d6b
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py
@@ -0,0 +1,78 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from dataclasses import dataclass
+
+from .keyed_by import evaluate_keyed_by
+from .memoize import memoize
+
+
+@dataclass
+class _BuiltinWorkerType:
+    provisioner: str  # returned first by get_worker_type for built-in aliases
+    worker_type: str  # returned second, and doubles as the implementation name
+
+    @property
+    def implementation(self):
+        """
+        Since the list of built-in worker-types is small and fixed, we can get
+        away with punning the implementation name (in
+        `taskgraph.transforms.task`) and the worker_type.
+        """
+        return self.worker_type
+
+
+_BUILTIN_TYPES = {  # aliases resolved without consulting the graph config
+    "always-optimized": _BuiltinWorkerType("invalid", "always-optimized"),
+    "succeed": _BuiltinWorkerType("built-in", "succeed"),
+}
+
+
+@memoize
+def worker_type_implementation(graph_config, worker_type):
+    """Return an (implementation, os) tuple for the given workerType, where the
+    OS represents the host system, not the target OS, in the case of
+    cross-compiles. Built-in worker-types report None as their OS."""
+    builtin_type = _BUILTIN_TYPES.get(worker_type)
+    if builtin_type is not None:
+        # For the built-in worker-types, we use an implementation that matches
+        # the worker-type.
+        return builtin_type.implementation, None
+    aliases = graph_config["workers"]["aliases"]
+    worker_config = evaluate_keyed_by(
+        {"by-worker-type": aliases}, "worker-types.yml", {"worker-type": worker_type}
+    )
+    return worker_config["implementation"], worker_config.get("os")
+
+
+@memoize
+def get_worker_type(graph_config, alias, level):
+    """
+    Resolve `alias` to a (provisioner, worker-type) tuple, evaluating the
+    aliases from the graph config for the given `level`.
+    """
+    builtin_type = _BUILTIN_TYPES.get(alias)
+    if builtin_type is not None:
+        return builtin_type.provisioner, builtin_type.worker_type
+
+    level = str(level)
+    worker_config = evaluate_keyed_by(
+        {"by-alias": graph_config["workers"]["aliases"]},
+        "graph_config.workers.aliases",
+        {"alias": alias},
+    )
+
+    def resolve(key):
+        # Both fields are evaluated by level and then formatted with the
+        # alias, level and trust-domain values.
+        template = evaluate_keyed_by(worker_config[key], alias, {"level": level})
+        return template.format(
+            **{
+                "alias": alias,
+                "level": level,
+                "trust-domain": graph_config["trust-domain"],
+            }
+        )
+
+    return resolve("provisioner"), resolve("worker-type")
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py
new file mode 100644
index 0000000000..141c7a16d3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py
@@ -0,0 +1,36 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import os
+
+from yaml.loader import SafeLoader
+
+
+class UnicodeLoader(SafeLoader):
+    def construct_yaml_str(self, node):  # registered below for the YAML str tag
+        return self.construct_scalar(node)
+
+
+UnicodeLoader.add_constructor("tag:yaml.org,2002:str", UnicodeLoader.construct_yaml_str)
+
+
+def load_stream(stream):
+    """
+    Parse a YAML document from a stream using UnicodeLoader
+    and produce the corresponding Python object.
+    """
+    loader = UnicodeLoader(stream)
+    try:
+        return loader.get_single_data()
+    finally:
+        loader.dispose()  # release the loader even when parsing fails
+
+
+def load_yaml(*parts):
+    """Join `parts` into a file path, open that file in binary mode and return
+    the Python object that `load_stream` parses from its YAML content."""
+    yaml_path = os.path.join(*parts)
+    with open(yaml_path, "rb") as yaml_file:
+        return load_stream(yaml_file)