Diffstat
28 files changed, 5119 insertions, 0 deletions
diff --git a/taskcluster/taskgraph/util/__init__.py b/taskcluster/taskgraph/util/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/taskcluster/taskgraph/util/__init__.py diff --git a/taskcluster/taskgraph/util/attributes.py b/taskcluster/taskgraph/util/attributes.py new file mode 100644 index 0000000000..6a446d6fa7 --- /dev/null +++ b/taskcluster/taskgraph/util/attributes.py @@ -0,0 +1,163 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import re + +import six + + +INTEGRATION_PROJECTS = { + "autoland", +} + +TRUNK_PROJECTS = INTEGRATION_PROJECTS | {"mozilla-central", "comm-central"} + +RELEASE_PROJECTS = { + "mozilla-central", + "mozilla-beta", + "mozilla-release", + "mozilla-esr78", + "comm-central", + "comm-beta", + "comm-esr78", + "oak", +} + +RELEASE_PROMOTION_PROJECTS = { + "jamun", + "maple", + "try", + "try-comm-central", +} | RELEASE_PROJECTS + +TEMPORARY_PROJECTS = set( + { + # When using a "Disposeabel Project Branch" you can specify your branch here. e.g.: + # 'oak', + } +) + +ALL_PROJECTS = RELEASE_PROMOTION_PROJECTS | TRUNK_PROJECTS | TEMPORARY_PROJECTS + +RUN_ON_PROJECT_ALIASES = { + # key is alias, value is lambda to test it against + "all": lambda project: True, + "integration": lambda project: project in INTEGRATION_PROJECTS, + "release": lambda project: project in RELEASE_PROJECTS, + "trunk": lambda project: project in TRUNK_PROJECTS, +} + +_COPYABLE_ATTRIBUTES = ( + "accepted-mar-channel-ids", + "artifact_map", + "artifact_prefix", + "build_platform", + "build_type", + "l10n_chunk", + "locale", + "mar-channel-id", + "nightly", + "required_signoffs", + "shippable", + "shipping_phase", + "shipping_product", + "signed", + "stub-installer", + "update-channel", +) + + +def attrmatch(attributes, **kwargs): + """Determine whether the given set of task attributes matches. The + conditions are given as keyword arguments, where each keyword names an + attribute. The keyword value can be a literal, a set, or a callable. A + literal must match the attribute exactly. Given a set, the attribute value + must be in the set. A callable is called with the attribute value. If an + attribute is specified as a keyword argument but not present in the + attributes, the result is False.""" + for kwkey, kwval in six.iteritems(kwargs): + if kwkey not in attributes: + return False + attval = attributes[kwkey] + if isinstance(kwval, set): + if attval not in kwval: + return False + elif callable(kwval): + if not kwval(attval): + return False + elif kwval != attributes[kwkey]: + return False + return True + + +def keymatch(attributes, target): + """Determine if any keys in attributes are a match to target, then return + a list of matching values. First exact matches will be checked. Failing + that, regex matches and finally a default key. 
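+
+    Illustrative example (hypothetical keys and values): with
+    attributes={"linux.*": 1, "default": 0}, keymatch(attributes, "linux64")
+    returns [1] via the regex branch, while an unmatched target such as
+    "win32" falls back to [0] from the "default" key.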
+ """ + # exact match + if target in attributes: + return [attributes[target]] + + # regular expression match + matches = [v for k, v in six.iteritems(attributes) if re.match(k + "$", target)] + if matches: + return matches + + # default + if "default" in attributes: + return [attributes["default"]] + + return [] + + +def match_run_on_projects(project, run_on_projects): + """Determine whether the given project is included in the `run-on-projects` + parameter, applying expansions for things like "integration" mentioned in + the attribute documentation.""" + aliases = RUN_ON_PROJECT_ALIASES.keys() + run_aliases = set(aliases) & set(run_on_projects) + if run_aliases: + if any(RUN_ON_PROJECT_ALIASES[alias](project) for alias in run_aliases): + return True + + return project in run_on_projects + + +def match_run_on_hg_branches(hg_branch, run_on_hg_branches): + """Determine whether the given project is included in the `run-on-hg-branches` + parameter. Allows 'all'.""" + if "all" in run_on_hg_branches: + return True + + for expected_hg_branch_pattern in run_on_hg_branches: + if re.match(expected_hg_branch_pattern, hg_branch): + return True + + return False + + +def copy_attributes_from_dependent_job(dep_job, denylist=()): + return { + attr: dep_job.attributes[attr] + for attr in _COPYABLE_ATTRIBUTES + if attr in dep_job.attributes and attr not in denylist + } + + +def sorted_unique_list(*args): + """Join one or more lists, and return a sorted list of unique members""" + combined = set().union(*args) + return sorted(combined) + + +def release_level(project): + """ + Whether this is a staging release or not. + + :return six.text_type: One of "production" or "staging". + """ + return "production" if project in RELEASE_PROJECTS else "staging" diff --git a/taskcluster/taskgraph/util/backstop.py b/taskcluster/taskgraph/util/backstop.py new file mode 100644 index 0000000000..e89cd53ef0 --- /dev/null +++ b/taskcluster/taskgraph/util/backstop.py @@ -0,0 +1,81 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +from requests import HTTPError + +from taskgraph.util.taskcluster import ( + find_task_id, + get_artifact, + status_task, +) + + +BACKSTOP_PUSH_INTERVAL = 20 +BACKSTOP_TIME_INTERVAL = 60 * 4 # minutes +BACKSTOP_INDEX = "gecko.v2.{project}.latest.taskgraph.backstop" + + +def is_backstop( + params, push_interval=BACKSTOP_PUSH_INTERVAL, time_interval=BACKSTOP_TIME_INTERVAL +): + """Determines whether the given parameters represent a backstop push. + + Args: + push_interval (int): Number of pushes + time_interval (int): Minutes between forced schedules. + Use 0 to disable. + Returns: + bool: True if this is a backtop, otherwise False. + """ + # In case this is being faked on try. + if params.get("backstop", False): + return True + + project = params["project"] + pushid = int(params["pushlog_id"]) + pushdate = int(params["pushdate"]) + + if project == "try": + return False + elif project != "autoland": + return True + + # On every Nth push, want to run all tasks. + if pushid % push_interval == 0: + return True + + if time_interval <= 0: + return False + + # We also want to ensure we run all tasks at least once per N minutes. 
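+    # Illustrative timing with the defaults above: at a BACKSTOP_TIME_INTERVAL
+    # of 4 hours, a push landing 5 hours after the previous backstop's
+    # pushdate becomes a backstop even if its push id is not a multiple of
+    # push_interval.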
+ index = BACKSTOP_INDEX.format(project=project) + + try: + last_backstop_id = find_task_id(index) + except KeyError: + # Index wasn't found, implying there hasn't been a backstop push yet. + return True + + if status_task(last_backstop_id) in ("failed", "exception"): + # If the last backstop failed its decision task, make this a backstop. + return True + + try: + last_pushdate = get_artifact(last_backstop_id, "public/parameters.yml")[ + "pushdate" + ] + except HTTPError as e: + # If the last backstop decision task exists in the index, but + # parameters.yml isn't available yet, it means the decision task is + # still running. If that's the case, we can be pretty sure the time + # component will not cause a backstop, so just return False. + if e.response.status_code == 404: + return False + raise + + if (pushdate - last_pushdate) / 60 >= time_interval: + return True + return False diff --git a/taskcluster/taskgraph/util/bugbug.py b/taskcluster/taskgraph/util/bugbug.py new file mode 100644 index 0000000000..8fc3c07610 --- /dev/null +++ b/taskcluster/taskgraph/util/bugbug.py @@ -0,0 +1,126 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import os +import json +import sys +import time + +import requests +from mozbuild.util import memoize + +from taskgraph.util.taskcluster import requests_retry_session + +try: + # TODO(py3): use time.monotonic() + from time import monotonic +except ImportError: + from time import time as monotonic + +BUGBUG_BASE_URL = "https://bugbug.herokuapp.com" +RETRY_TIMEOUT = 9 * 60 # seconds +RETRY_INTERVAL = 10 # seconds + +# Preset confidence thresholds. +CT_LOW = 0.7 +CT_MEDIUM = 0.8 +CT_HIGH = 0.9 + +GROUP_TRANSLATIONS = { + "testing/web-platform/tests": "", + "testing/web-platform/mozilla/tests": "/_mozilla", +} + + +def translate_group(group): + for prefix, value in GROUP_TRANSLATIONS.items(): + if group.startswith(prefix): + return group.replace(prefix, value) + + return group + + +class BugbugTimeoutException(Exception): + pass + + +@memoize +def get_session(): + s = requests.Session() + s.headers.update({"X-API-KEY": "gecko-taskgraph"}) + return requests_retry_session(retries=5, session=s) + + +def _write_perfherder_data(lower_is_better): + if os.environ.get("MOZ_AUTOMATION", "0") == "1": + perfherder_data = { + "framework": {"name": "build_metrics"}, + "suites": [ + { + "name": suite, + "value": value, + "lowerIsBetter": True, + "shouldAlert": False, + "subtests": [], + } + for suite, value in lower_is_better.items() + ], + } + print( + "PERFHERDER_DATA: {}".format(json.dumps(perfherder_data)), file=sys.stderr + ) + + +@memoize +def push_schedules(branch, rev): + url = BUGBUG_BASE_URL + "/push/{branch}/{rev}/schedules".format( + branch=branch, rev=rev + ) + start = monotonic() + session = get_session() + + # On try there is no fallback and pulling is slower, so we allow bugbug more + # time to compute the results. + # See https://github.com/mozilla/bugbug/issues/1673. 
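+    # Rough polling budget with the module defaults (illustrative): the 540s
+    # RETRY_TIMEOUT grows to 720s on try, i.e. up to 72 polls at a 10s
+    # RETRY_INTERVAL before BugbugTimeoutException is raised.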
+ timeout = RETRY_TIMEOUT + if branch == "try": + timeout += int(timeout / 3) + + attempts = timeout / RETRY_INTERVAL + i = 0 + while i < attempts: + r = session.get(url) + r.raise_for_status() + + if r.status_code != 202: + break + + time.sleep(RETRY_INTERVAL) + i += 1 + end = monotonic() + + _write_perfherder_data( + lower_is_better={ + "bugbug_push_schedules_time": end - start, + "bugbug_push_schedules_retries": i, + } + ) + + data = r.json() + if r.status_code == 202: + raise BugbugTimeoutException( + "Timed out waiting for result from '{}'".format(url) + ) + + if "groups" in data: + data["groups"] = {translate_group(k): v for k, v in data["groups"].items()} + + if "config_groups" in data: + data["config_groups"] = { + translate_group(k): v for k, v in data["config_groups"].items() + } + + return data diff --git a/taskcluster/taskgraph/util/cached_tasks.py b/taskcluster/taskgraph/util/cached_tasks.py new file mode 100644 index 0000000000..980ef0ef79 --- /dev/null +++ b/taskcluster/taskgraph/util/cached_tasks.py @@ -0,0 +1,82 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import hashlib +import time + +import six + + +TARGET_CACHE_INDEX = "{trust_domain}.cache.level-{level}.{type}.{name}.hash.{digest}" +EXTRA_CACHE_INDEXES = [ + "{trust_domain}.cache.level-{level}.{type}.{name}.latest", + "{trust_domain}.cache.level-{level}.{type}.{name}.pushdate.{build_date_long}", +] + + +def add_optimization( + config, taskdesc, cache_type, cache_name, digest=None, digest_data=None +): + """ + Allow the results of this task to be cached. This adds index routes to the + task so it can be looked up for future runs, and optimization hints so that + cached artifacts can be found. Exactly one of `digest` and `digest_data` + must be passed. + + :param TransformConfig config: The configuration for the kind being transformed. + :param dict taskdesc: The description of the current task. + :param str cache_type: The type of task result being cached. + :param str cache_name: The name of the object being cached. + :param digest: A unique string indentifying this version of the artifacts + being generated. Typically this will be the hash of inputs to the task. + :type digest: bytes or None + :param digest_data: A list of bytes representing the inputs of this task. + They will be concatenated and hashed to create the digest for this + task. + :type digest_data: list of bytes or None + """ + if (digest is None) == (digest_data is None): + raise Exception("Must pass exactly one of `digest` and `digest_data`.") + if digest is None: + digest = hashlib.sha256(six.ensure_binary("\n".join(digest_data))).hexdigest() + + subs = { + "trust_domain": config.graph_config["trust-domain"], + "type": cache_type, + "name": cache_name, + "digest": digest, + } + + # We'll try to find a cached version of the toolchain at levels above + # and including the current level, starting at the highest level. + index_routes = [] + for level in reversed(range(int(config.params["level"]), 4)): + subs["level"] = level + index_routes.append(TARGET_CACHE_INDEX.format(**subs)) + taskdesc["optimization"] = {"index-search": index_routes} + + # ... and cache at the lowest level. + taskdesc.setdefault("routes", []).append( + "index.{}".format(TARGET_CACHE_INDEX.format(**subs)) + ) + + # ... 
and add some extra routes for humans + subs["build_date_long"] = time.strftime( + "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"]) + ) + taskdesc["routes"].extend( + ["index.{}".format(route.format(**subs)) for route in EXTRA_CACHE_INDEXES] + ) + + taskdesc["attributes"]["cached_task"] = { + "type": cache_type, + "name": cache_name, + "digest": digest, + } + + # Allow future pushes to find this task before it completes + # Implementation in morphs + taskdesc["attributes"]["eager_indexes"] = [TARGET_CACHE_INDEX.format(**subs)] diff --git a/taskcluster/taskgraph/util/chunking.py b/taskcluster/taskgraph/util/chunking.py new file mode 100644 index 0000000000..ecf8098f01 --- /dev/null +++ b/taskcluster/taskgraph/util/chunking.py @@ -0,0 +1,270 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +"""Utility functions to handle test chunking.""" + +import json +import logging +import os +from abc import ABCMeta, abstractmethod + +import six +from manifestparser import TestManifest +from manifestparser.filters import chunk_by_runtime +from mozbuild.util import memoize +from moztest.resolve import ( + TEST_SUITES, + TestResolver, + TestManifestLoader, +) + +from taskgraph import GECKO +from taskgraph.util.bugbug import BugbugTimeoutException, push_schedules + +logger = logging.getLogger(__name__) +here = os.path.abspath(os.path.dirname(__file__)) +resolver = TestResolver.from_environment(cwd=here, loader_cls=TestManifestLoader) + + +def guess_mozinfo_from_task(task): + """Attempt to build a mozinfo dict from a task definition. + + This won't be perfect and many values used in the manifests will be missing. But + it should cover most of the major ones and be "good enough" for chunking in the + taskgraph. + + Args: + task (dict): A task definition. + + Returns: + A dict that can be used as a mozinfo replacement. 
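+        For a hypothetical task built on "linux64-asan/opt" this would
+        include, for example, {"asan": True, "bits": 64, "os": "linux",
+        "processor": "x86_64", "toolkit": "gtk"} (illustrative, not
+        exhaustive).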
+ """ + info = { + "asan": "asan" in task["build-attributes"]["build_platform"], + "bits": 32 if "32" in task["build-attributes"]["build_platform"] else 64, + "ccov": "ccov" in task["build-attributes"]["build_platform"], + "debug": task["build-attributes"]["build_type"] == "debug", + "e10s": task["attributes"]["e10s"], + "fission": task["attributes"].get("unittest_variant") == "fission", + "headless": "-headless" in task["test-name"], + "tsan": "tsan" in task["build-attributes"]["build_platform"], + "webrender": task.get("webrender", False), + } + for platform in ("android", "linux", "mac", "win"): + if platform in task["build-attributes"]["build_platform"]: + info["os"] = platform + break + else: + raise ValueError( + "{} is not a known platform!".format( + task["build-attributes"]["build_platform"] + ) + ) + + info["appname"] = "fennec" if info["os"] == "android" else "firefox" + + # guess processor + if "aarch64" in task["build-attributes"]["build_platform"]: + info["processor"] = "aarch64" + elif info["os"] == "android" and "arm" in task["test-platform"]: + info["processor"] = "arm" + elif info["bits"] == 32: + info["processor"] = "x86" + else: + info["processor"] = "x86_64" + + # guess toolkit + if info["os"] == "android": + info["toolkit"] = "android" + elif info["os"] == "win": + info["toolkit"] = "windows" + elif info["os"] == "mac": + info["toolkit"] = "cocoa" + else: + info["toolkit"] = "gtk" + + return info + + +@memoize +def get_runtimes(platform, suite_name): + if not suite_name or not platform: + raise TypeError("suite_name and platform cannot be empty.") + + base = os.path.join(GECKO, "testing", "runtimes", "manifest-runtimes-{}.json") + for key in ("android", "windows"): + if key in platform: + path = base.format(key) + break + else: + path = base.format("unix") + + if not os.path.exists(path): + raise IOError("manifest runtime file at {} not found.".format(path)) + + with open(path, "r") as fh: + return json.load(fh)[suite_name] + + +def chunk_manifests(suite, platform, chunks, manifests): + """Run the chunking algorithm. + + Args: + platform (str): Platform used to find runtime info. + chunks (int): Number of chunks to split manifests into. + manifests(list): Manifests to chunk. + + Returns: + A list of length `chunks` where each item contains a list of manifests + that run in that chunk. + """ + manifests = set(manifests) + + if "web-platform-tests" not in suite: + runtimes = { + k: v for k, v in get_runtimes(platform, suite).items() if k in manifests + } + return [ + c[1] + for c in chunk_by_runtime(None, chunks, runtimes).get_chunked_manifests( + manifests + ) + ] + + # Keep track of test paths for each chunk, and the runtime information. + chunked_manifests = [[] for _ in range(chunks)] + + # Spread out the test manifests evenly across all chunks. + for index, key in enumerate(sorted(manifests)): + chunked_manifests[index % chunks].append(key) + + # One last sort by the number of manifests. Chunk size should be more or less + # equal in size. + chunked_manifests.sort(key=lambda x: len(x)) + + # Return just the chunked test paths. + return chunked_manifests + + +@six.add_metaclass(ABCMeta) +class BaseManifestLoader(object): + def __init__(self, params): + self.params = params + + @abstractmethod + def get_manifests(self, flavor, subsuite, mozinfo): + """Compute which manifests should run for the given flavor, subsuite and mozinfo. 
+ + This function returns skipped manifests separately so that more balanced + chunks can be achieved by only considering "active" manifests in the + chunking algorithm. + + Args: + flavor (str): The suite to run. Values are defined by the 'build_flavor' key + in `moztest.resolve.TEST_SUITES`. + subsuite (str): The subsuite to run or 'undefined' to denote no subsuite. + mozinfo (frozenset): Set of data in the form of (<key>, <value>) used + for filtering. + + Returns: + A tuple of two manifest lists. The first is the set of active manifests (will + run at least one test. The second is a list of skipped manifests (all tests are + skipped). + """ + pass + + +class DefaultLoader(BaseManifestLoader): + """Load manifests using metadata from the TestResolver.""" + + @memoize + def get_tests(self, suite): + suite_definition = TEST_SUITES[suite] + return list( + resolver.resolve_tests( + flavor=suite_definition["build_flavor"], + subsuite=suite_definition.get("kwargs", {}).get( + "subsuite", "undefined" + ), + ) + ) + + @memoize + def get_manifests(self, suite, mozinfo): + mozinfo = dict(mozinfo) + # Compute all tests for the given suite/subsuite. + tests = self.get_tests(suite) + + if "web-platform-tests" in suite: + manifests = set() + for t in tests: + manifests.add(t["manifest"]) + return {"active": list(manifests), "skipped": []} + + manifests = set(chunk_by_runtime.get_manifest(t) for t in tests) + + # Compute the active tests. + m = TestManifest() + m.tests = tests + tests = m.active_tests(disabled=False, exists=False, **mozinfo) + active = set(chunk_by_runtime.get_manifest(t) for t in tests) + skipped = manifests - active + return {"active": list(active), "skipped": list(skipped)} + + +class BugbugLoader(DefaultLoader): + """Load manifests using metadata from the TestResolver, and then + filter them based on a query to bugbug.""" + + CONFIDENCE_THRESHOLD = 0.5 + + def __init__(self, *args, **kwargs): + super(BugbugLoader, self).__init__(*args, **kwargs) + self.timedout = False + + @memoize + def get_manifests(self, suite, mozinfo): + manifests = super(BugbugLoader, self).get_manifests(suite, mozinfo) + + # Don't prune any manifests if we're on a backstop push or there was a timeout. + if self.params["backstop"] or self.timedout: + return manifests + + try: + data = push_schedules(self.params["project"], self.params["head_rev"]) + except BugbugTimeoutException: + logger.warning("Timed out waiting for bugbug, loading all test manifests.") + self.timedout = True + return self.get_manifests(suite, mozinfo) + + bugbug_manifests = { + m + for m, c in data.get("groups", {}).items() + if c >= self.CONFIDENCE_THRESHOLD + } + + manifests["active"] = list(set(manifests["active"]) & bugbug_manifests) + manifests["skipped"] = list(set(manifests["skipped"]) & bugbug_manifests) + return manifests + + +manifest_loaders = { + "bugbug": BugbugLoader, + "default": DefaultLoader, +} + +_loader_cache = {} + + +def get_manifest_loader(name, params): + # Ensure we never create more than one instance of the same loader type for + # performance reasons. 
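+    # e.g. repeated calls to get_manifest_loader("default", params) return the
+    # same DefaultLoader instance; the loader name is the only cache key.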
+ if name in _loader_cache: + return _loader_cache[name] + + loader = manifest_loaders[name](dict(params)) + _loader_cache[name] = loader + return loader diff --git a/taskcluster/taskgraph/util/declarative_artifacts.py b/taskcluster/taskgraph/util/declarative_artifacts.py new file mode 100644 index 0000000000..821200e71d --- /dev/null +++ b/taskcluster/taskgraph/util/declarative_artifacts.py @@ -0,0 +1,70 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, unicode_literals + +import re + +from taskgraph.util.scriptworker import generate_beetmover_upstream_artifacts + + +_ARTIFACT_ID_PER_PLATFORM = { + "android-aarch64-opt": "geckoview-default-arm64-v8a", + "android-api-16-opt": "geckoview-default-armeabi-v7a", + "android-x86-opt": "geckoview-default-x86", + "android-x86_64-opt": "geckoview-default-x86_64", + "android-geckoview-fat-aar-opt": "geckoview-default", + "android-aarch64-shippable": "geckoview{update_channel}-arm64-v8a", + "android-api-16-shippable": "geckoview{update_channel}-armeabi-v7a", + "android-x86-shippable": "geckoview{update_channel}-x86", + "android-x86_64-shippable": "geckoview{update_channel}-x86_64", + "android-geckoview-fat-aar-shippable": "geckoview{update_channel}", +} + + +def get_geckoview_upstream_artifacts(config, job, platform=""): + if not platform: + platform = job["attributes"]["build_platform"] + upstream_artifacts = generate_beetmover_upstream_artifacts( + config, + job, + platform="", + **get_geckoview_template_vars( + config, platform, job["attributes"].get("update-channel") + ) + ) + return [ + {key: value for key, value in upstream_artifact.items() if key != "locale"} + for upstream_artifact in upstream_artifacts + ] + + +def get_geckoview_template_vars(config, platform, update_channel): + version_groups = re.match(r"(\d+).(\d+).*", config.params["version"]) + if version_groups: + major_version, minor_version = version_groups.groups() + + return { + "artifact_id": get_geckoview_artifact_id( + config, + platform, + update_channel, + ), + "build_date": config.params["moz_build_date"], + "major_version": major_version, + "minor_version": minor_version, + } + + +def get_geckoview_artifact_id(config, platform, update_channel=None): + if update_channel == "release": + update_channel = "" + elif update_channel is not None: + update_channel = "-{}".format(update_channel) + else: + # For shippable builds, mozharness defaults to using + # "nightly-{project}" for the update channel. For other builds, the + # update channel is not set, but the value is not substituted. + update_channel = "-nightly-{}".format(config.params["project"]) + return _ARTIFACT_ID_PER_PLATFORM[platform].format(update_channel=update_channel) diff --git a/taskcluster/taskgraph/util/docker.py b/taskcluster/taskgraph/util/docker.py new file mode 100644 index 0000000000..0642e4faff --- /dev/null +++ b/taskcluster/taskgraph/util/docker.py @@ -0,0 +1,356 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import, print_function, unicode_literals + +import hashlib +import io +import json +import os +import re +import requests +import requests_unixsocket +import six +import sys + +from six.moves.urllib.parse import quote, urlencode, urlunparse +from six.moves.collections_abc import Mapping + +from mozbuild.util import memoize +from mozpack.files import GeneratedFile +from mozpack.archive import create_tar_gz_from_files +from .. import GECKO + +from .yaml import load_yaml + + +IMAGE_DIR = os.path.join(GECKO, "taskcluster", "docker") + + +def docker_url(path, **kwargs): + docker_socket = os.environ.get("DOCKER_SOCKET", "/var/run/docker.sock") + return urlunparse( + ("http+unix", quote(docker_socket, safe=""), path, "", urlencode(kwargs), "") + ) + + +def post_to_docker(tar, api_path, **kwargs): + """POSTs a tar file to a given docker API path. + + The tar argument can be anything that can be passed to requests.post() + as data (e.g. iterator or file object). + The extra keyword arguments are passed as arguments to the docker API. + """ + # requests-unixsocket doesn't honor requests timeouts + # See https://github.com/msabramo/requests-unixsocket/issues/44 + # We have some large docker images that trigger the default timeout, + # so we increase the requests-unixsocket timeout here. + session = requests.Session() + session.mount( + requests_unixsocket.DEFAULT_SCHEME, + requests_unixsocket.UnixAdapter(timeout=120), + ) + req = session.post( + docker_url(api_path, **kwargs), + data=tar, + stream=True, + headers={"Content-Type": "application/x-tar"}, + ) + if req.status_code != 200: + message = req.json().get("message") + if not message: + message = "docker API returned HTTP code {}".format(req.status_code) + raise Exception(message) + status_line = {} + + buf = b"" + for content in req.iter_content(chunk_size=None): + if not content: + continue + # Sometimes, a chunk of content is not a complete json, so we cumulate + # with leftovers from previous iterations. + buf += content + try: + data = json.loads(buf) + except Exception: + continue + buf = b"" + # data is sometimes an empty dict. + if not data: + continue + # Mimick how docker itself presents the output. This code was tested + # with API version 1.18 and 1.26. + if "status" in data: + if "id" in data: + if sys.stderr.isatty(): + total_lines = len(status_line) + line = status_line.setdefault(data["id"], total_lines) + n = total_lines - line + if n > 0: + # Move the cursor up n lines. + sys.stderr.write("\033[{}A".format(n)) + # Clear line and move the cursor to the beginning of it. + sys.stderr.write("\033[2K\r") + sys.stderr.write( + "{}: {} {}\n".format( + data["id"], data["status"], data.get("progress", "") + ) + ) + if n > 1: + # Move the cursor down n - 1 lines, which, considering + # the carriage return on the last write, gets us back + # where we started. + sys.stderr.write("\033[{}B".format(n - 1)) + else: + status = status_line.get(data["id"]) + # Only print status changes. 
+ if status != data["status"]: + sys.stderr.write("{}: {}\n".format(data["id"], data["status"])) + status_line[data["id"]] = data["status"] + else: + status_line = {} + sys.stderr.write("{}\n".format(data["status"])) + elif "stream" in data: + sys.stderr.write(data["stream"]) + elif "aux" in data: + sys.stderr.write(repr(data["aux"])) + elif "error" in data: + sys.stderr.write("{}\n".format(data["error"])) + # Sadly, docker doesn't give more than a plain string for errors, + # so the best we can do to propagate the error code from the command + # that failed is to parse the error message... + errcode = 1 + m = re.search(r"returned a non-zero code: (\d+)", data["error"]) + if m: + errcode = int(m.group(1)) + sys.exit(errcode) + else: + raise NotImplementedError(repr(data)) + sys.stderr.flush() + + +def docker_image(name, by_tag=False): + """ + Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``, + or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`. + """ + try: + with open(os.path.join(IMAGE_DIR, name, "REGISTRY")) as f: + registry = f.read().strip() + except IOError: + with open(os.path.join(IMAGE_DIR, "REGISTRY")) as f: + registry = f.read().strip() + + if not by_tag: + hashfile = os.path.join(IMAGE_DIR, name, "HASH") + try: + with open(hashfile) as f: + return "{}/{}@{}".format(registry, name, f.read().strip()) + except IOError: + raise Exception("Failed to read HASH file {}".format(hashfile)) + + try: + with open(os.path.join(IMAGE_DIR, name, "VERSION")) as f: + tag = f.read().strip() + except IOError: + tag = "latest" + return "{}/{}:{}".format(registry, name, tag) + + +class VoidWriter(object): + """A file object with write capabilities that does nothing with the written + data.""" + + def write(self, buf): + pass + + +def generate_context_hash(topsrcdir, image_path, image_name, args): + """Generates a sha256 hash for context directory used to build an image.""" + + return stream_context_tar( + topsrcdir, image_path, VoidWriter(), image_name, args=args + ) + + +class HashingWriter(object): + """A file object with write capabilities that hashes the written data at + the same time it passes down to a real file object.""" + + def __init__(self, writer): + self._hash = hashlib.sha256() + self._writer = writer + + def write(self, buf): + self._hash.update(buf) + self._writer.write(buf) + + def hexdigest(self): + return six.ensure_text(self._hash.hexdigest()) + + +def create_context_tar(topsrcdir, context_dir, out_path, image_name, args): + """Create a context tarball. + + A directory ``context_dir`` containing a Dockerfile will be assembled into + a gzipped tar file at ``out_path``. + + We also scan the source Dockerfile for special syntax that influences + context generation. + + If a line in the Dockerfile has the form ``# %include <path>``, + the relative path specified on that line will be matched against + files in the source repository and added to the context under the + path ``topsrcdir/``. If an entry is a directory, we add all files + under that directory. + + If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences of + the string ``$<name>`` in subsequent lines are replaced with the value + found in the ``args`` argument. Exception: this doesn't apply to VOLUME + definitions. + + Returns the SHA-256 hex digest of the created archive. 
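+
+    A minimal call might look like this (hypothetical paths and arguments):
+
+        create_context_tar("/src", "taskcluster/docker/base", "ctx.tar.gz",
+                           "base", {"DOCKER_IMAGE_PARENT": "ubuntu:18.04"})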
+ """ + with open(out_path, "wb") as fh: + return stream_context_tar( + topsrcdir, + context_dir, + fh, + image_name=image_name, + args=args, + ) + + +def stream_context_tar(topsrcdir, context_dir, out_file, image_name, args): + """Like create_context_tar, but streams the tar file to the `out_file` file + object.""" + archive_files = {} + replace = [] + content = [] + + context_dir = os.path.join(topsrcdir, context_dir) + + for root, dirs, files in os.walk(context_dir): + for f in files: + source_path = os.path.join(root, f) + archive_path = source_path[len(context_dir) + 1 :] + archive_files[archive_path] = source_path + + # Parse Dockerfile for special syntax of extra files to include. + with io.open(os.path.join(context_dir, "Dockerfile"), "r") as fh: + for line in fh: + if line.startswith("# %ARG"): + p = line[len("# %ARG ") :].strip() + if not args or p not in args: + raise Exception("missing argument: {}".format(p)) + replace.append((re.compile(r"\${}\b".format(p)), args[p])) + continue + + for regexp, s in replace: + line = re.sub(regexp, s, line) + + content.append(line) + + if not line.startswith("# %include"): + continue + + p = line[len("# %include ") :].strip() + if os.path.isabs(p): + raise Exception("extra include path cannot be absolute: %s" % p) + + fs_path = os.path.normpath(os.path.join(topsrcdir, p)) + # Check for filesystem traversal exploits. + if not fs_path.startswith(topsrcdir): + raise Exception("extra include path outside topsrcdir: %s" % p) + + if not os.path.exists(fs_path): + raise Exception("extra include path does not exist: %s" % p) + + if os.path.isdir(fs_path): + for root, dirs, files in os.walk(fs_path): + for f in files: + source_path = os.path.join(root, f) + rel = source_path[len(fs_path) + 1 :] + archive_path = os.path.join("topsrcdir", p, rel) + archive_files[archive_path] = source_path + else: + archive_path = os.path.join("topsrcdir", p) + archive_files[archive_path] = fs_path + + archive_files["Dockerfile"] = GeneratedFile( + b"".join(six.ensure_binary(s) for s in content) + ) + + writer = HashingWriter(out_file) + create_tar_gz_from_files(writer, archive_files, "{}.tar".format(image_name)) + return writer.hexdigest() + + +class ImagePathsMap(Mapping): + """ImagePathsMap contains the mapping of Docker image names to their + context location in the filesystem. The register function allows Thunderbird + to define additional images under comm/taskcluster. + """ + + def __init__(self, config_path, image_dir=IMAGE_DIR): + config = load_yaml(GECKO, config_path) + self.__update_image_paths(config["jobs"], image_dir) + + def __getitem__(self, key): + return self.__dict__[key] + + def __iter__(self): + return iter(self.__dict__) + + def __len__(self): + return len(self.__dict__) + + def __update_image_paths(self, jobs, image_dir): + self.__dict__.update( + { + k: os.path.join(image_dir, v.get("definition", k)) + for k, v in jobs.items() + } + ) + + def register(self, jobs_config_path, image_dir): + """Register additional image_paths. 
In this case, there is no 'jobs' + key in the loaded YAML as this file is loaded via jobs-from in kind.yml.""" + jobs = load_yaml(GECKO, jobs_config_path) + self.__update_image_paths(jobs, image_dir) + + +image_paths = ImagePathsMap("taskcluster/ci/docker-image/kind.yml") + + +def image_path(name): + if name in image_paths: + return image_paths[name] + return os.path.join(IMAGE_DIR, name) + + +@memoize +def parse_volumes(image): + """Parse VOLUME entries from a Dockerfile for an image.""" + volumes = set() + + path = image_path(image) + + with open(os.path.join(path, "Dockerfile"), "rb") as fh: + for line in fh: + line = line.strip() + # We assume VOLUME definitions don't use %ARGS. + if not line.startswith(b"VOLUME "): + continue + + v = line.split(None, 1)[1] + if v.startswith(b"["): + raise ValueError( + "cannot parse array syntax for VOLUME; " + "convert to multiple entries" + ) + + volumes |= set([six.ensure_text(v) for v in v.split()]) + + return volumes diff --git a/taskcluster/taskgraph/util/hash.py b/taskcluster/taskgraph/util/hash.py new file mode 100644 index 0000000000..04b946be71 --- /dev/null +++ b/taskcluster/taskgraph/util/hash.py @@ -0,0 +1,58 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals +from mozbuild.util import memoize +import mozpack.path as mozpath +from mozversioncontrol import get_repository_object +import hashlib +import io +import six + + +@memoize +def hash_path(path): + """Hash a single file. + + Returns the SHA-256 hash in hex form. + """ + with io.open(path, mode="rb") as fh: + return hashlib.sha256(fh.read()).hexdigest() + + +@memoize +def get_file_finder(base_path): + return get_repository_object(base_path).get_tracked_files_finder() + + +def hash_paths(base_path, patterns): + """ + Give a list of path patterns, return a digest of the contents of all + the corresponding files, similarly to git tree objects or mercurial + manifests. + + Each file is hashed. The list of all hashes and file paths is then + itself hashed to produce the result. + """ + finder = get_file_finder(base_path) + h = hashlib.sha256() + files = {} + for pattern in patterns: + found = list(finder.find(pattern)) + if found: + files.update(found) + else: + raise Exception("%s did not match anything" % pattern) + for path in sorted(files.keys()): + if path.endswith((".pyc", ".pyd", ".pyo")): + continue + h.update( + six.ensure_binary( + "{} {}\n".format( + hash_path(mozpath.abspath(mozpath.join(base_path, path))), + mozpath.normsep(path), + ) + ) + ) + return h.hexdigest() diff --git a/taskcluster/taskgraph/util/hg.py b/taskcluster/taskgraph/util/hg.py new file mode 100644 index 0000000000..65dd412354 --- /dev/null +++ b/taskcluster/taskgraph/util/hg.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import, print_function, unicode_literals + +import logging + +import requests +import six +import subprocess +from redo import retry + +from mozbuild.util import memoize + +logger = logging.getLogger(__name__) + +PUSHLOG_CHANGESET_TMPL = ( + "{repository}/json-pushes?version=2&changeset={revision}&tipsonly=1" +) +PUSHLOG_PUSHES_TMPL = ( + "{repository}/json-pushes/?version=2&startID={push_id_start}&endID={push_id_end}" +) + + +def _query_pushlog(url): + response = retry( + requests.get, + attempts=5, + sleeptime=10, + args=(url,), + kwargs={"timeout": 60, "headers": {"User-Agent": "TaskCluster"}}, + ) + + return response.json()["pushes"] + + +def find_hg_revision_push_info(repository, revision): + """Given the parameters for this action and a revision, find the + pushlog_id of the revision.""" + url = PUSHLOG_CHANGESET_TMPL.format(repository=repository, revision=revision) + + pushes = _query_pushlog(url) + + if len(pushes) != 1: + raise RuntimeError( + "Found {} pushlog_ids, expected 1, for {} revision {}: {}".format( + len(pushes), repository, revision, pushes + ) + ) + + pushid = list(pushes.keys())[0] + return { + "pushdate": pushes[pushid]["date"], + "pushid": pushid, + "user": pushes[pushid]["user"], + } + + +@memoize +def get_push_data(repository, project, push_id_start, push_id_end): + url = PUSHLOG_PUSHES_TMPL.format( + repository=repository, + push_id_start=push_id_start - 1, + push_id_end=push_id_end, + ) + + try: + pushes = _query_pushlog(url) + + return { + push_id: pushes[str(push_id)] + for push_id in range(push_id_start, push_id_end + 1) + } + + # In the event of request times out, requests will raise a TimeoutError. + except requests.exceptions.Timeout: + logger.warning("json-pushes timeout") + + # In the event of a network problem (e.g. DNS failure, refused connection, etc), + # requests will raise a ConnectionError. + except requests.exceptions.ConnectionError: + logger.warning("json-pushes connection error") + + # In the event of the rare invalid HTTP response(e.g 404, 401), + # requests will raise an HTTPError exception + except requests.exceptions.HTTPError: + logger.warning("Bad Http response") + + # When we get invalid JSON (i.e. 500 error), it results in a ValueError (bug 1313426) + except ValueError as error: + logger.warning("Invalid JSON, possible server error: {}".format(error)) + + # We just print the error out as a debug message if we failed to catch the exception above + except requests.exceptions.RequestException as error: + logger.warning(error) + + return None + + +def get_hg_revision_branch(root, revision): + """Given the parameters for a revision, find the hg_branch (aka + relbranch) of the revision.""" + return six.ensure_text( + subprocess.check_output( + [ + "hg", + "identify", + "-T", + "{branch}", + "--rev", + revision, + ], + cwd=root, + universal_newlines=True, + ) + ) + + +# For these functions, we assume that run-task has correctly checked out the +# revision indicated by GECKO_HEAD_REF, so all that remains is to see what the +# current revision is. Mercurial refers to that as `.`. 
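+# For example (illustrative path), calculate_head_rev("/builds/worker/checkouts/gecko")
+# shells out to `hg log -r . -T {node}` and returns the current changeset hash
+# as text.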
+def get_hg_commit_message(root): + return six.ensure_text( + subprocess.check_output(["hg", "log", "-r", ".", "-T", "{desc}"], cwd=root) + ) + + +def calculate_head_rev(root): + return six.ensure_text( + subprocess.check_output(["hg", "log", "-r", ".", "-T", "{node}"], cwd=root) + ) diff --git a/taskcluster/taskgraph/util/keyed_by.py b/taskcluster/taskgraph/util/keyed_by.py new file mode 100644 index 0000000000..c86447f75f --- /dev/null +++ b/taskcluster/taskgraph/util/keyed_by.py @@ -0,0 +1,89 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +from .attributes import keymatch + + +def evaluate_keyed_by(value, item_name, attributes, defer=None): + """ + For values which can either accept a literal value, or be keyed by some + attributes, perform that lookup and return the result. + + For example, given item:: + + by-test-platform: + macosx-10.11/debug: 13 + win.*: 6 + default: 12 + + a call to `evaluate_keyed_by(item, 'thing-name', {'test-platform': 'linux96')` + would return `12`. + + The `item_name` parameter is used to generate useful error messages. + Items can be nested as deeply as desired:: + + by-test-platform: + win.*: + by-project: + ash: .. + cedar: .. + linux: 13 + default: 12 + + The `defer` parameter allows evaluating a by-* entry at a later time. In the + example above it's possible that the project attribute hasn't been set + yet, in which case we'd want to stop before resolving that subkey and then + call this function again later. This can be accomplished by setting + `defer=["project"]` in this example. + """ + while True: + if not isinstance(value, dict) or len(value) != 1: + return value + value_key = next(iter(value)) + if not value_key.startswith("by-"): + return value + + keyed_by = value_key[3:] # strip off 'by-' prefix + + if defer and keyed_by in defer: + return value + + key = attributes.get(keyed_by) + alternatives = next(iter(value.values())) + + if len(alternatives) == 1 and "default" in alternatives: + # Error out when only 'default' is specified as only alternatives, + # because we don't need to by-{keyed_by} there. + raise Exception( + "Keyed-by '{}' unnecessary with only value 'default' " + "found, when determining item {}".format(keyed_by, item_name) + ) + + if key is None: + if "default" in alternatives: + value = alternatives["default"] + continue + else: + raise Exception( + "No attribute {} and no value for 'default' found " + "while determining item {}".format(keyed_by, item_name) + ) + + matches = keymatch(alternatives, key) + if len(matches) > 1: + raise Exception( + "Multiple matching values for {} {!r} found while " + "determining item {}".format(keyed_by, key, item_name) + ) + elif matches: + value = matches[0] + continue + + raise Exception( + "No {} matching {!r} nor 'default' found while determining item {}".format( + keyed_by, key, item_name + ) + ) diff --git a/taskcluster/taskgraph/util/parameterization.py b/taskcluster/taskgraph/util/parameterization.py new file mode 100644 index 0000000000..0486773605 --- /dev/null +++ b/taskcluster/taskgraph/util/parameterization.py @@ -0,0 +1,107 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
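+# Illustrative example of what this module resolves: a payload entry
+# {"task-reference": "<build>"} is replaced by the taskId of the "build"
+# dependency, and {"artifact-reference": "<build/public/target.zip>"} by the
+# artifact URL for that taskId (dependency and artifact names are
+# hypothetical).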
+ +from __future__ import absolute_import, print_function, unicode_literals + +import re + +import six + +from taskgraph.util.time import json_time_from_now +from taskgraph.util.taskcluster import get_artifact_url + +TASK_REFERENCE_PATTERN = re.compile("<([^>]+)>") +ARTIFACT_REFERENCE_PATTERN = re.compile("<([^/]+)/([^>]+)>") + + +def _recurse(val, param_fns): + def recurse(val): + if isinstance(val, list): + return [recurse(v) for v in val] + elif isinstance(val, dict): + if len(val) == 1: + for param_key, param_fn in param_fns.items(): + if set(six.iterkeys(val)) == {param_key}: + return param_fn(val[param_key]) + return {k: recurse(v) for k, v in six.iteritems(val)} + else: + return val + + return recurse(val) + + +def resolve_timestamps(now, task_def): + """Resolve all instances of `{'relative-datestamp': '..'}` in the given task definition""" + return _recurse( + task_def, + { + "relative-datestamp": lambda v: json_time_from_now(v, now), + }, + ) + + +def resolve_task_references(label, task_def, task_id, decision_task_id, dependencies): + """Resolve all instances of + {'task-reference': '..<..>..'} + and + {'artifact-reference`: '..<dependency/artifact/path>..'} + in the given task definition, using the given dependencies + + """ + + def task_reference(val): + def repl(match): + key = match.group(1) + if key == "self": + return task_id + elif key == "decision": + return decision_task_id + try: + return dependencies[key] + except KeyError: + # handle escaping '<' + if key == "<": + return key + raise KeyError( + "task '{}' has no dependency named '{}'".format(label, key) + ) + + return TASK_REFERENCE_PATTERN.sub(repl, val) + + def artifact_reference(val): + def repl(match): + dependency, artifact_name = match.group(1, 2) + + if dependency == "self": + raise KeyError( + "task '{}' can't reference artifacts of self".format(label) + ) + elif dependency == "decision": + task_id = decision_task_id + else: + try: + task_id = dependencies[dependency] + except KeyError: + raise KeyError( + "task '{}' has no dependency named '{}'".format( + label, dependency + ) + ) + + assert artifact_name.startswith( + "public/" + ), "artifact-reference only supports public artifacts, not `{}`".format( + artifact_name + ) + return get_artifact_url(task_id, artifact_name) + + return ARTIFACT_REFERENCE_PATTERN.sub(repl, val) + + return _recurse( + task_def, + { + "task-reference": task_reference, + "artifact-reference": artifact_reference, + }, + ) diff --git a/taskcluster/taskgraph/util/partials.py b/taskcluster/taskgraph/util/partials.py new file mode 100644 index 0000000000..50a64d89de --- /dev/null +++ b/taskcluster/taskgraph/util/partials.py @@ -0,0 +1,301 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import, print_function, unicode_literals + +import logging + +import requests +import six + +import redo +from taskgraph.util.scriptworker import ( + BALROG_SCOPE_ALIAS_TO_PROJECT, + BALROG_SERVER_SCOPES, +) + +logger = logging.getLogger(__name__) + +PLATFORM_RENAMES = { + "windows2012-32": "win32", + "windows2012-64": "win64", + "windows2012-aarch64": "win64-aarch64", + "osx-cross": "macosx64", + "osx": "macosx64", +} + +BALROG_PLATFORM_MAP = { + "linux": ["Linux_x86-gcc3"], + "linux32": ["Linux_x86-gcc3"], + "linux64": ["Linux_x86_64-gcc3"], + "linux64-asan-reporter": ["Linux_x86_64-gcc3-asan"], + "macosx64": [ + "Darwin_x86_64-gcc3-u-i386-x86_64", + "Darwin_x86-gcc3-u-i386-x86_64", + "Darwin_aarch64-gcc3", + "Darwin_x86-gcc3", + "Darwin_x86_64-gcc3", + ], + "win32": ["WINNT_x86-msvc", "WINNT_x86-msvc-x86", "WINNT_x86-msvc-x64"], + "win64": ["WINNT_x86_64-msvc", "WINNT_x86_64-msvc-x64"], + "win64-asan-reporter": ["WINNT_x86_64-msvc-x64-asan"], + "win64-aarch64": [ + "WINNT_aarch64-msvc-aarch64", + ], +} + +FTP_PLATFORM_MAP = { + "Darwin_x86-gcc3": "mac", + "Darwin_x86-gcc3-u-i386-x86_64": "mac", + "Darwin_x86_64-gcc3": "mac", + "Darwin_x86_64-gcc3-u-i386-x86_64": "mac", + "Darwin_aarch64-gcc3": "mac", + "Linux_x86-gcc3": "linux-i686", + "Linux_x86_64-gcc3": "linux-x86_64", + "Linux_x86_64-gcc3-asan": "linux-x86_64-asan-reporter", + "WINNT_x86_64-msvc-x64-asan": "win64-asan-reporter", + "WINNT_x86-msvc": "win32", + "WINNT_x86-msvc-x64": "win32", + "WINNT_x86-msvc-x86": "win32", + "WINNT_x86_64-msvc": "win64", + "WINNT_x86_64-msvc-x64": "win64", + "WINNT_aarch64-msvc-aarch64": "win64-aarch64", +} + + +def get_balrog_platform_name(platform): + """Convert build platform names into balrog platform names. + + Remove known values instead to catch aarch64 and other platforms + that may be added. 
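+
+    For example (illustrative inputs), "windows2012-64-shippable" becomes
+    "win64", while an unlisted platform such as "linux64-aarch64" is returned
+    unchanged.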
+ """ + removals = ["-devedition", "-shippable"] + for remove in removals: + platform = platform.replace(remove, "") + return PLATFORM_RENAMES.get(platform, platform) + + +def _sanitize_platform(platform): + platform = get_balrog_platform_name(platform) + if platform not in BALROG_PLATFORM_MAP: + return platform + return BALROG_PLATFORM_MAP[platform][0] + + +def get_builds(release_history, platform, locale): + """Examine cached balrog release history and return the list of + builds we need to generate diffs from""" + platform = _sanitize_platform(platform) + return release_history.get(platform, {}).get(locale, {}) + + +def get_partials_artifacts_from_params(release_history, platform, locale): + platform = _sanitize_platform(platform) + return [ + (artifact, details.get("previousVersion", None)) + for artifact, details in release_history.get(platform, {}) + .get(locale, {}) + .items() + ] + + +def get_partials_info_from_params(release_history, platform, locale): + platform = _sanitize_platform(platform) + + artifact_map = {} + for k in release_history.get(platform, {}).get(locale, {}): + details = release_history[platform][locale][k] + attributes = ("buildid", "previousBuildNumber", "previousVersion") + artifact_map[k] = { + attr: details[attr] for attr in attributes if attr in details + } + return artifact_map + + +def _retry_on_http_errors(url, verify, params, errors): + if params: + params_str = "&".join("=".join([k, str(v)]) for k, v in six.iteritems(params)) + else: + params_str = "" + logger.info("Connecting to %s?%s", url, params_str) + for _ in redo.retrier(sleeptime=5, max_sleeptime=30, attempts=10): + try: + req = requests.get(url, verify=verify, params=params, timeout=10) + req.raise_for_status() + return req + except requests.HTTPError as e: + if e.response.status_code in errors: + logger.exception( + "Got HTTP %s trying to reach %s", e.response.status_code, url + ) + else: + raise + else: + raise + + +def get_sorted_releases(product, branch): + """Returns a list of release names from Balrog. + :param product: product name, AKA appName + :param branch: branch name, e.g. mozilla-central + :return: a sorted list of release names, most recent first. + """ + url = "{}/releases".format(_get_balrog_api_root(branch)) + params = { + "product": product, + # Adding -nightly-2 (2 stands for the beginning of build ID + # based on date) should filter out release and latest blobs. + # This should be changed to -nightly-3 in 3000 ;) + "name_prefix": "{}-{}-nightly-2".format(product, branch), + "names_only": True, + } + req = _retry_on_http_errors(url=url, verify=True, params=params, errors=[500]) + releases = req.json()["names"] + releases = sorted(releases, reverse=True) + return releases + + +def get_release_builds(release, branch): + url = "{}/releases/{}".format(_get_balrog_api_root(branch), release) + req = _retry_on_http_errors(url=url, verify=True, params=None, errors=[500]) + return req.json() + + +def _get_balrog_api_root(branch): + # Query into the scopes scriptworker uses to make sure we check against the same balrog server + # That our jobs would use. 
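+    # In practice (per the branches below), any project whose resolved scope
+    # is "balrog:server:dep" is pointed at the staging host, and everything
+    # else uses the production aus5.mozilla.org endpoint.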
+ scope = None + for alias, projects in BALROG_SCOPE_ALIAS_TO_PROJECT: + if branch in projects and alias in BALROG_SERVER_SCOPES: + scope = BALROG_SERVER_SCOPES[alias] + break + else: + scope = BALROG_SERVER_SCOPES["default"] + + if scope == "balrog:server:dep": + return "https://stage.balrog.nonprod.cloudops.mozgcp.net/api/v1" + else: + return "https://aus5.mozilla.org/api/v1" + + +def find_localtest(fileUrls): + for channel in fileUrls: + if "-localtest" in channel: + return channel + + +def populate_release_history( + product, branch, maxbuilds=4, maxsearch=10, partial_updates=None +): + # Assuming we are using release branches when we know the list of previous + # releases in advance + if partial_updates: + return _populate_release_history( + product, branch, partial_updates=partial_updates + ) + else: + return _populate_nightly_history( + product, branch, maxbuilds=maxbuilds, maxsearch=maxsearch + ) + + +def _populate_nightly_history(product, branch, maxbuilds=4, maxsearch=10): + """Find relevant releases in Balrog + Not all releases have all platforms and locales, due + to Taskcluster migration. + + Args: + product (str): capitalized product name, AKA appName, e.g. Firefox + branch (str): branch name (mozilla-central) + maxbuilds (int): Maximum number of historical releases to populate + maxsearch(int): Traverse at most this many releases, to avoid + working through the entire history. + Returns: + json object based on data from balrog api + + results = { + 'platform1': { + 'locale1': { + 'buildid1': mar_url, + 'buildid2': mar_url, + 'buildid3': mar_url, + }, + 'locale2': { + 'target.partial-1.mar': {'buildid1': 'mar_url'}, + } + }, + 'platform2': { + } + } + """ + last_releases = get_sorted_releases(product, branch) + + partial_mar_tmpl = "target.partial-{}.mar" + + builds = dict() + for release in last_releases[:maxsearch]: + # maxbuilds in all categories, don't make any more queries + full = len(builds) > 0 and all( + len(builds[platform][locale]) >= maxbuilds + for platform in builds + for locale in builds[platform] + ) + if full: + break + history = get_release_builds(release, branch) + + for platform in history["platforms"]: + if "alias" in history["platforms"][platform]: + continue + if platform not in builds: + builds[platform] = dict() + for locale in history["platforms"][platform]["locales"]: + if locale not in builds[platform]: + builds[platform][locale] = dict() + if len(builds[platform][locale]) >= maxbuilds: + continue + buildid = history["platforms"][platform]["locales"][locale]["buildID"] + url = history["platforms"][platform]["locales"][locale]["completes"][0][ + "fileUrl" + ] + nextkey = len(builds[platform][locale]) + 1 + builds[platform][locale][partial_mar_tmpl.format(nextkey)] = { + "buildid": buildid, + "mar_url": url, + } + return builds + + +def _populate_release_history(product, branch, partial_updates): + builds = dict() + for version, release in six.iteritems(partial_updates): + prev_release_blob = "{product}-{version}-build{build_number}".format( + product=product, version=version, build_number=release["buildNumber"] + ) + partial_mar_key = "target-{version}.partial.mar".format(version=version) + history = get_release_builds(prev_release_blob, branch) + # use one of the localtest channels to avoid relying on bouncer + localtest = find_localtest(history["fileUrls"]) + url_pattern = history["fileUrls"][localtest]["completes"]["*"] + + for platform in history["platforms"]: + if "alias" in history["platforms"][platform]: + continue + if platform not in 
builds: + builds[platform] = dict() + for locale in history["platforms"][platform]["locales"]: + if locale not in builds[platform]: + builds[platform][locale] = dict() + buildid = history["platforms"][platform]["locales"][locale]["buildID"] + url = url_pattern.replace( + "%OS_FTP%", FTP_PLATFORM_MAP[platform] + ).replace("%LOCALE%", locale) + builds[platform][locale][partial_mar_key] = { + "buildid": buildid, + "mar_url": url, + "previousVersion": version, + "previousBuildNumber": release["buildNumber"], + "product": product, + } + return builds diff --git a/taskcluster/taskgraph/util/partners.py b/taskcluster/taskgraph/util/partners.py new file mode 100644 index 0000000000..1d820bf55e --- /dev/null +++ b/taskcluster/taskgraph/util/partners.py @@ -0,0 +1,557 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +from copy import deepcopy +import json +import logging +import os +from redo import retry +import requests +import xml.etree.ElementTree as ET + +from taskgraph.util.attributes import release_level +from taskgraph.util.schema import resolve_keyed_by +import six +import yaml + +# Suppress chatty requests logging +logging.getLogger("requests").setLevel(logging.WARNING) + +log = logging.getLogger(__name__) + +GITHUB_API_ENDPOINT = "https://api.github.com/graphql" + +""" +LOGIN_QUERY, MANIFEST_QUERY, and REPACK_CFG_QUERY are all written to the Github v4 API, +which users GraphQL. See https://developer.github.com/v4/ +""" + +LOGIN_QUERY = """query { + viewer { + login + name + } +} +""" + +# Returns the contents of default.xml from a manifest repository +MANIFEST_QUERY = """query { + repository(owner:"%(owner)s", name:"%(repo)s") { + object(expression: "master:%(file)s") { + ... on Blob { + text + } + } + } +} +""" +# Example response: +# { +# "data": { +# "repository": { +# "object": { +# "text": "<?xml version=\"1.0\" ?>\n<manifest>\n " + +# "<remote fetch=\"git@github.com:mozilla-partners/\" name=\"mozilla-partners\"/>\n " + +# "<remote fetch=\"git@github.com:mozilla/\" name=\"mozilla\"/>\n\n " + +# "<project name=\"repack-scripts\" path=\"scripts\" remote=\"mozilla-partners\" " + +# "revision=\"master\"/>\n <project name=\"build-tools\" path=\"scripts/tools\" " + +# "remote=\"mozilla\" revision=\"master\"/>\n <project name=\"mozilla-EME-free\" " + +# "path=\"partners/mozilla-EME-free\" remote=\"mozilla-partners\" " + +# "revision=\"master\"/>\n</manifest>\n" +# } +# } +# } +# } + +# Returns the contents of desktop/*/repack.cfg for a partner repository +REPACK_CFG_QUERY = """query{ + repository(owner:"%(owner)s", name:"%(repo)s") { + object(expression: "%(revision)s:desktop/"){ + ... on Tree { + entries { + name + object { + ... on Tree { + entries { + name + object { + ... 
on Blob { + text + } + } + } + } + } + } + } + } + } +} +""" +# Example response: +# { +# "data": { +# "repository": { +# "object": { +# "entries": [ +# { +# "name": "mozilla-EME-free", +# "object": { +# "entries": [ +# { +# "name": "distribution", +# "object": {} +# }, +# { +# "name": "repack.cfg", +# "object": { +# "text": "aus=\"mozilla-EMEfree\"\ndist_id=\"mozilla-EMEfree\"\n" + +# "dist_version=\"1.0\"\nlinux-i686=true\nlinux-x86_64=true\n" + +# " locales=\"ach af de en-US\"\nmac=true\nwin32=true\nwin64=true\n" + +# "output_dir=\"%(platform)s-EME-free/%(locale)s\"\n\n" + +# "# Upload params\nbucket=\"net-mozaws-prod-delivery-firefox\"\n" + +# "upload_to_candidates=true\n" +# } +# } +# ] +# } +# } +# ] +# } +# } +# } +# } + +# Map platforms in repack.cfg into their equivalents in taskcluster +TC_PLATFORM_PER_FTP = { + "linux-i686": "linux-shippable", + "linux-x86_64": "linux64-shippable", + "mac": "macosx64-shippable", + "win32": "win32-shippable", + "win64": "win64-shippable", + "win64-aarch64": "win64-aarch64-shippable", +} + +TASKCLUSTER_PROXY_SECRET_ROOT = "http://taskcluster/secrets/v1/secret" + +LOCALES_FILE = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "browser", + "locales", + "l10n-changesets.json", +) + +# cache data at the module level +partner_configs = {} + + +def get_token(params): + """We use a Personal Access Token from Github to lookup partner config. No extra scopes are + needed on the token to read public repositories, but need the 'repo' scope to see private + repositories. This is not fine grained and also grants r/w access, but is revoked at the repo + level. + """ + + # Allow for local taskgraph debugging + if os.environ.get("GITHUB_API_TOKEN"): + return os.environ["GITHUB_API_TOKEN"] + + # The 'usual' method - via taskClusterProxy for decision tasks + url = "{secret_root}/project/releng/gecko/build/level-{level}/partner-github-api".format( + secret_root=TASKCLUSTER_PROXY_SECRET_ROOT, **params + ) + try: + resp = retry( + requests.get, + attempts=2, + sleeptime=10, + args=(url,), + kwargs={"timeout": 60, "headers": ""}, + ) + j = resp.json() + return j["secret"]["key"] + except (requests.ConnectionError, ValueError, KeyError): + raise RuntimeError("Could not get Github API token to lookup partner data") + + +def query_api(query, token): + """ Make a query with a Github auth header, returning the json """ + headers = {"Authorization": "bearer %s" % token} + r = requests.post(GITHUB_API_ENDPOINT, json={"query": query}, headers=headers) + r.raise_for_status() + + j = r.json() + if "errors" in j: + raise RuntimeError("Github query error - %s", j["errors"]) + return j + + +def check_login(token): + log.debug("Checking we have a valid login") + query_api(LOGIN_QUERY, token) + + +def get_repo_params(repo): + """ Parse the organisation and repo name from an https or git url for a repo """ + if repo.startswith("https"): + # eg https://github.com/mozilla-partners/mozilla-EME-free + return repo.rsplit("/", 2)[-2:] + elif repo.startswith("git@"): + # eg git@github.com:mozilla-partners/mailru.git + repo = repo.replace(".git", "") + return repo.split(":")[-1].split("/") + + +def get_partners(manifestRepo, token): + """Given the url to a manifest repository, retrieve the default.xml and parse it into a + list of partner repos. 
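+
+    An illustrative sketch of the returned mapping, keyed by partner project
+    name (the values shown are hypothetical):
+
+        {
+            "mozilla-EME-free": {
+                "owner": "mozilla-partners",
+                "repo": "mozilla-EME-free",
+                "revision": "master",
+            },
+        }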
+ """ + log.debug("Querying for manifest default.xml in %s", manifestRepo) + owner, repo = get_repo_params(manifestRepo) + query = MANIFEST_QUERY % {"owner": owner, "repo": repo, "file": "default.xml"} + raw_manifest = query_api(query, token) + log.debug("Raw manifest: %s", raw_manifest) + if not raw_manifest["data"]["repository"]: + raise RuntimeError( + "Couldn't load partner manifest at %s, insufficient permissions ?" + % manifestRepo + ) + e = ET.fromstring(raw_manifest["data"]["repository"]["object"]["text"]) + + remotes = {} + partners = {} + for child in e: + if child.tag == "remote": + name = child.attrib["name"] + url = child.attrib["fetch"] + remotes[name] = url + log.debug("Added remote %s at %s", name, url) + elif child.tag == "project": + # we don't need to check any code repos + if "scripts" in child.attrib["path"]: + continue + owner, _ = get_repo_params(remotes[child.attrib["remote"]] + "_") + partner_url = { + "owner": owner, + "repo": child.attrib["name"], + "revision": child.attrib["revision"], + } + partners[child.attrib["name"]] = partner_url + log.debug( + "Added partner %s at revision %s" + % (partner_url["repo"], partner_url["revision"]) + ) + return partners + + +def parse_config(data): + """Parse a single repack.cfg file into a python dictionary. + data is contents of the file, in "foo=bar\nbaz=buzz" style. We do some translation on + locales and platforms data, otherwise passthrough + """ + ALLOWED_KEYS = ( + "locales", + "platforms", + "upload_to_candidates", + "repack_stub_installer", + "publish_to_releases", + ) + config = {"platforms": []} + for l in data.splitlines(): + if "=" in l: + l = str(l) + key, value = l.split("=", 1) + value = value.strip("'\"").rstrip("'\"") + if key in TC_PLATFORM_PER_FTP.keys(): + if value.lower() == "true": + config["platforms"].append(TC_PLATFORM_PER_FTP[key]) + continue + if key not in ALLOWED_KEYS: + continue + if key == "locales": + # a list please + value = value.split(" ") + config[key] = value + return config + + +def get_repack_configs(repackRepo, token): + """ For a partner repository, retrieve all the repack.cfg files and parse them into a dict """ + log.debug("Querying for configs in %s", repackRepo) + query = REPACK_CFG_QUERY % repackRepo + raw_configs = query_api(query, token) + raw_configs = raw_configs["data"]["repository"]["object"]["entries"] + + configs = {} + for sub_config in raw_configs: + name = sub_config["name"] + for file in sub_config["object"].get("entries", []): + if file["name"] != "repack.cfg": + continue + configs[name] = parse_config(file["object"]["text"]) + return configs + + +def get_attribution_config(manifestRepo, token): + log.debug("Querying for manifest attribution_config.yml in %s", manifestRepo) + owner, repo = get_repo_params(manifestRepo) + query = MANIFEST_QUERY % { + "owner": owner, + "repo": repo, + "file": "attribution_config.yml", + } + raw_manifest = query_api(query, token) + if not raw_manifest["data"]["repository"]: + raise RuntimeError( + "Couldn't load partner manifest at %s, insufficient permissions ?" 
+ % manifestRepo + ) + # no file has been set up, gracefully continue + if raw_manifest["data"]["repository"]["object"] is None: + log.debug("No attribution_config.yml file found") + return {} + + return yaml.safe_load(raw_manifest["data"]["repository"]["object"]["text"]) + + +def get_partner_config_by_url(manifest_url, kind, token, partner_subset=None): + """Retrieve partner data starting from the manifest url, which points to a repository + containing a default.xml that is intended to be drive the Google tool 'repo'. It + descends into each partner repo to lookup and parse the repack.cfg file(s). + + If partner_subset is a list of sub_config names only return data for those. + + Supports caching data by kind to avoid repeated requests, relying on the related kinds for + partner repacking, signing, repackage, repackage signing all having the same kind prefix. + """ + if not manifest_url: + raise RuntimeError("Manifest url for {} not defined".format(kind)) + if kind not in partner_configs: + log.info("Looking up data for %s from %s", kind, manifest_url) + check_login(token) + if kind == "release-partner-attribution": + partner_configs[kind] = get_attribution_config(manifest_url, token) + else: + partners = get_partners(manifest_url, token) + + partner_configs[kind] = {} + for partner, partner_url in partners.items(): + if partner_subset and partner not in partner_subset: + continue + partner_configs[kind][partner] = get_repack_configs(partner_url, token) + + return partner_configs[kind] + + +def check_if_partners_enabled(config, tasks): + if ( + ( + config.params["release_enable_partner_repack"] + and config.kind.startswith("release-partner-repack") + ) + or ( + config.params["release_enable_partner_attribution"] + and config.kind.startswith("release-partner-attribution") + ) + or ( + config.params["release_enable_emefree"] + and config.kind.startswith("release-eme-free-") + ) + ): + for task in tasks: + yield task + + +def get_partner_config_by_kind(config, kind): + """Retrieve partner data starting from the manifest url, which points to a repository + containing a default.xml that is intended to be drive the Google tool 'repo'. It + descends into each partner repo to lookup and parse the repack.cfg file(s). + + Supports caching data by kind to avoid repeated requests, relying on the related kinds for + partner repacking, signing, repackage, repackage signing all having the same kind prefix. 
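+
+    A sketch of typical usage (see `get_repack_ids_by_platform` below):
+
+        kind_config = get_partner_config_by_kind(config, config.kind)
+
+    For a kind named e.g. "release-partner-repack-signing" (name illustrative),
+    this resolves to the "release-partner-repack" entry of
+    `config.params["release_partner_config"]`, filtered down to the partners in
+    `config.params["release_partners"]` when that parameter is set.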
+ """ + partner_subset = config.params["release_partners"] + partner_configs = config.params["release_partner_config"] or {} + + # TODO eme-free should be a partner; we shouldn't care about per-kind + for k in partner_configs: + if kind.startswith(k): + kind_config = partner_configs[k] + break + else: + return {} + # if we're only interested in a subset of partners we remove the rest + if partner_subset: + if kind.startswith("release-partner-repack"): + # TODO - should be fatal to have an unknown partner in partner_subset + for partner in [p for p in kind_config.keys() if p not in partner_subset]: + del kind_config[partner] + elif kind.startswith("release-partner-attribution") and isinstance( + kind_config, dict + ): + all_configs = deepcopy(kind_config.get("configs", [])) + kind_config["configs"] = [] + for this_config in all_configs: + if this_config["campaign"] in partner_subset: + kind_config["configs"].append(this_config) + return kind_config + + +def _fix_subpartner_locales(orig_config, all_locales): + subpartner_config = deepcopy(orig_config) + # Get an ordered list of subpartner locales that is a subset of all_locales + subpartner_config["locales"] = sorted( + list(set(orig_config["locales"]) & set(all_locales)) + ) + return subpartner_config + + +def fix_partner_config(orig_config): + pc = {} + with open(LOCALES_FILE, "r") as fh: + all_locales = list(json.load(fh).keys()) + # l10n-changesets.json doesn't include en-US, but the repack list does + if "en-US" not in all_locales: + all_locales.append("en-US") + for kind, kind_config in six.iteritems(orig_config): + if kind == "release-partner-attribution": + pc[kind] = {} + if kind_config: + pc[kind] = {"defaults": kind_config["defaults"]} + for config in kind_config["configs"]: + # Make sure our locale list is a subset of all_locales + pc[kind].setdefault("configs", []).append( + _fix_subpartner_locales(config, all_locales) + ) + else: + for partner, partner_config in six.iteritems(kind_config): + for subpartner, subpartner_config in six.iteritems(partner_config): + # get rid of empty subpartner configs + if not subpartner_config: + continue + # Make sure our locale list is a subset of all_locales + pc.setdefault(kind, {}).setdefault(partner, {})[ + subpartner + ] = _fix_subpartner_locales(subpartner_config, all_locales) + return pc + + +# seems likely this exists elsewhere already +def get_ftp_platform(platform): + if platform.startswith("win32"): + return "win32" + elif platform.startswith("win64-aarch64"): + return "win64-aarch64" + elif platform.startswith("win64"): + return "win64" + elif platform.startswith("macosx"): + return "mac" + elif platform.startswith("linux-"): + return "linux-i686" + elif platform.startswith("linux64"): + return "linux-x86_64" + else: + raise ValueError("Unimplemented platform {}".format(platform)) + + +# Ugh +def locales_per_build_platform(build_platform, locales): + if build_platform.startswith("mac"): + exclude = ["ja"] + else: + exclude = ["ja-JP-mac"] + return [locale for locale in locales if locale not in exclude] + + +def get_partner_url_config(parameters, graph_config): + partner_url_config = deepcopy(graph_config["partner-urls"]) + substitutions = { + "release-product": parameters["release_product"], + "release-level": release_level(parameters["project"]), + "release-type": parameters["release_type"], + } + resolve_keyed_by( + partner_url_config, + "release-eme-free-repack", + "eme-free manifest_url", + **substitutions + ) + resolve_keyed_by( + partner_url_config, + "release-partner-repack", 
+ "partner manifest url", + **substitutions + ) + resolve_keyed_by( + partner_url_config, + "release-partner-attribution", + "partner attribution url", + **substitutions + ) + return partner_url_config + + +def get_repack_ids_by_platform(config, build_platform): + partner_config = get_partner_config_by_kind(config, config.kind) + combinations = [] + for partner, subconfigs in partner_config.items(): + for sub_config_name, sub_config in subconfigs.items(): + if build_platform not in sub_config.get("platforms", []): + continue + locales = locales_per_build_platform( + build_platform, sub_config.get("locales", []) + ) + for locale in locales: + combinations.append("{}/{}/{}".format(partner, sub_config_name, locale)) + return sorted(combinations) + + +def get_partners_to_be_published(config): + # hardcoded kind because release-bouncer-aliases doesn't match otherwise + partner_config = get_partner_config_by_kind(config, "release-partner-repack") + partners = [] + for partner, subconfigs in partner_config.items(): + for sub_config_name, sub_config in subconfigs.items(): + if sub_config.get("publish_to_releases"): + partners.append((partner, sub_config_name, sub_config["platforms"])) + return partners + + +def apply_partner_priority(config, jobs): + priority = None + # Reduce the priority of the partner repack jobs because they don't block QE. Meanwhile + # leave EME-free jobs alone because they do, and they'll get the branch priority like the rest + # of the release. Only bother with this in production, not on staging releases on try. + # medium is the same as mozilla-central, see taskcluster/ci/config.yml. ie higher than + # integration branches because we don't want to wait a lot for the graph to be done, but + # for multiple releases the partner tasks always wait for non-partner. + if ( + config.kind.startswith( + ("release-partner-repack", "release-partner-attribution") + ) + and config.params.release_level() == "production" + ): + priority = "medium" + for job in jobs: + if priority: + job["priority"] = priority + yield job + + +def generate_attribution_code(defaults, partner): + params = { + "medium": defaults["medium"], + "source": defaults["source"], + "campaign": partner["campaign"], + "content": partner["content"], + } + if partner.get("variation"): + params["variation"] = partner["variation"] + if partner.get("experiment"): + params["experiment"] = partner["experiment"] + + code = six.moves.urllib.parse.urlencode(params) + return code diff --git a/taskcluster/taskgraph/util/perfile.py b/taskcluster/taskgraph/util/perfile.py new file mode 100644 index 0000000000..5944593e26 --- /dev/null +++ b/taskcluster/taskgraph/util/perfile.py @@ -0,0 +1,103 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import itertools +import json +import logging +import math + +from mozbuild.util import memoize +from mozpack.path import match as mozpackmatch +from taskgraph import files_changed +import taskgraph +from .. import GECKO + +logger = logging.getLogger(__name__) + + +@memoize +def perfile_number_of_chunks(is_try, try_task_config, head_repository, head_rev, type): + if taskgraph.fast and not is_try: + # When iterating on taskgraph changes, the exact number of chunks that + # test-verify runs usually isn't important, so skip it when going fast. 
+ return 3 + tests_per_chunk = 10.0 + if type.startswith("test-coverage"): + tests_per_chunk = 30.0 + + if type.startswith("test-verify-wpt") or type.startswith("test-coverage-wpt"): + file_patterns = [ + "testing/web-platform/tests/**", + "testing/web-platform/mozilla/tests/**", + ] + elif type.startswith("test-verify-gpu") or type.startswith("test-coverage-gpu"): + file_patterns = [ + "**/*webgl*/**/test_*", + "**/dom/canvas/**/test_*", + "**/gfx/tests/**/test_*", + "**/devtools/canvasdebugger/**/browser_*", + "**/reftest*/**", + ] + elif type.startswith("test-verify") or type.startswith("test-coverage"): + file_patterns = [ + "**/test_*", + "**/browser_*", + "**/crashtest*/**", + "js/src/tests/test/**", + "js/src/tests/non262/**", + "js/src/tests/test262/**", + ] + else: + # Returning 0 means no tests to run, this captures non test-verify tasks + return 1 + + changed_files = set() + if try_task_config: + suite_to_paths = json.loads(try_task_config) + specified_files = itertools.chain.from_iterable(suite_to_paths.values()) + changed_files.update(specified_files) + + if is_try: + changed_files.update(files_changed.get_locally_changed_files(GECKO)) + else: + changed_files.update(files_changed.get_changed_files(head_repository, head_rev)) + + test_count = 0 + for pattern in file_patterns: + for path in changed_files: + # TODO: consider running tests if a manifest changes + if path.endswith(".list") or path.endswith(".ini"): + continue + if path.endswith("^headers^"): + continue + + if mozpackmatch(path, pattern): + gpu = False + if type == "test-verify-e10s" or type == "test-coverage-e10s": + # file_patterns for test-verify will pick up some gpu tests, lets ignore + # in the case of reftest, we will not have any in the regular case + gpu_dirs = [ + "dom/canvas", + "gfx/tests", + "devtools/canvasdebugger", + "webgl", + ] + for gdir in gpu_dirs: + if len(path.split(gdir)) > 1: + gpu = True + + if not gpu: + test_count += 1 + + chunks = test_count / tests_per_chunk + chunks = int(math.ceil(chunks)) + + # Never return 0 chunks on try, so that per-file tests can be pushed to try with + # an explicit path, and also so "empty" runs can be checked on try. + if is_try and chunks == 0: + chunks = 1 + + return chunks diff --git a/taskcluster/taskgraph/util/platforms.py b/taskcluster/taskgraph/util/platforms.py new file mode 100644 index 0000000000..8f68eb8809 --- /dev/null +++ b/taskcluster/taskgraph/util/platforms.py @@ -0,0 +1,61 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import, print_function, unicode_literals + +import re + +from .attributes import keymatch + +# platform family is extracted from build platform by taking the alphabetic prefix +# and then translating win -> windows +_platform_re = re.compile(r"^[a-z]*") +_renames = {"win": "windows"} + + +_archive_formats = { + "linux": ".tar.bz2", + "macosx": ".tar.gz", + "windows": ".zip", +} + +_executable_extension = { + "linux": "", + "macosx": "", + "windows": ".exe", +} + +_architectures = { + r"linux\b.*": "x86", + r"linux64\b.*": "x86_64", + r"macosx64\b.*": "macos-x86_64-aarch64", + r"win32\b.*": "x86", + r"win64\b(?!-aarch64).*": "x86_64", + r"win64-aarch64\b.*": "aarch64", +} + + +def platform_family(build_platform): + """Given a build platform, return the platform family (linux, macosx, etc.)""" + family = _platform_re.match(build_platform).group(0) + return _renames.get(family, family) + + +def archive_format(build_platform): + """Given a build platform, return the archive format used on the platform.""" + return _archive_formats[platform_family(build_platform)] + + +def executable_extension(build_platform): + """Given a build platform, return the executable extension used on the platform.""" + return _executable_extension[platform_family(build_platform)] + + +def architecture(build_platform): + matches = keymatch(_architectures, build_platform) + if len(matches) == 1: + return matches[0] + raise Exception( + "Could not determine architecture of platform `{}`.".format(build_platform) + ) diff --git a/taskcluster/taskgraph/util/python_path.py b/taskcluster/taskgraph/util/python_path.py new file mode 100644 index 0000000000..c8bc11c5dd --- /dev/null +++ b/taskcluster/taskgraph/util/python_path.py @@ -0,0 +1,56 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import inspect +import os + + +def find_object(path): + """ + Find a Python object given a path of the form <modulepath>:<objectpath>. + Conceptually equivalent to + + def find_object(modulepath, objectpath): + import <modulepath> as mod + return mod.<objectpath> + """ + if path.count(":") != 1: + raise ValueError( + 'python path {!r} does not have the form "module:object"'.format(path) + ) + + modulepath, objectpath = path.split(":") + obj = __import__(modulepath) + for a in modulepath.split(".")[1:]: + obj = getattr(obj, a) + for a in objectpath.split("."): + obj = getattr(obj, a) + return obj + + +def import_sibling_modules(exceptions=None): + """ + Import all Python modules that are siblings of the calling module. + + Args: + exceptions (list): A list of file names to exclude (caller and + __init__.py are implicitly excluded). + """ + frame = inspect.stack()[1] + mod = inspect.getmodule(frame[0]) + + name = os.path.basename(mod.__file__) + excs = set(["__init__.py", name]) + if exceptions: + excs.update(exceptions) + + modpath = mod.__name__ + if not name.startswith("__init__.py"): + modpath = modpath.rsplit(".", 1)[0] + + for f in os.listdir(os.path.dirname(mod.__file__)): + if f.endswith(".py") and f not in excs: + __import__(modpath + "." 
+ f[:-3]) diff --git a/taskcluster/taskgraph/util/schema.py b/taskcluster/taskgraph/util/schema.py new file mode 100644 index 0000000000..a095132a94 --- /dev/null +++ b/taskcluster/taskgraph/util/schema.py @@ -0,0 +1,228 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import re +import pprint +import collections +import voluptuous + +from six import text_type, iteritems + +import taskgraph + +from .keyed_by import evaluate_keyed_by + + +def validate_schema(schema, obj, msg_prefix): + """ + Validate that object satisfies schema. If not, generate a useful exception + beginning with msg_prefix. + """ + if taskgraph.fast: + return + try: + schema(obj) + except voluptuous.MultipleInvalid as exc: + msg = [msg_prefix] + for error in exc.errors: + msg.append(str(error)) + raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) + + +def optionally_keyed_by(*arguments): + """ + Mark a schema value as optionally keyed by any of a number of fields. The + schema is the last argument, and the remaining fields are taken to be the + field names. For example: + + 'some-value': optionally_keyed_by( + 'test-platform', 'build-platform', + Any('a', 'b', 'c')) + + The resulting schema will allow nesting of `by-test-platform` and + `by-build-platform` in either order. + """ + schema = arguments[-1] + fields = arguments[:-1] + + def validator(obj): + if isinstance(obj, dict) and len(obj) == 1: + k, v = list(obj.items())[0] + if k.startswith("by-") and k[len("by-") :] in fields: + res = {} + for kk, vv in v.items(): + try: + res[kk] = validator(vv) + except voluptuous.Invalid as e: + e.prepend([k, kk]) + raise + return res + return Schema(schema)(obj) + + return validator + + +def resolve_keyed_by(item, field, item_name, defer=None, **extra_values): + """ + For values which can either accept a literal value, or be keyed by some + other attribute of the item, perform that lookup and replacement in-place + (modifying `item` directly). The field is specified using dotted notation + to traverse dictionaries. + + For example, given item:: + + job: + test-platform: linux128 + chunks: + by-test-platform: + macosx-10.11/debug: 13 + win.*: 6 + default: 12 + + a call to `resolve_keyed_by(item, 'job.chunks', item['thing-name'])` + would mutate item in-place to:: + + job: + test-platform: linux128 + chunks: 12 + + The `item_name` parameter is used to generate useful error messages. + + If extra_values are supplied, they represent additional values available + for reference from by-<field>. + + Items can be nested as deeply as the schema will allow:: + + chunks: + by-test-platform: + win.*: + by-project: + ash: .. + cedar: .. + linux: 13 + default: 12 + + The `defer` parameter allows evaluating a by-* entry at a later time. In the + example above it's possible that the project attribute hasn't been set + yet, in which case we'd want to stop before resolving that subkey and then + call this function again later. This can be accomplished by setting + `defer=["project"]` in this example. + """ + # find the field, returning the item unchanged if anything goes wrong + container, subfield = item, field + while "." 
in subfield: + f, subfield = subfield.split(".", 1) + if f not in container: + return item + container = container[f] + if not isinstance(container, dict): + return item + + if subfield not in container: + return item + + container[subfield] = evaluate_keyed_by( + value=container[subfield], + item_name="`{}` in `{}`".format(field, item_name), + defer=defer, + attributes=dict(item, **extra_values), + ) + + return item + + +# Schemas for YAML files should use dashed identifiers by default. If there are +# components of the schema for which there is a good reason to use another format, +# they can be whitelisted here. +WHITELISTED_SCHEMA_IDENTIFIERS = [ + # upstream-artifacts are handed directly to scriptWorker, which expects interCaps + lambda path: "[{!r}]".format("upstream-artifacts") in path, + lambda path: ( + "[{!r}]".format("test_name") in path + or "[{!r}]".format("json_location") in path + or "[{!r}]".format("video_location") in path + ), +] + + +def check_schema(schema): + identifier_re = re.compile("^[a-z][a-z0-9-]*$") + + def whitelisted(path): + return any(f(path) for f in WHITELISTED_SCHEMA_IDENTIFIERS) + + def iter(path, sch): + def check_identifier(path, k): + if k in (text_type, text_type, voluptuous.Extra): + pass + elif isinstance(k, voluptuous.NotIn): + pass + elif isinstance(k, text_type): + if not identifier_re.match(k) and not whitelisted(path): + raise RuntimeError( + "YAML schemas should use dashed lower-case identifiers, " + "not {!r} @ {}".format(k, path) + ) + elif isinstance(k, (voluptuous.Optional, voluptuous.Required)): + check_identifier(path, k.schema) + elif isinstance(k, (voluptuous.Any, voluptuous.All)): + for v in k.validators: + check_identifier(path, v) + elif not whitelisted(path): + raise RuntimeError( + "Unexpected type in YAML schema: {} @ {}".format( + type(k).__name__, path + ) + ) + + if isinstance(sch, collections.Mapping): + for k, v in iteritems(sch): + child = "{}[{!r}]".format(path, k) + check_identifier(child, k) + iter(child, v) + elif isinstance(sch, (list, tuple)): + for i, v in enumerate(sch): + iter("{}[{}]".format(path, i), v) + elif isinstance(sch, voluptuous.Any): + for v in sch.validators: + iter(path, v) + + iter("schema", schema.schema) + + +class Schema(voluptuous.Schema): + """ + Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks + in the process. + """ + + def __init__(self, *args, **kwargs): + super(Schema, self).__init__(*args, **kwargs) + if not taskgraph.fast: + check_schema(self) + + def extend(self, *args, **kwargs): + schema = super(Schema, self).extend(*args, **kwargs) + check_schema(schema) + # We want twice extend schema to be checked too. + schema.__class__ = Schema + return schema + + def _compile(self, schema): + if taskgraph.fast: + return + return super(Schema, self)._compile(schema) + + def __getitem__(self, item): + return self.schema[item] + + +# shortcut for a string where task references are allowed +taskref_or_string = voluptuous.Any( + text_type, + {voluptuous.Required("task-reference"): text_type}, + {voluptuous.Required("artifact-reference"): text_type}, +) diff --git a/taskcluster/taskgraph/util/scriptworker.py b/taskcluster/taskgraph/util/scriptworker.py new file mode 100644 index 0000000000..af94432295 --- /dev/null +++ b/taskcluster/taskgraph/util/scriptworker.py @@ -0,0 +1,819 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +"""Make scriptworker.cot.verify more user friendly by making scopes dynamic. + +Scriptworker uses certain scopes to determine which sets of credentials to use. +Certain scopes are restricted by branch in chain of trust verification, and are +checked again at the script level. This file provides functions to adjust +these scopes automatically by project; this makes pushing to try, forking a +project branch, and merge day uplifts more user friendly. + +In the future, we may adjust scopes by other settings as well, e.g. different +scopes for `push-to-candidates` rather than `push-to-releases`, even if both +happen on mozilla-beta and mozilla-release. + +Additional configuration is found in the :ref:`graph config <taskgraph-graph-config>`. +""" +from __future__ import absolute_import, print_function, unicode_literals +import functools +import json +import os +import itertools +from copy import deepcopy +from datetime import datetime + +import jsone + +from mozbuild.util import memoize + +from .schema import resolve_keyed_by +from .taskcluster import get_artifact_prefix +from .yaml import load_yaml + +# constants {{{1 +"""Map signing scope aliases to sets of projects. + +Currently m-c and DevEdition on m-b use nightly signing; Beta on m-b and m-r +use release signing. These data structures aren't set-up to handle different +scopes on the same repo, so we use a different set of them for DevEdition, and +callers are responsible for using the correct one (by calling the appropriate +helper below). More context on this in https://bugzilla.mozilla.org/show_bug.cgi?id=1358601. + +We will need to add esr support at some point. Eventually we want to add +nuance so certain m-b and m-r tasks use dep or nightly signing, and we only +release sign when we have a signed-off set of candidate builds. This current +approach works for now, though. + +This is a list of list-pairs, for ordering. +""" +SIGNING_SCOPE_ALIAS_TO_PROJECT = [ + [ + "all-nightly-branches", + set( + [ + "mozilla-central", + "comm-central", + "oak", + ] + ), + ], + [ + "all-release-branches", + set( + [ + "mozilla-beta", + "mozilla-release", + "mozilla-esr78", + "comm-beta", + "comm-esr78", + ] + ), + ], +] + +"""Map the signing scope aliases to the actual scopes. +""" +SIGNING_CERT_SCOPES = { + "all-release-branches": "signing:cert:release-signing", + "all-nightly-branches": "signing:cert:nightly-signing", + "default": "signing:cert:dep-signing", +} + +DEVEDITION_SIGNING_SCOPE_ALIAS_TO_PROJECT = [ + [ + "beta", + set( + [ + "mozilla-beta", + ] + ), + ] +] + +DEVEDITION_SIGNING_CERT_SCOPES = { + "beta": "signing:cert:nightly-signing", + "default": "signing:cert:dep-signing", +} + +"""Map beetmover scope aliases to sets of projects. +""" +BEETMOVER_SCOPE_ALIAS_TO_PROJECT = [ + [ + "all-nightly-branches", + set( + [ + "mozilla-central", + "comm-central", + "oak", + ] + ), + ], + [ + "all-release-branches", + set( + [ + "mozilla-beta", + "mozilla-release", + "mozilla-esr78", + "comm-beta", + "comm-esr78", + ] + ), + ], +] + +"""Map the beetmover scope aliases to the actual scopes. +""" +BEETMOVER_BUCKET_SCOPES = { + "all-release-branches": "beetmover:bucket:release", + "all-nightly-branches": "beetmover:bucket:nightly", + "default": "beetmover:bucket:dep", +} + +"""Map the beetmover tasks aliases to the actual action scopes. 
+""" +BEETMOVER_ACTION_SCOPES = { + "nightly": "beetmover:action:push-to-nightly", + "nightly-oak": "beetmover:action:push-to-nightly", + "default": "beetmover:action:push-to-candidates", +} + + +"""Known balrog actions.""" +BALROG_ACTIONS = ( + "submit-locale", + "submit-toplevel", + "schedule", + "v2-submit-locale", + "v2-submit-toplevel", +) + +"""Map balrog scope aliases to sets of projects. + +This is a list of list-pairs, for ordering. +""" +BALROG_SCOPE_ALIAS_TO_PROJECT = [ + [ + "nightly", + set( + [ + "mozilla-central", + "comm-central", + "oak", + ] + ), + ], + [ + "beta", + set( + [ + "mozilla-beta", + "comm-beta", + ] + ), + ], + [ + "release", + set( + [ + "mozilla-release", + "comm-esr78", + ] + ), + ], + [ + "esr78", + set( + [ + "mozilla-esr78", + ] + ), + ], +] + +"""Map the balrog scope aliases to the actual scopes. +""" +BALROG_SERVER_SCOPES = { + "nightly": "balrog:server:nightly", + "aurora": "balrog:server:aurora", + "beta": "balrog:server:beta", + "release": "balrog:server:release", + "esr78": "balrog:server:esr", + "default": "balrog:server:dep", +} + + +""" The list of the release promotion phases which we send notifications for +""" +RELEASE_NOTIFICATION_PHASES = ("promote", "push", "ship") + + +def add_scope_prefix(config, scope): + """ + Prepends the scriptworker scope prefix from the :ref:`graph config + <taskgraph-graph-config>`. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + scope (string): The suffix of the scope + + Returns: + string: the scope to use. + """ + return "{prefix}:{scope}".format( + prefix=config.graph_config["scriptworker"]["scope-prefix"], + scope=scope, + ) + + +def with_scope_prefix(f): + """ + Wraps a function, calling :py:func:`add_scope_prefix` on the result of + calling the wrapped function. + + Args: + f (callable): A function that takes a ``config`` and some keyword + arguments, and returns a scope suffix. + + Returns: + callable: the wrapped function + """ + + @functools.wraps(f) + def wrapper(config, **kwargs): + scope_or_scopes = f(config, **kwargs) + if isinstance(scope_or_scopes, list): + return map(functools.partial(add_scope_prefix, config), scope_or_scopes) + else: + return add_scope_prefix(config, scope_or_scopes) + + return wrapper + + +# scope functions {{{1 +@with_scope_prefix +def get_scope_from_project(config, alias_to_project_map, alias_to_scope_map): + """Determine the restricted scope from `config.params['project']`. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + alias_to_project_map (list of lists): each list pair contains the + alias and the set of projects that match. This is ordered. + alias_to_scope_map (dict): the alias alias to scope + + Returns: + string: the scope to use. + """ + for alias, projects in alias_to_project_map: + if config.params["project"] in projects and alias in alias_to_scope_map: + return alias_to_scope_map[alias] + return alias_to_scope_map["default"] + + +@with_scope_prefix +def get_scope_from_release_type(config, release_type_to_scope_map): + """Determine the restricted scope from `config.params['target_tasks_method']`. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + release_type_to_scope_map (dict): the maps release types to scopes + + Returns: + string: the scope to use. 
+ """ + return release_type_to_scope_map.get( + config.params["release_type"], release_type_to_scope_map["default"] + ) + + +def get_phase_from_target_method(config, alias_to_tasks_map, alias_to_phase_map): + """Determine the phase from `config.params['target_tasks_method']`. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + alias_to_tasks_map (list of lists): each list pair contains the + alias and the set of target methods that match. This is ordered. + alias_to_phase_map (dict): the alias to phase map + + Returns: + string: the phase to use. + """ + for alias, tasks in alias_to_tasks_map: + if ( + config.params["target_tasks_method"] in tasks + and alias in alias_to_phase_map + ): + return alias_to_phase_map[alias] + return alias_to_phase_map["default"] + + +@with_scope_prefix +def get_balrog_action_scope(config, action="submit"): + assert action in BALROG_ACTIONS + return "balrog:action:{}".format(action) + + +get_signing_cert_scope = functools.partial( + get_scope_from_project, + alias_to_project_map=SIGNING_SCOPE_ALIAS_TO_PROJECT, + alias_to_scope_map=SIGNING_CERT_SCOPES, +) + +get_devedition_signing_cert_scope = functools.partial( + get_scope_from_project, + alias_to_project_map=DEVEDITION_SIGNING_SCOPE_ALIAS_TO_PROJECT, + alias_to_scope_map=DEVEDITION_SIGNING_CERT_SCOPES, +) + +get_beetmover_bucket_scope = functools.partial( + get_scope_from_project, + alias_to_project_map=BEETMOVER_SCOPE_ALIAS_TO_PROJECT, + alias_to_scope_map=BEETMOVER_BUCKET_SCOPES, +) + +get_beetmover_action_scope = functools.partial( + get_scope_from_release_type, + release_type_to_scope_map=BEETMOVER_ACTION_SCOPES, +) + +get_balrog_server_scope = functools.partial( + get_scope_from_project, + alias_to_project_map=BALROG_SCOPE_ALIAS_TO_PROJECT, + alias_to_scope_map=BALROG_SERVER_SCOPES, +) + +cached_load_yaml = memoize(load_yaml) + + +# release_config {{{1 +def get_release_config(config): + """Get the build number and version for a release task. + + Currently only applies to beetmover tasks. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + + Returns: + dict: containing both `build_number` and `version`. This can be used to + update `task.payload`. 
+ """ + release_config = {} + + partial_updates = os.environ.get("PARTIAL_UPDATES", "") + if partial_updates != "" and config.kind in ( + "release-bouncer-sub", + "release-bouncer-check", + "release-update-verify-config", + "release-secondary-update-verify-config", + "release-balrog-submit-toplevel", + "release-secondary-balrog-submit-toplevel", + ): + partial_updates = json.loads(partial_updates) + release_config["partial_versions"] = ", ".join( + [ + "{}build{}".format(v, info["buildNumber"]) + for v, info in partial_updates.items() + ] + ) + if release_config["partial_versions"] == "{}": + del release_config["partial_versions"] + + release_config["version"] = config.params["version"] + release_config["appVersion"] = config.params["app_version"] + + release_config["next_version"] = config.params["next_version"] + release_config["build_number"] = config.params["build_number"] + return release_config + + +def get_signing_cert_scope_per_platform(build_platform, is_shippable, config): + if "devedition" in build_platform: + return get_devedition_signing_cert_scope(config) + elif is_shippable: + return get_signing_cert_scope(config) + else: + return add_scope_prefix(config, "signing:cert:dep-signing") + + +# generate_beetmover_upstream_artifacts {{{1 +def generate_beetmover_upstream_artifacts( + config, job, platform, locale=None, dependencies=None, **kwargs +): + """Generate the upstream artifacts for beetmover, using the artifact map. + + Currently only applies to beetmover tasks. + + Args: + job (dict): The current job being generated + dependencies (list): A list of the job's dependency labels. + platform (str): The current build platform + locale (str): The current locale being beetmoved. + + Returns: + list: A list of dictionaries conforming to the upstream_artifacts spec. + """ + base_artifact_prefix = get_artifact_prefix(job) + resolve_keyed_by( + job, + "attributes.artifact_map", + "artifact map", + **{ + "release-type": config.params["release_type"], + "platform": platform, + } + ) + map_config = deepcopy(cached_load_yaml(job["attributes"]["artifact_map"])) + upstream_artifacts = list() + + if not locale: + locales = map_config["default_locales"] + elif isinstance(locale, list): + locales = locale + else: + locales = [locale] + + if not dependencies: + if job.get("dependencies"): + dependencies = job["dependencies"].keys() + elif job.get("primary-dependency"): + dependencies = [job["primary-dependency"].kind] + else: + raise Exception("Unsupported type of dependency. Got job: {}".format(job)) + + for locale, dep in itertools.product(locales, dependencies): + paths = list() + + for filename in map_config["mapping"]: + if dep not in map_config["mapping"][filename]["from"]: + continue + if locale != "en-US" and not map_config["mapping"][filename]["all_locales"]: + continue + if ( + "only_for_platforms" in map_config["mapping"][filename] + and platform + not in map_config["mapping"][filename]["only_for_platforms"] + ): + continue + if ( + "not_for_platforms" in map_config["mapping"][filename] + and platform in map_config["mapping"][filename]["not_for_platforms"] + ): + continue + if "partials_only" in map_config["mapping"][filename]: + continue + # The next time we look at this file it might be a different locale. 
+ file_config = deepcopy(map_config["mapping"][filename]) + resolve_keyed_by( + file_config, + "source_path_modifier", + "source path modifier", + locale=locale, + ) + + kwargs["locale"] = locale + + paths.append( + os.path.join( + base_artifact_prefix, + jsone.render(file_config["source_path_modifier"], kwargs), + jsone.render(filename, kwargs), + ) + ) + + if job.get("dependencies") and getattr( + job["dependencies"][dep], "release_artifacts", None + ): + paths = [ + path + for path in paths + if path in job["dependencies"][dep].release_artifacts + ] + + if not paths: + continue + + upstream_artifacts.append( + { + "taskId": {"task-reference": "<{}>".format(dep)}, + "taskType": map_config["tasktype_map"].get(dep), + "paths": sorted(paths), + "locale": locale, + } + ) + + upstream_artifacts.sort(key=lambda u: u["paths"]) + return upstream_artifacts + + +# generate_beetmover_artifact_map {{{1 +def generate_beetmover_artifact_map(config, job, **kwargs): + """Generate the beetmover artifact map. + + Currently only applies to beetmover tasks. + + Args: + config (): Current taskgraph configuration. + job (dict): The current job being generated + Common kwargs: + platform (str): The current build platform + locale (str): The current locale being beetmoved. + + Returns: + list: A list of dictionaries containing source->destination + maps for beetmover. + """ + platform = kwargs.get("platform", "") + resolve_keyed_by( + job, + "attributes.artifact_map", + job["label"], + **{ + "release-type": config.params["release_type"], + "platform": platform, + } + ) + map_config = deepcopy(cached_load_yaml(job["attributes"]["artifact_map"])) + base_artifact_prefix = map_config.get( + "base_artifact_prefix", get_artifact_prefix(job) + ) + + artifacts = list() + + dependencies = job["dependencies"].keys() + + if kwargs.get("locale"): + if isinstance(kwargs["locale"], list): + locales = kwargs["locale"] + else: + locales = [kwargs["locale"]] + else: + locales = map_config["default_locales"] + + resolve_keyed_by(map_config, "s3_bucket_paths", job["label"], platform=platform) + + for locale, dep in sorted(itertools.product(locales, dependencies)): + paths = dict() + for filename in map_config["mapping"]: + # Relevancy checks + if dep not in map_config["mapping"][filename]["from"]: + # We don't get this file from this dependency. + continue + if locale != "en-US" and not map_config["mapping"][filename]["all_locales"]: + # This locale either doesn't produce or shouldn't upload this file. + continue + if ( + "only_for_platforms" in map_config["mapping"][filename] + and platform + not in map_config["mapping"][filename]["only_for_platforms"] + ): + # This platform either doesn't produce or shouldn't upload this file. + continue + if ( + "not_for_platforms" in map_config["mapping"][filename] + and platform in map_config["mapping"][filename]["not_for_platforms"] + ): + # This platform either doesn't produce or shouldn't upload this file. + continue + if "partials_only" in map_config["mapping"][filename]: + continue + + # deepcopy because the next time we look at this file the locale will differ. 
+ file_config = deepcopy(map_config["mapping"][filename]) + + for field in [ + "destinations", + "locale_prefix", + "source_path_modifier", + "update_balrog_manifest", + "pretty_name", + "checksums_path", + ]: + resolve_keyed_by( + file_config, field, job["label"], locale=locale, platform=platform + ) + + # This format string should ideally be in the configuration file, + # but this would mean keeping variable names in sync between code + config. + destinations = [ + "{s3_bucket_path}/{dest_path}/{locale_prefix}{filename}".format( + s3_bucket_path=bucket_path, + dest_path=dest_path, + locale_prefix=file_config["locale_prefix"], + filename=file_config.get("pretty_name", filename), + ) + for dest_path, bucket_path in itertools.product( + file_config["destinations"], map_config["s3_bucket_paths"] + ) + ] + # Creating map entries + # Key must be artifact path, to avoid trampling duplicates, such + # as public/build/target.apk and public/build/en-US/target.apk + key = os.path.join( + base_artifact_prefix, + file_config["source_path_modifier"], + filename, + ) + + paths[key] = { + "destinations": destinations, + } + if file_config.get("checksums_path"): + paths[key]["checksums_path"] = file_config["checksums_path"] + + # optional flag: balrog manifest + if file_config.get("update_balrog_manifest"): + paths[key]["update_balrog_manifest"] = True + if file_config.get("balrog_format"): + paths[key]["balrog_format"] = file_config["balrog_format"] + + if not paths: + # No files for this dependency/locale combination. + continue + + # Render all variables for the artifact map + platforms = deepcopy(map_config.get("platform_names", {})) + if platform: + for key in platforms.keys(): + resolve_keyed_by(platforms, key, job["label"], platform=platform) + + upload_date = datetime.fromtimestamp(config.params["build_date"]) + + kwargs.update( + { + "locale": locale, + "version": config.params["version"], + "branch": config.params["project"], + "build_number": config.params["build_number"], + "year": upload_date.year, + "month": upload_date.strftime("%m"), # zero-pad the month + "upload_date": upload_date.strftime("%Y-%m-%d-%H-%M-%S"), + } + ) + kwargs.update(**platforms) + paths = jsone.render(paths, kwargs) + artifacts.append( + { + "taskId": {"task-reference": "<{}>".format(dep)}, + "locale": locale, + "paths": paths, + } + ) + + return artifacts + + +# generate_beetmover_partials_artifact_map {{{1 +def generate_beetmover_partials_artifact_map(config, job, partials_info, **kwargs): + """Generate the beetmover partials artifact map. + + Currently only applies to beetmover tasks. + + Args: + config (): Current taskgraph configuration. + job (dict): The current job being generated + partials_info (dict): Current partials and information about them in a dict + Common kwargs: + platform (str): The current build platform + locale (str): The current locale being beetmoved. + + Returns: + list: A list of dictionaries containing source->destination + maps for beetmover. 
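+
+    A sketch of a single entry in that list (task and file names are
+    hypothetical; the real keys and destinations come from the artifact map
+    referenced by `job["attributes"]["artifact_map"]`):
+
+        {
+            "taskId": {"task-reference": "<partials-signing>"},
+            "locale": "en-US",
+            "paths": {
+                "public/build/target.partial.mar": {
+                    "destinations": [...],
+                    "from_buildid": "20210101010101",
+                },
+            },
+        }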
+ """ + platform = kwargs.get("platform", "") + resolve_keyed_by( + job, + "attributes.artifact_map", + "artifact map", + **{ + "release-type": config.params["release_type"], + "platform": platform, + } + ) + map_config = deepcopy(cached_load_yaml(job["attributes"]["artifact_map"])) + base_artifact_prefix = map_config.get( + "base_artifact_prefix", get_artifact_prefix(job) + ) + + artifacts = list() + dependencies = job["dependencies"].keys() + + if kwargs.get("locale"): + locales = [kwargs["locale"]] + else: + locales = map_config["default_locales"] + + resolve_keyed_by( + map_config, "s3_bucket_paths", "s3_bucket_paths", platform=platform + ) + + platforms = deepcopy(map_config.get("platform_names", {})) + if platform: + for key in platforms.keys(): + resolve_keyed_by(platforms, key, key, platform=platform) + upload_date = datetime.fromtimestamp(config.params["build_date"]) + + for locale, dep in itertools.product(locales, dependencies): + paths = dict() + for filename in map_config["mapping"]: + # Relevancy checks + if dep not in map_config["mapping"][filename]["from"]: + # We don't get this file from this dependency. + continue + if locale != "en-US" and not map_config["mapping"][filename]["all_locales"]: + # This locale either doesn't produce or shouldn't upload this file. + continue + if "partials_only" not in map_config["mapping"][filename]: + continue + # deepcopy because the next time we look at this file the locale will differ. + file_config = deepcopy(map_config["mapping"][filename]) + + for field in [ + "destinations", + "locale_prefix", + "source_path_modifier", + "update_balrog_manifest", + "from_buildid", + "pretty_name", + "checksums_path", + ]: + resolve_keyed_by( + file_config, field, field, locale=locale, platform=platform + ) + + # This format string should ideally be in the configuration file, + # but this would mean keeping variable names in sync between code + config. 
+ destinations = [ + "{s3_bucket_path}/{dest_path}/{locale_prefix}{filename}".format( + s3_bucket_path=bucket_path, + dest_path=dest_path, + locale_prefix=file_config["locale_prefix"], + filename=file_config.get("pretty_name", filename), + ) + for dest_path, bucket_path in itertools.product( + file_config["destinations"], map_config["s3_bucket_paths"] + ) + ] + # Creating map entries + # Key must be artifact path, to avoid trampling duplicates, such + # as public/build/target.apk and public/build/en-US/target.apk + key = os.path.join( + base_artifact_prefix, + file_config["source_path_modifier"], + filename, + ) + partials_paths = {} + for pname, info in partials_info.items(): + partials_paths[key] = { + "destinations": destinations, + } + if file_config.get("checksums_path"): + partials_paths[key]["checksums_path"] = file_config[ + "checksums_path" + ] + + # optional flag: balrog manifest + if file_config.get("update_balrog_manifest"): + partials_paths[key]["update_balrog_manifest"] = True + if file_config.get("balrog_format"): + partials_paths[key]["balrog_format"] = file_config[ + "balrog_format" + ] + # optional flag: from_buildid + if file_config.get("from_buildid"): + partials_paths[key]["from_buildid"] = file_config["from_buildid"] + + # render buildid + kwargs.update( + { + "partial": pname, + "from_buildid": info["buildid"], + "previous_version": info.get("previousVersion"), + "buildid": str(config.params["moz_build_date"]), + "locale": locale, + "version": config.params["version"], + "branch": config.params["project"], + "build_number": config.params["build_number"], + "year": upload_date.year, + "month": upload_date.strftime("%m"), # zero-pad the month + "upload_date": upload_date.strftime("%Y-%m-%d-%H-%M-%S"), + } + ) + kwargs.update(**platforms) + paths.update(jsone.render(partials_paths, kwargs)) + + if not paths: + continue + + artifacts.append( + { + "taskId": {"task-reference": "<{}>".format(dep)}, + "locale": locale, + "paths": paths, + } + ) + + artifacts.sort(key=lambda a: sorted(a["paths"].items())) + return artifacts diff --git a/taskcluster/taskgraph/util/signed_artifacts.py b/taskcluster/taskgraph/util/signed_artifacts.py new file mode 100644 index 0000000000..0a215e152e --- /dev/null +++ b/taskcluster/taskgraph/util/signed_artifacts.py @@ -0,0 +1,196 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +""" +Defines artifacts to sign before repackage. 
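+
+Each specification produced by `generate_specifications_of_artifacts_to_sign`
+below is a dict of the form
+
+    {"artifacts": [<artifact paths>], "formats": [<signing formats>]}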
+""" + +from __future__ import absolute_import, print_function, unicode_literals +from taskgraph.util.taskcluster import get_artifact_path +from taskgraph.util.declarative_artifacts import get_geckoview_upstream_artifacts + + +LANGPACK_SIGN_PLATFORMS = { # set + "linux64-shippable", + "linux64-devedition", + "macosx64-shippable", + "macosx64-devedition", +} + + +def is_partner_kind(kind): + if kind and kind.startswith(("release-partner", "release-eme-free")): + return True + + +def is_notarization_kind(kind): + if kind and "notarization" in kind: + return True + + +def generate_specifications_of_artifacts_to_sign( + config, job, keep_locale_template=True, kind=None, dep_kind=None +): + build_platform = job["attributes"].get("build_platform") + use_stub = job["attributes"].get("stub-installer") + # Get locales to know if we want to sign ja-JP-mac langpack + locales = job["attributes"].get("chunk_locales", []) + if kind == "release-source-signing": + artifacts_specifications = [ + { + "artifacts": [get_artifact_path(job, "source.tar.xz")], + "formats": ["autograph_gpg"], + } + ] + elif "android" in build_platform: + artifacts_specifications = [ + { + "artifacts": get_geckoview_artifacts_to_sign(config, job), + "formats": ["autograph_gpg"], + } + ] + # XXX: Mars aren't signed here (on any platform) because internals will be + # signed at after this stage of the release + elif "macosx" in build_platform: + if is_notarization_kind(dep_kind): + # This task is notarization part 3: download signed bits, + # and staple notarization. + artifacts_specifications = [ + { + "artifacts": [ + get_artifact_path(job, "{locale}/target.tar.gz"), + get_artifact_path(job, "{locale}/target.pkg"), + ], + "formats": [], + } + ] + langpack_formats = [] + else: + # This task is either depsigning, or notarization part 1: + # download unsigned bits, and sign. 
If notarization part 1, + # submit for notarization and create a uuid_manifest.json + if is_partner_kind(kind): + extension = "tar.gz" + else: + extension = "dmg" + artifacts_specifications = [ + { + "artifacts": [ + get_artifact_path(job, "{{locale}}/target.{}".format(extension)) + ], + "formats": ["macapp", "autograph_widevine", "autograph_omnija"], + } + ] + langpack_formats = ["autograph_langpack"] + + if "ja-JP-mac" in locales and build_platform in LANGPACK_SIGN_PLATFORMS: + artifacts_specifications += [ + { + "artifacts": [ + get_artifact_path(job, "ja-JP-mac/target.langpack.xpi") + ], + "formats": langpack_formats, + } + ] + elif "win" in build_platform: + artifacts_specifications = [ + { + "artifacts": [ + get_artifact_path(job, "{locale}/setup.exe"), + ], + "formats": ["autograph_authenticode"], + }, + { + "artifacts": [ + get_artifact_path(job, "{locale}/target.zip"), + ], + "formats": [ + "autograph_authenticode", + "autograph_widevine", + "autograph_omnija", + ], + }, + ] + + if use_stub: + artifacts_specifications[0]["artifacts"] += [ + get_artifact_path(job, "{locale}/setup-stub.exe") + ] + elif "linux" in build_platform: + artifacts_specifications = [ + { + "artifacts": [get_artifact_path(job, "{locale}/target.tar.bz2")], + "formats": ["autograph_gpg", "autograph_widevine", "autograph_omnija"], + } + ] + if build_platform in LANGPACK_SIGN_PLATFORMS: + artifacts_specifications += [ + { + "artifacts": [ + get_artifact_path(job, "{locale}/target.langpack.xpi") + ], + "formats": ["autograph_langpack"], + } + ] + else: + raise Exception("Platform not implemented for signing") + + if not keep_locale_template: + artifacts_specifications = _strip_locale_template(artifacts_specifications) + + if is_partner_kind(kind): + artifacts_specifications = _strip_widevine_for_partners( + artifacts_specifications + ) + + return artifacts_specifications + + +def _strip_locale_template(artifacts_without_locales): + for spec in artifacts_without_locales: + for index, artifact in enumerate(spec["artifacts"]): + stripped_artifact = artifact.format(locale="") + stripped_artifact = stripped_artifact.replace("//", "/") + spec["artifacts"][index] = stripped_artifact + + return artifacts_without_locales + + +def _strip_widevine_for_partners(artifacts_specifications): + """Partner repacks should not resign that's previously signed for fear of breaking partial + updates + """ + for spec in artifacts_specifications: + if "autograph_widevine" in spec["formats"]: + spec["formats"].remove("autograph_widevine") + if "autograph_omnija" in spec["formats"]: + spec["formats"].remove("autograph_omnija") + + return artifacts_specifications + + +def get_signed_artifacts(input, formats, behavior=None): + """ + Get the list of signed artifacts for the given input and formats. 
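+
+    For example (a sketch; the path and behavior value are illustrative), a
+    notarized mac build:
+
+        get_signed_artifacts("public/build/target.dmg", ["autograph_gpg"],
+                             behavior="mac_notarize")
+        # -> {"public/build/target.tar.gz",
+        #     "public/build/target.pkg",
+        #     "public/build/target.dmg.asc"}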
+ """ + artifacts = set() + if input.endswith(".dmg"): + artifacts.add(input.replace(".dmg", ".tar.gz")) + if behavior and behavior != "mac_sign": + artifacts.add(input.replace(".dmg", ".pkg")) + else: + artifacts.add(input) + if "autograph_gpg" in formats: + artifacts.add("{}.asc".format(input)) + + return artifacts + + +def get_geckoview_artifacts_to_sign(config, job): + upstream_artifacts = get_geckoview_upstream_artifacts(config, job) + return [ + path + for upstream_artifact in upstream_artifacts + for path in upstream_artifact["paths"] + if not path.endswith(".md5") and not path.endswith(".sha1") + ] diff --git a/taskcluster/taskgraph/util/taskcluster.py b/taskcluster/taskgraph/util/taskcluster.py new file mode 100644 index 0000000000..ae6315d2f7 --- /dev/null +++ b/taskcluster/taskgraph/util/taskcluster.py @@ -0,0 +1,373 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import os +import datetime +import functools +import requests +import six +import logging +import taskcluster_urls as liburls +from mozbuild.util import memoize +from requests.packages.urllib3.util.retry import Retry +from taskcluster import Hooks +from taskgraph.task import Task +from taskgraph.util import yaml + +logger = logging.getLogger(__name__) + +# this is set to true for `mach taskgraph action-callback --test` +testing = False + +# Default rootUrl to use if none is given in the environment; this should point +# to the production Taskcluster deployment used for CI. +PRODUCTION_TASKCLUSTER_ROOT_URL = "https://firefox-ci-tc.services.mozilla.com" + +# the maximum number of parallel Taskcluster API calls to make +CONCURRENCY = 50 + + +@memoize +def get_root_url(use_proxy): + """Get the current TASKCLUSTER_ROOT_URL. When running in a task, this must + come from $TASKCLUSTER_ROOT_URL; when run on the command line, we apply a + defualt that points to the production deployment of Taskcluster. If use_proxy + is set, this attempts to get TASKCLUSTER_PROXY_URL instead, failing if it + is not set.""" + if use_proxy: + try: + return six.ensure_text(os.environ["TASKCLUSTER_PROXY_URL"]) + except KeyError: + if "TASK_ID" not in os.environ: + raise RuntimeError( + "taskcluster-proxy is not available when not executing in a task" + ) + else: + raise RuntimeError("taskcluster-proxy is not enabled for this task") + + if "TASKCLUSTER_ROOT_URL" not in os.environ: + if "TASK_ID" in os.environ: + raise RuntimeError( + "$TASKCLUSTER_ROOT_URL must be set when running in a task" + ) + else: + logger.debug("Using default TASKCLUSTER_ROOT_URL (Firefox CI production)") + return PRODUCTION_TASKCLUSTER_ROOT_URL + logger.debug( + "Running in Taskcluster instance {}{}".format( + os.environ["TASKCLUSTER_ROOT_URL"], + " with taskcluster-proxy" if "TASKCLUSTER_PROXY_URL" in os.environ else "", + ) + ) + return six.ensure_text(os.environ["TASKCLUSTER_ROOT_URL"]) + + +def requests_retry_session( + retries, + backoff_factor=0.1, + status_forcelist=(500, 502, 504), + concurrency=CONCURRENCY, + session=None, +): + session = session or requests.Session() + retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + ) + + # Default HTTPAdapter uses 10 connections. Mount custom adapter to increase + # that limit. 
Connections are established as needed, so using a large value + # should not negatively impact performance. + http_adapter = requests.adapters.HTTPAdapter( + pool_connections=concurrency, + pool_maxsize=concurrency, + max_retries=retry, + ) + session.mount("http://", http_adapter) + session.mount("https://", http_adapter) + + return session + + +@memoize +def get_session(): + return requests_retry_session(retries=5) + + +def _do_request(url, method=None, **kwargs): + if method is None: + method = "post" if kwargs else "get" + + session = get_session() + if method == "get": + kwargs["stream"] = True + response = getattr(session, method)(url, **kwargs) + + if response.status_code >= 400: + # Consume content before raise_for_status, so that the connection can be + # reused. + response.content + response.raise_for_status() + return response + + +def _handle_artifact(path, response): + if path.endswith(".json"): + return response.json() + if path.endswith(".yml"): + return yaml.load_stream(response.text) + response.raw.read = functools.partial(response.raw.read, decode_content=True) + return response.raw + + +def get_artifact_url(task_id, path, use_proxy=False): + artifact_tmpl = liburls.api( + get_root_url(False), "queue", "v1", "task/{}/artifacts/{}" + ) + data = six.ensure_text(artifact_tmpl.format(task_id, path)) + if use_proxy: + # Until Bug 1405889 is deployed, we can't download directly + # from the taskcluster-proxy. Work around by using the /bewit + # endpoint instead. + # The bewit URL is the body of a 303 redirect, which we don't + # want to follow (which fetches a potentially large resource). + response = _do_request( + os.environ["TASKCLUSTER_PROXY_URL"] + "/bewit", + data=data, + allow_redirects=False, + ) + return six.ensure_text(response.text) + return data + + +def get_artifact(task_id, path, use_proxy=False): + """ + Returns the artifact with the given path for the given task id. + + If the path ends with ".json" or ".yml", the content is deserialized as, + respectively, json or yaml, and the corresponding python data (usually + dict) is returned. + For other types of content, a file-like object is returned. 
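A short usage sketch of get_artifact, with a made-up task id:

    # JSON and YAML artifacts come back already parsed...
    label_to_taskid = get_artifact("abc123TaskIdxyz", "public/label-to-taskid.json")

    # ...while anything else is a streaming file-like object.
    fetched = get_artifact("abc123TaskIdxyz", "public/build/target.tar.gz")
    with open("target.tar.gz", "wb") as f:
        f.write(fetched.read())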
+ """ + response = _do_request(get_artifact_url(task_id, path, use_proxy)) + return _handle_artifact(path, response) + + +def list_artifacts(task_id, use_proxy=False): + response = _do_request(get_artifact_url(task_id, "", use_proxy).rstrip("/")) + return response.json()["artifacts"] + + +def get_artifact_prefix(task): + prefix = None + if isinstance(task, dict): + prefix = task.get("attributes", {}).get("artifact_prefix") + elif isinstance(task, Task): + prefix = task.attributes.get("artifact_prefix") + else: + raise Exception("Can't find artifact-prefix of non-task: {}".format(task)) + return prefix or "public/build" + + +def get_artifact_path(task, path): + return "{}/{}".format(get_artifact_prefix(task), path) + + +def get_index_url(index_path, use_proxy=False, multiple=False): + index_tmpl = liburls.api(get_root_url(use_proxy), "index", "v1", "task{}/{}") + return index_tmpl.format("s" if multiple else "", index_path) + + +def find_task_id(index_path): + try: + response = _do_request(get_index_url(index_path)) + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + raise KeyError("index path {} not found".format(index_path)) + raise + return response.json()["taskId"] + + +def get_artifact_from_index(index_path, artifact_path, use_proxy=False): + full_path = index_path + "/artifacts/" + artifact_path + response = _do_request(get_index_url(full_path, use_proxy)) + return _handle_artifact(full_path, response) + + +def list_tasks(index_path, use_proxy=False): + """ + Returns a list of task_ids where each task_id is indexed under a path + in the index. Results are sorted by expiration date from oldest to newest. + """ + results = [] + data = {} + while True: + response = _do_request( + get_index_url(index_path, use_proxy, multiple=True), json=data + ) + response = response.json() + results += response["tasks"] + if response.get("continuationToken"): + data = {"continuationToken": response.get("continuationToken")} + else: + break + + # We can sort on expires because in the general case + # all of these tasks should be created with the same expires time so they end up in + # order from earliest to latest action. If more correctness is needed, consider + # fetching each task and sorting on the created date. + results.sort(key=lambda t: parse_time(t["expires"])) + return [t["taskId"] for t in results] + + +def insert_index(index_path, task_id, data=None, use_proxy=False): + index_url = get_index_url(index_path, use_proxy=use_proxy) + + # Find task expiry. + expires = get_task_definition(task_id, use_proxy=use_proxy)["expires"] + + response = _do_request( + index_url, + method="put", + json={ + "taskId": task_id, + "rank": 0, + "data": data or {}, + "expires": expires, + }, + ) + return response + + +def parse_time(timestamp): + """Turn a "JSON timestamp" as used in TC APIs into a datetime""" + return datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ") + + +def get_task_url(task_id, use_proxy=False): + task_tmpl = liburls.api(get_root_url(use_proxy), "queue", "v1", "task/{}") + return task_tmpl.format(task_id) + + +def get_task_definition(task_id, use_proxy=False): + response = _do_request(get_task_url(task_id, use_proxy)) + return response.json() + + +def cancel_task(task_id, use_proxy=False): + """Cancels a task given a task_id. 
In testing mode, just logs that it would + have cancelled.""" + if testing: + logger.info("Would have cancelled {}.".format(task_id)) + else: + _do_request(get_task_url(task_id, use_proxy) + "/cancel", json={}) + + +def status_task(task_id, use_proxy=False): + """Gets the status of a task given a task_id. In testing mode, just logs that it would + have retrieved status.""" + if testing: + logger.info("Would have gotten status for {}.".format(task_id)) + else: + resp = _do_request(get_task_url(task_id, use_proxy) + "/status") + status = resp.json().get("status", {}).get("state") or "unknown" + return status + + +def rerun_task(task_id): + """Reruns a task given a task_id. In testing mode, just logs that it would + have reran.""" + if testing: + logger.info("Would have rerun {}.".format(task_id)) + else: + _do_request(get_task_url(task_id, use_proxy=True) + "/rerun", json={}) + + +def trigger_hook(hook_group_id, hook_id, hook_payload): + hooks = Hooks({"rootUrl": get_root_url(True)}) + response = hooks.triggerHook(hook_group_id, hook_id, hook_payload) + + logger.info( + "Task seen here: {}/tasks/{}".format( + get_root_url(os.environ.get("TASKCLUSTER_PROXY_URL")), + response["status"]["taskId"], + ) + ) + + +def get_current_scopes(): + """Get the current scopes. This only makes sense in a task with the Taskcluster + proxy enabled, where it returns the actual scopes accorded to the task.""" + auth_url = liburls.api(get_root_url(True), "auth", "v1", "scopes/current") + resp = _do_request(auth_url) + return resp.json().get("scopes", []) + + +def get_purge_cache_url(provisioner_id, worker_type, use_proxy=False): + url_tmpl = liburls.api( + get_root_url(use_proxy), "purge-cache", "v1", "purge-cache/{}/{}" + ) + return url_tmpl.format(provisioner_id, worker_type) + + +def purge_cache(provisioner_id, worker_type, cache_name, use_proxy=False): + """Requests a cache purge from the purge-caches service.""" + if testing: + logger.info( + "Would have purged {}/{}/{}.".format( + provisioner_id, worker_type, cache_name + ) + ) + else: + logger.info("Purging {}/{}/{}.".format(provisioner_id, worker_type, cache_name)) + purge_cache_url = get_purge_cache_url(provisioner_id, worker_type, use_proxy) + _do_request(purge_cache_url, json={"cacheName": cache_name}) + + +def send_email(address, subject, content, link, use_proxy=False): + """Sends an email using the notify service""" + logger.info("Sending email to {}.".format(address)) + url = liburls.api(get_root_url(use_proxy), "notify", "v1", "email") + _do_request( + url, + json={ + "address": address, + "subject": subject, + "content": content, + "link": link, + }, + ) + + +def list_task_group_tasks(task_group_id): + """Generate the tasks in a task group""" + params = {} + while True: + url = liburls.api( + get_root_url(False), + "queue", + "v1", + "task-group/{}/list".format(task_group_id), + ) + resp = _do_request(url, method="get", params=params).json() + for task in resp["tasks"]: + yield task + if resp.get("continuationToken"): + params = {"continuationToken": resp.get("continuationToken")} + else: + break + + +def list_task_group_incomplete_task_ids(task_group_id): + states = ("running", "pending", "unscheduled") + for task in [t["status"] for t in list_task_group_tasks(task_group_id)]: + if task["state"] in states: + yield task["taskId"] diff --git a/taskcluster/taskgraph/util/taskgraph.py b/taskcluster/taskgraph/util/taskgraph.py new file mode 100644 index 0000000000..735bfbb15a --- /dev/null +++ b/taskcluster/taskgraph/util/taskgraph.py @@ -0,0 +1,58 
@@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Tools for interacting with existing taskgraphs. +""" + +from __future__ import absolute_import, print_function, unicode_literals + +import six + +from taskgraph.util.taskcluster import ( + find_task_id, + get_artifact, +) + + +def find_decision_task(parameters, graph_config): + """Given the parameters for this action, find the taskId of the decision + task""" + head_rev_param = "{}head_rev".format(graph_config["project-repo-param-prefix"]) + return find_task_id( + "{}.v2.{}.revision.{}.taskgraph.decision".format( + graph_config["trust-domain"], + parameters["project"], + parameters[head_rev_param], + ) + ) + + +def find_existing_tasks(previous_graph_ids): + existing_tasks = {} + for previous_graph_id in previous_graph_ids: + label_to_taskid = get_artifact(previous_graph_id, "public/label-to-taskid.json") + existing_tasks.update(label_to_taskid) + return existing_tasks + + +def find_existing_tasks_from_previous_kinds( + full_task_graph, previous_graph_ids, rebuild_kinds +): + """Given a list of previous decision/action taskIds and kinds to ignore + from the previous graphs, return a dictionary of labels-to-taskids to use + as ``existing_tasks`` in the optimization step.""" + existing_tasks = find_existing_tasks(previous_graph_ids) + kind_labels = { + t.label + for t in six.itervalues(full_task_graph.tasks) + if t.attributes["kind"] not in rebuild_kinds + } + return { + label: taskid + for (label, taskid) in existing_tasks.items() + if label in kind_labels + } diff --git a/taskcluster/taskgraph/util/templates.py b/taskcluster/taskgraph/util/templates.py new file mode 100644 index 0000000000..d0cfb251c3 --- /dev/null +++ b/taskcluster/taskgraph/util/templates.py @@ -0,0 +1,60 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import copy + + +def merge_to(source, dest): + """ + Merge dict and arrays (override scalar values) + + Keys from source override keys from dest, and elements from lists in source + are appended to lists in dest. + + :param dict source: to copy from + :param dict dest: to copy to (modified in place) + """ + + for key, value in source.items(): + if ( + isinstance(value, dict) + and len(value) == 1 + and list(value)[0].startswith("by-") + ): + # Do not merge by-* values as this is likely to confuse someone + dest[key] = value + continue + + # Override mismatching or empty types + if type(value) != type(dest.get(key)): # noqa + dest[key] = value + continue + + # Merge dict + if isinstance(value, dict): + merge_to(value, dest[key]) + continue + + if isinstance(value, list): + dest[key] = dest[key] + value + continue + + dest[key] = value + + return dest + + +def merge(*objects): + """ + Merge the given objects, using the semantics described for merge_to, with + objects later in the list taking precedence. From an inheritance + perspective, "parents" should be listed before "children". + + Returns the result without modifying any arguments. 
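A small sketch of the merge semantics described above, with invented values (later arguments win, nested dicts merge, lists concatenate):

    a = {"env": {"FOO": "1"}, "chunks": [1], "tier": 2}
    b = {"env": {"BAR": "2"}, "chunks": [2], "tier": 1}

    merge(a, b)
    # -> {"env": {"FOO": "1", "BAR": "2"}, "chunks": [1, 2], "tier": 1}
    # Neither a nor b is modified.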
+ """ + if len(objects) == 1: + return copy.deepcopy(objects[0]) + return merge_to(objects[-1], merge(*objects[:-1])) diff --git a/taskcluster/taskgraph/util/time.py b/taskcluster/taskgraph/util/time.py new file mode 100644 index 0000000000..184d92f0c0 --- /dev/null +++ b/taskcluster/taskgraph/util/time.py @@ -0,0 +1,118 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Python port of the ms.js node module this is not a direct port some things are +# more complicated or less precise and we lean on time delta here. + +from __future__ import absolute_import, print_function, unicode_literals + +import re +import datetime + +PATTERN = re.compile("((?:\d+)?\.?\d+) *([a-z]+)") + + +def seconds(value): + return datetime.timedelta(seconds=int(value)) + + +def minutes(value): + return datetime.timedelta(minutes=int(value)) + + +def hours(value): + return datetime.timedelta(hours=int(value)) + + +def days(value): + return datetime.timedelta(days=int(value)) + + +def months(value): + # See warning in years(), below + return datetime.timedelta(days=int(value) * 30) + + +def years(value): + # Warning here "years" are vague don't use this for really sensitive date + # computation the idea is to give you a absolute amount of time in the + # future which is not the same thing as "precisely on this date next year" + return datetime.timedelta(days=int(value) * 365) + + +ALIASES = {} +ALIASES["seconds"] = ALIASES["second"] = ALIASES["s"] = seconds +ALIASES["minutes"] = ALIASES["minute"] = ALIASES["min"] = minutes +ALIASES["hours"] = ALIASES["hour"] = ALIASES["h"] = hours +ALIASES["days"] = ALIASES["day"] = ALIASES["d"] = days +ALIASES["months"] = ALIASES["month"] = ALIASES["mo"] = months +ALIASES["years"] = ALIASES["year"] = ALIASES["y"] = years + + +class InvalidString(Exception): + pass + + +class UnknownTimeMeasurement(Exception): + pass + + +def value_of(input_str): + """ + Convert a string to a json date in the future + :param str input_str: (ex: 1d, 2d, 6years, 2 seconds) + :returns: Unit given in seconds + """ + + matches = PATTERN.search(input_str) + + if matches is None or len(matches.groups()) < 2: + raise InvalidString("'{}' is invalid string".format(input_str)) + + value, unit = matches.groups() + + if unit not in ALIASES: + raise UnknownTimeMeasurement( + "{} is not a valid time measure use one of {}".format( + unit, sorted(ALIASES.keys()) + ) + ) + + return ALIASES[unit](value) + + +def json_time_from_now(input_str, now=None, datetime_format=False): + """ + :param str input_str: Input string (see value of) + :param datetime now: Optionally set the definition of `now` + :param boolean datetime_format: Set `True` to get a `datetime` output + :returns: JSON string representation of time in future. + """ + + if now is None: + now = datetime.datetime.utcnow() + + time = now + value_of(input_str) + + if datetime_format is True: + return time + else: + # Sorta a big hack but the json schema validator for date does not like the + # ISO dates until 'Z' (for timezone) is added... + # the [:23] ensures only whole seconds or milliseconds are included, + # not microseconds (see bug 1381801) + return time.isoformat()[:23] + "Z" + + +def current_json_time(datetime_format=False): + """ + :param boolean datetime_format: Set `True` to get a `datetime` output + :returns: JSON string representation of the current time. 
+ """ + if datetime_format is True: + return datetime.datetime.utcnow() + else: + # the [:23] ensures only whole seconds or milliseconds are included, + # not microseconds (see bug 1381801) + return datetime.datetime.utcnow().isoformat()[:23] + "Z" diff --git a/taskcluster/taskgraph/util/treeherder.py b/taskcluster/taskgraph/util/treeherder.py new file mode 100644 index 0000000000..9e7429ef64 --- /dev/null +++ b/taskcluster/taskgraph/util/treeherder.py @@ -0,0 +1,67 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals +import re + +_JOINED_SYMBOL_RE = re.compile(r"([^(]*)\(([^)]*)\)$") + + +def split_symbol(treeherder_symbol): + """Split a symbol expressed as grp(sym) into its two parts. If no group is + given, the returned group is '?'""" + groupSymbol = "?" + symbol = treeherder_symbol + if "(" in symbol: + match = _JOINED_SYMBOL_RE.match(symbol) + if match: + groupSymbol, symbol = match.groups() + else: + raise Exception("`{}` is not a valid treeherder symbol.".format(symbol)) + return groupSymbol, symbol + + +def join_symbol(group, symbol): + """Perform the reverse of split_symbol, combining the given group and + symbol. If the group is '?', then it is omitted.""" + if group == "?": + return symbol + return "{}({})".format(group, symbol) + + +def add_suffix(treeherder_symbol, suffix): + """Add a suffix to a treeherder symbol that may contain a group.""" + group, symbol = split_symbol(treeherder_symbol) + symbol += str(suffix) + return join_symbol(group, symbol) + + +def replace_group(treeherder_symbol, new_group): + """Add a suffix to a treeherder symbol that may contain a group.""" + _, symbol = split_symbol(treeherder_symbol) + return join_symbol(new_group, symbol) + + +def inherit_treeherder_from_dep(job, dep_job): + """Inherit treeherder defaults from dep_job""" + treeherder = job.get("treeherder", {}) + + dep_th_platform = ( + dep_job.task.get("extra", {}) + .get("treeherder", {}) + .get("machine", {}) + .get("platform", "") + ) + dep_th_collection = list( + dep_job.task.get("extra", {}).get("treeherder", {}).get("collection", {}).keys() + )[0] + treeherder.setdefault( + "platform", "{}/{}".format(dep_th_platform, dep_th_collection) + ) + treeherder.setdefault( + "tier", dep_job.task.get("extra", {}).get("treeherder", {}).get("tier", 1) + ) + # Does not set symbol + treeherder.setdefault("kind", "build") + return treeherder diff --git a/taskcluster/taskgraph/util/verify.py b/taskcluster/taskgraph/util/verify.py new file mode 100644 index 0000000000..a1d24718c9 --- /dev/null +++ b/taskcluster/taskgraph/util/verify.py @@ -0,0 +1,454 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import logging +import re +import os + +import attr +import six + +from .. 
import GECKO +from .treeherder import join_symbol +from taskgraph.util.attributes import match_run_on_projects, RELEASE_PROJECTS + +from taskgraph.util.attributes import ALL_PROJECTS, RUN_ON_PROJECT_ALIASES + +logger = logging.getLogger(__name__) +doc_base_path = os.path.join(GECKO, "taskcluster", "docs") + + +@attr.s(frozen=True) +class Verification(object): + verify = attr.ib() + run_on_projects = attr.ib() + + +@attr.s(frozen=True) +class VerificationSequence(object): + """ + Container for a sequence of verifications over a TaskGraph. Each + verification is represented as a callable taking (task, taskgraph, + scratch_pad), called for each task in the taskgraph, and one more + time with no task but with the taskgraph and the same scratch_pad + that was passed for each task. + """ + + _verifications = attr.ib(factory=dict) + + def __call__(self, graph_name, graph, graph_config, parameters): + for verification in self._verifications.get(graph_name, []): + if not match_run_on_projects( + parameters["project"], verification.run_on_projects + ): + continue + scratch_pad = {} + graph.for_each_task( + verification.verify, + scratch_pad=scratch_pad, + graph_config=graph_config, + parameters=parameters, + ) + verification.verify( + None, + graph, + scratch_pad=scratch_pad, + graph_config=graph_config, + parameters=parameters, + ) + return graph_name, graph + + def add(self, graph_name, run_on_projects={"all"}): + def wrap(func): + self._verifications.setdefault(graph_name, []).append( + Verification(func, run_on_projects) + ) + return func + + return wrap + + +verifications = VerificationSequence() + + +@attr.s(frozen=True) +class DocPaths(object): + _paths = attr.ib(factory=list) + + def get_files(self, filename): + rv = [] + for p in self._paths: + doc_path = os.path.join(p, filename) + if os.path.exists(doc_path): + rv.append(doc_path) + return rv + + def add(self, path): + """ + Projects that make use of Firefox's taskgraph can extend it with + their own task kinds by registering additional paths for documentation. + documentation_paths.add() needs to be called by the project's Taskgraph + registration function. See taskgraph.config. + """ + self._paths.append(path) + + +documentation_paths = DocPaths() +documentation_paths.add(doc_base_path) + + +def verify_docs(filename, identifiers, appearing_as): + """ + Look for identifiers of the type appearing_as in the files + returned by documentation_paths.get_files(). Firefox will have + a single file in a list, but projects such as Thunderbird can have + documentation in another location and may return multiple files. + """ + # We ignore identifiers starting with '_' for the sake of tests. 
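A hedged sketch of how an additional check could be registered with the sequence above (the check, the kind name, and the threshold are invented):

    @verifications.add("full_task_graph")
    def verify_example(task, taskgraph, scratch_pad, graph_config, parameters):
        # Called once per task, then one final time with task=None.
        if task is not None:
            scratch_pad[task.kind] = scratch_pad.get(task.kind, 0) + 1
            return
        if scratch_pad.get("docker-image", 0) > 100:
            raise Exception("Unexpectedly many docker-image tasks")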
+ # Strings starting with "_" are ignored for doc verification + # hence they can be used for faking test values + doc_files = documentation_paths.get_files(filename) + doctext = "".join([open(d).read() for d in doc_files]) + + if appearing_as == "inline-literal": + expression_list = [ + "``" + identifier + "``" + for identifier in identifiers + if not identifier.startswith("_") + ] + elif appearing_as == "heading": + expression_list = [ + "\n" + identifier + "\n(?:(?:(?:-+\n)+)|(?:(?:.+\n)+))" + for identifier in identifiers + if not identifier.startswith("_") + ] + else: + raise Exception("appearing_as = `{}` not defined".format(appearing_as)) + + for expression, identifier in zip(expression_list, identifiers): + match_group = re.search(expression, doctext) + if not match_group: + raise Exception( + "{}: `{}` missing from doc file: `{}`".format( + appearing_as, identifier, filename + ) + ) + + +@verifications.add("full_task_graph") +def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function verifies that tuple + (collection.keys(), machine.platform, groupSymbol, symbol) is unique + for a target task graph. + """ + if task is None: + return + task_dict = task.task + if "extra" in task_dict: + extra = task_dict["extra"] + if "treeherder" in extra: + treeherder = extra["treeherder"] + + collection_keys = tuple(sorted(treeherder.get("collection", {}).keys())) + if len(collection_keys) != 1: + raise Exception( + "Task {} can't be in multiple treeherder collections " + "(the part of the platform after `/`): {}".format( + task.label, collection_keys + ) + ) + platform = treeherder.get("machine", {}).get("platform") + group_symbol = treeherder.get("groupSymbol") + symbol = treeherder.get("symbol") + + key = (platform, collection_keys[0], group_symbol, symbol) + if key in scratch_pad: + raise Exception( + "Duplicate treeherder platform and symbol in tasks " + "`{}`and `{}`: {} {}".format( + task.label, + scratch_pad[key], + "{}/{}".format(platform, collection_keys[0]), + join_symbol(group_symbol, symbol), + ) + ) + else: + scratch_pad[key] = task.label + + +@verifications.add("full_task_graph") +def verify_trust_domain_v2_routes( + task, taskgraph, scratch_pad, graph_config, parameters +): + """ + This function ensures that any two tasks have distinct ``index.{trust-domain}.v2`` routes. + """ + if task is None: + return + route_prefix = "index.{}.v2".format(graph_config["trust-domain"]) + task_dict = task.task + routes = task_dict.get("routes", []) + + for route in routes: + if route.startswith(route_prefix): + if route in scratch_pad: + raise Exception( + "conflict between {}:{} for route: {}".format( + task.label, scratch_pad[route], route + ) + ) + else: + scratch_pad[route] = task.label + + +@verifications.add("full_task_graph") +def verify_routes_notification_filters( + task, taskgraph, scratch_pad, graph_config, parameters +): + """ + This function ensures that only understood filters for notifications are + specified. + + See: https://firefox-ci-tc.services.mozilla.com/docs/manual/using/task-notifications + """ + if task is None: + return + route_prefix = "notify." 
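For reference, a sketch of the kind of route this check inspects (the address is made up); only the trailing filter segment is compared against the allowed values:

    routes = [
        "notify.email.release-mgmt@example.com.on-failed",   # accepted
        "notify.email.release-mgmt@example.com.on-retry",    # rejected: unknown filter
    ]
    # route.split(".")[-1] -> "on-failed" / "on-retry"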
+ valid_filters = ("on-any", "on-completed", "on-failed", "on-exception") + task_dict = task.task + routes = task_dict.get("routes", []) + + for route in routes: + if route.startswith(route_prefix): + # Get the filter of the route + route_filter = route.split(".")[-1] + if route_filter not in valid_filters: + raise Exception( + "{} has invalid notification filter ({})".format( + task.label, route_filter + ) + ) + + +@verifications.add("full_task_graph") +def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config, parameters): + tiers = scratch_pad + if task is not None: + tiers[task.label] = ( + task.task.get("extra", {}).get("treeherder", {}).get("tier", six.MAXSIZE) + ) + else: + + def printable_tier(tier): + if tier == six.MAXSIZE: + return "unknown" + return tier + + for task in six.itervalues(taskgraph.tasks): + tier = tiers[task.label] + for d in six.itervalues(task.dependencies): + if taskgraph[d].task.get("workerType") == "always-optimized": + continue + if "dummy" in taskgraph[d].kind: + continue + if tier < tiers[d]: + raise Exception( + "{} (tier {}) cannot depend on {} (tier {})".format( + task.label, + printable_tier(tier), + d, + printable_tier(tiers[d]), + ) + ) + + +@verifications.add("full_task_graph") +def verify_required_signoffs(task, taskgraph, scratch_pad, graph_config, parameters): + """ + Task with required signoffs can't be dependencies of tasks with less + required signoffs. + """ + all_required_signoffs = scratch_pad + if task is not None: + all_required_signoffs[task.label] = set( + task.attributes.get("required_signoffs", []) + ) + else: + + def printable_signoff(signoffs): + if len(signoffs) == 1: + return "required signoff {}".format(*signoffs) + elif signoffs: + return "required signoffs {}".format(", ".join(signoffs)) + else: + return "no required signoffs" + + for task in six.itervalues(taskgraph.tasks): + required_signoffs = all_required_signoffs[task.label] + for d in six.itervalues(task.dependencies): + if required_signoffs < all_required_signoffs[d]: + raise Exception( + "{} ({}) cannot depend on {} ({})".format( + task.label, + printable_signoff(required_signoffs), + d, + printable_signoff(all_required_signoffs[d]), + ) + ) + + +@verifications.add("full_task_graph") +def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function verifies that toolchain aliases are not reused. + """ + if task is None: + return + attributes = task.attributes + if "toolchain-alias" in attributes: + key = attributes["toolchain-alias"] + if key in scratch_pad: + raise Exception( + "Duplicate toolchain-alias in tasks " + "`{}`and `{}`: {}".format( + task.label, + scratch_pad[key], + key, + ) + ) + else: + scratch_pad[key] = task.label + + +@verifications.add("optimized_task_graph") +def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function ensures that always-optimized tasks have been optimized. 
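The required-signoffs comparison above relies on Python's strict-subset semantics for sets; a standalone illustration (the signoff name is an example):

    assert set() < {"mar-signing"}                    # fewer signoffs than the dependency: rejected
    assert not ({"mar-signing"} < {"mar-signing"})    # equal signoffs: allowed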
+ """ + if task is None: + return + if task.task.get("workerType") == "always-optimized": + raise Exception("Could not optimize the task {!r}".format(task.label)) + + +@verifications.add("full_task_graph", run_on_projects=RELEASE_PROJECTS) +def verify_shippable_no_sccache(task, taskgraph, scratch_pad, graph_config, parameters): + if task and task.attributes.get("shippable"): + if task.task.get("payload", {}).get("env", {}).get("USE_SCCACHE"): + raise Exception("Shippable job {} cannot use sccache".format(task.label)) + + +@verifications.add("full_task_graph") +def verify_test_packaging(task, taskgraph, scratch_pad, graph_config, parameters): + if task is None: + # In certain cases there are valid reasons for tests to be missing, + # don't error out when that happens. + missing_tests_allowed = any( + ( + # user specified `--target-kind` + parameters.get("target-kind") is not None, + # manifest scheduling is enabled + parameters["test_manifest_loader"] != "default", + ) + ) + + exceptions = [] + for task in six.itervalues(taskgraph.tasks): + if task.kind == "build" and not task.attributes.get( + "skip-verify-test-packaging" + ): + build_env = task.task.get("payload", {}).get("env", {}) + package_tests = build_env.get("MOZ_AUTOMATION_PACKAGE_TESTS") + shippable = task.attributes.get("shippable", False) + build_has_tests = scratch_pad.get(task.label) + + if package_tests != "1": + # Shippable builds should always package tests. + if shippable: + exceptions.append( + "Build job {} is shippable and does not specify " + "MOZ_AUTOMATION_PACKAGE_TESTS=1 in the " + "environment.".format(task.label) + ) + + # Build tasks in the scratch pad have tests dependent on + # them, so we need to package tests during build. + if build_has_tests: + exceptions.append( + "Build job {} has tests dependent on it and does not specify " + "MOZ_AUTOMATION_PACKAGE_TESTS=1 in the environment".format( + task.label + ) + ) + else: + # Build tasks that aren't in the scratch pad have no + # dependent tests, so we shouldn't package tests. + # With the caveat that we expect shippable jobs to always + # produce tests. + if not build_has_tests and not shippable: + # If we have not generated all task kinds, we can't verify that + # there are no dependent tests. + if not missing_tests_allowed: + exceptions.append( + "Build job {} has no tests, but specifies " + "MOZ_AUTOMATION_PACKAGE_TESTS={} in the environment. " + "Unset MOZ_AUTOMATION_PACKAGE_TESTS in the task definition " + "to fix.".format(task.label, package_tests) + ) + if exceptions: + raise Exception("\n".join(exceptions)) + return + if task.kind == "test": + build_task = taskgraph[task.dependencies["build"]] + scratch_pad[build_task.label] = 1 + + +@verifications.add("full_task_graph") +def verify_run_known_projects(task, taskgraph, scratch_pad, graph_config, parameters): + """Validates the inputs in run-on-projects. + + We should never let 'try' (or 'try-comm-central') be in run-on-projects even though it + is valid because it is not considered for try pushes. While here we also validate for + other unknown projects or typos. + """ + if task and task.attributes.get("run_on_projects"): + projects = set(task.attributes["run_on_projects"]) + if {"try", "try-comm-central"} & set(projects): + raise Exception( + "In task {}: using try in run-on-projects is invalid; use try " + "selectors to select this task on try".format(task.label) + ) + # try isn't valid, but by the time we get here its not an available project anyway. 
+ valid_projects = ALL_PROJECTS | set(RUN_ON_PROJECT_ALIASES.keys()) + invalid_projects = projects - valid_projects + if invalid_projects: + raise Exception( + "Task '{}' has an invalid run-on-projects value: " + "{}".format(task.label, invalid_projects) + ) + + +@verifications.add("full_task_graph") +def verify_local_toolchains(task, taskgraph, scratch_pad, graph_config, parameters): + """ + Toolchains that are used for local development need to be built on a + level-3 branch to installable via `mach bootstrap`. We ensure here that all + such tasks run on at least trunk projects, even if they aren't pulled in as + a dependency of other tasks in the graph. + + There is code in `mach artifact toolchain` that verifies that anything + installed via `mach bootstrap` has the attribute set. + """ + if task and task.attributes.get("local-toolchain"): + run_on_projects = task.attributes.get("run_on_projects", []) + if not any(alias in run_on_projects for alias in ["all", "trunk"]): + raise Exception( + "Toolchain {} used for local development is not built on trunk. {}".format( + task.label, run_on_projects + ) + ) diff --git a/taskcluster/taskgraph/util/workertypes.py b/taskcluster/taskgraph/util/workertypes.py new file mode 100644 index 0000000000..6a9f571a69 --- /dev/null +++ b/taskcluster/taskgraph/util/workertypes.py @@ -0,0 +1,95 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +from mozbuild.util import memoize + +from .keyed_by import evaluate_keyed_by +from .attributes import keymatch + +WORKER_TYPES = { + "gce/gecko-1-b-linux": ("docker-worker", "linux"), + "gce/gecko-2-b-linux": ("docker-worker", "linux"), + "gce/gecko-3-b-linux": ("docker-worker", "linux"), + "invalid/invalid": ("invalid", None), + "invalid/always-optimized": ("always-optimized", None), + "scriptworker-prov-v1/signing-linux-v1": ("scriptworker-signing", None), + "scriptworker-k8s/gecko-3-shipit": ("shipit", None), + "scriptworker-k8s/gecko-1-shipit": ("shipit", None), +} + + +@memoize +def _get(graph_config, alias, level, release_level): + """Get the configuration for this worker_type alias: {provisioner, + worker-type, implementation, os}""" + level = str(level) + + # handle the legacy (non-alias) format + if "/" in alias: + alias = alias.format(level=level) + provisioner, worker_type = alias.split("/", 1) + try: + implementation, os = WORKER_TYPES[alias] + return { + "provisioner": provisioner, + "worker-type": worker_type, + "implementation": implementation, + "os": os, + } + except KeyError: + return { + "provisioner": provisioner, + "worker-type": worker_type, + } + + matches = keymatch(graph_config["workers"]["aliases"], alias) + if len(matches) > 1: + raise KeyError("Multiple matches for worker-type alias " + alias) + elif not matches: + raise KeyError("No matches for worker-type alias " + alias) + worker_config = matches[0].copy() + + worker_config["provisioner"] = evaluate_keyed_by( + worker_config["provisioner"], + "worker-type alias {} field provisioner".format(alias), + {"level": level}, + ).format( + **{ + "trust-domain": graph_config["trust-domain"], + "level": level, + "alias": alias, + } + ) + worker_config["worker-type"] = evaluate_keyed_by( + worker_config["worker-type"], + "worker-type alias {} field worker-type".format(alias), + {"level": level, "release-level": 
release_level}, + ).format( + **{ + "trust-domain": graph_config["trust-domain"], + "level": level, + "alias": alias, + } + ) + + return worker_config + + +def worker_type_implementation(graph_config, worker_type): + """Get the worker implementation and OS for the given workerType, where the + OS represents the host system, not the target OS, in the case of + cross-compiles.""" + worker_config = _get(graph_config, worker_type, "1", "staging") + return worker_config["implementation"], worker_config.get("os") + + +def get_worker_type(graph_config, worker_type, level, release_level): + """ + Get the worker type provisioner and worker-type, optionally evaluating + aliases from the graph config. + """ + worker_config = _get(graph_config, worker_type, level, release_level) + return worker_config["provisioner"], worker_config["worker-type"] diff --git a/taskcluster/taskgraph/util/yaml.py b/taskcluster/taskgraph/util/yaml.py new file mode 100644 index 0000000000..9f1015c541 --- /dev/null +++ b/taskcluster/taskgraph/util/yaml.py @@ -0,0 +1,37 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import os + +from yaml.loader import SafeLoader + + +class UnicodeLoader(SafeLoader): + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + +UnicodeLoader.add_constructor("tag:yaml.org,2002:str", UnicodeLoader.construct_yaml_str) + + +def load_stream(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = UnicodeLoader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + + +def load_yaml(*parts): + """Convenience function to load a YAML file in the given path. This is + useful for loading kind configuration files from the kind path.""" + filename = os.path.join(*parts) + with open(filename, "rb") as f: + return load_stream(f) |
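Finally, a short usage sketch for the YAML helpers above (the kind configuration shown is invented):

    from taskgraph.util.yaml import load_stream, load_yaml

    load_stream("loader: taskgraph.loader.transform:loader\ntransforms: []\n")
    # -> {"loader": "taskgraph.loader.transform:loader", "transforms": []}

    # load_yaml joins its arguments into a path and parses that file, e.g. a kind.yml:
    #     load_yaml(kind_path, "kind.yml")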