Diffstat (limited to 'third_party/python/taskcluster_taskgraph/taskgraph/util')
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py         |   0
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py          |  86
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py       |  96
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py     |  86
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py         |  79
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/dependencies.py     |  92
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py           | 237
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py             |  58
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py         |  97
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py          |  40
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py |  97
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/path.py             | 167
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py      |  52
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py     |  22
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py           | 260
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py            |  40
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py      | 373
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py        |  54
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py        |  80
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/time.py             | 115
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py       |  84
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py              | 552
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py           | 283
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py      |  78
-rw-r--r--  third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py             |  36
25 files changed, 3164 insertions(+), 0 deletions(-)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
new file mode 100644
index 0000000000..ee59ba4548
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
@@ -0,0 +1,86 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import gzip
+import os
+import stat
+import tarfile
+
+# 2016-01-01T00:00:00+0000
+DEFAULT_MTIME = 1451606400
+
+
+def create_tar_from_files(fp, files):
+ """Create a tar file deterministically.
+
+ Receives a dict mapping names of files in the archive to local filesystem
+ paths or ``mozpack.files.BaseFile`` instances.
+
+ The files will be archived and written to the passed file handle opened
+ for writing.
+
+ Only regular files can be written.
+
+ FUTURE accept a filename argument (or create APIs to write files)
+ """
+ with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf:
+ for archive_path, f in sorted(files.items()):
+ if isinstance(f, str):
+ mode = os.stat(f).st_mode
+ f = open(f, "rb")
+ else:
+ mode = 0o0644
+
+ ti = tarfile.TarInfo(archive_path)
+ ti.mode = mode
+ ti.type = tarfile.REGTYPE
+
+ if not ti.isreg():
+ raise ValueError("not a regular file: %s" % f)
+
+ # Disallow setuid and setgid bits. This is an arbitrary restriction.
+ # However, since we set uid/gid to root:root, setuid and setgid
+ # would be a glaring security hole if the archive were
+ # uncompressed as root.
+ if ti.mode & (stat.S_ISUID | stat.S_ISGID):
+ raise ValueError("cannot add file with setuid or setgid set: " "%s" % f)
+
+ # Set uid, gid, username, and group as deterministic values.
+ ti.uid = 0
+ ti.gid = 0
+ ti.uname = ""
+ ti.gname = ""
+
+ # Set mtime to a constant value.
+ ti.mtime = DEFAULT_MTIME
+
+ f.seek(0, 2)
+ ti.size = f.tell()
+ f.seek(0, 0)
+ # tarfile wants to pass a size argument to read(). So just
+ # wrap/buffer in a proper file object interface.
+ tf.addfile(ti, f)
+
+
+def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
+ """Create a tar.gz file deterministically from files.
+
+ This is a glorified wrapper around ``create_tar_from_files`` that
+ adds gzip compression.
+
+ The passed file handle should be opened for writing in binary mode.
+ When the function returns, all data has been written to the handle.
+ """
+ # Offset 3-7 in the gzip header contains an mtime. Pin it to a known
+ # value so output is deterministic.
+ gf = gzip.GzipFile(
+ filename=filename or "",
+ mode="wb",
+ fileobj=fp,
+ compresslevel=compresslevel,
+ mtime=DEFAULT_MTIME,
+ )
+ with gf:
+ create_tar_from_files(gf, files)
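
A minimal usage sketch, assuming `taskgraph` is importable (the file name and contents are illustrative): because entries are sorted, ownership and mtimes are pinned, and the gzip header mtime is fixed, two runs over the same inputs produce byte-identical archives.

import hashlib
import io

from taskgraph.util.archive import create_tar_gz_from_files

def archive_digest():
    buf = io.BytesIO()
    # file-like values work here: only seek/tell/read are required of them
    create_tar_gz_from_files(buf, {"hello.txt": io.BytesIO(b"hello\n")})
    return hashlib.sha256(buf.getvalue()).hexdigest()

assert archive_digest() == archive_digest()  # deterministic output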
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
new file mode 100644
index 0000000000..74d6996629
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
@@ -0,0 +1,96 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import re
+
+
+def attrmatch(attributes, **kwargs):
+ """Determine whether the given set of task attributes matches.
+
+ The conditions are given as keyword arguments, where each keyword names an
+ attribute. The keyword value can be a literal, a set, or a callable:
+
+ * A literal must match the attribute exactly.
+ * Given a set or list, the attribute value must be contained within it.
+ * A callable is called with the attribute value and returns a boolean.
+
+ If an attribute is specified as a keyword argument but not present in the
+ task's attributes, the result is False.
+
+ Args:
+ attributes (dict): The task's attributes object.
+ kwargs (dict): The conditions the task's attributes must satisfy in
+ order to match.
+ Returns:
+ bool: Whether the task's attributes match the conditions or not.
+ """
+ for kwkey, kwval in kwargs.items():
+ if kwkey not in attributes:
+ return False
+ attval = attributes[kwkey]
+ if isinstance(kwval, (set, list)):
+ if attval not in kwval:
+ return False
+ elif callable(kwval):
+ if not kwval(attval):
+ return False
+ elif kwval != attributes[kwkey]:
+ return False
+ return True
+
+
+def keymatch(attributes, target):
+ """Determine if any keys in attributes are a match to target, then return
+ a list of matching values. First exact matches will be checked. Failing
+ that, regex matches and finally a default key.
+ """
+ # exact match
+ if target in attributes:
+ return [attributes[target]]
+
+ # regular expression match
+ matches = [v for k, v in attributes.items() if re.match(k + "$", target)]
+ if matches:
+ return matches
+
+ # default
+ if "default" in attributes:
+ return [attributes["default"]]
+
+ return []
+
+
+def _match_run_on(key, run_on):
+ """
+ Determine whether the given parameter is included in the corresponding `run-on-attribute`.
+ """
+ if "all" in run_on:
+ return True
+ return key in run_on
+
+
+match_run_on_projects = _match_run_on
+match_run_on_tasks_for = _match_run_on
+
+
+def match_run_on_git_branches(git_branch, run_on_git_branches):
+ """
+    Determine whether the given git branch is included in the `run-on-git-branches` parameter.
+ Allows 'all'.
+ """
+ if "all" in run_on_git_branches:
+ return True
+
+ for expected_git_branch_pattern in run_on_git_branches:
+ if re.match(expected_git_branch_pattern, git_branch):
+ return True
+
+ return False
+
+
+def sorted_unique_list(*args):
+ """Join one or more lists, and return a sorted list of unique members"""
+ combined = set().union(*args)
+ return sorted(combined)
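
A short sketch of the three `attrmatch` condition forms and the `keymatch` fallback order, using made-up attributes:

from taskgraph.util.attributes import attrmatch, keymatch

attrs = {"kind": "build", "platform": "linux64"}
assert attrmatch(attrs, kind="build")                       # literal equality
assert attrmatch(attrs, platform={"linux64", "macosx64"})   # set/list membership
assert attrmatch(attrs, kind=lambda v: v.startswith("b"))   # callable predicate
assert not attrmatch(attrs, missing="anything")             # absent key: False

# exact match wins; otherwise regex keys, then "default"
assert keymatch({"linux.*": 1, "default": 0}, "linux64") == [1]
assert keymatch({"win.*": 1, "default": 0}, "linux64") == [0]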
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py
new file mode 100644
index 0000000000..974b114902
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py
@@ -0,0 +1,86 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import hashlib
+import time
+
+TARGET_CACHE_INDEX = "{cache_prefix}.cache.level-{level}.{type}.{name}.hash.{digest}"
+EXTRA_CACHE_INDEXES = [
+ "{cache_prefix}.cache.level-{level}.{type}.{name}.latest",
+ "{cache_prefix}.cache.level-{level}.{type}.{name}.pushdate.{build_date_long}",
+]
+
+
+def add_optimization(
+ config, taskdesc, cache_type, cache_name, digest=None, digest_data=None
+):
+ """
+ Allow the results of this task to be cached. This adds index routes to the
+ task so it can be looked up for future runs, and optimization hints so that
+ cached artifacts can be found. Exactly one of `digest` and `digest_data`
+ must be passed.
+
+ :param TransformConfig config: The configuration for the kind being transformed.
+ :param dict taskdesc: The description of the current task.
+ :param str cache_type: The type of task result being cached.
+ :param str cache_name: The name of the object being cached.
+ :param digest: A unique string identifying this version of the artifacts
+ being generated. Typically this will be the hash of inputs to the task.
+ :type digest: bytes or None
+ :param digest_data: A list of bytes representing the inputs of this task.
+ They will be concatenated and hashed to create the digest for this
+ task.
+ :type digest_data: list of bytes or None
+ """
+ if (digest is None) == (digest_data is None):
+ raise Exception("Must pass exactly one of `digest` and `digest_data`.")
+ if digest is None:
+ digest = hashlib.sha256("\n".join(digest_data).encode("utf-8")).hexdigest()
+
+ if "cached-task-prefix" in config.graph_config["taskgraph"]:
+ cache_prefix = config.graph_config["taskgraph"]["cached-task-prefix"]
+ else:
+ cache_prefix = config.graph_config["trust-domain"]
+
+ subs = {
+ "cache_prefix": cache_prefix,
+ "type": cache_type,
+ "name": cache_name,
+ "digest": digest,
+ }
+
+ # We'll try to find a cached version of the toolchain at levels above and
+ # including the current level, starting at the highest level.
+ # Chain-of-trust doesn't handle tasks not built on the tip of a
+ # pull-request, so don't look for level-1 tasks if building a pull-request.
+ index_routes = []
+ min_level = int(config.params["level"])
+ if config.params["tasks_for"] == "github-pull-request":
+ min_level = max(min_level, 3)
+ for level in reversed(range(min_level, 4)):
+ subs["level"] = level
+ index_routes.append(TARGET_CACHE_INDEX.format(**subs))
+
+ taskdesc["optimization"] = {"index-search": index_routes}
+
+ # ... and cache at the lowest level.
+ subs["level"] = config.params["level"]
+ taskdesc.setdefault("routes", []).append(
+ f"index.{TARGET_CACHE_INDEX.format(**subs)}"
+ )
+
+ # ... and add some extra routes for humans
+ subs["build_date_long"] = time.strftime(
+ "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"])
+ )
+ taskdesc["routes"].extend(
+ [f"index.{route.format(**subs)}" for route in EXTRA_CACHE_INDEXES]
+ )
+
+ taskdesc["attributes"]["cached_task"] = {
+ "type": cache_type,
+ "name": cache_name,
+ "digest": digest,
+ }
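
To make the route construction concrete, here is how the digest and the target index route come together; the prefix, level, type, and name values below are hypothetical:

import hashlib

digest_data = ["docker-image-hash", "context-hash"]  # hypothetical inputs
digest = hashlib.sha256("\n".join(digest_data).encode("utf-8")).hexdigest()

route = "{cache_prefix}.cache.level-{level}.{type}.{name}.hash.{digest}".format(
    cache_prefix="my-trust-domain",  # falls back to graph_config["trust-domain"]
    level=3,
    type="docker-images.v2",
    name="linux",
    digest=digest,
)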
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py
new file mode 100644
index 0000000000..d0e1e1079f
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py
@@ -0,0 +1,79 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Utilities for generating a decision task from :file:`.taskcluster.yml`.
+"""
+
+
+import os
+
+import jsone
+import slugid
+import yaml
+
+from .templates import merge
+from .time import current_json_time
+from .vcs import find_hg_revision_push_info
+
+
+def make_decision_task(params, root, context, head_rev=None):
+ """Generate a basic decision task, based on the root .taskcluster.yml"""
+ with open(os.path.join(root, ".taskcluster.yml"), "rb") as f:
+ taskcluster_yml = yaml.safe_load(f)
+
+ if not head_rev:
+ head_rev = params["head_rev"]
+
+ if params["repository_type"] == "hg":
+ pushlog = find_hg_revision_push_info(params["repository_url"], head_rev)
+
+ hg_push_context = {
+ "pushlog_id": pushlog["pushid"],
+ "pushdate": pushlog["pushdate"],
+ "owner": pushlog["user"],
+ }
+ else:
+ hg_push_context = {}
+
+ slugids = {}
+
+ def as_slugid(name):
+ # https://github.com/taskcluster/json-e/issues/164
+ name = name[0]
+ if name not in slugids:
+ slugids[name] = slugid.nice()
+ return slugids[name]
+
+ # provide a similar JSON-e context to what mozilla-taskcluster provides:
+ # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
+ # but with a different tasks_for and an extra `cron` section
+ context = merge(
+ {
+ "repository": {
+ "url": params["repository_url"],
+ "project": params["project"],
+ "level": params["level"],
+ },
+ "push": merge(
+ {
+ "revision": params["head_rev"],
+ # remainder are fake values, but the decision task expects them anyway
+ "comment": " ",
+ },
+ hg_push_context,
+ ),
+ "now": current_json_time(),
+ "as_slugid": as_slugid,
+ },
+ context,
+ )
+
+ rendered = jsone.render(taskcluster_yml, context)
+ if len(rendered["tasks"]) != 1:
+ raise Exception("Expected .taskcluster.yml to only produce one cron task")
+ task = rendered["tasks"][0]
+
+ task_id = task.pop("taskId")
+ return (task_id, task)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/dependencies.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/dependencies.py
new file mode 100644
index 0000000000..d33aa3d7f2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/dependencies.py
@@ -0,0 +1,92 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from typing import Dict, Iterator, Optional
+
+from taskgraph.task import Task
+from taskgraph.transforms.base import TransformConfig
+from taskgraph.util.schema import Schema
+
+# Define a collection of group_by functions
+GROUP_BY_MAP = {}
+
+
+def group_by(name, schema=None):
+ def wrapper(func):
+ assert (
+ name not in GROUP_BY_MAP
+ ), f"duplicate group_by function name {name} ({func} and {GROUP_BY_MAP[name]})"
+ GROUP_BY_MAP[name] = func
+ func.schema = schema
+ return func
+
+ return wrapper
+
+
+@group_by("single")
+def group_by_single(config, tasks):
+ for task in tasks:
+ yield [task]
+
+
+@group_by("all")
+def group_by_all(config, tasks):
+ return [[task for task in tasks]]
+
+
+@group_by("attribute", schema=Schema(str))
+def group_by_attribute(config, tasks, attr):
+ groups = {}
+ for task in tasks:
+ val = task.attributes.get(attr)
+ if not val:
+ continue
+ groups.setdefault(val, []).append(task)
+
+ return groups.values()
+
+
+def get_dependencies(config: TransformConfig, task: Dict) -> Iterator[Task]:
+ """Iterate over all dependencies as ``Task`` objects.
+
+ Args:
+ config (TransformConfig): The ``TransformConfig`` object associated
+ with the kind.
+ task (Dict): The task dictionary to retrieve dependencies from.
+
+ Returns:
+ Iterator[Task]: Returns a generator that iterates over the ``Task``
+ objects associated with each dependency.
+ """
+ if "dependencies" not in task:
+        return
+
+ for label, dep in config.kind_dependencies_tasks.items():
+ if label in task["dependencies"].values():
+ yield dep
+
+
+def get_primary_dependency(config: TransformConfig, task: Dict) -> Optional[Task]:
+ """Return the ``Task`` object associated with the primary dependency.
+
+ This uses the task's ``primary-kind-dependency`` attribute to find the primary
+ dependency, or returns ``None`` if the attribute is unset.
+
+ Args:
+ config (TransformConfig): The ``TransformConfig`` object associated
+ with the kind.
+ task (Dict): The task dictionary to retrieve the primary dependency from.
+
+ Returns:
+ Optional[Task]: The ``Task`` object associated with the
+ primary dependency or ``None``.
+ """
+ try:
+ primary_kind = task["attributes"]["primary-kind-dependency"]
+ except KeyError:
+ return None
+
+ for dep in get_dependencies(config, task):
+ if dep.kind == primary_kind:
+ return dep
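
Registering an additional grouping strategy only requires decorating a function; this hypothetical example groups tasks by kind:

from taskgraph.util.dependencies import GROUP_BY_MAP, group_by

@group_by("kind")
def group_by_kind(config, tasks):
    groups = {}
    for task in tasks:
        groups.setdefault(task.kind, []).append(task)
    return groups.values()

assert "kind" in GROUP_BY_MAP  # now usable from kind configuration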
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py
new file mode 100644
index 0000000000..c37a69f98f
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py
@@ -0,0 +1,237 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import hashlib
+import io
+import os
+import re
+
+from taskgraph.util.archive import create_tar_gz_from_files
+from taskgraph.util.memoize import memoize
+
+from .yaml import load_yaml
+
+IMAGE_DIR = os.path.join(".", "taskcluster", "docker")
+
+
+def docker_image(name, by_tag=False):
+ """
+ Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``,
+ or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`.
+ """
+ try:
+ with open(os.path.join(IMAGE_DIR, name, "REGISTRY")) as f:
+ registry = f.read().strip()
+ except OSError:
+ with open(os.path.join(IMAGE_DIR, "REGISTRY")) as f:
+ registry = f.read().strip()
+
+ if not by_tag:
+ hashfile = os.path.join(IMAGE_DIR, name, "HASH")
+ try:
+ with open(hashfile) as f:
+ return f"{registry}/{name}@{f.read().strip()}"
+ except OSError:
+ raise Exception(f"Failed to read HASH file {hashfile}")
+
+ try:
+ with open(os.path.join(IMAGE_DIR, name, "VERSION")) as f:
+ tag = f.read().strip()
+ except OSError:
+ tag = "latest"
+ return f"{registry}/{name}:{tag}"
+
+
+class VoidWriter:
+ """A file object with write capabilities that does nothing with the written
+ data."""
+
+ def write(self, buf):
+ pass
+
+
+def generate_context_hash(topsrcdir, image_path, args=None):
+ """Generates a sha256 hash for context directory used to build an image."""
+
+ return stream_context_tar(topsrcdir, image_path, VoidWriter(), args=args)
+
+
+class HashingWriter:
+ """A file object with write capabilities that hashes the written data at
+ the same time it passes down to a real file object."""
+
+ def __init__(self, writer):
+ self._hash = hashlib.sha256()
+ self._writer = writer
+
+ def write(self, buf):
+ self._hash.update(buf)
+ self._writer.write(buf)
+
+ def hexdigest(self):
+ return self._hash.hexdigest()
+
+
+def create_context_tar(topsrcdir, context_dir, out_path, args=None):
+ """Create a context tarball.
+
+ A directory ``context_dir`` containing a Dockerfile will be assembled into
+ a gzipped tar file at ``out_path``.
+
+ We also scan the source Dockerfile for special syntax that influences
+ context generation.
+
+ If a line in the Dockerfile has the form ``# %include <path>``,
+ the relative path specified on that line will be matched against
+ files in the source repository and added to the context under the
+ path ``topsrcdir/``. If an entry is a directory, we add all files
+ under that directory.
+
+ If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences of
+ the string ``$<name>`` in subsequent lines are replaced with the value
+ found in the ``args`` argument. Exception: this doesn't apply to VOLUME
+ definitions.
+
+ Returns the SHA-256 hex digest of the created archive.
+ """
+ with open(out_path, "wb") as fh:
+ return stream_context_tar(
+ topsrcdir,
+ context_dir,
+ fh,
+ image_name=os.path.basename(out_path),
+ args=args,
+ )
+
+
+RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task")
+RUN_TASK_FILES = {
+ f"run-task/{path}": os.path.join(RUN_TASK_ROOT, path)
+ for path in [
+ "run-task",
+ "fetch-content",
+ "hgrc",
+ "robustcheckout.py",
+ ]
+}
+RUN_TASK_SNIPPET = [
+ "COPY run-task/run-task /usr/local/bin/run-task\n",
+ "COPY run-task/fetch-content /usr/local/bin/fetch-content\n",
+ "COPY run-task/robustcheckout.py /usr/local/mercurial/robustcheckout.py\n"
+ "COPY run-task/hgrc /etc/mercurial/hgrc.d/mozilla.rc\n",
+]
+
+
+def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None):
+ """Like create_context_tar, but streams the tar file to the `out_file` file
+ object."""
+ archive_files = {}
+ replace = []
+ content = []
+
+ topsrcdir = os.path.abspath(topsrcdir)
+ context_dir = os.path.join(topsrcdir, context_dir)
+
+ for root, dirs, files in os.walk(context_dir):
+ for f in files:
+ source_path = os.path.join(root, f)
+ archive_path = source_path[len(context_dir) + 1 :]
+ archive_files[archive_path] = open(source_path, "rb")
+
+ # Parse Dockerfile for special syntax of extra files to include.
+ content = []
+ with open(os.path.join(context_dir, "Dockerfile")) as fh:
+ for line in fh:
+ if line.startswith("# %ARG"):
+ p = line[len("# %ARG ") :].strip()
+ if not args or p not in args:
+ raise Exception(f"missing argument: {p}")
+ replace.append((re.compile(rf"\${p}\b"), args[p]))
+ continue
+
+ for regexp, s in replace:
+ line = re.sub(regexp, s, line)
+
+ content.append(line)
+
+ if not line.startswith("# %include"):
+ continue
+
+ if line.strip() == "# %include-run-task":
+ content.extend(RUN_TASK_SNIPPET)
+ archive_files.update(RUN_TASK_FILES)
+ continue
+
+ p = line[len("# %include ") :].strip()
+ if os.path.isabs(p):
+ raise Exception("extra include path cannot be absolute: %s" % p)
+
+ fs_path = os.path.normpath(os.path.join(topsrcdir, p))
+ # Check for filesystem traversal exploits.
+ if not fs_path.startswith(topsrcdir):
+ raise Exception("extra include path outside topsrcdir: %s" % p)
+
+ if not os.path.exists(fs_path):
+ raise Exception("extra include path does not exist: %s" % p)
+
+ if os.path.isdir(fs_path):
+ for root, dirs, files in os.walk(fs_path):
+ for f in files:
+ source_path = os.path.join(root, f)
+ rel = source_path[len(fs_path) + 1 :]
+ archive_path = os.path.join("topsrcdir", p, rel)
+ archive_files[archive_path] = source_path
+ else:
+ archive_path = os.path.join("topsrcdir", p)
+ archive_files[archive_path] = fs_path
+
+ archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8"))
+
+ writer = HashingWriter(out_file)
+ create_tar_gz_from_files(writer, archive_files, image_name)
+ return writer.hexdigest()
+
+
+@memoize
+def image_paths():
+ """Return a map of image name to paths containing their Dockerfile."""
+ config = load_yaml("taskcluster", "ci", "docker-image", "kind.yml")
+ return {
+ k: os.path.join(IMAGE_DIR, v.get("definition", k))
+ for k, v in config["tasks"].items()
+ }
+
+
+def image_path(name):
+ paths = image_paths()
+ if name in paths:
+ return paths[name]
+ return os.path.join(IMAGE_DIR, name)
+
+
+@memoize
+def parse_volumes(image):
+ """Parse VOLUME entries from a Dockerfile for an image."""
+ volumes = set()
+
+ path = image_path(image)
+
+ with open(os.path.join(path, "Dockerfile"), "rb") as fh:
+ for line in fh:
+ line = line.strip()
+ # We assume VOLUME definitions don't use %ARGS.
+ if not line.startswith(b"VOLUME "):
+ continue
+
+ v = line.split(None, 1)[1]
+ if v.startswith(b"["):
+ raise ValueError(
+ "cannot parse array syntax for VOLUME; "
+ "convert to multiple entries"
+ )
+
+ volumes |= {volume.decode("utf-8") for volume in v.split()}
+
+ return volumes
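
A usage sketch of the hashing path; the paths and the %ARG value are hypothetical, and the context directory must contain a Dockerfile:

from taskgraph.util.docker import generate_context_hash

# Streams the context tar through HashingWriter into VoidWriter, so the
# sha256 is computed without writing a tarball to disk.
digest = generate_context_hash(
    topsrcdir=".",
    image_path="taskcluster/docker/example",
    args={"BASE_IMAGE": "ubuntu:22.04"},  # substituted for "# %ARG BASE_IMAGE"
)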
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py
new file mode 100644
index 0000000000..5d884fc318
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py
@@ -0,0 +1,58 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import hashlib
+from pathlib import Path
+
+from taskgraph.util import path as mozpath
+from taskgraph.util.memoize import memoize
+
+
+@memoize
+def hash_path(path):
+ """Hash a single file.
+
+ Returns the SHA-256 hash in hex form.
+ """
+ with open(path, "rb") as fh:
+ return hashlib.sha256(fh.read()).hexdigest()
+
+
+def hash_paths(base_path, patterns):
+ """
+    Given a list of path patterns, return a digest of the contents of all
+ the corresponding files, similarly to git tree objects or mercurial
+ manifests.
+
+ Each file is hashed. The list of all hashes and file paths is then
+ itself hashed to produce the result.
+ """
+ h = hashlib.sha256()
+
+ found = set()
+ for pattern in patterns:
+ matches = _find_matching_files(base_path, pattern)
+ if matches:
+ found.update(matches)
+ else:
+ raise Exception("%s did not match anything" % pattern)
+ for path in sorted(found):
+ h.update(
+ "{} {}\n".format(
+ hash_path(mozpath.abspath(mozpath.join(base_path, path))),
+ mozpath.normsep(path),
+ ).encode("utf-8")
+ )
+ return h.hexdigest()
+
+
+@memoize
+def _find_matching_files(base_path, pattern):
+ files = _get_all_files(base_path)
+ return [path for path in files if mozpath.match(path, pattern)]
+
+
+@memoize
+def _get_all_files(base_path):
+ return [str(path) for path in Path(base_path).rglob("*") if path.is_file()]
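
A brief sketch with a hypothetical directory layout; note that a pattern matching nothing raises, which catches stale pattern lists early:

from taskgraph.util.hash import hash_paths

# Combined digest over every .py file under src/, using the glob syntax from
# taskgraph.util.path.match ("**" spans directory boundaries).
digest = hash_paths("src", ["**/*.py"])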
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py
new file mode 100644
index 0000000000..9b0c5a44fb
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py
@@ -0,0 +1,97 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from .attributes import keymatch
+
+
+def evaluate_keyed_by(
+ value, item_name, attributes, defer=None, enforce_single_match=True
+):
+ """
+ For values which can either accept a literal value, or be keyed by some
+ attributes, perform that lookup and return the result.
+
+ For example, given item::
+
+ by-test-platform:
+ macosx-10.11/debug: 13
+ win.*: 6
+ default: 12
+
+    a call to `evaluate_keyed_by(item, 'thing-name', {'test-platform': 'linux96'})`
+ would return `12`.
+
+ Items can be nested as deeply as desired::
+
+ by-test-platform:
+ win.*:
+ by-project:
+ ash: ..
+ cedar: ..
+ linux: 13
+ default: 12
+
+ Args:
+        value: The value to evaluate, which may be a literal or a 'by-<key>' dict.
+ item_name (str): Used to generate useful error messages.
+ attributes (dict): Dictionary of attributes used to lookup 'by-<key>' with.
+ defer (list):
+ Allows evaluating a by-* entry at a later time. In the example
+ above it's possible that the project attribute hasn't been set yet,
+ in which case we'd want to stop before resolving that subkey and
+ then call this function again later. This can be accomplished by
+ setting `defer=["project"]` in this example.
+ enforce_single_match (bool):
+ If True (default), each task may only match a single arm of the
+ evaluation.
+ """
+ while True:
+ if not isinstance(value, dict) or len(value) != 1:
+ return value
+ value_key = next(iter(value))
+ if not value_key.startswith("by-"):
+ return value
+
+ keyed_by = value_key[3:] # strip off 'by-' prefix
+
+ if defer and keyed_by in defer:
+ return value
+
+ key = attributes.get(keyed_by)
+ alternatives = next(iter(value.values()))
+
+ if len(alternatives) == 1 and "default" in alternatives:
+            # Error out when 'default' is the only alternative, because the
+            # by-{keyed_by} indirection is unnecessary in that case.
+ raise Exception(
+ "Keyed-by '{}' unnecessary with only value 'default' "
+ "found, when determining item {}".format(keyed_by, item_name)
+ )
+
+ if key is None:
+ if "default" in alternatives:
+ value = alternatives["default"]
+ continue
+ else:
+ raise Exception(
+ "No attribute {} and no value for 'default' found "
+ "while determining item {}".format(keyed_by, item_name)
+ )
+
+ matches = keymatch(alternatives, key)
+ if enforce_single_match and len(matches) > 1:
+ raise Exception(
+ "Multiple matching values for {} {!r} found while "
+ "determining item {}".format(keyed_by, key, item_name)
+ )
+ elif matches:
+ value = matches[0]
+ continue
+
+ raise Exception(
+ "No {} matching {!r} nor 'default' found while determining item {}".format(
+ keyed_by, key, item_name
+ )
+ )
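
Evaluating the docstring's example directly, as a sketch:

from taskgraph.util.keyed_by import evaluate_keyed_by

value = {
    "by-test-platform": {
        "macosx-10.11/debug": 13,
        "win.*": 6,
        "default": 12,
    }
}
assert evaluate_keyed_by(value, "thing-name", {"test-platform": "linux96"}) == 12
assert evaluate_keyed_by(value, "thing-name", {"test-platform": "win10"}) == 6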
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py
new file mode 100644
index 0000000000..56b513e74c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py
@@ -0,0 +1,40 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Imported from
+# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#923-949
+
+
+import functools
+
+
+class memoize(dict):
+ """A decorator to memoize the results of function calls depending
+ on its arguments.
+ Both functions and instance methods are handled, although in the
+ instance method case, the results are cache in the instance itself.
+ """
+
+ def __init__(self, func):
+ self.func = func
+ functools.update_wrapper(self, func)
+
+ def __call__(self, *args):
+ if args not in self:
+ self[args] = self.func(*args)
+ return self[args]
+
+ def method_call(self, instance, *args):
+ name = "_%s" % self.func.__name__
+ if not hasattr(instance, name):
+ setattr(instance, name, {})
+ cache = getattr(instance, name)
+ if args not in cache:
+ cache[args] = self.func(instance, *args)
+ return cache[args]
+
+ def __get__(self, instance, cls):
+ return functools.update_wrapper(
+ functools.partial(self.method_call, instance), self.func
+ )
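
Usage sketch: repeated calls with the same (hashable) arguments are served from the cache.

from taskgraph.util.memoize import memoize

calls = []

@memoize
def square(x):
    calls.append(x)
    return x * x

assert square(4) == 16 and square(4) == 16
assert calls == [4]  # the second call never ran the function body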
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py
new file mode 100644
index 0000000000..6233a98a40
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py
@@ -0,0 +1,97 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import re
+
+from taskgraph.util.taskcluster import get_artifact_url
+from taskgraph.util.time import json_time_from_now
+
+TASK_REFERENCE_PATTERN = re.compile("<([^>]+)>")
+ARTIFACT_REFERENCE_PATTERN = re.compile("<([^/]+)/([^>]+)>")
+
+
+def _recurse(val, param_fns):
+ def recurse(val):
+ if isinstance(val, list):
+ return [recurse(v) for v in val]
+ elif isinstance(val, dict):
+ if len(val) == 1:
+ for param_key, param_fn in param_fns.items():
+ if set(val.keys()) == {param_key}:
+ return param_fn(val[param_key])
+ return {k: recurse(v) for k, v in val.items()}
+ else:
+ return val
+
+ return recurse(val)
+
+
+def resolve_timestamps(now, task_def):
+ """Resolve all instances of `{'relative-datestamp': '..'}` in the given task definition"""
+ return _recurse(
+ task_def,
+ {
+ "relative-datestamp": lambda v: json_time_from_now(v, now),
+ },
+ )
+
+
+def resolve_task_references(label, task_def, task_id, decision_task_id, dependencies):
+ """Resolve all instances of ``{'task-reference': '..<..>..'} ``
+ and ``{'artifact-reference`: '..<dependency/artifact/path>..'}``
+ in the given task definition, using the given dependencies.
+ """
+
+ def task_reference(val):
+ def repl(match):
+ key = match.group(1)
+ if key == "self":
+ return task_id
+ elif key == "decision":
+ return decision_task_id
+ try:
+ return dependencies[key]
+ except KeyError:
+ # handle escaping '<'
+ if key == "<":
+ return key
+ raise KeyError(f"task '{label}' has no dependency named '{key}'")
+
+ return TASK_REFERENCE_PATTERN.sub(repl, val)
+
+ def artifact_reference(val):
+ def repl(match):
+ dependency, artifact_name = match.group(1, 2)
+
+ if dependency == "self":
+ raise KeyError(f"task '{label}' can't reference artifacts of self")
+ elif dependency == "decision":
+ task_id = decision_task_id
+ else:
+ try:
+ task_id = dependencies[dependency]
+ except KeyError:
+ raise KeyError(
+ "task '{}' has no dependency named '{}'".format(
+ label, dependency
+ )
+ )
+
+ assert artifact_name.startswith(
+ "public/"
+ ), "artifact-reference only supports public artifacts, not `{}`".format(
+ artifact_name
+ )
+ return get_artifact_url(task_id, artifact_name)
+
+ return ARTIFACT_REFERENCE_PATTERN.sub(repl, val)
+
+ return _recurse(
+ task_def,
+ {
+ "task-reference": task_reference,
+ "artifact-reference": artifact_reference,
+ },
+ )
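
A small sketch of task-reference resolution; the ids are hypothetical, and artifact references additionally require a Taskcluster root URL to build the artifact URL:

from taskgraph.util.parameterization import resolve_task_references

task_def = {"payload": {"env": {"PARENT": {"task-reference": "<build>"}}}}
resolved = resolve_task_references(
    label="test-task",
    task_def=task_def,
    task_id="self-id",
    decision_task_id="decision-id",
    dependencies={"build": "build-task-id"},
)
assert resolved["payload"]["env"]["PARENT"] == "build-task-id"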
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py
new file mode 100644
index 0000000000..c725140b12
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py
@@ -0,0 +1,167 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Like :py:mod:`os.path`, with a reduced set of functions, and with normalized path
+separators (always use forward slashes).
+Also contains a few additional utilities not found in :py:mod:`os.path`.
+"""
+
+# Imported from
+# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozpack/path.py
+
+
+import os
+import posixpath
+import re
+
+
+def normsep(path):
+ """
+ Normalize path separators, by using forward slashes instead of whatever
+ :py:const:`os.sep` is.
+ """
+ if os.sep != "/":
+ path = path.replace(os.sep, "/")
+ if os.altsep and os.altsep != "/":
+ path = path.replace(os.altsep, "/")
+ return path
+
+
+def relpath(path, start):
+ rel = normsep(os.path.relpath(path, start))
+ return "" if rel == "." else rel
+
+
+def realpath(path):
+ return normsep(os.path.realpath(path))
+
+
+def abspath(path):
+ return normsep(os.path.abspath(path))
+
+
+def join(*paths):
+ return normsep(os.path.join(*paths))
+
+
+def normpath(path):
+ return posixpath.normpath(normsep(path))
+
+
+def dirname(path):
+ return posixpath.dirname(normsep(path))
+
+
+def commonprefix(paths):
+ return posixpath.commonprefix([normsep(path) for path in paths])
+
+
+def basename(path):
+ return os.path.basename(path)
+
+
+def splitext(path):
+ return posixpath.splitext(normsep(path))
+
+
+def split(path):
+ """
+ Return the normalized path as a list of its components.
+
+ ``split('foo/bar/baz')`` returns ``['foo', 'bar', 'baz']``
+ """
+ return normsep(path).split("/")
+
+
+def basedir(path, bases):
+ """
+ Given a list of directories (`bases`), return which one contains the given
+ path. If several matches are found, the deepest base directory is returned.
+
+ ``basedir('foo/bar/baz', ['foo', 'baz', 'foo/bar'])`` returns ``'foo/bar'``
+ (`'foo'` and `'foo/bar'` both match, but `'foo/bar'` is the deepest match)
+ """
+ path = normsep(path)
+ bases = [normsep(b) for b in bases]
+ if path in bases:
+ return path
+ for b in sorted(bases, reverse=True):
+ if not b or path.startswith(b + "/"):
+ return b
+
+
+re_cache = {}
+MATCH_STAR_STAR_RE = re.compile(r"(^|/)\\\*\\\*/")
+MATCH_STAR_STAR_END_RE = re.compile(r"(^|/)\\\*\\\*$")
+
+
+def match(path, pattern):
+ """
+ Return whether the given path matches the given pattern.
+ An asterisk can be used to match any string, including the null string, in
+ one part of the path:
+
+ ``foo`` matches ``*``, ``f*`` or ``fo*o``
+
+ However, an asterisk matching a subdirectory may not match the null string:
+
+ ``foo/bar`` does *not* match ``foo/*/bar``
+
+ If the pattern matches one of the ancestor directories of the path, the
+    path is considered matching:
+
+ ``foo/bar`` matches ``foo``
+
+ Two adjacent asterisks can be used to match files and zero or more
+ directories and subdirectories.
+
+ ``foo/bar`` matches ``foo/**/bar``, or ``**/bar``
+ """
+ if not pattern:
+ return True
+ if pattern not in re_cache:
+ p = re.escape(pattern)
+ p = MATCH_STAR_STAR_RE.sub(r"\1(?:.+/)?", p)
+ p = MATCH_STAR_STAR_END_RE.sub(r"(?:\1.+)?", p)
+ p = p.replace(r"\*", "[^/]*") + "(?:/.*)?$"
+ re_cache[pattern] = re.compile(p)
+ return re_cache[pattern].match(path) is not None
+
+
+def rebase(oldbase, base, relativepath):
+ """
+ Return `relativepath` relative to `base` instead of `oldbase`.
+ """
+ if base == oldbase:
+ return relativepath
+ if len(base) < len(oldbase):
+ assert basedir(oldbase, [base]) == base
+ relbase = relpath(oldbase, base)
+ result = join(relbase, relativepath)
+ else:
+ assert basedir(base, [oldbase]) == oldbase
+ relbase = relpath(base, oldbase)
+ result = relpath(relativepath, relbase)
+ result = normpath(result)
+ if relativepath.endswith("/") and not result.endswith("/"):
+ result += "/"
+ return result
+
+
+def ancestors(path):
+ """Emit the parent directories of a path.
+
+ Args:
+ path (str): Path to emit parents of.
+
+ Yields:
+ str: Path of parent directory.
+ """
+ while path:
+ yield path
+ newpath = os.path.dirname(path)
+ if newpath == path:
+ break
+ path = newpath
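
A few concrete `match`/`basedir` cases, as a sketch:

from taskgraph.util import path

assert path.match("foo/bar/baz.py", "foo/**/*.py")  # "**" spans directories
assert path.match("foo/bar", "foo")                 # ancestor directory matches
assert not path.match("foo/bar", "foo/*/bar")       # "*" can't match an empty part
assert path.basedir("foo/bar/baz", ["foo", "foo/bar"]) == "foo/bar"  # deepest base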
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py
new file mode 100644
index 0000000000..3eb61dfbf3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py
@@ -0,0 +1,52 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import inspect
+import os
+
+
+def find_object(path):
+ """
+ Find a Python object given a path of the form <modulepath>:<objectpath>.
+ Conceptually equivalent to
+
+ def find_object(modulepath, objectpath):
+ import <modulepath> as mod
+ return mod.<objectpath>
+ """
+ if path.count(":") != 1:
+ raise ValueError(f'python path {path!r} does not have the form "module:object"')
+
+ modulepath, objectpath = path.split(":")
+ obj = __import__(modulepath)
+ for a in modulepath.split(".")[1:]:
+ obj = getattr(obj, a)
+ for a in objectpath.split("."):
+ obj = getattr(obj, a)
+ return obj
+
+
+def import_sibling_modules(exceptions=None):
+ """
+ Import all Python modules that are siblings of the calling module.
+
+ Args:
+ exceptions (list): A list of file names to exclude (caller and
+ __init__.py are implicitly excluded).
+ """
+ frame = inspect.stack()[1]
+ mod = inspect.getmodule(frame[0])
+
+ name = os.path.basename(mod.__file__)
+ excs = {"__init__.py", name}
+ if exceptions:
+ excs.update(exceptions)
+
+ modpath = mod.__name__
+ if not name.startswith("__init__.py"):
+ modpath = modpath.rsplit(".", 1)[0]
+
+ for f in os.listdir(os.path.dirname(mod.__file__)):
+ if f.endswith(".py") and f not in excs:
+ __import__(modpath + "." + f[:-3])
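
`find_object` in action, resolving through both the dotted module path and the dotted attribute path:

import os

from taskgraph.util.python_path import find_object

join = find_object("os.path:join")  # "<modulepath>:<objectpath>"
assert join is os.path.join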
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py
new file mode 100644
index 0000000000..55d74f479a
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py
@@ -0,0 +1,22 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Imported from
+# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#115-127
+
+
+class ReadOnlyDict(dict):
+ """A read-only dictionary."""
+
+ def __init__(self, *args, **kwargs):
+ dict.__init__(self, *args, **kwargs)
+
+ def __delitem__(self, key):
+ raise Exception("Object does not support deletion.")
+
+ def __setitem__(self, key, value):
+ raise Exception("Object does not support assignment.")
+
+ def update(self, *args, **kwargs):
+ raise Exception("Object does not support update.")
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py
new file mode 100644
index 0000000000..3989f71182
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py
@@ -0,0 +1,260 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import collections
+import pprint
+import re
+
+import voluptuous
+
+import taskgraph
+
+from .keyed_by import evaluate_keyed_by
+
+
+def validate_schema(schema, obj, msg_prefix):
+ """
+ Validate that object satisfies schema. If not, generate a useful exception
+ beginning with msg_prefix.
+ """
+ if taskgraph.fast:
+ return
+ try:
+ schema(obj)
+ except voluptuous.MultipleInvalid as exc:
+ msg = [msg_prefix]
+ for error in exc.errors:
+ msg.append(str(error))
+ raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj))
+
+
+def optionally_keyed_by(*arguments):
+ """
+ Mark a schema value as optionally keyed by any of a number of fields. The
+ schema is the last argument, and the remaining fields are taken to be the
+ field names. For example:
+
+ 'some-value': optionally_keyed_by(
+ 'test-platform', 'build-platform',
+ Any('a', 'b', 'c'))
+
+ The resulting schema will allow nesting of `by-test-platform` and
+ `by-build-platform` in either order.
+ """
+ schema = arguments[-1]
+ fields = arguments[:-1]
+
+ def validator(obj):
+ if isinstance(obj, dict) and len(obj) == 1:
+ k, v = list(obj.items())[0]
+ if k.startswith("by-") and k[len("by-") :] in fields:
+ res = {}
+ for kk, vv in v.items():
+ try:
+ res[kk] = validator(vv)
+ except voluptuous.Invalid as e:
+ e.prepend([k, kk])
+ raise
+ return res
+ return Schema(schema)(obj)
+
+ return validator
+
+
+def resolve_keyed_by(
+ item, field, item_name, defer=None, enforce_single_match=True, **extra_values
+):
+ """
+ For values which can either accept a literal value, or be keyed by some
+ other attribute of the item, perform that lookup and replacement in-place
+ (modifying `item` directly). The field is specified using dotted notation
+ to traverse dictionaries.
+
+ For example, given item::
+
+ job:
+ test-platform: linux128
+ chunks:
+ by-test-platform:
+ macosx-10.11/debug: 13
+ win.*: 6
+ default: 12
+
+    a call to `resolve_keyed_by(item, 'job.chunks', item_name='some-job')`
+ would mutate item in-place to::
+
+ job:
+ test-platform: linux128
+ chunks: 12
+
+ The `item_name` parameter is used to generate useful error messages.
+
+ If extra_values are supplied, they represent additional values available
+ for reference from by-<field>.
+
+ Items can be nested as deeply as the schema will allow::
+
+ chunks:
+ by-test-platform:
+ win.*:
+ by-project:
+ ash: ..
+ cedar: ..
+ linux: 13
+ default: 12
+
+ Args:
+ item (dict): Object being evaluated.
+ field (str): Name of the key to perform evaluation on.
+ item_name (str): Used to generate useful error messages.
+ defer (list):
+ Allows evaluating a by-* entry at a later time. In the example
+ above it's possible that the project attribute hasn't been set yet,
+ in which case we'd want to stop before resolving that subkey and
+ then call this function again later. This can be accomplished by
+ setting `defer=["project"]` in this example.
+ enforce_single_match (bool):
+ If True (default), each task may only match a single arm of the
+ evaluation.
+ extra_values (kwargs):
+ If supplied, represent additional values available
+ for reference from by-<field>.
+
+ Returns:
+ dict: item which has also been modified in-place.
+ """
+ # find the field, returning the item unchanged if anything goes wrong
+ container, subfield = item, field
+ while "." in subfield:
+ f, subfield = subfield.split(".", 1)
+ if f not in container:
+ return item
+ container = container[f]
+ if not isinstance(container, dict):
+ return item
+
+ if subfield not in container:
+ return item
+
+ container[subfield] = evaluate_keyed_by(
+ value=container[subfield],
+ item_name=f"`{field}` in `{item_name}`",
+ defer=defer,
+ enforce_single_match=enforce_single_match,
+ attributes=dict(item, **extra_values),
+ )
+
+ return item
+
+
+# Schemas for YAML files should use dashed identifiers by default. If there are
+# components of the schema for which there is a good reason to use another format,
+# they can be excepted here.
+EXCEPTED_SCHEMA_IDENTIFIERS = [
+ # upstream-artifacts and artifact-map are handed directly to scriptWorker,
+ # which expects interCaps
+ "upstream-artifacts",
+ "artifact-map",
+]
+
+
+def check_schema(schema):
+ identifier_re = re.compile(r"^\$?[a-z][a-z0-9-]*$")
+
+ def excepted(item):
+ for esi in EXCEPTED_SCHEMA_IDENTIFIERS:
+ if isinstance(esi, str):
+ if f"[{esi!r}]" in item:
+ return True
+ elif esi(item):
+ return True
+ return False
+
+ def iter(path, sch):
+ def check_identifier(path, k):
+ if k in (str,) or k in (str, voluptuous.Extra):
+ pass
+ elif isinstance(k, voluptuous.NotIn):
+ pass
+ elif isinstance(k, str):
+ if not identifier_re.match(k) and not excepted(path):
+ raise RuntimeError(
+ "YAML schemas should use dashed lower-case identifiers, "
+ "not {!r} @ {}".format(k, path)
+ )
+ elif isinstance(k, (voluptuous.Optional, voluptuous.Required)):
+ check_identifier(path, k.schema)
+ elif isinstance(k, (voluptuous.Any, voluptuous.All)):
+ for v in k.validators:
+ check_identifier(path, v)
+ elif not excepted(path):
+ raise RuntimeError(
+ "Unexpected type in YAML schema: {} @ {}".format(
+ type(k).__name__, path
+ )
+ )
+
+ if isinstance(sch, collections.abc.Mapping):
+ for k, v in sch.items():
+ child = f"{path}[{k!r}]"
+ check_identifier(child, k)
+ iter(child, v)
+ elif isinstance(sch, (list, tuple)):
+ for i, v in enumerate(sch):
+ iter(f"{path}[{i}]", v)
+ elif isinstance(sch, voluptuous.Any):
+ for v in sch.validators:
+ iter(path, v)
+
+ iter("schema", schema.schema)
+
+
+class Schema(voluptuous.Schema):
+ """
+ Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks
+ in the process.
+ """
+
+ def __init__(self, *args, check=True, **kwargs):
+ super().__init__(*args, **kwargs)
+
+ self.check = check
+ if not taskgraph.fast and self.check:
+ check_schema(self)
+
+ def extend(self, *args, **kwargs):
+ schema = super().extend(*args, **kwargs)
+
+ if self.check:
+ check_schema(schema)
+            # Ensure a schema that is extended a second time is checked too.
+ schema.__class__ = Schema
+ return schema
+
+ def _compile(self, schema):
+ if taskgraph.fast:
+ return
+ return super()._compile(schema)
+
+ def __getitem__(self, item):
+ return self.schema[item]
+
+
+OptimizationSchema = voluptuous.Any(
+ # always run this task (default)
+ None,
+ # search the index for the given index namespaces, and replace this task if found
+ # the search occurs in order, with the first match winning
+ {"index-search": [str]},
+ # skip this task if none of the given file patterns match
+ {"skip-unless-changed": [str]},
+)
+
+# shortcut for a string where task references are allowed
+taskref_or_string = voluptuous.Any(
+ str,
+ {voluptuous.Required("task-reference"): str},
+ {voluptuous.Required("artifact-reference"): str},
+)
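
Putting Schema, optionally_keyed_by, and resolve_keyed_by together, as a sketch with made-up fields:

from taskgraph.util.schema import Schema, optionally_keyed_by, resolve_keyed_by

schema = Schema({
    "test-platform": str,
    "chunks": optionally_keyed_by("test-platform", int),
})

item = {
    "test-platform": "win10",
    "chunks": {"by-test-platform": {"win.*": 6, "default": 12}},
}
schema(item)  # accepts both the literal and the by-test-platform form
resolve_keyed_by(item, "chunks", item_name="example-task")
assert item["chunks"] == 6  # resolved in place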
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py
new file mode 100644
index 0000000000..d695767f05
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py
@@ -0,0 +1,40 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+SHELL_QUOTE_RE = re.compile(r"[\\\t\r\n \'\"#<>&|`(){}$;\*\?]")
+
+
+def _quote(s):
+ """Given a string, returns a version that can be used literally on a shell
+ command line, enclosing it with single quotes if necessary.
+
+ As a special case, if given an int, returns a string containing the int,
+ not enclosed in quotes.
+ """
+ if type(s) == int:
+ return "%d" % s
+
+ # Empty strings need to be quoted to have any significance
+ if s and not SHELL_QUOTE_RE.search(s) and not s.startswith("~"):
+ return s
+
+ # Single quoted strings can contain any characters unescaped except the
+ # single quote itself, which can't even be escaped, so the string needs to
+ # be closed, an escaped single quote added, and reopened.
+ t = type(s)
+ return t("'%s'") % s.replace(t("'"), t("'\\''"))
+
+
+def quote(*strings):
+ """Given one or more strings, returns a quoted string that can be used
+ literally on a shell command line.
+
+ >>> quote('a', 'b')
+ "a b"
+ >>> quote('a b', 'c')
+ "'a b' c"
+ """
+ return " ".join(_quote(s) for s in strings)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py
new file mode 100644
index 0000000000..a830a473b3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py
@@ -0,0 +1,373 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import datetime
+import functools
+import logging
+import os
+
+import requests
+import taskcluster_urls as liburls
+from requests.packages.urllib3.util.retry import Retry
+
+from taskgraph.task import Task
+from taskgraph.util import yaml
+from taskgraph.util.memoize import memoize
+
+logger = logging.getLogger(__name__)
+
+# this is set to true for `mach taskgraph action-callback --test`
+testing = False
+
+# Default rootUrl to use if none is given in the environment; this should point
+# to the production Taskcluster deployment used for CI.
+PRODUCTION_TASKCLUSTER_ROOT_URL = None
+
+# the maximum number of parallel Taskcluster API calls to make
+CONCURRENCY = 50
+
+
+@memoize
+def get_root_url(use_proxy):
+ """Get the current TASKCLUSTER_ROOT_URL.
+
+ When running in a task, this must come from $TASKCLUSTER_ROOT_URL; when run
+ on the command line, a default may be provided that points to the
+ production deployment of Taskcluster. If use_proxy is set, this attempts to
+ get TASKCLUSTER_PROXY_URL instead, failing if it is not set.
+ """
+ if use_proxy:
+ try:
+ return liburls.normalize_root_url(os.environ["TASKCLUSTER_PROXY_URL"])
+ except KeyError:
+ if "TASK_ID" not in os.environ:
+ raise RuntimeError(
+ "taskcluster-proxy is not available when not executing in a task"
+ )
+ else:
+ raise RuntimeError("taskcluster-proxy is not enabled for this task")
+
+ if "TASKCLUSTER_ROOT_URL" in os.environ:
+ logger.debug(
+ "Running in Taskcluster instance {}{}".format(
+ os.environ["TASKCLUSTER_ROOT_URL"],
+ " with taskcluster-proxy"
+ if "TASKCLUSTER_PROXY_URL" in os.environ
+ else "",
+ )
+ )
+ return liburls.normalize_root_url(os.environ["TASKCLUSTER_ROOT_URL"])
+
+ if "TASK_ID" in os.environ:
+ raise RuntimeError("$TASKCLUSTER_ROOT_URL must be set when running in a task")
+
+ if PRODUCTION_TASKCLUSTER_ROOT_URL is None:
+ raise RuntimeError(
+ "Could not detect Taskcluster instance, set $TASKCLUSTER_ROOT_URL"
+ )
+
+ logger.debug("Using default TASKCLUSTER_ROOT_URL")
+ return liburls.normalize_root_url(PRODUCTION_TASKCLUSTER_ROOT_URL)
+
+
+def requests_retry_session(
+ retries,
+ backoff_factor=0.1,
+ status_forcelist=(500, 502, 503, 504),
+ concurrency=CONCURRENCY,
+ session=None,
+):
+ session = session or requests.Session()
+ retry = Retry(
+ total=retries,
+ read=retries,
+ connect=retries,
+ backoff_factor=backoff_factor,
+ status_forcelist=status_forcelist,
+ )
+
+ # Default HTTPAdapter uses 10 connections. Mount custom adapter to increase
+ # that limit. Connections are established as needed, so using a large value
+ # should not negatively impact performance.
+ http_adapter = requests.adapters.HTTPAdapter(
+ pool_connections=concurrency,
+ pool_maxsize=concurrency,
+ max_retries=retry,
+ )
+ session.mount("http://", http_adapter)
+ session.mount("https://", http_adapter)
+
+ return session
+
+
+@memoize
+def get_session():
+ return requests_retry_session(retries=5)
+
+
+def _do_request(url, method=None, **kwargs):
+ if method is None:
+ method = "post" if kwargs else "get"
+
+ session = get_session()
+ if method == "get":
+ kwargs["stream"] = True
+
+ response = getattr(session, method)(url, **kwargs)
+
+ if response.status_code >= 400:
+ # Consume content before raise_for_status, so that the connection can be
+ # reused.
+ response.content
+ response.raise_for_status()
+ return response
+
+
+def _handle_artifact(path, response):
+ if path.endswith(".json"):
+ return response.json()
+ if path.endswith(".yml"):
+ return yaml.load_stream(response.text)
+ response.raw.read = functools.partial(response.raw.read, decode_content=True)
+ return response.raw
+
+
+def get_artifact_url(task_id, path, use_proxy=False):
+ artifact_tmpl = liburls.api(
+ get_root_url(False), "queue", "v1", "task/{}/artifacts/{}"
+ )
+ data = artifact_tmpl.format(task_id, path)
+ if use_proxy:
+ # Until Bug 1405889 is deployed, we can't download directly
+ # from the taskcluster-proxy. Work around by using the /bewit
+ # endpoint instead.
+ # The bewit URL is the body of a 303 redirect, which we don't
+ # want to follow (which fetches a potentially large resource).
+ response = _do_request(
+ os.environ["TASKCLUSTER_PROXY_URL"] + "/bewit",
+ data=data,
+ allow_redirects=False,
+ )
+ return response.text
+ return data
+
+
+def get_artifact(task_id, path, use_proxy=False):
+ """
+ Returns the artifact with the given path for the given task id.
+
+ If the path ends with ".json" or ".yml", the content is deserialized as,
+ respectively, json or yaml, and the corresponding python data (usually
+ dict) is returned.
+ For other types of content, a file-like object is returned.
+ """
+ response = _do_request(get_artifact_url(task_id, path, use_proxy))
+ return _handle_artifact(path, response)
+
+
+def list_artifacts(task_id, use_proxy=False):
+ response = _do_request(get_artifact_url(task_id, "", use_proxy).rstrip("/"))
+ return response.json()["artifacts"]
+
+
+def get_artifact_prefix(task):
+ prefix = None
+ if isinstance(task, dict):
+ prefix = task.get("attributes", {}).get("artifact_prefix")
+ elif isinstance(task, Task):
+ prefix = task.attributes.get("artifact_prefix")
+ else:
+ raise Exception(f"Can't find artifact-prefix of non-task: {task}")
+ return prefix or "public/build"
+
+
+def get_artifact_path(task, path):
+ return f"{get_artifact_prefix(task)}/{path}"
+
+
+def get_index_url(index_path, use_proxy=False, multiple=False):
+ index_tmpl = liburls.api(get_root_url(use_proxy), "index", "v1", "task{}/{}")
+ return index_tmpl.format("s" if multiple else "", index_path)
+
+
+def find_task_id(index_path, use_proxy=False):
+ try:
+ response = _do_request(get_index_url(index_path, use_proxy))
+ except requests.exceptions.HTTPError as e:
+ if e.response.status_code == 404:
+ raise KeyError(f"index path {index_path} not found")
+ raise
+ return response.json()["taskId"]
+
+
+def get_artifact_from_index(index_path, artifact_path, use_proxy=False):
+ full_path = index_path + "/artifacts/" + artifact_path
+ response = _do_request(get_index_url(full_path, use_proxy))
+ return _handle_artifact(full_path, response)
+
+
+def list_tasks(index_path, use_proxy=False):
+ """
+ Returns a list of task_ids where each task_id is indexed under a path
+ in the index. Results are sorted by expiration date from oldest to newest.
+ """
+ results = []
+ data = {}
+ while True:
+ response = _do_request(
+ get_index_url(index_path, use_proxy, multiple=True), json=data
+ )
+ response = response.json()
+ results += response["tasks"]
+ if response.get("continuationToken"):
+ data = {"continuationToken": response.get("continuationToken")}
+ else:
+ break
+
+ # We can sort on expires because, in the general case, all of these tasks
+ # should be created with the same relative expires time, so they end up
+ # ordered from the earliest to the latest action. If more correctness is
+ # needed, consider fetching each task and sorting on the created date.
+ results.sort(key=lambda t: parse_time(t["expires"]))
+ return [t["taskId"] for t in results]
+
+
+def parse_time(timestamp):
+ """Turn a "JSON timestamp" as used in TC APIs into a datetime"""
+ return datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")
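+
+# A worked example (illustrative): parse_time turns a Taskcluster JSON
+# timestamp into a naive UTC datetime.
+#
+# parse_time("2023-01-15T12:30:00.000Z")
+# # => datetime.datetime(2023, 1, 15, 12, 30)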
+
+
+def get_task_url(task_id, use_proxy=False):
+ task_tmpl = liburls.api(get_root_url(use_proxy), "queue", "v1", "task/{}")
+ return task_tmpl.format(task_id)
+
+
+def get_task_definition(task_id, use_proxy=False):
+ response = _do_request(get_task_url(task_id, use_proxy))
+ return response.json()
+
+
+def cancel_task(task_id, use_proxy=False):
+ """Cancels a task given a task_id. In testing mode, just logs that it would
+ have cancelled."""
+ if testing:
+ logger.info(f"Would have cancelled {task_id}.")
+ else:
+ _do_request(get_task_url(task_id, use_proxy) + "/cancel", json={})
+
+
+def status_task(task_id, use_proxy=False):
+ """Gets the status of a task given a task_id.
+
+ In testing mode, just logs that it would have retrieved status.
+
+ Args:
+ task_id (str): A task id.
+ use_proxy (bool): Whether to use taskcluster-proxy (default: False)
+
+ Returns:
+ dict: A dictionary object as defined here:
+ https://docs.taskcluster.net/docs/reference/platform/queue/api#status
+ """
+ if testing:
+ logger.info(f"Would have gotten status for {task_id}.")
+ else:
+ resp = _do_request(get_task_url(task_id, use_proxy) + "/status")
+ status = resp.json().get("status", {})
+ return status
+
+
+def state_task(task_id, use_proxy=False):
+ """Gets the state of a task given a task_id.
+
+ In testing mode, just logs that it would have retrieved state. This is a subset of the
+ data returned by :func:`status_task`.
+
+ Args:
+ task_id (str): A task id.
+ use_proxy (bool): Whether to use taskcluster-proxy (default: False)
+
+ Returns:
+ str: The state of the task, one of
+ ``pending, running, completed, failed, exception, unknown``.
+ """
+ if testing:
+ logger.info(f"Would have gotten state for {task_id}.")
+ else:
+ status = status_task(task_id, use_proxy=use_proxy).get("state") or "unknown"
+ return status
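+
+# Usage sketch (illustrative; hypothetical task id, requires network access):
+#
+# if state_task(task_id) in ("failed", "exception"):
+#     rerun_task(task_id)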
+
+
+def rerun_task(task_id):
+ """Reruns a task given a task_id. In testing mode, just logs that it would
+ have reran."""
+ if testing:
+ logger.info(f"Would have rerun {task_id}.")
+ else:
+ _do_request(get_task_url(task_id, use_proxy=True) + "/rerun", json={})
+
+
+def get_current_scopes():
+ """Get the current scopes. This only makes sense in a task with the Taskcluster
+ proxy enabled, where it returns the actual scopes accorded to the task."""
+ auth_url = liburls.api(get_root_url(True), "auth", "v1", "scopes/current")
+ resp = _do_request(auth_url)
+ return resp.json().get("scopes", [])
+
+
+def get_purge_cache_url(provisioner_id, worker_type, use_proxy=False):
+ url_tmpl = liburls.api(
+ get_root_url(use_proxy), "purge-cache", "v1", "purge-cache/{}/{}"
+ )
+ return url_tmpl.format(provisioner_id, worker_type)
+
+
+def purge_cache(provisioner_id, worker_type, cache_name, use_proxy=False):
+ """Requests a cache purge from the purge-caches service."""
+ if testing:
+ logger.info(
+ "Would have purged {}/{}/{}.".format(
+ provisioner_id, worker_type, cache_name
+ )
+ )
+ else:
+ logger.info(f"Purging {provisioner_id}/{worker_type}/{cache_name}.")
+ purge_cache_url = get_purge_cache_url(provisioner_id, worker_type, use_proxy)
+ _do_request(purge_cache_url, json={"cacheName": cache_name})
+
+
+def send_email(address, subject, content, link, use_proxy=False):
+ """Sends an email using the notify service"""
+ logger.info(f"Sending email to {address}.")
+ url = liburls.api(get_root_url(use_proxy), "notify", "v1", "email")
+ _do_request(
+ url,
+ json={
+ "address": address,
+ "subject": subject,
+ "content": content,
+ "link": link,
+ },
+ )
+
+
+def list_task_group_incomplete_tasks(task_group_id):
+ """Generate the incomplete tasks in a task group"""
+ params = {}
+ while True:
+ url = liburls.api(
+ get_root_url(False),
+ "queue",
+ "v1",
+ f"task-group/{task_group_id}/list",
+ )
+ resp = _do_request(url, method="get", params=params).json()
+ for task in [t["status"] for t in resp["tasks"]]:
+ if task["state"] in ["running", "pending", "unscheduled"]:
+ yield task["taskId"]
+ if resp.get("continuationToken"):
+ params = {"continuationToken": resp.get("continuationToken")}
+ else:
+ break
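+
+# Usage sketch (illustrative; the task group id is hypothetical):
+#
+# for task_id in list_task_group_incomplete_tasks("abc123DEFghi"):
+#     cancel_task(task_id)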
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py
new file mode 100644
index 0000000000..7b545595ef
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py
@@ -0,0 +1,54 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Tools for interacting with existing taskgraphs.
+"""
+
+
+from taskgraph.util.taskcluster import find_task_id, get_artifact
+
+
+def find_decision_task(parameters, graph_config):
+ """Given the parameters for this action, find the taskId of the decision
+ task"""
+ if parameters.get("repository_type", "hg") == "hg":
+ return find_task_id(
+ "{}.v2.{}.pushlog-id.{}.decision".format(
+ graph_config["trust-domain"],
+ parameters["project"],
+ parameters["pushlog_id"],
+ )
+ )
+ elif parameters["repository_type"] == "git":
+ return find_task_id(
+ "{}.v2.{}.revision.{}.taskgraph.decision".format(
+ graph_config["trust-domain"],
+ parameters["project"],
+ parameters["head_rev"],
+ )
+ )
+ else:
+ raise Exception(
+ "Unknown repository_type {}!".format(parameters["repository_type"])
+ )
+
+
+def find_existing_tasks_from_previous_kinds(
+ full_task_graph, previous_graph_ids, rebuild_kinds
+):
+ """Given a list of previous decision/action taskIds and kinds to ignore
+ from the previous graphs, return a dictionary of labels-to-taskids to use
+ as ``existing_tasks`` in the optimization step."""
+ existing_tasks = {}
+ for previous_graph_id in previous_graph_ids:
+ label_to_taskid = get_artifact(previous_graph_id, "public/label-to-taskid.json")
+ kind_labels = {
+ t.label
+ for t in full_task_graph.tasks.values()
+ if t.attributes["kind"] not in rebuild_kinds
+ }
+ for label in set(label_to_taskid.keys()).intersection(kind_labels):
+ existing_tasks[label] = label_to_taskid[label]
+ return existing_tasks
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py
new file mode 100644
index 0000000000..23cd5f8d68
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py
@@ -0,0 +1,80 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import copy
+
+
+def merge_to(source, dest):
+ """
+ Merge dicts and lists recursively (scalar values are overridden).
+
+ Keys from source override keys from dest, and elements from lists in source
+ are appended to lists in dest.
+
+ :param dict source: to copy from
+ :param dict dest: to copy to (modified in place)
+ """
+
+ for key, value in source.items():
+ # Override mismatching or empty types
+ if type(value) != type(dest.get(key)): # noqa
+ dest[key] = source[key]
+ continue
+
+ # Merge dict
+ if isinstance(value, dict):
+ merge_to(value, dest[key])
+ continue
+
+ if isinstance(value, list):
+ dest[key] = dest[key] + source[key]
+ continue
+
+ dest[key] = source[key]
+
+ return dest
+
+
+def merge(*objects):
+ """
+ Merge the given objects, using the semantics described for merge_to, with
+ objects later in the list taking precedence. From an inheritance
+ perspective, "parents" should be listed before "children".
+
+ Returns the result without modifying any arguments.
+ """
+ if len(objects) == 1:
+ return copy.deepcopy(objects[0])
+ return merge_to(objects[-1], merge(*objects[:-1]))
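+
+# A worked example (illustrative): later objects take precedence, dicts are
+# merged recursively, and lists are concatenated.
+#
+# merge({"env": {"A": "1"}, "tags": ["x"]}, {"env": {"B": "2"}, "tags": ["y"]})
+# # => {"env": {"A": "1", "B": "2"}, "tags": ["x", "y"]}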
+
+
+def deep_get(dict_, field):
+ container, subfield = dict_, field
+ while "." in subfield:
+ f, subfield = subfield.split(".", 1)
+ if f not in container:
+ return None
+
+ container = container[f]
+
+ return container.get(subfield)
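+
+# A worked example (illustrative): deep_get walks nested dicts with a
+# dot-separated field path and returns None for missing segments.
+#
+# deep_get({"worker": {"env": {"PATH": "/bin"}}}, "worker.env.PATH")  # "/bin"
+# deep_get({"worker": {}}, "worker.env.PATH")  # None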
+
+
+def substitute(item, **subs):
+ if isinstance(item, list):
+ for i in range(len(item)):
+ item[i] = substitute(item[i], **subs)
+ elif isinstance(item, dict):
+ new_dict = {}
+ for k, v in item.items():
+ k = k.format(**subs)
+ new_dict[k] = substitute(v, **subs)
+ item = new_dict
+ elif isinstance(item, str):
+ item = item.format(**subs)
+
+ return item
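+
+# A worked example (illustrative): substitute applies str.format recursively
+# to dict keys, dict values and strings (lists are modified in place).
+#
+# substitute({"{name}-label": "build-{name}"}, name="firefox")
+# # => {"firefox-label": "build-firefox"}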
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py
new file mode 100644
index 0000000000..e511978b5f
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py
@@ -0,0 +1,115 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Python port of the ms.js node module. This is not a direct port; some things
+# are more complicated or less precise, and we lean on timedelta here.
+
+
+import datetime
+import re
+
+PATTERN = re.compile(r"((?:\d+)?\.?\d+) *([a-z]+)")
+
+
+def seconds(value):
+ return datetime.timedelta(seconds=int(value))
+
+
+def minutes(value):
+ return datetime.timedelta(minutes=int(value))
+
+
+def hours(value):
+ return datetime.timedelta(hours=int(value))
+
+
+def days(value):
+ return datetime.timedelta(days=int(value))
+
+
+def months(value):
+ # See warning in years(), below
+ return datetime.timedelta(days=int(value) * 30)
+
+
+def years(value):
+ # Warning here "years" are vague don't use this for really sensitive date
+ # computation the idea is to give you a absolute amount of time in the
+ # future which is not the same thing as "precisely on this date next year"
+ return datetime.timedelta(days=int(value) * 365)
+
+
+ALIASES = {}
+ALIASES["seconds"] = ALIASES["second"] = ALIASES["s"] = seconds
+ALIASES["minutes"] = ALIASES["minute"] = ALIASES["min"] = minutes
+ALIASES["hours"] = ALIASES["hour"] = ALIASES["h"] = hours
+ALIASES["days"] = ALIASES["day"] = ALIASES["d"] = days
+ALIASES["months"] = ALIASES["month"] = ALIASES["mo"] = months
+ALIASES["years"] = ALIASES["year"] = ALIASES["y"] = years
+
+
+class InvalidString(Exception):
+ pass
+
+
+class UnknownTimeMeasurement(Exception):
+ pass
+
+
+def value_of(input_str):
+ """
+ Convert a duration string into the corresponding timedelta.
+ :param str input_str: Duration string (ex: 1d, 2d, 6years, 2 seconds)
+ :returns: datetime.timedelta for the given duration
+ """
+
+ matches = PATTERN.search(input_str)
+
+ if matches is None or len(matches.groups()) < 2:
+ raise InvalidString(f"'{input_str}' is an invalid duration string")
+
+ value, unit = matches.groups()
+
+ if unit not in ALIASES:
+ raise UnknownTimeMeasurement(
+ "{} is not a valid time measure use one of {}".format(
+ unit, sorted(ALIASES.keys())
+ )
+ )
+
+ return ALIASES[unit](value)
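+
+# Worked examples (illustrative):
+#
+# value_of("2 hours") == datetime.timedelta(hours=2)  # True
+# value_of("1d") == datetime.timedelta(days=1)  # True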
+
+
+def json_time_from_now(input_str, now=None, datetime_format=False):
+ """
+ :param str input_str: Input string (see value_of)
+ :param datetime now: Optionally set the definition of `now`
+ :param boolean datetime_format: Set `True` to get a `datetime` output
+ :returns: JSON string representation of time in future.
+ """
+
+ if now is None:
+ now = datetime.datetime.utcnow()
+
+ time = now + value_of(input_str)
+
+ if datetime_format is True:
+ return time
+ else:
+ # Sorta a big hack but the json schema validator for date does not like the
+ # ISO dates until 'Z' (for timezone) is added...
+ # Microseconds are excluded (see bug 1381801)
+ return time.isoformat(timespec="milliseconds") + "Z"
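+
+# A worked example (illustrative):
+#
+# json_time_from_now("1d", now=datetime.datetime(2023, 1, 1))
+# # => "2023-01-02T00:00:00.000Z"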
+
+
+def current_json_time(datetime_format=False):
+ """
+ :param boolean datetime_format: Set `True` to get a `datetime` output
+ :returns: JSON string representation of the current time.
+ """
+ if datetime_format is True:
+ return datetime.datetime.utcnow()
+ else:
+ # Microseconds are excluded (see bug 1381801)
+ return datetime.datetime.utcnow().isoformat(timespec="milliseconds") + "Z"
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py
new file mode 100644
index 0000000000..cff5f286cc
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py
@@ -0,0 +1,84 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+_JOINED_SYMBOL_RE = re.compile(r"([^(]*)\(([^)]*)\)$")
+
+
+def split_symbol(treeherder_symbol):
+ """Split a symbol expressed as grp(sym) into its two parts. If no group is
+ given, the returned group is '?'"""
+ groupSymbol = "?"
+ symbol = treeherder_symbol
+ if "(" in symbol:
+ match = _JOINED_SYMBOL_RE.match(symbol)
+ if match:
+ groupSymbol, symbol = match.groups()
+ else:
+ raise Exception(f"`{symbol}` is not a valid treeherder symbol.")
+ return groupSymbol, symbol
+
+
+def join_symbol(group, symbol):
+ """Perform the reverse of split_symbol, combining the given group and
+ symbol. If the group is '?', then it is omitted."""
+ if group == "?":
+ return symbol
+ return f"{group}({symbol})"
+
+
+def add_suffix(treeherder_symbol, suffix):
+ """Add a suffix to a treeherder symbol that may contain a group."""
+ group, symbol = split_symbol(treeherder_symbol)
+ symbol += str(suffix)
+ return join_symbol(group, symbol)
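+
+# Worked examples (illustrative):
+#
+# split_symbol("B(sym)")  # => ("B", "sym")
+# split_symbol("sym")  # => ("?", "sym")
+# add_suffix("B(sym)", "-1")  # => "B(sym-1)"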
+
+
+def replace_group(treeherder_symbol, new_group):
+ """Add a suffix to a treeherder symbol that may contain a group."""
+ _, symbol = split_symbol(treeherder_symbol)
+ return join_symbol(new_group, symbol)
+
+
+def inherit_treeherder_from_dep(job, dep_job):
+ """Inherit treeherder defaults from dep_job"""
+ treeherder = job.get("treeherder", {})
+
+ dep_th_platform = (
+ dep_job.task.get("extra", {})
+ .get("treeherder", {})
+ .get("machine", {})
+ .get("platform", "")
+ )
+ dep_th_collection = list(
+ dep_job.task.get("extra", {}).get("treeherder", {}).get("collection", {}).keys()
+ )[0]
+ treeherder.setdefault("platform", f"{dep_th_platform}/{dep_th_collection}")
+ treeherder.setdefault(
+ "tier", dep_job.task.get("extra", {}).get("treeherder", {}).get("tier", 1)
+ )
+ # Does not set symbol
+ treeherder.setdefault("kind", "build")
+ return treeherder
+
+
+def treeherder_defaults(kind, label):
+ defaults = {
+ # Despite its name, this is expected to be a platform+collection
+ "platform": "default/opt",
+ "tier": 1,
+ }
+ if "build" in kind:
+ defaults["kind"] = "build"
+ elif "test" in kind:
+ defaults["kind"] = "test"
+ else:
+ defaults["kind"] = "other"
+
+ # Takes the uppercased first letter of each part of the kind name, eg:
+ # apple-banana -> AB
+ defaults["symbol"] = "".join([c[0] for c in kind.split("-")]).upper()
+
+ return defaults
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py
new file mode 100644
index 0000000000..2d967d2645
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py
@@ -0,0 +1,552 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import os
+import re
+import subprocess
+from abc import ABC, abstractmethod, abstractproperty
+from shutil import which
+
+import requests
+from redo import retry
+
+from taskgraph.util.path import ancestors
+
+PUSHLOG_TMPL = "{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1"
+
+logger = logging.getLogger(__name__)
+
+
+class Repository(ABC):
+ # Both mercurial and git use sha1 as revision identifiers. Luckily, both define
+ # the same value as the null revision.
+ #
+ # https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7
+ # https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30
+ NULL_REVISION = "0000000000000000000000000000000000000000"
+
+ def __init__(self, path):
+ self.path = path
+ self.binary = which(self.tool)
+ if self.binary is None:
+ raise OSError(f"{self.tool} not found!")
+ self._valid_diff_filter = ("m", "a", "d")
+
+ self._env = os.environ.copy()
+
+ def run(self, *args: str, **kwargs):
+ return_codes = kwargs.pop("return_codes", [])
+ cmd = (self.binary,) + args
+
+ try:
+ return subprocess.check_output(
+ cmd, cwd=self.path, env=self._env, encoding="utf-8", **kwargs
+ )
+ except subprocess.CalledProcessError as e:
+ if e.returncode in return_codes:
+ return ""
+ raise
+
+ @abstractproperty
+ def tool(self) -> str:
+ """Version control system being used, either 'hg' or 'git'."""
+
+ @abstractproperty
+ def head_rev(self) -> str:
+ """Hash of HEAD revision."""
+
+ @abstractproperty
+ def base_rev(self):
+ """Hash of revision the current topic branch is based on."""
+
+ @abstractproperty
+ def branch(self):
+ """Current branch or bookmark the checkout has active."""
+
+ @abstractproperty
+ def all_remote_names(self):
+ """Name of all configured remote repositories."""
+
+ @abstractproperty
+ def default_remote_name(self):
+ """Name the VCS defines for the remote repository when cloning
+ it for the first time. This name may not exist anymore if, for
+ instance, users changed the default configuration."""
+
+ @abstractproperty
+ def remote_name(self):
+ """Name of the remote repository."""
+
+ def _get_most_suitable_remote(self, remote_instructions):
+ remotes = self.all_remote_names
+ if len(remotes) == 1:
+ return remotes[0]
+
+ if self.default_remote_name in remotes:
+ return self.default_remote_name
+
+ first_remote = remotes[0]
+ logger.warning(
+ f"Unable to determine which remote repository to use between: {remotes}. "
+ f'Arbitrarily using the first one "{first_remote}". Please set an '
+ f"`{self.default_remote_name}` remote if the arbitrarily selected one "
+ f"is not right. To do so: {remote_instructions}"
+ )
+
+ return first_remote
+
+ @abstractproperty
+ def default_branch(self):
+ """Name of the default branch."""
+
+ @abstractmethod
+ def get_url(self, remote=None):
+ """Get URL of the upstream repository."""
+
+ @abstractmethod
+ def get_commit_message(self, revision=None):
+ """Commit message of specified revision or current commit."""
+
+ @abstractmethod
+ def get_changed_files(self, diff_filter, mode="unstaged", rev=None, base_rev=None):
+ """Return a list of files that are changed in:
+ * either this repository's working copy,
+ * or at a given revision (``rev``)
+ * or between 2 revisions (``base_rev`` and ``rev``)
+
+ ``diff_filter`` controls which kinds of modifications are returned.
+ It is a string which may only contain the following characters:
+
+ A - Include files that were added
+ D - Include files that were deleted
+ M - Include files that were modified
+
+ By default, all three will be included.
+
+ ``mode`` can be one of 'unstaged', 'staged' or 'all'. Only has an
+ effect on git. Defaults to 'unstaged'.
+
+ ``rev`` is a specifier for which changesets to consider for
+ changes. The exact meaning depends on the vcs system being used.
+
+ ``base_rev`` specifies the range of changesets. This parameter cannot
+ be used without ``rev``. The range includes ``rev`` but excludes
+ ``base_rev``.
+ """
+
+ @abstractmethod
+ def get_outgoing_files(self, diff_filter, upstream):
+ """Return a list of changed files compared to upstream.
+
+ ``diff_filter`` works the same as `get_changed_files`.
+ ``upstream`` is a remote ref to compare against. If unspecified,
+ this will be determined automatically. If there is no remote ref,
+ a MissingUpstreamRepo exception will be raised.
+ """
+
+ @abstractmethod
+ def working_directory_clean(self, untracked=False, ignored=False):
+ """Determine if the working directory is free of modifications.
+
+ Returns True if the working directory does not have any file
+ modifications. False otherwise.
+
+ By default, untracked and ignored files are not considered. If
+ ``untracked`` or ``ignored`` are set, those file classes are factored
+ into the clean check as well.
+ """
+
+ @abstractmethod
+ def update(self, ref):
+ """Update the working directory to the specified reference."""
+
+ @abstractmethod
+ def find_latest_common_revision(self, base_ref_or_rev, head_rev):
+ """Find the latest revision that is common to both the given
+ ``head_rev`` and ``base_ref_or_rev``.
+
+ If no common revision exists, ``Repository.NULL_REVISION`` will
+ be returned."""
+
+ @abstractmethod
+ def does_revision_exist_locally(self, revision):
+ """Check whether this revision exists in the local repository.
+
+ If this function returns False unexpectedly, make sure the
+ revision was fetched from the remote repository."""
+
+
+class HgRepository(Repository):
+ tool = "hg"
+ default_remote_name = "default"
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._env["HGPLAIN"] = "1"
+
+ @property
+ def head_rev(self):
+ return self.run("log", "-r", ".", "-T", "{node}").strip()
+
+ @property
+ def base_rev(self):
+ return self.run("log", "-r", "last(ancestors(.) and public())", "-T", "{node}")
+
+ @property
+ def branch(self):
+ bookmarks_fn = os.path.join(self.path, ".hg", "bookmarks.current")
+ if os.path.exists(bookmarks_fn):
+ with open(bookmarks_fn) as f:
+ bookmark = f.read()
+ return bookmark or None
+
+ return None
+
+ @property
+ def all_remote_names(self):
+ remotes = self.run("paths", "--quiet").splitlines()
+ if not remotes:
+ raise RuntimeError("No remotes defined")
+ return remotes
+
+ @property
+ def remote_name(self):
+ return self._get_most_suitable_remote(
+ "Edit .hg/hgrc and add:\n\n[paths]\ndefault = $URL",
+ )
+
+ @property
+ def default_branch(self):
+ # Mercurial recommends keeping "default"
+ # https://www.mercurial-scm.org/wiki/StandardBranching#Don.27t_use_a_name_other_than_default_for_your_main_development_branch
+ return "default"
+
+ def get_url(self, remote="default"):
+ return self.run("path", "-T", "{url}", remote).strip()
+
+ def get_commit_message(self, revision=None):
+ revision = revision or "."
+ return self.run("log", "-r", revision, "-T", "{desc}")
+
+ def _format_diff_filter(self, diff_filter, for_status=False):
+ df = diff_filter.lower()
+ assert all(f in self._valid_diff_filter for f in df)
+
+ # When looking at the changes in the working directory, the hg status
+ # command uses 'd' for files that have been deleted with a non-hg
+ # command, and 'r' for files that have been `hg rm`ed. Use both.
+ return df.replace("d", "dr") if for_status else df
+
+ def _files_template(self, diff_filter):
+ template = ""
+ df = self._format_diff_filter(diff_filter)
+ if "a" in df:
+ template += "{file_adds % '{file}\\n'}"
+ if "d" in df:
+ template += "{file_dels % '{file}\\n'}"
+ if "m" in df:
+ template += "{file_mods % '{file}\\n'}"
+ return template
+
+ def get_changed_files(
+ self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None
+ ):
+ if rev is None:
+ if base_rev is not None:
+ raise ValueError("Cannot specify `base_rev` without `rev`")
+ # Use --no-status to print just the filename.
+ df = self._format_diff_filter(diff_filter, for_status=True)
+ return self.run("status", "--no-status", f"-{df}").splitlines()
+ else:
+ template = self._files_template(diff_filter)
+ revision_argument = rev if base_rev is None else f"{base_rev}~-1::{rev}"
+ return self.run("log", "-r", revision_argument, "-T", template).splitlines()
+
+ def get_outgoing_files(self, diff_filter="ADM", upstream=None):
+ template = self._files_template(diff_filter)
+
+ if not upstream:
+ return self.run(
+ "log", "-r", "draft() and ancestors(.)", "--template", template
+ ).split()
+
+ return self.run(
+ "outgoing",
+ "-r",
+ ".",
+ "--quiet",
+ "--template",
+ template,
+ upstream,
+ return_codes=(1,),
+ ).split()
+
+ def working_directory_clean(self, untracked=False, ignored=False):
+ args = ["status", "--modified", "--added", "--removed", "--deleted"]
+ if untracked:
+ args.append("--unknown")
+ if ignored:
+ args.append("--ignored")
+
+ # If output is empty, there are no entries of requested status, which
+ # means we are clean.
+ return not len(self.run(*args).strip())
+
+ def update(self, ref):
+ return self.run("update", "--check", ref)
+
+ def find_latest_common_revision(self, base_ref_or_rev, head_rev):
+ ancestor = self.run(
+ "log",
+ "-r",
+ f"last(ancestors('{base_ref_or_rev}') and ancestors('{head_rev}'))",
+ "--template",
+ "{node}",
+ ).strip()
+ return ancestor or self.NULL_REVISION
+
+ def does_revision_exist_locally(self, revision):
+ try:
+ return bool(self.run("log", "-r", revision).strip())
+ except subprocess.CalledProcessError as e:
+ # Error code 255 comes with the message:
+ # "abort: unknown revision $REVISION"
+ if e.returncode == 255:
+ return False
+ raise
+
+
+class GitRepository(Repository):
+ tool = "git"
+ default_remote_name = "origin"
+
+ _LS_REMOTE_PATTERN = re.compile(r"ref:\s+refs/heads/(?P<branch_name>\S+)\s+HEAD")
+
+ @property
+ def head_rev(self):
+ return self.run("rev-parse", "--verify", "HEAD").strip()
+
+ @property
+ def base_rev(self):
+ refs = self.run(
+ "rev-list", "HEAD", "--topo-order", "--boundary", "--not", "--remotes"
+ ).splitlines()
+ if refs:
+ return refs[-1][1:] # boundary starts with a prefix `-`
+ return self.head_rev
+
+ @property
+ def branch(self):
+ return self.run("branch", "--show-current").strip() or None
+
+ @property
+ def all_remote_names(self):
+ remotes = self.run("remote").splitlines()
+ if not remotes:
+ raise RuntimeError("No remotes defined")
+ return remotes
+
+ @property
+ def remote_name(self):
+ try:
+ remote_branch_name = self.run(
+ "rev-parse",
+ "--verify",
+ "--abbrev-ref",
+ "--symbolic-full-name",
+ "@{u}",
+ stderr=subprocess.PIPE,
+ ).strip()
+ return remote_branch_name.split("/")[0]
+ except subprocess.CalledProcessError as e:
+ # Error code 128 comes with the message:
+ # "fatal: no upstream configured for branch $BRANCH"
+ if e.returncode != 128:
+ print(e.stderr)
+ raise
+
+ return self._get_most_suitable_remote("`git remote add origin $URL`")
+
+ @property
+ def default_branch(self):
+ try:
+ # this one works if the current repo was cloned from an existing
+ # repo elsewhere
+ return self._get_default_branch_from_cloned_metadata()
+ except (subprocess.CalledProcessError, RuntimeError):
+ pass
+
+ try:
+ # This call works if you have (network) access to the repo
+ return self._get_default_branch_from_remote_query()
+ except (subprocess.CalledProcessError, RuntimeError):
+ pass
+
+ # this one is the last resort in case the remote is not accessible and
+ # the local repo is where `git init` was made
+ return self._guess_default_branch()
+
+ def _get_default_branch_from_remote_query(self):
+ # This function requires network access to the repo
+ remote_name = self.remote_name
+ output = self.run("ls-remote", "--symref", remote_name, "HEAD")
+ matches = self._LS_REMOTE_PATTERN.search(output)
+ if not matches:
+ raise RuntimeError(
+ f'Could not find the default branch of remote repository "{remote_name}". '
+ "Got: {output}"
+ )
+
+ branch_name = matches.group("branch_name")
+ return f"{remote_name}/{branch_name}"
+
+ def _get_default_branch_from_cloned_metadata(self):
+ return self.run("rev-parse", "--abbrev-ref", f"{self.remote_name}/HEAD").strip()
+
+ def _guess_default_branch(self):
+ branches = [
+ line.strip()
+ for line in self.run(
+ "branch", "--all", "--no-color", "--format=%(refname)"
+ ).splitlines()
+ for candidate_branch in ("main", "master", "branches/default/tip")
+ if line.strip().endswith(candidate_branch)
+ ]
+
+ if len(branches) == 1:
+ return branches[0]
+
+ raise RuntimeError(f"Unable to find default branch. Got: {branches}")
+
+ def get_url(self, remote="origin"):
+ return self.run("remote", "get-url", remote).strip()
+
+ def get_commit_message(self, revision=None):
+ revision = revision or "HEAD"
+ return self.run("log", "-n1", "--format=%B", revision)
+
+ def get_changed_files(
+ self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None
+ ):
+ assert all(f.lower() in self._valid_diff_filter for f in diff_filter)
+
+ if rev is None:
+ if base_rev is not None:
+ raise ValueError("Cannot specify `base_rev` without `rev`")
+ cmd = ["diff"]
+ if mode == "staged":
+ cmd.append("--cached")
+ elif mode == "all":
+ cmd.append("HEAD")
+ else:
+ revision_argument = (
+ f"{rev}~1..{rev}" if base_rev is None else f"{base_rev}..{rev}"
+ )
+ cmd = ["log", "--format=format:", revision_argument]
+
+ cmd.append("--name-only")
+ cmd.append("--diff-filter=" + diff_filter.upper())
+
+ files = self.run(*cmd).splitlines()
+ return [f for f in files if f]
+
+ def get_outgoing_files(self, diff_filter="ADM", upstream=None):
+ assert all(f.lower() in self._valid_diff_filter for f in diff_filter)
+
+ not_condition = upstream if upstream else "--remotes"
+
+ files = self.run(
+ "log",
+ "--name-only",
+ f"--diff-filter={diff_filter.upper()}",
+ "--oneline",
+ "--pretty=format:",
+ "HEAD",
+ "--not",
+ not_condition,
+ ).splitlines()
+ return [f for f in files if f]
+
+ def working_directory_clean(self, untracked=False, ignored=False):
+ args = ["status", "--porcelain"]
+
+ # Even in --porcelain mode, behavior is affected by the
+ # ``status.showUntrackedFiles`` option, which means we need to be
+ # explicit about how to treat untracked files.
+ if untracked:
+ args.append("--untracked-files=all")
+ else:
+ args.append("--untracked-files=no")
+
+ if ignored:
+ args.append("--ignored")
+
+ # If output is empty, there are no entries of requested status, which
+ # means we are clean.
+ return not len(self.run(*args).strip())
+
+ def update(self, ref):
+ self.run("checkout", ref)
+
+ def find_latest_common_revision(self, base_ref_or_rev, head_rev):
+ try:
+ return self.run("merge-base", base_ref_or_rev, head_rev).strip()
+ except subprocess.CalledProcessError:
+ return self.NULL_REVISION
+
+ def does_revision_exist_locally(self, revision):
+ try:
+ return self.run("cat-file", "-t", revision).strip() == "commit"
+ except subprocess.CalledProcessError as e:
+ # Error code 128 comes with the message:
+ # "git cat-file: could not get object info"
+ if e.returncode == 128:
+ return False
+ raise
+
+
+def get_repository(path):
+ """Get a repository object for the repository at `path`.
+ If `path` is not a known VCS repository, raise an exception.
+ """
+ for path in ancestors(path):
+ if os.path.isdir(os.path.join(path, ".hg")):
+ return HgRepository(path)
+ elif os.path.exists(os.path.join(path, ".git")):
+ return GitRepository(path)
+
+ raise RuntimeError("Current directory is neither a git or hg repository")
+
+
+def find_hg_revision_push_info(repository, revision):
+ """Given the parameters for this action and a revision, find the
+ pushlog_id of the revision."""
+ pushlog_url = PUSHLOG_TMPL.format(repository, revision)
+
+ def query_pushlog(url):
+ r = requests.get(url, timeout=60)
+ r.raise_for_status()
+ return r
+
+ r = retry(
+ query_pushlog,
+ args=(pushlog_url,),
+ attempts=5,
+ sleeptime=10,
+ )
+ pushes = r.json()["pushes"]
+ if len(pushes) != 1:
+ raise RuntimeError(
+ "Unable to find a single pushlog_id for {} revision {}: {}".format(
+ repository, revision, pushes
+ )
+ )
+ pushid = list(pushes.keys())[0]
+ return {
+ "pushdate": pushes[pushid]["date"],
+ "pushid": pushid,
+ "user": pushes[pushid]["user"],
+ }
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py
new file mode 100644
index 0000000000..e6705c16cf
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py
@@ -0,0 +1,283 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import sys
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Callable, Dict, List, Union
+
+from taskgraph.config import GraphConfig
+from taskgraph.parameters import Parameters
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.util.attributes import match_run_on_projects
+from taskgraph.util.treeherder import join_symbol
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class Verification(ABC):
+ func: Callable
+
+ @abstractmethod
+ def verify(self, **kwargs) -> None:
+ pass
+
+
+@dataclass(frozen=True)
+class InitialVerification(Verification):
+ """Verification that doesn't depend on any generation state."""
+
+ def verify(self):
+ self.func()
+
+
+@dataclass(frozen=True)
+class GraphVerification(Verification):
+ """Verification for a TaskGraph object."""
+
+ run_on_projects: Union[List, None] = field(default=None)
+
+ def verify(
+ self, graph: TaskGraph, graph_config: GraphConfig, parameters: Parameters
+ ):
+ if self.run_on_projects and not match_run_on_projects(
+ parameters["project"], self.run_on_projects
+ ):
+ return
+
+ scratch_pad = {}
+ graph.for_each_task(
+ self.func,
+ scratch_pad=scratch_pad,
+ graph_config=graph_config,
+ parameters=parameters,
+ )
+ self.func(
+ None,
+ graph,
+ scratch_pad=scratch_pad,
+ graph_config=graph_config,
+ parameters=parameters,
+ )
+
+
+@dataclass(frozen=True)
+class ParametersVerification(Verification):
+ """Verification for a set of parameters."""
+
+ def verify(self, parameters: Parameters):
+ self.func(parameters)
+
+
+@dataclass(frozen=True)
+class KindsVerification(Verification):
+ """Verification for kinds."""
+
+ def verify(self, kinds: dict):
+ self.func(kinds)
+
+
+@dataclass(frozen=True)
+class VerificationSequence:
+ """
+ Container for a sequence of verifications over a TaskGraph. Each graph
+ verification is represented as a callable taking (task, taskgraph,
+ scratch_pad, graph_config, parameters), called once for each task in the
+ taskgraph, and one more time with no task but with the taskgraph and the
+ same scratch_pad that was passed for each task.
+ """
+
+ _verifications: Dict = field(default_factory=dict)
+ _verification_types = {
+ "graph": GraphVerification,
+ "initial": InitialVerification,
+ "kinds": KindsVerification,
+ "parameters": ParametersVerification,
+ }
+
+ def __call__(self, name, *args, **kwargs):
+ for verification in self._verifications.get(name, []):
+ verification.verify(*args, **kwargs)
+
+ def add(self, name, **kwargs):
+ cls = self._verification_types.get(name, GraphVerification)
+
+ def wrap(func):
+ self._verifications.setdefault(name, []).append(cls(func, **kwargs))
+ return func
+
+ return wrap
+
+
+verifications = VerificationSequence()
+
+
+@verifications.add("full_task_graph")
+def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, parameters):
+ """
+ This function verifies that the tuple
+ (collection.keys(), machine.platform, groupSymbol, symbol) is unique
+ for a target task graph.
+ """
+ if task is None:
+ return
+ task_dict = task.task
+ if "extra" in task_dict:
+ extra = task_dict["extra"]
+ if "treeherder" in extra:
+ treeherder = extra["treeherder"]
+
+ collection_keys = tuple(sorted(treeherder.get("collection", {}).keys()))
+ if len(collection_keys) != 1:
+ raise Exception(
+ "Task {} can't be in multiple treeherder collections "
+ "(the part of the platform after `/`): {}".format(
+ task.label, collection_keys
+ )
+ )
+ platform = treeherder.get("machine", {}).get("platform")
+ group_symbol = treeherder.get("groupSymbol")
+ symbol = treeherder.get("symbol")
+
+ key = (platform, collection_keys[0], group_symbol, symbol)
+ if key in scratch_pad:
+ raise Exception(
+ "Duplicate treeherder platform and symbol in tasks "
+ "`{}`and `{}`: {} {}".format(
+ task.label,
+ scratch_pad[key],
+ f"{platform}/{collection_keys[0]}",
+ join_symbol(group_symbol, symbol),
+ )
+ )
+ else:
+ scratch_pad[key] = task.label
+
+
+@verifications.add("full_task_graph")
+def verify_trust_domain_v2_routes(
+ task, taskgraph, scratch_pad, graph_config, parameters
+):
+ """
+ This function ensures that any two tasks have distinct ``index.{trust-domain}.v2`` routes.
+ """
+ if task is None:
+ return
+ route_prefix = "index.{}.v2".format(graph_config["trust-domain"])
+ task_dict = task.task
+ routes = task_dict.get("routes", [])
+
+ for route in routes:
+ if route.startswith(route_prefix):
+ if route in scratch_pad:
+ raise Exception(
+ "conflict between {}:{} for route: {}".format(
+ task.label, scratch_pad[route], route
+ )
+ )
+ else:
+ scratch_pad[route] = task.label
+
+
+@verifications.add("full_task_graph")
+def verify_routes_notification_filters(
+ task, taskgraph, scratch_pad, graph_config, parameters
+):
+ """
+ This function ensures that only understood filters for notifications are
+ specified.
+
+ See: https://docs.taskcluster.net/reference/core/taskcluster-notify/docs/usage
+ """
+ if task is None:
+ return
+ route_prefix = "notify."
+ valid_filters = ("on-any", "on-completed", "on-failed", "on-exception")
+ task_dict = task.task
+ routes = task_dict.get("routes", [])
+
+ for route in routes:
+ if route.startswith(route_prefix):
+ # Get the filter of the route
+ route_filter = route.split(".")[-1]
+ if route_filter not in valid_filters:
+ raise Exception(
+ "{} has invalid notification filter ({})".format(
+ task.label, route_filter
+ )
+ )
+
+
+@verifications.add("full_task_graph")
+def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config, parameters):
+ tiers = scratch_pad
+ if task is not None:
+ tiers[task.label] = (
+ task.task.get("extra", {}).get("treeherder", {}).get("tier", sys.maxsize)
+ )
+ else:
+
+ def printable_tier(tier):
+ if tier == sys.maxsize:
+ return "unknown"
+ return tier
+
+ for task in taskgraph.tasks.values():
+ tier = tiers[task.label]
+ for d in task.dependencies.values():
+ if taskgraph[d].task.get("workerType") == "always-optimized":
+ continue
+ if "dummy" in taskgraph[d].kind:
+ continue
+ if tier < tiers[d]:
+ raise Exception(
+ "{} (tier {}) cannot depend on {} (tier {})".format(
+ task.label,
+ printable_tier(tier),
+ d,
+ printable_tier(tiers[d]),
+ )
+ )
+
+
+@verifications.add("full_task_graph")
+def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameters):
+ """
+ This function verifies that toolchain aliases are not reused.
+ """
+ if task is None:
+ return
+ attributes = task.attributes
+ if "toolchain-alias" in attributes:
+ keys = attributes["toolchain-alias"]
+ if not keys:
+ keys = []
+ elif isinstance(keys, str):
+ keys = [keys]
+ for key in keys:
+ if key in scratch_pad:
+ raise Exception(
+ "Duplicate toolchain-alias in tasks "
+ "`{}`and `{}`: {}".format(
+ task.label,
+ scratch_pad[key],
+ key,
+ )
+ )
+ else:
+ scratch_pad[key] = task.label
+
+
+@verifications.add("optimized_task_graph")
+def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters):
+ """
+ This function ensures that always-optimized tasks have been optimized.
+ """
+ if task is None:
+ return
+ if task.task.get("workerType") == "always-optimized":
+ raise Exception(f"Could not optimize the task {task.label!r}")
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py
new file mode 100644
index 0000000000..da39654d6b
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py
@@ -0,0 +1,78 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from dataclasses import dataclass
+
+from .keyed_by import evaluate_keyed_by
+from .memoize import memoize
+
+
+@dataclass
+class _BuiltinWorkerType:
+ provisioner: str
+ worker_type: str
+
+ @property
+ def implementation(self):
+ """
+ Since the list of built-in worker-types is small and fixed, we can get
+ away with punning the implementation name (in
+ `taskgraph.transforms.task`) and the worker_type.
+ """
+ return self.worker_type
+
+
+_BUILTIN_TYPES = {
+ "always-optimized": _BuiltinWorkerType("invalid", "always-optimized"),
+ "succeed": _BuiltinWorkerType("built-in", "succeed"),
+}
+
+
+@memoize
+def worker_type_implementation(graph_config, worker_type):
+ """Get the worker implementation and OS for the given workerType, where the
+ OS represents the host system, not the target OS, in the case of
+ cross-compiles."""
+ if worker_type in _BUILTIN_TYPES:
+ # For the built-in worker-types, we use an `implementation` that matches
+ # the worker-type.
+ return _BUILTIN_TYPES[worker_type].implementation, None
+ worker_config = evaluate_keyed_by(
+ {"by-worker-type": graph_config["workers"]["aliases"]},
+ "worker-types.yml",
+ {"worker-type": worker_type},
+ )
+ return worker_config["implementation"], worker_config.get("os")
+
+
+@memoize
+def get_worker_type(graph_config, alias, level):
+ """
+ Get the provisioner and worker type for the given alias, evaluating
+ keyed-by fields and aliases from the graph config.
+ """
+ if alias in _BUILTIN_TYPES:
+ builtin_type = _BUILTIN_TYPES[alias]
+ return builtin_type.provisioner, builtin_type.worker_type
+
+ level = str(level)
+ worker_config = evaluate_keyed_by(
+ {"by-alias": graph_config["workers"]["aliases"]},
+ "graph_config.workers.aliases",
+ {"alias": alias},
+ )
+ provisioner = evaluate_keyed_by(
+ worker_config["provisioner"],
+ alias,
+ {"level": level},
+ ).format(
+ **{"alias": alias, "level": level, "trust-domain": graph_config["trust-domain"]}
+ )
+ worker_type = evaluate_keyed_by(
+ worker_config["worker-type"],
+ alias,
+ {"level": level},
+ ).format(
+ **{"alias": alias, "level": level, "trust-domain": graph_config["trust-domain"]}
+ )
+ return provisioner, worker_type
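+
+# Usage sketch (illustrative; the alias name, trust-domain and provisioner
+# pattern below are hypothetical). Given graph config aliases such as:
+#
+#   workers:
+#       aliases:
+#           b-linux:
+#               provisioner: '{trust-domain}-{level}'
+#               implementation: docker-worker
+#               os: linux
+#               worker-type: 'b-linux'
+#
+# get_worker_type(graph_config, "b-linux", level=3) would return
+# ("myproject-3", "b-linux") for trust-domain "myproject".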
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py
new file mode 100644
index 0000000000..141c7a16d3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py
@@ -0,0 +1,36 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import os
+
+from yaml.loader import SafeLoader
+
+
+class UnicodeLoader(SafeLoader):
+ def construct_yaml_str(self, node):
+ return self.construct_scalar(node)
+
+
+UnicodeLoader.add_constructor("tag:yaml.org,2002:str", UnicodeLoader.construct_yaml_str)
+
+
+def load_stream(stream):
+ """
+ Parse the first YAML document in a stream
+ and produce the corresponding Python object.
+ """
+ loader = UnicodeLoader(stream)
+ try:
+ return loader.get_single_data()
+ finally:
+ loader.dispose()
+
+
+def load_yaml(*parts):
+ """Convenience function to load a YAML file in the given path. This is
+ useful for loading kind configuration files from the kind path."""
+ filename = os.path.join(*parts)
+ with open(filename, "rb") as f:
+ return load_stream(f)
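+
+# Usage sketch (illustrative; hypothetical path parts):
+#
+# config = load_yaml("taskcluster", "ci", "build", "kind.yml")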