summaryrefslogtreecommitdiffstats
path: root/third_party/python/taskcluster_taskgraph/taskgraph
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/taskcluster_taskgraph/taskgraph')
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/__init__.py15
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/actions/__init__.py16
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py64
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py42
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py61
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py352
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py301
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py282
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/config.py136
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/create.py132
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/decision.py377
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/docker.py215
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py91
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/filter_tasks.py34
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/generator.py449
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/graph.py134
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/loader/__init__.py0
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/loader/transform.py58
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/main.py756
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/morph.py271
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/optimize/__init__.py8
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py551
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py65
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/parameters.py369
-rwxr-xr-xthird_party/python/taskcluster_taskgraph/taskgraph/run-task/fetch-content899
-rwxr-xr-xthird_party/python/taskcluster_taskgraph/taskgraph/run-task/hgrc33
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/run-task/robustcheckout.py826
-rwxr-xr-xthird_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task1307
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py107
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/task.py84
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/taskgraph.py72
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py0
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py157
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py90
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py23
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py213
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py335
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py438
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py196
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py37
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py240
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py174
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/release_notifications.py100
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py1288
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py0
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py86
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py84
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py86
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py79
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py342
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py54
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py97
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py40
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py97
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/path.py172
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py52
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py22
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py260
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py40
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py373
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py54
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py50
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/time.py115
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py64
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py539
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py283
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py75
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py36
68 files changed, 14498 insertions, 0 deletions
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py
new file mode 100644
index 0000000000..9aef5a8b7e
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py
@@ -0,0 +1,15 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


# Maximum number of dependencies a single task can have
# https://docs.taskcluster.net/reference/platform/taskcluster-queue/references/api#createTask
# specifies 100, but we also optionally add the decision task id as a dep in
# taskgraph.create, so let's set this to 99.
MAX_DEPENDENCIES = 99

# Enable fast task generation for local debugging
# This is normally switched on via the --fast/-F flag to `mach taskgraph`
# Currently this skips toolchain task optimizations and schema validation
# NOTE: this flag is mutated at runtime; read it as `taskgraph.fast` (attribute
# access on the module) rather than `from taskgraph import fast`, which would
# capture the stale value at import time.
fast = False
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/__init__.py
new file mode 100644
index 0000000000..590a957282
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/__init__.py
@@ -0,0 +1,16 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from .registry import (
+ register_callback_action,
+ render_actions_json,
+ trigger_action_callback,
+)
+
+__all__ = [
+ "register_callback_action",
+ "render_actions_json",
+ "trigger_action_callback",
+]
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py
new file mode 100644
index 0000000000..fc10668566
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py
@@ -0,0 +1,64 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from taskgraph.actions.registry import register_callback_action
+from taskgraph.actions.util import (
+ combine_task_graph_files,
+ create_tasks,
+ fetch_graph_and_labels,
+)
+
+
@register_callback_action(
    name="add-new-jobs",
    title="Add new jobs",
    generic=True,
    symbol="add-new",
    description="Add new jobs using task labels.",
    order=100,
    context=[],
    schema={
        "type": "object",
        "properties": {
            "tasks": {
                "type": "array",
                "description": "An array of task labels",
                "items": {"type": "string"},
            },
            "times": {
                "type": "integer",
                "default": 1,
                "minimum": 1,
                "maximum": 100,
                "title": "Times",
                "description": "How many times to run each task.",
            },
        },
    },
)
def add_new_jobs_action(parameters, graph_config, input, task_group_id, task_id):
    """Schedule the requested task labels, optionally several times each.

    Raises an exception (before creating anything) as soon as a requested
    label is not present in the decision task's full task graph.
    """
    decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
        parameters, graph_config
    )

    # Validate each requested label; fail fast on the first unknown one.
    to_run = []
    for label in input["tasks"]:
        if label not in full_task_graph.tasks:
            raise Exception(f"{label} was not found in the task-graph")
        to_run.append(label)

    repeat = input.get("times", 1)
    for run_index in range(repeat):
        create_tasks(
            graph_config,
            to_run,
            full_task_graph,
            label_to_taskid,
            parameters,
            decision_task_id,
            run_index,
        )
    combine_task_graph_files(list(range(repeat)))
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py
new file mode 100644
index 0000000000..03788c6538
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py
@@ -0,0 +1,42 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+
+import requests
+
+from taskgraph.util.taskcluster import cancel_task
+
+from .registry import register_callback_action
+
+logger = logging.getLogger(__name__)
+
+
@register_callback_action(
    title="Cancel Task",
    name="cancel",
    symbol="cx",
    generic=True,
    description=("Cancel the given task"),
    order=350,
    context=[{}],
)
def cancel_action(parameters, graph_config, input, task_group_id, task_id):
    """Cancel the task this action was triggered on.

    Note that this is limited by the scopes afforded to generic actions to
    only cancel tasks with the level-specific schedulerId.
    """
    try:
        cancel_task(task_id, use_proxy=True)
    except requests.HTTPError as exc:
        # Anything other than 409 is a genuine failure — propagate it.
        if exc.response.status_code != 409:
            raise
        # A 409 response indicates that this task is past its deadline. It
        # cannot be cancelled at this time, but it's also not running
        # anymore, so we can ignore this error.
        logger.info(
            'Task "{}" is past its deadline and cannot be cancelled.'.format(task_id)
        )
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py
new file mode 100644
index 0000000000..b2636f46a3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py
@@ -0,0 +1,61 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import concurrent.futures as futures
+import logging
+import os
+
+import requests
+
+from taskgraph.util.taskcluster import (
+ CONCURRENCY,
+ cancel_task,
+ list_task_group_incomplete_tasks,
+)
+
+from .registry import register_callback_action
+
+logger = logging.getLogger(__name__)
+
+
@register_callback_action(
    title="Cancel All",
    name="cancel-all",
    generic=True,
    symbol="cAll",
    description=(
        "Cancel all running and pending tasks created by the decision task "
        "this action task is associated with."
    ),
    order=400,
    context=[],
)
def cancel_all_action(parameters, graph_config, input, task_group_id, task_id):
    """Cancel every incomplete task in this task group, in parallel."""

    def do_cancel_task(task_id):
        logger.info(f"Cancelling task {task_id}")
        try:
            cancel_task(task_id, use_proxy=True)
        except requests.HTTPError as exc:
            if exc.response.status_code != 409:
                raise
            # A 409 response indicates that this task is past its deadline. It
            # cannot be cancelled at this time, but it's also not running
            # anymore, so we can ignore this error.
            logger.info(
                "Task {} is past its deadline and cannot be cancelled.".format(task_id)
            )

    # Skip our own task id: the action task itself belongs to the same group.
    own_task_id = os.environ.get("TASK_ID", "")
    to_cancel = [
        t for t in list_task_group_incomplete_tasks(task_group_id) if t != own_task_id
    ]
    logger.info(f"Cancelling {len(to_cancel)} tasks")
    with futures.ThreadPoolExecutor(CONCURRENCY) as pool:
        pending = [pool.submit(do_cancel_task, t) for t in to_cancel]
        # Surface the first exception (if any) raised by a cancellation.
        for fut in futures.as_completed(pending):
            fut.result()
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py
new file mode 100644
index 0000000000..1e909d30c7
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py
@@ -0,0 +1,352 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import json
+from collections import namedtuple
+from types import FunctionType
+
+from mozilla_repo_urls import parse
+
+from taskgraph import create
+from taskgraph.config import load_graph_config
+from taskgraph.parameters import Parameters
+from taskgraph.util import hash, taskcluster, yaml
+from taskgraph.util.memoize import memoize
+from taskgraph.util.python_path import import_sibling_modules
+
+actions = []
+callbacks = {}
+
+Action = namedtuple("Action", ["order", "cb_name", "generic", "action_builder"])
+
+
def is_json(data):
    """Return ``True``, if ``data`` is a JSON serializable data structure.

    ``json.dumps`` raises ``TypeError`` for unsupported types (sets,
    arbitrary objects, ...) and ``ValueError`` for structures it cannot
    encode (e.g. circular references); both mean "not JSON serializable",
    so both are caught here. Previously only ``ValueError`` was caught,
    which let ``TypeError`` escape for e.g. ``is_json({1, 2})``.
    """
    try:
        json.dumps(data)
    except (TypeError, ValueError):
        return False
    return True
+
+
@memoize
def read_taskcluster_yml(filename):
    """Load and parse .taskcluster.yml, memoized to save some time.

    Args:
        filename (str): Path of the .taskcluster.yml file to parse.

    Returns:
        The parsed YAML content (as returned by ``yaml.load_yaml``).
    """
    return yaml.load_yaml(filename)
+
+
@memoize
def hash_taskcluster_yml(filename):
    """
    Generate a hash of the given .taskcluster.yml. This is the first 10 digits
    of the sha256 of the file's content, and is used by administrative scripts
    to create a hook based on this content.

    Args:
        filename (str): Path of the .taskcluster.yml file to hash.

    Returns:
        str: First 10 hex digits of the file's content hash.
    """
    return hash.hash_path(filename)[:10]
+
+
def register_callback_action(
    name,
    title,
    symbol,
    description,
    order=10000,
    # NOTE(review): mutable default — harmless here since `context` is only
    # read (and wrapped in a lambda below), never mutated.
    context=[],
    available=lambda parameters: True,
    schema=None,
    generic=True,
    cb_name=None,
):
    """
    Register an action callback that can be triggered from supporting
    user interfaces, such as Treeherder.

    This function is to be used as a decorator for a callback that takes
    parameters as follows:

    ``parameters``:
        Decision task :class:`parameters <taskgraph.parameters.Parameters>`.
    ``input``:
        Input matching specified JSON schema, ``None`` if no ``schema``
        parameter is given to ``register_callback_action``.
    ``task_group_id``:
        The id of the task-group this was triggered for.
    ``task_id`` and ``task``:
        task identifier and task definition for task the action was triggered
        for, ``None`` if no ``context`` parameters was given to
        ``register_callback_action``.

    Args:
        name (str):
            An identifier for this action, used by UIs to find the action.
        title (str):
            A human readable title for the action to be used as label on a button
            or text on a link for triggering the action.
        symbol (str):
            Treeherder symbol for the action callback, this is the symbol that the
            task calling your callback will be displayed as. This is usually 1-3
            letters abbreviating the action title.
        description (str):
            A human readable description of the action in **markdown**.
            This will be displayed as a tooltip and in the dialog window when the
            action is triggered. This is a good place to describe how to use the
            action.
        order (int):
            Order of the action in menus, this is relative to the ``order`` of
            other actions declared.
        context (list of dict):
            List of tag-sets specifying which tasks the action can take as input.
            If no tag-sets are specified as input the action is related to the
            entire task-group, and won't be triggered with a given task.

            Otherwise, if ``context = [{'k': 'b', 'p': 'l'}, {'k': 't'}]``, the
            action will only be displayed in the context menu for tasks that have
            ``task.tags.k == 'b' && task.tags.p = 'l'`` or ``task.tags.k = 't'``.
            Essentially, this allows filtering on ``task.tags``.

            If this is a function, it is given the decision parameters and must return
            a value of the form described above.
        available (function):
            An optional function that given decision parameters decides if the
            action is available. Defaults to a function that always returns ``True``.
        schema (dict):
            JSON schema specifying input accepted by the action.
            This is optional and can be left ``null`` if no input is taken.
        generic (bool):
            Whether this is a generic action or has its own permissions.
        cb_name (str):
            The name under which this function should be registered, defaulting to
            `name`. This is used to generate actionPerm for non-generic hook
            actions, and thus appears in ci-configuration and various role and hook
            names. Unlike `name`, which can appear multiple times, cb_name must be
            unique among all registered callbacks.

    Returns:
        function: Decorator to be used for the callback function.
    """
    mem = {"registered": False}  # workaround nonlocal missing in 2.x

    assert isinstance(title, str), "title must be a string"
    assert isinstance(description, str), "description must be a string"
    title = title.strip()
    description = description.strip()

    # ensure that context is callable
    if not callable(context):
        context_value = context
        context = lambda params: context_value  # noqa

    def register_callback(cb, cb_name=cb_name):
        assert isinstance(name, str), "name must be a string"
        assert isinstance(order, int), "order must be an integer"
        assert callable(schema) or is_json(
            schema
        ), "schema must be a JSON compatible object"
        assert isinstance(cb, FunctionType), "callback must be a function"
        # Allow for json-e > 25 chars in the symbol.
        if "$" not in symbol:
            assert 1 <= len(symbol) <= 25, "symbol must be between 1 and 25 characters"
        assert isinstance(symbol, str), "symbol must be a string"

        assert not mem[
            "registered"
        ], "register_callback_action must be used as decorator"
        if not cb_name:
            cb_name = name
        assert cb_name not in callbacks, "callback name {} is not unique".format(
            cb_name
        )

        def action_builder(parameters, graph_config, decision_task_id):
            # Actions may opt out per-push via the `available` predicate.
            if not available(parameters):
                return None

            actionPerm = "generic" if generic else cb_name

            # gather up the common decision-task-supplied data for this action
            repo_param = "head_repository"
            repository = {
                "url": parameters[repo_param],
                "project": parameters["project"],
                "level": parameters["level"],
            }

            revision = parameters["head_rev"]
            push = {
                "owner": "mozilla-taskcluster-maintenance@mozilla.com",
                "pushlog_id": parameters["pushlog_id"],
                "revision": revision,
            }
            branch = parameters.get("head_ref")
            if branch:
                push["branch"] = branch

            action = {
                "name": name,
                "title": title,
                "description": description,
                # target taskGroupId (the task group this decision task is creating)
                "taskGroupId": decision_task_id,
                "cb_name": cb_name,
                "symbol": symbol,
            }

            rv = {
                "name": name,
                "title": title,
                "description": description,
                "context": context(parameters),
            }
            # A callable schema gets the graph config so it can specialize
            # per-project; a plain dict is used as-is.
            if schema:
                rv["schema"] = (
                    schema(graph_config=graph_config) if callable(schema) else schema
                )

            trustDomain = graph_config["trust-domain"]
            level = parameters["level"]
            tcyml_hash = hash_taskcluster_yml(graph_config.taskcluster_yml)

            # the tcyml_hash is prefixed with `/` in the hookId, so users will be granted
            # hooks:trigger-hook:project-gecko/in-tree-action-3-myaction/*; if another
            # action was named `myaction/release`, then the `*` in the scope would also
            # match that action. To prevent such an accident, we prohibit `/` in hook
            # names.
            if "/" in actionPerm:
                raise Exception("`/` is not allowed in action names; use `-`")

            rv.update(
                {
                    "kind": "hook",
                    "hookGroupId": f"project-{trustDomain}",
                    "hookId": "in-tree-action-{}-{}/{}".format(
                        level, actionPerm, tcyml_hash
                    ),
                    "hookPayload": {
                        # provide the decision-task parameters as context for triggerHook
                        "decision": {
                            "action": action,
                            "repository": repository,
                            "push": push,
                        },
                        # and pass everything else through from our own context
                        "user": {
                            "input": {"$eval": "input"},
                            "taskId": {"$eval": "taskId"},  # target taskId (or null)
                            "taskGroupId": {
                                "$eval": "taskGroupId"
                            },  # target task group
                        },
                    },
                    "extra": {
                        "actionPerm": actionPerm,
                    },
                }
            )

            return rv

        actions.append(Action(order, cb_name, generic, action_builder))

        mem["registered"] = True
        callbacks[cb_name] = cb
        return cb

    return register_callback
+
+
def render_actions_json(parameters, graph_config, decision_task_id):
    """
    Render JSON object for the ``public/actions.json`` artifact.

    Args:
        parameters (:class:`~taskgraph.parameters.Parameters`):
            Decision task parameters.

    Returns:
        dict:
            JSON object representation of the ``public/actions.json``
            artifact.
    """
    assert isinstance(parameters, Parameters), "requires instance of Parameters"
    rendered = []
    # Render in menu order; builders may return None for unavailable actions.
    for registered in sorted(_get_actions(graph_config), key=lambda a: a.order):
        built = registered.action_builder(parameters, graph_config, decision_task_id)
        if built:
            assert is_json(built), "action must be a JSON compatible object"
            rendered.append(built)
    return {
        "version": 1,
        "variables": {},
        "actions": rendered,
    }
+
+
def sanity_check_task_scope(callback, parameters, graph_config):
    """
    If this action is not generic, then verify that this task has the necessary
    scope to run the action. This serves as a backstop preventing abuse by
    running non-generic actions using generic hooks. While scopes should
    prevent serious damage from such abuse, it's never a valid thing to do.
    """
    action = next(
        (a for a in _get_actions(graph_config) if a.cb_name == callback), None
    )
    if action is None:
        raise ValueError(f"No action with cb_name {callback}")

    actionPerm = "generic" if action.generic else action.cb_name

    parsed_url = parse(parameters["head_repository"])
    expected_scope = f"assume:{parsed_url.taskcluster_role_prefix}:action:{actionPerm}"

    # the scope should appear literally; no need for a satisfaction check. The use of
    # get_current_scopes here calls the auth service through the Taskcluster Proxy, giving
    # the precise scopes available to this task.
    if expected_scope not in taskcluster.get_current_scopes():
        raise ValueError(f"Expected task scope {expected_scope} for this action")
+
+
def trigger_action_callback(
    task_group_id, task_id, input, callback, parameters, root, test=False
):
    """
    Trigger action callback with the given inputs. If `test` is true, then run
    the action callback in testing mode, without actually creating tasks.
    """
    graph_config = load_graph_config(root)
    graph_config.register()
    # Local name deliberately distinct from the module-level `callbacks` dict.
    registered = _get_callbacks(graph_config)
    cb = registered.get(callback)
    if not cb:
        raise Exception(
            "Unknown callback: {}. Known callbacks: {}".format(
                callback, ", ".join(registered)
            )
        )

    if test:
        # Testing mode: flip module-level flags so nothing is actually created.
        create.testing = True
        taskcluster.testing = True
    else:
        # Real runs must prove they hold the scope for non-generic actions.
        sanity_check_task_scope(callback, parameters, graph_config)

    cb(Parameters(**parameters), graph_config, input, task_group_id, task_id)
+
+
def _load(graph_config):
    """Return the populated ``(callbacks, actions)`` registries.

    Registration happens as an import side effect, so simply importing every
    sibling module of this package fills the module-level registries.
    """
    # Load all modules from this folder, relying on the side-effects of register_
    # functions to populate the action registry.
    import_sibling_modules(exceptions=("util.py",))
    return callbacks, actions
+
+
def _get_callbacks(graph_config):
    """Return the registered callback mapping (name -> function)."""
    registered_callbacks, _ = _load(graph_config)
    return registered_callbacks
+
+
def _get_actions(graph_config):
    """Return the list of registered ``Action`` tuples."""
    _, registered_actions = _load(graph_config)
    return registered_actions
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py
new file mode 100644
index 0000000000..4758beb625
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py
@@ -0,0 +1,301 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import sys
+import textwrap
+
+from slugid import nice as slugid
+
+from taskgraph.util import taskcluster
+
+from .registry import register_callback_action
+from .util import (
+ combine_task_graph_files,
+ create_task_from_def,
+ create_tasks,
+ fetch_graph_and_labels,
+ relativize_datestamps,
+)
+
+logger = logging.getLogger(__name__)
+
+RERUN_STATES = ("exception", "failed")
+
+
+def _should_retrigger(task_graph, label):
+ """
+ Return whether a given task in the taskgraph should be retriggered.
+
+ This handles the case where the task isn't there by assuming it should not be.
+ """
+ if label not in task_graph:
+ logger.info(
+ "Task {} not in full taskgraph, assuming task should not be retriggered.".format(
+ label
+ )
+ )
+ return False
+ return task_graph[label].attributes.get("retrigger", False)
+
+
@register_callback_action(
    title="Retrigger",
    name="retrigger",
    symbol="rt",
    cb_name="retrigger-decision",
    description=textwrap.dedent(
        """\
        Create a clone of the task (retriggering decision, action, and cron tasks requires
        special scopes)."""
    ),
    order=11,
    context=[
        {"kind": "decision-task"},
        {"kind": "action-callback"},
        {"kind": "cron-task"},
    ],
)
def retrigger_decision_action(parameters, graph_config, input, task_group_id, task_id):
    """For a single task, we try to just run exactly the same task once more.
    It's quite possible that we don't have the scopes to do so (especially for
    an action), but this is best-effort."""

    # make all of the timestamps relative; they will then be turned back into
    # absolute timestamps relative to the current time.
    task = taskcluster.get_task_definition(task_id)
    task = relativize_datestamps(task)
    # A fresh slugid gives the clone its own taskId.
    # NOTE(review): `parameters["level"]` presumably selects the level-specific
    # scheduler/scopes for the new task — confirm in actions.util.create_task_from_def.
    create_task_from_def(slugid(), task, parameters["level"])
+
+
@register_callback_action(
    title="Retrigger",
    name="retrigger",
    symbol="rt",
    generic=True,
    description=("Create a clone of the task."),
    order=19,  # must be greater than other orders in this file, as this is the fallback version
    context=[{"retrigger": "true"}],
    schema={
        "type": "object",
        "properties": {
            "downstream": {
                "type": "boolean",
                "description": (
                    "If true, downstream tasks from this one will be cloned as well. "
                    "The dependencies will be updated to work with the new task at the root."
                ),
                "default": False,
            },
            "times": {
                "type": "integer",
                "default": 1,
                "minimum": 1,
                "maximum": 100,
                "title": "Times",
                "description": "How many times to run each task.",
            },
        },
    },
)
@register_callback_action(
    title="Retrigger (disabled)",
    name="retrigger",
    cb_name="retrigger-disabled",
    symbol="rt",
    generic=True,
    description=(
        "Create a clone of the task.\n\n"
        "This type of task should typically be re-run instead of re-triggered."
    ),
    order=20,  # must be greater than other orders in this file, as this is the fallback version
    context=[{}],
    schema={
        "type": "object",
        "properties": {
            "downstream": {
                "type": "boolean",
                "description": (
                    "If true, downstream tasks from this one will be cloned as well. "
                    "The dependencies will be updated to work with the new task at the root."
                ),
                "default": False,
            },
            "times": {
                "type": "integer",
                "default": 1,
                "minimum": 1,
                "maximum": 100,
                "title": "Times",
                "description": "How many times to run each task.",
            },
            "force": {
                "type": "boolean",
                "default": False,
                "description": (
                    "This task should not be re-triggered. "
                    "This can be overridden by passing `true` here."
                ),
            },
        },
    },
)
def retrigger_action(parameters, graph_config, input, task_group_id, task_id):
    """Clone the selected task ``times`` times, optionally with downstream tasks.

    Tasks whose ``retrigger`` attribute is not set are refused unless
    ``input["force"]`` is true. Registered twice: once for tasks tagged
    ``retrigger: true`` (order 19) and once as a disabled fallback for all
    other tasks (order 20, cb_name ``retrigger-disabled``).
    """
    decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
        parameters, graph_config
    )

    task = taskcluster.get_task_definition(task_id)
    label = task["metadata"]["name"]

    with_downstream = " "
    to_run = [label]

    if not input.get("force", None) and not _should_retrigger(full_task_graph, label):
        # Fixed typo in log message: "retrigged" -> "retriggered".
        logger.info(
            "Not retriggering task {}, task should not be retriggered "
            "and force not specified.".format(label)
        )
        sys.exit(1)

    if input.get("downstream"):
        # Pull in everything that depends (transitively) on this task, limited
        # to labels the decision task actually scheduled.
        to_run = full_task_graph.graph.transitive_closure(
            set(to_run), reverse=True
        ).nodes
        to_run = to_run & set(label_to_taskid.keys())
        with_downstream = " (with downstream) "

    times = input.get("times", 1)
    for i in range(times):
        create_tasks(
            graph_config,
            to_run,
            full_task_graph,
            label_to_taskid,
            parameters,
            decision_task_id,
            i,
        )

        logger.info(f"Scheduled {label}{with_downstream}(time {i + 1}/{times})")
    combine_task_graph_files(list(range(times)))
+
+
@register_callback_action(
    title="Rerun",
    name="rerun",
    generic=True,
    symbol="rr",
    description=(
        "Rerun a task.\n\n"
        "This only works on failed or exception tasks in the original taskgraph,"
        " and is CoT friendly."
    ),
    order=300,
    context=[{}],
    schema={"type": "object", "properties": {}},
)
def rerun_action(parameters, graph_config, input, task_group_id, task_id):
    """Rerun the triggered task in place (same taskId, CoT friendly).

    Refuses tasks that are not part of the decision task's label_to_taskid
    (i.e. not from the original taskgraph). The actual state check (only
    failed/exception tasks get rerun) lives in ``_rerun_task``.
    """
    task = taskcluster.get_task_definition(task_id)
    parameters = dict(parameters)
    decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
        parameters, graph_config
    )
    label = task["metadata"]["name"]
    if task_id not in label_to_taskid.values():
        logger.error(
            "Refusing to rerun {}: taskId {} not in decision task {} label_to_taskid!".format(
                label, task_id, decision_task_id
            )
        )
        # Bug fix: previously this fell through and reran the task anyway,
        # contradicting the "Refusing to rerun" message. Actually refuse.
        return

    _rerun_task(task_id, label)
+
+
def _rerun_task(task_id, label):
    """Rerun ``task_id`` if it is in a rerunnable state; otherwise just log."""
    state = taskcluster.state_task(task_id)
    if state in RERUN_STATES:
        taskcluster.rerun_task(task_id)
        logger.info(f"Reran {label}")
    else:
        logger.warning(
            "No need to rerun {}: state '{}' not in {}!".format(
                label, state, RERUN_STATES
            )
        )
+
+
@register_callback_action(
    title="Retrigger",
    name="retrigger-multiple",
    symbol="rt",
    generic=True,
    description=("Create a clone of the task."),
    context=[],
    schema={
        "type": "object",
        "properties": {
            "requests": {
                "type": "array",
                # Schema fix: "tasks"/"times" previously sat directly under
                # "items" (where they are ignored as unknown keywords), and
                # "additionalProperties": False with no "properties" would
                # reject every request object. Wrap them properly.
                "items": {
                    "type": "object",
                    "properties": {
                        "tasks": {
                            "type": "array",
                            "description": "An array of task labels",
                            "items": {"type": "string"},
                        },
                        "times": {
                            "type": "integer",
                            "minimum": 1,
                            "maximum": 100,
                            "title": "Times",
                            "description": "How many times to run each task.",
                        },
                    },
                    "additionalProperties": False,
                },
            },
        },
        "additionalProperties": False,
    },
)
def retrigger_multiple(parameters, graph_config, input, task_group_id, task_id):
    """Handle several retrigger requests in one action.

    For each request, labels whose task opts out of retriggering (no
    ``retrigger`` attribute) are rerun in place; the rest are cloned
    ``times`` times.
    """
    decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels(
        parameters, graph_config
    )

    suffixes = []
    for i, request in enumerate(input.get("requests", [])):
        times = request.get("times", 1)
        rerun_tasks = [
            label
            for label in request.get("tasks")
            if not _should_retrigger(full_task_graph, label)
        ]
        retrigger_tasks = [
            label
            for label in request.get("tasks")
            if _should_retrigger(full_task_graph, label)
        ]

        for label in rerun_tasks:
            # XXX we should not re-run tasks pulled in from other pushes
            # In practice, this shouldn't matter, as only completed tasks
            # are pulled in from other pushes and treeherder won't pass
            # those labels.
            _rerun_task(label_to_taskid[label], label)

        for j in range(times):
            suffix = f"{i}-{j}"
            suffixes.append(suffix)
            create_tasks(
                graph_config,
                retrigger_tasks,
                full_task_graph,
                label_to_taskid,
                parameters,
                decision_task_id,
                suffix,
            )

    combine_task_graph_files(suffixes)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py
new file mode 100644
index 0000000000..dd3248d209
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py
@@ -0,0 +1,282 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import concurrent.futures as futures
+import copy
+import logging
+import os
+import re
+from functools import reduce
+
+from requests.exceptions import HTTPError
+
+from taskgraph import create
+from taskgraph.decision import read_artifact, rename_artifact, write_artifact
+from taskgraph.optimize.base import optimize_task_graph
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.util.taskcluster import (
+ CONCURRENCY,
+ get_artifact,
+ get_session,
+ list_tasks,
+ parse_time,
+)
+from taskgraph.util.taskgraph import find_decision_task
+
+logger = logging.getLogger(__name__)
+
+
def get_parameters(decision_task_id):
    """Fetch the ``public/parameters.yml`` artifact of the given decision task."""
    return get_artifact(decision_task_id, "public/parameters.yml")
+
+
def fetch_graph_and_labels(parameters, graph_config):
    """Fetch the full task graph and the label-to-taskid mapping for the
    current push.

    Starts from the artifacts of the original decision task, then overlays
    label-to-taskid mappings published by any action and cron tasks for the
    same push, so retriggers see the most recent task ids.

    Returns a ``(decision_task_id, full_task_graph, label_to_taskid)`` tuple.
    """
    decision_task_id = find_decision_task(parameters, graph_config)

    # First grab the graph and labels generated during the initial decision task
    full_task_graph = get_artifact(decision_task_id, "public/full-task-graph.json")
    _, full_task_graph = TaskGraph.from_json(full_task_graph)
    label_to_taskid = get_artifact(decision_task_id, "public/label-to-taskid.json")

    # Action and cron tasks may have replaced tasks; their artifacts are
    # overlaid onto the decision task's mapping. The previous version
    # duplicated this fetch logic verbatim for the two kinds (and shadowed
    # the executor name `e` inside the except clause); unify it instead.
    def fetch_label_to_taskid(kind, task_id):
        logger.info(f"fetching label-to-taskid.json for {kind} task {task_id}")
        try:
            run_label_to_id = get_artifact(task_id, "public/label-to-taskid.json")
            label_to_taskid.update(run_label_to_id)
        except HTTPError as exc:
            # A 404 just means this task published no mapping; anything else
            # is a real error.
            if exc.response.status_code != 404:
                raise
            logger.debug(f"No label-to-taskid.json found for {task_id}: {exc}")

    namespaces = [
        (
            "action",
            "{}.v2.{}.pushlog-id.{}.actions".format(
                graph_config["trust-domain"],
                parameters["project"],
                parameters["pushlog_id"],
            ),
        ),
        (
            "cron",
            "{}.v2.{}.revision.{}.cron".format(
                graph_config["trust-domain"],
                parameters["project"],
                parameters["head_rev"],
            ),
        ),
    ]

    # fetch everything in parallel; this avoids serializing any delay in
    # downloading each artifact (such as waiting for it to be mirrored locally)
    with futures.ThreadPoolExecutor(CONCURRENCY) as executor:
        fetches = [
            executor.submit(fetch_label_to_taskid, kind, task_id)
            for kind, namespace in namespaces
            for task_id in list_tasks(namespace)
        ]

        # now wait for each fetch to complete, raising an exception if there
        # were any issues
        for f in futures.as_completed(fetches):
            f.result()

    return (decision_task_id, full_task_graph, label_to_taskid)
+
+
def create_task_from_def(task_id, task_def, level):
    """Create a new task from a definition rather than from a label
    that is already in the full-task-graph. The task definition will
    have {relative-datestamp': '..'} rendered just like in a decision task.
    Use this for entirely new tasks or ones that change internals of the task.
    It is useful if you want to "edit" the full_task_graph and then hand
    it to this function. No dependencies will be scheduled. You must handle
    this yourself. Seeing how create_tasks handles it might prove helpful."""
    # NOTE(review): the schedulerId is hard-coded to the "gecko" trust domain;
    # other consumers presumably need graph_config["trust-domain"] here —
    # confirm before reusing outside gecko.
    task_def["schedulerId"] = f"gecko-level-{level}"
    label = task_def["metadata"]["name"]
    session = get_session()
    create.create_task(session, task_id, label, task_def)
+
+
def update_parent(task, graph):
    """Record the currently-running task (``$TASK_ID``, if any) as this
    task's parent in ``task.extra.parent``; returns the task."""
    parent_id = os.environ.get("TASK_ID", "")
    extra = task.task.setdefault("extra", {})
    extra["parent"] = parent_id
    return task
+
+
def update_dependencies(task, graph):
    """If running inside a task (``$TASK_ID`` set), append that task id to
    this task's dependencies; returns the task."""
    current_task_id = os.environ.get("TASK_ID")
    if current_task_id:
        deps = task.task.setdefault("dependencies", [])
        deps.append(current_task_id)
    return task
+
+
def create_tasks(
    graph_config,
    to_run,
    full_task_graph,
    label_to_taskid,
    params,
    decision_task_id=None,
    suffix="",
    modifier=lambda t: t,
):
    """Create new tasks. The task definition will have {relative-datestamp':
    '..'} rendered just like in a decision task. Action callbacks should use
    this function to create new tasks,
    allowing easy debugging with `mach taskgraph action-callback --test`.
    This builds up all required tasks to run in order to run the tasks requested.

    Optionally this function takes a `modifier` function that is passed in each
    task before it is put into a new graph. It should return a valid task. Note
    that this is passed _all_ tasks in the graph, not just the set in to_run. You
    may want to skip modifying tasks not in your to_run list.

    If `suffix` is given, then it is used to give unique names to the resulting
    artifacts. If you call this function multiple times in the same action,
    pass a different suffix each time to avoid overwriting artifacts.

    If you wish to create the tasks in a new group, leave out decision_task_id.

    Returns an updated label_to_taskid containing the new tasks"""
    if suffix != "":
        suffix = f"-{suffix}"
    to_run = set(to_run)

    # Copy to avoid side-effects later
    full_task_graph = copy.deepcopy(full_task_graph)
    label_to_taskid = label_to_taskid.copy()

    # Pull in everything the requested tasks depend on, applying `modifier`
    # to every task in the resulting subgraph.
    target_graph = full_task_graph.graph.transitive_closure(to_run)
    target_task_graph = TaskGraph(
        {l: modifier(full_task_graph[l]) for l in target_graph.nodes}, target_graph
    )
    # Record this (action) task as the parent of everything it creates.
    target_task_graph.for_each_task(update_parent)
    # When creating into an existing group owned by another task, make the
    # new tasks depend on the current task.
    if decision_task_id and decision_task_id != os.environ.get("TASK_ID"):
        target_task_graph.for_each_task(update_dependencies)
    # NOTE(review): `to_run` is passed twice — presumably once as the
    # requested tasks and once as do-not-optimize; confirm against
    # optimize_task_graph's signature.
    optimized_task_graph, label_to_taskid = optimize_task_graph(
        target_task_graph,
        to_run,
        params,
        to_run,
        decision_task_id,
        existing_tasks=label_to_taskid,
    )
    # Memorialize the graph as (suffixed) artifacts; see
    # combine_task_graph_files for how the suffixed copies are merged.
    write_artifact(f"task-graph{suffix}.json", optimized_task_graph.to_json())
    write_artifact(f"label-to-taskid{suffix}.json", label_to_taskid)
    write_artifact(f"to-run{suffix}.json", list(to_run))
    create.create_tasks(
        graph_config,
        optimized_task_graph,
        label_to_taskid,
        params,
        decision_task_id,
    )
    return label_to_taskid
+
+
+def _update_reducer(accumulator, new_value):
+ "similar to set or dict `update` method, but returning the modified object"
+ accumulator.update(new_value)
+ return accumulator
+
+
def combine_task_graph_files(suffixes):
    """Combine task-graph-{suffix}.json files into a single task-graph.json file.

    Since Chain of Trust verification requires a task-graph.json file that
    contains all children tasks, we can combine the various task-graph-0.json
    type files into a master task-graph.json file at the end.

    Actions also look for various artifacts, so we combine those in a similar
    fashion.

    In the case where there is only one suffix, we simply rename it to avoid the
    additional cost of uploading two copies of the same data.
    """

    if len(suffixes) == 1:
        for filename in ["task-graph", "label-to-taskid", "to-run"]:
            # Fix: use the loop variable in the rename; the previous version
            # used a garbled literal instead of `filename`, so nothing was
            # actually promoted to its canonical name.
            rename_artifact(f"{filename}-{suffixes[0]}.json", f"{filename}.json")
        return

    def combine(file_contents, base):
        # Merge each suffixed artifact into `base` (dict or set).
        return reduce(_update_reducer, file_contents, base)

    files = [read_artifact(f"task-graph-{suffix}.json") for suffix in suffixes]
    write_artifact("task-graph.json", combine(files, dict()))

    files = [read_artifact(f"label-to-taskid-{suffix}.json") for suffix in suffixes]
    write_artifact("label-to-taskid.json", combine(files, dict()))

    files = [read_artifact(f"to-run-{suffix}.json") for suffix in suffixes]
    write_artifact("to-run.json", list(combine(files, set())))
+
+
def relativize_datestamps(task_def):
    """
    Given a task definition as received from the queue, convert all datestamps
    to {relative_datestamp: ..} format, with the task creation time as "now".
    The result is useful for handing to ``create_task``.
    """
    creation_time = parse_time(task_def["created"])
    # borrowed from https://github.com/epoberezkin/ajv/blob/master/lib/compile/formats.js
    timestamp_re = re.compile(
        r"^\d\d\d\d-[0-1]\d-[0-3]\d[t\s]"
        r"(?:[0-2]\d:[0-5]\d:[0-5]\d|23:59:60)(?:\.\d+)?"
        r"(?:z|[+-]\d\d:\d\d)$",
        re.I,
    )

    def relativize(node):
        # Walk the definition; strings matching the ISO-8601 pattern are
        # replaced with an offset (in seconds) from the creation time.
        if isinstance(node, dict):
            return {key: relativize(child) for key, child in node.items()}
        if isinstance(node, list):
            return [relativize(child) for child in node]
        if isinstance(node, str) and timestamp_re.match(node):
            offset = parse_time(node) - creation_time
            return {"relative-datestamp": f"{int(offset.total_seconds())} seconds"}
        return node

    return relativize(task_def)
+
+
def add_args_to_command(cmd_parts, extra_args=None):
    """
    Add custom command line args to a given command.

    Handles the platform-specific shapes a command can come in:
    a single ``task-reference`` dict (windows), a single long string
    (windows), a single nested list (osx), or a plain list of parts.

    Note: a plain multi-part list is extended *in place* (matching historical
    behavior) as well as returned.

    Args:
        cmd_parts: the raw command as seen by taskcluster
        extra_args: array of args we want to add
    Returns:
        the command with extra_args appended, in the same shape it came in
    """
    # Avoid a mutable default argument; None behaves like "no extra args".
    extra_args = extra_args or []
    cmd_type = "default"
    if len(cmd_parts) == 1 and isinstance(cmd_parts[0], dict):
        # windows has single cmd part as dict: 'task-reference', with long string
        cmd_parts = cmd_parts[0]["task-reference"].split(" ")
        cmd_type = "dict"
    elif len(cmd_parts) == 1 and isinstance(cmd_parts[0], str):
        # windows has single cmd part as a long string (the original checked
        # `str or str`, a py2 str/unicode leftover)
        cmd_parts = cmd_parts[0].split(" ")
        cmd_type = "unicode"
    elif len(cmd_parts) == 1 and isinstance(cmd_parts[0], list):
        # osx has a single value array with an array inside
        cmd_parts = cmd_parts[0]
        cmd_type = "subarray"

    cmd_parts.extend(extra_args)

    if cmd_type == "dict":
        cmd_parts = [{"task-reference": " ".join(cmd_parts)}]
    elif cmd_type == "unicode":
        cmd_parts = [" ".join(cmd_parts)]
    elif cmd_type == "subarray":
        cmd_parts = [cmd_parts]
    return cmd_parts
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/config.py b/third_party/python/taskcluster_taskgraph/taskgraph/config.py
new file mode 100644
index 0000000000..9517a4316c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/config.py
@@ -0,0 +1,136 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import os
+import sys
+
+import attr
+from voluptuous import All, Any, Extra, Length, Optional, Required
+
+from .util import path
+from .util.python_path import find_object
+from .util.schema import Schema, optionally_keyed_by, validate_schema
+from .util.yaml import load_yaml
+
+logger = logging.getLogger(__name__)
+
# Schema every project's `config.yml` must satisfy; see load_graph_config.
graph_config_schema = Schema(
    {
        # The trust-domain for this graph.
        # (See https://firefox-source-docs.mozilla.org/taskcluster/taskcluster/taskgraph.html#taskgraph-trust-domain)  # noqa
        Required("trust-domain"): str,
        Required("task-priority"): optionally_keyed_by(
            "project",
            Any(
                "highest",
                "very-high",
                "high",
                "medium",
                "low",
                "very-low",
                "lowest",
            ),
        ),
        # Worker aliases, each resolving to a provisioner/worker-type pair
        # (optionally keyed by level).
        Required("workers"): {
            Required("aliases"): {
                str: {
                    Required("provisioner"): optionally_keyed_by("level", str),
                    Required("implementation"): str,
                    Required("os"): str,
                    Required("worker-type"): optionally_keyed_by("level", str),
                }
            },
        },
        Required("taskgraph"): {
            Optional(
                "register",
                description="Python function to call to register extensions.",
            ): str,
            Optional("decision-parameters"): str,
            Optional(
                "cached-task-prefix",
                description="The taskcluster index prefix to use for caching tasks. "
                "Defaults to `trust-domain`.",
            ): str,
            Required("repositories"): All(
                {
                    str: {
                        Required("name"): str,
                        Optional("project-regex"): str,
                        Optional("ssh-secret-name"): str,
                        # FIXME
                        Extra: str,
                    }
                },
                Length(min=1),
            ),
        },
        # Projects may carry arbitrary additional top-level configuration.
        Extra: object,
    }
)
"""Schema for GraphConfig"""
+
+
@attr.s(frozen=True, cmp=False)
class GraphConfig:
    """Wrapper around a parsed, validated `config.yml` plus the directory it
    was loaded from. Instances are frozen; read values with ``config[key]``."""

    _config = attr.ib()
    root_dir = attr.ib()

    # Class-level record of the directory added to sys.path by register();
    # False until register() has run.
    _PATH_MODIFIED = False

    def __getitem__(self, name):
        return self._config[name]

    def __contains__(self, name):
        return name in self._config

    def register(self):
        """
        Add the project's taskgraph directory to the python path, and register
        any extensions present.
        """
        modify_path = os.path.dirname(self.root_dir)
        if GraphConfig._PATH_MODIFIED:
            if GraphConfig._PATH_MODIFIED == modify_path:
                # Already modified path with the same root_dir.
                # We currently need to do this to enable actions to call
                # taskgraph_decision, e.g. relpro.
                return
            raise Exception("Can't register multiple directories on python path.")
        GraphConfig._PATH_MODIFIED = modify_path
        sys.path.insert(0, modify_path)
        register_path = self["taskgraph"].get("register")
        if register_path:
            # `register` is a dotted path to a callable taking this config.
            find_object(register_path)(self)

    @property
    def vcs_root(self):
        # Best-effort guess: only valid when the config lives in the
        # conventional `<repo>/taskcluster/ci` directory.
        if path.split(self.root_dir)[-2:] != ["taskcluster", "ci"]:
            raise Exception(
                "Not guessing path to vcs root. "
                "Graph config in non-standard location."
            )
        return os.path.dirname(os.path.dirname(self.root_dir))

    @property
    def taskcluster_yml(self):
        # Path to the repository's top-level .taskcluster.yml.
        return os.path.join(self.vcs_root, ".taskcluster.yml")
+
+
def validate_graph_config(config):
    """Validate `config` against ``graph_config_schema``."""
    validate_schema(graph_config_schema, config, "Invalid graph configuration:")
+
+
def load_graph_config(root_dir):
    """Load, validate, and wrap the `config.yml` found in `root_dir`.

    Raises if the file is missing or fails schema validation; returns a
    :class:`GraphConfig`.
    """
    config_path = os.path.join(root_dir, "config.yml")
    if not os.path.exists(config_path):
        raise Exception(f"Couldn't find taskgraph configuration: {config_path}")

    logger.debug(f"loading config from `{config_path}`")
    config = load_yaml(config_path)
    validate_graph_config(config)
    return GraphConfig(config=config, root_dir=root_dir)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/create.py b/third_party/python/taskcluster_taskgraph/taskgraph/create.py
new file mode 100644
index 0000000000..3661ac8271
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/create.py
@@ -0,0 +1,132 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import concurrent.futures as futures
+import json
+import logging
+import sys
+
+from slugid import nice as slugid
+
+from taskgraph.util.parameterization import resolve_timestamps
+from taskgraph.util.taskcluster import CONCURRENCY, get_session
+from taskgraph.util.time import current_json_time
+
+logger = logging.getLogger(__name__)
+
+# this is set to true for `mach taskgraph action-callback --test`
+testing = False
+
+
def create_tasks(graph_config, taskgraph, label_to_taskid, params, decision_task_id):
    """Submit every task in `taskgraph` to the Taskcluster queue.

    Tasks are submitted in dependency order — a task is only submitted once
    all of its in-graph dependencies have been — with up to CONCURRENCY
    parallel requests. `decision_task_id` becomes the taskGroupId of every
    created task.
    """
    taskid_to_label = {t: l for l, t in label_to_taskid.items()}

    # schedulerId is derived from the trust domain and the runtime level.
    scheduler_id = "{}-level-{}".format(graph_config["trust-domain"], params["level"])

    # Add the taskGroupId, schedulerId and optionally the decision task
    # dependency
    for task_id in taskgraph.graph.nodes:
        task_def = taskgraph.tasks[task_id].task

        # if this task has no dependencies *within* this taskgraph, make it
        # depend on this decision task. If it has another dependency within
        # the taskgraph, then it already implicitly depends on the decision
        # task. The result is that tasks do not start immediately. if this
        # loop fails halfway through, none of the already-created tasks run.
        if not any(t in taskgraph.tasks for t in task_def.get("dependencies", [])):
            task_def.setdefault("dependencies", []).append(decision_task_id)

        task_def["taskGroupId"] = decision_task_id
        task_def["schedulerId"] = scheduler_id

    # If `testing` is True, then run without parallelization
    concurrency = CONCURRENCY if not testing else 1
    session = get_session()
    with futures.ThreadPoolExecutor(concurrency) as e:
        # maps task_id -> future for its submission request
        fs = {}

        # We can't submit a task until its dependencies have been submitted.
        # So our strategy is to walk the graph and submit tasks once all
        # their dependencies have been submitted.
        tasklist = set(taskgraph.graph.visit_postorder())
        alltasks = tasklist.copy()

        def schedule_tasks():
            # bail out early if any futures have failed
            if any(f.done() and f.exception() for f in fs.values()):
                return

            to_remove = set()
            new = set()

            def submit(task_id, label, task_def):
                fut = e.submit(create_task, session, task_id, label, task_def)
                new.add(fut)
                fs[task_id] = fut

            for task_id in tasklist:
                task_def = taskgraph.tasks[task_id].task
                # If we haven't finished submitting all our dependencies yet,
                # come back to this later.
                # Some dependencies aren't in our graph, so make sure to filter
                # those out
                deps = set(task_def.get("dependencies", [])) & alltasks
                if any((d not in fs or not fs[d].done()) for d in deps):
                    continue

                submit(task_id, taskid_to_label[task_id], task_def)
                to_remove.add(task_id)

                # Schedule tasks as many times as task_duplicates indicates
                attributes = taskgraph.tasks[task_id].attributes
                for i in range(1, attributes.get("task_duplicates", 1)):
                    # We use slugid() since we want a distinct task id
                    submit(slugid(), taskid_to_label[task_id], task_def)
            tasklist.difference_update(to_remove)

            # as each of those futures complete, try to schedule more tasks
            for f in futures.as_completed(new):
                schedule_tasks()

        # start scheduling tasks and run until everything is scheduled
        schedule_tasks()

        # check the result of each future, raising an exception if it failed
        for f in futures.as_completed(fs.values()):
            f.result()
+
+
def create_task(session, task_id, label, task_def):
    """Submit a single task definition to the queue (or dump it to stdout
    when the module-level `testing` flag is set)."""
    # create the task using 'http://taskcluster/queue', which is proxied to
    # the queue service with credentials appropriate to this job.

    # Resolve timestamps relative to the current time.
    task_def = resolve_timestamps(current_json_time(datetime_format=True), task_def)

    if testing:
        # `mach taskgraph action-callback --test`: print instead of submitting.
        json.dump(
            [task_id, task_def],
            sys.stdout,
            sort_keys=True,
            indent=4,
            separators=(",", ": "),
        )
        # add a newline
        print("")
        return

    logger.info(f"Creating task with taskId {task_id} for {label}")
    response = session.put(f"http://taskcluster/queue/v1/task/{task_id}", json=task_def)
    if response.status_code != 200:
        # Prefer the queue's structured error message; fall back to raw text.
        try:
            logger.error(response.json()["message"])
        except Exception:
            logger.error(response.text)
        response.raise_for_status()
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/decision.py
new file mode 100644
index 0000000000..6c5da8c65d
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/decision.py
@@ -0,0 +1,377 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import json
+import logging
+import os
+import pathlib
+import shutil
+import time
+from pathlib import Path
+
+import yaml
+from voluptuous import Optional
+
+from taskgraph.actions import render_actions_json
+from taskgraph.create import create_tasks
+from taskgraph.generator import TaskGraphGenerator
+from taskgraph.parameters import Parameters, get_version
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.util.python_path import find_object
+from taskgraph.util.schema import Schema, validate_schema
+from taskgraph.util.vcs import Repository, get_repository
+from taskgraph.util.yaml import load_yaml
+
+logger = logging.getLogger(__name__)
+
# All decision artifacts are written beneath this directory (relative to the
# task's working directory); see write_artifact/read_artifact below.
ARTIFACTS_DIR = Path("artifacts")


# For each project, this gives a set of parameters specific to the project.
# See `taskcluster/docs/parameters.rst` for information on parameters.
PER_PROJECT_PARAMETERS = {
    # the default parameters are used for projects that do not match above.
    "default": {
        "target_tasks_method": "default",
    }
}


# Schema for version 2 of `try_task_config.json`: a free-form "parameters"
# mapping overlaid onto the decision parameters (see set_try_config).
try_task_config_schema_v2 = Schema(
    {
        Optional("parameters"): {str: object},
    }
)
+
+
def full_task_graph_to_runnable_jobs(full_task_json):
    """Reduce a full-task-graph JSON to its treeherder-relevant bits.

    Returns a mapping of task label -> {symbol, groupName?, groupSymbol?,
    collection?, platform?}; tasks without treeherder metadata are omitted.
    """
    runnable_jobs = {}
    for label, node in full_task_json.items():
        task = node["task"]
        if "extra" not in task or "treeherder" not in task["extra"]:
            continue

        th = task["extra"]["treeherder"]
        job = {"symbol": th["symbol"]}
        for key in ("groupName", "groupSymbol", "collection"):
            if key in th:
                job[key] = th[key]
        if th.get("machine", {}).get("platform"):
            job["platform"] = th["machine"]["platform"]
        runnable_jobs[label] = job
    return runnable_jobs
+
+
def taskgraph_decision(options, parameters=None):
    """
    Run the decision task. This function implements `mach taskgraph decision`,
    and is responsible for

    * processing decision task command-line options into parameters
    * running task-graph generation exactly the same way the other `mach
      taskgraph` commands do
    * generating a set of artifacts to memorialize the graph
    * calling TaskCluster APIs to create the graph
    """

    # `parameters` may be passed directly, or as a callable taking the graph
    # config; the default builds them from the command-line options.
    parameters = parameters or (
        lambda graph_config: get_decision_parameters(graph_config, options)
    )

    # The decision task's own id becomes the taskGroupId of everything created.
    decision_task_id = os.environ["TASK_ID"]

    # create a TaskGraphGenerator instance
    tgg = TaskGraphGenerator(
        root_dir=options.get("root"),
        parameters=parameters,
        decision_task_id=decision_task_id,
        write_artifacts=True,
    )

    # write out the parameters used to generate this graph
    write_artifact("parameters.yml", dict(**tgg.parameters))

    # write out the public/actions.json file
    write_artifact(
        "actions.json",
        render_actions_json(tgg.parameters, tgg.graph_config, decision_task_id),
    )

    # write out the full graph for reference
    full_task_json = tgg.full_task_graph.to_json()
    write_artifact("full-task-graph.json", full_task_json)

    # write out the public/runnable-jobs.json file
    write_artifact(
        "runnable-jobs.json", full_task_graph_to_runnable_jobs(full_task_json)
    )

    # this is just a test to check whether the from_json() function is working
    _, _ = TaskGraph.from_json(full_task_json)

    # write out the target task set to allow reproducing this as input
    write_artifact("target-tasks.json", list(tgg.target_task_set.tasks.keys()))

    # write out the optimized task graph to describe what will actually happen,
    # and the map of labels to taskids
    write_artifact("task-graph.json", tgg.morphed_task_graph.to_json())
    write_artifact("label-to-taskid.json", tgg.label_to_taskid)

    # write out current run-task and fetch-content scripts (shipped alongside
    # this module) so downstream tasks can fetch them from this decision task
    RUN_TASK_DIR = pathlib.Path(__file__).parent / "run-task"
    shutil.copy2(RUN_TASK_DIR / "run-task", ARTIFACTS_DIR)
    shutil.copy2(RUN_TASK_DIR / "fetch-content", ARTIFACTS_DIR)

    # actually create the graph
    create_tasks(
        tgg.graph_config,
        tgg.morphed_task_graph,
        tgg.label_to_taskid,
        tgg.parameters,
        decision_task_id=decision_task_id,
    )
+
+
def get_decision_parameters(graph_config, options):
    """
    Load parameters from the command-line options for 'taskgraph decision'.
    This also applies per-project parameters, based on the given project.

    Returns a validated ``Parameters`` instance.
    """
    # Options that map straight through to parameters (when present).
    parameters = {
        n: options[n]
        for n in [
            "base_repository",
            "base_ref",
            "base_rev",
            "head_repository",
            "head_rev",
            "head_ref",
            "head_tag",
            "project",
            "pushlog_id",
            "pushdate",
            "repository_type",
            "owner",
            "level",
            "target_tasks_method",
            "tasks_for",
        ]
        if n in options
    }

    repo_path = os.getcwd()
    repo = get_repository(repo_path)
    try:
        commit_message = repo.get_commit_message()
    except UnicodeDecodeError:
        # tolerate non-UTF-8 commit messages; treat them as empty
        commit_message = ""

    # Sanity-check / repair the base ref and rev reported by CI (they can be
    # missing or wrong for new or force-pushed branches).
    parameters["base_ref"] = _determine_more_accurate_base_ref(
        repo,
        candidate_base_ref=options.get("base_ref"),
        head_ref=options.get("head_ref"),
        base_rev=options.get("base_rev"),
    )

    parameters["base_rev"] = _determine_more_accurate_base_rev(
        repo,
        base_ref=parameters["base_ref"],
        candidate_base_rev=options.get("base_rev"),
        head_rev=options.get("head_rev"),
        env_prefix=_get_env_prefix(graph_config),
    )

    # Define default filter list, as most configurations shouldn't need
    # custom filters.
    parameters["filters"] = [
        "target_tasks_method",
    ]
    parameters["optimize_strategies"] = None
    parameters["optimize_target_tasks"] = True
    parameters["existing_tasks"] = {}
    parameters["do_not_optimize"] = []
    parameters["enable_always_target"] = True
    parameters["build_number"] = 1
    parameters["version"] = get_version(repo_path)
    parameters["next_version"] = None

    # owner must be an email, but sometimes (e.g., for ffxbld) it is not, in which
    # case, fake it
    if "@" not in parameters["owner"]:
        parameters["owner"] += "@noreply.mozilla.org"

    # use the pushdate as build_date if given, else use current time
    parameters["build_date"] = parameters["pushdate"] or int(time.time())
    # moz_build_date is the build identifier based on build_date
    parameters["moz_build_date"] = time.strftime(
        "%Y%m%d%H%M%S", time.gmtime(parameters["build_date"])
    )

    project = parameters["project"]
    try:
        parameters.update(PER_PROJECT_PARAMETERS[project])
    except KeyError:
        logger.warning(
            "using default project parameters; add {} to "
            "PER_PROJECT_PARAMETERS in {} to customize behavior "
            "for this project".format(project, __file__)
        )
        parameters.update(PER_PROJECT_PARAMETERS["default"])

    # `target_tasks_method` has higher precedence than `project` parameters
    if options.get("target_tasks_method"):
        parameters["target_tasks_method"] = options["target_tasks_method"]

    # ..but can be overridden by the commit message: if it contains the special
    # string "DONTBUILD" and this is an on-push decision task, then use the
    # special 'nothing' target task method.
    if "DONTBUILD" in commit_message and options["tasks_for"] == "hg-push":
        parameters["target_tasks_method"] = "nothing"

    if options.get("optimize_target_tasks") is not None:
        parameters["optimize_target_tasks"] = options["optimize_target_tasks"]

    # Let the project hook in its own parameter post-processing, if configured.
    if "decision-parameters" in graph_config["taskgraph"]:
        find_object(graph_config["taskgraph"]["decision-parameters"])(
            graph_config, parameters
        )

    if options.get("try_task_config_file"):
        task_config_file = os.path.abspath(options.get("try_task_config_file"))
    else:
        # if try_task_config.json is present, load it
        task_config_file = os.path.join(os.getcwd(), "try_task_config.json")

    # load try settings
    if ("try" in project and options["tasks_for"] == "hg-push") or options[
        "tasks_for"
    ] == "github-pull-request":
        set_try_config(parameters, task_config_file)

    result = Parameters(**parameters)
    result.check()
    return result
+
+
def _determine_more_accurate_base_ref(repo, candidate_base_ref, head_ref, base_rev):
    """Pick the ref to diff against, falling back to the repository's default
    branch when the candidate is missing or clearly wrong."""
    if not candidate_base_ref:
        base_ref = repo.default_branch
    elif candidate_base_ref == head_ref and base_rev == Repository.NULL_REVISION:
        logger.info(
            "base_ref and head_ref are identical but base_rev equals the null revision. "
            "This is a new branch but Github didn't identify its actual base."
        )
        base_ref = repo.default_branch
    else:
        base_ref = candidate_base_ref

    if base_ref != candidate_base_ref:
        logger.info(
            f'base_ref has been reset from "{candidate_base_ref}" to "{base_ref}".'
        )

    return base_ref
+
+
def _determine_more_accurate_base_rev(
    repo, base_ref, candidate_base_rev, head_rev, env_prefix
):
    """Resolve the revision to diff against: the latest revision common to
    head_rev and either the candidate base rev (when usable) or base_ref."""
    # Decide the starting point for the common-ancestor search.
    starting_point = candidate_base_rev
    if not candidate_base_rev:
        logger.info("base_rev is not set.")
        starting_point = base_ref
    elif candidate_base_rev == Repository.NULL_REVISION:
        logger.info("base_rev equals the null revision. This branch is a new one.")
        starting_point = base_ref
    elif not repo.does_revision_exist_locally(candidate_base_rev):
        logger.warning(
            "base_rev does not exist locally. It is likely because the branch was force-pushed. "
            "taskgraph is not able to assess how many commits were changed and assumes it is only "
            f"the last one. Please set the {env_prefix.upper()}_BASE_REV environment variable "
            "in the decision task and provide `--base-rev` to taskgraph."
        )
        starting_point = base_ref

    if starting_point == base_ref:
        logger.info(
            f'Using base_ref "{base_ref}" to determine latest common revision...'
        )

    base_rev = repo.find_latest_common_revision(starting_point, head_rev)
    if base_rev != candidate_base_rev:
        if starting_point == candidate_base_rev:
            logger.info("base_rev is not an ancestor of head_rev.")

        logger.info(
            f'base_rev has been reset from "{candidate_base_rev}" to "{base_rev}".'
        )

    return base_rev
+
+
+def _get_env_prefix(graph_config):
+ repo_keys = list(graph_config["taskgraph"].get("repositories", {}).keys())
+ return repo_keys[0] if repo_keys else ""
+
+
def set_try_config(parameters, task_config_file):
    """Overlay try settings from `task_config_file` onto `parameters`.

    Missing file is a no-op. Only version 2 of `try_task_config.json` is
    supported; any other (or missing) version raises.
    """
    if not os.path.isfile(task_config_file):
        return

    logger.info(f"using try tasks from {task_config_file}")
    with open(task_config_file) as fh:
        task_config = json.load(fh)
    # Previously a file without a "version" key raised a bare KeyError;
    # route it through the explicit unknown-version error instead.
    task_config_version = task_config.pop("version", None)
    if task_config_version == 2:
        validate_schema(
            try_task_config_schema_v2,
            task_config,
            "Invalid v2 `try_task_config.json`.",
        )
        parameters.update(task_config["parameters"])
        return
    raise Exception(
        f"Unknown `try_task_config.json` version: {task_config_version}"
    )
+
+
def write_artifact(filename, data):
    """Serialize `data` to ``ARTIFACTS_DIR/filename``; the format (YAML,
    JSON, or gzipped JSON) is chosen by the file extension."""
    # Fix: the log line and the TypeError below had lost their {filename}
    # interpolation (a garbled literal was logged/raised instead).
    logger.info(f"writing artifact file `{filename}`")
    if not os.path.isdir(ARTIFACTS_DIR):
        os.mkdir(ARTIFACTS_DIR)
    path = ARTIFACTS_DIR / filename
    if filename.endswith(".yml"):
        with open(path, "w") as f:
            yaml.safe_dump(data, f, allow_unicode=True, default_flow_style=False)
    elif filename.endswith(".json"):
        with open(path, "w") as f:
            json.dump(data, f, sort_keys=True, indent=2, separators=(",", ": "))
    elif filename.endswith(".gz"):
        import gzip

        # gzip.open in "wb" mode requires bytes; writing the str returned by
        # json.dumps raised TypeError before.
        with gzip.open(path, "wb") as f:
            f.write(json.dumps(data).encode("utf-8"))
    else:
        raise TypeError(f"Don't know how to write to {filename}")
+
+
def read_artifact(filename):
    """Read and deserialize ``ARTIFACTS_DIR/filename`` (YAML, JSON, or
    gzipped JSON, chosen by extension)."""
    path = ARTIFACTS_DIR / filename
    if filename.endswith(".yml"):
        # load_yaml(*parts) joins its arguments, so pass the full path once;
        # the previous load_yaml(path, filename) looked for `path/filename`.
        return load_yaml(str(path))
    elif filename.endswith(".json"):
        with open(path) as f:
            return json.load(f)
    elif filename.endswith(".gz"):
        import gzip

        # json.load accepts a binary file object (encoding auto-detected).
        with gzip.open(path, "rb") as f:
            return json.load(f)
    else:
        raise TypeError(f"Don't know how to read {filename}")
+
+
def rename_artifact(src, dest):
    """Rename an artifact within ARTIFACTS_DIR (e.g. to promote a suffixed
    artifact to its canonical name; see combine_task_graph_files)."""
    os.rename(ARTIFACTS_DIR / src, ARTIFACTS_DIR / dest)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/docker.py
new file mode 100644
index 0000000000..c142f36391
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/docker.py
@@ -0,0 +1,215 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import json
+import os
+import tarfile
+from io import BytesIO
+from textwrap import dedent
+
+try:
+ import zstandard as zstd
+except ImportError as e:
+ zstd = e
+
+from taskgraph.util import docker
+from taskgraph.util.taskcluster import get_artifact_url, get_session
+
+
def get_image_digest(image_name):
    """Return the cached-task digest for the docker image ``image_name``.

    Generates the docker-image kind and reads the digest from the task's
    ``cached_task`` attribute.
    """
    from taskgraph.generator import load_tasks_for_kind
    from taskgraph.parameters import Parameters

    parameters = Parameters(
        level=os.environ.get("MOZ_SCM_LEVEL", "3"),
        strict=False,
    )
    image_tasks = load_tasks_for_kind(parameters, "docker-image")
    image_task = image_tasks[f"build-docker-image-{image_name}"]
    return image_task.attributes["cached_task"]["digest"]
+
+
def load_image_by_name(image_name, tag=None):
    """Locate the indexed artifact for docker image ``image_name`` and load it.

    Returns False when no matching artifact is found in the index,
    otherwise delegates to load_image_by_task_id.
    """
    from taskgraph.generator import load_tasks_for_kind
    from taskgraph.optimize import IndexSearch
    from taskgraph.parameters import Parameters

    parameters = Parameters(
        level=os.environ.get("MOZ_SCM_LEVEL", "3"),
        strict=False,
    )
    image_tasks = load_tasks_for_kind(parameters, "docker-image")
    image_task = image_tasks[f"build-docker-image-{image_name}"]
    task_id = IndexSearch().should_replace_task(
        image_task, {}, image_task.optimization.get("index-search", [])
    )

    # A boolean result means the index lookup found no concrete task id.
    if task_id in (True, False):
        print(
            "Could not find artifacts for a docker image "
            "named `{image_name}`. Local commits and other changes "
            "in your checkout may cause this error. Try "
            "updating to a fresh checkout of mozilla-central "
            "to download image.".format(image_name=image_name)
        )
        return False

    return load_image_by_task_id(task_id, tag)
+
+
def load_image_by_task_id(task_id, tag=None):
    """Download and load the image artifact produced by ``task_id``.

    When ``tag`` is given the loaded image is re-tagged with it;
    otherwise the tag embedded in the artifact is used. Always returns
    True.
    """
    url = get_artifact_url(task_id, "public/image.tar.zst")
    result = load_image(url, tag)
    loaded = "{}:{}".format(result["image"], result["tag"])
    print(f"Found docker image: {loaded}")
    if tag:
        print(f"Re-tagged as: {tag}")
    else:
        tag = loaded
    print(f"Try: docker run -ti --rm {tag} bash")
    return True
+
+
def build_context(name, outputFile, args=None):
    """Write a context.tar for the named docker image to ``outputFile``.

    Raises ValueError when ``name`` or ``outputFile`` is empty, and a
    generic Exception when the image directory does not exist.
    """
    if not name:
        raise ValueError("must provide a Docker image name")
    if not outputFile:
        raise ValueError("must provide a outputFile")

    context_dir = docker.image_path(name)
    if not os.path.isdir(context_dir):
        raise Exception(f"image directory does not exist: {context_dir}")

    docker.create_context_tar(".", context_dir, outputFile, args)
+
+
def build_image(name, tag, args=None):
    """Build a Docker image of specified name.

    Output from image building process will be printed to stdout.
    Raises ValueError on an empty name; warns when the resulting tag
    ends in ``:latest`` (no VERSION file in the image directory).
    """
    if not name:
        raise ValueError("must provide a Docker image name")

    context_dir = docker.image_path(name)
    if not os.path.isdir(context_dir):
        raise Exception(f"image directory does not exist: {context_dir}")

    tag = tag or docker.docker_image(name, by_tag=True)

    buf = BytesIO()
    docker.stream_context_tar(".", context_dir, buf, "", args)
    docker.post_to_docker(buf.getvalue(), "/build", nocache=1, t=tag)

    print(f"Successfully built {name} and tagged with {tag}")

    if tag.endswith(":latest"):
        banner = "*" * 50
        warning = "\n".join(
            [
                banner,
                "WARNING: no VERSION file found in image directory.",
                "Image is not suitable for deploying/pushing.",
                "Create an image suitable for deploying/pushing by creating",
                "a VERSION file in the image directory.",
                banner,
            ]
        )
        print(warning)
+
+
def load_image(url, imageName=None, imageTag=None):
    """
    Load docker image from URL as imageName:tag, if no imageName or tag is given
    it will use whatever is inside the zstd compressed tarball.

    Returns an object with properties 'image', 'tag' and 'layer'.

    Raises ImportError when the optional `zstandard` dependency is missing,
    and Exception when the tarball contains more than one image/tag or no
    `repositories` member at all.
    """
    # The module-level import stored the ImportError instead of raising;
    # surface it here, at the point of use.
    if isinstance(zstd, ImportError):
        raise ImportError(
            dedent(
                """
                zstandard is not installed! Use `pip install taskcluster-taskgraph[load-image]`
                to use this feature.
                """
            )
        ) from zstd

    # If imageName is given and we don't have an imageTag
    # we parse out the imageTag from imageName, or default it to 'latest'
    # if no imageName and no imageTag is given, 'repositories' won't be rewritten
    if imageName and not imageTag:
        if ":" in imageName:
            imageName, imageTag = imageName.split(":", 1)
        else:
            imageTag = "latest"

    # Filled in by the generator below as it encounters `repositories`.
    info = {}

    def download_and_modify_image():
        # This function downloads and edits the downloaded tar file on the fly.
        # It emits chunked buffers of the edited tar file, as a generator.
        # NOTE(review): assumes the docker-save V1 layout with a top-level
        # `repositories` member — verify for OCI-format archives.
        print(f"Downloading from {url}")
        # get_session() gets us a requests.Session set to retry several times.
        req = get_session().get(url, stream=True)
        req.raise_for_status()

        with zstd.ZstdDecompressor().stream_reader(req.raw) as ifh:

            tarin = tarfile.open(
                mode="r|",
                fileobj=ifh,
                bufsize=zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
            )

            # Stream through each member of the downloaded tar file individually.
            for member in tarin:
                # Non-file members only need a tar header. Emit one.
                if not member.isfile():
                    yield member.tobuf(tarfile.GNU_FORMAT)
                    continue

                # Open stream reader for the member
                reader = tarin.extractfile(member)

                # If member is `repositories`, we parse and possibly rewrite the
                # image tags.
                if member.name == "repositories":
                    # Read and parse repositories
                    repos = json.loads(reader.read())
                    reader.close()

                    # If there is more than one image or tag, we can't handle it
                    # here.
                    if len(repos.keys()) > 1:
                        raise Exception("file contains more than one image")
                    info["image"] = image = list(repos.keys())[0]
                    if len(repos[image].keys()) > 1:
                        raise Exception("file contains more than one tag")
                    info["tag"] = tag = list(repos[image].keys())[0]
                    info["layer"] = layer = repos[image][tag]

                    # Rewrite the repositories file
                    data = json.dumps({imageName or image: {imageTag or tag: layer}})
                    # Swap the reader for the rewritten content and fix up the
                    # member size so the emitted header matches.
                    reader = BytesIO(data.encode("utf-8"))
                    member.size = len(data)

                # Emit the tar header for this member.
                yield member.tobuf(tarfile.GNU_FORMAT)
                # Then emit its content.
                remaining = member.size
                while remaining:
                    length = min(remaining, zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
                    buf = reader.read(length)
                    remaining -= len(buf)
                    yield buf
                # Pad to fill a 512 bytes block, per tar format.
                remainder = member.size % 512
                if remainder:
                    yield ("\0" * (512 - remainder)).encode("utf-8")

                reader.close()

    docker.post_to_docker(download_and_modify_image(), "/images/load", quiet=0)

    # Check that we found a repositories file
    if not info.get("image") or not info.get("tag") or not info.get("layer"):
        raise Exception("No repositories file found!")

    return info
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py b/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py
new file mode 100644
index 0000000000..6be6e5eeee
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py
@@ -0,0 +1,91 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Support for optimizing tasks based on the set of files that have changed.
+"""
+
+
+import logging
+import os
+
+import requests
+from redo import retry
+
+from .util.memoize import memoize
+from .util.path import match as match_path
+from .util.vcs import get_repository
+
+logger = logging.getLogger(__name__)
+
+
@memoize
def get_changed_files(head_repository_url, head_rev, base_rev=None):
    """
    Get the set of files changed between revisions.
    Responses are cached, so multiple calls with the same arguments are OK.
    """
    repository = get_repository(os.getcwd())

    # TODO Use VCS version once tested enough
    if repository.tool == "hg":
        return _get_changed_files_json_automationrelevance(
            head_repository_url, head_rev
        )

    return repository.get_changed_files(rev=head_rev, base_rev=base_rev)
+
+
def _get_changed_files_json_automationrelevance(head_repository_url, head_rev):
    """
    Get the set of files changed in the push headed by the given revision.

    Queries the hg.mozilla.org `json-automationrelevance` endpoint with
    retries and unions the file lists of all returned changesets.
    """
    url = "{}/json-automationrelevance/{}".format(
        head_repository_url.rstrip("/"), head_rev
    )
    logger.debug("Querying version control for metadata: %s", url)

    contents = retry(
        lambda: requests.get(url, timeout=30).json(), attempts=10, sleeptime=10
    )

    changesets = contents["changesets"]
    logger.debug("{} commits influencing task scheduling:".format(len(changesets)))

    changed_files = set()
    for changeset in changesets:
        desc = ""  # Support empty desc
        if changeset["desc"]:
            desc = changeset["desc"].splitlines()[0].encode("ascii", "ignore")
        logger.debug(" {cset} {desc}".format(cset=changeset["node"][0:12], desc=desc))
        changed_files.update(changeset["files"])

    return changed_files
+
+
def check(params, file_patterns):
    """Determine whether any of the files changed between 2 revisions
    match any of the given file patterns.

    When the revision parameters are missing, conservatively assumes
    everything changed and returns True.
    """
    head_repository_url = params.get("head_repository")
    head_rev = params.get("head_rev")
    if not head_repository_url or not head_rev:
        logger.warning(
            "Missing `head_repository` or `head_rev` parameters; "
            "assuming all files have changed"
        )
        return True

    changed_files = get_changed_files(
        head_repository_url, head_rev, params.get("base_rev")
    )

    return any(
        match_path(path, pattern)
        for pattern in file_patterns
        for path in changed_files
    )
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/filter_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/filter_tasks.py
new file mode 100644
index 0000000000..63bd2874d6
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/filter_tasks.py
@@ -0,0 +1,34 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+
+from . import target_tasks
+
+logger = logging.getLogger(__name__)
+
+filter_task_functions = {}
+
+
def filter_task(name):
    """Decorator to declare a task filter function, registering it in
    `filter_task_functions` under `name`. Returns the function unchanged."""

    def wrap(func):
        filter_task_functions[name] = func
        return func

    return wrap
+
+
@filter_task("target_tasks_method")
def filter_target_tasks(graph, parameters, graph_config):
    """Proxy filter to use legacy target tasks code.

    This should go away once target_tasks are converted to filters.
    """
    method_name = parameters.get("target_tasks_method", "all_tasks")
    method = target_tasks.get_method(method_name)
    return method(graph, parameters, graph_config)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/generator.py b/third_party/python/taskcluster_taskgraph/taskgraph/generator.py
new file mode 100644
index 0000000000..e1b900cf65
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/generator.py
@@ -0,0 +1,449 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import copy
+import logging
+import os
+from typing import AnyStr
+
+import attr
+
+from . import filter_tasks
+from .config import GraphConfig, load_graph_config
+from .graph import Graph
+from .morph import morph
+from .optimize.base import optimize_task_graph
+from .parameters import parameters_loader
+from .task import Task
+from .taskgraph import TaskGraph
+from .transforms.base import TransformConfig, TransformSequence
+from .util.python_path import find_object
+from .util.verify import verifications
+from .util.yaml import load_yaml
+
+logger = logging.getLogger(__name__)
+
+
class KindNotFound(Exception):
    """
    Raised when trying to load a kind from a directory without a kind.yml
    (Kind.load raises it with the missing kind.yml path as message).
    """
+
+
@attr.s(frozen=True)
class Kind:
    """A single kind of task, loaded from `<root_dir>/<name>/kind.yml`.

    Holds the kind name, the kind directory path, the parsed kind.yml
    configuration and the global graph configuration.
    """

    name = attr.ib(type=AnyStr)
    path = attr.ib(type=AnyStr)
    config = attr.ib(type=dict)
    graph_config = attr.ib(type=GraphConfig)

    def _get_loader(self):
        # Resolve the dotted-path `loader` entry of kind.yml to a callable.
        try:
            loader = self.config["loader"]
        except KeyError:
            raise KeyError(f"{self.path!r} does not define `loader`")
        return find_object(loader)

    def load_tasks(self, parameters, loaded_tasks, write_artifacts):
        """Load and transform this kind's tasks into a list of Task objects.

        `loaded_tasks` are the tasks of previously loaded kinds; only those
        belonging to this kind's `kind-dependencies` are exposed to the
        transforms.
        """
        loader = self._get_loader()
        # Deep-copy so loader/transforms cannot mutate the shared kind config.
        config = copy.deepcopy(self.config)

        kind_dependencies = config.get("kind-dependencies", [])
        kind_dependencies_tasks = {
            task.label: task for task in loaded_tasks if task.kind in kind_dependencies
        }

        inputs = loader(self.name, self.path, config, parameters, loaded_tasks)

        # Build the transform pipeline from the dotted paths in kind.yml.
        transforms = TransformSequence()
        for xform_path in config["transforms"]:
            transform = find_object(xform_path)
            transforms.add(transform)

        # perform the transformations on the loaded inputs
        trans_config = TransformConfig(
            self.name,
            self.path,
            config,
            parameters,
            kind_dependencies_tasks,
            self.graph_config,
            write_artifacts=write_artifacts,
        )
        tasks = [
            Task(
                self.name,
                label=task_dict["label"],
                description=task_dict["description"],
                attributes=task_dict["attributes"],
                task=task_dict["task"],
                optimization=task_dict.get("optimization"),
                dependencies=task_dict.get("dependencies"),
                soft_dependencies=task_dict.get("soft-dependencies"),
                if_dependencies=task_dict.get("if-dependencies"),
            )
            for task_dict in transforms(trans_config, inputs)
        ]
        return tasks

    @classmethod
    def load(cls, root_dir, graph_config, kind_name):
        """Construct a Kind from `<root_dir>/<kind_name>/kind.yml`.

        Raises KindNotFound when the directory has no kind.yml.
        """
        path = os.path.join(root_dir, kind_name)
        kind_yml = os.path.join(path, "kind.yml")
        if not os.path.exists(kind_yml):
            raise KindNotFound(kind_yml)

        logger.debug(f"loading kind `{kind_name}` from `{path}`")
        config = load_yaml(kind_yml)

        return cls(kind_name, path, config, graph_config)
+
+
class TaskGraphGenerator:
    """
    The central controller for taskgraph. This handles all phases of graph
    generation. The task is generated from all of the kinds defined in
    subdirectories of the generator's root directory.

    Access to the results of this generation, as well as intermediate values at
    various phases of generation, is available via properties. This encourages
    the provision of all generation inputs at instance construction time.
    """

    # Task-graph generation is implemented as a Python generator that yields
    # each "phase" of generation. This allows some mach subcommands to short-
    # circuit generation of the entire graph by never completing the generator.

    def __init__(
        self,
        root_dir,
        parameters,
        decision_task_id="DECISION-TASK",
        write_artifacts=False,
    ):
        """
        @param root_dir: root directory, with subdirectories for each kind
        @param parameters: parameters for this task-graph generation, or callable
            taking a `GraphConfig` and returning parameters
        @type parameters: Union[Parameters, Callable[[GraphConfig], Parameters]]
        """
        if root_dir is None:
            root_dir = "taskcluster/ci"
        self.root_dir = root_dir
        self._parameters = parameters
        self._decision_task_id = decision_task_id
        self._write_artifacts = write_artifacts

        # start the generator
        self._run = self._run()
        self._run_results = {}

    @property
    def parameters(self):
        """
        The properties used for this graph.

        @type: Properties
        """
        return self._run_until("parameters")

    @property
    def full_task_set(self):
        """
        The full task set: all tasks defined by any kind (a graph without edges)

        @type: TaskGraph
        """
        return self._run_until("full_task_set")

    @property
    def full_task_graph(self):
        """
        The full task graph: the full task set, with edges representing
        dependencies.

        @type: TaskGraph
        """
        return self._run_until("full_task_graph")

    @property
    def target_task_set(self):
        """
        The set of targeted tasks (a graph without edges)

        @type: TaskGraph
        """
        return self._run_until("target_task_set")

    @property
    def target_task_graph(self):
        """
        The set of targeted tasks and all of their dependencies

        @type: TaskGraph
        """
        return self._run_until("target_task_graph")

    @property
    def optimized_task_graph(self):
        """
        The set of targeted tasks and all of their dependencies; tasks that
        have been optimized out are either omitted or replaced with a Task
        instance containing only a task_id.

        @type: TaskGraph
        """
        return self._run_until("optimized_task_graph")

    @property
    def label_to_taskid(self):
        """
        A dictionary mapping task label to assigned taskId. This property helps
        in interpreting `optimized_task_graph`.

        @type: dictionary
        """
        return self._run_until("label_to_taskid")

    @property
    def morphed_task_graph(self):
        """
        The optimized task graph, with any subsequent morphs applied. This graph
        will have the same meaning as the optimized task graph, but be in a form
        more palatable to TaskCluster.

        @type: TaskGraph
        """
        return self._run_until("morphed_task_graph")

    @property
    def graph_config(self):
        """
        The configuration for this graph.

        @type: GraphConfig
        """
        return self._run_until("graph_config")

    def _load_kinds(self, graph_config, target_kind=None):
        """Yield Kind objects, either all kinds under root_dir, or — when
        `target_kind` is given — only that kind plus its transitive
        kind-dependencies (and docker-image)."""
        if target_kind:
            # docker-image is an implicit dependency that never appears in
            # kind-dependencies.
            queue = [target_kind, "docker-image"]
            seen_kinds = set()
            while queue:
                kind_name = queue.pop()
                if kind_name in seen_kinds:
                    continue
                seen_kinds.add(kind_name)
                kind = Kind.load(self.root_dir, graph_config, kind_name)
                yield kind
                queue.extend(kind.config.get("kind-dependencies", []))
        else:
            for kind_name in os.listdir(self.root_dir):
                try:
                    yield Kind.load(self.root_dir, graph_config, kind_name)
                except KindNotFound:
                    continue

    def _run(self):
        # Generator driving all phases; each yield is a (name, value) pair
        # consumed lazily by _run_until.
        logger.info("Loading graph configuration.")
        graph_config = load_graph_config(self.root_dir)

        yield ("graph_config", graph_config)

        graph_config.register()

        # Initial verifications that don't depend on any generation state.
        verifications("initial")

        if callable(self._parameters):
            parameters = self._parameters(graph_config)
        else:
            parameters = self._parameters

        logger.info(f"Using {parameters}")
        logger.debug(f"Dumping parameters:\n{repr(parameters)}")

        filters = parameters.get("filters", [])
        # Always add legacy target tasks method until we deprecate that API.
        if "target_tasks_method" not in filters:
            filters.insert(0, "target_tasks_method")
        filters = [filter_tasks.filter_task_functions[f] for f in filters]

        yield self.verify("parameters", parameters)

        logger.info("Loading kinds")
        # put the kinds into a graph and sort topologically so that kinds are loaded
        # in post-order
        if parameters.get("target-kind"):
            target_kind = parameters["target-kind"]
            logger.info(
                "Limiting kinds to {target_kind} and dependencies".format(
                    target_kind=target_kind
                )
            )
        kinds = {
            kind.name: kind
            for kind in self._load_kinds(graph_config, parameters.get("target-kind"))
        }
        verifications("kinds", kinds)

        edges = set()
        for kind in kinds.values():
            for dep in kind.config.get("kind-dependencies", []):
                edges.add((kind.name, dep, "kind-dependency"))
        kind_graph = Graph(set(kinds), edges)

        if parameters.get("target-kind"):
            kind_graph = kind_graph.transitive_closure({target_kind, "docker-image"})

        logger.info("Generating full task set")
        all_tasks = {}
        for kind_name in kind_graph.visit_postorder():
            logger.debug(f"Loading tasks for kind {kind_name}")
            kind = kinds[kind_name]
            try:
                new_tasks = kind.load_tasks(
                    parameters,
                    list(all_tasks.values()),
                    self._write_artifacts,
                )
            except Exception:
                logger.exception(f"Error loading tasks for kind {kind_name}:")
                raise
            for task in new_tasks:
                if task.label in all_tasks:
                    raise Exception("duplicate tasks with label " + task.label)
                all_tasks[task.label] = task
            logger.info(f"Generated {len(new_tasks)} tasks for kind {kind_name}")
        full_task_set = TaskGraph(all_tasks, Graph(set(all_tasks), set()))
        yield self.verify("full_task_set", full_task_set, graph_config, parameters)

        logger.info("Generating full task graph")
        edges = set()
        for t in full_task_set:
            for depname, dep in t.dependencies.items():
                edges.add((t.label, dep, depname))

        full_task_graph = TaskGraph(all_tasks, Graph(full_task_set.graph.nodes, edges))
        logger.info(
            "Full task graph contains %d tasks and %d dependencies"
            % (len(full_task_set.graph.nodes), len(edges))
        )
        yield self.verify("full_task_graph", full_task_graph, graph_config, parameters)

        logger.info("Generating target task set")
        target_task_set = TaskGraph(
            dict(all_tasks), Graph(set(all_tasks.keys()), set())
        )
        # Apply each filter in turn, narrowing the set of target tasks.
        for fltr in filters:
            old_len = len(target_task_set.graph.nodes)
            target_tasks = set(fltr(target_task_set, parameters, graph_config))
            target_task_set = TaskGraph(
                {l: all_tasks[l] for l in target_tasks}, Graph(target_tasks, set())
            )
            logger.info(
                "Filter %s pruned %d tasks (%d remain)"
                % (fltr.__name__, old_len - len(target_tasks), len(target_tasks))
            )

        yield self.verify("target_task_set", target_task_set, graph_config, parameters)

        logger.info("Generating target task graph")
        # include all docker-image build tasks here, in case they are needed for a graph morph
        docker_image_tasks = {
            t.label
            for t in full_task_graph.tasks.values()
            if t.attributes["kind"] == "docker-image"
        }
        # include all tasks with `always_target` set
        if parameters["enable_always_target"]:
            always_target_tasks = {
                t.label
                for t in full_task_graph.tasks.values()
                if t.attributes.get("always_target")
            }
        else:
            always_target_tasks = set()
        logger.info(
            "Adding %d tasks with `always_target` attribute"
            % (len(always_target_tasks) - len(always_target_tasks & target_tasks))
        )
        requested_tasks = target_tasks | docker_image_tasks | always_target_tasks
        target_graph = full_task_graph.graph.transitive_closure(requested_tasks)
        target_task_graph = TaskGraph(
            {l: all_tasks[l] for l in target_graph.nodes}, target_graph
        )
        yield self.verify(
            "target_task_graph", target_task_graph, graph_config, parameters
        )

        logger.info("Generating optimized task graph")
        existing_tasks = parameters.get("existing_tasks")
        do_not_optimize = set(parameters.get("do_not_optimize", []))
        if not parameters.get("optimize_target_tasks", True):
            do_not_optimize = set(target_task_set.graph.nodes).union(do_not_optimize)

        # this is used for testing experimental optimization strategies
        strategies = os.environ.get(
            "TASKGRAPH_OPTIMIZE_STRATEGIES", parameters.get("optimize_strategies")
        )
        if strategies:
            strategies = find_object(strategies)

        optimized_task_graph, label_to_taskid = optimize_task_graph(
            target_task_graph,
            requested_tasks,
            parameters,
            do_not_optimize,
            self._decision_task_id,
            existing_tasks=existing_tasks,
            strategy_override=strategies,
        )

        yield self.verify(
            "optimized_task_graph", optimized_task_graph, graph_config, parameters
        )

        morphed_task_graph, label_to_taskid = morph(
            optimized_task_graph, label_to_taskid, parameters, graph_config
        )

        yield "label_to_taskid", label_to_taskid
        yield self.verify(
            "morphed_task_graph", morphed_task_graph, graph_config, parameters
        )

    def _run_until(self, name):
        """Advance the phase generator until result `name` is available,
        caching every intermediate result along the way."""
        while name not in self._run_results:
            try:
                k, v = next(self._run)
            except StopIteration:
                raise AttributeError(f"No such run result {name}")
            self._run_results[k] = v
        return self._run_results[name]

    def verify(self, name, obj, *args, **kwargs):
        """Run the registered verifications for phase `name` and return the
        (name, obj) pair for yielding."""
        verifications(name, obj, *args, **kwargs)
        return name, obj
+
+
def load_tasks_for_kind(parameters, kind, root_dir=None):
    """
    Get all the tasks of a given kind.

    This function is designed to be called from outside of taskgraph.
    """
    # make parameters read-write
    overrides = dict(parameters)
    overrides["target-kind"] = kind
    params = parameters_loader(spec=None, strict=False, overrides=overrides)
    generator = TaskGraphGenerator(root_dir=root_dir, parameters=params)
    return {
        task.task["metadata"]["name"]: task
        for task in generator.full_task_set
        if task.kind == kind
    }
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/graph.py b/third_party/python/taskcluster_taskgraph/taskgraph/graph.py
new file mode 100644
index 0000000000..cdd280e2b1
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/graph.py
@@ -0,0 +1,134 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import collections
+
+import attr
+
+
@attr.s(frozen=True)
class Graph:
    """Generic representation of a directed acyclic graph with labeled edges
    connecting the nodes. Graph operations are implemented in a functional
    manner, so the data structure is immutable.

    It permits at most one edge of a given name between any set of nodes. The
    graph is not checked for cycles, and methods may hang or otherwise fail if
    given a cyclic graph.

    The `nodes` and `edges` attributes may be accessed in a read-only fashion.
    The `nodes` attribute is a set of node names, while `edges` is a set of
    `(left, right, name)` tuples representing an edge named `name` going from
    node `left` to node `right`.
    """

    # Stored as frozensets so instances are immutable and hashable.
    nodes = attr.ib(converter=frozenset)
    edges = attr.ib(converter=frozenset)

    def transitive_closure(self, nodes, reverse=False):
        """Return the transitive closure of <nodes>: the graph containing all
        specified nodes as well as any nodes reachable from them, and any
        intervening edges.

        If `reverse` is true, the "reachability" will be reversed and this
        will return the set of nodes that can reach the specified nodes.

        Example:

        .. code-block::

            a ------> b ------> c
                      |
                      `-------> d

            transitive_closure([b]).nodes == set([a, b])
            transitive_closure([c]).nodes == set([c, b, a])
            transitive_closure([c], reverse=True).nodes == set([c])
            transitive_closure([b], reverse=True).nodes == set([b, c, d])
        """
        assert isinstance(nodes, set)
        if not (nodes <= self.nodes):
            raise Exception(
                f"Unknown nodes in transitive closure: {nodes - self.nodes}"
            )

        # generate a new graph by expanding along edges until reaching a fixed
        # point
        new_nodes, new_edges = nodes, set()
        nodes, edges = set(), set()
        while (new_nodes, new_edges) != (nodes, edges):
            nodes, edges = new_nodes, new_edges
            # take every edge leaving (or, when reversed, entering) the
            # current node set, then add the nodes at their far ends
            add_edges = {
                (left, right, name)
                for (left, right, name) in self.edges
                if (right if reverse else left) in nodes
            }
            add_nodes = {(left if reverse else right) for (left, right, _) in add_edges}
            new_nodes = nodes | add_nodes
            new_edges = edges | add_edges
        return Graph(new_nodes, new_edges)

    def _visit(self, reverse):
        # Queue-based topological visit: a node is yielded only once every
        # node it links to has been yielded; otherwise it is requeued after
        # its not-yet-seen links.
        queue = collections.deque(sorted(self.nodes))
        links_by_node = self.reverse_links_dict() if reverse else self.links_dict()
        seen = set()
        while queue:
            node = queue.popleft()
            if node in seen:
                continue
            links = links_by_node[node]
            if all((n in seen) for n in links):
                seen.add(node)
                yield node
            else:
                queue.extend(n for n in links if n not in seen)
                queue.append(node)

    def visit_postorder(self):
        """
        Generate a sequence of nodes in postorder, such that every node is
        visited *after* any nodes it links to.

        Behavior is undefined (read: it will hang) if the graph contains a
        cycle.
        """
        return self._visit(False)

    def visit_preorder(self):
        """
        Like visit_postorder, but in reverse: every node is visited *before*
        any nodes it links to.
        """
        return self._visit(True)

    def links_dict(self):
        """
        Return a dictionary mapping each node to a set of the nodes it links to
        (omitting edge names)
        """
        links = collections.defaultdict(set)
        for left, right, _ in self.edges:
            links[left].add(right)
        return links

    def named_links_dict(self):
        """
        Return a two-level dictionary mapping each node to a dictionary mapping
        edge names to labels.
        """
        links = collections.defaultdict(dict)
        for left, right, name in self.edges:
            links[left][name] = right
        return links

    def reverse_links_dict(self):
        """
        Return a dictionary mapping each node to a set of the nodes linking to
        it (omitting edge names)
        """
        links = collections.defaultdict(set)
        for left, right, _ in self.edges:
            links[right].add(left)
        return links
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/loader/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/loader/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/loader/__init__.py
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/loader/transform.py b/third_party/python/taskcluster_taskgraph/taskgraph/loader/transform.py
new file mode 100644
index 0000000000..a134ffd127
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/loader/transform.py
@@ -0,0 +1,58 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+
+from taskgraph.util.templates import merge
+from taskgraph.util.yaml import load_yaml
+
+logger = logging.getLogger(__name__)
+
+
def loader(kind, path, config, params, loaded_tasks):
    """
    Get the input elements that will be transformed into tasks in a generic
    way. The elements themselves are free-form, and become the input to the
    first transform.

    By default, this reads tasks from the `tasks` key, or from yaml files
    named by `tasks-from`. The entities are read from mappings, and the
    keys to those mappings are added in the `name` key of each entity.

    If there is a `task-defaults` config, then every task is merged with it.
    This provides a simple way to set default values for all tasks of a kind.
    The `task-defaults` key can also be specified in a yaml file pointed to by
    `tasks-from`. In this case it will only apply to tasks defined in the same
    file.

    Other kind implementations can use a different loader function to
    produce inputs and hand them to `transform_inputs`.
    """
    defaults = config.get("task-defaults")

    def inline_tasks():
        # Tasks defined directly in kind.yml under the `tasks` key.
        for name, task in config.get("tasks", {}).items():
            if defaults:
                task = merge(defaults, task)
            task["task-from"] = "kind.yml"
            yield name, task

    def file_tasks():
        # Tasks defined in the yaml files listed under `tasks-from`.
        for filename in config.get("tasks-from", []):
            tasks = load_yaml(path, filename)

            file_defaults = tasks.pop("task-defaults", None)
            if defaults:
                file_defaults = merge(defaults, file_defaults or {})

            for name, task in tasks.items():
                if file_defaults:
                    task = merge(file_defaults, task)
                task["task-from"] = filename
                yield name, task

    def all_tasks():
        yield from inline_tasks()
        yield from file_tasks()

    for name, task in all_tasks():
        task["name"] = name
        logger.debug(f"Generating tasks for {kind} {name}")
        yield task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/main.py b/third_party/python/taskcluster_taskgraph/taskgraph/main.py
new file mode 100644
index 0000000000..88f2f6d37d
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/main.py
@@ -0,0 +1,756 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
import argparse
import atexit
import json
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
import traceback
from collections import namedtuple
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path
from typing import Any, List, Union

import appdirs
import yaml
+
# Registry of CLI subcommands, populated by the `command` decorator below.
# Each entry holds the handler `func`, the positional `args` and keyword
# `kwargs` forwarded to `add_parser`, and parser `defaults`.
Command = namedtuple("Command", ["func", "args", "kwargs", "defaults"])
commands = {}
+
+
def command(*args, **kwargs):
    """Decorator registering a function as the handler for a subcommand.

    Positional/keyword arguments are forwarded to `add_parser`; the special
    `defaults` keyword is applied to the subparser via `set_defaults`.
    """
    parser_defaults = kwargs.pop("defaults", {})

    def register(func):
        # The first positional argument is the subcommand name.
        commands[args[0]] = Command(func, args, kwargs, parser_defaults)
        return func

    return register
+
+
def argument(*args, **kwargs):
    """Decorator attaching an argparse argument spec to a command handler.

    Specs accumulate on the function's `args` attribute and are later passed
    to `add_argument` by `create_parser`.
    """

    def attach(func):
        # Create the spec list lazily on first use.
        specs = getattr(func, "args", None)
        if specs is None:
            specs = func.args = []
        specs.append((args, kwargs))
        return func

    return attach
+
+
def format_taskgraph_labels(taskgraph):
    """Render the graph as one task label per line, sorted alphabetically."""
    labels = [
        taskgraph.tasks[key].label for key in taskgraph.graph.visit_postorder()
    ]
    labels.sort()
    return "\n".join(labels)
+
+
def format_taskgraph_json(taskgraph):
    """Render the graph as pretty-printed, key-sorted JSON."""
    graph_json = taskgraph.to_json()
    return json.dumps(graph_json, sort_keys=True, indent=2, separators=(",", ": "))
+
+
def format_taskgraph_yaml(taskgraph):
    """Render the graph as block-style YAML."""
    graph_json = taskgraph.to_json()
    return yaml.safe_dump(graph_json, default_flow_style=False)
+
+
def get_filtered_taskgraph(taskgraph, tasksregex, exclude_keys):
    """
    Filter all the tasks on basis of a regular expression
    and returns a new TaskGraph object
    """
    from taskgraph.graph import Graph
    from taskgraph.task import Task
    from taskgraph.taskgraph import TaskGraph

    if tasksregex:
        named_links_dict = taskgraph.graph.named_links_dict()
        filteredtasks = {}
        filterededges = set()
        regexprogram = re.compile(tasksregex)

        for key in taskgraph.graph.visit_postorder():
            task = taskgraph.tasks[key]
            # Keep only tasks whose label matches the regex (anchored at the
            # start, since this is `match`, not `search`)...
            if regexprogram.match(task.label):
                filteredtasks[key] = task
                # ...and only edges whose dependency also matches, so the
                # resulting graph stays closed over the kept tasks.
                for depname, dep in named_links_dict[key].items():
                    if regexprogram.match(dep):
                        filterededges.add((key, dep, depname))

        taskgraph = TaskGraph(filteredtasks, Graph(set(filteredtasks), filterededges))

    if exclude_keys:
        for label, task in taskgraph.tasks.items():
            # Work on the JSON representation so dotted keys address the
            # serialized structure rather than Task attributes.
            task = task.to_json()
            for key in exclude_keys:
                obj = task
                attrs = key.split(".")
                # Walk down the dotted path and delete the leaf if every
                # intermediate key exists; otherwise stop silently.
                while attrs[0] in obj:
                    if len(attrs) == 1:
                        del obj[attrs[0]]
                        break
                    obj = obj[attrs[0]]
                    attrs = attrs[1:]
            # Round-trip through JSON so the stored Task reflects the deletions.
            taskgraph.tasks[label] = Task.from_json(task)

    return taskgraph
+
+
# Maps the --format option value to its rendering function; "labels" is the
# fallback used when no format flag was given (see `format_taskgraph`).
FORMAT_METHODS = {
    "labels": format_taskgraph_labels,
    "json": format_taskgraph_json,
    "yaml": format_taskgraph_yaml,
}
+
+
def get_taskgraph_generator(root, parameters):
    """Helper function to make testing a little easier."""
    from taskgraph.generator import TaskGraphGenerator

    generator = TaskGraphGenerator(root_dir=root, parameters=parameters)
    return generator
+
+
def format_taskgraph(options, parameters, logfile=None):
    """Generate the graph attribute named by options["graph_attr"] and
    render it as a string using the selected format.

    When `logfile` is given (the multi-parameter subprocess case), root
    logging is redirected to that file.
    """
    import taskgraph
    from taskgraph.parameters import parameters_loader

    if logfile:
        handler = logging.FileHandler(logfile, mode="w")
        if logging.root.handlers:
            # Replace the existing root handler with the file handler,
            # reusing its formatter so output formatting is unchanged.
            oldhandler = logging.root.handlers[-1]
            logging.root.removeHandler(oldhandler)
            handler.setFormatter(oldhandler.formatter)
        logging.root.addHandler(handler)

    if options["fast"]:
        # Module-level flag consulted elsewhere to skip expensive steps.
        taskgraph.fast = True

    if isinstance(parameters, str):
        # A string spec (file path, url, project=..., task-id=...) still
        # needs to be resolved into a Parameters object.
        parameters = parameters_loader(
            parameters,
            overrides={"target-kind": options.get("target_kind")},
            strict=False,
        )

    tgg = get_taskgraph_generator(options.get("root"), parameters)

    tg = getattr(tgg, options["graph_attr"])
    tg = get_filtered_taskgraph(tg, options["tasks_regex"], options["exclude_keys"])
    format_method = FORMAT_METHODS[options["format"] or "labels"]
    return format_method(tg)
+
+
def dump_output(out, path=None, params_spec=None):
    """Write `out` to `path`, or to stdout when no path is given.

    When `params_spec` is set and `path` has no `{params}` placeholder, the
    formatted spec name is appended to the file name before the extension so
    multiple generations don't overwrite each other.
    """
    from taskgraph.parameters import Parameters

    params_name = Parameters.format_spec(params_spec)
    fh = None
    if path:
        # Substitute params name into file path if necessary
        if params_spec and "{params}" not in path:
            name, ext = os.path.splitext(path)
            name += "_{params}"
            path = name + ext

        path = path.format(params=params_name)
        fh = open(path, "w")
    else:
        print(
            f"Dumping result with parameters from {params_name}:",
            file=sys.stderr,
        )
    try:
        # fh=None makes print fall through to stdout.
        print(out + "\n", file=fh)
    finally:
        # Fix: close the output file explicitly instead of leaking the
        # handle until garbage collection.
        if fh is not None:
            fh.close()
+
+
def generate_taskgraph(options, parameters, logdir):
    """Generate and dump the graph once per parameter spec.

    Uses a process pool when more than one spec is given so generations run
    in parallel, each logging to its own file under `logdir`.
    """
    from taskgraph.parameters import Parameters

    def logfile(spec):
        """Determine logfile given a parameters specification."""
        if logdir is None:
            return None
        return os.path.join(
            logdir,
            "{}_{}.log".format(options["graph_attr"], Parameters.format_spec(spec)),
        )

    # Don't bother using futures if there's only one parameter. This can make
    # tracebacks a little more readable and avoids additional process overhead.
    if len(parameters) == 1:
        spec = parameters[0]
        out = format_taskgraph(options, spec, logfile(spec))
        dump_output(out, options["output_file"])
        return

    futures = {}
    with ProcessPoolExecutor() as executor:
        for spec in parameters:
            f = executor.submit(format_taskgraph, options, spec, logfile(spec))
            futures[f] = spec

        for future in as_completed(futures):
            output_file = options["output_file"]
            spec = futures[future]
            e = future.exception()
            if e:
                # Render the worker's traceback instead of re-raising, so one
                # failing spec doesn't abort the remaining generations.
                out = "".join(traceback.format_exception(type(e), e, e.__traceback__))
                if options["diff"]:
                    # Dump to console so we don't accidentally diff the tracebacks.
                    output_file = None
            else:
                out = future.result()

            dump_output(
                out,
                path=output_file,
                params_spec=spec if len(parameters) > 1 else None,
            )
+
+
@command(
    "tasks",
    help="Show all tasks in the taskgraph.",
    defaults={"graph_attr": "full_task_set"},
)
@command(
    "full", help="Show the full taskgraph.", defaults={"graph_attr": "full_task_graph"}
)
@command(
    "target",
    help="Show the set of target tasks.",
    defaults={"graph_attr": "target_task_set"},
)
@command(
    "target-graph",
    help="Show the target graph.",
    defaults={"graph_attr": "target_task_graph"},
)
@command(
    "optimized",
    help="Show the optimized graph.",
    defaults={"graph_attr": "optimized_task_graph"},
)
@command(
    "morphed",
    help="Show the morphed graph.",
    defaults={"graph_attr": "morphed_task_graph"},
)
@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
@argument("--quiet", "-q", action="store_true", help="suppress all logging output")
@argument(
    "--verbose", "-v", action="store_true", help="include debug-level logging output"
)
@argument(
    "--json",
    "-J",
    action="store_const",
    dest="format",
    const="json",
    help="Output task graph as a JSON object",
)
@argument(
    "--yaml",
    "-Y",
    action="store_const",
    dest="format",
    const="yaml",
    help="Output task graph as a YAML object",
)
@argument(
    "--labels",
    "-L",
    action="store_const",
    dest="format",
    const="labels",
    help="Output the label for each task in the task graph (default)",
)
@argument(
    "--parameters",
    "-p",
    default=None,
    action="append",
    help="Parameters to use for the generation. Can be a path to file (.yml or "
    ".json; see `taskcluster/docs/parameters.rst`), a directory (containing "
    "parameters files), a url, of the form `project=mozilla-central` to download "
    "latest parameters file for the specified project from CI, or of the form "
    "`task-id=<decision task id>` to download parameters from the specified "
    "decision task. Can be specified multiple times, in which case multiple "
    "generations will happen from the same invocation (one per parameters "
    "specified).",
)
@argument(
    "--no-optimize",
    dest="optimize",
    action="store_false",
    # Fix: the default must be the boolean True, not the string "true";
    # argparse stores the default verbatim, so a string would leak into
    # code expecting a bool (the truthy value masked the type bug).
    default=True,
    help="do not remove tasks from the graph that are found in the "
    "index (a.k.a. optimize the graph)",
)
@argument(
    "-o",
    "--output-file",
    default=None,
    help="file path to store generated output.",
)
@argument(
    "--tasks-regex",
    "--tasks",
    default=None,
    help="only return tasks with labels matching this regular " "expression.",
)
@argument(
    "--exclude-key",
    default=None,
    dest="exclude_keys",
    action="append",
    help="Exclude the specified key (using dot notation) from the final result. "
    "This is mainly useful with '--diff' to filter out expected differences. Can be "
    "used multiple times.",
)
@argument(
    "--target-kind",
    default=None,
    help="only return tasks that are of the given kind, or their dependencies.",
)
@argument(
    "-F",
    "--fast",
    default=False,
    action="store_true",
    help="enable fast task generation for local debugging.",
)
@argument(
    "--diff",
    const="default",
    nargs="?",
    default=None,
    help="Generate and diff the current taskgraph against another revision. "
    "Without args the base revision will be used. A revision specifier such as "
    "the hash or `.~1` (hg) or `HEAD~1` (git) can be used as well.",
)
def show_taskgraph(options):
    """Generate the requested graph attribute and print/dump it, optionally
    generating it for another revision too and showing the diff."""
    from taskgraph.parameters import Parameters, parameters_loader
    from taskgraph.util.vcs import get_repository

    if options.pop("verbose", False):
        logging.root.setLevel(logging.DEBUG)

    repo = None
    cur_rev = None
    diffdir = None
    output_file = options["output_file"]

    if options["diff"]:
        repo = get_repository(os.getcwd())

        if not repo.working_directory_clean():
            print(
                "abort: can't diff taskgraph with dirty working directory",
                file=sys.stderr,
            )
            return 1

        # We want to return the working directory to the current state
        # as best we can after we're done. In all known cases, using
        # branch or bookmark (which are both available on the VCS object)
        # as `branch` is preferable to a specific revision.
        cur_rev = repo.branch or repo.head_rev[:12]

        diffdir = tempfile.mkdtemp()
        atexit.register(
            shutil.rmtree, diffdir
        )  # make sure the directory gets cleaned up
        options["output_file"] = os.path.join(
            diffdir, f"{options['graph_attr']}_{cur_rev}"
        )
        print(f"Generating {options['graph_attr']} @ {cur_rev}", file=sys.stderr)

    # Fix: `List[Any[str, Parameters]]` was invalid typing syntax; the
    # intent is "a list of string specs or Parameters objects".
    parameters: List[Union[str, Parameters]] = options.pop("parameters")
    if not parameters:
        overrides = {
            "target-kind": options.get("target_kind"),
        }
        parameters = [
            parameters_loader(None, strict=False, overrides=overrides)
        ]  # will use default values

    # Expand directory specs into the parameter files they contain.
    for param in parameters[:]:
        if isinstance(param, str) and os.path.isdir(param):
            parameters.remove(param)
            parameters.extend(
                [
                    p.as_posix()
                    for p in Path(param).iterdir()
                    if p.suffix in (".yml", ".json")
                ]
            )

    logdir = None
    if len(parameters) > 1:
        # Log to separate files for each process instead of stderr to
        # avoid interleaving.
        basename = os.path.basename(os.getcwd())
        logdir = os.path.join(appdirs.user_log_dir("taskgraph"), basename)
        if not os.path.isdir(logdir):
            os.makedirs(logdir)
    else:
        # Only setup logging if we have a single parameter spec. Otherwise
        # logging will go to files. This is also used as a hook for Gecko
        # to setup its `mach` based logging.
        setup_logging()

    generate_taskgraph(options, parameters, logdir)

    if options["diff"]:
        assert diffdir is not None
        assert repo is not None

        # Reload taskgraph modules to pick up changes and clear global state.
        for mod in sys.modules.copy():
            if mod != __name__ and mod.split(".", 1)[0].endswith("taskgraph"):
                del sys.modules[mod]

        if options["diff"] == "default":
            base_rev = repo.base_rev
        else:
            base_rev = options["diff"]

        try:
            repo.update(base_rev)
            base_rev = repo.head_rev[:12]
            options["output_file"] = os.path.join(
                diffdir, f"{options['graph_attr']}_{base_rev}"
            )
            print(f"Generating {options['graph_attr']} @ {base_rev}", file=sys.stderr)
            generate_taskgraph(options, parameters, logdir)
        finally:
            # Always restore the working directory to where the user was.
            repo.update(cur_rev)

        # Generate diff(s)
        diffcmd = [
            "diff",
            "-U20",
            "--report-identical-files",
            f"--label={options['graph_attr']}@{base_rev}",
            f"--label={options['graph_attr']}@{cur_rev}",
        ]

        for spec in parameters:
            base_path = os.path.join(diffdir, f"{options['graph_attr']}_{base_rev}")
            cur_path = os.path.join(diffdir, f"{options['graph_attr']}_{cur_rev}")

            params_name = None
            if len(parameters) > 1:
                params_name = Parameters.format_spec(spec)
                base_path += f"_{params_name}"
                cur_path += f"_{params_name}"

            try:
                proc = subprocess.run(
                    diffcmd + [base_path, cur_path],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    universal_newlines=True,
                    check=True,
                )
                diff_output = proc.stdout
                returncode = 0
            except subprocess.CalledProcessError as e:
                # returncode 1 simply means diffs were found
                if e.returncode != 1:
                    print(e.stderr, file=sys.stderr)
                    raise
                diff_output = e.output
                returncode = e.returncode

            dump_output(
                diff_output,
                # Don't bother saving file if no diffs were found. Log to
                # console in this case instead.
                path=None if returncode == 0 else output_file,
                params_spec=spec if len(parameters) > 1 else None,
            )

        if options["format"] != "json":
            print(
                "If you were expecting differences in task bodies "
                'you should pass "-J"\n',
                file=sys.stderr,
            )

    if len(parameters) > 1:
        print(f"See '{logdir}' for logs", file=sys.stderr)
+
+
@command("build-image", help="Build a Docker image")
@argument("image_name", help="Name of the image to build")
@argument(
    "-t", "--tag", help="tag that the image should be built as.", metavar="name:tag"
)
@argument(
    "--context-only",
    help="File name the context tarball should be written to."
    "with this option it will only build the context.tar.",
    metavar="context.tar",
)
def build_image(args):
    """Build the named in-tree Docker image, or just its context tarball."""
    from taskgraph.docker import build_context, build_image

    context_path = args["context_only"]
    if context_path is not None:
        # Only write the context tarball; skip the docker build itself.
        build_context(args["image_name"], context_path, os.environ)
    else:
        build_image(args["image_name"], args["tag"], os.environ)
+
+
@command(
    "load-image",
    help="Load a pre-built Docker image. Note that you need to "
    "have docker installed and running for this to work.",
)
@argument(
    "--task-id",
    help="Load the image at public/image.tar.zst in this task, "
    "rather than searching the index",
)
@argument(
    "-t",
    "--tag",
    help="tag that the image should be loaded as. If not "
    "image will be loaded with tag from the tarball",
    metavar="name:tag",
)
@argument(
    "image_name",
    nargs="?",
    help="Load the image of this name based on the current "
    "contents of the tree (as built for mozilla-central "
    "or mozilla-inbound)",
)
def load_image(args):
    """Load a docker image into the local daemon, by task id or by name."""
    from taskgraph.docker import load_image_by_name, load_image_by_task_id

    # One of the two selectors is required.
    if not args.get("image_name") and not args.get("task_id"):
        print("Specify either IMAGE-NAME or TASK-ID")
        sys.exit(1)
    try:
        task_id = args["task_id"]
        if task_id:
            loaded = load_image_by_task_id(task_id, args.get("tag"))
        else:
            loaded = load_image_by_name(args["image_name"], args.get("tag"))
        if not loaded:
            sys.exit(1)
    except Exception:
        traceback.print_exc()
        sys.exit(1)
+
+
@command("image-digest", help="Print the digest of a docker image.")
@argument(
    "image_name",
    help="Print the digest of the image of this name based on the current "
    "contents of the tree.",
)
def image_digest(args):
    """Look up and print the content digest of the named in-tree image."""
    from taskgraph.docker import get_image_digest

    try:
        print(get_image_digest(args["image_name"]))
    except Exception:
        traceback.print_exc()
        sys.exit(1)
+
+
@command("decision", help="Run the decision task")
@argument("--root", "-r", help="root of the taskgraph definition relative to topsrcdir")
@argument(
    "--message",
    required=False,
    help=argparse.SUPPRESS,
)
@argument(
    "--project",
    required=True,
    help="Project to use for creating task graph. Example: --project=try",
)
@argument("--pushlog-id", dest="pushlog_id", required=True, default="0")
@argument("--pushdate", dest="pushdate", required=True, type=int, default=0)
@argument("--owner", required=True, help="email address of who owns this graph")
@argument("--level", required=True, help="SCM level of this repository")
@argument(
    "--target-tasks-method", help="method for selecting the target tasks to generate"
)
@argument(
    "--repository-type",
    required=True,
    help='Type of repository, either "hg" or "git"',
)
@argument("--base-repository", required=True, help='URL for "base" repository to clone')
@argument(
    "--base-ref", default="", help='Reference of the revision in the "base" repository'
)
@argument(
    "--base-rev",
    default="",
    help="Taskgraph decides what to do based on the revision range between "
    "`--base-rev` and `--head-rev`. Value is determined automatically if not provided",
)
@argument(
    "--head-repository",
    required=True,
    help='URL for "head" repository to fetch revision from',
)
@argument(
    "--head-ref", required=True, help="Reference (this is same as rev usually for hg)"
)
@argument(
    "--head-rev", required=True, help="Commit revision to use from head repository"
)
@argument("--head-tag", help="Tag attached to the revision", default="")
@argument(
    "--tasks-for", required=True, help="the tasks_for value used to generate this task"
)
@argument("--try-task-config-file", help="path to try task configuration file")
def decision(options):
    """Entry point for the decision task: delegates the full generation and
    task-creation flow to `taskgraph_decision`."""
    from taskgraph.decision import taskgraph_decision

    taskgraph_decision(options)
+
+
@command("action-callback", description="Run action callback used by action tasks")
@argument(
    "--root",
    "-r",
    default="taskcluster/ci",
    help="root of the taskgraph definition relative to topsrcdir",
)
def action_callback(options):
    """Entry point for in-task action callbacks.

    The action context is passed through ACTION_* environment variables set
    on the action task.
    """
    from taskgraph.actions import trigger_action_callback
    from taskgraph.actions.util import get_parameters

    try:
        # the target task for this action (or null if it's a group action)
        task_id = json.loads(os.environ.get("ACTION_TASK_ID", "null"))
        # the target task group for this action
        task_group_id = os.environ.get("ACTION_TASK_GROUP_ID", None)
        # JSON-encoded user input to the action (or null if none given)
        input = json.loads(os.environ.get("ACTION_INPUT", "null"))
        # name of the registered callback to invoke
        callback = os.environ.get("ACTION_CALLBACK", None)
        root = options["root"]

        # Parameters are recovered from the original decision task's group.
        parameters = get_parameters(task_group_id)

        return trigger_action_callback(
            task_group_id=task_group_id,
            task_id=task_id,
            input=input,
            callback=callback,
            parameters=parameters,
            root=root,
            test=False,
        )
    except Exception:
        traceback.print_exc()
        sys.exit(1)
+
+
@command("test-action-callback", description="Run an action callback in a testing mode")
@argument(
    "--root",
    "-r",
    default="taskcluster/ci",
    help="root of the taskgraph definition relative to topsrcdir",
)
@argument(
    "--parameters",
    "-p",
    default="",
    help="parameters file (.yml or .json; see " "`taskcluster/docs/parameters.rst`)`",
)
@argument("--task-id", default=None, help="TaskId to which the action applies")
@argument(
    "--task-group-id", default=None, help="TaskGroupId to which the action applies"
)
@argument("--input", default=None, help="Action input (.yml or .json)")
@argument("callback", default=None, help="Action callback name (Python function name)")
def test_action_callback(options):
    """Run an action callback locally (test mode), without a Taskcluster
    deployment, using explicitly supplied parameters and input."""
    import taskgraph.actions
    import taskgraph.parameters
    from taskgraph.config import load_graph_config
    from taskgraph.util import yaml

    def load_data(filename):
        # Dispatch on extension; only YAML and JSON inputs are supported.
        with open(filename) as f:
            if filename.endswith(".yml"):
                return yaml.load_stream(f)
            elif filename.endswith(".json"):
                return json.load(f)
            else:
                # Fix: include the offending filename in the message — the
                # f-string previously contained no placeholder.
                raise Exception(f"unknown filename {filename}")

    try:
        task_id = options["task_id"]

        if options["input"]:
            input = load_data(options["input"])
        else:
            input = None

        root = options["root"]
        graph_config = load_graph_config(root)
        trust_domain = graph_config["trust-domain"]
        graph_config.register()

        parameters = taskgraph.parameters.load_parameters_file(
            options["parameters"], strict=False, trust_domain=trust_domain
        )
        parameters.check()

        return taskgraph.actions.trigger_action_callback(
            task_group_id=options["task_group_id"],
            task_id=task_id,
            input=input,
            callback=options["callback"],
            parameters=parameters,
            root=root,
            test=True,
        )
    except Exception:
        traceback.print_exc()
        sys.exit(1)
+
+
def create_parser():
    """Build the top-level argparse parser with one subparser per registered
    command, wiring each handler in via `set_defaults(command=...)`."""
    parser = argparse.ArgumentParser(description="Interact with taskgraph")
    subparsers = parser.add_subparsers()
    # Iterate values directly; the dict key merely duplicates args[0].
    for func, args, kwargs, defaults in commands.values():
        subparser = subparsers.add_parser(*args, **kwargs)
        for arg in func.args:
            subparser.add_argument(*arg[0], **arg[1])
        subparser.set_defaults(command=func, **defaults)
    return parser
+
+
def setup_logging():
    """Configure root logging at INFO with a timestamped message format."""
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)
+
+
def main(args=None):
    """CLI entry point: parse `args` (default: sys.argv[1:]) and dispatch to
    the selected subcommand, exiting 1 on any uncaught exception."""
    # Fix: read sys.argv at call time. The old default `args=sys.argv[1:]`
    # was evaluated once at import, so later changes to sys.argv (or repeated
    # calls) silently used stale arguments.
    if args is None:
        args = sys.argv[1:]
    setup_logging()
    parser = create_parser()
    args = parser.parse_args(args)
    try:
        args.command(vars(args))
    except Exception:
        traceback.print_exc()
        sys.exit(1)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/morph.py b/third_party/python/taskcluster_taskgraph/taskgraph/morph.py
new file mode 100644
index 0000000000..c488317782
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/morph.py
@@ -0,0 +1,271 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Graph morphs are modifications to task-graphs that take place *after* the
+optimization phase.
+
+These graph morphs are largely invisible to developers running `./mach`
+locally, so they should be limited to changes that do not modify the meaning of
+the graph.
+"""
+
+# Note that the translation of `{'task-reference': '..'}` and
+# `artifact-reference` are handled in the optimization phase (since
+# optimization involves dealing with taskIds directly). Similarly,
+# `{'relative-datestamp': '..'}` is handled at the last possible moment during
+# task creation.
+
+
+import logging
+import os
+import re
+
+from slugid import nice as slugid
+
+from .graph import Graph
+from .task import Task
+from .taskgraph import TaskGraph
+from .util.workertypes import get_worker_type
+
here = os.path.abspath(os.path.dirname(__file__))
logger = logging.getLogger(__name__)
# The Taskcluster queue caps routes per task at 10; tasks exceeding this get
# their index routes moved into a derived index task (see add_index_tasks).
MAX_ROUTES = 10

# Morph callables applied in registration order by `morph()` below.
registered_morphs = []
+
+
def register_morph(func):
    """Decorator: register `func` to be applied by `morph()`.

    Fix: return `func` so the decorated module-level name stays bound to the
    function. Previously the decorator returned None, rebinding names like
    `add_index_tasks` to None (harmless only because they are invoked via
    `registered_morphs`).
    """
    registered_morphs.append(func)
    return func
+
+
def amend_taskgraph(taskgraph, label_to_taskid, to_add):
    """Add the given tasks to the taskgraph, returning a new taskgraph"""
    tasks = dict(taskgraph.tasks)
    edges = set(taskgraph.graph.edges)
    for new_task in to_add:
        tasks[new_task.task_id] = new_task
        # Each added label must be new to the graph.
        assert new_task.label not in label_to_taskid
        label_to_taskid[new_task.label] = new_task.task_id
        edges.update(
            (new_task.task_id, dep, depname)
            for depname, dep in new_task.dependencies.items()
        )
    return TaskGraph(tasks, Graph(set(tasks), edges)), label_to_taskid
+
+
def derive_index_task(task, taskgraph, label_to_taskid, parameters, graph_config):
    """Create the shell of a task that depends on `task` and on the given docker
    image."""
    purpose = "index-task"
    label = f"{purpose}-{task.label}"
    # Index tasks run on the generic "misc" worker pool for this level.
    provisioner_id, worker_type = get_worker_type(
        graph_config, "misc", parameters["level"]
    )

    task_def = {
        "provisionerId": provisioner_id,
        "workerType": worker_type,
        "dependencies": [task.task_id],
        "created": {"relative-datestamp": "0 seconds"},
        "deadline": task.task["deadline"],
        # no point existing past the parent task's deadline
        "expires": task.task["deadline"],
        "metadata": {
            "name": label,
            "description": "{} for {}".format(
                purpose, task.task["metadata"]["description"]
            ),
            "owner": task.task["metadata"]["owner"],
            "source": task.task["metadata"]["source"],
        },
        "scopes": [],
        "payload": {
            "image": {
                "path": "public/image.tar.zst",
                "namespace": "taskgraph.cache.level-3.docker-images.v2.index-task.latest",
                "type": "indexed-image",
            },
            "features": {
                # Lets the task's tooling reach the index service through
                # the proxy using the task's own scopes.
                "taskclusterProxy": True,
            },
            "maxRunTime": 600,
        },
    }

    # only include the docker-image dependency here if it is actually in the
    # taskgraph (has not been optimized). It is included in
    # task_def['dependencies'] unconditionally.
    dependencies = {"parent": task.task_id}

    task = Task(
        kind="misc",
        label=label,
        attributes={},
        task=task_def,
        dependencies=dependencies,
    )
    # Assign the taskId up front so callers can wire edges to it.
    task.task_id = slugid()
    return task, taskgraph, label_to_taskid
+
+
# these regular expressions capture route prefixes for which we have a star
# scope, allowing them to be summarized. Each should correspond to a star scope
# in each Gecko `assume:repo:hg.mozilla.org/...` role.
# Group 1 of each pattern is the prefix that gets suffixed with "*" when a
# scope is summarized (see make_index_task).
_SCOPE_SUMMARY_REGEXPS = [
    # TODO Bug 1631839 - Remove these scopes once the migration is done
    re.compile(r"(index:insert-task:project\.mobile\.fenix\.v2\.[^.]*\.).*"),
    re.compile(
        r"(index:insert-task:project\.mobile\.reference-browser\.v3\.[^.]*\.).*"
    ),
]
+
+
def make_index_task(parent_task, taskgraph, label_to_taskid, parameters, graph_config):
    """Move `parent_task`'s `index.*` routes onto a new derived index task.

    The parent is mutated in place (its index routes are removed); the
    returned task performs the corresponding index insertions itself.
    """
    index_paths = [
        r.split(".", 1)[1] for r in parent_task.task["routes"] if r.startswith("index.")
    ]
    parent_task.task["routes"] = [
        r for r in parent_task.task["routes"] if not r.startswith("index.")
    ]

    task, taskgraph, label_to_taskid = derive_index_task(
        parent_task, taskgraph, label_to_taskid, parameters, graph_config
    )

    # we need to "summarize" the scopes, otherwise a particularly
    # namespace-heavy index task might have more scopes than can fit in a
    # temporary credential.
    scopes = set()
    domain_scope_regex = re.compile(
        r"(index:insert-task:{trust_domain}\.v2\.[^.]*\.).*".format(
            trust_domain=re.escape(graph_config["trust-domain"])
        )
    )
    all_scopes_summary_regexps = _SCOPE_SUMMARY_REGEXPS + [domain_scope_regex]
    for path in index_paths:
        scope = f"index:insert-task:{path}"
        for summ_re in all_scopes_summary_regexps:
            match = summ_re.match(scope)
            if match:
                # Collapse to the wildcard form covered by a star scope.
                scope = match.group(1) + "*"
                break
        scopes.add(scope)
    task.task["scopes"] = sorted(scopes)

    task.task["payload"]["command"] = ["insert-indexes.js"] + index_paths
    task.task["payload"]["env"] = {
        "TARGET_TASKID": parent_task.task_id,
        "INDEX_RANK": parent_task.task.get("extra", {}).get("index", {}).get("rank", 0),
    }
    return task, taskgraph, label_to_taskid
+
+
@register_morph
def add_index_tasks(taskgraph, label_to_taskid, parameters, graph_config):
    """
    The TaskCluster queue only allows 10 routes on a task, but we have tasks
    with many more routes, for purposes of indexing. This graph morph adds
    "index tasks" that depend on such tasks and do the index insertions
    directly, avoiding the limits on task.routes.
    """
    logger.debug("Morphing: adding index tasks")

    index_tasks = []
    for label, task in taskgraph.tasks.items():
        route_count = len(task.task.get("routes", []))
        if route_count <= MAX_ROUTES:
            continue
        task, taskgraph, label_to_taskid = make_index_task(
            task, taskgraph, label_to_taskid, parameters, graph_config
        )
        index_tasks.append(task)

    if index_tasks:
        taskgraph, label_to_taskid = amend_taskgraph(
            taskgraph, label_to_taskid, index_tasks
        )
        logger.info(f"Added {len(index_tasks)} index tasks")

    return taskgraph, label_to_taskid
+
+
+def _get_morph_url():
+ """
+ Guess a URL for the current file, for source metadata for created tasks.
+
+ If we checked out the taskgraph code with run-task in the decision task,
+ we can use TASKGRAPH_* to find the right version, which covers the
+ existing use case.
+ """
+ taskgraph_repo = os.environ.get(
+ "TASKGRAPH_HEAD_REPOSITORY", "https://github.com/taskcluster/taskgraph"
+ )
+ taskgraph_rev = os.environ.get("TASKGRAPH_HEAD_REV", "default")
+ return f"{taskgraph_repo}/raw-file/{taskgraph_rev}/src/taskgraph/morph.py"
+
+
@register_morph
def add_code_review_task(taskgraph, label_to_taskid, parameters, graph_config):
    """Add a terminal `code-review` task depending on every task flagged with
    the `code-review` attribute, so the review service is notified when they
    all resolve (pass or fail). No-op unless the `code-review` parameter is
    set and at least one task carries the attribute."""
    logger.debug("Morphing: adding code review task")

    review_config = parameters.get("code-review")
    if not review_config:
        return taskgraph, label_to_taskid

    code_review_tasks = {}
    for label, task in taskgraph.tasks.items():
        if task.attributes.get("code-review"):
            code_review_tasks[task.label] = task.task_id

    if code_review_tasks:
        code_review_task_def = {
            # built-in/succeed resolves immediately once dependencies do.
            "provisionerId": "built-in",
            "workerType": "succeed",
            "dependencies": sorted(code_review_tasks.values()),
            # This option permits to run the task
            # regardless of the dependencies tasks exit status
            # as we are interested in the task failures
            "requires": "all-resolved",
            "created": {"relative-datestamp": "0 seconds"},
            "deadline": {"relative-datestamp": "1 day"},
            # no point existing past the parent task's deadline
            "expires": {"relative-datestamp": "1 day"},
            "metadata": {
                "name": "code-review",
                "description": "List all issues found in static analysis and linting tasks",
                "owner": parameters["owner"],
                "source": _get_morph_url(),
            },
            "scopes": [],
            "payload": {},
            # Pulse route the code-review bot listens on.
            "routes": ["project.relman.codereview.v1.try_ending"],
            "extra": {
                "code-review": {
                    "phabricator-build-target": review_config[
                        "phabricator-build-target"
                    ],
                    "repository": parameters["head_repository"],
                    "revision": parameters["head_rev"],
                }
            },
        }
        task = Task(
            kind="misc",
            label="code-review",
            attributes={},
            task=code_review_task_def,
            dependencies=code_review_tasks,
        )
        task.task_id = slugid()
        taskgraph, label_to_taskid = amend_taskgraph(taskgraph, label_to_taskid, [task])
        logger.info("Added code review task.")

    return taskgraph, label_to_taskid
+
+
def morph(taskgraph, label_to_taskid, parameters, graph_config):
    """Apply all morphs"""
    # Morphs run in registration order; each returns the updated pair.
    for morph_fn in registered_morphs:
        taskgraph, label_to_taskid = morph_fn(
            taskgraph, label_to_taskid, parameters, graph_config
        )
    return taskgraph, label_to_taskid
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/__init__.py
new file mode 100644
index 0000000000..06287d877d
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/__init__.py
@@ -0,0 +1,8 @@
+from .base import ( # noqa: F401
+ Alias,
+ All,
+ Any,
+ Not,
+ OptimizationStrategy,
+ register_strategy,
+)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py
new file mode 100644
index 0000000000..367b94e1de
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py
@@ -0,0 +1,551 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+The objective of optimization is to remove as many tasks from the graph as
+possible, as efficiently as possible, thereby delivering useful results as
+quickly as possible. For example, ideally if only a test script is modified in
+a push, then the resulting graph contains only the corresponding test suite
+task.
+
+See ``taskcluster/docs/optimization.rst`` for more information.
+"""
+
+import datetime
+import logging
+from abc import ABCMeta, abstractmethod, abstractproperty
+from collections import defaultdict
+
+from slugid import nice as slugid
+
+from taskgraph.graph import Graph
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.util.parameterization import resolve_task_references, resolve_timestamps
+from taskgraph.util.python_path import import_sibling_modules
+
+logger = logging.getLogger(__name__)
+registry = {}
+
+
def register_strategy(name, args=()):
    """Class decorator registering an optimization strategy under *name*.

    The first registration wins: when *name* is already present, the existing
    instance is left untouched.  A newly constructed instance that lacks a
    ``description`` attribute gets *name* as its description.
    """

    def wrap(cls):
        if name not in registry:
            instance = cls(*args)
            if not hasattr(instance, "description"):
                instance.description = name
            registry[name] = instance
        return cls

    return wrap
+
+
def optimize_task_graph(
    target_task_graph,
    requested_tasks,
    params,
    do_not_optimize,
    decision_task_id,
    existing_tasks=None,
    strategy_override=None,
):
    """
    Perform task optimization, returning a taskgraph and a map from label to
    assigned taskId, including replacement tasks.

    Optimization happens in two phases: ``remove_tasks`` drops tasks that do
    not need to run at all, then ``replace_tasks`` substitutes taskIds of
    pre-existing tasks where possible; ``get_subgraph`` assembles the result.
    """
    label_to_taskid = {}
    existing_tasks = existing_tasks or {}

    # Instantiate the strategies for this optimization process; per-run
    # overrides shadow the globally registered strategies.
    strategies = dict(registry)
    strategies.update(strategy_override or {})

    optimizations = _get_optimizations(target_task_graph, strategies)

    removed_tasks = remove_tasks(
        target_task_graph=target_task_graph,
        requested_tasks=requested_tasks,
        optimizations=optimizations,
        params=params,
        do_not_optimize=do_not_optimize,
    )

    replaced_tasks = replace_tasks(
        target_task_graph=target_task_graph,
        optimizations=optimizations,
        params=params,
        do_not_optimize=do_not_optimize,
        label_to_taskid=label_to_taskid,
        existing_tasks=existing_tasks,
        removed_tasks=removed_tasks,
    )

    subgraph = get_subgraph(
        target_task_graph,
        removed_tasks,
        replaced_tasks,
        label_to_taskid,
        decision_task_id,
    )
    return subgraph, label_to_taskid
+
+
+def _get_optimizations(target_task_graph, strategies):
+ def optimizations(label):
+ task = target_task_graph.tasks[label]
+ if task.optimization:
+ opt_by, arg = list(task.optimization.items())[0]
+ strategy = strategies[opt_by]
+ if hasattr(strategy, "description"):
+ opt_by += f" ({strategy.description})"
+ return (opt_by, strategy, arg)
+ else:
+ return ("never", strategies["never"], None)
+
+ return optimizations
+
+
def _log_optimization(verb, opt_counts, opt_reasons=None):
    """Log a summary of one optimization phase.

    *opt_reasons* (label -> (action, reason)) is emitted per task at debug
    level; *opt_counts* (reason -> count) is summarized at info level.
    """
    if opt_reasons:
        for label, (action, reason) in opt_reasons.items():
            logger.debug(f"optimize: {label} {action} because of {reason}")

    if not opt_counts:
        logger.info(f"No tasks {verb} during optimization")
        return

    summary = ", ".join(
        f"{count} tasks by {reason}" for reason, count in sorted(opt_counts.items())
    )
    logger.info(f"{verb.title()} {summary} during optimization.")
+
+
def remove_tasks(
    target_task_graph, requested_tasks, params, optimizations, do_not_optimize
):
    """
    Implement the "Removing Tasks" phase, returning a set of task labels of all removed tasks.
    """
    opt_counts = defaultdict(int)
    # label -> ("kept"|"removed", reason); used only for debug logging.
    opt_reasons = {}
    removed = set()
    dependents_of = target_task_graph.graph.reverse_links_dict()
    tasks = target_task_graph.tasks
    # Tasks whose removal is still undecided because of 'if_dependencies'
    # interactions; settled in the final pass at the bottom.
    prune_candidates = set()

    # Traverse graph so dependents (child nodes) are guaranteed to be processed
    # first.
    for label in target_task_graph.graph.visit_preorder():
        # Dependents that can be pruned away (shouldn't cause this task to run).
        # Only dependents that either:
        # A) Explicitly reference this task in their 'if_dependencies' list, or
        # B) Don't have an 'if_dependencies' attribute (i.e are in 'prune_candidates'
        #    because they should be removed but have prune_deps themselves)
        # should be considered.
        prune_deps = {
            l
            for l in dependents_of[label]
            if l in prune_candidates
            if not tasks[l].if_dependencies or label in tasks[l].if_dependencies
        }

        def _keep(reason):
            """Mark a task as being kept in the graph. Also recursively removes
            any dependents from `prune_candidates`, assuming they should be
            kept because of this task.
            """
            opt_reasons[label] = ("kept", reason)

            # Removes dependents that were in 'prune_candidates' from a task
            # that ended up being kept (and therefore the dependents should
            # also be kept).
            queue = list(prune_deps)
            while queue:
                l = queue.pop()

                # If l is a prune_dep of multiple tasks it could be queued up
                # multiple times. Guard against it being already removed.
                if l not in prune_candidates:
                    continue

                # If a task doesn't set 'if_dependencies' itself (rather it was
                # added to 'prune_candidates' due to one of its dependents),
                # then we shouldn't remove it.
                if not tasks[l].if_dependencies:
                    continue

                prune_candidates.remove(l)
                queue.extend([r for r in dependents_of[l] if r in prune_candidates])

        def _remove(reason):
            """Potentially mark a task as being removed from the graph. If the
            task has dependents that can be pruned, add this task to
            `prune_candidates` rather than removing it.
            """
            if prune_deps:
                # If there are prune_deps, unsure if we can remove this task yet.
                prune_candidates.add(label)
            else:
                opt_reasons[label] = ("removed", reason)
                opt_counts[reason] += 1
                removed.add(label)

        # if we're not allowed to optimize, that's easy..
        if label in do_not_optimize:
            _keep("do not optimize")
            continue

        # If there are remaining tasks depending on this one, do not remove.
        if any(
            l for l in dependents_of[label] if l not in removed and l not in prune_deps
        ):
            _keep("dependent tasks")
            continue

        # Some tasks in the task graph only exist because they were required
        # by a task that has just been optimized away. They can now be removed.
        if label not in requested_tasks:
            _remove("dependents optimized")
            continue

        # Call the optimization strategy.
        task = tasks[label]
        opt_by, opt, arg = optimizations(label)
        if opt.should_remove_task(task, params, arg):
            _remove(opt_by)
            continue

        # Some tasks should only run if their dependency was also run. Since we
        # haven't processed dependencies yet, we add them to a list of
        # candidate tasks for pruning.
        if task.if_dependencies:
            opt_reasons[label] = ("kept", opt_by)
            prune_candidates.add(label)
        else:
            _keep(opt_by)

    if prune_candidates:
        reason = "if-dependencies pruning"
        for label in prune_candidates:
            # There's an edge case where a triangle graph can cause a
            # dependency to stay in 'prune_candidates' when the dependent
            # remains. Do a final check to ensure we don't create any bad
            # edges.
            dependents = any(
                d
                for d in dependents_of[label]
                if d not in prune_candidates
                if d not in removed
            )
            if dependents:
                opt_reasons[label] = ("kept", "dependent tasks")
                continue
            removed.add(label)
            opt_counts[reason] += 1
            opt_reasons[label] = ("removed", reason)

    _log_optimization("removed", opt_counts, opt_reasons)
    return removed
+
+
def replace_tasks(
    target_task_graph,
    params,
    optimizations,
    do_not_optimize,
    label_to_taskid,
    removed_tasks,
    existing_tasks,
):
    """
    Implement the "Replacing Tasks" phase, returning a set of task labels of
    all replaced tasks. The replacement taskIds are added to label_to_taskid as
    a side-effect.  Tasks whose strategy returns True (replace with nothing)
    are added to removed_tasks instead, also as a side-effect.
    """
    opt_counts = defaultdict(int)
    replaced = set()
    dependents_of = target_task_graph.graph.reverse_links_dict()
    dependencies_of = target_task_graph.graph.links_dict()

    # Postorder: a task's dependencies are visited first, so by the time a
    # task is considered, the fate of everything it depends on is known.
    for label in target_task_graph.graph.visit_postorder():
        # if we're not allowed to optimize, that's easy..
        if label in do_not_optimize:
            continue

        # if this task depends on un-replaced, un-removed tasks, do not replace
        if any(
            l not in replaced and l not in removed_tasks for l in dependencies_of[label]
        ):
            continue

        # if the task already exists, that's an easy replacement
        repl = existing_tasks.get(label)
        if repl:
            label_to_taskid[label] = repl
            replaced.add(label)
            opt_counts["existing_tasks"] += 1
            continue

        # call the optimization strategy
        task = target_task_graph.tasks[label]
        opt_by, opt, arg = optimizations(label)

        # compute latest deadline of dependents (if any)
        dependents = [target_task_graph.tasks[l] for l in dependents_of[label]]
        deadline = None
        if dependents:
            # NOTE(review): utcnow() is naive (and deprecated in newer
            # Pythons); resolve_timestamps presumably expects naive UTC —
            # confirm before changing.
            now = datetime.datetime.utcnow()
            deadline = max(
                resolve_timestamps(now, task.task["deadline"]) for task in dependents
            )
        repl = opt.should_replace_task(task, params, deadline, arg)
        if repl:
            if repl is True:
                # True means remove this task; get_subgraph will catch any
                # problems with removed tasks being depended on
                removed_tasks.add(label)
            else:
                label_to_taskid[label] = repl
                replaced.add(label)
            opt_counts[opt_by] += 1
            continue

    _log_optimization("replaced", opt_counts)
    return replaced
+
+
def get_subgraph(
    target_task_graph,
    removed_tasks,
    replaced_tasks,
    label_to_taskid,
    decision_task_id,
):
    """
    Return the subgraph of target_task_graph consisting only of
    non-optimized tasks and edges between them.

    To avoid losing track of taskIds for tasks optimized away, this method
    simultaneously substitutes real taskIds for task labels in the graph, and
    populates each task definition's `dependencies` key with the appropriate
    taskIds.  Task references are resolved in the process.

    Raises an Exception if a surviving task still depends on a removed one.
    """

    # check for any dependency edges from included to removed tasks
    bad_edges = [
        (l, r, n)
        for l, r, n in target_task_graph.graph.edges
        if l not in removed_tasks and r in removed_tasks
    ]
    if bad_edges:
        probs = ", ".join(
            f"{l} depends on {r} as {n} but it has been removed"
            for l, r, n in bad_edges
        )
        raise Exception("Optimization error: " + probs)

    # fill in label_to_taskid for anything not removed or replaced
    assert replaced_tasks <= set(label_to_taskid)
    for label in sorted(
        target_task_graph.graph.nodes - removed_tasks - set(label_to_taskid)
    ):
        label_to_taskid[label] = slugid()

    # resolve labels to taskIds and populate task['dependencies']
    tasks_by_taskid = {}
    named_links_dict = target_task_graph.graph.named_links_dict()
    omit = removed_tasks | replaced_tasks
    for label, task in target_task_graph.tasks.items():
        if label in omit:
            continue
        task.task_id = label_to_taskid[label]
        named_task_dependencies = {
            name: label_to_taskid[label]
            for name, label in named_links_dict.get(label, {}).items()
        }

        # Add remaining soft dependencies
        # (these are keyed by their label, unlike regular dependencies which
        # use the dependency edge name)
        if task.soft_dependencies:
            named_task_dependencies.update(
                {
                    label: label_to_taskid[label]
                    for label in task.soft_dependencies
                    if label in label_to_taskid and label not in omit
                }
            )

        task.task = resolve_task_references(
            task.label,
            task.task,
            task_id=task.task_id,
            decision_task_id=decision_task_id,
            dependencies=named_task_dependencies,
        )
        deps = task.task.setdefault("dependencies", [])
        deps.extend(sorted(named_task_dependencies.values()))
        tasks_by_taskid[task.task_id] = task

    # resolve edges to taskIds
    edges_by_taskid = (
        (label_to_taskid.get(left), label_to_taskid.get(right), name)
        for (left, right, name) in target_task_graph.graph.edges
    )
    # ..and drop edges that are no longer entirely in the task graph
    # (note that this omits edges to replaced tasks, but they are still in task.dependencies)
    edges_by_taskid = {
        (left, right, name)
        for (left, right, name) in edges_by_taskid
        if left in tasks_by_taskid and right in tasks_by_taskid
    }

    return TaskGraph(tasks_by_taskid, Graph(set(tasks_by_taskid), edges_by_taskid))
+
+
+@register_strategy("never")
+class OptimizationStrategy:
+ def should_remove_task(self, task, params, arg):
+ """Determine whether to optimize this task by removing it. Returns
+ True to remove."""
+ return False
+
+ def should_replace_task(self, task, params, deadline, arg):
+ """Determine whether to optimize this task by replacing it. Returns a
+ taskId to replace this task, True to replace with nothing, or False to
+ keep the task."""
+ return False
+
+
+@register_strategy("always")
+class Always(OptimizationStrategy):
+ def should_remove_task(self, task, params, arg):
+ return True
+
+
class CompositeStrategy(OptimizationStrategy, metaclass=ABCMeta):
    """Base class for strategies composed of other strategies.

    Substrategies may be given as instances or as registered names; unknown
    names raise ``TypeError``.  The optional ``split_args`` keyword is a
    callable ``(arg, substrategies) -> iterable`` that distributes the
    strategy argument across the substrategies; by default every substrategy
    receives the same argument.
    """

    def __init__(self, *substrategies, **kwargs):
        self.substrategies = []
        missing = set()
        for sub in substrategies:
            if isinstance(sub, str):
                if sub not in registry:
                    missing.add(sub)
                    continue
                sub = registry[sub]

            self.substrategies.append(sub)

        if missing:
            raise TypeError(
                "substrategies aren't registered: {}".format(
                    ", ".join(sorted(missing))
                )
            )

        self.split_args = kwargs.pop("split_args", None)
        if not self.split_args:
            self.split_args = lambda arg, substrategies: [arg] * len(substrategies)
        if kwargs:
            raise TypeError("unexpected keyword args")

    # `abstractproperty` is deprecated since Python 3.3; stacking `property`
    # over `abstractmethod` is the supported spelling.
    @property
    @abstractmethod
    def description(self):
        """A textual description of the combined substrategies."""

    @abstractmethod
    def reduce(self, results):
        """Given all substrategy results as a generator, return the overall
        result."""

    def _generate_results(self, fname, *args):
        # The strategy argument is always last; everything before it (task,
        # params, and optionally deadline) is passed through unchanged.
        *passthru, arg = args
        for sub, sub_arg in zip(
            self.substrategies, self.split_args(arg, self.substrategies)
        ):
            yield getattr(sub, fname)(*passthru, sub_arg)

    def should_remove_task(self, *args):
        results = self._generate_results("should_remove_task", *args)
        return self.reduce(results)

    def should_replace_task(self, *args):
        results = self._generate_results("should_replace_task", *args)
        return self.reduce(results)
+
+
class Any(CompositeStrategy):
    """Remove or replace a task when at least one substrategy says to.

    Replacement uses the value returned by the first substrategy that says
    to replace.
    """

    @property
    def description(self):
        return "-or-".join([sub.description for sub in self.substrategies])

    @classmethod
    def reduce(cls, results):
        # First truthy result wins; False when every substrategy declines.
        return next((outcome for outcome in results if outcome), False)
+
+
class All(CompositeStrategy):
    """Remove or replace a task only when every substrategy says to.

    Replacement uses the value returned by the first strategy passed in.
    Note the values used for replacement need not be the same, as long as
    they all say to replace.
    """

    @property
    def description(self):
        return "-and-".join([sub.description for sub in self.substrategies])

    @classmethod
    def reduce(cls, results):
        # First falsy result short-circuits; True when all agree.
        return next((outcome for outcome in results if not outcome), True)
+
+
class Alias(CompositeStrategy):
    """Provides an alias to a single existing strategy.

    Useful to swap strategies in and out without needing to modify the task
    transforms.
    """

    def __init__(self, strategy):
        super().__init__(strategy)

    @property
    def description(self):
        (wrapped,) = self.substrategies
        return wrapped.description

    def reduce(self, results):
        # Exactly one substrategy, so forward its single result.
        return next(results)
+
+
class Not(CompositeStrategy):
    """Wraps a single strategy and yields the logical opposite of its result."""

    def __init__(self, strategy):
        super().__init__(strategy)

    @property
    def description(self):
        (wrapped,) = self.substrategies
        return "not-" + wrapped.description

    def reduce(self, results):
        return not next(results)
+
+
+# Trigger registration in sibling modules.
+import_sibling_modules()
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py
new file mode 100644
index 0000000000..c6846e60c5
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py
@@ -0,0 +1,65 @@
+import logging
+from datetime import datetime
+
+from taskgraph import files_changed
+from taskgraph.optimize.base import OptimizationStrategy, register_strategy
+from taskgraph.util.taskcluster import find_task_id, status_task
+
+logger = logging.getLogger(__name__)
+
+
+@register_strategy("index-search")
+class IndexSearch(OptimizationStrategy):
+
+ # A task with no dependencies remaining after optimization will be replaced
+ # if artifacts exist for the corresponding index_paths.
+ # Otherwise, we're in one of the following cases:
+ # - the task has un-optimized dependencies
+ # - the artifacts have expired
+ # - some changes altered the index_paths and new artifacts need to be
+ # created.
+ # In every of those cases, we need to run the task to create or refresh
+ # artifacts.
+
+ fmt = "%Y-%m-%dT%H:%M:%S.%fZ"
+
+ def should_replace_task(self, task, params, deadline, index_paths):
+ "Look for a task with one of the given index paths"
+ for index_path in index_paths:
+ try:
+ task_id = find_task_id(index_path)
+ status = status_task(task_id)
+ # status can be `None` if we're in `testing` mode
+ # (e.g. test-action-callback)
+ if not status or status.get("state") in ("exception", "failed"):
+ continue
+
+ if deadline and datetime.strptime(
+ status["expires"], self.fmt
+ ) < datetime.strptime(deadline, self.fmt):
+ continue
+
+ return task_id
+ except KeyError:
+ # 404 will end up here and go on to the next index path
+ pass
+
+ return False
+
+
+@register_strategy("skip-unless-changed")
+class SkipUnlessChanged(OptimizationStrategy):
+ def should_remove_task(self, task, params, file_patterns):
+ # pushlog_id == -1 - this is the case when run from a cron.yml job or on a git repository
+ if params.get("repository_type") == "hg" and params.get("pushlog_id") == -1:
+ return False
+
+ changed = files_changed.check(params, file_patterns)
+ if not changed:
+ logger.debug(
+ 'no files found matching a pattern in `skip-unless-changed` for "{}"'.format(
+ task.label
+ )
+ )
+ return True
+ return False
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py b/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py
new file mode 100644
index 0000000000..ed662c704e
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py
@@ -0,0 +1,369 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import gzip
+import hashlib
+import json
+import os
+import time
+from datetime import datetime
+from io import BytesIO
+from pprint import pformat
+from subprocess import CalledProcessError
+from urllib.parse import urlparse
+from urllib.request import urlopen
+
+import mozilla_repo_urls
+from voluptuous import ALLOW_EXTRA, Any, Optional, Required, Schema
+
+from taskgraph.util import yaml
+from taskgraph.util.readonlydict import ReadOnlyDict
+from taskgraph.util.schema import validate_schema
+from taskgraph.util.taskcluster import find_task_id, get_artifact_url
+from taskgraph.util.vcs import get_repository
+
+
class ParameterMismatch(Exception):
    """Raised when a parameters.yml has extra or missing parameters.

    Also raised by ``Parameters.check`` when schema validation fails.
    """
+
+
+# Please keep this list sorted and in sync with docs/reference/parameters.rst
+base_schema = Schema(
+ {
+ Required("base_repository"): str,
+ Required("base_ref"): str,
+ Required("base_rev"): str,
+ Required("build_date"): int,
+ Required("build_number"): int,
+ Required("do_not_optimize"): [str],
+ Required("enable_always_target"): bool,
+ Required("existing_tasks"): {str: str},
+ Required("filters"): [str],
+ Required("head_ref"): str,
+ Required("head_repository"): str,
+ Required("head_rev"): str,
+ Required("head_tag"): str,
+ Required("level"): str,
+ Required("moz_build_date"): str,
+ Required("next_version"): Any(str, None),
+ Required("optimize_strategies"): Any(str, None),
+ Required("optimize_target_tasks"): bool,
+ Required("owner"): str,
+ Required("project"): str,
+ Required("pushdate"): int,
+ Required("pushlog_id"): str,
+ Required("repository_type"): str,
+ # target-kind is not included, since it should never be
+ # used at run-time
+ Required("target_tasks_method"): str,
+ Required("tasks_for"): str,
+ Required("version"): Any(str, None),
+ Optional("code-review"): {
+ Required("phabricator-build-target"): str,
+ },
+ }
+)
+
+
def get_contents(path):
    """Return the first line of the file at *path*, with trailing whitespace
    (including the newline) stripped."""
    with open(path) as fh:
        return fh.readline().rstrip()
+
+
def get_version(repo_path):
    """Return the first line of ``<repo_path>/version.txt``, or None when the
    file does not exist."""
    version_path = os.path.join(repo_path, "version.txt")
    if not os.path.isfile(version_path):
        return None
    return get_contents(version_path)
+
+
def _get_defaults(repo_root=None):
    """Compute default parameter values from the local checkout.

    Falls back to empty ``repo_url``/``project`` when the remote URL cannot
    be determined or parsed as a Mozilla-style repository URL.
    """
    repo_path = repo_root or os.getcwd()
    repo = get_repository(repo_path)
    try:
        repo_url = repo.get_url()
        parsed_url = mozilla_repo_urls.parse(repo_url)
        project = parsed_url.repo_name
    except (
        CalledProcessError,
        mozilla_repo_urls.errors.InvalidRepoUrlError,
        mozilla_repo_urls.errors.UnsupportedPlatformError,
    ):
        repo_url = ""
        project = ""

    return {
        "base_repository": repo_url,
        "base_ref": "",
        "base_rev": "",
        "build_date": int(time.time()),
        "build_number": 1,
        "do_not_optimize": [],
        "enable_always_target": True,
        "existing_tasks": {},
        "filters": ["target_tasks_method"],
        # Fall back to the revision when on a detached head (no branch name).
        "head_ref": repo.branch or repo.head_rev,
        "head_repository": repo_url,
        "head_rev": repo.head_rev,
        "head_tag": "",
        "level": "3",
        "moz_build_date": datetime.now().strftime("%Y%m%d%H%M%S"),
        "next_version": None,
        "optimize_strategies": None,
        "optimize_target_tasks": True,
        "owner": "nobody@mozilla.com",
        "project": project,
        "pushdate": int(time.time()),
        "pushlog_id": "0",
        "repository_type": repo.tool,
        "target_tasks_method": "default",
        "tasks_for": "",
        "version": get_version(repo_path),
    }
+
+
+defaults_functions = [_get_defaults]
+
+
def extend_parameters_schema(schema, defaults_fn=None):
    """
    Extend the schema for parameters to include per-project configuration.

    This should be called by the `taskgraph.register` function in the
    graph-configuration.

    Args:
        schema (Schema): The voluptuous.Schema object used to describe extended
            parameters.
        defaults_fn (function): A function which takes the repository root as
            its single argument and returns a dict mapping parameter name to
            default value in the event strict=False (optional).
    """
    # Mutates module-level state: every Parameters instance created after
    # this call validates against the extended schema/defaults.
    global base_schema
    global defaults_functions
    base_schema = base_schema.extend(schema)
    if defaults_fn:
        defaults_functions.append(defaults_fn)
+
+
class Parameters(ReadOnlyDict):
    """An immutable dictionary with nicer KeyError messages on failure.

    When ``strict`` is False, missing parameters are filled in from the
    registered defaults functions (see ``extend_parameters_schema``) and
    unknown keys are tolerated by ``check``.
    """

    def __init__(self, strict=True, repo_root=None, **kwargs):
        self.strict = strict
        self.spec = kwargs.pop("spec", None)
        self._id = None

        if not self.strict:
            # apply defaults to missing parameters
            kwargs = Parameters._fill_defaults(repo_root=repo_root, **kwargs)

        ReadOnlyDict.__init__(self, **kwargs)

    @property
    def id(self):
        # Short, stable content hash of the parameter values; computed
        # lazily and cached (the dict is immutable, so this is safe).
        if not self._id:
            self._id = hashlib.sha256(
                json.dumps(self, sort_keys=True).encode("utf-8")
            ).hexdigest()[:12]

        return self._id

    @staticmethod
    def format_spec(spec):
        """
        Get a friendly identifier from a parameters specifier.

        Args:
            spec (str): Parameters specifier.

        Returns:
            str: Name to identify parameters by.
        """
        if spec is None:
            return "defaults"

        if any(spec.startswith(s) for s in ("task-id=", "project=")):
            return spec

        result = urlparse(spec)
        if result.scheme in ("http", "https"):
            spec = result.path

        return os.path.splitext(os.path.basename(spec))[0]

    @staticmethod
    def _fill_defaults(repo_root=None, **kwargs):
        """Merge defaults from every registered defaults function into
        *kwargs*, without overwriting values that were supplied."""
        defaults = {}
        for fn in defaults_functions:
            defaults.update(fn(repo_root))

        for name, default in defaults.items():
            if name not in kwargs:
                kwargs[name] = default
        return kwargs

    def check(self):
        """Validate this instance against the parameters schema.

        Raises:
            ParameterMismatch: if validation fails. Non-strict instances
                allow extra keys.
        """
        schema = (
            base_schema if self.strict else base_schema.extend({}, extra=ALLOW_EXTRA)
        )
        try:
            validate_schema(schema, self.copy(), "Invalid parameters:")
        except Exception as e:
            raise ParameterMismatch(str(e))

    def __getitem__(self, k):
        try:
            return super().__getitem__(k)
        except KeyError:
            raise KeyError(f"taskgraph parameter {k!r} not found")

    def is_try(self):
        """
        Determine whether this graph is being built on a try project or for
        `mach try fuzzy`.
        """
        return "try" in self["project"] or self["tasks_for"] == "github-pull-request"

    @property
    def moz_build_date(self):
        # XXX self["moz_build_date"] is left as a string because:
        # * of backward compatibility
        # * parameters are output in a YAML file
        return datetime.strptime(self["moz_build_date"], "%Y%m%d%H%M%S")

    def file_url(self, path, pretty=False):
        """
        Determine the VCS URL for viewing a file in the tree, suitable for
        viewing by a human.

        :param str path: The path, relative to the root of the repository.
        :param bool pretty: Whether to return a link to a formatted version of the
            file, or the raw file version.

        :return str: The URL displaying the given path.
        """
        if self["repository_type"] == "hg":
            # comm/-prefixed paths live in a separate (comm-central style)
            # repository.
            if path.startswith("comm/"):
                path = path[len("comm/") :]
                repo = self["comm_head_repository"]
                rev = self["comm_head_rev"]
            else:
                repo = self["head_repository"]
                rev = self["head_rev"]
            endpoint = "file" if pretty else "raw-file"
            return f"{repo}/{endpoint}/{rev}/{path}"
        elif self["repository_type"] == "git":
            # For getting the file URL for git repositories, we only support a Github HTTPS remote
            repo = self["head_repository"]
            if repo.startswith("https://github.com/"):
                if repo.endswith("/"):
                    repo = repo[:-1]

                rev = self["head_rev"]
                endpoint = "blob" if pretty else "raw"
                return f"{repo}/{endpoint}/{rev}/{path}"
            elif repo.startswith("git@github.com:"):
                if repo.endswith(".git"):
                    repo = repo[:-4]
                rev = self["head_rev"]
                endpoint = "blob" if pretty else "raw"
                return "{}/{}/{}/{}".format(
                    repo.replace("git@github.com:", "https://github.com/"),
                    endpoint,
                    rev,
                    path,
                )
            else:
                # Fixed: the two adjacent literals previously concatenated
                # without a space ("...non-githubrepo: ...").
                raise ParameterMismatch(
                    "Don't know how to determine file URL for non-github "
                    "repo: {}".format(repo)
                )
        else:
            raise RuntimeError(
                'Only the "git" and "hg" repository types are supported for using file_url()'
            )

    def __str__(self):
        return f"Parameters(id={self.id}) (from {self.format_spec(self.spec)})"

    def __repr__(self):
        return pformat(dict(self), indent=2)
+
+
def load_parameters_file(
    spec, strict=True, overrides=None, trust_domain=None, repo_root=None
):
    """
    Load parameters from a path, url, decision task-id or project.

    Examples:
        task-id=fdtgsD5DQUmAQZEaGMvQ4Q
        project=mozilla-central

    Args:
        spec (str): Local path or URL of a parameters file, ``task-id=<id>``,
            ``project=<name>``, or falsy to use pure defaults/overrides.
        strict (bool): Passed through to ``Parameters``.
        overrides (dict): Values taking precedence over the loaded file.
        trust_domain (str): Required for ``project=`` specs, to build the
            decision-task index path.
        repo_root (str): Repository used to compute default parameters.

    Returns:
        Parameters: the loaded (not yet validated) parameters.

    Raises:
        ValueError: for a ``project=`` spec without a trust domain.
        TypeError: when the referenced file is neither YAML nor JSON.
    """

    if overrides is None:
        overrides = {}
    overrides["spec"] = spec

    if not spec:
        return Parameters(strict=strict, repo_root=repo_root, **overrides)

    try:
        # reading parameters from a local parameters.yml file
        f = open(spec)
    except OSError:
        # fetching parameters.yml using task task-id, project or supplied url
        task_id = None
        if spec.startswith("task-id="):
            task_id = spec.split("=")[1]
        elif spec.startswith("project="):
            if trust_domain is None:
                raise ValueError(
                    "Can't specify parameters by project "
                    "if trust domain isn't supplied.",
                )
            index = "{trust_domain}.v2.{project}.latest.taskgraph.decision".format(
                trust_domain=trust_domain,
                project=spec.split("=")[1],
            )
            task_id = find_task_id(index)

        if task_id:
            spec = get_artifact_url(task_id, "public/parameters.yml")
        f = urlopen(spec)

        # Decompress gzipped parameters.
        if f.info().get("Content-Encoding") == "gzip":
            response = f
            buf = BytesIO(response.read())
            # The raw response has been fully consumed; release it now.
            response.close()
            f = gzip.GzipFile(fileobj=buf)

    # Always close the handle, even when parsing fails (previously it was
    # leaked on every path).
    try:
        if spec.endswith(".yml"):
            kwargs = yaml.load_stream(f)
        elif spec.endswith(".json"):
            kwargs = json.load(f)
        else:
            raise TypeError(f"Parameters file `{spec}` is not JSON or YAML")
    finally:
        f.close()

    kwargs.update(overrides)
    return Parameters(strict=strict, repo_root=repo_root, **kwargs)
+
+
def parameters_loader(spec, strict=True, overrides=None):
    """Return a callable that loads and validates Parameters for a given
    graph config.

    Loading is deferred so that the graph config can supply the repository
    root and trust domain at call time.
    """

    def get_parameters(graph_config):
        try:
            repo_root = graph_config.vcs_root
        except Exception:
            # Any failure determining the vcs root falls back to defaults.
            repo_root = None

        loaded = load_parameters_file(
            spec,
            strict=strict,
            overrides=overrides,
            repo_root=repo_root,
            trust_domain=graph_config["trust-domain"],
        )
        loaded.check()
        return loaded

    return get_parameters
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/fetch-content b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/fetch-content
new file mode 100755
index 0000000000..42dc5e2b28
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/fetch-content
@@ -0,0 +1,899 @@
+#!/usr/bin/python3 -u
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import argparse
+import bz2
+import concurrent.futures
+import contextlib
+import datetime
+import gzip
+import hashlib
+import json
+import lzma
+import multiprocessing
+import os
+import pathlib
+import random
+import re
+import stat
+import subprocess
+import sys
+import tarfile
+import tempfile
+import time
+import urllib.parse
+import urllib.request
+import zipfile
+
+try:
+ import zstandard
+except ImportError:
+ zstandard = None
+
+try:
+ import certifi
+except ImportError:
+ certifi = None
+
+
+# Parallelism used by fetch_urls' thread pool; one worker per CPU.
+CONCURRENCY = multiprocessing.cpu_count()
+
+
+def log(msg):
+    """Write *msg* to stderr and flush so progress appears immediately."""
+    print(msg, file=sys.stderr)
+    sys.stderr.flush()
+
+
+class IntegrityError(Exception):
+    """Represents an integrity error when downloading a URL.
+
+    Raised by ``stream_download`` when the downloaded content's sha256
+    digest or byte length does not match the expected value.
+    """
+
+
+def ZstdCompressor(*args, **kwargs):
+    """Construct a ``zstandard.ZstdCompressor``.
+
+    Raises ValueError if the optional ``zstandard`` package is missing,
+    deferring the failure to the point of actual use.
+    """
+    if not zstandard:
+        raise ValueError("zstandard Python package not available")
+    return zstandard.ZstdCompressor(*args, **kwargs)
+
+
+def ZstdDecompressor(*args, **kwargs):
+    """Construct a ``zstandard.ZstdDecompressor``.
+
+    Raises ValueError if the optional ``zstandard`` package is missing,
+    deferring the failure to the point of actual use.
+    """
+    if not zstandard:
+        raise ValueError("zstandard Python package not available")
+    return zstandard.ZstdDecompressor(*args, **kwargs)
+
+
+@contextlib.contextmanager
+def rename_after_close(fname, *args, **kwargs):
+    """
+    Context manager that opens a temporary file to use as a writer,
+    and closes the file on context exit, renaming it to the expected
+    file name in case of success, or removing it in case of failure.
+
+    Takes the same options as open(), but must be used as a context
+    manager.
+    """
+    path = pathlib.Path(fname)
+    # Write to a sibling ".tmp" file so a crash or error never leaves a
+    # partial file at the final destination.
+    tmp = path.with_name("%s.tmp" % path.name)
+    try:
+        with tmp.open(*args, **kwargs) as fh:
+            yield fh
+    except Exception:
+        tmp.unlink()
+        raise
+    else:
+        # Success: move the completed file into place.
+        tmp.rename(fname)
+
+
+# The following is copied from
+# https://github.com/mozilla-releng/redo/blob/6d07678a014e0c525e54a860381a165d34db10ff/redo/__init__.py#L15-L85
+def retrier(attempts=5, sleeptime=10, max_sleeptime=300, sleepscale=1.5, jitter=1):
+    """
+    A generator function that sleeps between retries, handles exponential
+    backoff and jitter. The action you are retrying is meant to run after
+    retrier yields.
+
+    At each iteration, we sleep for sleeptime + random.randint(-jitter, jitter).
+    Afterwards sleeptime is multiplied by sleepscale for the next iteration.
+
+    Args:
+        attempts (int): maximum number of times to try; defaults to 5
+        sleeptime (float): how many seconds to sleep between tries; defaults to
+                           60s (one minute)
+        max_sleeptime (float): the longest we'll sleep, in seconds; defaults to
+                               300s (five minutes)
+        sleepscale (float): how much to multiply the sleep time by each
+                            iteration; defaults to 1.5
+        jitter (int): random jitter to introduce to sleep time each iteration.
+                      the amount is chosen at random between [-jitter, +jitter]
+                      defaults to 1
+
+    Yields:
+        None, a maximum of `attempts` number of times
+
+    Example:
+        >>> n = 0
+        >>> for _ in retrier(sleeptime=0, jitter=0):
+        ...     if n == 3:
+        ...         # We did the thing!
+        ...         break
+        ...     n += 1
+        >>> n
+        3
+
+        >>> n = 0
+        >>> for _ in retrier(sleeptime=0, jitter=0):
+        ...     if n == 6:
+        ...         # We did the thing!
+        ...         break
+        ...     n += 1
+        ... else:
+        ...     print("max tries hit")
+        max tries hit
+    """
+    jitter = jitter or 0  # py35 barfs on the next line if jitter is None
+    if jitter > sleeptime:
+        # To prevent negative sleep times
+        raise Exception(
+            "jitter ({}) must be less than sleep time ({})".format(jitter, sleeptime)
+        )
+
+    sleeptime_real = sleeptime
+    for _ in range(attempts):
+        log("attempt %i/%i" % (_ + 1, attempts))
+
+        # The caller performs its attempt after this yield; the value
+        # yielded is the sleep planned before the next attempt.
+        yield sleeptime_real
+
+        if jitter:
+            sleeptime_real = sleeptime + random.randint(-jitter, jitter)
+            # our jitter should scale along with the sleeptime
+            jitter = int(jitter * sleepscale)
+        else:
+            sleeptime_real = sleeptime
+
+        sleeptime *= sleepscale
+
+        if sleeptime_real > max_sleeptime:
+            sleeptime_real = max_sleeptime
+
+        # Don't need to sleep the last time
+        if _ < attempts - 1:
+            log(
+                "sleeping for %.2fs (attempt %i/%i)" % (sleeptime_real, _ + 1, attempts)
+            )
+            time.sleep(sleeptime_real)
+
+
+def stream_download(url, sha256=None, size=None, headers=None):
+    """Download a URL to a generator, optionally with content verification.
+
+    If ``sha256`` or ``size`` are defined, the downloaded URL will be
+    validated against those requirements and ``IntegrityError`` will be
+    raised if expectations do not match.
+
+    Because verification cannot occur until the file is completely downloaded
+    it is recommended for consumers to not do anything meaningful with the
+    data if content verification is being used. To securely handle retrieved
+    content, it should be streamed to a file or memory and only operated
+    on after the generator is exhausted without raising.
+    """
+    log("Downloading %s" % url)
+    headers = headers or []
+
+    h = hashlib.sha256()
+    length = 0
+
+    t0 = time.time()
+    req_headers = {}
+    for header in headers:
+        # Headers are supplied as "Name: value" strings (see the -H CLI flag).
+        key, val = header.split(":")
+        req_headers[key.strip()] = val.strip()
+
+    req = urllib.request.Request(url, None, req_headers)
+    # Use certifi's CA bundle when available, otherwise system defaults.
+    # NOTE(review): the ``cafile`` argument is deprecated since Python 3.6
+    # and removed in 3.12; should migrate to an ssl.SSLContext built from
+    # certifi -- confirm target Python versions.
+    with urllib.request.urlopen(
+        req, cafile=certifi.where()
+    ) if certifi else urllib.request.urlopen(req) as fh:
+        # Transparently decompress gzip-encoded responses, unless the URL
+        # itself names a .gz file (then the raw compressed bytes are wanted).
+        if not url.endswith(".gz") and fh.info().get("Content-Encoding") == "gzip":
+            fh = gzip.GzipFile(fileobj=fh)
+
+        while True:
+            chunk = fh.read(65536)
+            if not chunk:
+                break
+
+            h.update(chunk)
+            length += len(chunk)
+
+            yield chunk
+
+    duration = time.time() - t0
+    digest = h.hexdigest()
+
+    log(
+        "%s resolved to %d bytes with sha256 %s in %.3fs"
+        % (url, length, digest, duration)
+    )
+
+    if size:
+        if size == length:
+            log("Verified size of %s" % url)
+        else:
+            raise IntegrityError(
+                "size mismatch on %s: wanted %d; got %d" % (url, size, length)
+            )
+
+    if sha256:
+        if digest == sha256:
+            log("Verified sha256 integrity of %s" % url)
+        else:
+            raise IntegrityError(
+                "sha256 mismatch on %s: wanted %s; got %s" % (url, sha256, digest)
+            )
+
+
+def download_to_path(url, path, sha256=None, size=None, headers=None):
+    """Download a URL to a filesystem path, possibly with verification.
+
+    Transient failures are retried up to 5 times; an ``IntegrityError``
+    is never retried, since the server is returning the wrong content.
+    """
+
+    # We download to a temporary file and rename at the end so there's
+    # no chance of the final file being partially written or containing
+    # bad data.
+    try:
+        path.unlink()
+    except FileNotFoundError:
+        pass
+
+    for _ in retrier(attempts=5, sleeptime=60):
+        try:
+            log("Downloading %s to %s" % (url, path))
+
+            with rename_after_close(path, "wb") as fh:
+                for chunk in stream_download(
+                    url, sha256=sha256, size=size, headers=headers
+                ):
+                    fh.write(chunk)
+
+            return
+        except IntegrityError:
+            # A content mismatch won't fix itself; surface immediately.
+            raise
+        except Exception as e:
+            log("Download failed: {}".format(e))
+            continue
+
+    raise Exception("Download failed, no more retries!")
+
+
+def download_to_memory(url, sha256=None, size=None):
+ """Download a URL to memory, possibly with verification."""
+
+ data = b""
+ for _ in retrier(attempts=5, sleeptime=60):
+ try:
+ log("Downloading %s" % (url))
+
+ for chunk in stream_download(url, sha256=sha256, size=size):
+ data += chunk
+
+ return data
+ except IntegrityError:
+ raise
+ except Exception as e:
+ log("Download failed: {}".format(e))
+ continue
+
+ raise Exception("Download failed, no more retries!")
+
+
+def gpg_verify_path(path: pathlib.Path, public_key_data: bytes, signature_data: bytes):
+    """Verify that a filesystem path verifies using GPG.
+
+    Takes a Path defining a file to verify. ``public_key_data`` contains
+    bytes with GPG public key data. ``signature_data`` contains a signed
+    GPG document to use with ``gpg --verify``.
+
+    Raises ``subprocess.CalledProcessError`` (via ``check=True``) if the
+    key import or signature verification fails.
+    """
+    log("Validating GPG signature of %s" % path)
+    log("GPG key data:\n%s" % public_key_data.decode("ascii"))
+
+    # Use a throwaway GPG home so nothing leaks into/out of the user's keyring.
+    with tempfile.TemporaryDirectory() as td:
+        try:
+            # --batch since we're running unattended.
+            gpg_args = ["gpg", "--homedir", td, "--batch"]
+
+            log("Importing GPG key...")
+            subprocess.run(gpg_args + ["--import"], input=public_key_data, check=True)
+
+            log("Verifying GPG signature...")
+            subprocess.run(
+                gpg_args + ["--verify", "-", "%s" % path],
+                input=signature_data,
+                check=True,
+            )
+
+            log("GPG signature verified!")
+        finally:
+            # There is a race between the agent self-terminating and
+            # shutil.rmtree() from the temporary directory cleanup that can
+            # lead to exceptions. Kill the agent before cleanup to prevent this.
+            env = dict(os.environ)
+            env["GNUPGHOME"] = td
+            subprocess.run(["gpgconf", "--kill", "gpg-agent"], env=env)
+
+
+def open_tar_stream(path: pathlib.Path):
+    """Open *path* and return a file-like object yielding the raw tar stream.
+
+    The decompressor is chosen from the file suffix (.bz2/.gz/.xz/.zst, or
+    a bare .tar for no compression); raises ValueError for anything else.
+    """
+    if path.suffix == ".bz2":
+        return bz2.open(str(path), "rb")
+    elif path.suffix == ".gz":
+        return gzip.open(str(path), "rb")
+    elif path.suffix == ".xz":
+        return lzma.open(str(path), "rb")
+    elif path.suffix == ".zst":
+        dctx = ZstdDecompressor()
+        return dctx.stream_reader(path.open("rb"))
+    elif path.suffix == ".tar":
+        return path.open("rb")
+    else:
+        raise ValueError("unknown archive format for tar file: %s" % path)
+
+
+def archive_type(path: pathlib.Path):
+    """Attempt to identify a path as an extractable archive.
+
+    Returns "tar" for compressed tarballs (e.g. foo.tar.zst, where
+    suffixes[-2:-1] is [".tar"]), "zip" for zip files, otherwise None.
+    NOTE(review): a bare uncompressed "foo.tar" yields suffixes == [".tar"]
+    and suffixes[-2:-1] == [], so it is NOT detected -- confirm intentional.
+    """
+    if path.suffixes[-2:-1] == [".tar"]:
+        return "tar"
+    elif path.suffix == ".zip":
+        return "zip"
+    else:
+        return None
+
+
+def extract_archive(path, dest_dir, typ):
+    """Extract an archive to a destination directory.
+
+    ``typ`` is "tar" or "zip" (as returned by ``archive_type``).  Tar data
+    is decompressed in-process and piped to the external ``tar`` program
+    (except on Windows, where the tarfile module is used directly); zip
+    archives are handed to the external ``unzip`` program.
+    """
+
+    # Resolve paths to absolute variants.
+    path = path.resolve()
+    dest_dir = dest_dir.resolve()
+
+    log("Extracting %s to %s" % (path, dest_dir))
+    t0 = time.time()
+
+    # We pipe input to the decompressor program so that we can apply
+    # custom decompressors that the program may not know about.
+    if typ == "tar":
+        ifh = open_tar_stream(path)
+        # On Windows, the tar program doesn't support things like symbolic
+        # links, while Windows actually support them. The tarfile module in
+        # python does. So use that. But since it's significantly slower than
+        # the tar program on Linux, only use tarfile on Windows (tarfile is
+        # also not much slower on Windows, presumably because of the
+        # notoriously bad I/O).
+        if sys.platform == "win32":
+            tar = tarfile.open(fileobj=ifh, mode="r|")
+            tar.extractall(str(dest_dir))
+            # Empty args skips the subprocess path below entirely.
+            args = []
+        else:
+            args = ["tar", "xf", "-"]
+            pipe_stdin = True
+    elif typ == "zip":
+        # unzip from stdin has wonky behavior. We don't use a pipe for it.
+        ifh = open(os.devnull, "rb")
+        args = ["unzip", "-o", str(path)]
+        pipe_stdin = False
+    else:
+        raise ValueError("unknown archive format: %s" % path)
+
+    if args:
+        with ifh, subprocess.Popen(
+            args, cwd=str(dest_dir), bufsize=0, stdin=subprocess.PIPE
+        ) as p:
+            while True:
+                if not pipe_stdin:
+                    break
+
+                chunk = ifh.read(131072)
+                if not chunk:
+                    break
+
+                p.stdin.write(chunk)
+
+        # Popen.__exit__ has waited for the child, so returncode is set here.
+        if p.returncode:
+            raise Exception("%r exited %d" % (args, p.returncode))
+
+    log("%s extracted in %.3fs" % (path, time.time() - t0))
+
+
+def repack_archive(
+    orig: pathlib.Path, dest: pathlib.Path, strip_components=0, prefix=""
+):
+    """Repack archive *orig* as a ``.tar.zst`` archive at *dest*.
+
+    ``strip_components`` removes that many leading path components from
+    member names; ``prefix`` is prepended to each member name.  Zip input
+    is converted to tar on the fly; tar input is recompressed, rewriting
+    members individually only when a name filter is needed.
+    """
+    assert orig != dest
+    log("Repacking as %s" % dest)
+    orig_typ = archive_type(orig)
+    typ = archive_type(dest)
+    if not orig_typ:
+        raise Exception("Archive type not supported for %s" % orig.name)
+    if not typ:
+        raise Exception("Archive type not supported for %s" % dest.name)
+
+    if dest.suffixes[-2:] != [".tar", ".zst"]:
+        raise Exception("Only producing .tar.zst archives is supported.")
+
+    if strip_components or prefix:
+
+        # Rewrites a member name; raises if stripping would delete it.
+        # (Shadows the builtin ``filter`` within this function.)
+        def filter(name):
+            if strip_components:
+                stripped = "/".join(name.split("/")[strip_components:])
+                if not stripped:
+                    raise Exception(
+                        "Stripping %d components would remove files" % strip_components
+                    )
+                name = stripped
+            return prefix + name
+
+    else:
+        filter = None
+
+    with rename_after_close(dest, "wb") as fh:
+        ctx = ZstdCompressor()
+        if orig_typ == "zip":
+            assert typ == "tar"
+            # Shadows the builtin ``zip`` within this scope.
+            zip = zipfile.ZipFile(orig)
+            # Convert the zip stream to a tar on the fly.
+            with ctx.stream_writer(fh) as compressor, tarfile.open(
+                fileobj=compressor, mode="w:"
+            ) as tar:
+                for zipinfo in zip.infolist():
+                    if zipinfo.is_dir():
+                        continue
+                    tarinfo = tarfile.TarInfo()
+                    filename = zipinfo.filename
+                    tarinfo.name = filter(filename) if filter else filename
+                    tarinfo.size = zipinfo.file_size
+                    # Zip files don't have any knowledge of the timezone
+                    # they were created in. Which is not really convenient to
+                    # reliably convert to a timestamp. But we don't really
+                    # care about accuracy, but rather about reproducibility,
+                    # so we pick UTC.
+                    # (This local ``time`` shadows the imported time module
+                    # for the rest of this loop body.)
+                    time = datetime.datetime(
+                        *zipinfo.date_time, tzinfo=datetime.timezone.utc
+                    )
+                    tarinfo.mtime = time.timestamp()
+                    # 0 is MS-DOS, 3 is UNIX. Only in the latter case do we
+                    # get anything useful for the tar file mode.
+                    if zipinfo.create_system == 3:
+                        mode = zipinfo.external_attr >> 16
+                    else:
+                        mode = 0o0644
+                    tarinfo.mode = stat.S_IMODE(mode)
+                    if stat.S_ISLNK(mode):
+                        tarinfo.type = tarfile.SYMTYPE
+                        tarinfo.linkname = zip.read(filename).decode()
+                        tar.addfile(tarinfo, zip.open(filename))
+                    elif stat.S_ISREG(mode) or stat.S_IFMT(mode) == 0:
+                        tar.addfile(tarinfo, zip.open(filename))
+                    else:
+                        raise Exception("Unsupported file mode %o" % stat.S_IFMT(mode))
+
+        elif orig_typ == "tar":
+            if typ == "zip":
+                raise Exception("Repacking a tar to zip is not supported")
+            assert typ == "tar"
+
+            ifh = open_tar_stream(orig)
+            if filter:
+                # To apply the filter, we need to open the tar stream and
+                # tweak it.
+                origtar = tarfile.open(fileobj=ifh, mode="r|")
+                with ctx.stream_writer(fh) as compressor, tarfile.open(
+                    fileobj=compressor,
+                    mode="w:",
+                    format=origtar.format,
+                ) as tar:
+                    for tarinfo in origtar:
+                        if tarinfo.isdir():
+                            continue
+                        tarinfo.name = filter(tarinfo.name)
+                        if "path" in tarinfo.pax_headers:
+                            tarinfo.pax_headers["path"] = filter(
+                                tarinfo.pax_headers["path"]
+                            )
+                        if tarinfo.isfile():
+                            tar.addfile(tarinfo, origtar.extractfile(tarinfo))
+                        else:
+                            tar.addfile(tarinfo)
+            else:
+                # We only change compression here. The tar stream is unchanged.
+                ctx.copy_stream(ifh, fh)
+
+
+def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
+    """Fetch a URL and extract it to a destination path.
+
+    If the downloaded URL is an archive, it is extracted automatically
+    and the archive is deleted. Otherwise the file remains in place in
+    the destination directory.
+    """
+
+    # The on-disk name is the final path component of the URL.
+    basename = urllib.parse.urlparse(url).path.split("/")[-1]
+    dest_path = dest_dir / basename
+
+    download_to_path(url, dest_path, sha256=sha256, size=size)
+
+    if not extract:
+        return
+
+    # Only recognized archive types are extracted (and then removed);
+    # anything else is left as a plain downloaded file.
+    typ = archive_type(dest_path)
+    if typ:
+        extract_archive(dest_path, dest_dir, typ)
+        log("Removing %s" % dest_path)
+        dest_path.unlink()
+
+
+def fetch_urls(downloads):
+    """Fetch multiple URLs concurrently.
+
+    *downloads* is an iterable of argument tuples for ``fetch_and_extract``;
+    each is run on a thread pool of ``CONCURRENCY`` workers, and any
+    download failure is re-raised via ``Future.result()``.
+    """
+    with concurrent.futures.ThreadPoolExecutor(CONCURRENCY) as e:
+        fs = []
+
+        for download in downloads:
+            fs.append(e.submit(fetch_and_extract, *download))
+
+        for f in fs:
+            f.result()
+
+
+def _git_checkout_github_archive(
+    dest_path: pathlib.Path, repo: str, commit: str, prefix: str
+):
+    "Use github archive generator to speed up github git repo cloning"
+    repo = repo.rstrip("/")
+    github_url = "{repo}/archive/{commit}.tar.gz".format(**locals())
+
+    with tempfile.TemporaryDirectory() as td:
+        temp_dir = pathlib.Path(td)
+        dl_dest = temp_dir / "archive.tar.gz"
+        download_to_path(github_url, dl_dest)
+        # GitHub archives have a single "<repo>-<commit>/" top directory;
+        # strip it and substitute the requested prefix instead.
+        repack_archive(dl_dest, dest_path, strip_components=1, prefix=prefix + "/")
+
+
+def _github_submodule_required(repo: str, commit: str):
+ "Use github API to check if submodules are used"
+ url = "{repo}/blob/{commit}/.gitmodules".format(**locals())
+ try:
+ status_code = urllib.request.urlopen(url).getcode()
+ return status_code == 200
+ except:
+ return False
+
+
+def git_checkout_archive(
+    dest_path: pathlib.Path,
+    repo: str,
+    commit: str,
+    prefix=None,
+    ssh_key=None,
+    include_dot_git=False,
+):
+    """Produce an archive of the files comprising a Git checkout.
+
+    Writes a ``.tar.zst`` archive at *dest_path* whose members live under
+    *prefix* (defaulting to the repository name).  For plain GitHub clones
+    without submodules or .git, the GitHub archive service is used as a
+    fast path; otherwise a full clone + detached checkout is performed.
+    ``ssh_key`` names a Taskcluster secret holding an ssh private key.
+    """
+    dest_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if not prefix:
+        prefix = repo.rstrip("/").rsplit("/", 1)[-1]
+
+    if dest_path.suffixes[-2:] != [".tar", ".zst"]:
+        raise Exception("Only producing .tar.zst archives is supported.")
+
+    if repo.startswith("https://github.com/"):
+        if not include_dot_git and not _github_submodule_required(repo, commit):
+            log("Using github archive service to speedup archive creation")
+            # Always log sha1 info, either from commit or resolved from repo.
+            if re.match(r"^[a-fA-F0-9]{40}$", commit):
+                revision = commit
+            else:
+                ref_output = subprocess.check_output(["git", "ls-remote", repo,
+                    'refs/heads/' + commit])
+                revision, _ = ref_output.decode().split(maxsplit=1)
+            log("Fetching revision {}".format(revision))
+            return _git_checkout_github_archive(dest_path, repo, commit, prefix)
+
+    with tempfile.TemporaryDirectory() as td:
+        temp_dir = pathlib.Path(td)
+
+        git_dir = temp_dir / prefix
+
+        # This could be faster with a shallow clone. However, Git requires a ref
+        # to initiate a clone. Since the commit-ish may not refer to a ref, we
+        # simply perform a full clone followed by a checkout.
+        print("cloning %s to %s" % (repo, git_dir))
+
+        env = os.environ.copy()
+        keypath = ""
+        if ssh_key:
+            # Fetch the ssh private key from the Taskcluster secrets service
+            # via the task's proxy.
+            taskcluster_secret_url = api(
+                os.environ.get("TASKCLUSTER_PROXY_URL"),
+                "secrets",
+                "v1",
+                "secret/{keypath}".format(keypath=ssh_key),
+            )
+            taskcluster_secret = b"".join(stream_download(taskcluster_secret_url))
+            taskcluster_secret = json.loads(taskcluster_secret)
+            sshkey = taskcluster_secret["secret"]["ssh_privkey"]
+
+            keypath = temp_dir.joinpath("ssh-key")
+            keypath.write_text(sshkey)
+            keypath.chmod(0o600)
+
+            # NOTE(review): this replaces the whole environment (dropping the
+            # os.environ copy made above) rather than updating it -- confirm
+            # git works as intended without PATH etc. in its env.
+            env = {
+                "GIT_SSH_COMMAND": "ssh -o 'StrictHostKeyChecking no' -i {keypath}".format(
+                    keypath=keypath
+                )
+            }
+
+        subprocess.run(["git", "clone", "-n", repo, str(git_dir)], check=True, env=env)
+
+        # Always use a detached head so that git prints out what it checked out.
+        subprocess.run(
+            ["git", "checkout", "--detach", commit], cwd=str(git_dir), check=True
+        )
+
+        # When including the .git, we want --depth 1, but a direct clone would not
+        # necessarily be able to give us the right commit.
+        if include_dot_git:
+            initial_clone = git_dir.with_name(git_dir.name + ".orig")
+            git_dir.rename(initial_clone)
+            subprocess.run(
+                [
+                    "git",
+                    "clone",
+                    "file://" + str(initial_clone),
+                    str(git_dir),
+                    "--depth",
+                    "1",
+                ],
+                check=True,
+            )
+            # Point origin back at the real upstream, not the local clone.
+            subprocess.run(
+                ["git", "remote", "set-url", "origin", repo],
+                cwd=str(git_dir),
+                check=True,
+            )
+
+        # --depth 1 can induce more work on the server side, so only use it for
+        # submodule initialization when we want to keep the .git directory.
+        depth = ["--depth", "1"] if include_dot_git else []
+        subprocess.run(
+            ["git", "submodule", "update", "--init"] + depth,
+            cwd=str(git_dir),
+            check=True,
+        )
+
+        # Don't leave the private key on disk longer than necessary.
+        if keypath:
+            os.remove(keypath)
+
+        print("creating archive %s of commit %s" % (dest_path, commit))
+        exclude_dot_git = [] if include_dot_git else ["--exclude=.git"]
+        proc = subprocess.Popen(
+            [
+                "tar",
+                "cf",
+                "-",
+            ]
+            + exclude_dot_git
+            + [
+                "-C",
+                str(temp_dir),
+                prefix,
+            ],
+            stdout=subprocess.PIPE,
+        )
+
+        # Stream tar's stdout through zstd into the destination file.
+        with rename_after_close(dest_path, "wb") as out:
+            ctx = ZstdCompressor()
+            ctx.copy_stream(proc.stdout, out)
+
+        proc.wait()
+
+
+def command_git_checkout_archive(args):
+    """CLI entry point for the ``git-checkout-archive`` sub-command."""
+    dest = pathlib.Path(args.dest)
+
+    try:
+        git_checkout_archive(
+            dest,
+            args.repo,
+            args.commit,
+            prefix=args.path_prefix,
+            ssh_key=args.ssh_key_secret,
+            include_dot_git=args.include_dot_git,
+        )
+    except Exception:
+        # Don't leave a partial archive behind on failure.
+        try:
+            dest.unlink()
+        except FileNotFoundError:
+            pass
+
+        raise
+
+
+def command_static_url(args):
+    """CLI entry point for the ``static-url`` sub-command.
+
+    Downloads a URL with sha256/size verification, optionally validates a
+    GPG signature, and repacks the result as .tar.zst when renaming or
+    strip/prefix options require it.  Returns 1 on GPG option mismatch.
+    """
+    gpg_sig_url = args.gpg_sig_url
+    gpg_env_key = args.gpg_key_env
+
+    # Both or neither of the GPG options must be given.
+    if bool(gpg_sig_url) != bool(gpg_env_key):
+        print("--gpg-sig-url and --gpg-key-env must both be defined")
+        return 1
+
+    if gpg_sig_url:
+        gpg_signature = b"".join(stream_download(gpg_sig_url))
+        gpg_key = os.environb[gpg_env_key.encode("ascii")]
+
+    dest = pathlib.Path(args.dest)
+    dest.parent.mkdir(parents=True, exist_ok=True)
+
+    # Download under the URL's own basename unless it already matches the
+    # destination's suffixes, in which case download straight to dest.
+    basename = urllib.parse.urlparse(args.url).path.split("/")[-1]
+    if basename.endswith("".join(dest.suffixes)):
+        dl_dest = dest
+    else:
+        dl_dest = dest.parent / basename
+
+    try:
+        download_to_path(
+            args.url, dl_dest, sha256=args.sha256, size=args.size, headers=args.headers
+        )
+
+        if gpg_sig_url:
+            gpg_verify_path(dl_dest, gpg_key, gpg_signature)
+
+        # NOTE(review): when dl_dest == dest but strip/prefix options are
+        # set, repack_archive asserts orig != dest -- confirm this
+        # combination cannot occur in practice.
+        if dl_dest != dest or args.strip_components or args.add_prefix:
+            repack_archive(dl_dest, dest, args.strip_components, args.add_prefix)
+    except Exception:
+        try:
+            dl_dest.unlink()
+        except FileNotFoundError:
+            pass
+
+        raise
+
+    # Clean up the intermediate download when it isn't the final file.
+    if dl_dest != dest:
+        log("Removing %s" % dl_dest)
+        dl_dest.unlink()
+
+
+def api(root_url, service, version, path):
+    """Build a Taskcluster API URL from its components."""
+    # taskcluster-lib-urls is not available when this script runs, so
+    # simulate its behavior:
+    return "{root_url}/api/{service}/{version}/{path}".format(
+        root_url=root_url, service=service, version=version, path=path
+    )
+
+
+def get_hash(fetch, root_url):
+    """Return the expected sha256 for a fetch's artifact.
+
+    Looks the digest up in the producing task's public/chain-of-trust.json
+    artifact, keyed by the fetch's artifact name.
+    """
+    path = "task/{task}/artifacts/{artifact}".format(
+        task=fetch["task"], artifact="public/chain-of-trust.json"
+    )
+    url = api(root_url, "queue", "v1", path)
+    cot = json.loads(download_to_memory(url))
+    return cot["artifacts"][fetch["artifact"]]["sha256"]
+
+
+def command_task_artifacts(args):
+    """CLI entry point for the ``task-artifacts`` sub-command.
+
+    Reads the MOZ_FETCHES JSON from the environment, resolves each entry
+    to an artifact URL (via the queue service for public artifacts, or
+    the Taskcluster proxy for private ones), downloads them in parallel,
+    and emits PERFHERDER_DATA timing on stderr.
+    """
+    start = time.monotonic()
+    fetches = json.loads(os.environ["MOZ_FETCHES"])
+    downloads = []
+    for fetch in fetches:
+        extdir = pathlib.Path(args.dest)
+        if "dest" in fetch:
+            # Note: normpath doesn't like pathlib.Path in python 3.5
+            extdir = pathlib.Path(os.path.normpath(str(extdir.joinpath(fetch["dest"]))))
+        extdir.mkdir(parents=True, exist_ok=True)
+        root_url = os.environ["TASKCLUSTER_ROOT_URL"]
+        sha256 = None
+        # Optional chain-of-trust verification of the artifact digest.
+        if fetch.get("verify-hash"):
+            sha256 = get_hash(fetch, root_url)
+        if fetch["artifact"].startswith("public/"):
+            path = "task/{task}/artifacts/{artifact}".format(
+                task=fetch["task"], artifact=fetch["artifact"]
+            )
+            url = api(root_url, "queue", "v1", path)
+        else:
+            # Private artifacts must go through the authenticating proxy.
+            url = ("{proxy_url}/api/queue/v1/task/{task}/artifacts/{artifact}").format(
+                proxy_url=os.environ["TASKCLUSTER_PROXY_URL"],
+                task=fetch["task"],
+                artifact=fetch["artifact"],
+            )
+        downloads.append((url, extdir, fetch["extract"], sha256))
+
+    fetch_urls(downloads)
+    end = time.monotonic()
+
+    perfherder_data = {
+        "framework": {"name": "build_metrics"},
+        "suites": [
+            {
+                "name": "fetch_content",
+                "value": end - start,
+                "lowerIsBetter": True,
+                "shouldAlert": False,
+                "subtests": [],
+            }
+        ],
+    }
+    print("PERFHERDER_DATA: {}".format(json.dumps(perfherder_data)), file=sys.stderr)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ subparsers = parser.add_subparsers(title="sub commands")
+
+ git_checkout = subparsers.add_parser(
+ "git-checkout-archive",
+ help="Obtain an archive of files from a Git repository checkout",
+ )
+ git_checkout.set_defaults(func=command_git_checkout_archive)
+ git_checkout.add_argument(
+ "--path-prefix", help="Prefix for paths in produced archive"
+ )
+ git_checkout.add_argument("repo", help="URL to Git repository to be cloned")
+ git_checkout.add_argument("commit", help="Git commit to check out")
+ git_checkout.add_argument("dest", help="Destination path of archive")
+ git_checkout.add_argument(
+ "--ssh-key-secret", help="The scope path of the ssh key to used for checkout"
+ )
+ git_checkout.add_argument(
+ "--include-dot-git", action="store_true", help="Include the .git directory"
+ )
+
+ url = subparsers.add_parser("static-url", help="Download a static URL")
+ url.set_defaults(func=command_static_url)
+ url.add_argument("--sha256", required=True, help="SHA-256 of downloaded content")
+ url.add_argument(
+ "--size", required=True, type=int, help="Size of downloaded content, in bytes"
+ )
+ url.add_argument(
+ "--gpg-sig-url",
+ help="URL containing signed GPG document validating " "URL to fetch",
+ )
+ url.add_argument(
+ "--gpg-key-env", help="Environment variable containing GPG key to validate"
+ )
+ url.add_argument(
+ "--strip-components",
+ type=int,
+ default=0,
+ help="Number of leading components to strip from file "
+ "names in the downloaded archive",
+ )
+ url.add_argument(
+ "--add-prefix",
+ default="",
+ help="Prefix to add to file names in the downloaded " "archive",
+ )
+ url.add_argument(
+ "-H",
+ "--header",
+ default=[],
+ action="append",
+ dest="headers",
+ help="Header to send as part of the request, can be passed " "multiple times",
+ )
+ url.add_argument("url", help="URL to fetch")
+ url.add_argument("dest", help="Destination path")
+
+ artifacts = subparsers.add_parser("task-artifacts", help="Fetch task artifacts")
+ artifacts.set_defaults(func=command_task_artifacts)
+ artifacts.add_argument(
+ "-d",
+ "--dest",
+ default=os.environ.get("MOZ_FETCHES_DIR"),
+ help="Destination directory which will contain all "
+ "artifacts (defaults to $MOZ_FETCHES_DIR)",
+ )
+
+ args = parser.parse_args()
+
+ if not args.dest:
+ parser.error(
+ "no destination directory specified, either pass in --dest "
+ "or set $MOZ_FETCHES_DIR"
+ )
+
+ return args.func(args)
+
+
+if __name__ == "__main__":
+    # Propagate the sub-command's return value as the process exit code.
+    sys.exit(main())
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/hgrc b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/hgrc
new file mode 100755
index 0000000000..f6a2f6643c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/hgrc
@@ -0,0 +1,33 @@
+# By default the progress bar starts after 3s and updates every 0.1s. We
+# change this so it shows and updates every 1.0s.
+# We also tell progress to assume a TTY is present so updates are printed
+# even if there is no known TTY.
+[progress]
+delay = 1.0
+refresh = 1.0
+assume-tty = true
+
+[extensions]
+share =
+sparse =
+robustcheckout = /usr/local/mercurial/robustcheckout.py
+
+[hostsecurity]
+# When running a modern Python, Mercurial will default to TLS 1.1+.
+# When running on a legacy Python, Mercurial will default to TLS 1.0+.
+# There is no good reason we shouldn't be running a modern Python
+# capable of speaking TLS 1.2. And the only Mercurial servers we care
+# about should be running TLS 1.2. So make TLS 1.2 the minimum.
+minimumprotocol = tls1.2
+
+# Settings to make 1-click loaners more useful.
+[extensions]
+histedit =
+rebase =
+
+[diff]
+git = 1
+showfunc = 1
+
+[pager]
+pager = LESS=FRSXQ less
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/robustcheckout.py b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/robustcheckout.py
new file mode 100644
index 0000000000..7e12d07d50
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/robustcheckout.py
@@ -0,0 +1,826 @@
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+"""Robustly perform a checkout.
+
+This extension provides the ``hg robustcheckout`` command for
+ensuring a working directory is updated to the specified revision
+from a source repo using best practices to ensure optimal clone
+times and storage efficiency.
+"""
+
+from __future__ import absolute_import
+
+import contextlib
+import json
+import os
+import random
+import re
+import socket
+import ssl
+import time
+
+from mercurial.i18n import _
+from mercurial.node import hex, nullid
+from mercurial import (
+ commands,
+ configitems,
+ error,
+ exchange,
+ extensions,
+ hg,
+ match as matchmod,
+ pycompat,
+ registrar,
+ scmutil,
+ urllibcompat,
+ util,
+ vfs,
+)
+
+# Causes worker to purge caches on process exit and for task to retry.
+EXIT_PURGE_CACHE = 72
+
+testedwith = b"4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9"
+minimumhgversion = b"4.5"
+
+cmdtable = {}
+command = registrar.command(cmdtable)
+
+configtable = {}
+configitem = registrar.configitem(configtable)
+
+configitem(b"robustcheckout", b"retryjittermin", default=configitems.dynamicdefault)
+configitem(b"robustcheckout", b"retryjittermax", default=configitems.dynamicdefault)
+
+
+def getsparse():
+ from mercurial import sparse
+
+ return sparse
+
+
+def peerlookup(remote, v):
+ with remote.commandexecutor() as e:
+ return e.callcommand(b"lookup", {b"key": v}).result()
+
+
+@command(
+ b"robustcheckout",
+ [
+ (b"", b"upstream", b"", b"URL of upstream repo to clone from"),
+ (b"r", b"revision", b"", b"Revision to check out"),
+ (b"b", b"branch", b"", b"Branch to check out"),
+ (b"", b"purge", False, b"Whether to purge the working directory"),
+ (b"", b"sharebase", b"", b"Directory where shared repos should be placed"),
+ (
+ b"",
+ b"networkattempts",
+ 3,
+ b"Maximum number of attempts for network " b"operations",
+ ),
+ (b"", b"sparseprofile", b"", b"Sparse checkout profile to use (path in repo)"),
+ (
+ b"U",
+ b"noupdate",
+ False,
+ b"the clone will include an empty working directory\n"
+ b"(only a repository)",
+ ),
+ ],
+ b"[OPTION]... URL DEST",
+ norepo=True,
+)
+def robustcheckout(
+ ui,
+ url,
+ dest,
+ upstream=None,
+ revision=None,
+ branch=None,
+ purge=False,
+ sharebase=None,
+ networkattempts=None,
+ sparseprofile=None,
+ noupdate=False,
+):
+ """Ensure a working copy has the specified revision checked out.
+
+ Repository data is automatically pooled into the common directory
+ specified by ``--sharebase``, which is a required argument. It is required
+ because pooling storage prevents excessive cloning, which makes operations
+ complete faster.
+
+ One of ``--revision`` or ``--branch`` must be specified. ``--revision``
+ is preferred, as it is deterministic and there is no ambiguity as to which
+ revision will actually be checked out.
+
+ If ``--upstream`` is used, the repo at that URL is used to perform the
+ initial clone instead of cloning from the repo where the desired revision
+ is located.
+
+    ``--purge`` controls whether to remove untracked and ignored files from
+ the working directory. If used, the end state of the working directory
+ should only contain files explicitly under version control for the requested
+ revision.
+
+ ``--sparseprofile`` can be used to specify a sparse checkout profile to use.
+ The sparse checkout profile corresponds to a file in the revision to be
+ checked out. If a previous sparse profile or config is present, it will be
+ replaced by this sparse profile. We choose not to "widen" the sparse config
+ so operations are as deterministic as possible. If an existing checkout
+ is present and it isn't using a sparse checkout, we error. This is to
+ prevent accidentally enabling sparse on a repository that may have
+ clients that aren't sparse aware. Sparse checkout support requires Mercurial
+ 4.3 or newer and the ``sparse`` extension must be enabled.
+ """
+ if not revision and not branch:
+ raise error.Abort(b"must specify one of --revision or --branch")
+
+ if revision and branch:
+ raise error.Abort(b"cannot specify both --revision and --branch")
+
+ # Require revision to look like a SHA-1.
+ if revision:
+ if (
+ len(revision) < 12
+ or len(revision) > 40
+ or not re.match(b"^[a-f0-9]+$", revision)
+ ):
+ raise error.Abort(
+ b"--revision must be a SHA-1 fragment 12-40 " b"characters long"
+ )
+
+ sharebase = sharebase or ui.config(b"share", b"pool")
+ if not sharebase:
+ raise error.Abort(
+ b"share base directory not defined; refusing to operate",
+ hint=b"define share.pool config option or pass --sharebase",
+ )
+
+ # Sparse profile support was added in Mercurial 4.3, where it was highly
+ # experimental. Because of the fragility of it, we only support sparse
+ # profiles on 4.3. When 4.4 is released, we'll need to opt in to sparse
+ # support. We /could/ silently fall back to non-sparse when not supported.
+ # However, given that sparse has performance implications, we want to fail
+ # fast if we can't satisfy the desired checkout request.
+ if sparseprofile:
+ try:
+ extensions.find(b"sparse")
+ except KeyError:
+ raise error.Abort(
+ b"sparse extension must be enabled to use " b"--sparseprofile"
+ )
+
+ ui.warn(b"(using Mercurial %s)\n" % util.version())
+
+ # worker.backgroundclose only makes things faster if running anti-virus,
+ # which our automation doesn't. Disable it.
+ ui.setconfig(b"worker", b"backgroundclose", False)
+
+ # By default the progress bar starts after 3s and updates every 0.1s. We
+ # change this so it shows and updates every 1.0s.
+ # We also tell progress to assume a TTY is present so updates are printed
+ # even if there is no known TTY.
+ # We make the config change here instead of in a config file because
+ # otherwise we're at the whim of whatever configs are used in automation.
+ ui.setconfig(b"progress", b"delay", 1.0)
+ ui.setconfig(b"progress", b"refresh", 1.0)
+ ui.setconfig(b"progress", b"assume-tty", True)
+
+ sharebase = os.path.realpath(sharebase)
+
+ optimes = []
+ behaviors = set()
+ start = time.time()
+
+ try:
+ return _docheckout(
+ ui,
+ url,
+ dest,
+ upstream,
+ revision,
+ branch,
+ purge,
+ sharebase,
+ optimes,
+ behaviors,
+ networkattempts,
+ sparse_profile=sparseprofile,
+ noupdate=noupdate,
+ )
+ finally:
+ overall = time.time() - start
+
+ # We store the overall time multiple ways in order to help differentiate
+ # the various "flavors" of operations.
+
+ # ``overall`` is always the total operation time.
+ optimes.append(("overall", overall))
+
+ def record_op(name):
+ # If special behaviors due to "corrupt" storage occur, we vary the
+ # name to convey that.
+ if "remove-store" in behaviors:
+ name += "_rmstore"
+ if "remove-wdir" in behaviors:
+ name += "_rmwdir"
+
+ optimes.append((name, overall))
+
+ # We break out overall operations primarily by their network interaction
+ # We have variants within for working directory operations.
+ if "clone" in behaviors and "create-store" in behaviors:
+ record_op("overall_clone")
+
+ if "sparse-update" in behaviors:
+ record_op("overall_clone_sparsecheckout")
+ else:
+ record_op("overall_clone_fullcheckout")
+
+ elif "pull" in behaviors or "clone" in behaviors:
+ record_op("overall_pull")
+
+ if "sparse-update" in behaviors:
+ record_op("overall_pull_sparsecheckout")
+ else:
+ record_op("overall_pull_fullcheckout")
+
+ if "empty-wdir" in behaviors:
+ record_op("overall_pull_emptywdir")
+ else:
+ record_op("overall_pull_populatedwdir")
+
+ else:
+ record_op("overall_nopull")
+
+ if "sparse-update" in behaviors:
+ record_op("overall_nopull_sparsecheckout")
+ else:
+ record_op("overall_nopull_fullcheckout")
+
+ if "empty-wdir" in behaviors:
+ record_op("overall_nopull_emptywdir")
+ else:
+ record_op("overall_nopull_populatedwdir")
+
+ server_url = urllibcompat.urlreq.urlparse(url).netloc
+
+ if "TASKCLUSTER_INSTANCE_TYPE" in os.environ:
+ perfherder = {
+ "framework": {
+ "name": "vcs",
+ },
+ "suites": [],
+ }
+ for op, duration in optimes:
+ perfherder["suites"].append(
+ {
+ "name": op,
+ "value": duration,
+ "lowerIsBetter": True,
+ "shouldAlert": False,
+ "serverUrl": server_url.decode("utf-8"),
+ "hgVersion": util.version().decode("utf-8"),
+ "extraOptions": [os.environ["TASKCLUSTER_INSTANCE_TYPE"]],
+ "subtests": [],
+ }
+ )
+ ui.write(
+ b"PERFHERDER_DATA: %s\n"
+ % pycompat.bytestr(json.dumps(perfherder, sort_keys=True))
+ )
+
+
+def _docheckout(
+ ui,
+ url,
+ dest,
+ upstream,
+ revision,
+ branch,
+ purge,
+ sharebase,
+ optimes,
+ behaviors,
+ networkattemptlimit,
+ networkattempts=None,
+ sparse_profile=None,
+ noupdate=False,
+):
+ if not networkattempts:
+ networkattempts = [1]
+
+ def callself():
+ return _docheckout(
+ ui,
+ url,
+ dest,
+ upstream,
+ revision,
+ branch,
+ purge,
+ sharebase,
+ optimes,
+ behaviors,
+ networkattemptlimit,
+ networkattempts=networkattempts,
+ sparse_profile=sparse_profile,
+ noupdate=noupdate,
+ )
+
+ @contextlib.contextmanager
+ def timeit(op, behavior):
+ behaviors.add(behavior)
+ errored = False
+ try:
+ start = time.time()
+ yield
+ except Exception:
+ errored = True
+ raise
+ finally:
+ elapsed = time.time() - start
+
+ if errored:
+ op += "_errored"
+
+ optimes.append((op, elapsed))
+
+ ui.write(b"ensuring %s@%s is available at %s\n" % (url, revision or branch, dest))
+
+ # We assume that we're the only process on the machine touching the
+ # repository paths that we were told to use. This means our recovery
+ # scenario when things aren't "right" is to just nuke things and start
+ # from scratch. This is easier to implement than verifying the state
+ # of the data and attempting recovery. And in some scenarios (such as
+ # potential repo corruption), it is probably faster, since verifying
+ # repos can take a while.
+
+ destvfs = vfs.vfs(dest, audit=False, realpath=True)
+
+ def deletesharedstore(path=None):
+ storepath = path or destvfs.read(b".hg/sharedpath").strip()
+ if storepath.endswith(b".hg"):
+ storepath = os.path.dirname(storepath)
+
+ storevfs = vfs.vfs(storepath, audit=False)
+ storevfs.rmtree(forcibly=True)
+
+ if destvfs.exists() and not destvfs.exists(b".hg"):
+ raise error.Abort(b"destination exists but no .hg directory")
+
+ # Refuse to enable sparse checkouts on existing checkouts. The reasoning
+ # here is that another consumer of this repo may not be sparse aware. If we
+ # enabled sparse, we would lock them out.
+ if destvfs.exists() and sparse_profile and not destvfs.exists(b".hg/sparse"):
+ raise error.Abort(
+ b"cannot enable sparse profile on existing " b"non-sparse checkout",
+ hint=b"use a separate working directory to use sparse",
+ )
+
+ # And the other direction for symmetry.
+ if not sparse_profile and destvfs.exists(b".hg/sparse"):
+ raise error.Abort(
+ b"cannot use non-sparse checkout on existing sparse " b"checkout",
+ hint=b"use a separate working directory to use sparse",
+ )
+
+ # Require checkouts to be tied to shared storage because efficiency.
+ if destvfs.exists(b".hg") and not destvfs.exists(b".hg/sharedpath"):
+ ui.warn(b"(destination is not shared; deleting)\n")
+ with timeit("remove_unshared_dest", "remove-wdir"):
+ destvfs.rmtree(forcibly=True)
+
+ # Verify the shared path exists and is using modern pooled storage.
+ if destvfs.exists(b".hg/sharedpath"):
+ storepath = destvfs.read(b".hg/sharedpath").strip()
+
+ ui.write(b"(existing repository shared store: %s)\n" % storepath)
+
+ if not os.path.exists(storepath):
+ ui.warn(b"(shared store does not exist; deleting destination)\n")
+ with timeit("removed_missing_shared_store", "remove-wdir"):
+ destvfs.rmtree(forcibly=True)
+        elif not re.search(rb"[a-f0-9]{40}/\.hg$", storepath.replace(b"\\", b"/")):
+ ui.warn(
+ b"(shared store does not belong to pooled storage; "
+ b"deleting destination to improve efficiency)\n"
+ )
+ with timeit("remove_unpooled_store", "remove-wdir"):
+ destvfs.rmtree(forcibly=True)
+
+ if destvfs.isfileorlink(b".hg/wlock"):
+ ui.warn(
+ b"(dest has an active working directory lock; assuming it is "
+ b"left over from a previous process and that the destination "
+ b"is corrupt; deleting it just to be sure)\n"
+ )
+ with timeit("remove_locked_wdir", "remove-wdir"):
+ destvfs.rmtree(forcibly=True)
+
+ def handlerepoerror(e):
+ if pycompat.bytestr(e) == _(b"abandoned transaction found"):
+ ui.warn(b"(abandoned transaction found; trying to recover)\n")
+ repo = hg.repository(ui, dest)
+ if not repo.recover():
+ ui.warn(b"(could not recover repo state; " b"deleting shared store)\n")
+ with timeit("remove_unrecovered_shared_store", "remove-store"):
+ deletesharedstore()
+
+ ui.warn(b"(attempting checkout from beginning)\n")
+ return callself()
+
+ raise
+
+ # At this point we either have an existing working directory using
+ # shared, pooled storage or we have nothing.
+
+ def handlenetworkfailure():
+ if networkattempts[0] >= networkattemptlimit:
+ raise error.Abort(
+ b"reached maximum number of network attempts; " b"giving up\n"
+ )
+
+ ui.warn(
+ b"(retrying after network failure on attempt %d of %d)\n"
+ % (networkattempts[0], networkattemptlimit)
+ )
+
+ # Do a backoff on retries to mitigate the thundering herd
+    # problem. This is an exponential backoff with a multiplier
+ # plus random jitter thrown in for good measure.
+ # With the default settings, backoffs will be:
+ # 1) 2.5 - 6.5
+ # 2) 5.5 - 9.5
+ # 3) 11.5 - 15.5
+ backoff = (2 ** networkattempts[0] - 1) * 1.5
+ jittermin = ui.configint(b"robustcheckout", b"retryjittermin", 1000)
+ jittermax = ui.configint(b"robustcheckout", b"retryjittermax", 5000)
+ backoff += float(random.randint(jittermin, jittermax)) / 1000.0
+ ui.warn(b"(waiting %.2fs before retry)\n" % backoff)
+ time.sleep(backoff)
+
+ networkattempts[0] += 1
+
+ def handlepullerror(e):
+ """Handle an exception raised during a pull.
+
+ Returns True if caller should call ``callself()`` to retry.
+ """
+ if isinstance(e, error.Abort):
+ if e.args[0] == _(b"repository is unrelated"):
+ ui.warn(b"(repository is unrelated; deleting)\n")
+ destvfs.rmtree(forcibly=True)
+ return True
+ elif e.args[0].startswith(_(b"stream ended unexpectedly")):
+ ui.warn(b"%s\n" % e.args[0])
+ # Will raise if failure limit reached.
+ handlenetworkfailure()
+ return True
+ # TODO test this branch
+ elif isinstance(e, error.ResponseError):
+ if e.args[0].startswith(_(b"unexpected response from remote server:")):
+ ui.warn(b"(unexpected response from remote server; retrying)\n")
+ destvfs.rmtree(forcibly=True)
+ # Will raise if failure limit reached.
+ handlenetworkfailure()
+ return True
+ elif isinstance(e, ssl.SSLError):
+ # Assume all SSL errors are due to the network, as Mercurial
+ # should convert non-transport errors like cert validation failures
+ # to error.Abort.
+ ui.warn(b"ssl error: %s\n" % pycompat.bytestr(str(e)))
+ handlenetworkfailure()
+ return True
+ elif isinstance(e, urllibcompat.urlerr.urlerror):
+ if isinstance(e.reason, socket.error):
+ ui.warn(b"socket error: %s\n" % pycompat.bytestr(str(e.reason)))
+ handlenetworkfailure()
+ return True
+ else:
+ ui.warn(
+ b"unhandled URLError; reason type: %s; value: %s\n"
+ % (
+ pycompat.bytestr(e.reason.__class__.__name__),
+ pycompat.bytestr(str(e.reason)),
+ )
+ )
+ else:
+ ui.warn(
+ b"unhandled exception during network operation; type: %s; "
+ b"value: %s\n"
+ % (pycompat.bytestr(e.__class__.__name__), pycompat.bytestr(str(e)))
+ )
+
+ return False
+
+ # Perform sanity checking of store. We may or may not know the path to the
+ # local store. It depends if we have an existing destvfs pointing to a
+ # share. To ensure we always find a local store, perform the same logic
+ # that Mercurial's pooled storage does to resolve the local store path.
+ cloneurl = upstream or url
+
+ try:
+ clonepeer = hg.peer(ui, {}, cloneurl)
+ rootnode = peerlookup(clonepeer, b"0")
+ except error.RepoLookupError:
+ raise error.Abort(b"unable to resolve root revision from clone " b"source")
+ except (error.Abort, ssl.SSLError, urllibcompat.urlerr.urlerror) as e:
+ if handlepullerror(e):
+ return callself()
+ raise
+
+ if rootnode == nullid:
+ raise error.Abort(b"source repo appears to be empty")
+
+ storepath = os.path.join(sharebase, hex(rootnode))
+ storevfs = vfs.vfs(storepath, audit=False)
+
+ if storevfs.isfileorlink(b".hg/store/lock"):
+ ui.warn(
+ b"(shared store has an active lock; assuming it is left "
+ b"over from a previous process and that the store is "
+ b"corrupt; deleting store and destination just to be "
+ b"sure)\n"
+ )
+ if destvfs.exists():
+ with timeit("remove_dest_active_lock", "remove-wdir"):
+ destvfs.rmtree(forcibly=True)
+
+ with timeit("remove_shared_store_active_lock", "remove-store"):
+ storevfs.rmtree(forcibly=True)
+
+ if storevfs.exists() and not storevfs.exists(b".hg/requires"):
+ ui.warn(
+ b"(shared store missing requires file; this is a really "
+ b"odd failure; deleting store and destination)\n"
+ )
+ if destvfs.exists():
+ with timeit("remove_dest_no_requires", "remove-wdir"):
+ destvfs.rmtree(forcibly=True)
+
+ with timeit("remove_shared_store_no_requires", "remove-store"):
+ storevfs.rmtree(forcibly=True)
+
+ if storevfs.exists(b".hg/requires"):
+ requires = set(storevfs.read(b".hg/requires").splitlines())
+ # "share-safe" (enabled by default as of hg 6.1) moved most
+ # requirements to a new file, so we need to look there as well to avoid
+ # deleting and re-cloning each time
+ if b"share-safe" in requires:
+ requires |= set(storevfs.read(b".hg/store/requires").splitlines())
+ # FUTURE when we require generaldelta, this is where we can check
+ # for that.
+ required = {b"dotencode", b"fncache"}
+
+ missing = required - requires
+ if missing:
+ ui.warn(
+ b"(shared store missing requirements: %s; deleting "
+ b"store and destination to ensure optimal behavior)\n"
+ % b", ".join(sorted(missing))
+ )
+ if destvfs.exists():
+ with timeit("remove_dest_missing_requires", "remove-wdir"):
+ destvfs.rmtree(forcibly=True)
+
+ with timeit("remove_shared_store_missing_requires", "remove-store"):
+ storevfs.rmtree(forcibly=True)
+
+ created = False
+
+ if not destvfs.exists():
+ # Ensure parent directories of destination exist.
+ # Mercurial 3.8 removed ensuredirs and made makedirs race safe.
+ if util.safehasattr(util, "ensuredirs"):
+ makedirs = util.ensuredirs
+ else:
+ makedirs = util.makedirs
+
+ makedirs(os.path.dirname(destvfs.base), notindexed=True)
+ makedirs(sharebase, notindexed=True)
+
+ if upstream:
+ ui.write(b"(cloning from upstream repo %s)\n" % upstream)
+
+ if not storevfs.exists():
+            behaviors.add("create-store")
+
+ try:
+ with timeit("clone", "clone"):
+ shareopts = {b"pool": sharebase, b"mode": b"identity"}
+ res = hg.clone(
+ ui,
+ {},
+ clonepeer,
+ dest=dest,
+ update=False,
+ shareopts=shareopts,
+ stream=True,
+ )
+ except (error.Abort, ssl.SSLError, urllibcompat.urlerr.urlerror) as e:
+ if handlepullerror(e):
+ return callself()
+ raise
+ except error.RepoError as e:
+ return handlerepoerror(e)
+ except error.RevlogError as e:
+ ui.warn(b"(repo corruption: %s; deleting shared store)\n" % e)
+            with timeit("remove_shared_store_revlogerror", "remove-store"):
+ deletesharedstore()
+ return callself()
+
+ # TODO retry here.
+ if res is None:
+ raise error.Abort(b"clone failed")
+
+ # Verify it is using shared pool storage.
+ if not destvfs.exists(b".hg/sharedpath"):
+ raise error.Abort(b"clone did not create a shared repo")
+
+ created = True
+
+ # The destination .hg directory should exist. Now make sure we have the
+ # wanted revision.
+
+ repo = hg.repository(ui, dest)
+
+ # We only pull if we are using symbolic names or the requested revision
+ # doesn't exist.
+ havewantedrev = False
+
+ if revision:
+ try:
+ ctx = scmutil.revsingle(repo, revision)
+ except error.RepoLookupError:
+ ctx = None
+
+ if ctx:
+ if not ctx.hex().startswith(revision):
+ raise error.Abort(
+ b"--revision argument is ambiguous",
+ hint=b"must be the first 12+ characters of a " b"SHA-1 fragment",
+ )
+
+ checkoutrevision = ctx.hex()
+ havewantedrev = True
+
+ if not havewantedrev:
+ ui.write(b"(pulling to obtain %s)\n" % (revision or branch,))
+
+ remote = None
+ try:
+ remote = hg.peer(repo, {}, url)
+ pullrevs = [peerlookup(remote, revision or branch)]
+ checkoutrevision = hex(pullrevs[0])
+ if branch:
+ ui.warn(
+ b"(remote resolved %s to %s; "
+ b"result is not deterministic)\n" % (branch, checkoutrevision)
+ )
+
+ if checkoutrevision in repo:
+ ui.warn(b"(revision already present locally; not pulling)\n")
+ else:
+ with timeit("pull", "pull"):
+ pullop = exchange.pull(repo, remote, heads=pullrevs)
+ if not pullop.rheads:
+ raise error.Abort(b"unable to pull requested revision")
+ except (error.Abort, ssl.SSLError, urllibcompat.urlerr.urlerror) as e:
+ if handlepullerror(e):
+ return callself()
+ raise
+ except error.RepoError as e:
+ return handlerepoerror(e)
+ except error.RevlogError as e:
+ ui.warn(b"(repo corruption: %s; deleting shared store)\n" % e)
+ deletesharedstore()
+ return callself()
+ finally:
+ if remote:
+ remote.close()
+
+ # Now we should have the wanted revision in the store. Perform
+ # working directory manipulation.
+
+ # Avoid any working directory manipulations if `-U`/`--noupdate` was passed
+ if noupdate:
+ ui.write(b"(skipping update since `-U` was passed)\n")
+ return None
+
+ # Purge if requested. We purge before update because this way we're
+ # guaranteed to not have conflicts on `hg update`.
+ if purge and not created:
+ ui.write(b"(purging working directory)\n")
+ purge = getattr(commands, "purge", None)
+ if not purge:
+ purge = extensions.find(b"purge").purge
+
+ # Mercurial 4.3 doesn't purge files outside the sparse checkout.
+ # See https://bz.mercurial-scm.org/show_bug.cgi?id=5626. Force
+ # purging by monkeypatching the sparse matcher.
+ try:
+ old_sparse_fn = getattr(repo.dirstate, "_sparsematchfn", None)
+ if old_sparse_fn is not None:
+ repo.dirstate._sparsematchfn = lambda: matchmod.always()
+
+ with timeit("purge", "purge"):
+ if purge(
+ ui,
+ repo,
+ all=True,
+ abort_on_err=True,
+ # The function expects all arguments to be
+ # defined.
+ **{"print": None, "print0": None, "dirs": None, "files": None}
+ ):
+ raise error.Abort(b"error purging")
+ finally:
+ if old_sparse_fn is not None:
+ repo.dirstate._sparsematchfn = old_sparse_fn
+
+ # Update the working directory.
+
+ if repo[b"."].node() == nullid:
+ behaviors.add("empty-wdir")
+ else:
+ behaviors.add("populated-wdir")
+
+ if sparse_profile:
+ sparsemod = getsparse()
+
+ # By default, Mercurial will ignore unknown sparse profiles. This could
+ # lead to a full checkout. Be more strict.
+ try:
+ repo.filectx(sparse_profile, changeid=checkoutrevision).data()
+ except error.ManifestLookupError:
+ raise error.Abort(
+ b"sparse profile %s does not exist at revision "
+ b"%s" % (sparse_profile, checkoutrevision)
+ )
+
+ old_config = sparsemod.parseconfig(
+ repo.ui, repo.vfs.tryread(b"sparse"), b"sparse"
+ )
+
+ old_includes, old_excludes, old_profiles = old_config
+
+ if old_profiles == {sparse_profile} and not old_includes and not old_excludes:
+ ui.write(
+ b"(sparse profile %s already set; no need to update "
+ b"sparse config)\n" % sparse_profile
+ )
+ else:
+ if old_includes or old_excludes or old_profiles:
+ ui.write(
+ b"(replacing existing sparse config with profile "
+ b"%s)\n" % sparse_profile
+ )
+ else:
+ ui.write(b"(setting sparse config to profile %s)\n" % sparse_profile)
+
+ # If doing an incremental update, this will perform two updates:
+ # one to change the sparse profile and another to update to the new
+ # revision. This is not desired. But there's not a good API in
+ # Mercurial to do this as one operation.
+ with repo.wlock(), repo.dirstate.parentchange(), timeit(
+ "sparse_update_config", "sparse-update-config"
+ ):
+ # pylint --py3k: W1636
+ fcounts = list(
+ map(
+ len,
+ sparsemod._updateconfigandrefreshwdir(
+ repo, [], [], [sparse_profile], force=True
+ ),
+ )
+ )
+
+ repo.ui.status(
+ b"%d files added, %d files dropped, "
+ b"%d files conflicting\n" % tuple(fcounts)
+ )
+
+ ui.write(b"(sparse refresh complete)\n")
+
+ op = "update_sparse" if sparse_profile else "update"
+ behavior = "update-sparse" if sparse_profile else "update"
+
+ with timeit(op, behavior):
+ if commands.update(ui, repo, rev=checkoutrevision, clean=True):
+ raise error.Abort(b"error updating")
+
+ ui.write(b"updated to %s\n" % checkoutrevision)
+
+ return None
+
+
+def extsetup(ui):
+ # Ensure required extensions are loaded.
+ for ext in (b"purge", b"share"):
+ try:
+ extensions.find(ext)
+ except KeyError:
+ extensions.load(ui, ext, None)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task
new file mode 100755
index 0000000000..f1e281f5cd
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task
@@ -0,0 +1,1307 @@
+#!/usr/bin/python3 -u
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Run a task after performing common actions.
+
+This script is meant to be the "driver" for TaskCluster based tasks.
+It receives some common arguments to control the run-time environment.
+
+It performs actions as requested from the arguments. Then it executes
+the requested process and prints its output, prefixing it with the
+current time to improve log usefulness.
+"""
+
+import sys
+from typing import Optional
+
+if sys.version_info[0:2] < (3, 5):
+ print("run-task requires Python 3.5+")
+ sys.exit(1)
+
+
+import argparse
+import datetime
+import errno
+import io
+import json
+import os
+from pathlib import Path
+import re
+import shutil
+import signal
+import socket
+import stat
+import subprocess
+import time
+
+import urllib.error
+import urllib.request
+
+from threading import Thread
+
+SECRET_BASEURL_TPL = "http://taskcluster/secrets/v1/secret/{}"
+
+GITHUB_SSH_FINGERPRINT = (
+ b"github.com ssh-rsa "
+ b"AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkcc"
+ b"Krpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFz"
+ b"LQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaS"
+ b"jB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3sku"
+ b"a2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==\n"
+)
+
+
+CACHE_UID_GID_MISMATCH = """
+There is a UID/GID mismatch on the cache. This likely means:
+
+a) different tasks are running as a different user/group
+b) different Docker images have different UID/GID for the same user/group
+
+Our cache policy is that the UID/GID for ALL tasks must be consistent
+for the lifetime of the cache. This eliminates permissions problems due
+to file/directory user/group ownership.
+
+To make this error go away, ensure that all Docker images use
+a consistent UID/GID and that all tasks using this cache are running as
+the same user/group.
+"""
+
+
+NON_EMPTY_VOLUME = """
+error: volume %s is not empty
+
+Our Docker image policy requires volumes to be empty.
+
+The volume was likely populated as part of building the Docker image.
+Change the Dockerfile and anything run from it to not create files in
+any VOLUME.
+
+A lesser possibility is that you stumbled upon a TaskCluster platform bug
+where it fails to use new volumes for tasks.
+"""
+
+
+FETCH_CONTENT_NOT_FOUND = """
+error: fetch-content script not found
+
+The script at `taskcluster/scripts/misc/fetch-content` could not be
+detected in the current environment.
+"""
+
+# The exit code to use when caches should be purged and the task retried.
+# This is EX_OSFILE (from sysexits.h):
+# Some system file does not exist, cannot be opened, or has some
+# sort of error (e.g., syntax error).
+EXIT_PURGE_CACHE = 72
+
+
+IS_MACOSX = sys.platform == "darwin"
+IS_POSIX = os.name == "posix"
+IS_WINDOWS = os.name == "nt"
+
+# Both mercurial and git use sha1 as revision identifiers. Luckily, both define
+# the same value as the null revision.
+#
+# https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7
+# https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30
+NULL_REVISION = "0000000000000000000000000000000000000000"
+
+
+def print_line(prefix, m):
+ now = datetime.datetime.utcnow().isoformat().encode("utf-8")
+ # slice microseconds to 3 decimals.
+ now = now[:-3] if now[-7:-6] == b"." else now
+ sys.stdout.buffer.write(b"[%s %sZ] %s" % (prefix, now, m))
+ sys.stdout.buffer.flush()
+
+
+def _call_windows_retry(func, args=(), retry_max=5, retry_delay=0.5):
+ """
+ It's possible to see spurious errors on Windows due to various things
+ keeping a handle to the directory open (explorer, virus scanners, etc)
+ So we try a few times if it fails with a known error.
+ retry_delay is multiplied by the number of failed attempts to increase
+ the likelihood of success in subsequent attempts.
+ """
+ retry_count = 0
+ while True:
+ try:
+ func(*args)
+ except OSError as e:
+ # Error codes are defined in:
+ # https://docs.python.org/3/library/errno.html#module-errno
+ if e.errno not in (errno.EACCES, errno.ENOTEMPTY, errno.ENOENT):
+ raise
+
+ if retry_count == retry_max:
+ raise
+
+ retry_count += 1
+
+ print(
+ '%s() failed for "%s". Reason: %s (%s). Retrying...'
+ % (func.__name__, args, e.strerror, e.errno)
+ )
+ time.sleep(retry_count * retry_delay)
+ else:
+ # If no exception has been thrown it should be done
+ break
+
+
+def remove(path):
+ """Removes the specified file, link, or directory tree.
+
+ This is a replacement for shutil.rmtree that works better under
+ windows. It does the following things:
+
+ - check path access for the current user before trying to remove
+ - retry operations on some known errors due to various things keeping
+ a handle on file paths - like explorer, virus scanners, etc. The
+ known errors are errno.EACCES and errno.ENOTEMPTY, and it will
+      retry up to five times with a delay of (failed_attempts * 0.5) seconds
+ between each attempt.
+
+    Note that no error will be raised if the given path does not exist.
+
+ :param path: path to be removed
+ """
+
+ def _update_permissions(path):
+        """Sets specified permissions depending on filetype"""
+ if os.path.islink(path):
+ # Path is a symlink which we don't have to modify
+ # because it should already have all the needed permissions
+ return
+
+ stats = os.stat(path)
+
+ if os.path.isfile(path):
+ mode = stats.st_mode | stat.S_IWUSR
+ elif os.path.isdir(path):
+ mode = stats.st_mode | stat.S_IWUSR | stat.S_IXUSR
+ else:
+ # Not supported type
+ return
+
+ _call_windows_retry(os.chmod, (path, mode))
+
+ if not os.path.lexists(path):
+ print_line(b"remove", b"WARNING: %s does not exists!\n" % path.encode("utf-8"))
+ return
+
+ """
+ On Windows, adds '\\\\?\\' to paths which match ^[A-Za-z]:\\.* to access
+ files or directories that exceed MAX_PATH(260) limitation or that ends
+ with a period.
+ """
+ if (
+ sys.platform in ("win32", "cygwin")
+ and len(path) >= 3
+ and path[1] == ":"
+ and path[2] == "\\"
+ ):
+ path = "\\\\?\\%s" % path
+
+ if os.path.isfile(path) or os.path.islink(path):
+ # Verify the file or link is read/write for the current user
+ _update_permissions(path)
+ _call_windows_retry(os.remove, (path,))
+
+ elif os.path.isdir(path):
+ # Verify the directory is read/write/execute for the current user
+ _update_permissions(path)
+
+ # We're ensuring that every nested item has writable permission.
+ for root, dirs, files in os.walk(path):
+ for entry in dirs + files:
+ _update_permissions(os.path.join(root, entry))
+ _call_windows_retry(shutil.rmtree, (path,))
+
+
+def run_required_command(prefix, args, *, extra_env=None, cwd=None):
+ res = run_command(prefix, args, extra_env=extra_env, cwd=cwd)
+ if res:
+ sys.exit(res)
+
+
def retry_required_command(prefix, args, *, extra_env=None, cwd=None, retries=2):
    """Run a command, retrying on failure with exponential backoff.

    Up to `retries` additional attempts are made after the first failure
    (sleeping 2, 4, ... seconds between attempts). Once attempts are
    exhausted the process exits with the command's status.
    """
    delay = 1
    attempts_left = retries
    while True:
        status = run_command(prefix, args, extra_env=extra_env, cwd=cwd)
        if not status:
            return
        if not attempts_left:
            sys.exit(status)
        attempts_left -= 1
        delay *= 2
        time.sleep(delay)
+
+
def run_command(prefix, args, *, extra_env=None, cwd=None):
    """Runs a process and prefixes its output with the time.

    Returns the process exit code.
    """
    print_line(prefix, b"executing %r\n" % args)

    env = dict(os.environ)
    env.update(extra_env or {})

    # TaskCluster's stdin is a TTY, an attribute that is lost when
    # sys.stdin is handed to the child. Preserving the TTY would be
    # possible, but nobody has needed it yet.

    proc = subprocess.Popen(
        args,
        # Unbuffered so output arrives as it is produced and the
        # timestamps print_line adds stay accurate.
        bufsize=0,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        stdin=sys.stdin.fileno(),
        cwd=cwd,
        env=env,
    )

    # We want raw bytes out of the child, but readline() on a bytes
    # stream is awkward; wrapping in a latin1 TextIOWrapper gives us
    # readline() while round-tripping every byte value losslessly.
    out = io.TextIOWrapper(proc.stdout, encoding="latin1")

    for line in iter(out.readline, ""):
        print_line(prefix, line.encode("latin1"))

    return proc.wait()
+
+
def get_posix_user_group(user, group):
    """Resolve user/group names against the system databases.

    Returns a (passwd entry, group entry, supplementary gid list) tuple.
    Exits the process with status 1 when either name cannot be resolved,
    or when the special `worker` user/group does not have the required
    fixed numeric id of 1000.
    """
    import grp
    import pwd

    try:
        pw_entry = pwd.getpwnam(user)
    except KeyError:
        print("could not find user %s; specify a valid user with --user" % user)
        sys.exit(1)

    try:
        gr_entry = grp.getgrnam(group)
    except KeyError:
        print("could not find group %s; specify a valid group with --group" % group)
        sys.exit(1)

    # Most tasks use worker:worker. We require they have a specific numeric ID
    # because otherwise it is too easy for files written to caches to have
    # mismatched numeric IDs, which results in permissions errors.
    if pw_entry.pw_name == "worker" and pw_entry.pw_uid != 1000:
        print("user `worker` must have uid=1000; got %d" % pw_entry.pw_uid)
        sys.exit(1)

    if gr_entry.gr_name == "worker" and gr_entry.gr_gid != 1000:
        print("group `worker` must have gid=1000; got %d" % gr_entry.gr_gid)
        sys.exit(1)

    # Collect every group that lists this group's name as a member.
    supplementary_gids = [g.gr_gid for g in grp.getgrall() if group in g.gr_mem]

    return pw_entry, gr_entry, supplementary_gids
+
+
def write_audit_entry(path, msg):
    """Append a timestamped audit line to the log file at *path*.

    Each line has the form ``[<ISO-8601 UTC>Z <TASK_ID or UNKNOWN>] <msg>``.
    The file is opened in append mode so entries accumulate across tasks.
    """
    # datetime.utcnow() is deprecated since Python 3.12; derive the same
    # naive-UTC timestamp (no offset suffix) from an aware datetime so the
    # on-disk format is unchanged.
    now = (
        datetime.datetime.now(datetime.timezone.utc)
        .replace(tzinfo=None)
        .isoformat()
        .encode("utf-8")
    )
    with open(path, "ab") as fh:
        fh.write(b"[%sZ %s] %s\n" % (now, os.environb.get(b"TASK_ID", b"UNKNOWN"), msg))
+
+
# Minimum permissions the owning user needs on every directory we manage:
# read, write and traverse.
WANTED_DIR_MODE = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR


def set_dir_permissions(path, uid, gid):
    """Ensure *path* is owned by uid:gid and user-rwx, changing only what differs."""
    info = os.lstat(path)

    # Only chown when ownership actually differs, to avoid needless syscalls.
    if (info.st_uid, info.st_gid) != (uid, gid):
        os.chown(path, uid, gid)

    # Guarantee the owner can later delete the directory and its contents.
    if info.st_mode & WANTED_DIR_MODE != WANTED_DIR_MODE:
        os.chmod(path, info.st_mode | WANTED_DIR_MODE)
+
+
def chown_recursive(path, user, group, uid, gid):
    """Recursively hand ownership of *path* to uid:gid.

    Directories additionally get user-rwx permissions (via
    set_dir_permissions) so they remain deletable.
    """
    print_line(
        b"chown",
        b"recursively changing ownership of %s to %s:%s\n"
        % (path.encode("utf-8"), user.encode("utf-8"), group.encode("utf-8")),
    )

    set_dir_permissions(path, uid, gid)

    for parent, child_dirs, child_files in os.walk(path):
        for name in child_dirs:
            set_dir_permissions(os.path.join(parent, name), uid, gid)

        for name in child_files:
            # A file may be a symlink pointing at nothing; os.chown() would
            # fail following it. We only care about the directory entry
            # itself, so change the owner of the link with lchown.
            os.lchown(os.path.join(parent, name), uid, gid)
+
+
def configure_cache_posix(cache, user, group, untrusted_caches, running_as_root):
    """Configure a cache path on POSIX platforms.

    For each cache, we write out a special file denoting attributes and
    capabilities of run-task and the task being executed. These attributes
    are used by subsequent run-task invocations to validate that use of
    the cache is acceptable.

    We /could/ blow away the cache data on requirements mismatch.
    While this would be convenient, this could result in "competing" tasks
    effectively undoing the other's work. This would slow down task
    execution in aggregate. Without monitoring for this, people may not notice
    the problem and tasks would be slower than they could be. We follow the
    principle of "fail fast" to ensure optimal task execution.

    We also write an audit log of who used the caches. This log is printed
    during failures to help aid debugging.

    Returns True when the cache cannot be used and should be purged
    (requirements mismatch or missing requirements file); returns None when
    the cache was configured or validated successfully.
    """

    our_requirements = {
        # Include a version string that we can bump whenever to trigger
        # fresh caches. The actual value is not relevant and doesn't need
        # to follow any explicit order. Since taskgraph bakes this file's
        # hash into cache names, any change to this file/version is sufficient
        # to force the use of a new cache.
        b"version=1",
        # Include the UID and GID the task will run as to ensure that tasks
        # with different UID and GID don't share the same cache.
        b"uid=%d" % user.pw_uid,
        b"gid=%d" % group.gr_gid,
    }

    # Metadata files stored in the cache root: the requirements this cache
    # was created with, and an append-only usage log.
    requires_path = os.path.join(cache, ".cacherequires")
    audit_path = os.path.join(cache, ".cachelog")

    # The cache is empty. Configure it.
    if not os.listdir(cache):
        print_line(
            b"cache",
            b"cache %s is empty; writing requirements: "
            b"%s\n" % (cache.encode("utf-8"), b" ".join(sorted(our_requirements))),
        )

        # We write a requirements file so future invocations know what the
        # requirements are.
        with open(requires_path, "wb") as fh:
            fh.write(b"\n".join(sorted(our_requirements)))

        # And make it read-only as a precaution against deletion.
        os.chmod(requires_path, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)

        write_audit_entry(
            audit_path,
            b"created; requirements: %s" % b", ".join(sorted(our_requirements)),
        )

        set_dir_permissions(cache, user.pw_uid, group.gr_gid)
        return

    # The cache has content and we have a requirements file. Validate
    # requirements alignment.
    if os.path.exists(requires_path):
        with open(requires_path, "rb") as fh:
            wanted_requirements = set(fh.read().splitlines())

        print_line(
            b"cache",
            b"cache %s exists; requirements: %s\n"
            % (cache.encode("utf-8"), b" ".join(sorted(wanted_requirements))),
        )

        # Requirements recorded in the cache that this invocation does not
        # satisfy.
        missing = wanted_requirements - our_requirements

        # Allow requirements mismatch for uid/gid if and only if caches
        # are untrusted. This allows cache behavior on Try to be
        # reasonable. Otherwise, random tasks could "poison" cache
        # usability by introducing uid/gid mismatches. For untrusted
        # environments like Try, this is a perfectly reasonable thing to
        # allow.
        if (
            missing
            and untrusted_caches
            and running_as_root
            and all(s.startswith((b"uid=", b"gid=")) for s in missing)
        ):
            print_line(
                b"cache",
                b"cache %s uid/gid mismatch; this is acceptable "
                b"because caches for this task are untrusted; "
                b"changing ownership to facilitate cache use\n" % cache.encode("utf-8"),
            )
            chown_recursive(
                cache, user.pw_name, group.gr_name, user.pw_uid, group.gr_gid
            )

            # And write out the updated reality.
            with open(requires_path, "wb") as fh:
                fh.write(b"\n".join(sorted(our_requirements)))

            write_audit_entry(
                audit_path,
                b"chown; requirements: %s" % b", ".join(sorted(our_requirements)),
            )

        elif missing:
            # Unreconcilable mismatch: report both sides, log it, and ask
            # the caller to purge the cache.
            print(
                "error: requirements for populated cache %s differ from "
                "this task" % cache
            )
            print(
                "cache requirements: %s"
                % " ".join(sorted(s.decode("utf-8") for s in wanted_requirements))
            )
            print(
                "our requirements: %s"
                % " ".join(sorted(s.decode("utf-8") for s in our_requirements))
            )
            if any(s.startswith((b"uid=", b"gid=")) for s in missing):
                print(CACHE_UID_GID_MISMATCH)

            write_audit_entry(
                audit_path,
                b"requirements mismatch; wanted: %s"
                % b", ".join(sorted(our_requirements)),
            )

            print("")
            print("audit log:")
            with open(audit_path, "r") as fh:
                print(fh.read())

            return True
        else:
            write_audit_entry(audit_path, b"used")

        # We don't need to adjust permissions here because the cache is
        # associated with a uid/gid and the first task should have set
        # a proper owner/group.

        return

    # The cache has content and no requirements file. This shouldn't
    # happen because run-task should be the first thing that touches a
    # cache.
    print(
        "error: cache %s is not empty and is missing a "
        ".cacherequires file; the cache names for this task are "
        "likely mis-configured or TASKCLUSTER_CACHES is not set "
        "properly" % cache
    )

    write_audit_entry(audit_path, b"missing .cacherequires")
    return True
+
+
def configure_volume_posix(volume, user, group, running_as_root):
    """Validate that *volume* is empty and make it writable by the task user.

    Exits the process when the volume already contains files: volumes must
    start empty so they behave exactly like an empty cache, letting caches
    be swapped onto any volume without visible behavior changes.
    """
    # The only time files appear here is if the Docker image build put
    # them there, which our policy forbids.
    entries = os.listdir(volume)
    if entries:
        print(NON_EMPTY_VOLUME % volume)
        print("entries in root directory: %s" % " ".join(sorted(entries)))
        sys.exit(1)

    # The volume is almost certainly owned by root:root; chown it so the
    # task user can write to it.
    if running_as_root:
        print_line(
            b"volume",
            b"changing ownership of volume %s "
            b"to %d:%d\n" % (volume.encode("utf-8"), user.pw_uid, group.gr_gid),
        )
        set_dir_permissions(volume, user.pw_uid, group.gr_gid)
+
+
def _clean_git_checkout(destination_path):
    """Delete untracked files (i.e. build products) from a git checkout.

    `git clean` runs in dry-run mode (-n) and its "Would remove ..." output
    is parsed so the files can be deleted via remove(), which retries on the
    transient errors Windows is prone to.
    """
    print_line(b"vcs", b"cleaning git checkout...\n")
    args = [
        "git",
        "clean",
        # Two -f`s causes subdirectories with `.git`
        # directories to be cleaned as well.
        "-nxdff",
    ]
    print_line(b"vcs", b"executing %r\n" % args)
    p = subprocess.Popen(
        args,
        # Disable buffering because we want to receive output
        # as it is generated so timestamps in logs are
        # accurate.
        bufsize=0,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        stdin=sys.stdin.fileno(),
        cwd=destination_path,
        env=os.environ,
    )
    stdout = io.TextIOWrapper(p.stdout, encoding="latin1")
    # Drain stdout *before* waiting: waiting first can deadlock once the
    # child fills the OS pipe buffer (see the Popen.wait() warning in the
    # subprocess documentation).
    data = stdout.read()
    ret = p.wait()
    if ret:
        sys.exit(ret)
    prefix = "Would remove "
    filenames = [
        os.path.join(destination_path, line[len(prefix) :])
        for line in data.splitlines()
    ]
    print_line(b"vcs", b"removing %r\n" % filenames)
    for filename in filenames:
        remove(filename)
    print_line(b"vcs", b"successfully cleaned git checkout!\n")
+
+
def git_checkout(
    destination_path: str,
    head_repo: str,
    base_repo: Optional[str],
    base_ref: Optional[str],
    base_rev: Optional[str],
    ref: Optional[str],
    commit: Optional[str],
    ssh_key_file: Optional[Path],
    ssh_known_hosts_file: Optional[Path],
):
    """Clone/update a git checkout at destination_path and return the
    40-character hex hash of the commit that ended up checked out.

    When base_ref/base_rev are provided, they are fetched as well so that
    taskgraph can compute differences against the base. ssh_key_file and
    ssh_known_hosts_file must be provided together; they configure
    GIT_SSH_COMMAND for authenticated fetches. Submodules are initialized
    and updated when a .gitmodules file is present.
    """
    env = {"PYTHONUNBUFFERED": "1"}

    if ssh_key_file and ssh_known_hosts_file:
        if not ssh_key_file.exists():
            raise RuntimeError("Can't find specified ssh_key file.")
        if not ssh_known_hosts_file.exists():
            raise RuntimeError("Can't find specified known_hosts file.")
        env["GIT_SSH_COMMAND"] = " ".join(
            [
                "ssh",
                "-oIdentityFile={}".format(ssh_key_file.as_posix()),
                "-oStrictHostKeyChecking=yes",
                "-oUserKnownHostsFile={}".format(ssh_known_hosts_file.as_posix()),
            ]
        )
    elif ssh_key_file or ssh_known_hosts_file:
        raise RuntimeError(
            "Must specify both ssh_key_file and ssh_known_hosts_file, if either are specified",
        )

    if not os.path.exists(destination_path):
        # Repository doesn't already exist, needs to be cloned
        args = [
            "git",
            "clone",
            base_repo if base_repo else head_repo,
            destination_path,
        ]

        retry_required_command(b"vcs", args, extra_env=env)

    if base_ref:
        args = ["git", "fetch", "origin", base_ref]

        retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)

        # Create local branch so that taskgraph is able to compute differences
        # between the head branch and the base one, if needed
        args = ["git", "checkout", base_ref]

        retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)

    # When commits are force-pushed (like on a testing branch), base_rev doesn't
    # exist on base_ref. Fetching it allows taskgraph to compute differences
    # between the previous state before the force-push and the current state.
    #
    # Unlike base_ref just above, there is no need to checkout the revision:
    # it's immediately available after the fetch.
    if base_rev and base_rev != NULL_REVISION:
        args = ["git", "fetch", "origin", base_rev]

        retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)

    # If a ref isn't provided, we fetch all refs from head_repo, which may be slow
    args = [
        "git",
        "fetch",
        "--no-tags",
        head_repo,
        ref if ref else "+refs/heads/*:refs/remotes/work/*",
    ]

    retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)

    args = [
        "git",
        "checkout",
        "-f",
    ]

    if ref:
        args.extend(["-B", ref])
    args.append(commit if commit else ref)

    run_required_command(b"vcs", args, cwd=destination_path)

    if os.path.exists(os.path.join(destination_path, ".gitmodules")):
        args = [
            "git",
            "submodule",
            "init",
        ]

        run_required_command(b"vcs", args, cwd=destination_path)

        args = [
            "git",
            "submodule",
            "update",
        ]

        run_required_command(b"vcs", args, cwd=destination_path)

    _clean_git_checkout(destination_path)

    # Resolve the working tree to a full commit hash and sanity-check it.
    args = ["git", "rev-parse", "--verify", "HEAD"]

    commit_hash = subprocess.check_output(
        args, cwd=destination_path, universal_newlines=True
    ).strip()
    assert re.match("^[a-f0-9]{40}$", commit_hash)

    # Emit a Treeherder-style link to the commit we actually built.
    if head_repo.startswith("https://github.com"):
        if head_repo.endswith("/"):
            head_repo = head_repo[:-1]

        tinderbox_link = "{}/commit/{}".format(head_repo, commit_hash)
        repo_name = head_repo.split("/")[-1]
    else:
        tinderbox_link = head_repo
        repo_name = head_repo

    msg = (
        "TinderboxPrint:<a href='{link}' "
        "title='Built from {name} commit {commit_hash}'>"
        "{commit_hash}</a>\n".format(
            commit_hash=commit_hash, link=tinderbox_link, name=repo_name
        )
    )

    print_line(b"vcs", msg.encode("utf-8"))

    return commit_hash
+
+
def fetch_ssh_secret(secret_name):
    """Fetch the named secret and return its ssh_privkey value as a string."""
    secret_url = SECRET_BASEURL_TPL.format(secret_name)
    try:
        print_line(
            b"vcs",
            b"fetching secret %s from %s\n"
            % (secret_name.encode("utf-8"), secret_url.encode("utf-8")),
        )
        response = urllib.request.urlopen(secret_url, timeout=10)
        payload = response.read()
        try:
            payload = json.loads(payload.decode("utf-8"))
        except ValueError:
            # NOTE(review): unlike every other message, the two below lack a
            # trailing \n — confirm whether print_line appends one.
            print_line(b"vcs", b"invalid JSON in secret")
            sys.exit(1)
    except (urllib.error.URLError, socket.timeout):
        print_line(b"vcs", b"Unable to retrieve ssh secret. aborting...")
        sys.exit(1)

    return payload["secret"]["ssh_privkey"]
+
+
def hg_checkout(
    destination_path: str,
    head_repo: str,
    base_repo: Optional[str],
    store_path: str,
    sparse_profile: Optional[str],
    branch: Optional[str],
    revision: Optional[str],
):
    """Check out a Mercurial repository via `hg robustcheckout`.

    Returns the 40-character hex node of the revision actually checked out.
    """
    # Locate the hg executable for the current platform.
    if IS_MACOSX:
        hg_bin = "/tools/python27-mercurial/bin/hg"
    elif IS_POSIX:
        hg_bin = "hg"
    elif IS_WINDOWS:
        # This is where OCC installs it in the AMIs.
        hg_bin = r"C:\Program Files\Mercurial\hg.exe"
        if not os.path.exists(hg_bin):
            print("could not find Mercurial executable: %s" % hg_bin)
            sys.exit(1)
    else:
        raise RuntimeError("Must be running on mac, posix or windows")

    args = [hg_bin, "robustcheckout", "--sharebase", store_path, "--purge"]

    if base_repo:
        args += ["--upstream", base_repo]
    if sparse_profile:
        args += ["--sparseprofile", sparse_profile]

    # robustcheckout defaults to SHA-1 revisions, but symbolic revisions
    # such as `tip` are supported through the branch flag.
    selector = "--branch" if branch else "--revision"
    args += [selector, branch or revision, head_repo, destination_path]

    run_required_command(b"vcs", args, extra_env={"PYTHONUNBUFFERED": "1"})

    # Resolve the working directory to a full node hash and verify its shape.
    revision = subprocess.check_output(
        [hg_bin, "log", "--rev", ".", "--template", "{node}"],
        cwd=destination_path,
        # Triggers text mode on Python 3.
        universal_newlines=True,
    )

    assert re.match("^[a-f0-9]{40}$", revision)

    msg = (
        "TinderboxPrint:<a href={head_repo}/rev/{revision} "
        "title='Built from {repo_name} revision {revision}'>"
        "{revision}</a>\n".format(
            revision=revision, head_repo=head_repo, repo_name=head_repo.split("/")[-1]
        )
    )

    print_line(b"vcs", msg.encode("utf-8"))

    return revision
+
+
def fetch_artifacts():
    """Download the task's MOZ_FETCHES artifacts via the fetch-content tool."""
    print_line(b"fetches", b"fetching artifacts\n")

    # Prefer a fetch-content found on PATH; fall back to the copy shipped
    # alongside this script.
    tool = shutil.which("fetch-content")
    if not tool or not os.path.isfile(tool):
        tool = os.path.join(os.path.dirname(__file__), "fetch-content")

    if not os.path.isfile(tool):
        print(FETCH_CONTENT_NOT_FOUND)
        sys.exit(1)

    cmd = [sys.executable, "-u", tool, "task-artifacts"]
    print_line(b"fetches", b"executing %r\n" % cmd)
    subprocess.run(cmd, check=True, env=os.environ)
    print_line(b"fetches", b"finished fetching artifacts\n")
+
+
def add_vcs_arguments(parser, project, name):
    """Adds arguments to ArgumentParser to control VCS options for a project."""

    for suffix, help_tpl in (
        ("checkout", "Directory where %s checkout should be created"),
        ("sparse-profile", "Path to sparse profile for %s checkout"),
    ):
        parser.add_argument(
            "--%s-%s" % (project, suffix),
            help=help_tpl % name,
        )
+
+
def collect_vcs_options(args, project, name):
    """Gather checkout configuration for *project* from CLI args and env vars.

    Returns a dict describing the repository: paths, refs/revisions,
    repository type, ssh secret name and pip requirements, keyed by the
    hyphenated option names used throughout this script.
    """
    checkout = getattr(args, "%s_checkout" % project)
    sparse_profile = getattr(args, "%s_sparse_profile" % project)

    env_prefix = project.upper()

    def env(suffix):
        # All repository settings arrive as <PREFIX>_<SUFFIX> env vars.
        return os.environ.get("%s_%s" % (env_prefix, suffix))

    repo_type = env("REPOSITORY_TYPE")
    base_repo = env("BASE_REPOSITORY")
    base_ref = env("BASE_REF")
    base_rev = env("BASE_REV")
    head_repo = env("HEAD_REPOSITORY")
    revision = env("HEAD_REV")
    ref = env("HEAD_REF")
    pip_requirements = env("PIP_REQUIREMENTS")
    private_key_secret = env("SSH_SECRET_NAME")

    store_path = os.environ.get("HG_STORE_PATH")

    # Expand ~ in some paths.
    if checkout:
        checkout = os.path.abspath(os.path.expanduser(checkout))
    if store_path:
        store_path = os.path.abspath(os.path.expanduser(store_path))

    if pip_requirements:
        pip_requirements = os.path.join(checkout, pip_requirements)

    # Some callers set the base repository to mozilla-central for historical
    # reasons. Switch to mozilla-unified because robustcheckout works best
    # with it.
    if base_repo == "https://hg.mozilla.org/mozilla-central":
        base_repo = "https://hg.mozilla.org/mozilla-unified"

    return {
        "store-path": store_path,
        "project": project,
        "name": name,
        "env-prefix": env_prefix,
        "checkout": checkout,
        "sparse-profile": sparse_profile,
        "base-repo": base_repo,
        "base-ref": base_ref,
        "base-rev": base_rev,
        "head-repo": head_repo,
        "revision": revision,
        "ref": ref,
        "repo-type": repo_type,
        "ssh-secret-name": private_key_secret,
        "pip-requirements": pip_requirements,
    }
+
+
def vcs_checkout_from_args(options):
    """Perform the checkout described by *options* (see collect_vcs_options).

    Does nothing when no checkout directory is configured (after validating
    that the task is pinned to a non-symbolic revision). On success, exports
    <ENV-PREFIX>_HEAD_REV with the revision actually checked out.

    Raises RuntimeError for unsupported repo types or when neither a ref
    nor a revision is provided.
    """
    if not options["checkout"]:
        if options["ref"] and not options["revision"]:
            print("task should be defined in terms of non-symbolic revision")
            sys.exit(1)
        return

    revision = options["revision"]
    ref = options["ref"]
    ssh_key_file = None
    ssh_known_hosts_file = None
    ssh_dir = None

    try:
        if options.get("ssh-secret-name"):
            # Materialize the ssh key from the secrets service into a private
            # directory; it is scrubbed again in the finally block below.
            ssh_dir = Path("~/.ssh-run-task").expanduser()
            os.makedirs(ssh_dir, 0o700)
            ssh_key_file = ssh_dir.joinpath("private_ssh_key")
            ssh_key = fetch_ssh_secret(options["ssh-secret-name"])
            # We don't use write_text here, to avoid \n -> \r\n on windows
            ssh_key_file.write_bytes(ssh_key.encode("ascii"))
            ssh_key_file.chmod(0o600)
            # TODO: We should pull this from a secret, so it can be updated on old trees
            ssh_known_hosts_file = ssh_dir.joinpath("known_hosts")
            ssh_known_hosts_file.write_bytes(GITHUB_SSH_FINGERPRINT)

        if options["repo-type"] == "git":
            if not revision and not ref:
                raise RuntimeError(
                    "Git requires that either a ref, a revision, or both are provided"
                )

            if not ref:
                print("Providing a ref will improve the performance of this checkout")

            revision = git_checkout(
                options["checkout"],
                options["head-repo"],
                options["base-repo"],
                options["base-ref"],
                options["base-rev"],
                ref,
                revision,
                ssh_key_file,
                ssh_known_hosts_file,
            )
        elif options["repo-type"] == "hg":
            if not revision and not ref:
                raise RuntimeError(
                    "Hg requires that at least one of a ref or revision " "is provided"
                )

            revision = hg_checkout(
                options["checkout"],
                options["head-repo"],
                options["base-repo"],
                options["store-path"],
                options["sparse-profile"],
                ref,
                revision,
            )
        else:
            raise RuntimeError('Type of VCS must be either "git" or "hg"')
    finally:
        # Always scrub the ssh key material, even when the checkout failed.
        # (A stray no-op `pass` that followed this statement was removed.)
        if ssh_dir:
            shutil.rmtree(ssh_dir, ignore_errors=True)

    os.environ["%s_HEAD_REV" % options["env-prefix"]] = revision
+
+
def install_pip_requirements(repositories):
    """Install pip requirements files from specified repositories, if necessary."""
    requirement_files = [
        repo["pip-requirements"] for repo in repositories if repo["pip-requirements"]
    ]
    if not requirement_files:
        return

    cmd = [sys.executable, "-mpip", "install"]
    # Hash-pinning is mandatory unless explicitly disabled via the environment.
    if os.environ.get("PIP_DISABLE_REQUIRE_HASHES") != "1":
        cmd.append("--require-hashes")

    for requirements_path in requirement_files:
        cmd += ["-r", requirements_path]

    run_required_command(b"pip-install", cmd)
+
+
def maybe_run_resource_monitoring():
    """Run the resource monitor if available.

    Returns the monitor's Popen object when started, otherwise None.

    Discussion in https://github.com/taskcluster/taskcluster-rfcs/pull/160
    and https://bugzil.la/1648051
    """
    if "MOZ_FETCHES" not in os.environ:
        return
    if "RESOURCE_MONITOR_OUTPUT" not in os.environ:
        return

    prefix = b"resource_monitor"

    suffix = ".exe" if IS_WINDOWS else ""
    executable = "{}/resource-monitor/resource-monitor{}".format(
        os.environ.get("MOZ_FETCHES_DIR"), suffix
    )

    if not (os.path.exists(executable) and os.access(executable, os.X_OK)):
        print_line(prefix, b"%s not executable\n" % executable.encode("utf-8"))
        return

    args = [
        executable,
        "-process",
        str(os.getpid()),
        "-output",
        os.environ["RESOURCE_MONITOR_OUTPUT"],
    ]
    print_line(prefix, b"Resource monitor starting: %s\n" % str(args).encode("utf-8"))
    # The payload doesn't need this environment variable; drop it.
    del os.environ["RESOURCE_MONITOR_OUTPUT"]

    # Without CREATE_NEW_PROCESS_GROUP Windows signals will attempt to kill run-task, too.
    monitor = subprocess.Popen(
        args,
        # Unbuffered so output arrives as generated and log timestamps
        # stay accurate.
        bufsize=0,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if IS_WINDOWS else 0,
        cwd=os.getcwd(),
    )

    def _pump_output():
        # Forward the monitor's output through print_line until EOF.
        reader = io.TextIOWrapper(monitor.stdout, encoding="latin1")
        for chunk in iter(reader.readline, ""):
            print_line(prefix, chunk.encode("latin1"))

    Thread(target=_pump_output).start()
    return monitor
+
+
def main(args):
    """Entry point for run-task.

    Parses run-task's own arguments (everything before `--`), validates and
    configures caches/volumes, prepares VCS checkouts, drops root privileges
    when applicable, then executes the task command (everything after `--`)
    and returns its exit code.
    """
    os.environ["TASK_WORKDIR"] = os.getcwd()
    print_line(
        b"setup",
        b"run-task started in %s\n" % os.environ["TASK_WORKDIR"].encode("utf-8"),
    )
    running_as_root = IS_POSIX and os.getuid() == 0

    # Arguments up to '--' are ours. After are for the main task
    # to be executed.
    try:
        i = args.index("--")
        our_args = args[0:i]
        task_args = args[i + 1 :]
    except ValueError:
        our_args = args
        task_args = []

    parser = argparse.ArgumentParser()
    parser.add_argument("--user", default="worker", help="user to run as")
    parser.add_argument("--group", default="worker", help="group to run as")
    parser.add_argument("--task-cwd", help="directory to run the provided command in")

    # REPOSITORIES maps project key -> display name; default to a single
    # generic "vcs" repository when unset.
    repositories = os.environ.get("REPOSITORIES")
    if repositories:
        repositories = json.loads(repositories)
    else:
        repositories = {"vcs": "repository"}

    for repository, name in repositories.items():
        add_vcs_arguments(parser, repository, name)

    # Accepted for backwards compatibility; intentionally hidden from --help.
    parser.add_argument(
        "--fetch-hgfingerprint", action="store_true", help=argparse.SUPPRESS
    )

    args = parser.parse_args(our_args)

    repositories = [
        collect_vcs_options(args, repository, name)
        for (repository, name) in repositories.items()
    ]
    # Sort repositories so that parent checkout paths come before children
    repositories.sort(key=lambda repo: Path(repo["checkout"] or "/").parts)

    uid = gid = gids = None
    if IS_POSIX and running_as_root:
        user, group, gids = get_posix_user_group(args.user, args.group)
        uid = user.pw_uid
        gid = group.gr_gid

    if running_as_root and os.path.exists("/dev/kvm"):
        # Ensure kvm permissions for worker, required for Android x86
        st = os.stat("/dev/kvm")
        os.chmod("/dev/kvm", st.st_mode | 0o666)

    # Validate caches.
    #
    # Taskgraph should pass in a list of paths that are caches via an
    # environment variable (which we don't want to pass down to child
    # processes).

    if "TASKCLUSTER_CACHES" in os.environ:
        caches = os.environ["TASKCLUSTER_CACHES"].split(";")
        del os.environ["TASKCLUSTER_CACHES"]
    else:
        caches = []

    if "TASKCLUSTER_UNTRUSTED_CACHES" in os.environ:
        untrusted_caches = True
        del os.environ["TASKCLUSTER_UNTRUSTED_CACHES"]
    else:
        untrusted_caches = False

    for cache in caches:
        if not os.path.isdir(cache):
            print(
                "error: cache %s is not a directory; this should never "
                "happen" % cache
            )
            return 1

        # NOTE(review): `user`/`group` are only bound when running as root on
        # POSIX above; caches presumably imply a root-started container —
        # confirm this invariant.
        purge = configure_cache_posix(
            cache, user, group, untrusted_caches, running_as_root
        )

        if purge:
            return EXIT_PURGE_CACHE

    if "TASKCLUSTER_VOLUMES" in os.environ:
        volumes = os.environ["TASKCLUSTER_VOLUMES"].split(";")
        del os.environ["TASKCLUSTER_VOLUMES"]
    else:
        volumes = []

    if volumes and not IS_POSIX:
        print("assertion failed: volumes not expected on Windows")
        return 1

    # Sanitize volumes.
    for volume in volumes:
        # If a volume is a cache, it was dealt with above.
        if volume in caches:
            print_line(b"volume", b"volume %s is a cache\n" % volume.encode("utf-8"))
            continue

        configure_volume_posix(volume, user, group, running_as_root)

    all_caches_and_volumes = set(map(os.path.normpath, caches))
    all_caches_and_volumes |= set(map(os.path.normpath, volumes))

    def path_in_cache_or_volume(path):
        # Walk up the directory tree checking each ancestor against the
        # known cache/volume roots.
        path = os.path.normpath(path)

        while path:
            if path in all_caches_and_volumes:
                return True

            path, child = os.path.split(path)
            if not child:
                break

        return False

    def prepare_checkout_dir(checkout):
        # Create (and chown) the parent directory for a VCS checkout,
        # refusing to check out into a cache root.
        if not checkout:
            return

        # The checkout path becomes the working directory. Since there are
        # special cache files in the cache's root directory and working
        # directory purging could blow them away, disallow this scenario.
        if os.path.exists(os.path.join(checkout, ".cacherequires")):
            print("error: cannot perform vcs checkout into cache root: %s" % checkout)
            sys.exit(1)

        # TODO given the performance implications, consider making this a fatal
        # error.
        if not path_in_cache_or_volume(checkout):
            print_line(
                b"vcs",
                b"WARNING: vcs checkout path (%s) not in cache "
                b"or volume; performance will likely suffer\n"
                % checkout.encode("utf-8"),
            )

        # Ensure the directory for the source checkout exists.
        try:
            os.makedirs(os.path.dirname(checkout))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # And that it is owned by the appropriate user/group.
        if running_as_root:
            os.chown(os.path.dirname(checkout), uid, gid)

    def prepare_hg_store_path():
        # And ensure the shared store path exists and has proper permissions.
        if "HG_STORE_PATH" not in os.environ:
            print("error: HG_STORE_PATH environment variable not set")
            sys.exit(1)

        store_path = os.environ["HG_STORE_PATH"]

        if not path_in_cache_or_volume(store_path):
            print_line(
                b"vcs",
                b"WARNING: HG_STORE_PATH (%s) not in cache or "
                b"volume; performance will likely suffer\n"
                % store_path.encode("utf-8"),
            )

        try:
            os.makedirs(store_path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        if running_as_root:
            os.chown(store_path, uid, gid)

    repository_paths = [
        Path(repo["checkout"]) for repo in repositories if repo["checkout"]
    ]
    for repo in repositories:
        if not repo["checkout"]:
            continue
        parents = Path(repo["checkout"]).parents
        if any((path in repository_paths) for path in parents):
            # Skip creating any checkouts that are inside other checkouts
            continue
        prepare_checkout_dir(repo["checkout"])

    if any(repo["checkout"] and repo["repo-type"] == "hg" for repo in repositories):
        prepare_hg_store_path()

    if IS_POSIX and running_as_root:
        # Drop permissions to requested user.
        # This code is modeled after what `sudo` was observed to do in a Docker
        # container. We do not bother calling setrlimit() because containers have
        # their own limits.
        print_line(
            b"setup",
            b"running as %s:%s\n"
            % (args.user.encode("utf-8"), args.group.encode("utf-8")),
        )

        os.setgroups(gids)
        os.umask(0o22)
        os.setresgid(gid, gid, gid)
        os.setresuid(uid, uid, uid)

    for repo in repositories:
        vcs_checkout_from_args(repo)

    resource_process = None

    try:
        # Normalize well-known path env vars to absolute paths before the
        # task runs.
        for k in ["MOZ_FETCHES_DIR", "UPLOAD_DIR"] + [
            "{}_PATH".format(repository["project"].upper())
            for repository in repositories
        ]:
            if k in os.environ:
                os.environ[k] = os.path.abspath(os.environ[k])
                print_line(
                    b"setup",
                    b"%s is %s\n" % (k.encode("utf-8"), os.environ[k].encode("utf-8")),
                )

        if "MOZ_FETCHES" in os.environ:
            fetch_artifacts()

        # Install Python requirements after fetches in case tasks want to use
        # fetches to grab dependencies.
        install_pip_requirements(repositories)

        resource_process = maybe_run_resource_monitoring()

        return run_command(b"task", task_args, cwd=args.task_cwd)
    finally:
        if resource_process:
            print_line(b"resource_monitor", b"terminating\n")
            if IS_WINDOWS:
                # .terminate() on Windows is not a graceful shutdown, due to
                # differences in signals. CTRL_BREAK_EVENT will work provided
                # the subprocess is in a different process group, so this script
                # isn't also killed.
                os.kill(resource_process.pid, signal.CTRL_BREAK_EVENT)
            else:
                resource_process.terminate()
            resource_process.wait()
        # Clean up downloaded artifacts so they don't leak into caches.
        fetches_dir = os.environ.get("MOZ_FETCHES_DIR")
        if fetches_dir and os.path.isdir(fetches_dir):
            print_line(b"fetches", b"removing %s\n" % fetches_dir.encode("utf-8"))
            remove(fetches_dir)
            print_line(b"fetches", b"finished\n")
+
+
# Script entry point: forward the CLI arguments (minus argv[0]) and
# propagate the task's exit code to the caller.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py
new file mode 100644
index 0000000000..1119a1c960
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py
@@ -0,0 +1,107 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from taskgraph.util.attributes import (
+ match_run_on_git_branches,
+ match_run_on_projects,
+ match_run_on_tasks_for,
+)
+
# Registry of target-task methods, keyed by name; populated by @_target_task.
_target_task_methods = {}

# Prefix stripped from `head_ref` to recover the bare git branch name.
_GIT_REFS_HEADS_PREFIX = "refs/heads/"
+
+
def _target_task(name):
    """Decorator registering a target-task method under ``name``."""

    def register(func):
        _target_task_methods[name] = func
        return func

    return register
+
+
def get_method(method):
    """Get a target_task_method to pass to a TaskGraphGenerator.

    Raises KeyError if no method was registered under ``method``.
    """
    return _target_task_methods[method]
+
+
def filter_out_cron(task, parameters):
    """Exclude any task that is scheduled via cron."""
    is_cron = task.attributes.get("cron")
    return not is_cron
+
+
def filter_for_project(task, parameters):
    """Keep tasks whose `run_on_projects` attribute matches the current project."""
    projects = set(task.attributes.get("run_on_projects", []))
    return match_run_on_projects(parameters["project"], projects)
+
+
def filter_for_tasks_for(task, parameters):
    """Keep tasks whose `run_on_tasks_for` matches `tasks_for` (default "all")."""
    candidates = set(task.attributes.get("run_on_tasks_for", ["all"]))
    return match_run_on_tasks_for(parameters["tasks_for"], candidates)
+
+
def filter_for_git_branch(task, parameters):
    """Filter tasks by git branch.

    If `run_on_git_branches` is not defined, then the task runs on all
    branches.
    """
    # Branch filtering only makes sense for git repositories, and pull
    # requests usually have arbitrary branch names, so let both through.
    if (
        parameters.get("repository_type") != "git"
        or parameters["tasks_for"] == "github-pull-request"
    ):
        return True

    allowed_branches = set(task.attributes.get("run_on_git_branches", ["all"]))
    branch = parameters["head_ref"]
    if branch.startswith(_GIT_REFS_HEADS_PREFIX):
        # Normalize a fully-qualified ref to the bare branch name.
        branch = branch[len(_GIT_REFS_HEADS_PREFIX) :]

    return match_run_on_git_branches(branch, allowed_branches)
+
+
def filter_out_shipping_phase(task, parameters):
    """Keep only tasks with no shipping phase or the "build" phase."""
    phase = task.attributes.get("shipping_phase")
    return phase is None or phase == "build"
+
+
def standard_filter(task, parameters):
    """Apply the default set of task filters, short-circuiting on failure."""
    filters = (
        filter_out_cron,
        filter_out_shipping_phase,
        filter_for_project,
        filter_for_tasks_for,
        filter_for_git_branch,
    )
    for filter_func in filters:
        if not filter_func(task, parameters):
            return False
    return True
+
+
+@_target_task("default")
def target_tasks_default(full_task_graph, parameters, graph_config):
    """Target the tasks which have indicated they should be run on this project
    via the `run_on_projects` attributes."""
    labels = []
    for label, task in full_task_graph.tasks.items():
        if standard_filter(task, parameters):
            labels.append(label)
    return labels
+
+
@_target_task("codereview")
def target_tasks_codereview(full_task_graph, parameters, graph_config):
    """Target the tasks which pass the standard filters and are additionally
    marked for code review via the `code-review` attribute."""
    return [
        l
        for l, t in full_task_graph.tasks.items()
        if standard_filter(t, parameters) and t.attributes.get("code-review")
    ]
+
+
+@_target_task("nothing")
def target_tasks_nothing(full_task_graph, parameters, graph_config):
    """Select no tasks at all, for DONTBUILD pushes."""
    # An empty selection means the decision task schedules nothing.
    return []
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/task.py b/third_party/python/taskcluster_taskgraph/taskgraph/task.py
new file mode 100644
index 0000000000..a38a52b38e
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/task.py
@@ -0,0 +1,84 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import attr
+
+
@attr.s
class Task:
    """
    Representation of a task in a TaskGraph. Each Task has, at creation:

    - kind: the name of the task kind
    - label: the label for this task
    - attributes: a dictionary of attributes for this task (used for filtering)
    - task: the task definition (JSON-able dictionary)
    - optimization: optimization to apply to the task (see taskgraph.optimize)
    - dependencies: tasks this one depends on, in the form {name: label}, for example
      {'build': 'build-linux64/opt', 'docker-image': 'build-docker-image-desktop-test'}
    - soft_dependencies: tasks this one may depend on if they are available post
      optimisation. They are set as a list of tasks label.
    - if_dependencies: only run this task if at least one of these dependencies
      are present.

    And later, as the task-graph processing proceeds:

    - task_id -- TaskCluster taskId under which this task will be created

    This class is just a convenience wrapper for the data type and managing
    display, comparison, serialization, etc. It has no functionality of its own.
    """

    kind = attr.ib()
    label = attr.ib()
    attributes = attr.ib()
    task = attr.ib()
    description = attr.ib(default="")
    # Assigned during graph processing, never at construction time.
    task_id = attr.ib(default=None, init=False)
    optimization = attr.ib(default=None)
    dependencies = attr.ib(factory=dict)
    soft_dependencies = attr.ib(factory=list)
    if_dependencies = attr.ib(factory=list)

    def __attrs_post_init__(self):
        # Mirror the kind into the attributes so filters can match on it.
        self.attributes["kind"] = self.kind

    def to_json(self):
        """Return a JSON-able dict representation of this task."""
        rv = {
            "kind": self.kind,
            "label": self.label,
            "description": self.description,
            "attributes": self.attributes,
            "dependencies": self.dependencies,
            "soft_dependencies": self.soft_dependencies,
            "if_dependencies": self.if_dependencies,
            "optimization": self.optimization,
            "task": self.task,
        }
        if self.task_id:
            rv["task_id"] = self.task_id
        return rv

    @classmethod
    def from_json(cls, task_dict):
        """
        Given a data structure as produced by taskgraph.to_json, re-construct
        the original Task object. This is used to "resume" the task-graph
        generation process, for example in Action tasks.
        """
        rv = cls(
            kind=task_dict["kind"],
            label=task_dict["label"],
            description=task_dict.get("description", ""),
            attributes=task_dict["attributes"],
            task=task_dict["task"],
            optimization=task_dict["optimization"],
            # Fall back to empty containers (not None) so a dict missing these
            # keys still yields a well-formed Task; the attr factories only
            # apply when the argument is omitted entirely.
            dependencies=task_dict.get("dependencies", {}),
            soft_dependencies=task_dict.get("soft_dependencies", []),
            if_dependencies=task_dict.get("if_dependencies", []),
        )
        if "task_id" in task_dict:
            rv.task_id = task_dict["task_id"]
        return rv
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/taskgraph.py b/third_party/python/taskcluster_taskgraph/taskgraph/taskgraph.py
new file mode 100644
index 0000000000..158cfb861c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/taskgraph.py
@@ -0,0 +1,72 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import attr
+
+from .graph import Graph
+from .task import Task
+
+
@attr.s(frozen=True)
class TaskGraph:
    """
    Representation of a task graph.

    A task graph is a combination of a Graph and a dictionary of tasks indexed
    by label. TaskGraph instances should be treated as immutable.

    In the graph, tasks are said to "link to" their dependencies. Whereas
    tasks are "linked from" their dependents.
    """

    tasks = attr.ib()
    graph = attr.ib()

    def __attrs_post_init__(self):
        # The task dict and the graph must describe exactly the same labels.
        assert set(self.tasks) == self.graph.nodes

    def for_each_task(self, f, *args, **kwargs):
        """Call ``f(task, self, *args, **kwargs)`` for each task in postorder."""
        for task_label in self.graph.visit_postorder():
            task = self.tasks[task_label]
            f(task, self, *args, **kwargs)

    def __getitem__(self, label):
        "Get a task by label"
        return self.tasks[label]

    def __contains__(self, label):
        return label in self.tasks

    def __iter__(self):
        "Iterate over tasks in undefined order"
        return iter(self.tasks.values())

    def to_json(self):
        "Return a JSON-able object representing the task graph, as documented"
        named_links_dict = self.graph.named_links_dict()
        # this dictionary may be keyed by label or by taskid, so let's just call it 'key'
        tasks = {}
        for key in self.graph.visit_postorder():
            tasks[key] = self.tasks[key].to_json()
            # overwrite dependencies with the information in the taskgraph's edges.
            tasks[key]["dependencies"] = named_links_dict.get(key, {})
        return tasks

    @classmethod
    def from_json(cls, tasks_dict):
        """
        Re-construct tasks and a TaskGraph from a dictionary as produced by
        :meth:`to_json`.

        Note: returns a 2-tuple of (tasks dict, TaskGraph), not just the graph.
        """
        tasks = {}
        edges = set()
        for key, value in tasks_dict.items():
            tasks[key] = Task.from_json(value)
            # Task.from_json already restores task_id; this reassignment is
            # redundant but harmless.
            if "task_id" in value:
                tasks[key].task_id = value["task_id"]
            for depname, dep in value["dependencies"].items():
                edges.add((key, dep, depname))
        task_graph = cls(tasks, Graph(set(tasks), edges))
        return tasks, task_graph
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py
new file mode 100644
index 0000000000..383e6a4798
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py
@@ -0,0 +1,157 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import re
+from typing import AnyStr
+
+import attr
+
+from ..config import GraphConfig
+from ..parameters import Parameters
+from ..util.memoize import memoize
+from ..util.schema import Schema, validate_schema
+
+
@attr.s(frozen=True)
class RepoConfig:
    """Describes one repository involved in the current task-graph run."""

    # Prefix identifying this repository in the graph config.
    prefix = attr.ib(type=str)
    name = attr.ib(type=str)
    base_repository = attr.ib(type=str)
    head_repository = attr.ib(type=str)
    head_ref = attr.ib(type=str)
    # VCS type (elsewhere this is compared against "git"; other values mean
    # a non-git repository).
    type = attr.ib(type=str)
    path = attr.ib(type=str, default="")
    head_rev = attr.ib(type=str, default=None)
    # Name of the secret holding an SSH key for this repository, if any.
    ssh_secret_name = attr.ib(type=str, default=None)
+
+
@attr.s(frozen=True, cmp=False)
class TransformConfig:
    """
    A container for configuration affecting transforms. The `config` argument
    to transforms is an instance of this class.
    """

    # the name of the current kind
    kind = attr.ib()

    # the path to the kind configuration directory
    path = attr.ib(type=AnyStr)

    # the parsed contents of kind.yml
    config = attr.ib(type=dict)

    # the parameters for this task-graph generation run
    params = attr.ib(type=Parameters)

    # a dict of all the tasks associated with the kind dependencies of the
    # current kind
    kind_dependencies_tasks = attr.ib(type=dict)

    # Global configuration of the taskgraph
    graph_config = attr.ib(type=GraphConfig)

    # whether to write out artifacts for the decision task
    write_artifacts = attr.ib(type=bool)

    @property
    @memoize
    def repo_configs(self):
        """Return RepoConfig objects for the repositories in graph config.

        The repository whose `project-regex` matches the current project is
        populated from this run's parameters; any remaining repositories fall
        back to their configured defaults. Raises if the project does not
        match exactly one configured repository.
        """
        repositories = self.graph_config["taskgraph"]["repositories"]
        if len(repositories) == 1:
            current_prefix = list(repositories.keys())[0]
        else:
            project = self.params["project"]
            matching_repos = {
                repo_prefix: repo
                for (repo_prefix, repo) in repositories.items()
                if re.match(repo["project-regex"], project)
            }
            if len(matching_repos) != 1:
                raise Exception(
                    f"Couldn't find repository matching project `{project}`"
                )
            current_prefix = list(matching_repos.keys())[0]

        repo_configs = {
            current_prefix: RepoConfig(
                prefix=current_prefix,
                name=repositories[current_prefix]["name"],
                base_repository=self.params["base_repository"],
                head_repository=self.params["head_repository"],
                head_ref=self.params["head_ref"],
                head_rev=self.params["head_rev"],
                type=self.params["repository_type"],
                ssh_secret_name=repositories[current_prefix].get("ssh-secret-name"),
            ),
        }
        if len(repositories) != 1:
            repo_configs.update(
                {
                    repo_prefix: RepoConfig(
                        prefix=repo_prefix,
                        name=repo["name"],
                        base_repository=repo["default-repository"],
                        head_repository=repo["default-repository"],
                        head_ref=repo["default-ref"],
                        type=repo["type"],
                        ssh_secret_name=repo.get("ssh-secret-name"),
                    )
                    for (repo_prefix, repo) in repositories.items()
                    if repo_prefix != current_prefix
                }
            )
        return repo_configs
+
+
@attr.s()
class TransformSequence:
    """
    A composable sequence of transforms.

    Each transform is a callable taking (config, items) and returning a
    generator of transformed items; the sequence itself exposes the same
    interface. Transform files typically instantiate one of these and use
    the @transforms.add decorator to register each step in order.
    """

    _transforms = attr.ib(factory=list)

    def __call__(self, config, items):
        # Thread the item stream through every registered transform in turn.
        for transform in self._transforms:
            items = transform(config, items)
            if items is None:
                raise Exception(f"Transform {transform} is not a generator")
        return items

    def add(self, func):
        """Register `func` as the next transform; usable as a decorator."""
        self._transforms.append(func)
        return func

    def add_validate(self, schema):
        """Register a schema-validation step as the next transform."""
        self.add(ValidateSchema(schema))
+
+
@attr.s
class ValidateSchema:
    """Transform step that validates each task against ``schema``."""

    schema = attr.ib(type=Schema)

    def __call__(self, config, tasks):
        for task in tasks:
            # Build a context string that names the task as precisely as
            # possible, so schema errors point at the offending task.
            if "name" in task:
                error = "In {kind} kind task {name!r}:".format(
                    kind=config.kind, name=task["name"]
                )
            elif "label" in task:
                error = "In job {label!r}:".format(label=task["label"])
            elif "primary-dependency" in task:
                error = "In {kind} kind task for {dependency!r}:".format(
                    kind=config.kind, dependency=task["primary-dependency"].label
                )
            else:
                error = "In unknown task:"
            validate_schema(self.schema, task, error)
            yield task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py
new file mode 100644
index 0000000000..57a55dffb3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py
@@ -0,0 +1,90 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from collections import deque
+
+import taskgraph
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.cached_tasks import add_optimization
+
+transforms = TransformSequence()
+
+
def order_tasks(config, tasks):
    """Iterate image tasks in an order where parent tasks come first."""
    if config.kind == "docker-image":
        kind_prefix = "build-docker-image-"
    else:
        kind_prefix = config.kind + "-"

    pending = deque(tasks)
    known_labels = {t["label"] for t in pending}
    emitted = set()
    while pending:
        task = pending.popleft()
        # Same-kind dependency labels of this task.
        deps = {
            label
            for label in task.get("dependencies", {}).values()
            if label.startswith(kind_prefix)
        }
        # Requeue until every same-kind dependency we know about is emitted.
        if not emitted.issuperset(deps & known_labels):
            pending.append(task)
            continue
        emitted.add(task["label"])
        yield task
+
+
def format_task_digest(cached_task):
    """Return the ``type/name/digest`` string for a cached-task record."""
    parts = [cached_task["type"], cached_task["name"], cached_task["digest"]]
    return "/".join(parts)
+
+
@transforms.add
def cache_task(config, tasks):
    """Attach cached-task optimizations to tasks that declare a `cache` entry."""
    # In `taskgraph.fast` mode, skip all cache bookkeeping.
    if taskgraph.fast:
        for task in tasks:
            yield task
        return

    # Digests of already-cached kind-dependency tasks, keyed by label.
    digests = {}
    for task in config.kind_dependencies_tasks.values():
        if "cached_task" in task.attributes:
            digests[task.label] = format_task_digest(task.attributes["cached_task"])

    for task in order_tasks(config, tasks):
        cache = task.pop("cache", None)
        if cache is None:
            yield task
            continue

        dependency_digests = []
        for p in task.get("dependencies", {}).values():
            if p in digests:
                dependency_digests.append(digests[p])
            else:
                raise Exception(
                    "Cached task {} has uncached parent task: {}".format(
                        task["label"], p
                    )
                )
        # Fold (sorted) parent digests into this task's digest data so a
        # change in any parent invalidates this cache entry too.
        digest_data = cache["digest-data"] + sorted(dependency_digests)
        add_optimization(
            config,
            task,
            cache_type=cache["type"],
            cache_name=cache["name"],
            digest_data=digest_data,
        )
        digests[task["label"]] = format_task_digest(task["attributes"]["cached_task"])

        yield task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py
new file mode 100644
index 0000000000..bdb655b97d
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py
@@ -0,0 +1,23 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Add soft dependencies and configuration to code-review tasks.
+"""
+
+
+from taskgraph.transforms.base import TransformSequence
+
+transforms = TransformSequence()
+
+
+@transforms.add
def add_dependencies(config, jobs):
    """Add every code-review kind dependency as a soft dependency of each job."""
    # The set of review task labels is the same for every job, so compute
    # it once up front.
    review_labels = [
        dep_task.label
        for dep_task in config.kind_dependencies_tasks.values()
        if dep_task.attributes.get("code-review") is True
    ]
    for job in jobs:
        job.setdefault("soft-dependencies", [])
        job["soft-dependencies"] += review_labels
        yield job
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py
new file mode 100644
index 0000000000..dd7c01e5a9
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py
@@ -0,0 +1,213 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import json
+import logging
+import os
+import re
+
+from voluptuous import Optional, Required
+
+import taskgraph
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.docker import create_context_tar, generate_context_hash
+from taskgraph.util.schema import Schema
+
+from .task import task_description_schema
+
+logger = logging.getLogger(__name__)
+
# Directory (relative to the working directory) where docker context
# tarballs are written when artifacts are requested.
CONTEXTS_DIR = "docker-contexts"

# Matches a full lowercase sha256 hex digest.
DIGEST_RE = re.compile("^[0-9a-f]{64}$")

# Pinned image_builder docker image used to build all in-tree images.
IMAGE_BUILDER_IMAGE = (
    "taskcluster/image_builder:4.0.0"
    "@sha256:"
    "866c304445334703b68653e1390816012c9e6bdabfbd1906842b5b229e8ed044"
)

transforms = TransformSequence()

docker_image_schema = Schema(
    {
        # Name of the docker image.
        Required("name"): str,
        # Name of the parent docker image.
        Optional("parent"): str,
        # Treeherder symbol.
        Optional("symbol"): str,
        # relative path (from config.path) to the file the docker image was defined
        # in.
        Optional("task-from"): str,
        # Arguments to use for the Dockerfile.
        Optional("args"): {str: str},
        # Name of the docker image definition under taskcluster/docker, when
        # different from the docker image name.
        Optional("definition"): str,
        # List of package tasks this docker image depends on.
        Optional("packages"): [str],
        Optional(
            "index",
            description="information for indexing this build so its artifacts can be discovered",
        ): task_description_schema["index"],
        Optional(
            "cache",
            description="Whether this image should be cached based on inputs.",
        ): bool,
    }
)


transforms.add_validate(docker_image_schema)
+
+
@transforms.add
def fill_template(config, tasks):
    """Expand each docker-image definition into a build-docker-image task description."""
    # Names of package tasks available as dependencies for images.
    available_packages = set()
    for task in config.kind_dependencies_tasks.values():
        if task.kind != "packages":
            continue
        name = task.label.replace("packages-", "")
        available_packages.add(name)

    context_hashes = {}

    tasks = list(tasks)

    if not taskgraph.fast and config.write_artifacts:
        if not os.path.isdir(CONTEXTS_DIR):
            os.makedirs(CONTEXTS_DIR)

    for task in tasks:
        image_name = task.pop("name")
        job_symbol = task.pop("symbol", None)
        args = task.pop("args", {})
        definition = task.pop("definition", image_name)
        packages = task.pop("packages", [])
        parent = task.pop("parent", None)

        for p in packages:
            if p not in available_packages:
                raise Exception(
                    "Missing package job for {}-{}: {}".format(
                        config.kind, image_name, p
                    )
                )

        # Hash the image's docker context (optionally writing the context
        # tarball as an artifact); in fast mode use a placeholder hash.
        if not taskgraph.fast:
            context_path = os.path.join("taskcluster", "docker", definition)
            topsrcdir = os.path.dirname(config.graph_config.taskcluster_yml)
            if config.write_artifacts:
                context_file = os.path.join(CONTEXTS_DIR, f"{image_name}.tar.gz")
                logger.info(f"Writing {context_file} for docker image {image_name}")
                context_hash = create_context_tar(
                    topsrcdir,
                    context_path,
                    context_file,
                    args,
                )
            else:
                context_hash = generate_context_hash(topsrcdir, context_path, args)
        else:
            if config.write_artifacts:
                raise Exception("Can't write artifacts if `taskgraph.fast` is set.")
            context_hash = "0" * 40
        digest_data = [context_hash]
        digest_data += [json.dumps(args, sort_keys=True)]
        context_hashes[image_name] = context_hash

        description = "Build the docker image {} for use by dependent tasks".format(
            image_name
        )

        args["DOCKER_IMAGE_PACKAGES"] = " ".join(f"<{p}>" for p in packages)

        # Adjust the zstandard compression level based on the execution level.
        # We use faster compression for level 1 because we care more about
        # end-to-end times. We use slower/better compression for other levels
        # because images are read more often and it is worth the trade-off to
        # burn more CPU once to reduce image size.
        zstd_level = "3" if int(config.params["level"]) == 1 else "10"

        # include some information that is useful in reconstructing this task
        # from JSON
        taskdesc = {
            "label": "build-docker-image-" + image_name,
            "description": description,
            "attributes": {
                "image_name": image_name,
                "artifact_prefix": "public",
            },
            "expires-after": "28 days" if config.params.is_try() else "1 year",
            "scopes": [],
            "run-on-projects": [],
            "worker-type": "images",
            "worker": {
                "implementation": "docker-worker",
                "os": "linux",
                "artifacts": [
                    {
                        "type": "file",
                        "path": "/workspace/image.tar.zst",
                        "name": "public/image.tar.zst",
                    }
                ],
                "env": {
                    "CONTEXT_TASK_ID": {"task-reference": "<decision>"},
                    "CONTEXT_PATH": "public/docker-contexts/{}.tar.gz".format(
                        image_name
                    ),
                    "HASH": context_hash,
                    "PROJECT": config.params["project"],
                    "IMAGE_NAME": image_name,
                    "DOCKER_IMAGE_ZSTD_LEVEL": zstd_level,
                    "DOCKER_BUILD_ARGS": {
                        "task-reference": json.dumps(args),
                    },
                    "VCS_BASE_REPOSITORY": config.params["base_repository"],
                    "VCS_HEAD_REPOSITORY": config.params["head_repository"],
                    "VCS_HEAD_REV": config.params["head_rev"],
                    "VCS_REPOSITORY_TYPE": config.params["repository_type"],
                },
                "chain-of-trust": True,
                "max-run-time": 7200,
            },
        }
        if "index" in task:
            taskdesc["index"] = task["index"]
        if job_symbol:
            taskdesc["treeherder"] = {
                "symbol": job_symbol,
                "platform": "taskcluster-images/opt",
                "kind": "other",
                "tier": 1,
            }

        worker = taskdesc["worker"]

        # The image-builder image itself is part of the cache digest.
        worker["docker-image"] = IMAGE_BUILDER_IMAGE
        digest_data.append(f"image-builder-image:{IMAGE_BUILDER_IMAGE}")

        if packages:
            deps = taskdesc.setdefault("dependencies", {})
            for p in sorted(packages):
                deps[p] = f"packages-{p}"

        if parent:
            deps = taskdesc.setdefault("dependencies", {})
            deps["parent"] = f"build-docker-image-{parent}"
            worker["env"]["PARENT_TASK_ID"] = {
                "task-reference": "<parent>",
            }

        if task.get("cache", True) and not taskgraph.fast:
            taskdesc["cache"] = {
                "type": "docker-images.v2",
                "name": image_name,
                "digest-data": digest_data,
            }

        yield taskdesc
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py
new file mode 100644
index 0000000000..65d4b62482
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py
@@ -0,0 +1,335 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Support for running tasks that download remote content and re-export
+# it as task artifacts.
+
+
+import os
+import re
+
+import attr
+from voluptuous import Extra, Optional, Required
+
+import taskgraph
+
+from ..util import path
+from ..util.cached_tasks import add_optimization
+from ..util.schema import Schema, validate_schema
+from ..util.treeherder import join_symbol
+from .base import TransformSequence
+
# Cache index namespace used for fetch-task optimizations.
CACHE_TYPE = "content.v1"

FETCH_SCHEMA = Schema(
    {
        # Name of the task.
        Required("name"): str,
        # Relative path (from config.path) to the file the task was defined
        # in.
        Optional("task-from"): str,
        # Description of the task.
        Required("description"): str,
        Optional("docker-image"): object,
        Optional(
            "fetch-alias",
            description="An alias that can be used instead of the real fetch job name in "
            "fetch stanzas for jobs.",
        ): str,
        Optional(
            "artifact-prefix",
            description="The prefix of the taskcluster artifact being uploaded. "
            "Defaults to `public/`; if it starts with something other than "
            "`public/` the artifact will require scopes to access.",
        ): str,
        Optional("attributes"): {str: object},
        Required("fetch"): {
            Required("type"): str,
            Extra: object,
        },
    }
)
+
+
# define a collection of payload builders, depending on the worker implementation
fetch_builders = {}


@attr.s(frozen=True)
class FetchBuilder:
    """Pairs a fetch type's schema with the function that builds its job config."""

    schema = attr.ib(type=Schema)
    builder = attr.ib()
+
+
def fetch_builder(name, schema):
    """Decorator registering a handler for the fetch type ``name``.

    The handler's schema is extended to require a matching ``type`` field.
    """
    schema = Schema({Required("type"): name}).extend(schema)

    def register(func):
        fetch_builders[name] = FetchBuilder(schema, func)
        return func

    return register
+
+
+transforms = TransformSequence()
+transforms.add_validate(FETCH_SCHEMA)
+
+
@transforms.add
def process_fetch_job(config, jobs):
    # Converts fetch-url entries to the job schema.
    for job in jobs:
        typ = job["fetch"]["type"]
        name = job["name"]
        fetch = job.pop("fetch")

        if typ not in fetch_builders:
            raise Exception(f"Unknown fetch type {typ} in fetch {name}")
        # NOTE(review): configure_fetch() repeats this validation below; the
        # duplicate check here is harmless but redundant.
        validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")

        job.update(configure_fetch(config, typ, name, fetch))

        yield job
+
+
def configure_fetch(config, typ, name, fetch):
    """Validate ``fetch`` against its builder schema and build the job config."""
    if typ not in fetch_builders:
        raise Exception(f"No fetch type {typ} in fetch {name}")
    builder = fetch_builders[typ]
    validate_schema(builder.schema, fetch, f"In task.fetch {name!r}:")

    return builder.builder(config, name, fetch)
+
+
@transforms.add
def make_task(config, jobs):
    """Turn each fetch job into a full run-task job description."""
    # Fetch tasks are idempotent and immutable. Have them live for
    # essentially forever.
    if config.params["level"] == "3":
        expires = "1000 years"
    else:
        expires = "28 days"

    for job in jobs:
        name = job["name"]
        artifact_prefix = job.get("artifact-prefix", "public")
        env = job.get("env", {})
        env.update({"UPLOAD_DIR": "/builds/worker/artifacts"})
        attributes = job.get("attributes", {})
        # Record where dependents can find the fetched artifact.
        attributes["fetch-artifact"] = path.join(artifact_prefix, job["artifact_name"])
        alias = job.get("fetch-alias")
        if alias:
            attributes["fetch-alias"] = alias

        task = {
            "attributes": attributes,
            "name": name,
            "description": job["description"],
            "expires-after": expires,
            "label": "fetch-%s" % name,
            "run-on-projects": [],
            "run": {
                "using": "run-task",
                "checkout": False,
                "command": job["command"],
            },
            "worker-type": "images",
            "worker": {
                "chain-of-trust": True,
                "docker-image": job.get("docker-image", {"in-tree": "fetch"}),
                "env": env,
                "max-run-time": 900,
                "artifacts": [
                    {
                        "type": "directory",
                        "name": artifact_prefix,
                        "path": "/builds/worker/artifacts",
                    }
                ],
            },
        }

        if "treeherder" in config.graph_config:
            task["treeherder"] = {
                "symbol": join_symbol("Fetch", name),
                "kind": "build",
                "platform": "fetch/opt",
                "tier": 1,
            }

        # Fetches needing a secret (e.g. an SSH key) get the scope and the
        # taskcluster proxy to read it.
        if job.get("secret", None):
            task["scopes"] = ["secrets:get:" + job.get("secret")]
            task["worker"]["taskcluster-proxy"] = True

        if not taskgraph.fast:
            cache_name = task["label"].replace(f"{config.kind}-", "", 1)

            # This adds the level to the index path automatically.
            add_optimization(
                config,
                task,
                cache_type=CACHE_TYPE,
                cache_name=cache_name,
                digest_data=job["digest_data"],
            )
        yield task
+
+
+@fetch_builder(
+ "static-url",
+ schema={
+ # The URL to download.
+ Required("url"): str,
+ # The SHA-256 of the downloaded content.
+ Required("sha256"): str,
+ # Size of the downloaded entity, in bytes.
+ Required("size"): int,
+ # GPG signature verification.
+ Optional("gpg-signature"): {
+ # URL where GPG signature document can be obtained. Can contain the
+ # value ``{url}``, which will be substituted with the value from
+ # ``url``.
+ Required("sig-url"): str,
+ # Path to file containing GPG public key(s) used to validate
+ # download.
+ Required("key-path"): str,
+ },
+ # The name to give to the generated artifact. Defaults to the file
+ # portion of the URL. Using a different extension converts the
+ # archive to the given type. Only conversion to .tar.zst is
+ # supported.
+ Optional("artifact-name"): str,
+ # Strip the given number of path components at the beginning of
+ # each file entry in the archive.
+ # Requires an artifact-name ending with .tar.zst.
+ Optional("strip-components"): int,
+ # Add the given prefix to each file entry in the archive.
+ # Requires an artifact-name ending with .tar.zst.
+ Optional("add-prefix"): str,
+ # Headers to pass alongside the request.
+ Optional("headers"): {
+ str: str,
+ },
+ # IMPORTANT: when adding anything that changes the behavior of the task,
+ # it is important to update the digest data used to compute cache hits.
+ },
+)
def create_fetch_url_task(config, name, fetch):
    """Build the job configuration for a `static-url` fetch.

    Returns a dict with the fetch-content command, artifact name, extra env,
    and the digest data used for caching.
    """
    artifact_name = fetch.get("artifact-name")
    if not artifact_name:
        # Default to the final path component of the URL.
        artifact_name = fetch["url"].split("/")[-1]

    command = [
        "fetch-content",
        "static-url",
    ]

    # Arguments that matter to the cache digest
    args = [
        "--sha256",
        fetch["sha256"],
        "--size",
        "%d" % fetch["size"],
    ]

    if fetch.get("strip-components"):
        args.extend(["--strip-components", "%d" % fetch["strip-components"]])

    if fetch.get("add-prefix"):
        args.extend(["--add-prefix", fetch["add-prefix"]])

    command.extend(args)

    env = {}

    if "gpg-signature" in fetch:
        sig_url = fetch["gpg-signature"]["sig-url"].format(url=fetch["url"])
        # NOTE(review): `taskgraph.GECKO` may not be defined in the standalone
        # taskgraph package — confirm before relying on gpg-signature fetches.
        key_path = os.path.join(taskgraph.GECKO, fetch["gpg-signature"]["key-path"])

        with open(key_path) as fh:
            gpg_key = fh.read()

        env["FETCH_GPG_KEY"] = gpg_key
        command.extend(
            [
                "--gpg-sig-url",
                sig_url,
                "--gpg-key-env",
                "FETCH_GPG_KEY",
            ]
        )

    if "headers" in fetch:
        for k, v in fetch["headers"].items():
            command.extend(["-H", f"{k}:{v}"])

    command.extend(
        [
            fetch["url"],
            "/builds/worker/artifacts/%s" % artifact_name,
        ]
    )

    return {
        "command": command,
        "artifact_name": artifact_name,
        "env": env,
        # We don't include the GPG signature in the digest because it isn't
        # materially important for caching: GPG signatures are supplemental
        # trust checking beyond what the shasum already provides.
        "digest_data": args + [artifact_name],
    }
+
+
+@fetch_builder(
+ "git",
+ schema={
+ Required("repo"): str,
+ Required("revision"): str,
+ Optional("include-dot-git"): bool,
+ Optional("artifact-name"): str,
+ Optional("path-prefix"): str,
+ # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key)
+ # In the secret dictionary, the key should be specified as
+ # "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..."
+ # n.b. The OpenSSH private key file format requires a newline at the end of the file.
+ Optional("ssh-key"): str,
+ },
+)
def create_git_fetch_task(config, name, fetch):
    """Build the job configuration for a `git` fetch.

    Archives a checkout of ``repo`` at ``revision`` into a .tar.zst artifact.
    Returns a dict with the fetch-content command, artifact name, digest data
    and (optionally) the SSH-key secret path.

    Raises if ``revision`` is not exactly a 40-character sha1 hex string.
    """
    path_prefix = fetch.get("path-prefix")
    if not path_prefix:
        # Default to the final path component of the repo URL.
        path_prefix = fetch["repo"].rstrip("/").rsplit("/", 1)[-1]
    artifact_name = fetch.get("artifact-name")
    if not artifact_name:
        artifact_name = f"{path_prefix}.tar.zst"

    # Use fullmatch so strings that merely *start* with 40 hex digits (e.g.
    # a 41-character value, or a sha1 with trailing junk) are rejected;
    # re.match only anchors at the start of the string.
    if not re.fullmatch(r"[0-9a-fA-F]{40}", fetch["revision"]):
        raise Exception(f'Revision is not a sha1 in fetch task "{name}"')

    args = [
        "fetch-content",
        "git-checkout-archive",
        "--path-prefix",
        path_prefix,
        fetch["repo"],
        fetch["revision"],
        "/builds/worker/artifacts/%s" % artifact_name,
    ]

    ssh_key = fetch.get("ssh-key")
    if ssh_key:
        args.append("--ssh-key-secret")
        args.append(ssh_key)

    digest_data = [fetch["revision"], path_prefix, artifact_name]
    if fetch.get("include-dot-git", False):
        args.append("--include-dot-git")
        digest_data.append(".git")

    return {
        "command": args,
        "artifact_name": artifact_name,
        "digest_data": digest_data,
        # Exposed so make_task can grant the secret scope when needed.
        "secret": ssh_key,
    }
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py
new file mode 100644
index 0000000000..cc2615b702
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py
@@ -0,0 +1,438 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Convert a job description into a task description.
+
+Jobs descriptions are similar to task descriptions, but they specify how to run
+the job at a higher level, using a "run" field that can be interpreted by
+run-using handlers in `taskcluster/taskgraph/transforms/job`.
+"""
+
+
+import copy
+import json
+import logging
+
+from voluptuous import Any, Exclusive, Extra, Optional, Required
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.transforms.cached_tasks import order_tasks
+from taskgraph.transforms.task import task_description_schema
+from taskgraph.util import path as mozpath
+from taskgraph.util.python_path import import_sibling_modules
+from taskgraph.util.schema import Schema, validate_schema
+from taskgraph.util.taskcluster import get_artifact_prefix
+from taskgraph.util.workertypes import worker_type_implementation
+
+logger = logging.getLogger(__name__)
+
# Schema for a build description
job_description_schema = Schema(
    {
        # The name of the job and the job's label. At least one must be specified,
        # and the label will be generated from the name if necessary, by prepending
        # the kind.
        Optional("name"): str,
        Optional("label"): str,
        # the following fields are passed directly through to the task description,
        # possibly modified by the run implementation. See
        # taskcluster/taskgraph/transforms/task.py for the schema details.
        Required("description"): task_description_schema["description"],
        Optional("attributes"): task_description_schema["attributes"],
        Optional("task-from"): task_description_schema["task-from"],
        Optional("dependencies"): task_description_schema["dependencies"],
        Optional("soft-dependencies"): task_description_schema["soft-dependencies"],
        Optional("if-dependencies"): task_description_schema["if-dependencies"],
        Optional("requires"): task_description_schema["requires"],
        Optional("expires-after"): task_description_schema["expires-after"],
        Optional("routes"): task_description_schema["routes"],
        Optional("scopes"): task_description_schema["scopes"],
        Optional("tags"): task_description_schema["tags"],
        Optional("extra"): task_description_schema["extra"],
        Optional("treeherder"): task_description_schema["treeherder"],
        Optional("index"): task_description_schema["index"],
        Optional("run-on-projects"): task_description_schema["run-on-projects"],
        Optional("run-on-tasks-for"): task_description_schema["run-on-tasks-for"],
        Optional("run-on-git-branches"): task_description_schema["run-on-git-branches"],
        Optional("always-target"): task_description_schema["always-target"],
        # 'optimization' and 'when' share the same Exclusive group, so a job
        # may specify at most one of the two.
        Exclusive("optimization", "optimization"): task_description_schema[
            "optimization"
        ],
        Optional("needs-sccache"): task_description_schema["needs-sccache"],
        # The "when" section contains descriptions of the circumstances under which
        # this task should be included in the task graph. This will be converted
        # into an optimization, so it cannot be specified in a job description that
        # also gives 'optimization'.
        Exclusive("when", "optimization"): {
            # This task only needs to be run if a file matching one of the given
            # patterns has changed in the push. The patterns use the mozpack
            # match function (python/mozbuild/mozpack/path.py).
            Optional("files-changed"): [str],
        },
        # A list of artifacts to install from 'fetch' tasks.
        Optional("fetches"): {
            Any("toolchain", "fetch"): [str],
            str: [
                str,
                {
                    Required("artifact"): str,
                    Optional("dest"): str,
                    Optional("extract"): bool,
                    Optional("verify-hash"): bool,
                },
            ],
        },
        # A description of how to run this job.
        "run": {
            # The key to a job implementation in a peer module to this one
            "using": str,
            # Base work directory used to set up the task.
            Optional("workdir"): str,
            # Any remaining content is verified against that job implementation's
            # own schema.
            Extra: object,
        },
        Required("worker-type"): task_description_schema["worker-type"],
        # This object will be passed through to the task description, with additions
        # provided by the job's run-using function
        Optional("worker"): dict,
    }
)

transforms = TransformSequence()
# Validate every incoming job against the schema before other transforms run.
transforms.add_validate(job_description_schema)
+
+
@transforms.add
def rewrite_when_to_optimization(config, jobs):
    """Rewrite a job's `when` block into the equivalent `optimization`.

    `when.files-changed` becomes a `skip-unless-changed` optimization, with
    the kind's own config directory implicitly appended so that edits to the
    task definitions themselves also schedule the task.
    """
    for job in jobs:
        when = job.pop("when", {})
        if not when:
            yield job
            continue

        # Copy so we never mutate a list shared with the original job
        # definition; treat a missing/None value as "no explicit patterns".
        files_changed = list(when.get("files-changed") or [])

        # implicitly add task config directory.
        files_changed.append(f"{config.path}/**")

        # "only when files changed" implies "skip if files have not changed"
        job["optimization"] = {"skip-unless-changed": files_changed}

        assert "when" not in job
        yield job
+
+
@transforms.add
def set_implementation(config, jobs):
    """Tag each job with its worker implementation and OS, and record both
    on the worker definition itself."""
    for job in jobs:
        implementation, operating_system = worker_type_implementation(
            config.graph_config, job["worker-type"]
        )
        if operating_system:
            job.setdefault("tags", {})["os"] = operating_system
        if not implementation:
            yield job
            continue
        job.setdefault("tags", {})["worker-implementation"] = implementation
        worker = job.setdefault("worker", {})
        # Nothing upstream should have set this already.
        assert "implementation" not in worker
        worker["implementation"] = implementation
        if operating_system:
            worker["os"] = operating_system
        yield job
+
+
@transforms.add
def set_label(config, jobs):
    """Derive `label` from `name` (prefixed with the kind) when absent, then
    discard `name` so only `label` flows downstream."""
    for job in jobs:
        if "label" not in job:
            try:
                name = job["name"]
            except KeyError:
                raise Exception("job has neither a name nor a label")
            job["label"] = f"{config.kind}-{name}"
        if job.get("name"):
            del job["name"]
        yield job
+
+
@transforms.add
def add_resource_monitor(config, jobs):
    """For jobs opting in via the `resource-monitor` attribute, fetch the
    platform-appropriate resource-monitor toolchain, publish its JSON report
    as an artifact, and point the tool at that file through the env."""
    for job in jobs:
        if not job.get("attributes", {}).get("resource-monitor"):
            yield job
            continue

        implementation, monitor_os = worker_type_implementation(
            config.graph_config, job["worker-type"]
        )
        # Normalise worker os so that linux-bitbar and similar use linux tools.
        monitor_os = monitor_os.split("-")[0]
        arch = "32" if "win7" in job["worker-type"] else "64"
        fetches = job.setdefault("fetches", {})
        fetches.setdefault("toolchain", []).append(
            f"{monitor_os}{arch}-resource-monitor"
        )

        if implementation == "docker-worker":
            artifact_source = "/builds/worker/monitoring/resource-monitor.json"
        else:
            artifact_source = "monitoring/resource-monitor.json"
        worker = job["worker"]
        worker.setdefault("artifacts", []).append(
            {
                "name": "public/monitoring/resource-monitor.json",
                "type": "file",
                "path": artifact_source,
            }
        )
        # Set env for output file
        worker.setdefault("env", {})["RESOURCE_MONITOR_OUTPUT"] = artifact_source

        yield job
+
+
def get_attribute(target, key, attributes, attribute_name):
    """Copy `attribute_name` from the `attributes` dict into `target[key]`.

    Only truthy values are copied; a falsy or missing attribute leaves
    `target` untouched.  (The first parameter is renamed from `dict`, which
    shadowed the builtin; all in-file callers pass it positionally.)
    """
    value = attributes.get(attribute_name)
    if value:
        target[key] = value
+
+
@transforms.add
def use_fetches(config, jobs):
    """Resolve each job's `fetches` into concrete MOZ_FETCHES entries.

    Collects the artifacts offered by fetch/toolchain tasks (in this kind and
    in kind dependencies), resolves aliases, wires up task dependencies, and
    serializes the result into the MOZ_FETCHES environment variable.
    """
    artifact_names = {}
    aliases = {}
    extra_env = {}

    # Jobs in the fetch/toolchain kinds can themselves be fetched by later
    # jobs of the same kind, so collect their artifacts and aliases first.
    if config.kind in ("toolchain", "fetch"):
        jobs = list(jobs)
        for job in jobs:
            run = job.get("run", {})
            label = job["label"]
            get_attribute(artifact_names, label, run, "toolchain-artifact")
            value = run.get(f"{config.kind}-alias")
            if value:
                aliases[f"{config.kind}-{value}"] = label

    for task in config.kind_dependencies_tasks.values():
        if task.kind in ("fetch", "toolchain"):
            get_attribute(
                artifact_names,
                task.label,
                task.attributes,
                f"{task.kind}-artifact",
            )
            get_attribute(extra_env, task.label, task.attributes, f"{task.kind}-env")
            value = task.attributes.get(f"{task.kind}-alias")
            if value:
                aliases[f"{task.kind}-{value}"] = task.label

    artifact_prefixes = {}
    # order_tasks yields dependencies before dependents, so a dependency's
    # artifact prefix is recorded here before a dependent looks it up below.
    for job in order_tasks(config, jobs):
        artifact_prefixes[job["label"]] = get_artifact_prefix(job)

        fetches = job.pop("fetches", None)
        if not fetches:
            yield job
            continue

        job_fetches = []
        # `name` may already have been folded into `label` by set_label.
        name = job.get("name", job.get("label"))
        dependencies = job.setdefault("dependencies", {})
        worker = job.setdefault("worker", {})
        env = worker.setdefault("env", {})
        prefix = get_artifact_prefix(job)
        for kind, artifacts in fetches.items():
            if kind in ("fetch", "toolchain"):
                for fetch_name in artifacts:
                    label = f"{kind}-{fetch_name}"
                    label = aliases.get(label, label)
                    if label not in artifact_names:
                        raise Exception(
                            "Missing fetch job for {kind}-{name}: {fetch}".format(
                                kind=config.kind, name=name, fetch=fetch_name
                            )
                        )
                    if label in extra_env:
                        env.update(extra_env[label])

                    path = artifact_names[label]

                    dependencies[label] = label
                    job_fetches.append(
                        {
                            "artifact": path,
                            "task": f"<{label}>",
                            "extract": True,
                        }
                    )
            else:
                # Any other key names a dependency edge; artifacts are taken
                # from the task at the other end of that edge.
                if kind not in dependencies:
                    raise Exception(
                        "{name} can't fetch {kind} artifacts because "
                        "it has no {kind} dependencies!".format(name=name, kind=kind)
                    )
                dep_label = dependencies[kind]
                if dep_label in artifact_prefixes:
                    prefix = artifact_prefixes[dep_label]
                else:
                    # Fall back to kind dependencies generated in an earlier
                    # (already-frozen) part of the graph.
                    dep_tasks = [
                        task
                        for label, task in config.kind_dependencies_tasks.items()
                        if label == dep_label
                    ]
                    if len(dep_tasks) != 1:
                        raise Exception(
                            "{name} can't fetch {kind} artifacts because "
                            "there are {tasks} with label {label} in kind dependencies!".format(
                                name=name,
                                kind=kind,
                                label=dependencies[kind],
                                tasks="no tasks"
                                if len(dep_tasks) == 0
                                else "multiple tasks",
                            )
                        )

                    prefix = get_artifact_prefix(dep_tasks[0])

                for artifact in artifacts:
                    # Plain string entries are shorthand for the dict form
                    # with default dest/extract/verify-hash.
                    if isinstance(artifact, str):
                        path = artifact
                        dest = None
                        extract = True
                        verify_hash = False
                    else:
                        path = artifact["artifact"]
                        dest = artifact.get("dest")
                        extract = artifact.get("extract", True)
                        verify_hash = artifact.get("verify-hash", False)

                    fetch = {
                        "artifact": f"{prefix}/{path}",
                        "task": f"<{kind}>",
                        "extract": extract,
                    }
                    if dest is not None:
                        fetch["dest"] = dest
                    if verify_hash:
                        fetch["verify-hash"] = verify_hash
                    job_fetches.append(fetch)

        job_artifact_prefixes = {
            mozpath.dirname(fetch["artifact"])
            for fetch in job_fetches
            if not fetch["artifact"].startswith("public/")
        }
        if job_artifact_prefixes:
            # Use taskcluster-proxy and request appropriate scope. For example, add
            # 'scopes: [queue:get-artifact:path/to/*]' for 'path/to/artifact.tar.xz'.
            worker["taskcluster-proxy"] = True
            for prefix in sorted(job_artifact_prefixes):
                scope = f"queue:get-artifact:{prefix}/*"
                if scope not in job.setdefault("scopes", []):
                    job["scopes"].append(scope)

        # sort_keys gives a deterministic serialization, which matters for
        # anything hashing the task definition.
        env["MOZ_FETCHES"] = {"task-reference": json.dumps(job_fetches, sort_keys=True)}

        env.setdefault("MOZ_FETCHES_DIR", "fetches")

        yield job
+
+
@transforms.add
def make_task_description(config, jobs):
    """Given a build description, create a task description"""
    # import plugin modules first, before iterating over jobs
    import_sibling_modules(exceptions=("common.py",))

    for job in jobs:
        impl = job["worker"]["implementation"]
        # always-optimized tasks never execute, so have no workdir
        if impl in ("docker-worker", "generic-worker"):
            job["run"].setdefault("workdir", "/builds/worker")

        taskdesc = copy.deepcopy(job)

        # fill in some empty defaults to make run implementations easier
        for field, empty in (
            ("attributes", {}),
            ("dependencies", {}),
            ("soft-dependencies", []),
            ("routes", []),
            ("scopes", []),
            ("extra", {}),
        ):
            taskdesc.setdefault(field, empty)

        # give the function for job.run.using on this worker implementation a
        # chance to set up the task description.
        configure_taskdesc_for_run(config, job, taskdesc, impl)
        del taskdesc["run"]

        # yield only the task description, discarding the job description
        yield taskdesc
+
+
# A registry of all functions decorated with run_job_using:
# registry[run_using][worker_implementation] = (func, schema, defaults)
registry = {}


def run_job_using(worker_implementation, run_using, schema=None, defaults=None):
    """Register the decorated function as able to set up a task description for
    jobs with the given worker implementation and `run.using` property. If
    `schema` is given, the job's run field will be verified to match it.

    `defaults` maps run-field names to values applied when a job omits them.
    (A mutable `{}` default argument was used here before; `None` avoids
    accidentally sharing one dict across registrations.)

    The decorated function should have the signature `using_foo(config, job, taskdesc)`
    and should modify the task description in-place. The skeleton of
    the task description is already set up, but without a payload."""
    # Normalize so the tuple stored below never aliases a shared default.
    defaults = {} if defaults is None else defaults

    def wrap(func):
        for_run_using = registry.setdefault(run_using, {})
        if worker_implementation in for_run_using:
            raise Exception(
                "run_job_using({!r}, {!r}) already exists: {!r}".format(
                    # for_run_using is keyed by worker implementation; indexing
                    # it with run_using (as before) raised KeyError instead of
                    # reporting the conflicting registration.
                    run_using, worker_implementation, for_run_using[worker_implementation]
                )
            )
        for_run_using[worker_implementation] = (func, schema, defaults)
        return func

    return wrap
+
+
@run_job_using(
    "always-optimized", "always-optimized", Schema({"using": "always-optimized"})
)
def always_optimized(config, job, taskdesc):
    """Tasks that are always optimized away never execute, so there is
    nothing to configure on the task description."""
+
+
def configure_taskdesc_for_run(config, job, taskdesc, worker_implementation):
    """
    Run the appropriate function for this job against the given task
    description.

    This will raise an appropriate error if no function exists, or if the job's
    run is not valid according to the schema.
    """
    run_using = job["run"]["using"]
    handlers = registry.get(run_using)
    if handlers is None:
        raise Exception(f"no functions for run.using {run_using!r}")

    entry = handlers.get(worker_implementation)
    if entry is None:
        raise Exception(
            "no functions for run.using {!r} on {!r}".format(
                run_using, worker_implementation
            )
        )

    func, schema, defaults = entry
    # Apply the handler's defaults before validating against its schema.
    for key, default in defaults.items():
        job["run"].setdefault(key, default)

    if schema:
        validate_schema(
            schema,
            job["run"],
            "In job.run using {!r}/{!r} for job {!r}:".format(
                job["run"]["using"], worker_implementation, job["label"]
            ),
        )
    func(config, job, taskdesc)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py
new file mode 100644
index 0000000000..1660d0856a
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py
@@ -0,0 +1,196 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Common support for various job types. These functions are all named after the
+worker implementation they operate on, and take the same three parameters, for
+consistency.
+"""
+
+
+import hashlib
+import json
+
+from taskgraph.util.taskcluster import get_artifact_prefix
+
+
def get_vcsdir_name(os):
    """Name of the checkout directory inside the checkouts dir: Windows uses
    "src", every other platform uses "vcs"."""
    return "src" if os == "windows" else "vcs"
+
+
def add_cache(job, taskdesc, name, mount_point, skip_untrusted=False):
    """Adds a cache based on the worker's implementation.

    Args:
        job (dict): Task's job description.
        taskdesc (dict): Target task description to modify.
        name (str): Name of the cache.
        mount_point (path): Path on the host to mount the cache.
        skip_untrusted (bool): Whether cache is used in untrusted environments
            (default: False). Only applies to docker-worker.
    """
    # Jobs may opt out of caching entirely.
    if not job["run"].get("use-caches", True):
        return

    implementation = job["worker"]["implementation"]
    if implementation == "docker-worker":
        entry = {
            "type": "persistent",
            "name": name,
            "mount-point": mount_point,
            "skip-untrusted": skip_untrusted,
        }
        taskdesc["worker"].setdefault("caches", []).append(entry)
    elif implementation == "generic-worker":
        entry = {
            "cache-name": name,
            "directory": mount_point,
        }
        taskdesc["worker"].setdefault("mounts", []).append(entry)
    # Other implementations have no cache support; silently do nothing.
+
+
def docker_worker_add_workspace_cache(config, job, taskdesc, extra=None):
    """Add the workspace cache.

    Args:
        config (TransformConfig): Transform configuration object.
        job (dict): Task's job description.
        taskdesc (dict): Target task description to modify.
        extra (str): Optional context passed in that supports extending the cache
            key name to avoid undesired conflicts with other caches.
    """
    name_parts = [
        config.params["project"],
        "build",
        taskdesc["attributes"]["build_platform"],
        taskdesc["attributes"]["build_type"],
        "workspace",
    ]
    cache_name = "-".join(name_parts)
    if extra:
        cache_name = f"{cache_name}-{extra}"

    mount_point = "{}/workspace".format(job["run"]["workdir"])

    # Don't enable the workspace cache when we can't guarantee its
    # behavior, like on Try.
    add_cache(job, taskdesc, cache_name, mount_point, skip_untrusted=True)
+
+
def add_artifacts(config, job, taskdesc, path):
    """Publish `path` as the task's artifact directory under the task's
    artifact prefix."""
    artifact = {
        "name": get_artifact_prefix(taskdesc),
        "path": path,
        "type": "directory",
    }
    taskdesc["worker"].setdefault("artifacts", []).append(artifact)
+
+
def docker_worker_add_artifacts(config, job, taskdesc):
    """Adds an artifact directory to the task"""
    upload_dir = "{}/artifacts/".format(job["run"]["workdir"])
    # run-task reads UPLOAD_DIR to know where artifacts should be written.
    taskdesc["worker"]["env"]["UPLOAD_DIR"] = upload_dir
    add_artifacts(config, job, taskdesc, upload_dir)
+
+
def generic_worker_add_artifacts(config, job, taskdesc):
    """Adds an artifact directory to the task"""
    # The path is the location on disk; it doesn't necessarily
    # mean the artifacts will be public or private; that is set via the name
    # attribute in add_artifacts.
    prefix = get_artifact_prefix(taskdesc)
    add_artifacts(config, job, taskdesc, path=prefix)
+
+
def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False):
    """Update a job/task with parameters to enable a VCS checkout.

    This can only be used with ``run-task`` tasks, as the cache name is
    reserved for ``run-task`` tasks.
    """
    worker = job["worker"]
    is_mac = worker["os"] == "macosx"
    is_win = worker["os"] == "windows"
    is_linux = worker["os"] == "linux"
    is_docker = worker["implementation"] == "docker-worker"
    assert is_mac or is_win or is_linux

    # Checkout location and hg share/store path differ per platform.
    if is_win:
        checkoutdir = "./build"
        hgstore = "y:/hg-shared"
    elif is_docker:
        checkoutdir = "{workdir}/checkouts".format(**job["run"])
        hgstore = f"{checkoutdir}/hg-store"
    else:
        checkoutdir = "./checkouts"
        hgstore = f"{checkoutdir}/hg-shared"

    vcsdir = checkoutdir + "/" + get_vcsdir_name(worker["os"])
    cache_name = "checkouts"

    # Robust checkout does not clean up subrepositories, so ensure that tasks
    # that checkout different sets of paths have separate caches.
    # See https://bugzilla.mozilla.org/show_bug.cgi?id=1631610
    if len(repo_configs) > 1:
        checkout_paths = {
            "\t".join([repo_config.path, repo_config.prefix])
            for repo_config in sorted(
                repo_configs.values(), key=lambda repo_config: repo_config.path
            )
        }
        checkout_paths_str = "\n".join(checkout_paths).encode("utf-8")
        digest = hashlib.sha256(checkout_paths_str).hexdigest()
        cache_name += f"-repos-{digest}"

    # Sparse checkouts need their own cache because they can interfere
    # with clients that aren't sparse aware.
    if sparse:
        cache_name += "-sparse"

    # Workers using Mercurial >= 5.8 will enable revlog-compression-zstd, which
    # workers using older versions can't understand, so they can't share cache.
    # At the moment, only docker workers use the newer version.
    if is_docker:
        cache_name += "-hg58"

    add_cache(job, taskdesc, cache_name, checkoutdir)

    # Expose the repository layout to run-task through the environment.
    env = taskdesc["worker"].setdefault("env", {})
    env.update(
        {
            "HG_STORE_PATH": hgstore,
            "REPOSITORIES": json.dumps(
                {repo.prefix: repo.name for repo in repo_configs.values()}
            ),
            "VCS_PATH": vcsdir,
        }
    )
    # Per-repo variables are prefixed with the repo's uppercased prefix
    # (e.g. MYREPO_HEAD_REV); unset values are omitted entirely.
    for repo_config in repo_configs.values():
        env.update(
            {
                f"{repo_config.prefix.upper()}_{key}": value
                for key, value in {
                    "BASE_REPOSITORY": repo_config.base_repository,
                    "HEAD_REPOSITORY": repo_config.head_repository,
                    "HEAD_REV": repo_config.head_rev,
                    "HEAD_REF": repo_config.head_ref,
                    "REPOSITORY_TYPE": repo_config.type,
                    "SSH_SECRET_NAME": repo_config.ssh_secret_name,
                }.items()
                if value is not None
            }
        )
        # Fetching over ssh needs the corresponding secret scope.
        if repo_config.ssh_secret_name:
            taskdesc["scopes"].append(f"secrets:get:{repo_config.ssh_secret_name}")

    # only some worker platforms have taskcluster-proxy enabled
    if job["worker"]["implementation"] in ("docker-worker",):
        taskdesc["worker"]["taskcluster-proxy"] = True
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py
new file mode 100644
index 0000000000..09b48fe594
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py
@@ -0,0 +1,37 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+This transform allows including indexed tasks from other projects in the
+current taskgraph. The transform takes a list of indexes, and the optimization
+phase will replace the task with the task from the other graph.
+"""
+
+
+from voluptuous import Required
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.transforms.job import run_job_using
+from taskgraph.util.schema import Schema
+
transforms = TransformSequence()

# Schema for the `run` section of a job with `using: index-search`.
run_task_schema = Schema(
    {
        Required("using"): "index-search",
        # NOTE(review): the second positional argument to voluptuous' Required
        # binds to `msg`, not `description` — confirm this string is intended
        # as an error message rather than documentation.
        Required(
            "index-search",
            "A list of indexes in decreasing order of priority at which to lookup for this "
            "task. This is interpolated with the graph parameters.",
        ): [str],
    }
)
+
+
@run_job_using("always-optimized", "index-search", schema=run_task_schema)
def fill_template(config, job, taskdesc):
    """Record the interpolated index list as an `index-search` optimization;
    the optimization phase replaces the task with the indexed one."""
    indexes = [
        index.format(**config.params) for index in job["run"]["index-search"]
    ]
    taskdesc["optimization"] = {"index-search": indexes}
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py
new file mode 100644
index 0000000000..a44f30d5bd
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py
@@ -0,0 +1,240 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Support for running jobs that are invoked via the `run-task` script.
+"""
+
+
+import os
+
+import attr
+from voluptuous import Any, Optional, Required
+
+from taskgraph.transforms.job import run_job_using
+from taskgraph.transforms.job.common import support_vcs_checkout
+from taskgraph.transforms.task import taskref_or_string
+from taskgraph.util import path, taskcluster
+from taskgraph.util.schema import Schema
+
# Interpreters used when `command` is given as a string rather than a list;
# chosen via the `exec-with` run key (bash by default).
EXEC_COMMANDS = {
    "bash": ["bash", "-cx"],
    "powershell": ["powershell.exe", "-ExecutionPolicy", "Bypass"],
}

# Schema for the `run` section of a job with `using: run-task`.
run_task_schema = Schema(
    {
        Required("using"): "run-task",
        # if true, add a cache at ~worker/.cache, which is where things like pip
        # tend to hide their caches. This cache is never added for level-1 jobs.
        # TODO Once bug 1526028 is fixed, this and 'use-caches' should be merged.
        Required("cache-dotcache"): bool,
        # Whether or not to use caches.
        Optional("use-caches"): bool,
        # if true (the default), perform a checkout on the worker
        Required("checkout"): Any(bool, {str: dict}),
        Optional(
            "cwd",
            description="Path to run command in. If a checkout is present, the path "
            "to the checkout will be interpolated with the key `checkout`",
        ): str,
        # The sparse checkout profile to use. Value is the filename relative to the
        # directory where sparse profiles are defined (build/sparse-profiles/).
        Required("sparse-profile"): Any(str, None),
        # The command arguments to pass to the `run-task` script, after the
        # checkout arguments. If a list, it will be passed directly; otherwise
        # it will be included in a single argument to the command specified by
        # `exec-with`.
        Required("command"): Any([taskref_or_string], taskref_or_string),
        # Context to substitute into the command using format string
        # substitution (e.g {value}). This is useful if certain aspects of the
        # command need to be generated in transforms.
        Optional("command-context"): dict,
        # What to execute the command with in the event command is a string.
        Optional("exec-with"): Any(*list(EXEC_COMMANDS)),
        # Base work directory used to set up the task.
        Required("workdir"): str,
        # Whether to run as root. (defaults to False)
        Optional("run-as-root"): bool,
    }
)
+
+
def common_setup(config, job, taskdesc, command):
    """Shared run-task setup: append checkout/sparse-profile/cwd arguments to
    `command` and export MOZ_SCM_LEVEL; used by both the docker-worker and
    generic-worker run-task handlers."""
    run = job["run"]
    if run["checkout"]:
        repo_configs = config.repo_configs
        if len(repo_configs) > 1 and run["checkout"] is True:
            raise Exception("Must explicitly specify checkouts with multiple repos.")
        elif run["checkout"] is not True:
            # NOTE: the comprehension's `config` deliberately shadows the
            # transform-config parameter with each repo's override dict.
            repo_configs = {
                repo: attr.evolve(repo_configs[repo], **config)
                for (repo, config) in run["checkout"].items()
            }

        support_vcs_checkout(
            config,
            job,
            taskdesc,
            repo_configs=repo_configs,
            sparse=bool(run["sparse-profile"]),
        )

        # VCS_PATH was just set by support_vcs_checkout above.
        vcs_path = taskdesc["worker"]["env"]["VCS_PATH"]
        for repo_config in repo_configs.values():
            checkout_path = path.join(vcs_path, repo_config.path)
            command.append(f"--{repo_config.prefix}-checkout={checkout_path}")

            if run["sparse-profile"]:
                command.append(
                    "--{}-sparse-profile=build/sparse-profiles/{}".format(
                        repo_config.prefix,
                        run["sparse-profile"],
                    )
                )

    # NOTE(review): as indented here, this if/elif pair sits outside the
    # checkout branch, which makes the `elif` unreachable (its condition
    # implies the `if`'s) and leaves `vcs_path` unbound when `cwd` is given
    # without a checkout.  Upstream pairs the `if` with the checkout branch
    # above — confirm the intended indentation.
    if "cwd" in run:
        run["cwd"] = path.normpath(run["cwd"].format(checkout=vcs_path))
    elif "cwd" in run and "{checkout}" in run["cwd"]:
        raise Exception(
            "Found `{{checkout}}` interpolation in `cwd` for task {name} "
            "but the task doesn't have a checkout: {cwd}".format(
                cwd=run["cwd"], name=job.get("name", job.get("label"))
            )
        )

    if "cwd" in run:
        command.extend(("--task-cwd", run["cwd"]))

    taskdesc["worker"].setdefault("env", {})["MOZ_SCM_LEVEL"] = config.params["level"]
+
+
# Defaults applied to the `run` section of every run-task job (passed as
# `defaults` to run_job_using below); individual jobs may override them.
worker_defaults = {
    "cache-dotcache": False,
    "checkout": True,
    "sparse-profile": None,
    "run-as-root": False,
}
+
+
def script_url(config, script):
    """Return the public artifact URL for `script` on the current (decision)
    task; outside automation a literal "<TASK_ID>" placeholder is used."""
    task_id = os.environ.get("TASK_ID")
    if task_id is None:
        if "MOZ_AUTOMATION" in os.environ:
            raise Exception("TASK_ID must be defined to use run-task on generic-worker")
        task_id = "<TASK_ID>"
    # use_proxy = False to avoid having all generic-workers turn on proxy
    # Assumes the cluster allows anonymous downloads of public artifacts
    tc_url = taskcluster.get_root_url(False)
    # TODO: Use util/taskcluster.py:get_artifact_url once hack for Bug 1405889 is removed
    return "{}/api/queue/v1/task/{}/artifacts/public/{}".format(tc_url, task_id, script)
+
+
@run_job_using(
    "docker-worker", "run-task", schema=run_task_schema, defaults=worker_defaults
)
def docker_worker_run_task(config, job, taskdesc):
    """Configure a run-task job on docker-worker: build the run-task command
    line (checkout args, user/group, the task command) and optional caches."""
    run = job["run"]
    worker = taskdesc["worker"] = job["worker"]
    # run-task is baked into the docker image at this path.
    command = ["/usr/local/bin/run-task"]
    common_setup(config, job, taskdesc, command)

    if run.get("cache-dotcache"):
        worker["caches"].append(
            {
                "type": "persistent",
                "name": "{project}-dotcache".format(**config.params),
                "mount-point": "{workdir}/.cache".format(**run),
                "skip-untrusted": True,
            }
        )

    run_command = run["command"]

    command_context = run.get("command-context")
    if command_context:
        # NOTE(review): `.format` is only valid when `command` is a string; a
        # list-form `command` combined with `command-context` would raise
        # AttributeError here (the generic-worker variant formats each element).
        run_command = run_command.format(**command_context)

    # dict is for the case of `{'task-reference': str}`.
    if isinstance(run_command, str) or isinstance(run_command, dict):
        exec_cmd = EXEC_COMMANDS[run.pop("exec-with", "bash")]
        run_command = exec_cmd + [run_command]
    if run["run-as-root"]:
        command.extend(("--user", "root", "--group", "root"))
    command.append("--")
    command.extend(run_command)
    worker["command"] = command
+
+
@run_job_using(
    "generic-worker", "run-task", schema=run_task_schema, defaults=worker_defaults
)
def generic_worker_run_task(config, job, taskdesc):
    """Configure a run-task job on generic-worker: mount the run-task (and,
    when fetches are present, fetch-content) scripts, build the command line,
    and adapt it to the worker's OS."""
    run = job["run"]
    worker = taskdesc["worker"] = job["worker"]
    is_win = worker["os"] == "windows"
    is_mac = worker["os"] == "macosx"
    is_bitbar = worker["os"] == "linux-bitbar"

    # run-task is mounted into the task directory below; pick the interpreter
    # appropriate for the platform.
    if is_win:
        command = ["C:/mozilla-build/python3/python3.exe", "run-task"]
    elif is_mac:
        command = ["/tools/python36/bin/python3", "run-task"]
    else:
        command = ["./run-task"]

    common_setup(config, job, taskdesc, command)

    worker.setdefault("mounts", [])
    if run.get("cache-dotcache"):
        worker["mounts"].append(
            {
                "cache-name": "{project}-dotcache".format(**config.params),
                "directory": "{workdir}/.cache".format(**run),
            }
        )
    # Download the run-task script itself from the decision task's artifacts.
    worker["mounts"].append(
        {
            "content": {
                "url": script_url(config, "run-task"),
            },
            "file": "./run-task",
        }
    )
    # fetch-content is only needed when use_fetches populated MOZ_FETCHES.
    if worker.get("env", {}).get("MOZ_FETCHES"):
        worker["mounts"].append(
            {
                "content": {
                    "url": script_url(config, "fetch-content"),
                },
                "file": "./fetch-content",
            }
        )

    run_command = run["command"]

    if isinstance(run_command, str):
        if is_win:
            # On Windows the whole command is joined into one shell string
            # (see bottom), so quote the task command to keep it one argument.
            run_command = f'"{run_command}"'
        exec_cmd = EXEC_COMMANDS[run.pop("exec-with", "bash")]
        run_command = exec_cmd + [run_command]

    command_context = run.get("command-context")
    if command_context:
        for i in range(len(run_command)):
            run_command[i] = run_command[i].format(**command_context)

    if run["run-as-root"]:
        command.extend(("--user", "root", "--group", "root"))
    command.append("--")
    if is_bitbar:
        # Use the bitbar wrapper script which sets up the device and adb
        # environment variables
        command.append("/builds/taskcluster/script.py")
    command.extend(run_command)

    if is_win:
        worker["command"] = [" ".join(command)]
    else:
        # The mounted run-task is not executable; chmod it first.
        worker["command"] = [
            ["chmod", "+x", "run-task"],
            command,
        ]
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py
new file mode 100644
index 0000000000..5d4ee02f4a
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py
@@ -0,0 +1,174 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Support for running toolchain-building jobs via dedicated scripts
+"""
+
+from voluptuous import Any, Optional, Required
+
+import taskgraph
+from taskgraph.transforms.job import configure_taskdesc_for_run, run_job_using
+from taskgraph.transforms.job.common import (
+ docker_worker_add_artifacts,
+ generic_worker_add_artifacts,
+ get_vcsdir_name,
+)
+from taskgraph.util.hash import hash_paths
+from taskgraph.util.schema import Schema
+from taskgraph.util.shell import quote as shell_quote
+
# Cache type identifier for toolchain artifacts; referenced by
# `taskdesc["cache"]["type"]` in common_toolchain below.
CACHE_TYPE = "toolchains.v3"

# Schema for `run: {using: toolchain-script}` job descriptions.
toolchain_run_schema = Schema(
    {
        Required("using"): "toolchain-script",
        # The script (in taskcluster/scripts/toolchain) to run.
        Required("script"): str,
        # Arguments to pass to the script.
        Optional("arguments"): [str],
        # Sparse profile to give to checkout using `run-task`. If given,
        # a filename in `build/sparse-profiles`. Defaults to
        # "toolchain-build", i.e., to
        # `build/sparse-profiles/toolchain-build`. If `None`, instructs
        # `run-task` to not use a sparse profile at all.
        Required("sparse-profile"): Any(str, None),
        # Paths/patterns pointing to files that influence the outcome of a
        # toolchain build.
        Optional("resources"): [str],
        # Path to the artifact produced by the toolchain job
        Required("toolchain-artifact"): str,
        Optional(
            "toolchain-alias",
            description="An alias that can be used instead of the real toolchain job name in "
            "fetch stanzas for jobs.",
        ): Any(str, [str]),
        Optional(
            "toolchain-env",
            description="Additional env variables to add to the worker when using this toolchain",
        ): {str: object},
        # Base work directory used to set up the task.
        Required("workdir"): str,
    }
)
+
+
def get_digest_data(config, run, taskdesc):
    """Build the list of strings that make up the toolchain cache digest.

    Anything appended here invalidates previously cached toolchain
    artifacts when it changes.  Note: pops "resources" from *run*.
    """
    # Files whose content should influence the digest: the declared
    # resources plus the toolchain script itself.
    tracked_files = list(run.pop("resources", []))
    tracked_files.append("taskcluster/scripts/toolchain/{}".format(run["script"]))

    # Accumulate dependency hashes for index generation.
    digest = [
        hash_paths(config.graph_config.vcs_root, tracked_files),
        taskdesc["attributes"]["toolchain-artifact"],
    ]

    # If the task uses an in-tree docker image, we want it to influence
    # the index path as well. Ideally, the content of the docker image itself
    # should have an influence, but at the moment, we can't get that
    # information here. So use the docker image name as a proxy. Not a lot of
    # changes to docker images actually have an impact on the resulting
    # toolchain artifact, so we'll just rely on such important changes to be
    # accompanied with a docker image name change.
    in_tree_image = taskdesc["worker"].get("docker-image", {}).get("in-tree")
    if in_tree_image:
        digest.append(in_tree_image)

    # Likewise script arguments should influence the index.
    digest.extend(run.get("arguments") or [])
    return digest
+
+
def common_toolchain(config, job, taskdesc, is_docker):
    """Shared implementation for `toolchain-script` jobs on both worker types.

    Moves the toolchain-* metadata from the run description into task
    attributes, ensures artifacts and a cache digest are set up, then
    rewrites the job as a plain `run-task` job and re-dispatches it through
    `configure_taskdesc_for_run`.
    """
    run = job["run"]

    worker = taskdesc["worker"] = job["worker"]
    # Toolchain tasks always run with chain-of-trust enabled.
    worker["chain-of-trust"] = True

    srcdir = get_vcsdir_name(worker["os"])

    if is_docker:
        # If the task doesn't have a docker-image, set a default
        worker.setdefault("docker-image", {"in-tree": "toolchain-build"})

    # Allow the job to specify where artifacts come from, but add
    # public/build if it's not there already.
    artifacts = worker.setdefault("artifacts", [])
    if not any(artifact.get("name") == "public/build" for artifact in artifacts):
        if is_docker:
            docker_worker_add_artifacts(config, job, taskdesc)
        else:
            generic_worker_add_artifacts(config, job, taskdesc)

    env = worker["env"]
    env.update(
        {
            "MOZ_BUILD_DATE": config.params["moz_build_date"],
            "MOZ_SCM_LEVEL": config.params["level"],
        }
    )

    # Promote toolchain-* keys from the run description to task attributes,
    # where downstream consumers (fetches, aliases) look for them.
    attributes = taskdesc.setdefault("attributes", {})
    attributes["toolchain-artifact"] = run.pop("toolchain-artifact")
    if "toolchain-alias" in run:
        attributes["toolchain-alias"] = run.pop("toolchain-alias")
    if "toolchain-env" in run:
        attributes["toolchain-env"] = run.pop("toolchain-env")

    # Cache digests are only computed when `taskgraph.fast` is not set.
    if not taskgraph.fast:
        name = taskdesc["label"].replace(f"{config.kind}-", "", 1)
        taskdesc["cache"] = {
            "type": CACHE_TYPE,
            "name": name,
            "digest-data": get_digest_data(config, run, taskdesc),
        }

    # Re-dispatch as a run-task job that invokes the toolchain script.
    script = run.pop("script")
    run["using"] = "run-task"
    run["cwd"] = "{checkout}/.."

    # `.ps1` scripts are executed with powershell rather than bash.
    if script.endswith(".ps1"):
        run["exec-with"] = "powershell"

    command = [f"{srcdir}/taskcluster/scripts/toolchain/{script}"] + run.pop(
        "arguments", []
    )

    if not is_docker:
        # Don't quote the first item in the command because it purposely contains
        # an environment variable that is not meant to be quoted.
        if len(command) > 1:
            command = command[0] + " " + shell_quote(*command[1:])
        else:
            command = command[0]

    run["command"] = command

    configure_taskdesc_for_run(config, job, taskdesc, worker["implementation"])
+
+
# Defaults merged into every `toolchain-script` run description before
# schema validation.
toolchain_defaults = {
    "sparse-profile": "toolchain-build",
}


@run_job_using(
    "docker-worker",
    "toolchain-script",
    schema=toolchain_run_schema,
    defaults=toolchain_defaults,
)
def docker_worker_toolchain(config, job, taskdesc):
    """Handle `toolchain-script` jobs scheduled on docker-worker."""
    common_toolchain(config, job, taskdesc, is_docker=True)


@run_job_using(
    "generic-worker",
    "toolchain-script",
    schema=toolchain_run_schema,
    defaults=toolchain_defaults,
)
def generic_worker_toolchain(config, job, taskdesc):
    """Handle `toolchain-script` jobs scheduled on generic-worker."""
    common_toolchain(config, job, taskdesc, is_docker=False)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/release_notifications.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/release_notifications.py
new file mode 100644
index 0000000000..0796b028e8
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/release_notifications.py
@@ -0,0 +1,100 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+Add notifications via taskcluster-notify for release tasks
+"""
+from string import Formatter
+
+from voluptuous import ALLOW_EXTRA, Any, Optional, Required
+
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.schema import Schema, optionally_keyed_by, resolve_keyed_by
+
# Schema for the optional `notifications` section of a task definition.
RELEASE_NOTIFICATIONS_SCHEMA = Schema(
    {
        Optional("notifications"): {
            # Recipient addresses; may be keyed by project or level.
            Required("emails"): optionally_keyed_by("project", "level", [str]),
            # Email subject; supports str.format fields plus the custom `!t`
            # (title-case) conversion provided by TitleCaseFormatter.
            Required("subject"): str,
            # Email body; defaults to the subject when omitted.
            Optional("message"): str,
            # Task status transitions that trigger the email; defaults to
            # ["on-completed"] (see add_notifications).
            Optional("status-types"): [
                Any(
                    "on-completed",
                    "on-defined",
                    "on-exception",
                    "on-failed",
                    "on-pending",
                    "on-resolved",
                    "on-running",
                )
            ],
        },
    },
    extra=ALLOW_EXTRA,
)


transforms = TransformSequence()
transforms.add_validate(RELEASE_NOTIFICATIONS_SCHEMA)
+
+
class TitleCaseFormatter(Formatter):
    """string.Formatter supporting a custom ``!t`` conversion that
    title-cases the value (e.g. ``"{product!t}"``)."""

    def convert_field(self, value, conversion):
        """Apply ``!t`` locally; delegate every other conversion.

        The previous implementation called the base class but discarded its
        result, so the standard conversions (``!s``, ``!r``, ``!a``)
        silently returned the raw value instead of the converted one.
        """
        if conversion == "t":
            return str(value).title()
        return super().convert_field(value, conversion)
+
+
# Module-level singleton used by add_notifications to format subjects,
# messages and recipient addresses.
titleformatter = TitleCaseFormatter()
+
+
@transforms.add
def add_notifications(config, jobs):
    """Attach taskcluster-notify email routes and subject/body metadata to
    any job carrying a `notifications` section; other jobs pass through
    untouched."""
    for job in jobs:
        notifications = job.pop("notifications", None)
        if not notifications:
            yield job
            continue

        label = "{}-{}".format(config.kind, job["name"])
        resolve_keyed_by(
            notifications,
            "emails",
            label,
            level=config.params["level"],
            project=config.params["project"],
        )

        format_kwargs = {"task": job, "config": config.__dict__}
        subject = titleformatter.format(notifications["subject"], **format_kwargs)
        message = titleformatter.format(
            notifications.get("message", notifications["subject"]), **format_kwargs
        )
        recipients = [
            address.format(**format_kwargs) for address in notifications["emails"]
        ]

        # By default, we only send mail on success to avoid messages like 'blah is in the
        # candidates dir' when cancelling graphs, dummy job failure, etc
        routes = job.setdefault("routes", [])
        for status in notifications.get("status-types", ["on-completed"]):
            routes.extend(f"notify.email.{address}.{status}" for address in recipients)

        # Customize the email subject to include release name and build number
        job.setdefault("extra", {}).update(
            {
                "notify": {
                    "email": {
                        "subject": subject,
                        "content": message,
                    }
                }
            }
        )

        yield job
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py
new file mode 100644
index 0000000000..8ab3762b8c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py
@@ -0,0 +1,1288 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+These transformations take a task description and turn it into a TaskCluster
+task definition (along with attributes, label, etc.). The input to these
+transformations is generic to any kind of task, but abstracts away some of the
+complexities of worker implementations, scopes, and treeherder annotations.
+"""
+
+
+import hashlib
+import os
+import re
+import time
+from copy import deepcopy
+
+import attr
+from voluptuous import All, Any, Extra, NotIn, Optional, Required
+
+from taskgraph import MAX_DEPENDENCIES
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.hash import hash_path
+from taskgraph.util.keyed_by import evaluate_keyed_by
+from taskgraph.util.memoize import memoize
+from taskgraph.util.schema import (
+ OptimizationSchema,
+ Schema,
+ optionally_keyed_by,
+ resolve_keyed_by,
+ taskref_or_string,
+ validate_schema,
+)
+from taskgraph.util.treeherder import split_symbol
+from taskgraph.util.workertypes import worker_type_implementation
+
+from ..util import docker as dockerutil
+from ..util.workertypes import get_worker_type
+
# Absolute path to the run-task script shipped alongside taskgraph; its
# content hash is folded into cache names via _run_task_suffix below.
RUN_TASK = os.path.join(
    os.path.dirname(os.path.dirname(__file__)), "run-task", "run-task"
)
+
+
@memoize
def _run_task_suffix():
    """Return the cache-name suffix tied to the current run-task script.

    The first 20 hex characters of run-task's content hash, so cache names
    change whenever run-task itself changes.
    """
    return hash_path(RUN_TASK)[:20]
+
+
# A task description is a general description of a TaskCluster task
task_description_schema = Schema(
    {
        # the label for this task
        Required("label"): str,
        # description of the task (for metadata)
        Required("description"): str,
        # attributes for this task
        Optional("attributes"): {str: object},
        # relative path (from config.path) to the file task was defined in
        Optional("task-from"): str,
        # dependencies of this task, keyed by name; these are passed through
        # verbatim and subject to the interpretation of the Task's get_dependencies
        # method.
        Optional("dependencies"): {
            All(
                str,
                NotIn(
                    ["self", "decision"],
                    # NOTE: the mismatched backtick in the original message
                    # ("'self`") is fixed here.
                    "Can't use 'self' or 'decision' as dependency names.",
                ),
            ): object,
        },
        # Soft dependencies of this task, as a list of tasks labels
        Optional("soft-dependencies"): [str],
        # Dependencies that must be scheduled in order for this task to run.
        Optional("if-dependencies"): [str],
        Optional("requires"): Any("all-completed", "all-resolved"),
        # expiration and deadline times, relative to task creation, with units
        # (e.g., "14 days"). Defaults are set based on the project.
        Optional("expires-after"): str,
        Optional("deadline-after"): str,
        # custom routes for this task; the default treeherder routes will be added
        # automatically
        Optional("routes"): [str],
        # custom scopes for this task; any scopes required for the worker will be
        # added automatically. The following parameters will be substituted in each
        # scope:
        #  {level} -- the scm level of this push
        #  {project} -- the project of this push
        Optional("scopes"): [str],
        # Tags
        Optional("tags"): {str: str},
        # custom "task.extra" content
        Optional("extra"): {str: object},
        # treeherder-related information; see
        # https://schemas.taskcluster.net/taskcluster-treeherder/v1/task-treeherder-config.json
        # If not specified, no treeherder extra information or routes will be
        # added to the task
        Optional("treeherder"): {
            # either a bare symbol, or "grp(sym)".
            "symbol": str,
            # the job kind
            "kind": Any("build", "test", "other"),
            # tier for this task
            "tier": int,
            # task platform, in the form platform/collection, used to set
            # treeherder.machine.platform and treeherder.collection or
            # treeherder.labels
            "platform": str,
        },
        # information for indexing this build so its artifacts can be discovered;
        # if omitted, the build will not be indexed.
        Optional("index"): {
            # the name of the product this build produces
            "product": str,
            # the names to use for this job in the TaskCluster index
            "job-name": str,
            # Type of gecko v2 index to use
            "type": str,
            # The rank that the task will receive in the TaskCluster
            # index. A newly completed task supersedes the currently
            # indexed task iff it has a higher rank. If unspecified,
            # 'by-tier' behavior will be used.
            "rank": Any(
                # Rank is equal the timestamp of the build_date for tier-1
                # tasks, and zero for non-tier-1. This sorts tier-{2,3}
                # builds below tier-1 in the index.
                "by-tier",
                # Rank is given as an integer constant (e.g. zero to make
                # sure a task is last in the index).
                int,
                # Rank is equal to the timestamp of the build_date. This
                # option can be used to override the 'by-tier' behavior
                # for non-tier-1 tasks.
                "build_date",
            ),
        },
        # The `run_on_projects` attribute, defaulting to "all". This dictates the
        # projects on which this task should be included in the target task set.
        # See the attributes documentation for details.
        Optional("run-on-projects"): optionally_keyed_by("build-platform", [str]),
        Optional("run-on-tasks-for"): [str],
        Optional("run-on-git-branches"): [str],
        # The `always-target` attribute will cause the task to be included in the
        # target_task_graph regardless of filtering. Tasks included in this manner
        # will be candidates for optimization even when `optimize_target_tasks` is
        # False, unless the task was also explicitly chosen by the target_tasks
        # method.
        Required("always-target"): bool,
        # Optimization to perform on this task during the optimization phase.
        # Optimizations are defined in taskcluster/taskgraph/optimize.py.
        Required("optimization"): OptimizationSchema,
        # the provisioner-id/worker-type for the task. The following parameters will
        # be substituted in this string:
        #  {level} -- the scm level of this push
        "worker-type": str,
        # Whether the job should use sccache compiler caching.
        Required("needs-sccache"): bool,
        # information specific to the worker implementation that will run this task
        Optional("worker"): {
            Required("implementation"): str,
            Extra: object,
        },
    }
)
+
# Location of the treeherder task-config schema this module targets.
TC_TREEHERDER_SCHEMA_URL = (
    "https://github.com/taskcluster/taskcluster-treeherder/"
    "blob/master/schemas/task-treeherder-config.yml"
)


# Error template: {} placeholders are the group symbol and its origin.
UNKNOWN_GROUP_NAME = (
    "Treeherder group {} (from {}) has no name; " "add it to taskcluster/ci/config.yml"
)

# Index route templates filled in with task/index parameters when a task
# declares an `index` section.
V2_ROUTE_TEMPLATES = [
    "index.{trust-domain}.v2.{project}.latest.{product}.{job-name}",
    "index.{trust-domain}.v2.{project}.pushdate.{build_date_long}.{product}.{job-name}",
    "index.{trust-domain}.v2.{project}.pushlog-id.{pushlog_id}.{product}.{job-name}",
    "index.{trust-domain}.v2.{project}.revision.{branch_rev}.{product}.{job-name}",
]

# the roots of the treeherder routes
TREEHERDER_ROUTE_ROOT = "tc-treeherder"
+
+
def get_branch_rev(config):
    """Return the revision at the head of the push being processed."""
    params = config.params
    return params["head_rev"]
+
+
@memoize
def get_default_priority(graph_config, project):
    """Resolve the graph config's keyed-by `task-priority` value for
    *project*; memoized since it is consulted repeatedly per graph."""
    return evaluate_keyed_by(
        graph_config["task-priority"], "Graph Config", {"project": project}
    )
+
+
# define a collection of payload builders, depending on the worker implementation
# Maps implementation name (e.g. "docker-worker") -> PayloadBuilder;
# populated by the payload_builder decorator below.
payload_builders = {}
+
+
@attr.s(frozen=True)
class PayloadBuilder:
    """Pairs a worker payload schema with the function that fills in
    ``task_def['payload']`` for that worker implementation."""

    # voluptuous schema validating the task's `worker` section
    schema = attr.ib(type=Schema)
    # callable(config, task, task_def) that mutates task_def in place
    builder = attr.ib()
+
+
def payload_builder(name, schema):
    """Decorator factory registering a payload builder for implementation
    *name*; *schema* is extended with the common implementation/os keys."""
    full_schema = Schema(
        {Required("implementation"): name, Optional("os"): str}
    ).extend(schema)

    def register(func):
        payload_builders[name] = PayloadBuilder(full_schema, func)
        return func

    return register
+
+
# define a collection of index builders, depending on the type implementation
index_builders = {}


def index_builder(name):
    """Decorator factory registering the decorated function as the index
    builder for index type *name*."""

    def register(func):
        index_builders[name] = func
        return func

    return register
+
+
# Message raised when a task's index product is not declared in the graph
# config.
UNSUPPORTED_INDEX_PRODUCT_ERROR = """\
The index product {product} is not in the list of configured products in
`taskcluster/ci/config.yml'.
"""


def verify_index(config, index):
    """Raise unless the index's product is declared in the graph config."""
    configured_products = config.graph_config["index"]["products"]
    if index["product"] not in configured_products:
        raise Exception(
            UNSUPPORTED_INDEX_PRODUCT_ERROR.format(product=index["product"])
        )
+
+
@payload_builder(
    "docker-worker",
    schema={
        Required("os"): "linux",
        # For tasks that will run in docker-worker, this is the name of the docker
        # image or in-tree docker image to run the task in. If in-tree, then a
        # dependency will be created automatically. This is generally
        # `desktop-test`, or an image that acts an awful lot like it.
        Required("docker-image"): Any(
            # a raw Docker image path (repo/image:tag)
            str,
            # an in-tree generated docker image (from `taskcluster/docker/<name>`)
            {"in-tree": str},
            # an indexed docker image
            {"indexed": str},
        ),
        # worker features that should be enabled
        Required("relengapi-proxy"): bool,
        Required("chain-of-trust"): bool,
        Required("taskcluster-proxy"): bool,
        Required("allow-ptrace"): bool,
        Required("loopback-video"): bool,
        Required("loopback-audio"): bool,
        Required("docker-in-docker"): bool,  # (aka 'dind')
        Required("privileged"): bool,
        Required("disable-seccomp"): bool,
        # Paths to Docker volumes.
        #
        # For in-tree Docker images, volumes can be parsed from Dockerfile.
        # This only works for the Dockerfile itself: if a volume is defined in
        # a base image, it will need to be declared here. Out-of-tree Docker
        # images will also require explicit volume annotation.
        #
        # Caches are often mounted to the same path as Docker volumes. In this
        # case, they take precedence over a Docker volume. But a volume still
        # needs to be declared for the path.
        Optional("volumes"): [str],
        # caches to set up for the task
        Optional("caches"): [
            {
                # only one type is supported by any of the workers right now
                "type": "persistent",
                # name of the cache, allowing re-use by subsequent tasks naming the
                # same cache
                "name": str,
                # location in the task image where the cache will be mounted
                "mount-point": str,
                # Whether the cache is not used in untrusted environments
                # (like the Try repo).
                Optional("skip-untrusted"): bool,
            }
        ],
        # artifacts to extract from the task image after completion
        Optional("artifacts"): [
            {
                # type of artifact -- simple file, or recursive directory
                "type": Any("file", "directory"),
                # task image path from which to read artifact
                "path": str,
                # name of the produced artifact (root of the names for
                # type=directory)
                "name": str,
            }
        ],
        # environment variables
        Required("env"): {str: taskref_or_string},
        # the command to run; if not given, docker-worker will default to the
        # command in the docker image
        Optional("command"): [taskref_or_string],
        # the maximum time to run, in seconds
        Required("max-run-time"): int,
        # the exit status code(s) that indicates the task should be retried
        Optional("retry-exit-status"): [int],
        # the exit status code(s) that indicates the caches used by the task
        # should be purged
        Optional("purge-caches-exit-status"): [int],
        # Whether any artifacts are assigned to this worker
        Optional("skip-artifacts"): bool,
    },
)
def build_docker_worker_payload(config, task, task_def):
    """Translate the generic `worker` description into a docker-worker
    payload (image, command, env, caches, features, capabilities) and
    append the scopes those features require to ``task_def['scopes']``."""
    worker = task["worker"]
    level = int(config.params["level"])

    # Resolve the image reference: raw path, in-tree image task, or index.
    image = worker["docker-image"]
    if isinstance(image, dict):
        if "in-tree" in image:
            name = image["in-tree"]
            docker_image_task = "build-docker-image-" + image["in-tree"]
            task.setdefault("dependencies", {})["docker-image"] = docker_image_task

            image = {
                "path": "public/image.tar.zst",
                "taskId": {"task-reference": "<docker-image>"},
                "type": "task-image",
            }

            # Find VOLUME in Dockerfile.
            volumes = dockerutil.parse_volumes(name)
            for v in sorted(volumes):
                if v in worker["volumes"]:
                    raise Exception(
                        "volume %s already defined; "
                        "if it is defined in a Dockerfile, "
                        "it does not need to be specified in the "
                        "worker definition" % v
                    )

                worker["volumes"].append(v)

        elif "indexed" in image:
            image = {
                "path": "public/image.tar.zst",
                "namespace": image["indexed"],
                "type": "indexed-image",
            }
        else:
            raise Exception("unknown docker image type")

    features = {}

    if worker.get("relengapi-proxy"):
        features["relengAPIProxy"] = True

    if worker.get("taskcluster-proxy"):
        features["taskclusterProxy"] = True

    if worker.get("allow-ptrace"):
        features["allowPtrace"] = True
        task_def["scopes"].append("docker-worker:feature:allowPtrace")

    if worker.get("chain-of-trust"):
        features["chainOfTrust"] = True

    if worker.get("docker-in-docker"):
        features["dind"] = True

    if task.get("needs-sccache"):
        features["taskclusterProxy"] = True
        task_def["scopes"].append(
            "assume:project:taskcluster:{trust_domain}:level-{level}-sccache-buckets".format(
                trust_domain=config.graph_config["trust-domain"],
                level=config.params["level"],
            )
        )
        worker["env"]["USE_SCCACHE"] = "1"
        # Disable sccache idle shutdown.
        worker["env"]["SCCACHE_IDLE_TIMEOUT"] = "0"
    else:
        worker["env"]["SCCACHE_DISABLE"] = "1"

    capabilities = {}

    for lo in "audio", "video":
        if worker.get("loopback-" + lo):
            capitalized = "loopback" + lo.capitalize()
            devices = capabilities.setdefault("devices", {})
            devices[capitalized] = True
            task_def["scopes"].append("docker-worker:capability:device:" + capitalized)

    if worker.get("privileged"):
        capabilities["privileged"] = True
        task_def["scopes"].append("docker-worker:capability:privileged")

    if worker.get("disable-seccomp"):
        capabilities["disableSeccomp"] = True
        task_def["scopes"].append("docker-worker:capability:disableSeccomp")

    task_def["payload"] = payload = {
        "image": image,
        "env": worker["env"],
    }
    if "command" in worker:
        payload["command"] = worker["command"]

    if "max-run-time" in worker:
        payload["maxRunTime"] = worker["max-run-time"]

    # True when the task's command is wrapped by the run-task script.
    run_task = payload.get("command", [""])[0].endswith("run-task")

    # run-task exits EXIT_PURGE_CACHES if there is a problem with caches.
    # Automatically retry the tasks and purge caches if we see this exit
    # code.
    # TODO move this closer to code adding run-task once bug 1469697 is
    # addressed.
    if run_task:
        worker.setdefault("retry-exit-status", []).append(72)
        worker.setdefault("purge-caches-exit-status", []).append(72)

    payload["onExitStatus"] = {}
    if "retry-exit-status" in worker:
        payload["onExitStatus"]["retry"] = worker["retry-exit-status"]
    if "purge-caches-exit-status" in worker:
        payload["onExitStatus"]["purgeCaches"] = worker["purge-caches-exit-status"]

    if "artifacts" in worker:
        artifacts = {}
        for artifact in worker["artifacts"]:
            artifacts[artifact["name"]] = {
                "path": artifact["path"],
                "type": artifact["type"],
                "expires": task_def["expires"],  # always expire with the task
            }
        payload["artifacts"] = artifacts

    if isinstance(worker.get("docker-image"), str):
        out_of_tree_image = worker["docker-image"]
    else:
        out_of_tree_image = None
        image = worker.get("docker-image", {}).get("in-tree")

    if "caches" in worker:
        caches = {}

        # run-task knows how to validate caches.
        #
        # To help ensure new run-task features and bug fixes don't interfere
        # with existing caches, we seed the hash of run-task into cache names.
        # So, any time run-task changes, we should get a fresh set of caches.
        # This means run-task can make changes to cache interaction at any time
        # without regards for backwards or future compatibility.
        #
        # But this mechanism only works for in-tree Docker images that are built
        # with the current run-task! For out-of-tree Docker images, we have no
        # way of knowing their content of run-task. So, in addition to varying
        # cache names by the contents of run-task, we also take the Docker image
        # name into consideration. This means that different Docker images will
        # never share the same cache. This is a bit unfortunate. But it is the
        # safest thing to do. Fortunately, most images are defined in-tree.
        #
        # For out-of-tree Docker images, we don't strictly need to incorporate
        # the run-task content into the cache name. However, doing so preserves
        # the mechanism whereby changing run-task results in new caches
        # everywhere.

        # As an additional mechanism to force the use of different caches, the
        # string literal in the variable below can be changed. This is
        # preferred to changing run-task because it doesn't require images
        # to be rebuilt.
        cache_version = "v3"

        if run_task:
            suffix = f"{cache_version}-{_run_task_suffix()}"

            if out_of_tree_image:
                name_hash = hashlib.sha256(
                    out_of_tree_image.encode("utf-8")
                ).hexdigest()
                suffix += name_hash[0:12]

        else:
            suffix = cache_version

        skip_untrusted = config.params.is_try() or level == 1

        for cache in worker["caches"]:
            # Some caches aren't enabled in environments where we can't
            # guarantee certain behavior. Filter those out.
            if cache.get("skip-untrusted") and skip_untrusted:
                continue

            name = "{trust_domain}-level-{level}-{name}-{suffix}".format(
                trust_domain=config.graph_config["trust-domain"],
                level=config.params["level"],
                name=cache["name"],
                suffix=suffix,
            )
            caches[name] = cache["mount-point"]
            task_def["scopes"].append("docker-worker:cache:%s" % name)

        # Assertion: only run-task is interested in this.
        if run_task:
            payload["env"]["TASKCLUSTER_CACHES"] = ";".join(sorted(caches.values()))

        payload["cache"] = caches

    # And send down volumes information to run-task as well.
    if run_task and worker.get("volumes"):
        payload["env"]["TASKCLUSTER_VOLUMES"] = ";".join(sorted(worker["volumes"]))

    # NOTE: `skip_untrusted` is only bound inside the "caches" branch above;
    # the short-circuit on payload.get("cache") (only set in that branch)
    # keeps this from raising NameError.
    if payload.get("cache") and skip_untrusted:
        payload["env"]["TASKCLUSTER_UNTRUSTED_CACHES"] = "1"

    if features:
        payload["features"] = features
    if capabilities:
        payload["capabilities"] = capabilities

    check_caches_are_volumes(task)
+
+
@payload_builder(
    "generic-worker",
    schema={
        Required("os"): Any("windows", "macosx", "linux", "linux-bitbar"),
        # see http://schemas.taskcluster.net/generic-worker/v1/payload.json
        # and https://docs.taskcluster.net/reference/workers/generic-worker/payload
        # command is a list of commands to run, sequentially
        # on Windows, each command is a string, on OS X and Linux, each command is
        # a string array
        Required("command"): Any(
            [taskref_or_string], [[taskref_or_string]]  # Windows  # Linux / OS X
        ),
        # artifacts to extract from the task image after completion; note that artifacts
        # for the generic worker cannot have names
        Optional("artifacts"): [
            {
                # type of artifact -- simple file, or recursive directory
                "type": Any("file", "directory"),
                # filesystem path from which to read artifact
                "path": str,
                # if not specified, path is used for artifact name
                Optional("name"): str,
            }
        ],
        # Directories and/or files to be mounted.
        # The actual allowed combinations are stricter than the model below,
        # but this provides a simple starting point.
        # See https://docs.taskcluster.net/reference/workers/generic-worker/payload
        Optional("mounts"): [
            {
                # A unique name for the cache volume, implies writable cache directory
                # (otherwise mount is a read-only file or directory).
                Optional("cache-name"): str,
                # Optional content for pre-loading cache, or mandatory content for
                # read-only file or directory. Pre-loaded content can come from either
                # a task artifact or from a URL.
                Optional("content"): {
                    # *** Either (artifact and task-id) or url must be specified. ***
                    # Artifact name that contains the content.
                    Optional("artifact"): str,
                    # Task ID that has the artifact that contains the content.
                    Optional("task-id"): taskref_or_string,
                    # URL that supplies the content in response to an unauthenticated
                    # GET request.
                    Optional("url"): str,
                },
                # *** Either file or directory must be specified. ***
                # If mounting a cache or read-only directory, the filesystem location of
                # the directory should be specified as a relative path to the task
                # directory here.
                Optional("directory"): str,
                # If mounting a file, specify the relative path within the task
                # directory to mount the file (the file will be read only).
                Optional("file"): str,
                # Required if and only if `content` is specified and mounting a
                # directory (not a file). This should be the archive format of the
                # content (either pre-loaded cache or read-only directory).
                Optional("format"): Any("rar", "tar.bz2", "tar.gz", "zip"),
            }
        ],
        # environment variables
        Required("env"): {str: taskref_or_string},
        # the maximum time to run, in seconds
        Required("max-run-time"): int,
        # os user groups for test task workers
        Optional("os-groups"): [str],
        # feature for test task to run as administrator
        Optional("run-as-administrator"): bool,
        # optional features
        Required("chain-of-trust"): bool,
        Optional("taskcluster-proxy"): bool,
        # Whether any artifacts are assigned to this worker
        Optional("skip-artifacts"): bool,
    },
)
def build_generic_worker_payload(config, task, task_def):
    """Translate the generic `worker` description into a generic-worker
    payload (command, env, artifacts, mounts, osGroups, features) and
    append the scopes those settings require to ``task_def['scopes']``."""
    worker = task["worker"]

    task_def["payload"] = {
        "command": worker["command"],
        "maxRunTime": worker["max-run-time"],
    }

    on_exit_status = {}
    if "retry-exit-status" in worker:
        on_exit_status["retry"] = worker["retry-exit-status"]
    if worker["os"] == "windows":
        on_exit_status.setdefault("retry", []).extend(
            [
                # These codes (on windows) indicate a process interruption,
                # rather than a task run failure. See bug 1544403.
                1073807364,  # process force-killed due to system shutdown
                3221225786,  # sigint (any interrupt)
            ]
        )
    if on_exit_status:
        task_def["payload"]["onExitStatus"] = on_exit_status

    env = worker.get("env", {})

    if task.get("needs-sccache"):
        env["USE_SCCACHE"] = "1"
        # Disable sccache idle shutdown.
        env["SCCACHE_IDLE_TIMEOUT"] = "0"
    else:
        env["SCCACHE_DISABLE"] = "1"

    if env:
        task_def["payload"]["env"] = env

    artifacts = []

    for artifact in worker.get("artifacts", []):
        a = {
            "path": artifact["path"],
            "type": artifact["type"],
        }
        if "name" in artifact:
            a["name"] = artifact["name"]
        artifacts.append(a)

    if artifacts:
        task_def["payload"]["artifacts"] = artifacts

    # Need to copy over mounts, but rename keys to respect naming convention
    #   * 'cache-name' -> 'cacheName'
    #   * 'task-id'    -> 'taskId'
    # All other key names are already suitable, and don't need renaming.
    mounts = deepcopy(worker.get("mounts", []))
    for mount in mounts:
        if "cache-name" in mount:
            mount["cacheName"] = "{trust_domain}-level-{level}-{name}".format(
                trust_domain=config.graph_config["trust-domain"],
                level=config.params["level"],
                name=mount.pop("cache-name"),
            )
            task_def["scopes"].append(
                "generic-worker:cache:{}".format(mount["cacheName"])
            )
        if "content" in mount:
            if "task-id" in mount["content"]:
                mount["content"]["taskId"] = mount["content"].pop("task-id")
            if "artifact" in mount["content"]:
                # Non-public artifacts need an explicit read scope.
                if not mount["content"]["artifact"].startswith("public/"):
                    task_def["scopes"].append(
                        "queue:get-artifact:{}".format(mount["content"]["artifact"])
                    )

    if mounts:
        task_def["payload"]["mounts"] = mounts

    if worker.get("os-groups"):
        task_def["payload"]["osGroups"] = worker["os-groups"]
        task_def["scopes"].extend(
            [
                "generic-worker:os-group:{}/{}".format(task["worker-type"], group)
                for group in worker["os-groups"]
            ]
        )

    features = {}

    if worker.get("chain-of-trust"):
        features["chainOfTrust"] = True

    if worker.get("taskcluster-proxy"):
        features["taskclusterProxy"] = True

    if worker.get("run-as-administrator", False):
        features["runAsAdministrator"] = True
        task_def["scopes"].append(
            "generic-worker:run-as-administrator:{}".format(task["worker-type"]),
        )

    if features:
        task_def["payload"]["features"] = features
+
+
@payload_builder(
    "beetmover",
    schema={
        # the maximum time to run, in seconds
        Required("max-run-time"): int,
        # locale key, if this is a locale beetmover job
        Optional("locale"): str,
        Optional("partner-public"): bool,
        Required("release-properties"): {
            "app-name": str,
            "app-version": str,
            "branch": str,
            "build-id": str,
            "hash-type": str,
            "platform": str,
        },
        # list of artifact URLs for the artifacts that should be beetmoved
        Required("upstream-artifacts"): [
            {
                # taskId of the task with the artifact
                Required("taskId"): taskref_or_string,
                # type of signing task (for CoT)
                Required("taskType"): str,
                # Paths to the artifacts to sign
                Required("paths"): [str],
                # locale is used to map upload path and allow for duplicate simple names
                Required("locale"): str,
            }
        ],
        Optional("artifact-map"): object,
    },
)
def build_beetmover_payload(config, task, task_def):
    """Construct the scriptworker payload for a beetmover task.

    Maps the kebab-case worker/release-properties keys onto the camelCase
    (and snake_case) keys the beetmover scriptworker expects.
    """
    worker = task["worker"]
    props = worker["release-properties"]

    payload = {
        "maxRunTime": worker["max-run-time"],
        "releaseProperties": {
            "appName": props["app-name"],
            "appVersion": props["app-version"],
            "branch": props["branch"],
            "buildid": props["build-id"],
            "hashType": props["hash-type"],
            "platform": props["platform"],
        },
        "upload_date": config.params["build_date"],
        "upstreamArtifacts": worker["upstream-artifacts"],
    }

    # Optional worker fields are copied onto the payload only when truthy.
    for worker_key, payload_key in (
        ("locale", "locale"),
        ("artifact-map", "artifactMap"),
        ("partner-public", "is_partner_repack_public"),
    ):
        if worker.get(worker_key):
            payload[payload_key] = worker[worker_key]

    task_def["payload"] = payload
+
+
@payload_builder(
    "invalid",
    schema={
        # an invalid task is one which should never actually be created; this is used in
        # release automation on branches where the task just doesn't make sense
        Extra: object,
    },
)
def build_invalid_payload(config, task, task_def):
    """Give the task a sentinel string payload; such a task is expected to be
    optimized away and creating it is an error."""
    task_def["payload"] = "invalid task - should never be created"
+
+
@payload_builder(
    "always-optimized",
    schema={
        Extra: object,
    },
)
@payload_builder("succeed", schema={})
def build_dummy_payload(config, task, task_def):
    """Empty payload for tasks that are always optimized away ("always-optimized")
    or that exist only to succeed ("succeed")."""
    task_def["payload"] = {}
+
+
# The transform sequence for this module: turns task descriptions into
# concrete task definitions, applied in registration order below.
transforms = TransformSequence()
+
+
@transforms.add
def set_implementation(config, tasks):
    """
    Set the worker implementation based on the worker-type alias.

    Also records the implementation (and OS, when known) in the task's tags.
    Tasks that already declare an implementation pass through untouched.
    """
    for task in tasks:
        worker = task.setdefault("worker", {})
        if "implementation" not in worker:
            impl, worker_os = worker_type_implementation(
                config.graph_config, task["worker-type"]
            )

            tags = task.setdefault("tags", {})
            tags["worker-implementation"] = impl
            worker["implementation"] = impl
            if worker_os:
                tags["os"] = worker_os
                worker["os"] = worker_os

        yield task
+
+
@transforms.add
def set_defaults(config, tasks):
    """Apply default values for optional task and worker fields.

    Worker defaults depend on the implementation resolved earlier by
    ``set_implementation``.
    """
    for task in tasks:
        task.setdefault("always-target", False)
        task.setdefault("optimization", None)
        task.setdefault("needs-sccache", False)

        worker = task["worker"]
        if worker["implementation"] in ("docker-worker",):
            # All docker-worker features default to off.
            worker.setdefault("relengapi-proxy", False)
            worker.setdefault("chain-of-trust", False)
            worker.setdefault("taskcluster-proxy", False)
            worker.setdefault("allow-ptrace", False)
            worker.setdefault("loopback-video", False)
            worker.setdefault("loopback-audio", False)
            worker.setdefault("docker-in-docker", False)
            worker.setdefault("privileged", False)
            worker.setdefault("disable-seccomp", False)
            worker.setdefault("volumes", [])
            worker.setdefault("env", {})
            if "caches" in worker:
                for c in worker["caches"]:
                    c.setdefault("skip-untrusted", False)
        elif worker["implementation"] == "generic-worker":
            worker.setdefault("env", {})
            worker.setdefault("os-groups", [])
            # os-groups is a Windows-only generic-worker feature.
            if worker["os-groups"] and worker["os"] != "windows":
                raise Exception(
                    "os-groups feature of generic-worker is only supported on "
                    "Windows, not on {}".format(worker["os"])
                )
            worker.setdefault("chain-of-trust", False)
        elif worker["implementation"] in (
            "scriptworker-signing",
            "beetmover",
            "beetmover-push-to-release",
            "beetmover-maven",
        ):
            # Scriptworker-based implementations share a max-run-time default.
            worker.setdefault("max-run-time", 600)
        elif worker["implementation"] == "push-apk":
            worker.setdefault("commit", False)

        yield task
+
+
@transforms.add
def task_name_from_label(config, tasks):
    """Derive a task's ``label`` from its ``name`` (prefixed with the kind)
    when no label was given, then drop the now-redundant ``name``."""
    for task in tasks:
        if "label" not in task:
            if "name" not in task:
                raise Exception("task has neither a name nor a label")
            task["label"] = f"{config.kind}-{task['name']}"
        if task.get("name"):
            del task["name"]
        yield task
+
+
@transforms.add
def validate(config, tasks):
    """Validate each task against the task description schema, and its worker
    section against the schema registered by the matching payload builder."""
    for task in tasks:
        validate_schema(
            task_description_schema,
            task,
            "In task {!r}:".format(task.get("label", "?no-label?")),
        )
        # Each worker implementation registers its own schema via
        # @payload_builder; look it up by the implementation name.
        validate_schema(
            payload_builders[task["worker"]["implementation"]].schema,
            task["worker"],
            "In task.run {!r}:".format(task.get("label", "?no-label?")),
        )
        yield task
+
+
@index_builder("generic")
def add_generic_index_routes(config, task):
    """Append the default ("generic") v2 index routes to the task.

    Each template in V2_ROUTE_TEMPLATES is formatted with the decision
    parameters plus index-specific substitutions and added to task routes.
    """
    index = task.get("index")
    routes = task.setdefault("routes", [])

    verify_index(config, index)

    subs = config.params.copy()
    subs["job-name"] = index["job-name"]
    # e.g. "2016.02.02.20160202022222" -- date plus full timestamp.
    subs["build_date_long"] = time.strftime(
        "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"])
    )
    subs["product"] = index["product"]
    subs["trust-domain"] = config.graph_config["trust-domain"]
    subs["branch_rev"] = get_branch_rev(config)

    for tpl in V2_ROUTE_TEMPLATES:
        routes.append(tpl.format(**subs))

    return task
+
+
@transforms.add
def add_index_routes(config, tasks):
    """Compute the task's index rank and delegate index-route generation to
    the builder registered for the task's index type."""
    for task in tasks:
        index = task.get("index", {})

        # The default behavior is to rank tasks according to their tier
        extra_index = task.setdefault("extra", {}).setdefault("index", {})
        rank = index.get("rank", "by-tier")

        if rank == "by-tier":
            # rank is zero for non-tier-1 tasks and based on pushid for others;
            # this sorts tier-{2,3} builds below tier-1 in the index
            tier = task.get("treeherder", {}).get("tier", 3)
            extra_index["rank"] = 0 if tier > 1 else int(config.params["build_date"])
        elif rank == "build_date":
            extra_index["rank"] = int(config.params["build_date"])
        else:
            # Any other value is taken as the literal rank.
            extra_index["rank"] = rank

        if not index:
            yield task
            continue

        index_type = index.get("type", "generic")
        if index_type not in index_builders:
            raise ValueError(f"Unknown index-type {index_type}")
        task = index_builders[index_type](config, task)

        del task["index"]
        yield task
+
+
@transforms.add
def build_task(config, tasks):
    """Assemble the final Taskcluster task definition from the validated task
    description.

    Resolves the worker type, formats scopes, builds treeherder metadata and
    routes, fills in expiry/deadline/priority defaults, delegates payload
    construction to the registered payload builder, and yields the dict
    consumed by the task-graph generator (label, task, dependencies,
    attributes, optimization, ...).
    """
    for task in tasks:
        level = str(config.params["level"])

        provisioner_id, worker_type = get_worker_type(
            config.graph_config,
            task["worker-type"],
            level,
        )
        task["worker-type"] = "/".join([provisioner_id, worker_type])
        project = config.params["project"]

        routes = task.get("routes", [])
        # Scopes may contain {level} and {project} placeholders.
        scopes = [
            s.format(level=level, project=project) for s in task.get("scopes", [])
        ]

        # set up extra
        extra = task.get("extra", {})
        extra["parent"] = os.environ.get("TASK_ID", "")
        task_th = task.get("treeherder")
        if task_th:
            extra.setdefault("treeherder-platform", task_th["platform"])
            treeherder = extra.setdefault("treeherder", {})

            machine_platform, collection = task_th["platform"].split("/", 1)
            treeherder["machine"] = {"platform": machine_platform}
            treeherder["collection"] = {collection: True}

            group_names = config.graph_config["treeherder"]["group-names"]
            groupSymbol, symbol = split_symbol(task_th["symbol"])
            if groupSymbol != "?":
                treeherder["groupSymbol"] = groupSymbol
                if groupSymbol not in group_names:
                    path = os.path.join(config.path, task.get("task-from", ""))
                    raise Exception(UNKNOWN_GROUP_NAME.format(groupSymbol, path))
                treeherder["groupName"] = group_names[groupSymbol]
            treeherder["symbol"] = symbol
            if len(symbol) > 25 or len(groupSymbol) > 25:
                raise RuntimeError(
                    "Treeherder group and symbol names must not be longer than "
                    "25 characters: {} (see {})".format(
                        task_th["symbol"],
                        TC_TREEHERDER_SCHEMA_URL,
                    )
                )
            treeherder["jobKind"] = task_th["kind"]
            treeherder["tier"] = task_th["tier"]

            branch_rev = get_branch_rev(config)

            if config.params["tasks_for"].startswith("github-pull-request"):
                # In the past we used `project` for this, but that ends up being
                # set to the repository name of the _head_ repo, which is not correct
                # (and causes scope issues) if it doesn't match the name of the
                # base repo
                base_project = config.params["base_repository"].split("/")[-1]
                if base_project.endswith(".git"):
                    base_project = base_project[:-4]
                th_project_suffix = "-pr"
            else:
                base_project = config.params["project"]
                th_project_suffix = ""

            # Route that associates the task with a treeherder push.
            routes.append(
                "{}.v2.{}.{}.{}".format(
                    TREEHERDER_ROUTE_ROOT,
                    base_project + th_project_suffix,
                    branch_rev,
                    config.params["pushlog_id"],
                )
            )

        if "expires-after" not in task:
            task["expires-after"] = "28 days" if config.params.is_try() else "1 year"

        if "deadline-after" not in task:
            task["deadline-after"] = "1 day"

        if "priority" not in task:
            task["priority"] = get_default_priority(
                config.graph_config, config.params["project"]
            )

        tags = task.get("tags", {})
        tags.update(
            {
                "createdForUser": config.params["owner"],
                "kind": config.kind,
                "label": task["label"],
            }
        )

        # Relative datestamps are resolved to absolute times at task-creation
        # time.
        task_def = {
            "provisionerId": provisioner_id,
            "workerType": worker_type,
            "routes": routes,
            "created": {"relative-datestamp": "0 seconds"},
            "deadline": {"relative-datestamp": task["deadline-after"]},
            "expires": {"relative-datestamp": task["expires-after"]},
            "scopes": scopes,
            "metadata": {
                "description": task["description"],
                "name": task["label"],
                "owner": config.params["owner"],
                "source": config.params.file_url(config.path, pretty=True),
            },
            "extra": extra,
            "tags": tags,
            "priority": task["priority"],
        }

        if task.get("requires", None):
            task_def["requires"] = task["requires"]

        if task_th:
            # link back to treeherder in description
            th_push_link = (
                "https://treeherder.mozilla.org/#/jobs?repo={}&revision={}".format(
                    config.params["project"] + th_project_suffix, branch_rev
                )
            )
            task_def["metadata"]["description"] += " ([Treeherder push]({}))".format(
                th_push_link
            )

        # add the payload and adjust anything else as required (e.g., scopes)
        payload_builders[task["worker"]["implementation"]].builder(
            config, task, task_def
        )

        attributes = task.get("attributes", {})
        # Resolve run-on-projects
        build_platform = attributes.get("build_platform")
        resolve_keyed_by(
            task,
            "run-on-projects",
            item_name=task["label"],
            **{"build-platform": build_platform},
        )
        attributes["run_on_projects"] = task.get("run-on-projects", ["all"])
        attributes["run_on_tasks_for"] = task.get("run-on-tasks-for", ["all"])
        # We don't want to pollute non git repos with this attribute. Moreover, target_tasks
        # already assumes the default value is ['all']
        if task.get("run-on-git-branches"):
            attributes["run_on_git_branches"] = task["run-on-git-branches"]

        attributes["always_target"] = task["always-target"]

        # Set MOZ_AUTOMATION on all jobs.
        if task["worker"]["implementation"] in (
            "generic-worker",
            "docker-worker",
        ):
            payload = task_def.get("payload")
            if payload:
                env = payload.setdefault("env", {})
                env["MOZ_AUTOMATION"] = "1"

        dependencies = task.get("dependencies", {})
        if_dependencies = task.get("if-dependencies", [])
        if if_dependencies:
            # Resolve if-dependencies from dependency names to labels.
            for i, dep in enumerate(if_dependencies):
                if dep in dependencies:
                    if_dependencies[i] = dependencies[dep]
                    continue

                raise Exception(
                    "{label} specifies '{dep}' in if-dependencies, "
                    "but {dep} is not a dependency!".format(
                        label=task["label"], dep=dep
                    )
                )

        yield {
            "label": task["label"],
            "description": task["description"],
            "task": task_def,
            "dependencies": dependencies,
            "if-dependencies": if_dependencies,
            "soft-dependencies": task.get("soft-dependencies", []),
            "attributes": attributes,
            "optimization": task.get("optimization", None),
        }
+
+
@transforms.add
def add_github_checks(config, tasks):
    """
    For git repositories, add checks route to all tasks.

    This will be replaced by a configurable option in the future.
    """
    if config.params["repository_type"] != "git":
        # Not a git repository: pass the tasks through untouched. The
        # explicit return is required -- without it, a list-valued `tasks`
        # would be iterated a second time below and every task would be
        # yielded twice (the bug was masked because transforms normally
        # receive single-use generators).
        for task in tasks:
            yield task
        return

    for task in tasks:
        task["task"]["routes"].append("checks")
        yield task
+
+
@transforms.add
def chain_of_trust(config, tasks):
    """For tasks with the chainOfTrust feature enabled, record the docker
    image they run in under extra.chainOfTrust.inputs so the image can be
    verified as part of chain-of-trust validation."""
    for task in tasks:
        if task["task"].get("payload", {}).get("features", {}).get("chainOfTrust"):
            image = task.get("dependencies", {}).get("docker-image")
            if image:
                cot = (
                    task["task"].setdefault("extra", {}).setdefault("chainOfTrust", {})
                )
                # "<docker-image>" is a task-reference placeholder, resolved
                # to the docker-image task's taskId later in task generation.
                cot.setdefault("inputs", {})["docker-image"] = {
                    "task-reference": "<docker-image>"
                }
        yield task
+
+
@transforms.add
def check_task_identifiers(config, tasks):
    """Ensures that all tasks have well defined identifiers:
    ``^[a-zA-Z0-9_-]{1,38}$``
    """
    valid_identifier = re.compile("^[a-zA-Z0-9_-]{1,38}$")
    for task in tasks:
        for field in ("workerType", "provisionerId"):
            value = task["task"][field]
            if not valid_identifier.match(value):
                raise Exception(
                    "task {}.{} is not a valid identifier: {}".format(
                        task["label"], field, value
                    )
                )
        yield task
+
+
@transforms.add
def check_task_dependencies(config, tasks):
    """Ensures that tasks don't have more than 100 dependencies."""
    for task in tasks:
        n_deps = len(task["dependencies"])
        if n_deps > MAX_DEPENDENCIES:
            raise Exception(
                "task {}/{} has too many dependencies ({} > {})".format(
                    config.kind,
                    task["label"],
                    n_deps,
                    MAX_DEPENDENCIES,
                )
            )
        yield task
+
+
def check_caches_are_volumes(task):
    """Ensures that all cache paths are defined as volumes.

    Caches and volumes are the only filesystem locations whose content
    isn't defined by the Docker image itself. Some caches are optional
    depending on the job environment. We want paths that are potentially
    caches to have as similar behavior regardless of whether a cache is
    used. To help enforce this, we require that all paths used as caches
    to be declared as Docker volumes. This check won't catch all offenders.
    But it is better than nothing.
    """
    declared_volumes = set(task["worker"]["volumes"])
    cache_paths = {c["mount-point"] for c in task["worker"].get("caches", [])}
    undeclared = sorted(cache_paths - declared_volumes)

    if undeclared:
        raise Exception(
            "task %s (image %s) has caches that are not declared as "
            "Docker volumes: %s "
            "(have you added them as VOLUMEs in the Dockerfile?)"
            % (task["label"], task["worker"]["docker-image"], ", ".join(undeclared))
        )
+
+
@transforms.add
def check_run_task_caches(config, tasks):
    """Audit for caches requiring run-task.

    run-task manages caches in certain ways. If a cache managed by run-task
    is used by a non run-task task, it could cause problems. So we audit for
    that and make sure certain cache names are exclusive to run-task.

    IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
    CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
    THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.
    """
    # Cache names (after the trust-domain/level prefix is stripped) that are
    # reserved for run-task.
    re_reserved_caches = re.compile(
        """^
        (checkouts|tooltool-cache)
        """,
        re.VERBOSE,
    )

    # Every cache name must begin with this trust-domain/level prefix.
    cache_prefix = "{trust_domain}-level-{level}-".format(
        trust_domain=config.graph_config["trust-domain"],
        level=config.params["level"],
    )

    # Per the error message below, this suffix ties the cache name to the
    # contents of run-task itself.
    suffix = _run_task_suffix()

    for task in tasks:
        payload = task["task"].get("payload", {})
        command = payload.get("command") or [""]

        # A task "uses run-task" if its first command word ends with run-task.
        main_command = command[0] if isinstance(command[0], str) else ""
        run_task = main_command.endswith("run-task")

        for cache in payload.get("cache", {}):
            if not cache.startswith(cache_prefix):
                raise Exception(
                    "{} is using a cache ({}) which is not appropriate "
                    "for its trust-domain and level. It should start with {}.".format(
                        task["label"], cache, cache_prefix
                    )
                )

            cache = cache[len(cache_prefix) :]

            if not re_reserved_caches.match(cache):
                continue

            if not run_task:
                raise Exception(
                    "%s is using a cache (%s) reserved for run-task "
                    "change the task to use run-task or use a different "
                    "cache name" % (task["label"], cache)
                )

            if not cache.endswith(suffix):
                raise Exception(
                    "%s is using a cache (%s) reserved for run-task "
                    "but the cache name is not dependent on the contents "
                    "of run-task; change the cache name to conform to the "
                    "naming requirements" % (task["label"], cache)
                )

        yield task
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
new file mode 100644
index 0000000000..ee59ba4548
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
@@ -0,0 +1,86 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import gzip
+import os
+import stat
+import tarfile
+
# 2016-01-01T00:00:00+0000
DEFAULT_MTIME = 1451606400


def create_tar_from_files(fp, files):
    """Create a tar file deterministically.

    Receives a dict mapping names of files in the archive to local filesystem
    paths or ``mozpack.files.BaseFile`` instances (any object supporting
    ``seek``/``tell`` and sized ``read`` works).

    The files will be archived and written to the passed file handle opened
    for writing.

    Only regular files can be written.

    FUTURE accept a filename argument (or create APIs to write files)
    """
    with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf:
        for archive_path, f in sorted(files.items()):
            # When given a path we open the file ourselves and must close it
            # again; previously these handles were leaked.
            opened_here = False
            if isinstance(f, str):
                mode = os.stat(f).st_mode
                f = open(f, "rb")
                opened_here = True
            else:
                mode = 0o0644

            try:
                ti = tarfile.TarInfo(archive_path)
                ti.mode = mode
                ti.type = tarfile.REGTYPE

                if not ti.isreg():
                    raise ValueError("not a regular file: %s" % f)

                # Disallow setuid and setgid bits. This is an arbitrary restriction.
                # However, since we set uid/gid to root:root, setuid and setgid
                # would be a glaring security hole if the archive were
                # uncompressed as root.
                if ti.mode & (stat.S_ISUID | stat.S_ISGID):
                    raise ValueError(
                        "cannot add file with setuid or setgid set: " "%s" % f
                    )

                # Set uid, gid, username, and group as deterministic values.
                ti.uid = 0
                ti.gid = 0
                ti.uname = ""
                ti.gname = ""

                # Set mtime to a constant value.
                ti.mtime = DEFAULT_MTIME

                f.seek(0, 2)
                ti.size = f.tell()
                f.seek(0, 0)
                # tarfile wants to pass a size argument to read(). So just
                # wrap/buffer in a proper file object interface.
                tf.addfile(ti, f)
            finally:
                if opened_here:
                    f.close()
+
+
def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
    """Create a tar.gz file deterministically from files.

    This is a glorified wrapper around ``create_tar_from_files`` that
    adds gzip compression.

    The passed file handle should be opened for writing in binary mode.
    When the function returns, all data has been written to the handle.
    """
    # Offset 3-7 in the gzip header contains an mtime. Pin it to a known
    # value so output is deterministic.
    with gzip.GzipFile(
        filename=filename or "",
        mode="wb",
        fileobj=fp,
        compresslevel=compresslevel,
        mtime=DEFAULT_MTIME,
    ) as gf:
        create_tar_from_files(gf, files)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
new file mode 100644
index 0000000000..cf6f11c573
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
@@ -0,0 +1,84 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import re
+
+
def attrmatch(attributes, **kwargs):
    """Determine whether the given set of task attributes matches. The
    conditions are given as keyword arguments, where each keyword names an
    attribute. The keyword value can be a literal, a set, or a callable. A
    literal must match the attribute exactly. Given a set, the attribute value
    must be in the set. A callable is called with the attribute value. If an
    attribute is specified as a keyword argument but not present in the
    attributes, the result is False."""
    for name, condition in kwargs.items():
        if name not in attributes:
            return False
        value = attributes[name]
        if isinstance(condition, set):
            matched = value in condition
        elif callable(condition):
            matched = condition(value)
        else:
            matched = condition == value
        if not matched:
            return False
    return True
+
+
def keymatch(attributes, target):
    """Determine if any keys in attributes are a match to target, then return
    a list of matching values. First exact matches will be checked. Failing
    that, regex matches and finally a default key.
    """
    # exact match wins outright
    if target in attributes:
        return [attributes[target]]

    # keys as anchored regular expressions
    regex_hits = [
        value for key, value in attributes.items() if re.match(key + "$", target)
    ]
    if regex_hits:
        return regex_hits

    # fall back to a "default" key, if any
    if "default" in attributes:
        return [attributes["default"]]

    return []
+
+
def _match_run_on(key, run_on):
    """
    Determine whether the given parameter is included in the corresponding `run-on-attribute`.
    """
    return "all" in run_on or key in run_on


match_run_on_projects = _match_run_on
match_run_on_tasks_for = _match_run_on
+
+
def match_run_on_git_branches(git_branch, run_on_git_branches):
    """
    Determine whether the given project is included in the `run-on-git-branches` parameter.
    Allows 'all'.
    """
    if "all" in run_on_git_branches:
        return True

    # Each entry is a regular expression matched against the branch name.
    return any(
        re.match(branch_pattern, git_branch)
        for branch_pattern in run_on_git_branches
    )
+
+
def sorted_unique_list(*args):
    """Join one or more lists, and return a sorted list of unique members"""
    members = set()
    for arg in args:
        members.update(arg)
    return sorted(members)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py
new file mode 100644
index 0000000000..974b114902
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py
@@ -0,0 +1,86 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import hashlib
+import time
+
TARGET_CACHE_INDEX = "{cache_prefix}.cache.level-{level}.{type}.{name}.hash.{digest}"
EXTRA_CACHE_INDEXES = [
    "{cache_prefix}.cache.level-{level}.{type}.{name}.latest",
    "{cache_prefix}.cache.level-{level}.{type}.{name}.pushdate.{build_date_long}",
]


def add_optimization(
    config, taskdesc, cache_type, cache_name, digest=None, digest_data=None
):
    """
    Allow the results of this task to be cached. This adds index routes to the
    task so it can be looked up for future runs, and optimization hints so that
    cached artifacts can be found. Exactly one of `digest` and `digest_data`
    must be passed.

    :param TransformConfig config: The configuration for the kind being transformed.
    :param dict taskdesc: The description of the current task.
    :param str cache_type: The type of task result being cached.
    :param str cache_name: The name of the object being cached.
    :param digest: A unique string identifying this version of the artifacts
        being generated. Typically this will be the hash of inputs to the task.
    :type digest: bytes or None
    :param digest_data: A list of bytes representing the inputs of this task.
        They will be concatenated and hashed to create the digest for this
        task.
    :type digest_data: list of bytes or None
    """
    if (digest is None) == (digest_data is None):
        raise Exception("Must pass exactly one of `digest` and `digest_data`.")
    if digest is None:
        digest = hashlib.sha256("\n".join(digest_data).encode("utf-8")).hexdigest()

    taskgraph_cfg = config.graph_config["taskgraph"]
    if "cached-task-prefix" in taskgraph_cfg:
        cache_prefix = taskgraph_cfg["cached-task-prefix"]
    else:
        cache_prefix = config.graph_config["trust-domain"]

    substitutions = {
        "cache_prefix": cache_prefix,
        "type": cache_type,
        "name": cache_name,
        "digest": digest,
    }

    # We'll try to find a cached version of the toolchain at levels above and
    # including the current level, starting at the highest level.
    # Chain-of-trust doesn't handle tasks not built on the tip of a
    # pull-request, so don't look for level-1 tasks if building a pull-request.
    min_level = int(config.params["level"])
    if config.params["tasks_for"] == "github-pull-request":
        min_level = max(min_level, 3)

    index_routes = []
    for level in range(3, min_level - 1, -1):
        substitutions["level"] = level
        index_routes.append(TARGET_CACHE_INDEX.format(**substitutions))

    taskdesc["optimization"] = {"index-search": index_routes}

    # ... and cache at the lowest level.
    substitutions["level"] = config.params["level"]
    routes = taskdesc.setdefault("routes", [])
    routes.append(f"index.{TARGET_CACHE_INDEX.format(**substitutions)}")

    # ... and add some extra routes for humans
    substitutions["build_date_long"] = time.strftime(
        "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"])
    )
    routes.extend(
        f"index.{route.format(**substitutions)}" for route in EXTRA_CACHE_INDEXES
    )

    taskdesc["attributes"]["cached_task"] = {
        "type": cache_type,
        "name": cache_name,
        "digest": digest,
    }
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py
new file mode 100644
index 0000000000..d0e1e1079f
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py
@@ -0,0 +1,79 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Utilities for generating a decision task from :file:`.taskcluster.yml`.
+"""
+
+
+import os
+
+import jsone
+import slugid
+import yaml
+
+from .templates import merge
+from .time import current_json_time
+from .vcs import find_hg_revision_push_info
+
+
def make_decision_task(params, root, context, head_rev=None):
    """Generate a basic decision task, based on the root .taskcluster.yml.

    Renders ``<root>/.taskcluster.yml`` with JSON-e using a context built from
    the given parameters merged with ``context``, and returns a
    ``(task_id, task)`` tuple for the single task the template must produce.

    NOTE(review): when ``head_rev`` is passed it is used for the hg pushlog
    lookup below, but the rendered ``push.revision`` still comes from
    ``params["head_rev"]`` -- confirm this asymmetry is intentional.
    """
    with open(os.path.join(root, ".taskcluster.yml"), "rb") as f:
        taskcluster_yml = yaml.safe_load(f)

    if not head_rev:
        head_rev = params["head_rev"]

    if params["repository_type"] == "hg":
        # hg pushes carry pushlog metadata; other repository types do not.
        pushlog = find_hg_revision_push_info(params["repository_url"], head_rev)

        hg_push_context = {
            "pushlog_id": pushlog["pushid"],
            "pushdate": pushlog["pushdate"],
            "owner": pushlog["user"],
        }
    else:
        hg_push_context = {}

    # Memoize slugids per name so the template can reference the same
    # slugid more than once.
    slugids = {}

    def as_slugid(name):
        # https://github.com/taskcluster/json-e/issues/164
        name = name[0]
        if name not in slugids:
            slugids[name] = slugid.nice()
        return slugids[name]

    # provide a similar JSON-e context to what mozilla-taskcluster provides:
    # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml
    # but with a different tasks_for and an extra `cron` section
    context = merge(
        {
            "repository": {
                "url": params["repository_url"],
                "project": params["project"],
                "level": params["level"],
            },
            "push": merge(
                {
                    "revision": params["head_rev"],
                    # remainder are fake values, but the decision task expects them anyway
                    "comment": " ",
                },
                hg_push_context,
            ),
            "now": current_json_time(),
            "as_slugid": as_slugid,
        },
        context,
    )

    rendered = jsone.render(taskcluster_yml, context)
    if len(rendered["tasks"]) != 1:
        raise Exception("Expected .taskcluster.yml to only produce one cron task")
    task = rendered["tasks"][0]

    task_id = task.pop("taskId")
    return (task_id, task)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py
new file mode 100644
index 0000000000..4b211cc4b3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py
@@ -0,0 +1,342 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import hashlib
+import io
+import json
+import os
+import re
+import sys
+import urllib.parse
+
+import requests_unixsocket
+
+from taskgraph.util.archive import create_tar_gz_from_files
+from taskgraph.util.memoize import memoize
+
+IMAGE_DIR = os.path.join(".", "taskcluster", "docker")
+
+from .yaml import load_yaml
+
+
def docker_url(path, **kwargs):
    """Build an ``http+unix`` URL addressing the docker daemon socket.

    The socket location comes from $DOCKER_SOCKET, defaulting to
    /var/run/docker.sock; extra keyword arguments become the query string.
    """
    socket_path = os.environ.get("DOCKER_SOCKET", "/var/run/docker.sock")
    components = (
        "http+unix",
        urllib.parse.quote(socket_path, safe=""),
        path,
        "",
        urllib.parse.urlencode(kwargs),
        "",
    )
    return urllib.parse.urlunparse(components)
+
+
def post_to_docker(tar, api_path, **kwargs):
    """POSTs a tar file to a given docker API path.

    The tar argument can be anything that can be passed to requests.post()
    as data (e.g. iterator or file object).
    The extra keyword arguments are passed as arguments to the docker API.

    The daemon's streaming JSON response is rendered to stderr, mimicking
    the docker CLI; on an "error" message the process exits with the code
    parsed out of the error text (default 1).
    """
    req = requests_unixsocket.Session().post(
        docker_url(api_path, **kwargs),
        data=tar,
        stream=True,
        headers={"Content-Type": "application/x-tar"},
    )
    if req.status_code != 200:
        message = req.json().get("message")
        if not message:
            message = f"docker API returned HTTP code {req.status_code}"
        raise Exception(message)
    # In the tty branch: maps progress id -> terminal line number.
    # In the non-tty branch: maps progress id -> last printed status string.
    status_line = {}

    buf = b""
    for content in req.iter_content(chunk_size=None):
        if not content:
            continue
        # Sometimes, a chunk of content is not a complete json, so we cumulate
        # with leftovers from previous iterations.
        buf += content
        try:
            data = json.loads(buf)
        except Exception:
            continue
        buf = b""
        # data is sometimes an empty dict.
        if not data:
            continue
        # Mimic how docker itself presents the output. This code was tested
        # with API version 1.18 and 1.26.
        if "status" in data:
            if "id" in data:
                if sys.stderr.isatty():
                    # Rewrite this id's progress line in place using ANSI
                    # cursor movement.
                    total_lines = len(status_line)
                    line = status_line.setdefault(data["id"], total_lines)
                    n = total_lines - line
                    if n > 0:
                        # Move the cursor up n lines.
                        sys.stderr.write(f"\033[{n}A")
                        # Clear line and move the cursor to the beginning of it.
                        sys.stderr.write("\033[2K\r")
                    sys.stderr.write(
                        "{}: {} {}\n".format(
                            data["id"], data["status"], data.get("progress", "")
                        )
                    )
                    if n > 1:
                        # Move the cursor down n - 1 lines, which, considering
                        # the carriage return on the last write, gets us back
                        # where we started.
                        sys.stderr.write(f"\033[{n - 1}B")
                else:
                    status = status_line.get(data["id"])
                    # Only print status changes.
                    if status != data["status"]:
                        sys.stderr.write("{}: {}\n".format(data["id"], data["status"]))
                        status_line[data["id"]] = data["status"]
            else:
                # A status without an id resets the progress display.
                status_line = {}
                sys.stderr.write("{}\n".format(data["status"]))
        elif "stream" in data:
            sys.stderr.write(data["stream"])
        elif "aux" in data:
            sys.stderr.write(repr(data["aux"]))
        elif "error" in data:
            sys.stderr.write("{}\n".format(data["error"]))
            # Sadly, docker doesn't give more than a plain string for errors,
            # so the best we can do to propagate the error code from the command
            # that failed is to parse the error message...
            errcode = 1
            m = re.search(r"returned a non-zero code: (\d+)", data["error"])
            if m:
                errcode = int(m.group(1))
            sys.exit(errcode)
        else:
            raise NotImplementedError(repr(data))
    sys.stderr.flush()
+
+
def docker_image(name, by_tag=False):
    """
    Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``,
    or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`.
    """
    # A per-image REGISTRY file wins; otherwise fall back to the shared one.
    try:
        with open(os.path.join(IMAGE_DIR, name, "REGISTRY")) as fh:
            registry = fh.read().strip()
    except OSError:
        with open(os.path.join(IMAGE_DIR, "REGISTRY")) as fh:
            registry = fh.read().strip()

    if by_tag:
        # The tag comes from the image's VERSION file, defaulting to "latest".
        try:
            with open(os.path.join(IMAGE_DIR, name, "VERSION")) as fh:
                tag = fh.read().strip()
        except OSError:
            tag = "latest"
        return f"{registry}/{name}:{tag}"

    # Digest form: the HASH file is mandatory.
    hashfile = os.path.join(IMAGE_DIR, name, "HASH")
    try:
        with open(hashfile) as fh:
            return f"{registry}/{name}@{fh.read().strip()}"
    except OSError:
        raise Exception(f"Failed to read HASH file {hashfile}")
+
+
class VoidWriter:
    """A writable file-like object that discards everything written to it."""

    def write(self, buf):
        # Intentionally a no-op; only the write() interface matters.
        pass
+
+
def generate_context_hash(topsrcdir, image_path, args=None):
    """Return the SHA-256 hex digest of the docker context for ``image_path``
    without materializing the archive anywhere."""
    sink = VoidWriter()
    return stream_context_tar(topsrcdir, image_path, sink, args=args)
+
+
class HashingWriter:
    """Wrap a writable file object, computing a SHA-256 digest of every byte
    passed through ``write`` while forwarding it unchanged."""

    def __init__(self, writer):
        self._writer = writer
        self._hash = hashlib.sha256()

    def write(self, buf):
        # Feed the hash first, then forward the bytes.
        self._hash.update(buf)
        self._writer.write(buf)

    def hexdigest(self):
        """Return the hex digest of everything written so far."""
        return self._hash.hexdigest()
+
+
def create_context_tar(topsrcdir, context_dir, out_path, args=None):
    """Assemble the docker build context under ``context_dir`` into a gzipped
    tarball written to ``out_path``.

    The context's Dockerfile is scanned for special comment directives:

    * ``# %include <path>`` — add the repo-relative file (or every file under
      a directory) to the context under ``topsrcdir/``.
    * ``# %ARG <name>`` — substitute ``$<name>`` in subsequent lines with the
      value found in ``args`` (VOLUME definitions excepted).

    Returns the SHA-256 hex digest of the created archive.
    """
    archive_name = os.path.basename(out_path)
    with open(out_path, "wb") as out_fh:
        return stream_context_tar(
            topsrcdir,
            context_dir,
            out_fh,
            image_name=archive_name,
            args=args,
        )
+
+
# Files shipped alongside every image that uses the run-task wrapper.
RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task")
RUN_TASK_FILES = {
    f"run-task/{path}": os.path.join(RUN_TASK_ROOT, path)
    for path in [
        "run-task",
        "fetch-content",
        "hgrc",
        "robustcheckout.py",
    ]
}
# Dockerfile lines spliced in for `# %include-run-task`.
# NOTE: the original list was missing a comma after the robustcheckout.py
# entry, silently concatenating two adjacent string literals.  The rendered
# Dockerfile text was identical (each literal ends in \n), but the list now
# has one element per COPY line as intended.
RUN_TASK_SNIPPET = [
    "COPY run-task/run-task /usr/local/bin/run-task\n",
    "COPY run-task/fetch-content /usr/local/bin/fetch-content\n",
    "COPY run-task/robustcheckout.py /usr/local/mercurial/robustcheckout.py\n",
    "COPY run-task/hgrc /etc/mercurial/hgrc.d/mozilla.rc\n",
]
+
+
def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None):
    """Like create_context_tar, but streams the tar file to the `out_file` file
    object.

    Returns the SHA-256 hex digest of the streamed archive.
    """
    archive_files = {}
    replace = []
    content = []

    topsrcdir = os.path.abspath(topsrcdir)
    context_dir = os.path.join(topsrcdir, context_dir)

    # Seed the archive with every file in the context directory, keyed by the
    # path relative to that directory.
    # NOTE(review): these handles are never explicitly closed; they stay open
    # until garbage collection.
    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            archive_path = source_path[len(context_dir) + 1 :]
            archive_files[archive_path] = open(source_path, "rb")

    # Parse Dockerfile for special syntax of extra files to include.
    content = []
    with open(os.path.join(context_dir, "Dockerfile")) as fh:
        for line in fh:
            # `# %ARG name` registers a $name -> args[name] substitution for
            # all subsequent lines; the directive itself is dropped.
            if line.startswith("# %ARG"):
                p = line[len("# %ARG ") :].strip()
                if not args or p not in args:
                    raise Exception(f"missing argument: {p}")
                replace.append((re.compile(rf"\${p}\b"), args[p]))
                continue

            # Apply every substitution registered so far.
            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith("# %include"):
                continue

            # `# %include-run-task` pulls in the run-task wrapper files plus
            # the COPY lines that install them.
            if line.strip() == "# %include-run-task":
                content.extend(RUN_TASK_SNIPPET)
                archive_files.update(RUN_TASK_FILES)
                continue

            # `# %include <path>` adds a repo-relative file or directory to
            # the context under topsrcdir/.
            p = line[len("# %include ") :].strip()
            if os.path.isabs(p):
                raise Exception("extra include path cannot be absolute: %s" % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            # NOTE(review): a plain prefix check also accepts sibling
            # directories such as `<topsrcdir>-evil`; comparing against
            # topsrcdir + os.sep would be stricter.
            if not fs_path.startswith(topsrcdir):
                raise Exception("extra include path outside topsrcdir: %s" % p)

            if not os.path.exists(fs_path):
                raise Exception("extra include path does not exist: %s" % p)

            if os.path.isdir(fs_path):
                # Include every file beneath the directory, preserving its
                # layout under topsrcdir/<p>/.
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1 :]
                        archive_path = os.path.join("topsrcdir", p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join("topsrcdir", p)
                archive_files[archive_path] = fs_path

    # The (possibly rewritten) Dockerfile replaces the on-disk one.
    archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8"))

    # Hash the gzipped stream as it is written out.
    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files, image_name)
    return writer.hexdigest()
+
+
@memoize
def image_paths():
    """Return a map of image name to paths containing their Dockerfile."""
    config = load_yaml("taskcluster", "ci", "docker-image", "kind.yml")
    paths = {}
    for name, task in config["tasks"].items():
        # A task may point at a different context dir via "definition".
        paths[name] = os.path.join(IMAGE_DIR, task.get("definition", name))
    return paths
+
+
def image_path(name):
    """Return the context directory for image ``name``, falling back to
    ``IMAGE_DIR/name`` for images not declared in the kind config."""
    return image_paths().get(name, os.path.join(IMAGE_DIR, name))
+
+
@memoize
def parse_volumes(image):
    """Parse VOLUME entries from a Dockerfile for an image.

    Returns a set of volume path strings.  Array (JSON) syntax is rejected.
    """
    dockerfile = os.path.join(image_path(image), "Dockerfile")
    volumes = set()

    with open(dockerfile, "rb") as fh:
        for raw_line in fh:
            raw_line = raw_line.strip()
            # We assume VOLUME definitions don't use %ARGS.
            if not raw_line.startswith(b"VOLUME "):
                continue

            spec = raw_line.split(None, 1)[1]
            if spec.startswith(b"["):
                raise ValueError(
                    "cannot parse array syntax for VOLUME; "
                    "convert to multiple entries"
                )

            volumes.update(entry.decode("utf-8") for entry in spec.split())

    return volumes
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py
new file mode 100644
index 0000000000..bf786e92e4
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py
@@ -0,0 +1,54 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import hashlib
+from pathlib import Path
+
+from taskgraph.util import path as mozpath
+from taskgraph.util.memoize import memoize
+
+
@memoize
def hash_path(path):
    """Return the SHA-256 hex digest of the file at ``path``.

    Results are memoized on the path string.
    """
    content = Path(path).read_bytes()
    return hashlib.sha256(content).hexdigest()
+
+
+def _find_files(base_path):
+ for path in Path(base_path).rglob("*"):
+ if path.is_file():
+ yield str(path)
+
+
def hash_paths(base_path, patterns):
    """
    Give a list of path patterns, return a digest of the contents of all
    the corresponding files, similarly to git tree objects or mercurial
    manifests.

    Each file is hashed. The list of all hashes and file paths is then
    itself hashed to produce the result.

    Raises:
        Exception: if any pattern matches no file.
    """
    digest = hashlib.sha256()

    # Perf fix: walk the tree once up front instead of re-walking the
    # filesystem for every pattern.
    all_files = list(_find_files(base_path))

    found = set()
    for pattern in patterns:
        matches = [path for path in all_files if mozpath.match(path, pattern)]
        if not matches:
            raise Exception("%s did not match anything" % pattern)
        found.update(matches)

    for path in sorted(found):
        # NOTE(review): paths yielded by _find_files already include
        # base_path, so join(base_path, path) only stays correct when
        # base_path is absolute — confirm against callers.
        digest.update(
            "{} {}\n".format(
                hash_path(mozpath.abspath(mozpath.join(base_path, path))),
                mozpath.normsep(path),
            ).encode("utf-8")
        )
    return digest.hexdigest()
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py
new file mode 100644
index 0000000000..9b0c5a44fb
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py
@@ -0,0 +1,97 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from .attributes import keymatch
+
+
def evaluate_keyed_by(
    value, item_name, attributes, defer=None, enforce_single_match=True
):
    """
    Resolve a value that may be expressed as nested ``by-<attribute>`` maps.

    A plain value is returned unchanged.  A single-key dict of the form
    ``{"by-foo": {...alternatives...}}`` is resolved by looking up attribute
    ``foo`` in ``attributes`` and picking the matching alternative, falling
    back to the ``default`` key.  Resolution repeats until a literal value
    remains, so by-* maps may be nested arbitrarily deep.

    Args:
        value: The value (or by-* map) to resolve.
        item_name (str): Used to generate useful error messages.
        attributes (dict): Attribute values consulted for by-* lookups.
        defer (list): Attribute names whose by-* level should be left
            unresolved for a later pass.
        enforce_single_match (bool): If True (default), raise when more than
            one alternative matches.

    Raises:
        Exception: for a pointless lone 'default', an ambiguous match, or no
            match at all.
    """
    while True:
        if not isinstance(value, dict) or len(value) != 1:
            return value
        ((value_key, alternatives),) = value.items()
        if not value_key.startswith("by-"):
            return value

        keyed_by = value_key[len("by-") :]

        if defer and keyed_by in defer:
            return value

        if len(alternatives) == 1 and "default" in alternatives:
            # Error out when only 'default' is specified as only alternatives,
            # because we don't need to by-{keyed_by} there.
            raise Exception(
                "Keyed-by '{}' unnecessary with only value 'default' "
                "found, when determining item {}".format(keyed_by, item_name)
            )

        key = attributes.get(keyed_by)
        if key is None:
            if "default" not in alternatives:
                raise Exception(
                    "No attribute {} and no value for 'default' found "
                    "while determining item {}".format(keyed_by, item_name)
                )
            value = alternatives["default"]
            continue

        matches = keymatch(alternatives, key)
        if enforce_single_match and len(matches) > 1:
            raise Exception(
                "Multiple matching values for {} {!r} found while "
                "determining item {}".format(keyed_by, key, item_name)
            )
        if matches:
            value = matches[0]
            continue

        raise Exception(
            "No {} matching {!r} nor 'default' found while determining item {}".format(
                keyed_by, key, item_name
            )
        )
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py
new file mode 100644
index 0000000000..56b513e74c
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py
@@ -0,0 +1,40 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Imported from
+# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#923-949
+
+
+import functools
+
+
class memoize(dict):
    """Decorator caching a callable's results, keyed by positional arguments.

    The decorator instance itself is the cache (it subclasses dict).  When
    applied to an instance method, results are instead cached on the instance
    in a ``_<funcname>`` attribute dict, so they die with the instance.
    """

    def __init__(self, func):
        self.func = func
        functools.update_wrapper(self, func)

    def __call__(self, *args):
        try:
            return self[args]
        except KeyError:
            result = self.func(*args)
            self[args] = result
            return result

    def method_call(self, instance, *args):
        cache_attr = "_%s" % self.func.__name__
        try:
            cache = getattr(instance, cache_attr)
        except AttributeError:
            cache = {}
            setattr(instance, cache_attr, cache)
        try:
            return cache[args]
        except KeyError:
            result = self.func(instance, *args)
            cache[args] = result
            return result

    def __get__(self, instance, cls):
        # Descriptor protocol: bind the instance so method calls route
        # through method_call.
        bound = functools.partial(self.method_call, instance)
        return functools.update_wrapper(bound, self.func)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py
new file mode 100644
index 0000000000..6233a98a40
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py
@@ -0,0 +1,97 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import re
+
+from taskgraph.util.taskcluster import get_artifact_url
+from taskgraph.util.time import json_time_from_now
+
# Matches an embedded reference like "<build>"; group 1 is the dependency
# label (or "self" / "decision").
TASK_REFERENCE_PATTERN = re.compile("<([^>]+)>")
# Matches "<dependency/artifact/path>"; group 1 is the dependency label,
# group 2 the artifact path.
ARTIFACT_REFERENCE_PATTERN = re.compile("<([^/]+)/([^>]+)>")
+
+
+def _recurse(val, param_fns):
+ def recurse(val):
+ if isinstance(val, list):
+ return [recurse(v) for v in val]
+ elif isinstance(val, dict):
+ if len(val) == 1:
+ for param_key, param_fn in param_fns.items():
+ if set(val.keys()) == {param_key}:
+ return param_fn(val[param_key])
+ return {k: recurse(v) for k, v in val.items()}
+ else:
+ return val
+
+ return recurse(val)
+
+
def resolve_timestamps(now, task_def):
    """Replace every ``{'relative-datestamp': ...}`` node in ``task_def`` with
    the absolute JSON time computed relative to ``now``."""
    fns = {"relative-datestamp": lambda stamp: json_time_from_now(stamp, now)}
    return _recurse(task_def, fns)
+
+
def resolve_task_references(label, task_def, task_id, decision_task_id, dependencies):
    """Resolve all instances of ``{'task-reference': '..<..>..'} ``
    and ``{'artifact-reference`: '..<dependency/artifact/path>..'}``
    in the given task definition, using the given dependencies.

    Args:
        label (str): Label of the task being resolved (used in errors).
        task_def (dict): The task definition to rewrite.
        task_id (str): Task ID substituted for ``<self>``.
        decision_task_id (str): Task ID substituted for ``<decision>``.
        dependencies (dict): Maps dependency name -> task ID for ``<name>``.

    Raises:
        KeyError: on a reference to an unknown dependency, or an
            artifact-reference to self.
    """

    def task_reference(val):
        def repl(match):
            key = match.group(1)
            if key == "self":
                return task_id
            elif key == "decision":
                return decision_task_id
            try:
                return dependencies[key]
            except KeyError:
                # handle escaping '<' : the literal text "<<>" renders as "<"
                if key == "<":
                    return key
                raise KeyError(f"task '{label}' has no dependency named '{key}'")

        return TASK_REFERENCE_PATTERN.sub(repl, val)

    def artifact_reference(val):
        def repl(match):
            dependency, artifact_name = match.group(1, 2)

            if dependency == "self":
                raise KeyError(f"task '{label}' can't reference artifacts of self")
            elif dependency == "decision":
                task_id = decision_task_id
            else:
                try:
                    task_id = dependencies[dependency]
                except KeyError:
                    raise KeyError(
                        "task '{}' has no dependency named '{}'".format(
                            label, dependency
                        )
                    )

            # Only public/ artifacts may be referenced — presumably because
            # their URLs need no authentication (see get_artifact_url).
            assert artifact_name.startswith(
                "public/"
            ), "artifact-reference only supports public artifacts, not `{}`".format(
                artifact_name
            )
            return get_artifact_url(task_id, artifact_name)

        return ARTIFACT_REFERENCE_PATTERN.sub(repl, val)

    return _recurse(
        task_def,
        {
            "task-reference": task_reference,
            "artifact-reference": artifact_reference,
        },
    )
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py
new file mode 100644
index 0000000000..728b648ac1
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py
@@ -0,0 +1,172 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Like :py:mod:`os.path`, with a reduced set of functions, and with normalized path
+separators (always use forward slashes).
+Also contains a few additional utilities not found in :py:mod:`os.path`.
+"""
+
+# Imported from
+# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozpack/path.py
+
+
+import os
+import posixpath
+import re
+
+
def normsep(path):
    """
    Normalize path separators, by using forward slashes instead of whatever
    :py:const:`os.sep` is.
    """
    for sep in (os.sep, os.altsep):
        if sep and sep != "/":
            path = path.replace(sep, "/")
    return path
+
+
def relpath(path, start):
    """os.path.relpath with normalized separators, returning "" (rather than
    ".") when the two paths are equal."""
    rel = normsep(os.path.relpath(path, start))
    if rel == ".":
        return ""
    return rel
+
+
def realpath(path):
    """Canonical path (symlinks resolved) with normalized separators."""
    resolved = os.path.realpath(path)
    return normsep(resolved)
+
+
def abspath(path):
    """Absolute path with normalized separators."""
    absolute = os.path.abspath(path)
    return normsep(absolute)
+
+
def join(*paths):
    """os.path.join, with the result normalized to forward slashes."""
    joined = os.path.join(*paths)
    return normsep(joined)
+
+
def normpath(path):
    """Collapse redundant separators and up-level references in the
    separator-normalized path."""
    normalized = normsep(path)
    return posixpath.normpath(normalized)
+
+
def dirname(path):
    """Directory component of the separator-normalized path."""
    normalized = normsep(path)
    return posixpath.dirname(normalized)
+
+
def commonprefix(paths):
    """Longest common string prefix of the separator-normalized paths."""
    normalized = [normsep(p) for p in paths]
    return posixpath.commonprefix(normalized)
+
+
def basename(path):
    """Final component of ``path`` (note: no separator normalization here)."""
    return os.path.split(path)[1]
+
+
def splitext(path):
    """``(root, ext)`` of the separator-normalized path."""
    normalized = normsep(path)
    return posixpath.splitext(normalized)
+
+
def split(path):
    """
    Return the normalized path as a list of its components.

    ``split('foo/bar/baz')`` returns ``['foo', 'bar', 'baz']``
    """
    normalized = normsep(path)
    return normalized.split("/")
+
+
def basedir(path, bases):
    """
    Given a list of directories (`bases`), return which one contains the given
    path. If several matches are found, the deepest base directory is returned.

    ``basedir('foo/bar/baz', ['foo', 'baz', 'foo/bar'])`` returns ``'foo/bar'``
    (`'foo'` and `'foo/bar'` both match, but `'foo/bar'` is the deepest match)
    """
    path = normsep(path)
    normalized_bases = [normsep(b) for b in bases]
    if path in normalized_bases:
        return path
    # Reverse-sorted so a longer (deeper) base wins over its prefix.
    for candidate in sorted(normalized_bases, reverse=True):
        if not candidate or path.startswith(candidate + "/"):
            return candidate
+
+
# Cache of glob patterns translated to compiled regexes, keyed by the
# original pattern string (filled lazily by match()).
re_cache = {}
# Python versions < 3.7 return r'\/' for re.escape('/').
# The two variants below account for that difference when rewriting the
# escaped '**' glob into its regex replacement.
if re.escape("/") == "/":
    MATCH_STAR_STAR_RE = re.compile(r"(^|/)\\\*\\\*/")
    MATCH_STAR_STAR_END_RE = re.compile(r"(^|/)\\\*\\\*$")
else:
    MATCH_STAR_STAR_RE = re.compile(r"(^|\\\/)\\\*\\\*\\\/")
    MATCH_STAR_STAR_END_RE = re.compile(r"(^|\\\/)\\\*\\\*$")
+
+
def match(path, pattern):
    """
    Return whether the given path matches the given pattern.
    An asterisk can be used to match any string, including the null string, in
    one part of the path:

    ``foo`` matches ``*``, ``f*`` or ``fo*o``

    However, an asterisk matching a subdirectory may not match the null string:

    ``foo/bar`` does *not* match ``foo/*/bar``

    If the pattern matches one of the ancestor directories of the path, the
    patch is considered matching:

    ``foo/bar`` matches ``foo``

    Two adjacent asterisks can be used to match files and zero or more
    directories and subdirectories.

    ``foo/bar`` matches ``foo/**/bar``, or ``**/bar``
    """
    # Empty pattern matches everything.
    if not pattern:
        return True
    if pattern not in re_cache:
        # Translate the glob into a regex: escape everything, rewrite the
        # escaped '**' into "any run of directories", then '*' into
        # "anything except a separator"; the trailing "(?:/.*)?$" makes a
        # match on an ancestor directory count as a match.
        p = re.escape(pattern)
        p = MATCH_STAR_STAR_RE.sub(r"\1(?:.+/)?", p)
        p = MATCH_STAR_STAR_END_RE.sub(r"(?:\1.+)?", p)
        p = p.replace(r"\*", "[^/]*") + "(?:/.*)?$"
        re_cache[pattern] = re.compile(p)
    return re_cache[pattern].match(path) is not None
+
+
def rebase(oldbase, base, relativepath):
    """
    Return `relativepath` relative to `base` instead of `oldbase`.

    One of `base`/`oldbase` must be an ancestor of the other (asserted via
    basedir below).
    """
    if base == oldbase:
        return relativepath
    if len(base) < len(oldbase):
        # base is an ancestor of oldbase: prepend the path from base down
        # to oldbase.
        assert basedir(oldbase, [base]) == base
        relbase = relpath(oldbase, base)
        result = join(relbase, relativepath)
    else:
        # oldbase is an ancestor of base: strip the extra leading components.
        assert basedir(base, [oldbase]) == oldbase
        relbase = relpath(base, oldbase)
        result = relpath(relativepath, relbase)
    result = normpath(result)
    # Preserve a trailing slash from the input (normpath drops it).
    if relativepath.endswith("/") and not result.endswith("/"):
        result += "/"
    return result
+
+
def ancestors(path):
    """Emit the parent directories of a path.

    Args:
        path (str): Path to emit parents of.

    Yields:
        str: Path of parent directory (starting with ``path`` itself, ending
        when os.path.dirname stops changing).
    """
    while path:
        yield path
        parent = os.path.dirname(path)
        if parent == path:
            break
        path = parent
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py
new file mode 100644
index 0000000000..3eb61dfbf3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py
@@ -0,0 +1,52 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import inspect
+import os
+
+
def find_object(path):
    """
    Find a Python object given a path of the form <modulepath>:<objectpath>.
    Conceptually equivalent to

        def find_object(modulepath, objectpath):
            import <modulepath> as mod
            return mod.<objectpath>

    Raises:
        ValueError: if ``path`` does not contain exactly one colon.
    """
    if path.count(":") != 1:
        raise ValueError(f'python path {path!r} does not have the form "module:object"')

    modulepath, objectpath = path.split(":")
    obj = __import__(modulepath)
    # __import__ returns the top-level package; walk down to the module,
    # then down to the requested attribute.
    for attr in modulepath.split(".")[1:] + objectpath.split("."):
        obj = getattr(obj, attr)
    return obj
+
+
def import_sibling_modules(exceptions=None):
    """
    Import all Python modules that are siblings of the calling module.

    Args:
        exceptions (list): A list of file names to exclude (caller and
            __init__.py are implicitly excluded).
    """
    # Identify the module that called us by inspecting the stack.
    frame = inspect.stack()[1]
    mod = inspect.getmodule(frame[0])

    name = os.path.basename(mod.__file__)
    excs = {"__init__.py", name}
    if exceptions:
        excs.update(exceptions)

    # Siblings of a regular module live in its parent package, so drop the
    # last dotted component; for a package's __init__ the package name is
    # already the right prefix.
    modpath = mod.__name__
    if not name.startswith("__init__.py"):
        modpath = modpath.rsplit(".", 1)[0]

    # NOTE(review): os.listdir order is arbitrary, so the import order of
    # siblings is not deterministic across platforms.
    for f in os.listdir(os.path.dirname(mod.__file__)):
        if f.endswith(".py") and f not in excs:
            __import__(modpath + "." + f[:-3])
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py
new file mode 100644
index 0000000000..55d74f479a
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py
@@ -0,0 +1,22 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Imported from
+# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#115-127
+
+
class ReadOnlyDict(dict):
    """A dict subclass whose item assignment, deletion and update raise.

    Note: only ``__setitem__``, ``__delitem__`` and ``update`` are blocked;
    other mutators inherited from dict still work.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __delitem__(self, key):
        raise Exception("Object does not support deletion.")

    def __setitem__(self, key, value):
        raise Exception("Object does not support assignment.")

    def update(self, *args, **kwargs):
        raise Exception("Object does not support update.")
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py
new file mode 100644
index 0000000000..3989f71182
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py
@@ -0,0 +1,260 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import collections
+import pprint
+import re
+
+import voluptuous
+
+import taskgraph
+
+from .keyed_by import evaluate_keyed_by
+
+
def validate_schema(schema, obj, msg_prefix):
    """
    Validate that object satisfies schema. If not, generate a useful exception
    beginning with msg_prefix.

    Skipped entirely when taskgraph.fast is set.
    """
    if taskgraph.fast:
        return
    try:
        schema(obj)
    except voluptuous.MultipleInvalid as exc:
        lines = [msg_prefix] + [str(error) for error in exc.errors]
        raise Exception("\n".join(lines) + "\n" + pprint.pformat(obj))
+
+
def optionally_keyed_by(*arguments):
    """
    Mark a schema value as optionally keyed by any of a number of fields. The
    schema is the last argument, and the remaining fields are taken to be the
    field names. For example:

        'some-value': optionally_keyed_by(
            'test-platform', 'build-platform',
            Any('a', 'b', 'c'))

    The resulting schema will allow nesting of `by-test-platform` and
    `by-build-platform` in either order.
    """
    *fields, schema = arguments

    def validator(obj):
        if isinstance(obj, dict) and len(obj) == 1:
            ((key, mapping),) = obj.items()
            if key.startswith("by-") and key[len("by-") :] in fields:
                # Validate each alternative recursively, prefixing error
                # paths with the by-* key and alternative name.
                result = {}
                for subkey, subvalue in mapping.items():
                    try:
                        result[subkey] = validator(subvalue)
                    except voluptuous.Invalid as e:
                        e.prepend([key, subkey])
                        raise
                return result
        return Schema(schema)(obj)

    return validator
+
+
def resolve_keyed_by(
    item, field, item_name, defer=None, enforce_single_match=True, **extra_values
):
    """
    Resolve a possibly ``by-*``-keyed value in-place within ``item``.

    ``field`` is a dotted path (e.g. ``"job.chunks"``) into ``item``.  If the
    value found there is a ``by-<attr>`` mapping, it is replaced with the
    alternative selected using ``item``'s own keys plus ``extra_values`` as
    the attribute set (see evaluate_keyed_by).  If the path cannot be
    traversed, ``item`` is returned unchanged.

    For example, given item::

        job:
            test-platform: linux128
            chunks:
                by-test-platform:
                    macosx-10.11/debug: 13
                    win.*: 6
                    default: 12

    a call to ``resolve_keyed_by(item, 'job.chunks', 'thing-name')`` mutates
    item in-place so that ``job.chunks`` becomes ``12``.

    Args:
        item (dict): Object being evaluated (mutated in-place).
        field (str): Dotted path of the key to resolve.
        item_name (str): Used to generate useful error messages.
        defer (list): by-* attribute names to leave unresolved for a later
            pass.
        enforce_single_match (bool): If True (default), raise when more than
            one alternative matches.
        extra_values (kwargs): Additional values available for reference
            from by-<field>.

    Returns:
        dict: ``item`` (also modified in-place).
    """
    # Walk the dotted path; bail out (returning item untouched) if any
    # component is missing or not a dict.
    *parents, leaf = field.split(".")
    container = item
    for component in parents:
        if component not in container:
            return item
        container = container[component]
        if not isinstance(container, dict):
            return item

    if leaf not in container:
        return item

    container[leaf] = evaluate_keyed_by(
        value=container[leaf],
        item_name=f"`{field}` in `{item_name}`",
        defer=defer,
        enforce_single_match=enforce_single_match,
        attributes=dict(item, **extra_values),
    )

    return item
+
+
+# Schemas for YAML files should use dashed identifiers by default. If there are
+# components of the schema for which there is a good reason to use another format,
+# they can be excepted here.
# Schemas for YAML files should use dashed identifiers by default. If there are
# components of the schema for which there is a good reason to use another format,
# they can be excepted here.  Entries are either literal key names (matched
# against the rendered schema path) or callables taking that path and
# returning True to except it (see check_schema's `excepted` helper).
EXCEPTED_SCHEMA_IDENTIFIERS = [
    # upstream-artifacts and artifact-map are handed directly to scriptWorker,
    # which expects interCaps
    "upstream-artifacts",
    "artifact-map",
]
+
+
def check_schema(schema):
    """Lint a voluptuous schema: every string key must be a dashed,
    lower-case identifier unless excepted via EXCEPTED_SCHEMA_IDENTIFIERS.

    Raises:
        RuntimeError: on a non-conforming key or an unexpected key type.
    """
    identifier_re = re.compile(r"^\$?[a-z][a-z0-9-]*$")

    def excepted(item):
        # An exception entry is either a literal key name (matched against
        # the rendered path) or a callable predicate on the path.
        for esi in EXCEPTED_SCHEMA_IDENTIFIERS:
            if isinstance(esi, str):
                if f"[{esi!r}]" in item:
                    return True
            elif esi(item):
                return True
        return False

    def iter(path, sch):
        def check_identifier(path, k):
            # Bare `str` keys and NotIn validators are always acceptable.
            if k in (str,) or k in (str, voluptuous.Extra):
                pass
            elif isinstance(k, voluptuous.NotIn):
                pass
            elif isinstance(k, str):
                if not identifier_re.match(k) and not excepted(path):
                    raise RuntimeError(
                        "YAML schemas should use dashed lower-case identifiers, "
                        "not {!r} @ {}".format(k, path)
                    )
            elif isinstance(k, (voluptuous.Optional, voluptuous.Required)):
                # Unwrap the marker and check the wrapped key.
                check_identifier(path, k.schema)
            elif isinstance(k, (voluptuous.Any, voluptuous.All)):
                # Check every alternative/conjunct.
                for v in k.validators:
                    check_identifier(path, v)
            elif not excepted(path):
                raise RuntimeError(
                    "Unexpected type in YAML schema: {} @ {}".format(
                        type(k).__name__, path
                    )
                )

        # Recurse through mappings, sequences, and Any alternatives.
        if isinstance(sch, collections.abc.Mapping):
            for k, v in sch.items():
                child = f"{path}[{k!r}]"
                check_identifier(child, k)
                iter(child, v)
        elif isinstance(sch, (list, tuple)):
            for i, v in enumerate(sch):
                iter(f"{path}[{i}]", v)
        elif isinstance(sch, voluptuous.Any):
            for v in sch.validators:
                iter(path, v)

    iter("schema", schema.schema)
+
+
class Schema(voluptuous.Schema):
    """
    Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks
    in the process.
    """

    def __init__(self, *args, check=True, **kwargs):
        super().__init__(*args, **kwargs)

        # check=False opts this schema out of the dashed-identifier lint.
        self.check = check
        if not taskgraph.fast and self.check:
            check_schema(self)

    def extend(self, *args, **kwargs):
        schema = super().extend(*args, **kwargs)

        if self.check:
            check_schema(schema)
        # We want twice extend schema to be checked too.
        # (voluptuous returns a plain voluptuous.Schema; forcing our subclass
        # keeps check_schema running on further .extend() calls.)
        schema.__class__ = Schema
        return schema

    def _compile(self, schema):
        # In fast mode skip compilation entirely, making validation a no-op.
        if taskgraph.fast:
            return
        return super()._compile(schema)

    def __getitem__(self, item):
        # Convenience: expose the underlying schema dict via subscripting.
        return self.schema[item]
+
+
# Schema fragment for a task's optimization setting: either None (always run)
# or exactly one of the strategies below.
OptimizationSchema = voluptuous.Any(
    # always run this task (default)
    None,
    # search the index for the given index namespaces, and replace this task if found
    # the search occurs in order, with the first match winning
    {"index-search": [str]},
    # skip this task if none of the given file patterns match
    {"skip-unless-changed": [str]},
)

# shortcut for a string where task references are allowed
taskref_or_string = voluptuous.Any(
    str,
    {voluptuous.Required("task-reference"): str},
    {voluptuous.Required("artifact-reference"): str},
)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py
new file mode 100644
index 0000000000..d695767f05
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py
@@ -0,0 +1,40 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+SHELL_QUOTE_RE = re.compile(r"[\\\t\r\n \'\"#<>&|`(){}$;\*\?]")
+
+
+def _quote(s):
+ """Given a string, returns a version that can be used literally on a shell
+ command line, enclosing it with single quotes if necessary.
+
+ As a special case, if given an int, returns a string containing the int,
+ not enclosed in quotes.
+ """
+ if type(s) == int:
+ return "%d" % s
+
+ # Empty strings need to be quoted to have any significance
+ if s and not SHELL_QUOTE_RE.search(s) and not s.startswith("~"):
+ return s
+
+ # Single quoted strings can contain any characters unescaped except the
+ # single quote itself, which can't even be escaped, so the string needs to
+ # be closed, an escaped single quote added, and reopened.
+ t = type(s)
+ return t("'%s'") % s.replace(t("'"), t("'\\''"))
+
+
def quote(*strings):
    """Given one or more strings, returns a quoted string that can be used
    literally on a shell command line.

    >>> quote('a', 'b')
    "a b"
    >>> quote('a b', 'c')
    "'a b' c"
    """
    return " ".join(map(_quote, strings))
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py
new file mode 100644
index 0000000000..a830a473b3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py
@@ -0,0 +1,373 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import datetime
+import functools
+import logging
+import os
+
+import requests
+import taskcluster_urls as liburls
+from requests.packages.urllib3.util.retry import Retry
+
+from taskgraph.task import Task
+from taskgraph.util import yaml
+from taskgraph.util.memoize import memoize
+
logger = logging.getLogger(__name__)

# This is set to True for `mach taskgraph action-callback --test`; the
# mutating helpers below (cancel, rerun, purge, ...) then only log what they
# would have done.
testing = False

# Default rootUrl to use if none is given in the environment; this should point
# to the production Taskcluster deployment used for CI.
PRODUCTION_TASKCLUSTER_ROOT_URL = None

# the maximum number of parallel Taskcluster API calls to make (sizes the
# shared HTTP connection pool)
CONCURRENCY = 50
+
+
@memoize
def get_root_url(use_proxy):
    """Get the current TASKCLUSTER_ROOT_URL.

    When running in a task, this must come from $TASKCLUSTER_ROOT_URL; when run
    on the command line, a default may be provided that points to the
    production deployment of Taskcluster. If use_proxy is set, this attempts to
    get TASKCLUSTER_PROXY_URL instead, failing if it is not set.
    """
    if use_proxy:
        proxy_url = os.environ.get("TASKCLUSTER_PROXY_URL")
        if proxy_url is not None:
            return liburls.normalize_root_url(proxy_url)
        if "TASK_ID" not in os.environ:
            raise RuntimeError(
                "taskcluster-proxy is not available when not executing in a task"
            )
        raise RuntimeError("taskcluster-proxy is not enabled for this task")

    root_url = os.environ.get("TASKCLUSTER_ROOT_URL")
    if root_url is not None:
        suffix = (
            " with taskcluster-proxy"
            if "TASKCLUSTER_PROXY_URL" in os.environ
            else ""
        )
        logger.debug(f"Running in Taskcluster instance {root_url}{suffix}")
        return liburls.normalize_root_url(root_url)

    if "TASK_ID" in os.environ:
        raise RuntimeError("$TASKCLUSTER_ROOT_URL must be set when running in a task")

    if PRODUCTION_TASKCLUSTER_ROOT_URL is None:
        raise RuntimeError(
            "Could not detect Taskcluster instance, set $TASKCLUSTER_ROOT_URL"
        )

    logger.debug("Using default TASKCLUSTER_ROOT_URL")
    return liburls.normalize_root_url(PRODUCTION_TASKCLUSTER_ROOT_URL)
+
+
def requests_retry_session(
    retries,
    backoff_factor=0.1,
    status_forcelist=(500, 502, 503, 504),
    concurrency=CONCURRENCY,
    session=None,
):
    """Return a requests Session that retries transient failures and whose
    connection pool is sized for `concurrency` parallel calls."""
    session = session or requests.Session()

    # Default HTTPAdapter uses 10 connections. Mount custom adapter to increase
    # that limit. Connections are established as needed, so using a large value
    # should not negatively impact performance.
    adapter = requests.adapters.HTTPAdapter(
        pool_connections=concurrency,
        pool_maxsize=concurrency,
        max_retries=Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        ),
    )
    for scheme in ("http://", "https://"):
        session.mount(scheme, adapter)

    return session
+
+
@memoize
def get_session():
    # Shared, memoized session so that retry behaviour and connection pooling
    # apply to every Taskcluster API call made by this process.
    return requests_retry_session(retries=5)
+
+
def _do_request(url, method=None, **kwargs):
    """Issue an HTTP request via the shared session.

    Without an explicit `method`, POSTs when body kwargs are given and GETs
    otherwise. Raises requests.HTTPError for 4xx/5xx responses.
    """
    if method is None:
        method = "post" if kwargs else "get"

    if method == "get":
        kwargs["stream"] = True

    response = getattr(get_session(), method)(url, **kwargs)

    if response.status_code >= 400:
        # Consume content before raise_for_status, so that the connection can be
        # reused.
        response.content
        response.raise_for_status()
    return response
+
+
def _handle_artifact(path, response):
    # Deserialize known structured formats by file extension; anything else is
    # handed back as a (transparently decompressed) raw stream.
    if path.endswith(".json"):
        return response.json()
    if path.endswith(".yml"):
        return yaml.load_stream(response.text)
    raw = response.raw
    raw.read = functools.partial(raw.read, decode_content=True)
    return raw
+
+
def get_artifact_url(task_id, path, use_proxy=False):
    """Return a URL for the given artifact of the given task."""
    template = liburls.api(
        get_root_url(False), "queue", "v1", "task/{}/artifacts/{}"
    )
    url = template.format(task_id, path)
    if not use_proxy:
        return url

    # Until Bug 1405889 is deployed, we can't download directly
    # from the taskcluster-proxy. Work around by using the /bewit
    # endpoint instead.
    # The bewit URL is the body of a 303 redirect, which we don't
    # want to follow (which fetches a potentially large resource).
    response = _do_request(
        os.environ["TASKCLUSTER_PROXY_URL"] + "/bewit",
        data=url,
        allow_redirects=False,
    )
    return response.text
+
+
def get_artifact(task_id, path, use_proxy=False):
    """
    Returns the artifact with the given path for the given task id.

    If the path ends with ".json" or ".yml", the content is deserialized as,
    respectively, json or yaml, and the corresponding python data (usually
    dict) is returned.
    For other types of content, a file-like object is returned.
    """
    url = get_artifact_url(task_id, path, use_proxy)
    return _handle_artifact(path, _do_request(url))
+
+
def list_artifacts(task_id, use_proxy=False):
    """Return the artifact metadata entries for the given task."""
    url = get_artifact_url(task_id, "", use_proxy).rstrip("/")
    return _do_request(url).json()["artifacts"]
+
+
def get_artifact_prefix(task):
    """Return the artifact prefix for `task` (a task-description dict or a
    Task object), defaulting to "public/build" when none is set.

    Raises:
        TypeError: if `task` is neither a dict nor a Task.
    """
    if isinstance(task, dict):
        prefix = task.get("attributes", {}).get("artifact_prefix")
    elif isinstance(task, Task):
        prefix = task.attributes.get("artifact_prefix")
    else:
        # TypeError is the conventional exception for a bad argument type; it
        # is still an Exception subclass, so existing broad handlers keep
        # working.
        raise TypeError(f"Can't find artifact-prefix of non-task: {task}")
    return prefix or "public/build"
+
+
def get_artifact_path(task, path):
    """Return `path` joined onto the task's artifact prefix."""
    prefix = get_artifact_prefix(task)
    return f"{prefix}/{path}"
+
+
def get_index_url(index_path, use_proxy=False, multiple=False):
    """Build the index-service URL for one path ("task") or many ("tasks")."""
    plural = "s" if multiple else ""
    template = liburls.api(get_root_url(use_proxy), "index", "v1", "task{}/{}")
    return template.format(plural, index_path)
+
+
def find_task_id(index_path, use_proxy=False):
    """Return the taskId indexed at `index_path`; KeyError if nothing is
    indexed there."""
    try:
        response = _do_request(get_index_url(index_path, use_proxy))
    except requests.exceptions.HTTPError as e:
        if e.response.status_code != 404:
            raise
        raise KeyError(f"index path {index_path} not found")
    return response.json()["taskId"]
+
+
def get_artifact_from_index(index_path, artifact_path, use_proxy=False):
    """Fetch (and deserialize, where applicable) an artifact through the
    index service's indirect artifact endpoint."""
    full_path = f"{index_path}/artifacts/{artifact_path}"
    return _handle_artifact(full_path, _do_request(get_index_url(full_path, use_proxy)))
+
+
def list_tasks(index_path, use_proxy=False):
    """
    Returns a list of task_ids where each task_id is indexed under a path
    in the index. Results are sorted by expiration date from oldest to newest.
    """
    found = []
    request_body = {}
    while True:
        resp = _do_request(
            get_index_url(index_path, use_proxy, multiple=True), json=request_body
        ).json()
        found.extend(resp["tasks"])
        token = resp.get("continuationToken")
        if not token:
            break
        request_body = {"continuationToken": token}

    # We can sort on expires because in the general case
    # all of these tasks should be created with the same expires time so they end up in
    # order from earliest to latest action. If more correctness is needed, consider
    # fetching each task and sorting on the created date.
    found.sort(key=lambda entry: parse_time(entry["expires"]))
    return [entry["taskId"] for entry in found]
+
+
def parse_time(timestamp):
    """Turn a "JSON timestamp" as used in TC APIs into a datetime"""
    fmt = "%Y-%m-%dT%H:%M:%S.%fZ"
    return datetime.datetime.strptime(timestamp, fmt)
+
+
def get_task_url(task_id, use_proxy=False):
    """Return the queue-service URL for the given task."""
    return liburls.api(get_root_url(use_proxy), "queue", "v1", "task/{}").format(task_id)
+
+
def get_task_definition(task_id, use_proxy=False):
    """Fetch the full task definition for `task_id` and return it as a dict
    (the parsed JSON response from the queue service)."""
    response = _do_request(get_task_url(task_id, use_proxy))
    return response.json()
+
+
def cancel_task(task_id, use_proxy=False):
    """Cancels a task given a task_id. In testing mode, just logs that it would
    have cancelled."""
    if testing:
        logger.info(f"Would have cancelled {task_id}.")
        return
    _do_request(get_task_url(task_id, use_proxy) + "/cancel", json={})
+
+
def status_task(task_id, use_proxy=False):
    """Gets the status of a task given a task_id.

    In testing mode, no request is made; the call is only logged.

    Args:
        task_id (str): A task id.
        use_proxy (bool): Whether to use taskcluster-proxy (default: False)

    Returns:
        dict: A dictionary object as defined here:
        https://docs.taskcluster.net/docs/reference/platform/queue/api#status
        In testing mode, None is returned instead.
    """
    if testing:
        logger.info(f"Would have gotten status for {task_id}.")
        # Make the previously-implicit testing-mode return value explicit.
        return None
    resp = _do_request(get_task_url(task_id, use_proxy) + "/status")
    return resp.json().get("status", {})
+
+
def state_task(task_id, use_proxy=False):
    """Gets the state of a task given a task_id.

    This is a subset of the data returned by :func:`status_task`. In testing
    mode the request is skipped (and None is returned).

    Args:
        task_id (str): A task id.
        use_proxy (bool): Whether to use taskcluster-proxy (default: False)

    Returns:
        str: The state of the task, one of
        ``pending, running, completed, failed, exception, unknown``.
    """
    if testing:
        logger.info(f"Would have gotten state for {task_id}.")
        return None
    return status_task(task_id, use_proxy=use_proxy).get("state") or "unknown"
+
+
def rerun_task(task_id):
    """Rerun a task given a task_id. In testing mode, just logs that it would
    have rerun the task."""
    if testing:
        logger.info(f"Would have rerun {task_id}.")
        return
    _do_request(get_task_url(task_id, use_proxy=True) + "/rerun", json={})
+
+
def get_current_scopes():
    """Get the current scopes. This only makes sense in a task with the Taskcluster
    proxy enabled, where it returns the actual scopes accorded to the task."""
    resp = _do_request(liburls.api(get_root_url(True), "auth", "v1", "scopes/current"))
    return resp.json().get("scopes", [])
+
+
def get_purge_cache_url(provisioner_id, worker_type, use_proxy=False):
    """Build the purge-cache endpoint URL for the given worker pool."""
    template = liburls.api(
        get_root_url(use_proxy), "purge-cache", "v1", "purge-cache/{}/{}"
    )
    return template.format(provisioner_id, worker_type)
+
+
def purge_cache(provisioner_id, worker_type, cache_name, use_proxy=False):
    """Requests a cache purge from the purge-caches service."""
    if testing:
        logger.info(f"Would have purged {provisioner_id}/{worker_type}/{cache_name}.")
        return
    logger.info(f"Purging {provisioner_id}/{worker_type}/{cache_name}.")
    url = get_purge_cache_url(provisioner_id, worker_type, use_proxy)
    _do_request(url, json={"cacheName": cache_name})
+
+
def send_email(address, subject, content, link, use_proxy=False):
    """Sends an email using the notify service"""
    logger.info(f"Sending email to {address}.")
    payload = {
        "address": address,
        "subject": subject,
        "content": content,
        "link": link,
    }
    _do_request(liburls.api(get_root_url(use_proxy), "notify", "v1", "email"), json=payload)
+
+
def list_task_group_incomplete_tasks(task_group_id):
    """Generate the taskIds of incomplete tasks in a task group.

    A task is incomplete while its state is running, pending or unscheduled.
    Queue pagination is followed via continuationToken.
    """
    # The list endpoint is loop-invariant; build it once instead of on every
    # pagination round-trip.
    url = liburls.api(
        get_root_url(False),
        "queue",
        "v1",
        f"task-group/{task_group_id}/list",
    )
    params = {}
    while True:
        resp = _do_request(url, method="get", params=params).json()
        # Iterate the status objects directly rather than materializing an
        # intermediate list.
        for status in (t["status"] for t in resp["tasks"]):
            if status["state"] in ("running", "pending", "unscheduled"):
                yield status["taskId"]
        token = resp.get("continuationToken")
        if not token:
            break
        params = {"continuationToken": token}
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py
new file mode 100644
index 0000000000..7b545595ef
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py
@@ -0,0 +1,54 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Tools for interacting with existing taskgraphs.
+"""
+
+
+from taskgraph.util.taskcluster import find_task_id, get_artifact
+
+
def find_decision_task(parameters, graph_config):
    """Given the parameters for this action, find the taskId of the decision
    task"""
    repository_type = parameters.get("repository_type", "hg")
    if repository_type == "hg":
        index = "{}.v2.{}.pushlog-id.{}.decision".format(
            graph_config["trust-domain"],
            parameters["project"],
            parameters["pushlog_id"],
        )
    elif repository_type == "git":
        index = "{}.v2.{}.revision.{}.taskgraph.decision".format(
            graph_config["trust-domain"],
            parameters["project"],
            parameters["head_rev"],
        )
    else:
        raise Exception(f"Unknown repository_type {repository_type}!")
    return find_task_id(index)
+
+
def find_existing_tasks_from_previous_kinds(
    full_task_graph, previous_graph_ids, rebuild_kinds
):
    """Given a list of previous decision/action taskIds and kinds to ignore
    from the previous graphs, return a dictionary of labels-to-taskids to use
    as ``existing_tasks`` in the optimization step."""
    # The set of reusable labels depends only on the full task graph and
    # rebuild_kinds, so compute it once instead of per previous graph.
    kind_labels = {
        t.label
        for t in full_task_graph.tasks.values()
        if t.attributes["kind"] not in rebuild_kinds
    }
    existing_tasks = {}
    for previous_graph_id in previous_graph_ids:
        label_to_taskid = get_artifact(previous_graph_id, "public/label-to-taskid.json")
        for label in set(label_to_taskid).intersection(kind_labels):
            existing_tasks[label] = label_to_taskid[label]
    return existing_tasks
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py
new file mode 100644
index 0000000000..465e4a43de
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py
@@ -0,0 +1,50 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import copy
+
+
def merge_to(source, dest):
    """
    Merge dict and arrays (override scalar values)

    Keys from source override keys from dest, and elements from lists in source
    are appended to lists in dest.

    :param dict source: to copy from
    :param dict dest: to copy to (modified in place)
    """
    for key, value in source.items():
        existing = dest.get(key)

        # A type mismatch (including a missing key, where existing is None)
        # means the source value simply replaces the destination value.
        if type(value) != type(existing):  # noqa
            dest[key] = value
        elif isinstance(value, dict):
            # Same-typed dicts merge recursively.
            merge_to(value, existing)
        elif isinstance(value, list):
            # Same-typed lists concatenate, destination elements first.
            dest[key] = existing + value
        else:
            # Scalars: source wins.
            dest[key] = value

    return dest
+
+
def merge(*objects):
    """
    Merge the given objects, using the semantics described for merge_to, with
    objects later in the list taking precedence. From an inheritance
    perspective, "parents" should be listed before "children".

    Returns the result without modifying any arguments.
    """
    # Deep-copy the first object so no argument is mutated, then fold each
    # remaining object into it in order — the same left-to-right order the
    # recursive formulation produces.
    result = copy.deepcopy(objects[0])
    for obj in objects[1:]:
        merge_to(obj, result)
    return result
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py
new file mode 100644
index 0000000000..e511978b5f
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py
@@ -0,0 +1,115 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Python port of the ms.js node module this is not a direct port some things are
+# more complicated or less precise and we lean on time delta here.
+
+
+import datetime
+import re
+
+PATTERN = re.compile(r"((?:\d+)?\.?\d+) *([a-z]+)")
+
+
def seconds(value):
    """Interpret ``value`` (int or numeric string) as seconds."""
    return datetime.timedelta(seconds=int(value))


def minutes(value):
    """Interpret ``value`` (int or numeric string) as minutes."""
    return datetime.timedelta(minutes=int(value))


def hours(value):
    """Interpret ``value`` (int or numeric string) as hours."""
    return datetime.timedelta(hours=int(value))


def days(value):
    """Interpret ``value`` (int or numeric string) as days."""
    return datetime.timedelta(days=int(value))


def months(value):
    """Approximate ``value`` months as 30-day spans."""
    # See warning in years(), below
    return datetime.timedelta(days=int(value) * 30)


def years(value):
    """Approximate ``value`` years as 365-day spans."""
    # Warning here "years" are vague don't use this for really sensitive date
    # computation the idea is to give you a absolute amount of time in the
    # future which is not the same thing as "precisely on this date next year"
    return datetime.timedelta(days=int(value) * 365)
+
+
# Map every accepted unit spelling to its timedelta factory.
ALIASES = {
    "seconds": seconds, "second": seconds, "s": seconds,
    "minutes": minutes, "minute": minutes, "min": minutes,
    "hours": hours, "hour": hours, "h": hours,
    "days": days, "day": days, "d": days,
    "months": months, "month": months, "mo": months,
    "years": years, "year": years, "y": years,
}
+
+
class InvalidString(Exception):
    """Raised when an input string does not look like "<number><unit>"."""

    pass


class UnknownTimeMeasurement(Exception):
    """Raised when the unit part is not one of the ALIASES keys."""

    pass
+
+
def value_of(input_str):
    """
    Convert a string to a json date in the future
    :param str input_str: (ex: 1d, 2d, 6years, 2 seconds)
    :returns: Unit given in seconds
    """
    match = PATTERN.search(input_str)
    if match is None or len(match.groups()) < 2:
        raise InvalidString(f"'{input_str}' is invalid string")

    value, unit = match.groups()

    factory = ALIASES.get(unit)
    if factory is None:
        raise UnknownTimeMeasurement(
            "{} is not a valid time measure use one of {}".format(
                unit, sorted(ALIASES.keys())
            )
        )

    return factory(value)
+
+
def json_time_from_now(input_str, now=None, datetime_format=False):
    """
    :param str input_str: Input string (see value of)
    :param datetime now: Optionally set the definition of `now`
    :param boolean datetime_format: Set `True` to get a `datetime` output
    :returns: JSON string representation of time in future.
    """
    base = now if now is not None else datetime.datetime.utcnow()
    future = base + value_of(input_str)

    if datetime_format is True:
        return future

    # Sorta a big hack but the json schema validator for date does not like the
    # ISO dates until 'Z' (for timezone) is added...
    # Microseconds are excluded (see bug 1381801)
    return future.isoformat(timespec="milliseconds") + "Z"
+
+
def current_json_time(datetime_format=False):
    """
    :param boolean datetime_format: Set `True` to get a `datetime` output
    :returns: JSON string representation of the current time.
    """
    now = datetime.datetime.utcnow()
    if datetime_format is True:
        return now
    # Microseconds are excluded (see bug 1381801)
    return now.isoformat(timespec="milliseconds") + "Z"
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py
new file mode 100644
index 0000000000..9d0c032a1b
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py
@@ -0,0 +1,64 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+_JOINED_SYMBOL_RE = re.compile(r"([^(]*)\(([^)]*)\)$")
+
+
def split_symbol(treeherder_symbol):
    """Split a symbol expressed as grp(sym) into its two parts. If no group is
    given, the returned group is '?'"""
    if "(" not in treeherder_symbol:
        return "?", treeherder_symbol

    match = _JOINED_SYMBOL_RE.match(treeherder_symbol)
    if not match:
        raise Exception(f"`{treeherder_symbol}` is not a valid treeherder symbol.")
    return match.groups()
+
+
def join_symbol(group, symbol):
    """Perform the reverse of split_symbol, combining the given group and
    symbol. If the group is '?', then it is omitted."""
    return symbol if group == "?" else f"{group}({symbol})"
+
+
def add_suffix(treeherder_symbol, suffix):
    """Add a suffix to a treeherder symbol that may contain a group."""
    group, symbol = split_symbol(treeherder_symbol)
    return join_symbol(group, symbol + str(suffix))
+
+
def replace_group(treeherder_symbol, new_group):
    """Replace the group of a treeherder symbol with ``new_group``, keeping
    the symbol part unchanged."""
    _, symbol = split_symbol(treeherder_symbol)
    return join_symbol(new_group, symbol)
+
+
def inherit_treeherder_from_dep(job, dep_job):
    """Inherit treeherder defaults from dep_job"""
    treeherder = job.get("treeherder", {})

    dep_th = dep_job.task.get("extra", {}).get("treeherder", {})
    platform = dep_th.get("machine", {}).get("platform", "")
    # The first collection key (e.g. "opt"/"debug"); an empty collection is a
    # hard error, matching the upstream expectation.
    collection = list(dep_th.get("collection", {}).keys())[0]

    treeherder.setdefault("platform", f"{platform}/{collection}")
    treeherder.setdefault("tier", dep_th.get("tier", 1))
    # Does not set symbol
    treeherder.setdefault("kind", "build")
    return treeherder
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py
new file mode 100644
index 0000000000..ba1d909019
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py
@@ -0,0 +1,539 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import os
+import re
+import subprocess
+from abc import ABC, abstractmethod, abstractproperty
+from shutil import which
+
+import requests
+from redo import retry
+
+from taskgraph.util.path import ancestors
+
+PUSHLOG_TMPL = "{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1"
+
+logger = logging.getLogger(__name__)
+
+
class Repository(ABC):
    """Abstract interface over a version-control checkout.

    Concrete subclasses (Mercurial, Git) implement these operations by
    invoking the underlying VCS binary through :meth:`run`.
    """

    # Both mercurial and git use sha1 as revision identifiers. Luckily, both define
    # the same value as the null revision.
    #
    # https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7
    # https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30
    NULL_REVISION = "0000000000000000000000000000000000000000"

    def __init__(self, path):
        # Working-copy root; all commands run with this as cwd.
        self.path = path
        self.binary = which(self.tool)
        if self.binary is None:
            raise OSError(f"{self.tool} not found!")
        self._valid_diff_filter = ("m", "a", "d")

        self._env = os.environ.copy()

    def run(self, *args: str, **kwargs):
        """Run the VCS binary with ``args`` and return its stdout as text.

        ``return_codes`` (kwarg) lists non-zero exit codes to tolerate; a
        tolerated failure returns "" instead of raising.
        """
        return_codes = kwargs.pop("return_codes", [])
        cmd = (self.binary,) + args

        try:
            return subprocess.check_output(
                cmd, cwd=self.path, env=self._env, encoding="utf-8", **kwargs
            )
        except subprocess.CalledProcessError as e:
            if e.returncode in return_codes:
                return ""
            raise

    @abstractproperty
    def tool(self) -> str:
        """Version control system being used, either 'hg' or 'git'."""

    @abstractproperty
    def head_rev(self) -> str:
        """Hash of HEAD revision."""

    @abstractproperty
    def base_rev(self):
        """Hash of revision the current topic branch is based on."""

    @abstractproperty
    def branch(self):
        """Current branch or bookmark the checkout has active."""

    @abstractproperty
    def all_remote_names(self):
        """Name of all configured remote repositories."""

    @abstractproperty
    def default_remote_name(self):
        """Name the VCS defines for the remote repository when cloning
        it for the first time. This name may not exist anymore if users
        changed the default configuration, for instance."""

    @abstractproperty
    def remote_name(self):
        """Name of the remote repository."""

    def _get_most_suitable_remote(self, remote_instructions):
        # Heuristic: a single remote, or one matching the VCS default name,
        # is unambiguous; otherwise fall back to the first and warn, passing
        # along `remote_instructions` (VCS-specific fix-up steps).
        remotes = self.all_remote_names
        if len(remotes) == 1:
            return remotes[0]

        if self.default_remote_name in remotes:
            return self.default_remote_name

        first_remote = remotes[0]
        logger.warning(
            f"Unable to determine which remote repository to use between: {remotes}. "
            f'Arbitrarily using the first one "{first_remote}". Please set an '
            f"`{self.default_remote_name}` remote if the arbitrarily selected one "
            f"is not right. To do so: {remote_instructions}"
        )

        return first_remote

    @abstractproperty
    def default_branch(self):
        """Name of the default branch."""

    @abstractmethod
    def get_url(self, remote=None):
        """Get URL of the upstream repository."""

    @abstractmethod
    def get_commit_message(self, revision=None):
        """Commit message of specified revision or current commit."""

    @abstractmethod
    def get_changed_files(self, diff_filter, mode="unstaged", rev=None, base_rev=None):
        """Return a list of files that are changed in:
        * either this repository's working copy,
        * or at a given revision (``rev``)
        * or between 2 revisions (``base_rev`` and ``rev``)

        ``diff_filter`` controls which kinds of modifications are returned.
        It is a string which may only contain the following characters:

            A - Include files that were added
            D - Include files that were deleted
            M - Include files that were modified

        By default, all three will be included.

        ``mode`` can be one of 'unstaged', 'staged' or 'all'. Only has an
        effect on git. Defaults to 'unstaged'.

        ``rev`` is a specifier for which changesets to consider for
        changes. The exact meaning depends on the vcs system being used.

        ``base_rev`` specifies the range of changesets. This parameter cannot
        be used without ``rev``. The range includes ``rev`` but excludes
        ``base_rev``.
        """

    @abstractmethod
    def get_outgoing_files(self, diff_filter, upstream):
        """Return a list of changed files compared to upstream.

        ``diff_filter`` works the same as `get_changed_files`.
        ``upstream`` is a remote ref to compare against. If unspecified,
        this will be determined automatically. If there is no remote ref,
        a MissingUpstreamRepo exception will be raised.
        """

    @abstractmethod
    def working_directory_clean(self, untracked=False, ignored=False):
        """Determine if the working directory is free of modifications.

        Returns True if the working directory does not have any file
        modifications. False otherwise.

        By default, untracked and ignored files are not considered. If
        ``untracked`` or ``ignored`` are set, they influence the clean check
        to factor these file classes into consideration.
        """

    @abstractmethod
    def update(self, ref):
        """Update the working directory to the specified reference."""

    @abstractmethod
    def find_latest_common_revision(self, base_ref_or_rev, head_rev):
        """Find the latest revision that is common to both the given
        ``head_rev`` and ``base_ref_or_rev``"""

    @abstractmethod
    def does_revision_exist_locally(self, revision):
        """Check whether this revision exists in the local repository.

        If this function returns an unexpected value, then make sure
        the revision was fetched from the remote repository."""
+
+
class HgRepository(Repository):
    """Mercurial implementation of :class:`Repository`."""

    tool = "hg"
    default_remote_name = "default"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # HGPLAIN disables user configuration (aliases, localization, ...) so
        # that command output stays stable and machine-parseable.
        self._env["HGPLAIN"] = "1"

    @property
    def head_rev(self):
        return self.run("log", "-r", ".", "-T", "{node}").strip()

    @property
    def base_rev(self):
        return self.run("log", "-r", "last(ancestors(.) and public())", "-T", "{node}")

    @property
    def branch(self):
        # Mercurial bookmarks act like git branches; the active one (if any)
        # is recorded in .hg/bookmarks.current.
        bookmarks_fn = os.path.join(self.path, ".hg", "bookmarks.current")
        if os.path.exists(bookmarks_fn):
            with open(bookmarks_fn) as f:
                bookmark = f.read()
            return bookmark or None

        return None

    @property
    def all_remote_names(self):
        remotes = self.run("paths", "--quiet").splitlines()
        if not remotes:
            raise RuntimeError("No remotes defined")
        return remotes

    @property
    def remote_name(self):
        return self._get_most_suitable_remote(
            "Edit .hg/hgrc and add:\n\n[paths]\ndefault = $URL",
        )

    @property
    def default_branch(self):
        # Mercurial recommends keeping "default"
        # https://www.mercurial-scm.org/wiki/StandardBranching#Don.27t_use_a_name_other_than_default_for_your_main_development_branch
        return "default"

    def get_url(self, remote="default"):
        return self.run("path", "-T", "{url}", remote).strip()

    def get_commit_message(self, revision=None):
        revision = revision or self.head_rev
        # Fix: previously this always queried "." and silently ignored the
        # `revision` argument.
        return self.run("log", "-r", revision, "-T", "{desc}")

    def _format_diff_filter(self, diff_filter, for_status=False):
        df = diff_filter.lower()
        assert all(f in self._valid_diff_filter for f in df)

        # When looking at the changes in the working directory, the hg status
        # command uses 'd' for files that have been deleted with a non-hg
        # command, and 'r' for files that have been `hg rm`ed. Use both.
        return df.replace("d", "dr") if for_status else df

    def _files_template(self, diff_filter):
        # Build an hg log template emitting one filename per line for the
        # requested change kinds.
        template = ""
        df = self._format_diff_filter(diff_filter)
        if "a" in df:
            template += "{file_adds % '{file}\\n'}"
        if "d" in df:
            template += "{file_dels % '{file}\\n'}"
        if "m" in df:
            template += "{file_mods % '{file}\\n'}"
        return template

    def get_changed_files(
        self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None
    ):
        if rev is None:
            if base_rev is not None:
                raise ValueError("Cannot specify `base_rev` without `rev`")
            # Use --no-status to print just the filename.
            df = self._format_diff_filter(diff_filter, for_status=True)
            return self.run("status", "--no-status", f"-{df}").splitlines()
        else:
            template = self._files_template(diff_filter)
            revision_argument = rev if base_rev is None else f"{base_rev}~-1::{rev}"
            return self.run("log", "-r", revision_argument, "-T", template).splitlines()

    def get_outgoing_files(self, diff_filter="ADM", upstream=None):
        template = self._files_template(diff_filter)

        if not upstream:
            return self.run(
                "log", "-r", "draft() and ancestors(.)", "--template", template
            ).split()

        return self.run(
            "outgoing",
            "-r",
            ".",
            "--quiet",
            "--template",
            template,
            upstream,
            # `hg outgoing` exits 1 when there are no outgoing changes.
            return_codes=(1,),
        ).split()

    def working_directory_clean(self, untracked=False, ignored=False):
        args = ["status", "--modified", "--added", "--removed", "--deleted"]
        if untracked:
            args.append("--unknown")
        if ignored:
            args.append("--ignored")

        # If output is empty, there are no entries of requested status, which
        # means we are clean.
        return not len(self.run(*args).strip())

    def update(self, ref):
        return self.run("update", "--check", ref)

    def find_latest_common_revision(self, base_ref_or_rev, head_rev):
        return self.run(
            "log",
            "-r",
            f"last(ancestors('{base_ref_or_rev}') and ancestors('{head_rev}'))",
            "--template",
            "{node}",
        ).strip()

    def does_revision_exist_locally(self, revision):
        try:
            return self.run("log", "-r", revision).strip() != ""
        except subprocess.CalledProcessError as e:
            # Error code 255 comes with the message:
            # "abort: unknown revision $REVISION"
            if e.returncode == 255:
                return False
            raise
+
+
class GitRepository(Repository):
    """Repository abstraction implemented on top of the ``git`` CLI.

    All queries shell out to ``git`` via ``self.run`` (provided by the
    ``Repository`` base class) and parse its textual output.
    """

    tool = "git"
    default_remote_name = "origin"

    # Matches the symref line of `git ls-remote --symref <remote> HEAD` and
    # captures the branch name the remote HEAD points at.
    _LS_REMOTE_PATTERN = re.compile(r"ref:\s+refs/heads/(?P<branch_name>\S+)\s+HEAD")

    @property
    def head_rev(self):
        """The revision of the current HEAD commit."""
        return self.run("rev-parse", "--verify", "HEAD").strip()

    @property
    def base_rev(self):
        """The boundary commit between HEAD and the remotes (i.e. the last
        ancestor also reachable from a remote), or HEAD when there is none."""
        refs = self.run(
            "rev-list", "HEAD", "--topo-order", "--boundary", "--not", "--remotes"
        ).splitlines()
        if refs:
            return refs[-1][1:]  # boundary starts with a prefix `-`
        return self.head_rev

    @property
    def branch(self):
        """The checked-out branch name, or None for a detached HEAD."""
        return self.run("branch", "--show-current").strip() or None

    @property
    def all_remote_names(self):
        """All configured remote names.

        Raises:
            RuntimeError: if the repository has no remotes.
        """
        remotes = self.run("remote").splitlines()
        if not remotes:
            raise RuntimeError("No remotes defined")
        return remotes

    @property
    def remote_name(self):
        """The remote tracked by the current branch, or a best-guess remote
        when no upstream is configured."""
        try:
            remote_branch_name = self.run(
                "rev-parse", "--verify", "--abbrev-ref", "--symbolic-full-name", "@{u}"
            ).strip()
            # The upstream has the form "<remote>/<branch>".
            return remote_branch_name.split("/")[0]
        except subprocess.CalledProcessError as e:
            # Error code 128 comes with the message:
            # "fatal: no upstream configured for branch $BRANCH"
            if e.returncode != 128:
                raise

        return self._get_most_suitable_remote("`git remote add origin $URL`")

    @property
    def default_branch(self):
        """The repository's default branch, discovered by trying strategies
        from cheapest to most speculative."""
        try:
            # this one works if the current repo was cloned from an existing
            # repo elsewhere
            return self._get_default_branch_from_cloned_metadata()
        except (subprocess.CalledProcessError, RuntimeError):
            pass

        try:
            # This call works if you have (network) access to the repo
            return self._get_default_branch_from_remote_query()
        except (subprocess.CalledProcessError, RuntimeError):
            pass

        # this one is the last resort in case the remote is not accessible and
        # the local repo is where `git init` was made
        return self._guess_default_branch()

    def _get_default_branch_from_remote_query(self):
        # This function requires network access to the repo
        remote_name = self.remote_name
        output = self.run("ls-remote", "--symref", remote_name, "HEAD")
        matches = self._LS_REMOTE_PATTERN.search(output)
        if not matches:
            raise RuntimeError(
                f'Could not find the default branch of remote repository "{remote_name}". '
                # Bug fix: this part previously lacked the f-prefix, so the
                # literal text "{output}" was shown instead of the output.
                f"Got: {output}"
            )

        branch_name = matches.group("branch_name")
        return f"{remote_name}/{branch_name}"

    def _get_default_branch_from_cloned_metadata(self):
        # "<remote>/HEAD" exists only in clones, where it records the remote's
        # default branch at clone time.
        return self.run("rev-parse", "--abbrev-ref", f"{self.remote_name}/HEAD").strip()

    def _guess_default_branch(self):
        # Accept the guess only if exactly one ref ends with a conventional
        # default-branch name.
        branches = [
            line.strip()
            for line in self.run(
                "branch", "--all", "--no-color", "--format=%(refname)"
            ).splitlines()
            for candidate_branch in ("main", "master", "branches/default/tip")
            if line.strip().endswith(candidate_branch)
        ]

        if len(branches) == 1:
            return branches[0]

        raise RuntimeError(f"Unable to find default branch. Got: {branches}")

    def get_url(self, remote="origin"):
        """The fetch URL configured for *remote*."""
        return self.run("remote", "get-url", remote).strip()

    def get_commit_message(self, revision=None):
        """Return the full commit message of *revision* (defaults to HEAD)."""
        revision = revision or self.head_rev
        # Bug fix: `revision` was previously ignored, so the message of HEAD
        # was always returned no matter which revision was requested.
        return self.run("log", "-n1", "--format=%B", revision)

    def get_changed_files(
        self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None
    ):
        """Return the files changed in the working directory (``rev is None``,
        honoring *mode*) or by the revision (range) ``rev`` / ``base_rev..rev``.

        Args:
            diff_filter: subset of "ADM" selecting added/deleted/modified files.
            mode: one of "unstaged", "staged" or "all"; only used when
                inspecting the working directory.
            rev: revision to inspect; None means the working directory.
            base_rev: lower bound of the revision range; requires `rev`.
        """
        assert all(f.lower() in self._valid_diff_filter for f in diff_filter)

        if rev is None:
            if base_rev is not None:
                raise ValueError("Cannot specify `base_rev` without `rev`")
            cmd = ["diff"]
            if mode == "staged":
                cmd.append("--cached")
            elif mode == "all":
                cmd.append("HEAD")
        else:
            revision_argument = (
                f"{rev}~1..{rev}" if base_rev is None else f"{base_rev}..{rev}"
            )
            cmd = ["log", "--format=format:", revision_argument]

        cmd.append("--name-only")
        cmd.append("--diff-filter=" + diff_filter.upper())

        # Drop the blank separator lines `git log` emits between commits.
        files = self.run(*cmd).splitlines()
        return [f for f in files if f]

    def get_outgoing_files(self, diff_filter="ADM", upstream=None):
        """Return files changed by commits not yet on *upstream* (or on any
        remote when *upstream* is None)."""
        assert all(f.lower() in self._valid_diff_filter for f in diff_filter)

        not_condition = upstream if upstream else "--remotes"

        files = self.run(
            "log",
            "--name-only",
            f"--diff-filter={diff_filter.upper()}",
            "--oneline",
            "--pretty=format:",
            "HEAD",
            "--not",
            not_condition,
        ).splitlines()
        return [f for f in files if f]

    def working_directory_clean(self, untracked=False, ignored=False):
        """Return True when `git status` reports no changes, optionally also
        counting untracked and/or ignored files."""
        args = ["status", "--porcelain"]

        # Even in --porcelain mode, behavior is affected by the
        # ``status.showUntrackedFiles`` option, which means we need to be
        # explicit about how to treat untracked files.
        if untracked:
            args.append("--untracked-files=all")
        else:
            args.append("--untracked-files=no")

        if ignored:
            args.append("--ignored")

        # If output is empty, there are no entries of requested status, which
        # means we are clean.
        return not len(self.run(*args).strip())

    def update(self, ref):
        """Check out *ref* in the working directory."""
        self.run("checkout", ref)

    def find_latest_common_revision(self, base_ref_or_rev, head_rev):
        """Return the merge base of the two given refs/revisions."""
        return self.run("merge-base", base_ref_or_rev, head_rev).strip()

    def does_revision_exist_locally(self, revision):
        """Return True when *revision* names a commit object locally."""
        try:
            return self.run("cat-file", "-t", revision).strip() == "commit"
        except subprocess.CalledProcessError as e:
            # Error code 128 comes with the message:
            # "git cat-file: could not get object info"
            if e.returncode == 128:
                return False
            raise
+
+
def get_repository(path):
    """Get a repository object for the repository at `path`.
    If `path` is not a known VCS repository, raise an exception.
    """
    # Walk `path` and its ancestor directories looking for VCS metadata.
    # (Renamed the loop variable so it no longer shadows the parameter.)
    for candidate in ancestors(path):
        if os.path.isdir(os.path.join(candidate, ".hg")):
            return HgRepository(candidate)
        # NOTE(review): `.git` is tested with `exists` (not `isdir`),
        # presumably because it can be a file in worktrees — confirm.
        if os.path.exists(os.path.join(candidate, ".git")):
            return GitRepository(candidate)

    raise RuntimeError("Current directory is neither a git or hg repository")
+
+
def find_hg_revision_push_info(repository, revision):
    """Given the parameters for this action and a revision, find the
    pushlog_id of the revision.

    Args:
        repository (str): URL of the hg repository to query.
        revision (str): changeset to look up in the pushlog.

    Returns:
        dict: keys ``pushdate``, ``pushid`` and ``user`` for the single push
        containing the revision.

    Raises:
        RuntimeError: if the pushlog does not report exactly one push.
    """
    pushlog_url = PUSHLOG_TMPL.format(repository, revision)

    def query_pushlog(url):
        # Bug fix: the `url` parameter was previously ignored in favour of
        # the closed-over `pushlog_url`; use the argument `retry` passes in.
        r = requests.get(url, timeout=60)
        r.raise_for_status()
        return r

    # Retry the network call a few times; the pushlog can be flaky.
    r = retry(
        query_pushlog,
        args=(pushlog_url,),
        attempts=5,
        sleeptime=10,
    )
    pushes = r.json()["pushes"]
    if len(pushes) != 1:
        raise RuntimeError(
            "Unable to find a single pushlog_id for {} revision {}: {}".format(
                repository, revision, pushes
            )
        )
    pushid = list(pushes.keys())[0]
    return {
        "pushdate": pushes[pushid]["date"],
        "pushid": pushid,
        "user": pushes[pushid]["user"],
    }
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py
new file mode 100644
index 0000000000..5911914f13
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py
@@ -0,0 +1,283 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+import sys
+from abc import ABC, abstractmethod
+
+import attr
+
+from taskgraph.config import GraphConfig
+from taskgraph.parameters import Parameters
+from taskgraph.taskgraph import TaskGraph
+from taskgraph.util.attributes import match_run_on_projects
+from taskgraph.util.treeherder import join_symbol
+
+logger = logging.getLogger(__name__)
+
+
@attr.s(frozen=True)
class Verification(ABC):
    """Base class for a registered verification.

    Wraps the callable that performs the actual check; concrete subclasses
    define how (and with what arguments) it is invoked.
    """

    # The callable performing the verification; its expected signature
    # depends on the concrete subclass.
    func = attr.ib()

    @abstractmethod
    def verify(self, **kwargs) -> None:
        """Run the wrapped verification; implementations raise on failure."""
        pass
+
+
@attr.s(frozen=True)
class InitialVerification(Verification):
    """Verification that doesn't depend on any generation state."""

    def verify(self):
        # The wrapped callable takes no arguments.
        self.func()
+
+
@attr.s(frozen=True)
class GraphVerification(Verification):
    """Verification for a TaskGraph object."""

    # Optional run-on-projects matcher; when set, the verification is
    # skipped for parameters["project"] values it does not match.
    run_on_projects = attr.ib(default=None)

    def verify(
        self, graph: TaskGraph, graph_config: GraphConfig, parameters: Parameters
    ):
        if self.run_on_projects and not match_run_on_projects(
            parameters["project"], self.run_on_projects
        ):
            return

        # `func` is called once per task with a shared `scratch_pad` dict,
        # then one final time with task=None so it can perform whole-graph
        # checks over the state accumulated in `scratch_pad`.
        scratch_pad = {}
        graph.for_each_task(
            self.func,
            scratch_pad=scratch_pad,
            graph_config=graph_config,
            parameters=parameters,
        )
        self.func(
            None,
            graph,
            scratch_pad=scratch_pad,
            graph_config=graph_config,
            parameters=parameters,
        )
+
+
@attr.s(frozen=True)
class ParametersVerification(Verification):
    """Verification for a set of parameters."""

    def verify(self, parameters: Parameters):
        # The wrapped callable receives the Parameters object only.
        self.func(parameters)
+
+
@attr.s(frozen=True)
class KindsVerification(Verification):
    """Verification for kinds."""

    def verify(self, kinds: dict):
        # The wrapped callable receives the kinds dict only.
        self.func(kinds)
+
+
@attr.s(frozen=True)
class VerificationSequence:
    """
    Container for a sequence of verifications over a TaskGraph. Each
    verification is represented as a callable taking (task, taskgraph,
    scratch_pad), called for each task in the taskgraph, and one more
    time with no task but with the taskgraph and the same scratch_pad
    that was passed for each task.
    """

    # Maps verification name -> list of Verification instances registered
    # under that name via `add`.
    _verifications = attr.ib(factory=dict)
    # Names with dedicated verification classes; any other name (e.g.
    # "full_task_graph") falls back to GraphVerification in `add`.
    _verification_types = {
        "graph": GraphVerification,
        "initial": InitialVerification,
        "kinds": KindsVerification,
        "parameters": ParametersVerification,
    }

    def __call__(self, name, *args, **kwargs):
        # Run every verification registered under `name`, forwarding the
        # caller's arguments to each one's `verify`.
        for verification in self._verifications.get(name, []):
            verification.verify(*args, **kwargs)

    def add(self, name, **kwargs):
        """Return a decorator that registers a function under `name`."""
        cls = self._verification_types.get(name, GraphVerification)

        def wrap(func):
            self._verifications.setdefault(name, []).append(cls(func, **kwargs))
            return func

        return wrap
+
+
# Module-level registry: the functions below self-register through the
# `@verifications.add(...)` decorator and are run via `verifications(name, ...)`.
verifications = VerificationSequence()
+
+
@verifications.add("full_task_graph")
def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    This function verifies that tuple
    (collection.keys(), machine.platform, groupSymbol, symbol) is unique
    for a target task graph.
    """
    if task is None:
        # Final whole-graph pass: nothing to aggregate here.
        return
    task_dict = task.task
    if "extra" not in task_dict:
        return
    extra = task_dict["extra"]
    if "treeherder" not in extra:
        return
    treeherder = extra["treeherder"]

    collection_keys = tuple(sorted(treeherder.get("collection", {}).keys()))
    if len(collection_keys) != 1:
        raise Exception(
            "Task {} can't be in multiple treeherder collections "
            "(the part of the platform after `/`): {}".format(
                task.label, collection_keys
            )
        )
    platform = treeherder.get("machine", {}).get("platform")
    group_symbol = treeherder.get("groupSymbol")
    symbol = treeherder.get("symbol")

    # scratch_pad maps each (platform, collection, group, symbol) tuple to
    # the label of the task that first claimed it.
    key = (platform, collection_keys[0], group_symbol, symbol)
    if key not in scratch_pad:
        scratch_pad[key] = task.label
        return
    raise Exception(
        "Duplicate treeherder platform and symbol in tasks "
        "`{}`and `{}`: {} {}".format(
            task.label,
            scratch_pad[key],
            f"{platform}/{collection_keys[0]}",
            join_symbol(group_symbol, symbol),
        )
    )
+
+
@verifications.add("full_task_graph")
def verify_trust_domain_v2_routes(
    task, taskgraph, scratch_pad, graph_config, parameters
):
    """
    This function ensures that any two tasks have distinct ``index.{trust-domain}.v2`` routes.
    """
    if task is None:
        return
    route_prefix = "index.{}.v2".format(graph_config["trust-domain"])

    # scratch_pad maps each v2 route seen so far to the claiming task label.
    for route in task.task.get("routes", []):
        if not route.startswith(route_prefix):
            continue
        owner = scratch_pad.get(route)
        if owner is not None:
            raise Exception(
                "conflict between {}:{} for route: {}".format(
                    task.label, owner, route
                )
            )
        scratch_pad[route] = task.label
+
+
@verifications.add("full_task_graph")
def verify_routes_notification_filters(
    task, taskgraph, scratch_pad, graph_config, parameters
):
    """
    This function ensures that only understood filters for notifications are
    specified.

    See: https://docs.taskcluster.net/reference/core/taskcluster-notify/docs/usage
    """
    if task is None:
        return
    route_prefix = "notify."
    valid_filters = ("on-any", "on-completed", "on-failed", "on-exception")

    notify_routes = (
        route for route in task.task.get("routes", []) if route.startswith(route_prefix)
    )
    for route in notify_routes:
        # The filter is the final dot-separated component of the route.
        route_filter = route.split(".")[-1]
        if route_filter in valid_filters:
            continue
        raise Exception(
            "{} has invalid notification filter ({})".format(
                task.label, route_filter
            )
        )
+
+
@verifications.add("full_task_graph")
def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config, parameters):
    """Ensure no task depends on a task with a numerically higher tier."""
    tiers = scratch_pad
    if task is not None:
        # Per-task pass: record each task's treeherder tier, defaulting to
        # sys.maxsize ("unknown") when none is set.
        treeherder = task.task.get("extra", {}).get("treeherder", {})
        tiers[task.label] = treeherder.get("tier", sys.maxsize)
        return

    def printable_tier(tier):
        return "unknown" if tier == sys.maxsize else tier

    # Final pass: walk every dependency edge and compare tiers.
    for current in taskgraph.tasks.values():
        tier = tiers[current.label]
        for dep_label in current.dependencies.values():
            dependency = taskgraph[dep_label]
            # Built-in always-optimized tasks and dummy kinds are exempt.
            if dependency.task.get("workerType") == "always-optimized":
                continue
            if "dummy" in dependency.kind:
                continue
            if tier < tiers[dep_label]:
                raise Exception(
                    "{} (tier {}) cannot depend on {} (tier {})".format(
                        current.label,
                        printable_tier(tier),
                        dep_label,
                        printable_tier(tiers[dep_label]),
                    )
                )
+
+
@verifications.add("full_task_graph")
def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    This function verifies that toolchain aliases are not reused.
    """
    if task is None:
        return
    if "toolchain-alias" not in task.attributes:
        return

    # Normalize the attribute: falsy -> no aliases, a single string -> one
    # alias, otherwise assume an iterable of alias strings.
    raw = task.attributes["toolchain-alias"]
    if not raw:
        aliases = []
    elif isinstance(raw, str):
        aliases = [raw]
    else:
        aliases = raw

    # scratch_pad maps each alias seen so far to the claiming task label.
    for alias in aliases:
        if alias in scratch_pad:
            raise Exception(
                "Duplicate toolchain-alias in tasks "
                "`{}`and `{}`: {}".format(
                    task.label,
                    scratch_pad[alias],
                    alias,
                )
            )
        scratch_pad[alias] = task.label
+
+
@verifications.add("optimized_task_graph")
def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters):
    """
    This function ensures that always-optimized tasks have been optimized.
    """
    if task is None:
        return
    # An "always-optimized" workerType surviving into the optimized graph
    # means optimization failed to remove it.
    worker_type = task.task.get("workerType")
    if worker_type == "always-optimized":
        raise Exception(f"Could not optimize the task {task.label!r}")
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py
new file mode 100644
index 0000000000..d71f7e06a3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py
@@ -0,0 +1,75 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import attr
+
+from .keyed_by import evaluate_keyed_by
+from .memoize import memoize
+
+
@attr.s
class _BuiltinWorkerType:
    """Provisioner/worker-type pair for a Taskcluster built-in worker."""

    # NOTE(review): `attr.ib(str)` passes `str` positionally as the *default*
    # value, not as the field type; every call site in this module supplies
    # both values positionally, so the default is never used — confirm
    # whether `attr.ib(type=str)` was intended.
    provisioner = attr.ib(str)
    worker_type = attr.ib(str)

    @property
    def implementation(self):
        """
        Since the list of built-in worker-types is small and fixed, we can get
        away with punning the implementation name (in
        `taskgraph.transforms.task`) and the worker_type.
        """
        return self.worker_type
+
+
# Worker-types handled by Taskcluster itself rather than resolved through the
# graph config's worker aliases; looked up first in the functions below.
_BUILTIN_TYPES = {
    "always-optimized": _BuiltinWorkerType("invalid", "always-optimized"),
    "succeed": _BuiltinWorkerType("built-in", "succeed"),
}
+
+
@memoize
def worker_type_implementation(graph_config, worker_type):
    """Get the worker implementation and OS for the given workerType, where the
    OS represents the host system, not the target OS, in the case of
    cross-compiles."""
    builtin = _BUILTIN_TYPES.get(worker_type)
    if builtin is not None:
        # For the built-in worker-types, we use an `implementation that matches
        # the worker-type.
        return builtin.implementation, None

    # Otherwise resolve the worker-type through the graph config aliases.
    worker_config = evaluate_keyed_by(
        {"by-worker-type": graph_config["workers"]["aliases"]},
        "worker-types.yml",
        {"worker-type": worker_type},
    )
    return worker_config["implementation"], worker_config.get("os")
+
+
@memoize
def get_worker_type(graph_config, alias, level):
    """
    Get the worker type based, evaluating aliases from the graph config.
    """
    builtin = _BUILTIN_TYPES.get(alias)
    if builtin is not None:
        return builtin.provisioner, builtin.worker_type

    level = str(level)
    # Resolve the alias to its provisioner/worker-type configuration ...
    worker_config = evaluate_keyed_by(
        {"by-alias": graph_config["workers"]["aliases"]},
        "graph_config.workers.aliases",
        {"alias": alias},
    )
    # ... then evaluate each field by level and substitute placeholders.
    provisioner = evaluate_keyed_by(
        worker_config["provisioner"], alias, {"level": level}
    ).format(level=level)
    worker_type = evaluate_keyed_by(
        worker_config["worker-type"], alias, {"level": level}
    ).format(level=level, alias=alias)
    return provisioner, worker_type
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py
new file mode 100644
index 0000000000..141c7a16d3
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py
@@ -0,0 +1,36 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import os
+
+from yaml.loader import SafeLoader
+
+
class UnicodeLoader(SafeLoader):
    """SafeLoader that constructs YAML string scalars as plain str values."""

    def construct_yaml_str(self, node):
        # Return the scalar text directly, with no further processing.
        return self.construct_scalar(node)
+
+
+UnicodeLoader.add_constructor("tag:yaml.org,2002:str", UnicodeLoader.construct_yaml_str)
+
+
def load_stream(stream):
    """
    Parse the first YAML document in a stream
    and produce the corresponding Python object.
    """
    yaml_loader = UnicodeLoader(stream)
    try:
        document = yaml_loader.get_single_data()
    finally:
        # Release the loader's internal state whether or not parsing succeeded.
        yaml_loader.dispose()
    return document
+
+
def load_yaml(*parts):
    """Convenience function to load a YAML file in the given path. This is
    useful for loading kind configuration files from the kind path."""
    yaml_path = os.path.join(*parts)
    # Open in binary mode; the YAML loader handles decoding itself.
    with open(yaml_path, "rb") as fh:
        return load_stream(fh)