1 files changed, 263 insertions, 0 deletions
diff --git a/taskcluster/gecko_taskgraph/morph.py b/taskcluster/gecko_taskgraph/morph.py
new file mode 100644
index 0000000000..1d03ddaab6
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/morph.py
@@ -0,0 +1,263 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Graph morphs are modifications to task-graphs that take place *after* the
+optimization phase.
+
+These graph morphs are largely invisible to developers running `./mach`
+locally, so they should be limited to changes that do not modify the meaning of
+the graph.
+"""
+
+# Note that the translation of `{'task-reference': '..'}` and
+# `artifact-reference` are handled in the optimization phase (since
+# optimization involves dealing with taskIds directly).  Similarly,
+# `{'relative-datestamp': '..'}` is handled at the last possible moment during
+# task creation.
+
+
+import copy
+import logging
+import os
+import re
+
+from slugid import nice as slugid
+from taskgraph.graph import Graph
+from taskgraph.morph import register_morph
+from taskgraph.task import Task
+from taskgraph.taskgraph import TaskGraph
+
+from .util.workertypes import get_worker_type
+
+here = os.path.abspath(os.path.dirname(__file__))
+logger = logging.getLogger(__name__)
+MAX_ROUTES = 10
+
+
+def amend_taskgraph(taskgraph, label_to_taskid, to_add):
+    """Add the given tasks to the taskgraph, returning a new taskgraph"""
+    new_tasks = taskgraph.tasks.copy()
+    new_edges = set(taskgraph.graph.edges)
+    for task in to_add:
+        new_tasks[task.task_id] = task
+        assert task.label not in label_to_taskid
+        label_to_taskid[task.label] = task.task_id
+        for depname, dep in task.dependencies.items():
+            new_edges.add((task.task_id, dep, depname))
+
+    taskgraph = TaskGraph(new_tasks, Graph(set(new_tasks), new_edges))
+    return taskgraph, label_to_taskid
+
+
+def derive_misc_task(
+    target_task,
+    purpose,
+    image,
+    taskgraph,
+    label_to_taskid,
+    parameters,
+    graph_config,
+    dependencies,
+):
+    """Create the shell of a task that depends on `dependencies` and on the given docker
+    image."""
+    label = f"{purpose}-{target_task.label}"
+
+    # this is why all docker image tasks are included in the target task graph: we
+    # need to find them in label_to_taskid, even if nothing else required them
+    image_taskid = label_to_taskid["docker-image-" + image]
+
+    provisioner_id, worker_type = get_worker_type(
+        graph_config,
+        parameters,
+        "misc",
+    )
+
+    deps = copy.copy(dependencies)
+    deps["docker-image"] = image_taskid
+
+    task_def = {
+        "provisionerId": provisioner_id,
+        "workerType": worker_type,
+        "dependencies": [d for d in deps.values()],
+        "created": {"relative-datestamp": "0 seconds"},
+        "deadline": target_task.task["deadline"],
+        # no point existing past the parent task's deadline
+        "expires": target_task.task["deadline"],
+        "metadata": {
+            "name": label,
+            "description": f"{purpose} for {target_task.description}",
+            "owner": target_task.task["metadata"]["owner"],
+            "source": target_task.task["metadata"]["source"],
+        },
+        "scopes": [],
+        "payload": {
+            "image": {
+                "path": "public/image.tar.zst",
+                "taskId": image_taskid,
+                "type": "task-image",
+            },
+            "features": {"taskclusterProxy": True},
+            "maxRunTime": 600,
+        },
+    }
+
+    if image_taskid not in taskgraph.tasks:
+        # The task above depends on the replaced docker-image not one in
+        # this current graph.
+        del deps["docker-image"]
+
+    task = Task(
+        kind="misc",
+        label=label,
+        attributes={},
+        task=task_def,
+        dependencies=deps,
+    )
+    task.task_id = slugid()
+    return task
+
+
+# these regular expressions capture route prefixes for which we have a star
+# scope, allowing them to be summarized.  Each should correspond to a star scope
+# in each Gecko `assume:repo:hg.mozilla.org/...` role.
+SCOPE_SUMMARY_REGEXPS = [
+    re.compile(r"(index:insert-task:docker\.images\.v1\.[^.]*\.).*"),
+    re.compile(r"(index:insert-task:gecko\.v2\.[^.]*\.).*"),
+    re.compile(r"(index:insert-task:comm\.v2\.[^.]*\.).*"),
+]
+
+
+def make_index_task(
+    parent_task,
+    taskgraph,
+    label_to_taskid,
+    parameters,
+    graph_config,
+    index_paths,
+    index_rank,
+    purpose,
+    dependencies,
+):
+    task = derive_misc_task(
+        parent_task,
+        purpose,
+        "index-task",
+        taskgraph,
+        label_to_taskid,
+        parameters,
+        graph_config,
+        dependencies,
+    )
+
+    # we need to "summarize" the scopes, otherwise a particularly
+    # namespace-heavy index task might have more scopes than can fit in a
+    # temporary credential.
+    scopes = set()
+    for path in index_paths:
+        scope = f"index:insert-task:{path}"
+        for summ_re in SCOPE_SUMMARY_REGEXPS:
+            match = summ_re.match(scope)
+            if match:
+                scope = match.group(1) + "*"
+                break
+        scopes.add(scope)
+    task.task["scopes"] = sorted(scopes)
+
+    task.task["payload"]["command"] = ["insert-indexes.js"] + index_paths
+    task.task["payload"]["env"] = {
+        "TARGET_TASKID": parent_task.task_id,
+        "INDEX_RANK": index_rank,
+    }
+    return task
+
+
+@register_morph
+def add_index_tasks(taskgraph, label_to_taskid, parameters, graph_config):
+    """
+    The TaskCluster queue only allows 10 routes on a task, but we have tasks
+    with many more routes, for purposes of indexing. This graph morph adds
+    "index tasks" that depend on such tasks and do the index insertions
+    directly, avoiding the limits on task.routes.
+    """
+    logger.debug("Morphing: adding index tasks")
+
+    # Add indexes for tasks that exceed MAX_ROUTES.
+    added = []
+    for label, task in taskgraph.tasks.items():
+        if len(task.task.get("routes", [])) <= MAX_ROUTES:
+            continue
+        index_paths = [
+            r.split(".", 1)[1] for r in task.task["routes"] if r.startswith("index.")
+        ]
+        task.task["routes"] = [
+            r for r in task.task["routes"] if not r.startswith("index.")
+        ]
+        added.append(
+            make_index_task(
+                task,
+                taskgraph,
+                label_to_taskid,
+                parameters,
+                graph_config,
+                index_paths=index_paths,
+                index_rank=task.task.get("extra", {}).get("index", {}).get("rank", 0),
+                purpose="index-task",
+                dependencies={"parent": task.task_id},
+            )
+        )
+
+    if added:
+        taskgraph, label_to_taskid = amend_taskgraph(taskgraph, label_to_taskid, added)
+        logger.info(f"Added {len(added)} index tasks")
+
+    return taskgraph, label_to_taskid
+
+
+@register_morph
+def add_eager_cache_index_tasks(taskgraph, label_to_taskid, parameters, graph_config):
+    """
+    Some tasks (e.g. cached tasks) we want to exist in the index before they even
+    run/complete. Our current use is to allow us to depend on an unfinished cached
+    task in future pushes. This graph morph adds "eager-index tasks" that depend on
+    the decision task and do the index insertions directly, which does not need to
+    wait on the pointed at task to complete.
+    """
+    logger.debug("Morphing: Adding eager cached index's")
+
+    added = []
+    for label, task in taskgraph.tasks.items():
+        if "eager_indexes" not in task.attributes:
+            continue
+        eager_indexes = task.attributes["eager_indexes"]
+        added.append(
+            make_index_task(
+                task,
+                taskgraph,
+                label_to_taskid,
+                parameters,
+                graph_config,
+                index_paths=eager_indexes,
+                index_rank=0,  # Be sure complete tasks get priority
+                purpose="eager-index",
+                dependencies={},
+            )
+        )
+
+    if added:
+        taskgraph, label_to_taskid = amend_taskgraph(taskgraph, label_to_taskid, added)
+        logger.info(f"Added {len(added)} eager index tasks")
+    return taskgraph, label_to_taskid
+
+
+@register_morph
+def add_try_task_duplicates(taskgraph, label_to_taskid, parameters, graph_config):
+    try_config = parameters["try_task_config"]
+    rebuild = try_config.get("rebuild")
+    if rebuild:
+        for task in taskgraph.tasks.values():
+            if task.label in try_config.get("tasks", []):
+                task.attributes["task_duplicates"] = rebuild
+    return taskgraph, label_to_taskid