From 43a97878ce14b72f0981164f87f2e35e14151312 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 11:22:09 +0200 Subject: Adding upstream version 110.0.1. Signed-off-by: Daniel Baumann --- .../taskgraph/transforms/__init__.py | 0 .../taskgraph/transforms/base.py | 157 +++ .../taskgraph/transforms/cached_tasks.py | 90 ++ .../taskgraph/transforms/code_review.py | 23 + .../taskgraph/transforms/docker_image.py | 213 ++++ .../taskgraph/transforms/fetch.py | 335 +++++ .../taskgraph/transforms/job/__init__.py | 438 +++++++ .../taskgraph/transforms/job/common.py | 196 +++ .../taskgraph/transforms/job/index_search.py | 37 + .../taskgraph/transforms/job/run_task.py | 240 ++++ .../taskgraph/transforms/job/toolchain.py | 174 +++ .../taskgraph/transforms/release_notifications.py | 100 ++ .../taskgraph/transforms/task.py | 1288 ++++++++++++++++++++ 13 files changed, 3291 insertions(+) create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py create mode 100644 
# (diffstat, continued)
# third_party/python/taskcluster_taskgraph/taskgraph/transforms/release_notifications.py
# create mode 100644 third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py
# (limited to 'third_party/python/taskcluster_taskgraph/taskgraph/transforms')
#
# diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py
# new file mode 100644
# index 0000000000..e69de29bb2
#
# diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py
# new file mode 100644
# index 0000000000..383e6a4798
# --- /dev/null
# +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py
# @@ -0,0 +1,157 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import re
from typing import AnyStr

import attr

from ..config import GraphConfig
from ..parameters import Parameters
from ..util.memoize import memoize
from ..util.schema import Schema, validate_schema


@attr.s(frozen=True)
class RepoConfig:
    """Immutable record describing one repository known to the task graph."""

    # key of this repository in the graph config's `repositories` mapping
    prefix = attr.ib(type=str)
    name = attr.ib(type=str)
    base_repository = attr.ib(type=str)
    head_repository = attr.ib(type=str)
    head_ref = attr.ib(type=str)
    type = attr.ib(type=str)
    path = attr.ib(type=str, default="")
    head_rev = attr.ib(type=str, default=None)
    ssh_secret_name = attr.ib(type=str, default=None)


@attr.s(frozen=True, cmp=False)
class TransformConfig:
    """
    Configuration handed to every transform as its `config` argument.

    Instances are frozen and compare by identity (`cmp=False`).
    """

    # the name of the current kind
    kind = attr.ib()

    # the path to the kind configuration directory
    path = attr.ib(type=AnyStr)

    # the parsed contents of kind.yml
    config = attr.ib(type=dict)

    # the parameters for this task-graph generation run
    params = attr.ib(type=Parameters)

    # a dict of all the tasks associated with the kind dependencies of the
    # current kind
    kind_dependencies_tasks = attr.ib(type=dict)

    # Global configuration of the taskgraph
    graph_config = attr.ib(type=GraphConfig)

    # whether to write out artifacts for the decision task
    write_artifacts = attr.ib(type=bool)

    @property
    @memoize
    def repo_configs(self):
        """
        Map each repository prefix to its `RepoConfig`.

        The "current" repository is the only configured one, or otherwise the
        single one whose `project-regex` matches the `project` parameter; it
        is described from the run parameters and is always the first entry.
        Every other repository is described from its `default-*` settings.

        Raises `Exception` unless exactly one repository matches the project.
        """
        repositories = self.graph_config["taskgraph"]["repositories"]

        if len(repositories) == 1:
            current_prefix = next(iter(repositories))
        else:
            project = self.params["project"]
            candidates = [
                prefix
                for prefix, repo in repositories.items()
                if re.match(repo["project-regex"], project)
            ]
            if len(candidates) != 1:
                raise Exception(
                    f"Couldn't find repository matching project `{project}`"
                )
            current_prefix = candidates[0]

        current_repo = repositories[current_prefix]
        params = self.params
        configs = {
            current_prefix: RepoConfig(
                prefix=current_prefix,
                name=current_repo["name"],
                base_repository=params["base_repository"],
                head_repository=params["head_repository"],
                head_ref=params["head_ref"],
                head_rev=params["head_rev"],
                type=params["repository_type"],
                ssh_secret_name=current_repo.get("ssh-secret-name"),
            ),
        }

        # Remaining repositories fall back to their configured defaults; the
        # loop adds nothing when only one repository is configured.
        for prefix, repo in repositories.items():
            if prefix == current_prefix:
                continue
            default_repository = repo["default-repository"]
            configs[prefix] = RepoConfig(
                prefix=prefix,
                name=repo["name"],
                base_repository=default_repository,
                head_repository=default_repository,
                head_ref=repo["default-ref"],
                type=repo["type"],
                ssh_secret_name=repo.get("ssh-secret-name"),
            )
        return configs


@attr.s()
class TransformSequence:
    """
    An ordered pipeline of transforms.

    A transform is a callable taking `(config, items)` and returning an
    iterable of transformed items.  The sequence itself has that same
    interface, so sequences can be nested.  Use `@transforms.add` as a
    decorator to append a function to the pipeline.
    """

    _transforms = attr.ib(factory=list)

    def __call__(self, config, items):
        """Feed `items` through every registered transform, in order."""
        result = items
        for transform in self._transforms:
            result = transform(config, result)
            # A transform that forgot to `yield`/return hands back None.
            if result is None:
                raise Exception(f"Transform {transform} is not a generator")
        return result

    def add(self, func):
        """Append `func` to the pipeline and return it unchanged."""
        self._transforms.append(func)
        return func

    def add_validate(self, schema):
        """Append a step validating every item against `schema`."""
        self.add(ValidateSchema(schema))


@attr.s
class ValidateSchema:
    """Transform that checks each task against `schema` and passes it on."""

    schema = attr.ib(type=Schema)

    def _error_prefix(self, config, task):
        """Build the message prefix identifying `task` in validation errors."""
        if "name" in task:
            return "In {kind} kind task {name!r}:".format(
                kind=config.kind, name=task["name"]
            )
        if "label" in task:
            return "In job {label!r}:".format(label=task["label"])
        if "primary-dependency" in task:
            return "In {kind} kind task for {dependency!r}:".format(
                kind=config.kind, dependency=task["primary-dependency"].label
            )
        return "In unknown task:"

    def __call__(self, config, tasks):
        for task in tasks:
            validate_schema(self.schema, task, self._error_prefix(config, task))
            yield task


# diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py
# new file mode 100644
# index 0000000000..57a55dffb3
# --- /dev/null
# +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/cached_tasks.py
# @@ -0,0 +1,90 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


from collections import deque

import taskgraph
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.cached_tasks import add_optimization

transforms = TransformSequence()


def order_tasks(config, tasks):
    """
    Yield `tasks` so that same-kind parent tasks come before their children.

    A dependency counts as a parent when its label starts with the kind's
    label prefix (`build-docker-image-` for the `docker-image` kind,
    `<kind>-` otherwise).  Parents that are not part of `tasks` are not
    waited for.

    Raises `Exception` if the remaining tasks can never be ordered (for
    example because they depend on each other in a cycle); previously this
    situation made the generator spin forever.
    """
    if config.kind == "docker-image":
        kind_prefix = "build-docker-image-"
    else:
        kind_prefix = config.kind + "-"

    pending = deque(tasks)
    task_labels = {task["label"] for task in pending}
    emitted = set()
    # Number of tasks re-queued in a row since the last successful emission.
    deferred = 0
    while pending:
        task = pending.popleft()
        parents = {
            dep_label
            for dep_label in task.get("dependencies", {}).values()
            if dep_label.startswith(kind_prefix)
        }
        if not emitted.issuperset(parents & task_labels):
            pending.append(task)
            deferred += 1
            # Every pending task has been looked at without any progress:
            # nothing can unblock them any more.
            if deferred >= len(pending):
                stuck = ", ".join(sorted(t["label"] for t in pending))
                raise Exception(
                    f"Cannot order {config.kind} tasks; unresolvable "
                    f"(cyclic?) dependencies among: {stuck}"
                )
            continue
        deferred = 0
        emitted.add(task["label"])
        yield task


def format_task_digest(cached_task):
    """Return the `type/name/digest` string identifying a cached task."""
    return "/".join(
        [
            cached_task["type"],
            cached_task["name"],
            cached_task["digest"],
        ]
    )


@transforms.add
def cache_task(config, tasks):
    """
    Turn each task's `cache` stanza into a cached-task optimization.

    The digest of a cached task covers its own `digest-data` plus the
    digests of all of its dependencies, so every dependency must itself be a
    cached task (either from the kind dependencies or emitted earlier by this
    transform).  Tasks without a `cache` stanza pass through untouched, as
    does everything when `taskgraph.fast` is set.

    Raises `Exception` when a cached task depends on an uncached task.
    """
    if taskgraph.fast:
        yield from tasks
        return

    # label -> formatted digest of every cached task seen so far
    digests = {}
    for task in config.kind_dependencies_tasks.values():
        if "cached_task" in task.attributes:
            digests[task.label] = format_task_digest(task.attributes["cached_task"])

    # Parents first, so that their digests are known when children need them.
    for task in order_tasks(config, tasks):
        cache = task.pop("cache", None)
        if cache is None:
            yield task
            continue

        dependency_digests = []
        for p in task.get("dependencies", {}).values():
            if p in digests:
                dependency_digests.append(digests[p])
            else:
                raise Exception(
                    "Cached task {} has uncached parent task: {}".format(
                        task["label"], p
                    )
                )
        digest_data = cache["digest-data"] + sorted(dependency_digests)
        add_optimization(
            config,
            task,
            cache_type=cache["type"],
            cache_name=cache["name"],
            digest_data=digest_data,
        )
        # add_optimization is expected to have filled in
        # task["attributes"]["cached_task"], which is read back here.
        digests[task["label"]] = format_task_digest(task["attributes"]["cached_task"])

        yield task


# diff --git
# a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py
# new file mode 100644
# index 0000000000..bdb655b97d
# --- /dev/null
# +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py
# @@ -0,0 +1,23 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Add soft dependencies and configuration to code-review tasks.
"""


from taskgraph.transforms.base import TransformSequence

transforms = TransformSequence()


@transforms.add
def add_dependencies(config, jobs):
    """
    Make every job softly depend on all code-review tasks.

    The labels of all kind-dependency tasks whose `code-review` attribute is
    exactly `True` are appended to the job's `soft-dependencies` list, which
    is created when missing.
    """
    for job in jobs:
        soft_deps = job.setdefault("soft-dependencies", [])
        for dep_task in config.kind_dependencies_tasks.values():
            # identity check on purpose: truthy non-bool values do not count
            if dep_task.attributes.get("code-review") is True:
                soft_deps.append(dep_task.label)
        yield job


# diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py
# new file mode 100644
# index 0000000000..dd7c01e5a9
# --- /dev/null
# +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py
# @@ -0,0 +1,213 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import json
import logging
import os
import re

from voluptuous import Optional, Required

import taskgraph
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.docker import create_context_tar, generate_context_hash
from taskgraph.util.schema import Schema

from .task import task_description_schema

logger = logging.getLogger(__name__)

CONTEXTS_DIR = "docker-contexts"

DIGEST_RE = re.compile("^[0-9a-f]{64}$")

IMAGE_BUILDER_IMAGE = (
    "taskcluster/image_builder:4.0.0"
    "@sha256:"
    "866c304445334703b68653e1390816012c9e6bdabfbd1906842b5b229e8ed044"
)

transforms = TransformSequence()

docker_image_schema = Schema(
    {
        # Name of the docker image.
        Required("name"): str,
        # Name of the parent docker image.
        Optional("parent"): str,
        # Treeherder symbol.
        Optional("symbol"): str,
        # relative path (from config.path) to the file the docker image was defined
        # in.
        Optional("task-from"): str,
        # Arguments to use for the Dockerfile.
        Optional("args"): {str: str},
        # Name of the docker image definition under taskcluster/docker, when
        # different from the docker image name.
        Optional("definition"): str,
        # List of package tasks this docker image depends on.
        Optional("packages"): [str],
        Optional(
            "index",
            description="information for indexing this build so its artifacts can be discovered",
        ): task_description_schema["index"],
        Optional(
            "cache",
            description="Whether this image should be cached based on inputs.",
        ): bool,
    }
)


transforms.add_validate(docker_image_schema)


@transforms.add
def fill_template(config, tasks):
    """
    Expand each docker image definition into a full image-build task.

    For every image the build context under `taskcluster/docker/<definition>`
    is hashed (and, when `config.write_artifacts` is set, also written to
    `docker-contexts/<name>.tar.gz`).  The resulting task runs the image
    builder image, depends on the listed `packages` tasks and on the optional
    `parent` image, and carries a `cache` stanza unless caching is disabled
    or `taskgraph.fast` is set.

    Raises `Exception` when a listed package has no matching `packages` task
    or when artifacts are requested while `taskgraph.fast` is set.
    """
    available_packages = set()
    for task in config.kind_dependencies_tasks.values():
        if task.kind != "packages":
            continue
        name = task.label.replace("packages-", "")
        available_packages.add(name)

    tasks = list(tasks)

    if not taskgraph.fast and config.write_artifacts:
        os.makedirs(CONTEXTS_DIR, exist_ok=True)

    for task in tasks:
        image_name = task.pop("name")
        job_symbol = task.pop("symbol", None)
        args = task.pop("args", {})
        definition = task.pop("definition", image_name)
        packages = task.pop("packages", [])
        parent = task.pop("parent", None)

        for p in packages:
            if p not in available_packages:
                raise Exception(
                    "Missing package job for {}-{}: {}".format(
                        config.kind, image_name, p
                    )
                )

        if not taskgraph.fast:
            context_path = os.path.join("taskcluster", "docker", definition)
            topsrcdir = os.path.dirname(config.graph_config.taskcluster_yml)
            if config.write_artifacts:
                context_file = os.path.join(CONTEXTS_DIR, f"{image_name}.tar.gz")
                logger.info(f"Writing {context_file} for docker image {image_name}")
                context_hash = create_context_tar(
                    topsrcdir,
                    context_path,
                    context_file,
                    args,
                )
            else:
                context_hash = generate_context_hash(topsrcdir, context_path, args)
        else:
            if config.write_artifacts:
                raise Exception("Can't write artifacts if `taskgraph.fast` is set.")
            # placeholder hash: fast mode never hashes the context
            context_hash = "0" * 40

        # NOTE: the digest is taken from `args` *before* DOCKER_IMAGE_PACKAGES
        # is added below, so statement order matters here.
        digest_data = [context_hash]
        digest_data += [json.dumps(args, sort_keys=True)]

        description = "Build the docker image {} for use by dependent tasks".format(
            image_name
        )

        # `<pkg>` entries are task references resolved against the package
        # dependencies registered further down.
        args["DOCKER_IMAGE_PACKAGES"] = " ".join(f"<{p}>" for p in packages)

        # Adjust the zstandard compression level based on the execution level.
        # We use faster compression for level 1 because we care more about
        # end-to-end times. We use slower/better compression for other levels
        # because images are read more often and it is worth the trade-off to
        # burn more CPU once to reduce image size.
        zstd_level = "3" if int(config.params["level"]) == 1 else "10"

        # include some information that is useful in reconstructing this task
        # from JSON
        taskdesc = {
            "label": "build-docker-image-" + image_name,
            "description": description,
            "attributes": {
                "image_name": image_name,
                "artifact_prefix": "public",
            },
            "expires-after": "28 days" if config.params.is_try() else "1 year",
            "scopes": [],
            "run-on-projects": [],
            "worker-type": "images",
            "worker": {
                "implementation": "docker-worker",
                "os": "linux",
                "artifacts": [
                    {
                        "type": "file",
                        "path": "/workspace/image.tar.zst",
                        "name": "public/image.tar.zst",
                    }
                ],
                "env": {
                    # The context tarball is an artifact of the decision task.
                    # (An empty task-reference would resolve to an empty id.)
                    "CONTEXT_TASK_ID": {"task-reference": "<decision>"},
                    "CONTEXT_PATH": "public/docker-contexts/{}.tar.gz".format(
                        image_name
                    ),
                    "HASH": context_hash,
                    "PROJECT": config.params["project"],
                    "IMAGE_NAME": image_name,
                    "DOCKER_IMAGE_ZSTD_LEVEL": zstd_level,
                    "DOCKER_BUILD_ARGS": {
                        "task-reference": json.dumps(args),
                    },
                    "VCS_BASE_REPOSITORY": config.params["base_repository"],
                    "VCS_HEAD_REPOSITORY": config.params["head_repository"],
                    "VCS_HEAD_REV": config.params["head_rev"],
                    "VCS_REPOSITORY_TYPE": config.params["repository_type"],
                },
                "chain-of-trust": True,
                "max-run-time": 7200,
            },
        }
        if "index" in task:
            taskdesc["index"] = task["index"]
        if job_symbol:
            taskdesc["treeherder"] = {
                "symbol": job_symbol,
                "platform": "taskcluster-images/opt",
                "kind": "other",
                "tier": 1,
            }

        worker = taskdesc["worker"]

        worker["docker-image"] = IMAGE_BUILDER_IMAGE
        digest_data.append(f"image-builder-image:{IMAGE_BUILDER_IMAGE}")

        if packages:
            deps = taskdesc.setdefault("dependencies", {})
            for p in sorted(packages):
                deps[p] = f"packages-{p}"

        if parent:
            deps = taskdesc.setdefault("dependencies", {})
            deps["parent"] = f"build-docker-image-{parent}"
            # Refers to the "parent" dependency registered just above.
            worker["env"]["PARENT_TASK_ID"] = {
                "task-reference": "<parent>",
            }

        if task.get("cache", True) and not taskgraph.fast:
            taskdesc["cache"] = {
                "type": "docker-images.v2",
                "name": image_name,
                "digest-data": digest_data,
            }

        yield taskdesc


# diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py
# new file mode 100644
# index 0000000000..65d4b62482
# --- /dev/null
# +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py
# @@ -0,0 +1,335 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Support for running tasks that download remote content and re-export
# it as task artifacts.


import os
import re

import attr
from voluptuous import Extra, Optional, Required

import taskgraph

from ..util import path
from ..util.cached_tasks import add_optimization
from ..util.schema import Schema, validate_schema
from ..util.treeherder import join_symbol
from .base import TransformSequence

CACHE_TYPE = "content.v1"

FETCH_SCHEMA = Schema(
    {
        # Name of the task.
        Required("name"): str,
        # Relative path (from config.path) to the file the task was defined
        # in.
        Optional("task-from"): str,
        # Description of the task.
        Required("description"): str,
        Optional("docker-image"): object,
        Optional(
            "fetch-alias",
            description="An alias that can be used instead of the real fetch job name in "
            "fetch stanzas for jobs.",
        ): str,
        Optional(
            "artifact-prefix",
            description="The prefix of the taskcluster artifact being uploaded. "
            "Defaults to `public/`; if it starts with something other than "
            "`public/` the artifact will require scopes to access.",
        ): str,
        Optional("attributes"): {str: object},
        Required("fetch"): {
            Required("type"): str,
            Extra: object,
        },
    }
)


# define a collection of payload builders, depending on the worker implementation
fetch_builders = {}


@attr.s(frozen=True)
class FetchBuilder:
    """A registered fetch type: its validation schema and builder callable."""

    schema = attr.ib(type=Schema)
    builder = attr.ib()


def fetch_builder(name, schema):
    """
    Decorator registering a builder for the fetch type `name`.

    `schema` describes the type-specific keys of the `fetch` stanza; the
    mandatory `type: <name>` key is added automatically.  The decorated
    function is stored in `fetch_builders` and returned unchanged.
    """
    schema = Schema({Required("type"): name}).extend(schema)

    def wrap(func):
        fetch_builders[name] = FetchBuilder(schema, func)
        return func

    return wrap


transforms = TransformSequence()
transforms.add_validate(FETCH_SCHEMA)


@transforms.add
def process_fetch_job(config, jobs):
    """
    Replace each job's `fetch` stanza by the output of its fetch builder.

    Raises `Exception` for an unknown fetch type.
    """
    # Converts fetch-url entries to the job schema.
    for job in jobs:
        typ = job["fetch"]["type"]
        name = job["name"]
        fetch = job.pop("fetch")

        if typ not in fetch_builders:
            raise Exception(f"Unknown fetch type {typ} in fetch {name}")
        validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")

        job.update(configure_fetch(config, typ, name, fetch))

        yield job


def configure_fetch(config, typ, name, fetch):
    """
    Validate `fetch` against the schema of type `typ` and run its builder.

    Returns whatever the builder returns (a dict merged into the job by
    `process_fetch_job`).  Raises `Exception` for an unknown fetch type.
    """
    if typ not in fetch_builders:
        raise Exception(f"No fetch type {typ} in fetch {name}")
    validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")

    return fetch_builders[typ].builder(config, name, fetch)


@transforms.add
def make_task(config, jobs):
    """
    Turn each processed fetch job into a `run-task` task description.

    Expects the keys produced by the fetch builders (`command`,
    `artifact_name`, `digest_data`, optionally `env` and `secret`).  Unless
    `taskgraph.fast` is set, the task is registered as a cached task keyed on
    its `digest_data`.
    """
    # Fetch tasks are idempotent and immutable. Have them live for
    # essentially forever.
    # `level` is normalised with str() so an integer 3 is treated like "3",
    # matching the int() normalisation used for docker images.
    if str(config.params["level"]) == "3":
        expires = "1000 years"
    else:
        expires = "28 days"

    for job in jobs:
        name = job["name"]
        artifact_prefix = job.get("artifact-prefix", "public")
        env = job.get("env", {})
        env.update({"UPLOAD_DIR": "/builds/worker/artifacts"})
        attributes = job.get("attributes", {})
        attributes["fetch-artifact"] = path.join(artifact_prefix, job["artifact_name"])
        alias = job.get("fetch-alias")
        if alias:
            attributes["fetch-alias"] = alias

        task = {
            "attributes": attributes,
            "name": name,
            "description": job["description"],
            "expires-after": expires,
            "label": "fetch-%s" % name,
            "run-on-projects": [],
            "run": {
                "using": "run-task",
                "checkout": False,
                "command": job["command"],
            },
            "worker-type": "images",
            "worker": {
                "chain-of-trust": True,
                "docker-image": job.get("docker-image", {"in-tree": "fetch"}),
                "env": env,
                "max-run-time": 900,
                "artifacts": [
                    {
                        "type": "directory",
                        "name": artifact_prefix,
                        "path": "/builds/worker/artifacts",
                    }
                ],
            },
        }

        if "treeherder" in config.graph_config:
            task["treeherder"] = {
                "symbol": join_symbol("Fetch", name),
                "kind": "build",
                "platform": "fetch/opt",
                "tier": 1,
            }

        if job.get("secret", None):
            # The task needs the proxy to read the secret at run time.
            task["scopes"] = ["secrets:get:" + job.get("secret")]
            task["worker"]["taskcluster-proxy"] = True

        if not taskgraph.fast:
            cache_name = task["label"].replace(f"{config.kind}-", "", 1)

            # This adds the level to the index path automatically.
            add_optimization(
                config,
                task,
                cache_type=CACHE_TYPE,
                cache_name=cache_name,
                digest_data=job["digest_data"],
            )
        yield task


@fetch_builder(
    "static-url",
    schema={
        # The URL to download.
        Required("url"): str,
        # The SHA-256 of the downloaded content.
        Required("sha256"): str,
        # Size of the downloaded entity, in bytes.
        Required("size"): int,
        # GPG signature verification.
        Optional("gpg-signature"): {
            # URL where GPG signature document can be obtained. Can contain the
            # value ``{url}``, which will be substituted with the value from
            # ``url``.
            Required("sig-url"): str,
            # Path to file containing GPG public key(s) used to validate
            # download.
            Required("key-path"): str,
        },
        # The name to give to the generated artifact. Defaults to the file
        # portion of the URL. Using a different extension converts the
        # archive to the given type. Only conversion to .tar.zst is
        # supported.
        Optional("artifact-name"): str,
        # Strip the given number of path components at the beginning of
        # each file entry in the archive.
        # Requires an artifact-name ending with .tar.zst.
        Optional("strip-components"): int,
        # Add the given prefix to each file entry in the archive.
        # Requires an artifact-name ending with .tar.zst.
        Optional("add-prefix"): str,
        # Headers to pass alongside the request.
        Optional("headers"): {
            str: str,
        },
        # IMPORTANT: when adding anything that changes the behavior of the task,
        # it is important to update the digest data used to compute cache hits.
    },
)
def create_fetch_url_task(config, name, fetch):
    """
    Build the `fetch-content static-url` command for a `static-url` fetch.

    Returns a dict with `command`, `artifact_name`, `env` (holding the GPG
    key when signature checking is configured) and `digest_data`.
    """
    artifact_name = fetch.get("artifact-name")
    if not artifact_name:
        artifact_name = fetch["url"].split("/")[-1]

    command = [
        "fetch-content",
        "static-url",
    ]

    # Arguments that matter to the cache digest
    args = [
        "--sha256",
        fetch["sha256"],
        "--size",
        "%d" % fetch["size"],
    ]

    if fetch.get("strip-components"):
        args.extend(["--strip-components", "%d" % fetch["strip-components"]])

    if fetch.get("add-prefix"):
        args.extend(["--add-prefix", fetch["add-prefix"]])

    command.extend(args)

    env = {}

    if "gpg-signature" in fetch:
        sig_url = fetch["gpg-signature"]["sig-url"].format(url=fetch["url"])
        key_path = os.path.join(taskgraph.GECKO, fetch["gpg-signature"]["key-path"])

        with open(key_path) as fh:
            gpg_key = fh.read()

        # The key travels in the environment; the command only names the
        # variable to read it from.
        env["FETCH_GPG_KEY"] = gpg_key
        command.extend(
            [
                "--gpg-sig-url",
                sig_url,
                "--gpg-key-env",
                "FETCH_GPG_KEY",
            ]
        )

    if "headers" in fetch:
        for k, v in fetch["headers"].items():
            command.extend(["-H", f"{k}:{v}"])

    command.extend(
        [
            fetch["url"],
            "/builds/worker/artifacts/%s" % artifact_name,
        ]
    )

    return {
        "command": command,
        "artifact_name": artifact_name,
        "env": env,
        # We don't include the GPG signature in the digest because it isn't
        # materially important for caching: GPG signatures are supplemental
        # trust checking beyond what the shasum already provides.
        "digest_data": args + [artifact_name],
    }


@fetch_builder(
    "git",
    schema={
        Required("repo"): str,
        Required("revision"): str,
        Optional("include-dot-git"): bool,
        Optional("artifact-name"): str,
        Optional("path-prefix"): str,
        # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key)
        # In the secret dictionary, the key should be specified as
        #  "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..."
        # n.b. The OpenSSH private key file format requires a newline at the end of the file.
        Optional("ssh-key"): str,
    },
)
def create_git_fetch_task(config, name, fetch):
    """
    Build the `fetch-content git-checkout-archive` command for a `git` fetch.

    Returns a dict with `command`, `artifact_name`, `digest_data` and
    `secret` (the `ssh-key` secret path, or None).

    Raises `Exception` when `revision` is not exactly 40 hexadecimal digits.
    """
    path_prefix = fetch.get("path-prefix")
    if not path_prefix:
        path_prefix = fetch["repo"].rstrip("/").rsplit("/", 1)[-1]
    artifact_name = fetch.get("artifact-name")
    if not artifact_name:
        artifact_name = f"{path_prefix}.tar.zst"

    # fullmatch: a prefix match would accept 40 hex digits followed by junk.
    if not re.fullmatch(r"[0-9a-fA-F]{40}", fetch["revision"]):
        raise Exception(f'Revision is not a sha1 in fetch task "{name}"')

    args = [
        "fetch-content",
        "git-checkout-archive",
        "--path-prefix",
        path_prefix,
        fetch["repo"],
        fetch["revision"],
        "/builds/worker/artifacts/%s" % artifact_name,
    ]

    ssh_key = fetch.get("ssh-key")
    if ssh_key:
        args.append("--ssh-key-secret")
        args.append(ssh_key)

    digest_data = [fetch["revision"], path_prefix, artifact_name]
    if fetch.get("include-dot-git", False):
        args.append("--include-dot-git")
        digest_data.append(".git")

    return {
        "command": args,
        "artifact_name": artifact_name,
        "digest_data": digest_data,
        "secret": ssh_key,
    }


# diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py
# new file mode 100644
# index 0000000000..cc2615b702
# --- /dev/null
# +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py
# @@ -0,0 +1,438 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# (opening of the module docstring of job/__init__.py; it is closed on the
# following line of the patch)
# Convert a job description into a task description.
#
# Jobs descriptions are similar to task descriptions, but they specify how to run
# the job at a higher level, using a "run" field that can be interpreted by
# run-using handlers in `taskcluster/taskgraph/transforms/job`.
+""" + + +import copy +import json +import logging + +from voluptuous import Any, Exclusive, Extra, Optional, Required + +from taskgraph.transforms.base import TransformSequence +from taskgraph.transforms.cached_tasks import order_tasks +from taskgraph.transforms.task import task_description_schema +from taskgraph.util import path as mozpath +from taskgraph.util.python_path import import_sibling_modules +from taskgraph.util.schema import Schema, validate_schema +from taskgraph.util.taskcluster import get_artifact_prefix +from taskgraph.util.workertypes import worker_type_implementation + +logger = logging.getLogger(__name__) + +# Schema for a build description +job_description_schema = Schema( + { + # The name of the job and the job's label. At least one must be specified, + # and the label will be generated from the name if necessary, by prepending + # the kind. + Optional("name"): str, + Optional("label"): str, + # the following fields are passed directly through to the task description, + # possibly modified by the run implementation. See + # taskcluster/taskgraph/transforms/task.py for the schema details. 
+ Required("description"): task_description_schema["description"], + Optional("attributes"): task_description_schema["attributes"], + Optional("task-from"): task_description_schema["task-from"], + Optional("dependencies"): task_description_schema["dependencies"], + Optional("soft-dependencies"): task_description_schema["soft-dependencies"], + Optional("if-dependencies"): task_description_schema["if-dependencies"], + Optional("requires"): task_description_schema["requires"], + Optional("expires-after"): task_description_schema["expires-after"], + Optional("routes"): task_description_schema["routes"], + Optional("scopes"): task_description_schema["scopes"], + Optional("tags"): task_description_schema["tags"], + Optional("extra"): task_description_schema["extra"], + Optional("treeherder"): task_description_schema["treeherder"], + Optional("index"): task_description_schema["index"], + Optional("run-on-projects"): task_description_schema["run-on-projects"], + Optional("run-on-tasks-for"): task_description_schema["run-on-tasks-for"], + Optional("run-on-git-branches"): task_description_schema["run-on-git-branches"], + Optional("always-target"): task_description_schema["always-target"], + Exclusive("optimization", "optimization"): task_description_schema[ + "optimization" + ], + Optional("needs-sccache"): task_description_schema["needs-sccache"], + # The "when" section contains descriptions of the circumstances under which + # this task should be included in the task graph. This will be converted + # into an optimization, so it cannot be specified in a job description that + # also gives 'optimization'. + Exclusive("when", "optimization"): { + # This task only needs to be run if a file matching one of the given + # patterns has changed in the push. The patterns use the mozpack + # match function (python/mozbuild/mozpack/path.py). + Optional("files-changed"): [str], + }, + # A list of artifacts to install from 'fetch' tasks. 
+ Optional("fetches"): { + Any("toolchain", "fetch"): [str], + str: [ + str, + { + Required("artifact"): str, + Optional("dest"): str, + Optional("extract"): bool, + Optional("verify-hash"): bool, + }, + ], + }, + # A description of how to run this job. + "run": { + # The key to a job implementation in a peer module to this one + "using": str, + # Base work directory used to set up the task. + Optional("workdir"): str, + # Any remaining content is verified against that job implementation's + # own schema. + Extra: object, + }, + Required("worker-type"): task_description_schema["worker-type"], + # This object will be passed through to the task description, with additions + # provided by the job's run-using function + Optional("worker"): dict, + } +) + +transforms = TransformSequence() +transforms.add_validate(job_description_schema) + + +@transforms.add +def rewrite_when_to_optimization(config, jobs): + for job in jobs: + when = job.pop("when", {}) + if not when: + yield job + continue + + files_changed = when.get("files-changed") + + # implicitly add task config directory. 
+ files_changed.append(f"{config.path}/**") + + # "only when files changed" implies "skip if files have not changed" + job["optimization"] = {"skip-unless-changed": files_changed} + + assert "when" not in job + yield job + + +@transforms.add +def set_implementation(config, jobs): + for job in jobs: + impl, os = worker_type_implementation(config.graph_config, job["worker-type"]) + if os: + job.setdefault("tags", {})["os"] = os + if impl: + job.setdefault("tags", {})["worker-implementation"] = impl + worker = job.setdefault("worker", {}) + assert "implementation" not in worker + worker["implementation"] = impl + if os: + worker["os"] = os + yield job + + +@transforms.add +def set_label(config, jobs): + for job in jobs: + if "label" not in job: + if "name" not in job: + raise Exception("job has neither a name nor a label") + job["label"] = "{}-{}".format(config.kind, job["name"]) + if job.get("name"): + del job["name"] + yield job + + +@transforms.add +def add_resource_monitor(config, jobs): + for job in jobs: + if job.get("attributes", {}).get("resource-monitor"): + worker_implementation, worker_os = worker_type_implementation( + config.graph_config, job["worker-type"] + ) + # Normalise worker os so that linux-bitbar and similar use linux tools. 
def get_attribute(dict, key, attributes, attribute_name):
    """Copy ``attributes[attribute_name]`` into ``dict[key]`` when it is set.

    A missing or falsy attribute leaves ``dict`` untouched. Nothing is
    returned; ``dict`` is updated in place.
    """
    found = attributes.get(attribute_name)
    if not found:
        return
    dict[key] = found
job.pop("fetches", None) + if not fetches: + yield job + continue + + job_fetches = [] + name = job.get("name", job.get("label")) + dependencies = job.setdefault("dependencies", {}) + worker = job.setdefault("worker", {}) + env = worker.setdefault("env", {}) + prefix = get_artifact_prefix(job) + for kind, artifacts in fetches.items(): + if kind in ("fetch", "toolchain"): + for fetch_name in artifacts: + label = f"{kind}-{fetch_name}" + label = aliases.get(label, label) + if label not in artifact_names: + raise Exception( + "Missing fetch job for {kind}-{name}: {fetch}".format( + kind=config.kind, name=name, fetch=fetch_name + ) + ) + if label in extra_env: + env.update(extra_env[label]) + + path = artifact_names[label] + + dependencies[label] = label + job_fetches.append( + { + "artifact": path, + "task": f"<{label}>", + "extract": True, + } + ) + else: + if kind not in dependencies: + raise Exception( + "{name} can't fetch {kind} artifacts because " + "it has no {kind} dependencies!".format(name=name, kind=kind) + ) + dep_label = dependencies[kind] + if dep_label in artifact_prefixes: + prefix = artifact_prefixes[dep_label] + else: + dep_tasks = [ + task + for label, task in config.kind_dependencies_tasks.items() + if label == dep_label + ] + if len(dep_tasks) != 1: + raise Exception( + "{name} can't fetch {kind} artifacts because " + "there are {tasks} with label {label} in kind dependencies!".format( + name=name, + kind=kind, + label=dependencies[kind], + tasks="no tasks" + if len(dep_tasks) == 0 + else "multiple tasks", + ) + ) + + prefix = get_artifact_prefix(dep_tasks[0]) + + for artifact in artifacts: + if isinstance(artifact, str): + path = artifact + dest = None + extract = True + verify_hash = False + else: + path = artifact["artifact"] + dest = artifact.get("dest") + extract = artifact.get("extract", True) + verify_hash = artifact.get("verify-hash", False) + + fetch = { + "artifact": f"{prefix}/{path}", + "task": f"<{kind}>", + "extract": extract, + } + 
if dest is not None: + fetch["dest"] = dest + if verify_hash: + fetch["verify-hash"] = verify_hash + job_fetches.append(fetch) + + job_artifact_prefixes = { + mozpath.dirname(fetch["artifact"]) + for fetch in job_fetches + if not fetch["artifact"].startswith("public/") + } + if job_artifact_prefixes: + # Use taskcluster-proxy and request appropriate scope. For example, add + # 'scopes: [queue:get-artifact:path/to/*]' for 'path/to/artifact.tar.xz'. + worker["taskcluster-proxy"] = True + for prefix in sorted(job_artifact_prefixes): + scope = f"queue:get-artifact:{prefix}/*" + if scope not in job.setdefault("scopes", []): + job["scopes"].append(scope) + + env["MOZ_FETCHES"] = {"task-reference": json.dumps(job_fetches, sort_keys=True)} + + env.setdefault("MOZ_FETCHES_DIR", "fetches") + + yield job + + +@transforms.add +def make_task_description(config, jobs): + """Given a build description, create a task description""" + # import plugin modules first, before iterating over jobs + import_sibling_modules(exceptions=("common.py",)) + + for job in jobs: + # always-optimized tasks never execute, so have no workdir + if job["worker"]["implementation"] in ("docker-worker", "generic-worker"): + job["run"].setdefault("workdir", "/builds/worker") + + taskdesc = copy.deepcopy(job) + + # fill in some empty defaults to make run implementations easier + taskdesc.setdefault("attributes", {}) + taskdesc.setdefault("dependencies", {}) + taskdesc.setdefault("soft-dependencies", []) + taskdesc.setdefault("routes", []) + taskdesc.setdefault("scopes", []) + taskdesc.setdefault("extra", {}) + + # give the function for job.run.using on this worker implementation a + # chance to set up the task description. 
# A registry of all functions decorated with run_job_using, keyed first by
# the `run.using` value and then by worker implementation. Each entry is a
# `(func, schema, defaults)` tuple.
registry = {}


def run_job_using(worker_implementation, run_using, schema=None, defaults=None):
    """Register the decorated function as able to set up a task description for
    jobs with the given worker implementation and `run.using` property. If
    `schema` is given, the job's run field will be verified to match it.

    The decorated function should have the signature `using_foo(config, job, taskdesc)`
    and should modify the task description in-place. The skeleton of
    the task description is already set up, but without a payload.

    Args:
        worker_implementation (str): Worker implementation the function handles.
        run_using (str): Value of `run.using` the function handles.
        schema: Optional schema stored alongside the function.
        defaults (dict): Optional default values for the job's `run` section;
            stored as an empty dict when omitted.

    Returns:
        A decorator that records the function in `registry` and returns it
        unchanged.

    Raises:
        Exception: if a function is already registered for this
            `run_using` / `worker_implementation` pair.
    """
    # Give every registration its own dict instead of sharing one mutable
    # default object between all callers that omit `defaults`.
    defaults = {} if defaults is None else defaults

    def wrap(func):
        for_run_using = registry.setdefault(run_using, {})
        if worker_implementation in for_run_using:
            raise Exception(
                "run_job_using({!r}, {!r}) already exists: {!r}".format(
                    run_using,
                    worker_implementation,
                    # The inner mapping is keyed by worker implementation;
                    # indexing it with `run_using` raised KeyError instead of
                    # reporting the clash.
                    for_run_using[worker_implementation],
                )
            )
        for_run_using[worker_implementation] = (func, schema, defaults)
        return func

    return wrap
def get_vcsdir_name(os):
    """Return the checkout directory name for the given worker OS.

    ``"windows"`` maps to ``"src"``; every other value maps to ``"vcs"``.
    """
    return "src" if os == "windows" else "vcs"
+ """ + if not job["run"].get("use-caches", True): + return + + worker = job["worker"] + + if worker["implementation"] == "docker-worker": + taskdesc["worker"].setdefault("caches", []).append( + { + "type": "persistent", + "name": name, + "mount-point": mount_point, + "skip-untrusted": skip_untrusted, + } + ) + + elif worker["implementation"] == "generic-worker": + taskdesc["worker"].setdefault("mounts", []).append( + { + "cache-name": name, + "directory": mount_point, + } + ) + + else: + # Caches not implemented + pass + + +def docker_worker_add_workspace_cache(config, job, taskdesc, extra=None): + """Add the workspace cache. + + Args: + config (TransformConfig): Transform configuration object. + job (dict): Task's job description. + taskdesc (dict): Target task description to modify. + extra (str): Optional context passed in that supports extending the cache + key name to avoid undesired conflicts with other caches. + """ + cache_name = "{}-build-{}-{}-workspace".format( + config.params["project"], + taskdesc["attributes"]["build_platform"], + taskdesc["attributes"]["build_type"], + ) + if extra: + cache_name = f"{cache_name}-{extra}" + + mount_point = "{workdir}/workspace".format(**job["run"]) + + # Don't enable the workspace cache when we can't guarantee its + # behavior, like on Try. 
def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False):
    """Update a job/task with parameters to enable a VCS checkout.

    This can only be used with ``run-task`` tasks, as the cache name is
    reserved for ``run-task`` tasks.

    Args:
        config (TransformConfig): Transform configuration object (not read by
            this function).
        job (dict): Task's job description; ``job["worker"]`` supplies ``os``
            and ``implementation``, ``job["run"]`` supplies ``workdir`` for
            docker-worker.
        taskdesc (dict): Target task description. Its worker env, cache
            configuration, scopes and (on docker-worker) taskcluster-proxy
            flag are modified in place.
        repo_configs (dict): Repository configurations to check out; each
            value provides ``path``, ``prefix``, ``name`` and the head/base
            repository attributes read below.
        sparse (bool): Whether the checkout uses a sparse profile.
    """
    worker = job["worker"]
    is_mac = worker["os"] == "macosx"
    is_win = worker["os"] == "windows"
    is_linux = worker["os"] == "linux"
    is_docker = worker["implementation"] == "docker-worker"
    assert is_mac or is_win or is_linux

    if is_win:
        checkoutdir = "./build"
        hgstore = "y:/hg-shared"
    elif is_docker:
        checkoutdir = "{workdir}/checkouts".format(**job["run"])
        hgstore = f"{checkoutdir}/hg-store"
    else:
        checkoutdir = "./checkouts"
        hgstore = f"{checkoutdir}/hg-shared"

    vcsdir = checkoutdir + "/" + get_vcsdir_name(worker["os"])
    cache_name = "checkouts"

    # Robust checkout does not clean up subrepositories, so ensure that tasks
    # that checkout different sets of paths have separate caches.
    # See https://bugzilla.mozilla.org/show_bug.cgi?id=1631610
    if len(repo_configs) > 1:
        # Sort the de-duplicated entries before hashing. Joining a set of
        # strings directly follows the per-process hash seed, so the digest
        # (and therefore the cache name) could differ between two otherwise
        # identical runs.
        checkout_paths = sorted(
            {
                "\t".join([repo_config.path, repo_config.prefix])
                for repo_config in repo_configs.values()
            }
        )
        checkout_paths_str = "\n".join(checkout_paths).encode("utf-8")
        digest = hashlib.sha256(checkout_paths_str).hexdigest()
        cache_name += f"-repos-{digest}"

    # Sparse checkouts need their own cache because they can interfere
    # with clients that aren't sparse aware.
    if sparse:
        cache_name += "-sparse"

    # Workers using Mercurial >= 5.8 will enable revlog-compression-zstd, which
    # workers using older versions can't understand, so they can't share cache.
    # At the moment, only docker workers use the newer version.
    if is_docker:
        cache_name += "-hg58"

    add_cache(job, taskdesc, cache_name, checkoutdir)

    env = taskdesc["worker"].setdefault("env", {})
    env.update(
        {
            "HG_STORE_PATH": hgstore,
            "REPOSITORIES": json.dumps(
                {repo.prefix: repo.name for repo in repo_configs.values()}
            ),
            "VCS_PATH": vcsdir,
        }
    )
    for repo_config in repo_configs.values():
        # Per-repository variables are prefixed with the upper-cased repo
        # prefix; attributes that are None are not exported.
        env.update(
            {
                f"{repo_config.prefix.upper()}_{key}": value
                for key, value in {
                    "BASE_REPOSITORY": repo_config.base_repository,
                    "HEAD_REPOSITORY": repo_config.head_repository,
                    "HEAD_REV": repo_config.head_rev,
                    "HEAD_REF": repo_config.head_ref,
                    "REPOSITORY_TYPE": repo_config.type,
                    "SSH_SECRET_NAME": repo_config.ssh_secret_name,
                }.items()
                if value is not None
            }
        )
        if repo_config.ssh_secret_name:
            # Don't assume the scopes list has been created already.
            taskdesc.setdefault("scopes", []).append(
                f"secrets:get:{repo_config.ssh_secret_name}"
            )

    # only some worker platforms have taskcluster-proxy enabled
    if is_docker:
        taskdesc["worker"]["taskcluster-proxy"] = True
@run_job_using("always-optimized", "index-search", schema=run_task_schema)
def fill_template(config, job, taskdesc):
    """Set the task's optimization to an ``index-search`` over the job's indexes.

    Each entry of ``job["run"]["index-search"]`` is formatted with
    ``config.params`` before being stored on ``taskdesc["optimization"]``.
    """
    index_paths = []
    for template in job["run"]["index-search"]:
        index_paths.append(template.format(**config.params))
    taskdesc["optimization"] = {"index-search": index_paths}
+""" +Support for running jobs that are invoked via the `run-task` script. +""" + + +import os + +import attr +from voluptuous import Any, Optional, Required + +from taskgraph.transforms.job import run_job_using +from taskgraph.transforms.job.common import support_vcs_checkout +from taskgraph.transforms.task import taskref_or_string +from taskgraph.util import path, taskcluster +from taskgraph.util.schema import Schema + +EXEC_COMMANDS = { + "bash": ["bash", "-cx"], + "powershell": ["powershell.exe", "-ExecutionPolicy", "Bypass"], +} + +run_task_schema = Schema( + { + Required("using"): "run-task", + # if true, add a cache at ~worker/.cache, which is where things like pip + # tend to hide their caches. This cache is never added for level-1 jobs. + # TODO Once bug 1526028 is fixed, this and 'use-caches' should be merged. + Required("cache-dotcache"): bool, + # Whether or not to use caches. + Optional("use-caches"): bool, + # if true (the default), perform a checkout on the worker + Required("checkout"): Any(bool, {str: dict}), + Optional( + "cwd", + description="Path to run command in. If a checkout is present, the path " + "to the checkout will be interpolated with the key `checkout`", + ): str, + # The sparse checkout profile to use. Value is the filename relative to the + # directory where sparse profiles are defined (build/sparse-profiles/). + Required("sparse-profile"): Any(str, None), + # The command arguments to pass to the `run-task` script, after the + # checkout arguments. If a list, it will be passed directly; otherwise + # it will be included in a single argument to the command specified by + # `exec-with`. + Required("command"): Any([taskref_or_string], taskref_or_string), + # Context to substitute into the command using format string + # substitution (e.g {value}). This is useful if certain aspects of the + # command need to be generated in transforms. 
+ Optional("command-context"): dict, + # What to execute the command with in the event command is a string. + Optional("exec-with"): Any(*list(EXEC_COMMANDS)), + # Base work directory used to set up the task. + Required("workdir"): str, + # Whether to run as root. (defaults to False) + Optional("run-as-root"): bool, + } +) + + +def common_setup(config, job, taskdesc, command): + run = job["run"] + if run["checkout"]: + repo_configs = config.repo_configs + if len(repo_configs) > 1 and run["checkout"] is True: + raise Exception("Must explicitly specify checkouts with multiple repos.") + elif run["checkout"] is not True: + repo_configs = { + repo: attr.evolve(repo_configs[repo], **config) + for (repo, config) in run["checkout"].items() + } + + support_vcs_checkout( + config, + job, + taskdesc, + repo_configs=repo_configs, + sparse=bool(run["sparse-profile"]), + ) + + vcs_path = taskdesc["worker"]["env"]["VCS_PATH"] + for repo_config in repo_configs.values(): + checkout_path = path.join(vcs_path, repo_config.path) + command.append(f"--{repo_config.prefix}-checkout={checkout_path}") + + if run["sparse-profile"]: + command.append( + "--{}-sparse-profile=build/sparse-profiles/{}".format( + repo_config.prefix, + run["sparse-profile"], + ) + ) + + if "cwd" in run: + run["cwd"] = path.normpath(run["cwd"].format(checkout=vcs_path)) + elif "cwd" in run and "{checkout}" in run["cwd"]: + raise Exception( + "Found `{{checkout}}` interpolation in `cwd` for task {name} " + "but the task doesn't have a checkout: {cwd}".format( + cwd=run["cwd"], name=job.get("name", job.get("label")) + ) + ) + + if "cwd" in run: + command.extend(("--task-cwd", run["cwd"])) + + taskdesc["worker"].setdefault("env", {})["MOZ_SCM_LEVEL"] = config.params["level"] + + +worker_defaults = { + "cache-dotcache": False, + "checkout": True, + "sparse-profile": None, + "run-as-root": False, +} + + +def script_url(config, script): + if "MOZ_AUTOMATION" in os.environ and "TASK_ID" not in os.environ: + raise 
@run_job_using(
    "docker-worker", "run-task", schema=run_task_schema, defaults=worker_defaults
)
def docker_worker_run_task(config, job, taskdesc):
    """Build the docker-worker ``command`` for a ``run-task`` job.

    The job's worker dict becomes ``taskdesc["worker"]``. The final command is
    ``/usr/local/bin/run-task``, the arguments added by ``common_setup``,
    optional ``--user root --group root``, ``--`` and then the job's command.

    Args:
        config (TransformConfig): Transform configuration; ``config.params``
            supplies ``project`` for the dotcache name.
        job (dict): Job description; ``job["run"]`` is read and ``exec-with``
            is popped from it when the command is a string or dict.
        taskdesc (dict): Task description, modified in place.
    """
    run = job["run"]
    worker = taskdesc["worker"] = job["worker"]
    command = ["/usr/local/bin/run-task"]
    common_setup(config, job, taskdesc, command)

    if run.get("cache-dotcache"):
        # `caches` is only present if something earlier created it (for
        # example a checkout cache), so create it on demand rather than
        # failing with KeyError for jobs without a checkout.
        worker.setdefault("caches", []).append(
            {
                "type": "persistent",
                "name": "{project}-dotcache".format(**config.params),
                "mount-point": "{workdir}/.cache".format(**run),
                "skip-untrusted": True,
            }
        )

    run_command = run["command"]

    command_context = run.get("command-context")
    if command_context:
        # The schema allows the command to be a string or a list. Interpolate
        # string commands directly and the string items of list commands, as
        # the generic-worker implementation does for lists. Non-string values
        # (task-reference dicts) are passed through untouched.
        if isinstance(run_command, str):
            run_command = run_command.format(**command_context)
        elif isinstance(run_command, list):
            run_command = [
                part.format(**command_context) if isinstance(part, str) else part
                for part in run_command
            ]

    # dict is for the case of `{'task-reference': str}`.
    if isinstance(run_command, (str, dict)):
        exec_cmd = EXEC_COMMANDS[run.pop("exec-with", "bash")]
        run_command = exec_cmd + [run_command]
    if run["run-as-root"]:
        command.extend(("--user", "root", "--group", "root"))
    command.append("--")
    command.extend(run_command)
    worker["command"] = command
adb + # environment variables + command.append("/builds/taskcluster/script.py") + command.extend(run_command) + + if is_win: + worker["command"] = [" ".join(command)] + else: + worker["command"] = [ + ["chmod", "+x", "run-task"], + command, + ] diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py new file mode 100644 index 0000000000..5d4ee02f4a --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py @@ -0,0 +1,174 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +""" +Support for running toolchain-building jobs via dedicated scripts +""" + +from voluptuous import Any, Optional, Required + +import taskgraph +from taskgraph.transforms.job import configure_taskdesc_for_run, run_job_using +from taskgraph.transforms.job.common import ( + docker_worker_add_artifacts, + generic_worker_add_artifacts, + get_vcsdir_name, +) +from taskgraph.util.hash import hash_paths +from taskgraph.util.schema import Schema +from taskgraph.util.shell import quote as shell_quote + +CACHE_TYPE = "toolchains.v3" + +toolchain_run_schema = Schema( + { + Required("using"): "toolchain-script", + # The script (in taskcluster/scripts/misc) to run. + Required("script"): str, + # Arguments to pass to the script. + Optional("arguments"): [str], + # Sparse profile to give to checkout using `run-task`. If given, + # a filename in `build/sparse-profiles`. Defaults to + # "toolchain-build", i.e., to + # `build/sparse-profiles/toolchain-build`. If `None`, instructs + # `run-task` to not use a sparse profile at all. + Required("sparse-profile"): Any(str, None), + # Paths/patterns pointing to files that influence the outcome of a + # toolchain build. 
+ Optional("resources"): [str], + # Path to the artifact produced by the toolchain job + Required("toolchain-artifact"): str, + Optional( + "toolchain-alias", + description="An alias that can be used instead of the real toolchain job name in " + "fetch stanzas for jobs.", + ): Any(str, [str]), + Optional( + "toolchain-env", + description="Additional env variables to add to the worker when using this toolchain", + ): {str: object}, + # Base work directory used to set up the task. + Required("workdir"): str, + } +) + + +def get_digest_data(config, run, taskdesc): + files = list(run.pop("resources", [])) + # The script + files.append("taskcluster/scripts/toolchain/{}".format(run["script"])) + + # Accumulate dependency hashes for index generation. + data = [hash_paths(config.graph_config.vcs_root, files)] + + data.append(taskdesc["attributes"]["toolchain-artifact"]) + + # If the task uses an in-tree docker image, we want it to influence + # the index path as well. Ideally, the content of the docker image itself + # should have an influence, but at the moment, we can't get that + # information here. So use the docker image name as a proxy. Not a lot of + # changes to docker images actually have an impact on the resulting + # toolchain artifact, so we'll just rely on such important changes to be + # accompanied with a docker image name change. + image = taskdesc["worker"].get("docker-image", {}).get("in-tree") + if image: + data.append(image) + + # Likewise script arguments should influence the index. 
def common_toolchain(config, job, taskdesc, is_docker):
    """Set up a ``toolchain-script`` job and hand it over as a ``run-task`` job.

    The job's worker dict becomes ``taskdesc["worker"]``. The toolchain
    specific keys are moved from ``job["run"]`` into task attributes, the
    script invocation is turned into ``run["command"]``, ``run["using"]`` is
    switched to ``"run-task"`` and ``configure_taskdesc_for_run`` is called.

    Args:
        config (TransformConfig): Transform configuration; ``config.params``
            supplies ``moz_build_date`` and ``level``.
        job (dict): Job description; its ``run`` section is modified in place.
        taskdesc (dict): Task description, modified in place.
        is_docker (bool): True when the job runs on docker-worker.
    """
    run = job["run"]

    worker = taskdesc["worker"] = job["worker"]
    worker["chain-of-trust"] = True

    srcdir = get_vcsdir_name(worker["os"])

    if is_docker:
        # If the task doesn't have a docker-image, set a default
        worker.setdefault("docker-image", {"in-tree": "toolchain-build"})

    # Create the env before the artifact helpers run: the docker helper writes
    # UPLOAD_DIR into worker["env"], and the update below needs it as well.
    # Indexing it directly raised KeyError for jobs that define no env.
    env = worker.setdefault("env", {})

    # Allow the job to specify where artifacts come from, but add
    # public/build if it's not there already.
    artifacts = worker.setdefault("artifacts", [])
    if not any(artifact.get("name") == "public/build" for artifact in artifacts):
        if is_docker:
            docker_worker_add_artifacts(config, job, taskdesc)
        else:
            generic_worker_add_artifacts(config, job, taskdesc)

    env.update(
        {
            "MOZ_BUILD_DATE": config.params["moz_build_date"],
            "MOZ_SCM_LEVEL": config.params["level"],
        }
    )

    attributes = taskdesc.setdefault("attributes", {})
    attributes["toolchain-artifact"] = run.pop("toolchain-artifact")
    if "toolchain-alias" in run:
        attributes["toolchain-alias"] = run.pop("toolchain-alias")
    if "toolchain-env" in run:
        attributes["toolchain-env"] = run.pop("toolchain-env")

    if not taskgraph.fast:
        # The cache name is the label with the leading "<kind>-" removed.
        name = taskdesc["label"].replace(f"{config.kind}-", "", 1)
        taskdesc["cache"] = {
            "type": CACHE_TYPE,
            "name": name,
            "digest-data": get_digest_data(config, run, taskdesc),
        }

    script = run.pop("script")
    run["using"] = "run-task"
    run["cwd"] = "{checkout}/.."

    if script.endswith(".ps1"):
        run["exec-with"] = "powershell"

    command = [f"{srcdir}/taskcluster/scripts/toolchain/{script}"] + run.pop(
        "arguments", []
    )

    if not is_docker:
        # Don't quote the first item in the command because it purposely contains
        # an environment variable that is not meant to be quoted.
        if len(command) > 1:
            command = command[0] + " " + shell_quote(*command[1:])
        else:
            command = command[0]

    run["command"] = command

    configure_taskdesc_for_run(config, job, taskdesc, worker["implementation"])
class TitleCaseFormatter(Formatter):
    """``string.Formatter`` with an extra ``!t`` conversion for title case.

    ``"{name!t}"`` renders ``str(value).title()``. Every other conversion is
    handled by the base class.
    """

    def convert_field(self, value, conversion):
        """Apply ``conversion`` to ``value`` and return the converted value.

        ``"t"`` title-cases the string form of ``value``; anything else is
        delegated to ``Formatter.convert_field``.
        """
        if conversion == "t":
            return str(value).title()
        # Return the base class's result. Discarding it and returning `value`
        # silently turned `!r`, `!s` and `!a` into no-ops.
        return super().convert_field(value, conversion)
notifications.get("status-types", ["on-completed"]) + for s in status_types: + job.setdefault("routes", []).extend( + [f"notify.email.{email}.{s}" for email in emails] + ) + + # Customize the email subject to include release name and build number + job.setdefault("extra", {}).update( + { + "notify": { + "email": { + "subject": subject, + "content": message, + } + } + } + ) + + yield job diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py new file mode 100644 index 0000000000..8ab3762b8c --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py @@ -0,0 +1,1288 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +""" +These transformations take a task description and turn it into a TaskCluster +task definition (along with attributes, label, etc.). The input to these +transformations is generic to any kind of task, but abstracts away some of the +complexities of worker implementations, scopes, and treeherder annotations. 
+""" + + +import hashlib +import os +import re +import time +from copy import deepcopy + +import attr +from voluptuous import All, Any, Extra, NotIn, Optional, Required + +from taskgraph import MAX_DEPENDENCIES +from taskgraph.transforms.base import TransformSequence +from taskgraph.util.hash import hash_path +from taskgraph.util.keyed_by import evaluate_keyed_by +from taskgraph.util.memoize import memoize +from taskgraph.util.schema import ( + OptimizationSchema, + Schema, + optionally_keyed_by, + resolve_keyed_by, + taskref_or_string, + validate_schema, +) +from taskgraph.util.treeherder import split_symbol +from taskgraph.util.workertypes import worker_type_implementation + +from ..util import docker as dockerutil +from ..util.workertypes import get_worker_type + +RUN_TASK = os.path.join( + os.path.dirname(os.path.dirname(__file__)), "run-task", "run-task" +) + + +@memoize +def _run_task_suffix(): + """String to append to cache names under control of run-task.""" + return hash_path(RUN_TASK)[0:20] + + +# A task description is a general description of a TaskCluster task +task_description_schema = Schema( + { + # the label for this task + Required("label"): str, + # description of the task (for metadata) + Required("description"): str, + # attributes for this task + Optional("attributes"): {str: object}, + # relative path (from config.path) to the file task was defined in + Optional("task-from"): str, + # dependencies of this task, keyed by name; these are passed through + # verbatim and subject to the interpretation of the Task's get_dependencies + # method. + Optional("dependencies"): { + All( + str, + NotIn( + ["self", "decision"], + "Can't use 'self` or 'decision' as dependency names.", + ), + ): object, + }, + # Soft dependencies of this task, as a list of tasks labels + Optional("soft-dependencies"): [str], + # Dependencies that must be scheduled in order for this task to run. 
+ Optional("if-dependencies"): [str], + Optional("requires"): Any("all-completed", "all-resolved"), + # expiration and deadline times, relative to task creation, with units + # (e.g., "14 days"). Defaults are set based on the project. + Optional("expires-after"): str, + Optional("deadline-after"): str, + # custom routes for this task; the default treeherder routes will be added + # automatically + Optional("routes"): [str], + # custom scopes for this task; any scopes required for the worker will be + # added automatically. The following parameters will be substituted in each + # scope: + # {level} -- the scm level of this push + # {project} -- the project of this push + Optional("scopes"): [str], + # Tags + Optional("tags"): {str: str}, + # custom "task.extra" content + Optional("extra"): {str: object}, + # treeherder-related information; see + # https://schemas.taskcluster.net/taskcluster-treeherder/v1/task-treeherder-config.json + # If not specified, no treeherder extra information or routes will be + # added to the task + Optional("treeherder"): { + # either a bare symbol, or "grp(sym)". + "symbol": str, + # the job kind + "kind": Any("build", "test", "other"), + # tier for this task + "tier": int, + # task platform, in the form platform/collection, used to set + # treeherder.machine.platform and treeherder.collection or + # treeherder.labels + "platform": str, + }, + # information for indexing this build so its artifacts can be discovered; + # if omitted, the build will not be indexed. + Optional("index"): { + # the name of the product this build produces + "product": str, + # the names to use for this job in the TaskCluster index + "job-name": str, + # Type of gecko v2 index to use + "type": str, + # The rank that the task will receive in the TaskCluster + # index. A newly completed task supersedes the currently + # indexed task iff it has a higher rank. If unspecified, + # 'by-tier' behavior will be used. 
+ "rank": Any( + # Rank is equal the timestamp of the build_date for tier-1 + # tasks, and zero for non-tier-1. This sorts tier-{2,3} + # builds below tier-1 in the index. + "by-tier", + # Rank is given as an integer constant (e.g. zero to make + # sure a task is last in the index). + int, + # Rank is equal to the timestamp of the build_date. This + # option can be used to override the 'by-tier' behavior + # for non-tier-1 tasks. + "build_date", + ), + }, + # The `run_on_projects` attribute, defaulting to "all". This dictates the + # projects on which this task should be included in the target task set. + # See the attributes documentation for details. + Optional("run-on-projects"): optionally_keyed_by("build-platform", [str]), + Optional("run-on-tasks-for"): [str], + Optional("run-on-git-branches"): [str], + # The `always-target` attribute will cause the task to be included in the + # target_task_graph regardless of filtering. Tasks included in this manner + # will be candidates for optimization even when `optimize_target_tasks` is + # False, unless the task was also explicitly chosen by the target_tasks + # method. + Required("always-target"): bool, + # Optimization to perform on this task during the optimization phase. + # Optimizations are defined in taskcluster/taskgraph/optimize.py. + Required("optimization"): OptimizationSchema, + # the provisioner-id/worker-type for the task. The following parameters will + # be substituted in this string: + # {level} -- the scm level of this push + "worker-type": str, + # Whether the job should use sccache compiler caching. 
+ Required("needs-sccache"): bool, + # information specific to the worker implementation that will run this task + Optional("worker"): { + Required("implementation"): str, + Extra: object, + }, + } +) + +TC_TREEHERDER_SCHEMA_URL = ( + "https://github.com/taskcluster/taskcluster-treeherder/" + "blob/master/schemas/task-treeherder-config.yml" +) + + +UNKNOWN_GROUP_NAME = ( + "Treeherder group {} (from {}) has no name; " "add it to taskcluster/ci/config.yml" +) + +V2_ROUTE_TEMPLATES = [ + "index.{trust-domain}.v2.{project}.latest.{product}.{job-name}", + "index.{trust-domain}.v2.{project}.pushdate.{build_date_long}.{product}.{job-name}", + "index.{trust-domain}.v2.{project}.pushlog-id.{pushlog_id}.{product}.{job-name}", + "index.{trust-domain}.v2.{project}.revision.{branch_rev}.{product}.{job-name}", +] + +# the roots of the treeherder routes +TREEHERDER_ROUTE_ROOT = "tc-treeherder" + + +def get_branch_rev(config): + return config.params["head_rev"] + + +@memoize +def get_default_priority(graph_config, project): + return evaluate_keyed_by( + graph_config["task-priority"], "Graph Config", {"project": project} + ) + + +# define a collection of payload builders, depending on the worker implementation +payload_builders = {} + + +@attr.s(frozen=True) +class PayloadBuilder: + schema = attr.ib(type=Schema) + builder = attr.ib() + + +def payload_builder(name, schema): + schema = Schema({Required("implementation"): name, Optional("os"): str}).extend( + schema + ) + + def wrap(func): + payload_builders[name] = PayloadBuilder(schema, func) + return func + + return wrap + + +# define a collection of index builders, depending on the type implementation +index_builders = {} + + +def index_builder(name): + def wrap(func): + index_builders[name] = func + return func + + return wrap + + +UNSUPPORTED_INDEX_PRODUCT_ERROR = """\ +The index product {product} is not in the list of configured products in +`taskcluster/ci/config.yml'. 
+""" + + +def verify_index(config, index): + product = index["product"] + if product not in config.graph_config["index"]["products"]: + raise Exception(UNSUPPORTED_INDEX_PRODUCT_ERROR.format(product=product)) + + +@payload_builder( + "docker-worker", + schema={ + Required("os"): "linux", + # For tasks that will run in docker-worker, this is the name of the docker + # image or in-tree docker image to run the task in. If in-tree, then a + # dependency will be created automatically. This is generally + # `desktop-test`, or an image that acts an awful lot like it. + Required("docker-image"): Any( + # a raw Docker image path (repo/image:tag) + str, + # an in-tree generated docker image (from `taskcluster/docker/`) + {"in-tree": str}, + # an indexed docker image + {"indexed": str}, + ), + # worker features that should be enabled + Required("relengapi-proxy"): bool, + Required("chain-of-trust"): bool, + Required("taskcluster-proxy"): bool, + Required("allow-ptrace"): bool, + Required("loopback-video"): bool, + Required("loopback-audio"): bool, + Required("docker-in-docker"): bool, # (aka 'dind') + Required("privileged"): bool, + Required("disable-seccomp"): bool, + # Paths to Docker volumes. + # + # For in-tree Docker images, volumes can be parsed from Dockerfile. + # This only works for the Dockerfile itself: if a volume is defined in + # a base image, it will need to be declared here. Out-of-tree Docker + # images will also require explicit volume annotation. + # + # Caches are often mounted to the same path as Docker volumes. In this + # case, they take precedence over a Docker volume. But a volume still + # needs to be declared for the path. 
+ Optional("volumes"): [str], + # caches to set up for the task + Optional("caches"): [ + { + # only one type is supported by any of the workers right now + "type": "persistent", + # name of the cache, allowing re-use by subsequent tasks naming the + # same cache + "name": str, + # location in the task image where the cache will be mounted + "mount-point": str, + # Whether the cache is not used in untrusted environments + # (like the Try repo). + Optional("skip-untrusted"): bool, + } + ], + # artifacts to extract from the task image after completion + Optional("artifacts"): [ + { + # type of artifact -- simple file, or recursive directory + "type": Any("file", "directory"), + # task image path from which to read artifact + "path": str, + # name of the produced artifact (root of the names for + # type=directory) + "name": str, + } + ], + # environment variables + Required("env"): {str: taskref_or_string}, + # the command to run; if not given, docker-worker will default to the + # command in the docker image + Optional("command"): [taskref_or_string], + # the maximum time to run, in seconds + Required("max-run-time"): int, + # the exit status code(s) that indicates the task should be retried + Optional("retry-exit-status"): [int], + # the exit status code(s) that indicates the caches used by the task + # should be purged + Optional("purge-caches-exit-status"): [int], + # Whether any artifacts are assigned to this worker + Optional("skip-artifacts"): bool, + }, +) +def build_docker_worker_payload(config, task, task_def): + worker = task["worker"] + level = int(config.params["level"]) + + image = worker["docker-image"] + if isinstance(image, dict): + if "in-tree" in image: + name = image["in-tree"] + docker_image_task = "build-docker-image-" + image["in-tree"] + task.setdefault("dependencies", {})["docker-image"] = docker_image_task + + image = { + "path": "public/image.tar.zst", + "taskId": {"task-reference": ""}, + "type": "task-image", + } + + # Find VOLUME in 
Dockerfile. + volumes = dockerutil.parse_volumes(name) + for v in sorted(volumes): + if v in worker["volumes"]: + raise Exception( + "volume %s already defined; " + "if it is defined in a Dockerfile, " + "it does not need to be specified in the " + "worker definition" % v + ) + + worker["volumes"].append(v) + + elif "indexed" in image: + image = { + "path": "public/image.tar.zst", + "namespace": image["indexed"], + "type": "indexed-image", + } + else: + raise Exception("unknown docker image type") + + features = {} + + if worker.get("relengapi-proxy"): + features["relengAPIProxy"] = True + + if worker.get("taskcluster-proxy"): + features["taskclusterProxy"] = True + + if worker.get("allow-ptrace"): + features["allowPtrace"] = True + task_def["scopes"].append("docker-worker:feature:allowPtrace") + + if worker.get("chain-of-trust"): + features["chainOfTrust"] = True + + if worker.get("docker-in-docker"): + features["dind"] = True + + if task.get("needs-sccache"): + features["taskclusterProxy"] = True + task_def["scopes"].append( + "assume:project:taskcluster:{trust_domain}:level-{level}-sccache-buckets".format( + trust_domain=config.graph_config["trust-domain"], + level=config.params["level"], + ) + ) + worker["env"]["USE_SCCACHE"] = "1" + # Disable sccache idle shutdown. 
+ worker["env"]["SCCACHE_IDLE_TIMEOUT"] = "0" + else: + worker["env"]["SCCACHE_DISABLE"] = "1" + + capabilities = {} + + for lo in "audio", "video": + if worker.get("loopback-" + lo): + capitalized = "loopback" + lo.capitalize() + devices = capabilities.setdefault("devices", {}) + devices[capitalized] = True + task_def["scopes"].append("docker-worker:capability:device:" + capitalized) + + if worker.get("privileged"): + capabilities["privileged"] = True + task_def["scopes"].append("docker-worker:capability:privileged") + + if worker.get("disable-seccomp"): + capabilities["disableSeccomp"] = True + task_def["scopes"].append("docker-worker:capability:disableSeccomp") + + task_def["payload"] = payload = { + "image": image, + "env": worker["env"], + } + if "command" in worker: + payload["command"] = worker["command"] + + if "max-run-time" in worker: + payload["maxRunTime"] = worker["max-run-time"] + + run_task = payload.get("command", [""])[0].endswith("run-task") + + # run-task exits EXIT_PURGE_CACHES if there is a problem with caches. + # Automatically retry the tasks and purge caches if we see this exit + # code. + # TODO move this closer to code adding run-task once bug 1469697 is + # addressed. 
+ if run_task: + worker.setdefault("retry-exit-status", []).append(72) + worker.setdefault("purge-caches-exit-status", []).append(72) + + payload["onExitStatus"] = {} + if "retry-exit-status" in worker: + payload["onExitStatus"]["retry"] = worker["retry-exit-status"] + if "purge-caches-exit-status" in worker: + payload["onExitStatus"]["purgeCaches"] = worker["purge-caches-exit-status"] + + if "artifacts" in worker: + artifacts = {} + for artifact in worker["artifacts"]: + artifacts[artifact["name"]] = { + "path": artifact["path"], + "type": artifact["type"], + "expires": task_def["expires"], # always expire with the task + } + payload["artifacts"] = artifacts + + if isinstance(worker.get("docker-image"), str): + out_of_tree_image = worker["docker-image"] + else: + out_of_tree_image = None + image = worker.get("docker-image", {}).get("in-tree") + + if "caches" in worker: + caches = {} + + # run-task knows how to validate caches. + # + # To help ensure new run-task features and bug fixes don't interfere + # with existing caches, we seed the hash of run-task into cache names. + # So, any time run-task changes, we should get a fresh set of caches. + # This means run-task can make changes to cache interaction at any time + # without regards for backwards or future compatibility. + # + # But this mechanism only works for in-tree Docker images that are built + # with the current run-task! For out-of-tree Docker images, we have no + # way of knowing their content of run-task. So, in addition to varying + # cache names by the contents of run-task, we also take the Docker image + # name into consideration. This means that different Docker images will + # never share the same cache. This is a bit unfortunate. But it is the + # safest thing to do. Fortunately, most images are defined in-tree. + # + # For out-of-tree Docker images, we don't strictly need to incorporate + # the run-task content into the cache name. 
However, doing so preserves + # the mechanism whereby changing run-task results in new caches + # everywhere. + + # As an additional mechanism to force the use of different caches, the + # string literal in the variable below can be changed. This is + # preferred to changing run-task because it doesn't require images + # to be rebuilt. + cache_version = "v3" + + if run_task: + suffix = f"{cache_version}-{_run_task_suffix()}" + + if out_of_tree_image: + name_hash = hashlib.sha256( + out_of_tree_image.encode("utf-8") + ).hexdigest() + suffix += name_hash[0:12] + + else: + suffix = cache_version + + skip_untrusted = config.params.is_try() or level == 1 + + for cache in worker["caches"]: + # Some caches aren't enabled in environments where we can't + # guarantee certain behavior. Filter those out. + if cache.get("skip-untrusted") and skip_untrusted: + continue + + name = "{trust_domain}-level-{level}-{name}-{suffix}".format( + trust_domain=config.graph_config["trust-domain"], + level=config.params["level"], + name=cache["name"], + suffix=suffix, + ) + caches[name] = cache["mount-point"] + task_def["scopes"].append("docker-worker:cache:%s" % name) + + # Assertion: only run-task is interested in this. + if run_task: + payload["env"]["TASKCLUSTER_CACHES"] = ";".join(sorted(caches.values())) + + payload["cache"] = caches + + # And send down volumes information to run-task as well. 
+ if run_task and worker.get("volumes"): + payload["env"]["TASKCLUSTER_VOLUMES"] = ";".join(sorted(worker["volumes"])) + + if payload.get("cache") and skip_untrusted: + payload["env"]["TASKCLUSTER_UNTRUSTED_CACHES"] = "1" + + if features: + payload["features"] = features + if capabilities: + payload["capabilities"] = capabilities + + check_caches_are_volumes(task) + + +@payload_builder( + "generic-worker", + schema={ + Required("os"): Any("windows", "macosx", "linux", "linux-bitbar"), + # see http://schemas.taskcluster.net/generic-worker/v1/payload.json + # and https://docs.taskcluster.net/reference/workers/generic-worker/payload + # command is a list of commands to run, sequentially + # on Windows, each command is a string, on OS X and Linux, each command is + # a string array + Required("command"): Any( + [taskref_or_string], [[taskref_or_string]] # Windows # Linux / OS X + ), + # artifacts to extract from the task image after completion; note that artifacts + # for the generic worker cannot have names + Optional("artifacts"): [ + { + # type of artifact -- simple file, or recursive directory + "type": Any("file", "directory"), + # filesystem path from which to read artifact + "path": str, + # if not specified, path is used for artifact name + Optional("name"): str, + } + ], + # Directories and/or files to be mounted. + # The actual allowed combinations are stricter than the model below, + # but this provides a simple starting point. + # See https://docs.taskcluster.net/reference/workers/generic-worker/payload + Optional("mounts"): [ + { + # A unique name for the cache volume, implies writable cache directory + # (otherwise mount is a read-only file or directory). + Optional("cache-name"): str, + # Optional content for pre-loading cache, or mandatory content for + # read-only file or directory. Pre-loaded content can come from either + # a task artifact or from a URL. + Optional("content"): { + # *** Either (artifact and task-id) or url must be specified. 
*** + # Artifact name that contains the content. + Optional("artifact"): str, + # Task ID that has the artifact that contains the content. + Optional("task-id"): taskref_or_string, + # URL that supplies the content in response to an unauthenticated + # GET request. + Optional("url"): str, + }, + # *** Either file or directory must be specified. *** + # If mounting a cache or read-only directory, the filesystem location of + # the directory should be specified as a relative path to the task + # directory here. + Optional("directory"): str, + # If mounting a file, specify the relative path within the task + # directory to mount the file (the file will be read only). + Optional("file"): str, + # Required if and only if `content` is specified and mounting a + # directory (not a file). This should be the archive format of the + # content (either pre-loaded cache or read-only directory). + Optional("format"): Any("rar", "tar.bz2", "tar.gz", "zip"), + } + ], + # environment variables + Required("env"): {str: taskref_or_string}, + # the maximum time to run, in seconds + Required("max-run-time"): int, + # os user groups for test task workers + Optional("os-groups"): [str], + # feature for test task to run as administarotr + Optional("run-as-administrator"): bool, + # optional features + Required("chain-of-trust"): bool, + Optional("taskcluster-proxy"): bool, + # Whether any artifacts are assigned to this worker + Optional("skip-artifacts"): bool, + }, +) +def build_generic_worker_payload(config, task, task_def): + worker = task["worker"] + + task_def["payload"] = { + "command": worker["command"], + "maxRunTime": worker["max-run-time"], + } + + on_exit_status = {} + if "retry-exit-status" in worker: + on_exit_status["retry"] = worker["retry-exit-status"] + if worker["os"] == "windows": + on_exit_status.setdefault("retry", []).extend( + [ + # These codes (on windows) indicate a process interruption, + # rather than a task run failure. See bug 1544403. 
+ 1073807364, # process force-killed due to system shutdown + 3221225786, # sigint (any interrupt) + ] + ) + if on_exit_status: + task_def["payload"]["onExitStatus"] = on_exit_status + + env = worker.get("env", {}) + + if task.get("needs-sccache"): + env["USE_SCCACHE"] = "1" + # Disable sccache idle shutdown. + env["SCCACHE_IDLE_TIMEOUT"] = "0" + else: + env["SCCACHE_DISABLE"] = "1" + + if env: + task_def["payload"]["env"] = env + + artifacts = [] + + for artifact in worker.get("artifacts", []): + a = { + "path": artifact["path"], + "type": artifact["type"], + } + if "name" in artifact: + a["name"] = artifact["name"] + artifacts.append(a) + + if artifacts: + task_def["payload"]["artifacts"] = artifacts + + # Need to copy over mounts, but rename keys to respect naming convention + # * 'cache-name' -> 'cacheName' + # * 'task-id' -> 'taskId' + # All other key names are already suitable, and don't need renaming. + mounts = deepcopy(worker.get("mounts", [])) + for mount in mounts: + if "cache-name" in mount: + mount["cacheName"] = "{trust_domain}-level-{level}-{name}".format( + trust_domain=config.graph_config["trust-domain"], + level=config.params["level"], + name=mount.pop("cache-name"), + ) + task_def["scopes"].append( + "generic-worker:cache:{}".format(mount["cacheName"]) + ) + if "content" in mount: + if "task-id" in mount["content"]: + mount["content"]["taskId"] = mount["content"].pop("task-id") + if "artifact" in mount["content"]: + if not mount["content"]["artifact"].startswith("public/"): + task_def["scopes"].append( + "queue:get-artifact:{}".format(mount["content"]["artifact"]) + ) + + if mounts: + task_def["payload"]["mounts"] = mounts + + if worker.get("os-groups"): + task_def["payload"]["osGroups"] = worker["os-groups"] + task_def["scopes"].extend( + [ + "generic-worker:os-group:{}/{}".format(task["worker-type"], group) + for group in worker["os-groups"] + ] + ) + + features = {} + + if worker.get("chain-of-trust"): + features["chainOfTrust"] = True + + if 
worker.get("taskcluster-proxy"): + features["taskclusterProxy"] = True + + if worker.get("run-as-administrator", False): + features["runAsAdministrator"] = True + task_def["scopes"].append( + "generic-worker:run-as-administrator:{}".format(task["worker-type"]), + ) + + if features: + task_def["payload"]["features"] = features + + +@payload_builder( + "beetmover", + schema={ + # the maximum time to run, in seconds + Required("max-run-time"): int, + # locale key, if this is a locale beetmover job + Optional("locale"): str, + Optional("partner-public"): bool, + Required("release-properties"): { + "app-name": str, + "app-version": str, + "branch": str, + "build-id": str, + "hash-type": str, + "platform": str, + }, + # list of artifact URLs for the artifacts that should be beetmoved + Required("upstream-artifacts"): [ + { + # taskId of the task with the artifact + Required("taskId"): taskref_or_string, + # type of signing task (for CoT) + Required("taskType"): str, + # Paths to the artifacts to sign + Required("paths"): [str], + # locale is used to map upload path and allow for duplicate simple names + Required("locale"): str, + } + ], + Optional("artifact-map"): object, + }, +) +def build_beetmover_payload(config, task, task_def): + worker = task["worker"] + release_properties = worker["release-properties"] + + task_def["payload"] = { + "maxRunTime": worker["max-run-time"], + "releaseProperties": { + "appName": release_properties["app-name"], + "appVersion": release_properties["app-version"], + "branch": release_properties["branch"], + "buildid": release_properties["build-id"], + "hashType": release_properties["hash-type"], + "platform": release_properties["platform"], + }, + "upload_date": config.params["build_date"], + "upstreamArtifacts": worker["upstream-artifacts"], + } + if worker.get("locale"): + task_def["payload"]["locale"] = worker["locale"] + if worker.get("artifact-map"): + task_def["payload"]["artifactMap"] = worker["artifact-map"] + if 
worker.get("partner-public"): + task_def["payload"]["is_partner_repack_public"] = worker["partner-public"] + + +@payload_builder( + "invalid", + schema={ + # an invalid task is one which should never actually be created; this is used in + # release automation on branches where the task just doesn't make sense + Extra: object, + }, +) +def build_invalid_payload(config, task, task_def): + task_def["payload"] = "invalid task - should never be created" + + +@payload_builder( + "always-optimized", + schema={ + Extra: object, + }, +) +@payload_builder("succeed", schema={}) +def build_dummy_payload(config, task, task_def): + task_def["payload"] = {} + + +transforms = TransformSequence() + + +@transforms.add +def set_implementation(config, tasks): + """ + Set the worker implementation based on the worker-type alias. + """ + for task in tasks: + worker = task.setdefault("worker", {}) + if "implementation" in task["worker"]: + yield task + continue + + impl, os = worker_type_implementation(config.graph_config, task["worker-type"]) + + tags = task.setdefault("tags", {}) + tags["worker-implementation"] = impl + if os: + task["tags"]["os"] = os + worker["implementation"] = impl + if os: + worker["os"] = os + + yield task + + +@transforms.add +def set_defaults(config, tasks): + for task in tasks: + task.setdefault("always-target", False) + task.setdefault("optimization", None) + task.setdefault("needs-sccache", False) + + worker = task["worker"] + if worker["implementation"] in ("docker-worker",): + worker.setdefault("relengapi-proxy", False) + worker.setdefault("chain-of-trust", False) + worker.setdefault("taskcluster-proxy", False) + worker.setdefault("allow-ptrace", False) + worker.setdefault("loopback-video", False) + worker.setdefault("loopback-audio", False) + worker.setdefault("docker-in-docker", False) + worker.setdefault("privileged", False) + worker.setdefault("disable-seccomp", False) + worker.setdefault("volumes", []) + worker.setdefault("env", {}) + if "caches" in 
@transforms.add
def task_name_from_label(config, tasks):
    """Give every task a ``label`` and drop its ``name``.

    A task without a ``label`` gets ``"<kind>-<name>"``; a task with neither
    key is rejected.  A non-empty ``name`` is removed afterwards.

    Raises:
        Exception: if a task has neither ``name`` nor ``label``.
    """
    for task in tasks:
        if "label" not in task:
            if "name" not in task:
                raise Exception("task has neither a name nor a label")
            task["label"] = "{}-{}".format(config.kind, task["name"])
        if task.get("name"):
            del task["name"]
        yield task


@transforms.add
def validate(config, tasks):
    """Validate each task and its ``worker`` section against their schemas.

    The task is checked against ``task_description_schema``; the worker
    section is checked against the schema registered in ``payload_builders``
    for the task's worker implementation.
    """
    for task in tasks:
        label = task.get("label", "?no-label?")
        validate_schema(
            task_description_schema,
            task,
            "In task {!r}:".format(label),
        )
        validate_schema(
            payload_builders[task["worker"]["implementation"]].schema,
            task["worker"],
            "In task.run {!r}:".format(label),
        )
        yield task


@index_builder("generic")
def add_generic_index_routes(config, task):
    """Append the ``V2_ROUTE_TEMPLATES`` index routes to ``task["routes"]``.

    The templates are formatted with a copy of ``config.params`` extended
    with ``job-name``, ``product``, ``trust-domain``, ``branch_rev`` and a
    ``build_date_long`` timestamp derived from ``params["build_date"]``.

    The task must carry an ``index`` mapping with ``job-name`` and
    ``product`` keys; it is passed to ``verify_index`` first.

    Returns:
        The same ``task`` object, with its ``routes`` list extended.
    """
    index = task.get("index")
    routes = task.setdefault("routes", [])

    verify_index(config, index)

    subs = config.params.copy()
    subs["job-name"] = index["job-name"]
    subs["build_date_long"] = time.strftime(
        "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"])
    )
    subs["product"] = index["product"]
    subs["trust-domain"] = config.graph_config["trust-domain"]
    subs["branch_rev"] = get_branch_rev(config)

    routes.extend(tpl.format(**subs) for tpl in V2_ROUTE_TEMPLATES)

    return task


@transforms.add
def add_index_routes(config, tasks):
    """Set ``extra.index.rank`` and expand ``index`` into routes.

    The rank is always written, even for tasks without an ``index`` section.
    Tasks that do have one are handed to the builder registered in
    ``index_builders`` for ``index["type"]`` (default ``"generic"``), and the
    ``index`` key is removed afterwards.

    Raises:
        ValueError: if ``index["type"]`` has no registered builder.
    """
    for task in tasks:
        index = task.get("index", {})

        # The default behavior is to rank tasks according to their tier
        extra_index = task.setdefault("extra", {}).setdefault("index", {})
        rank = index.get("rank", "by-tier")

        if rank == "by-tier":
            # rank is zero for non-tier-1 tasks and based on build_date for
            # others; this sorts tier-{2,3} builds below tier-1 in the index
            tier = task.get("treeherder", {}).get("tier", 3)
            extra_index["rank"] = 0 if tier > 1 else int(config.params["build_date"])
        elif rank == "build_date":
            extra_index["rank"] = int(config.params["build_date"])
        else:
            # Any other value is taken as an explicit rank.
            extra_index["rank"] = rank

        if not index:
            yield task
            continue

        index_type = index.get("type", "generic")
        if index_type not in index_builders:
            raise ValueError(f"Unknown index-type {index_type}")
        task = index_builders[index_type](config, task)

        del task["index"]
        yield task


@transforms.add
def build_task(config, tasks):
    """Turn each task description into a graph entry with a task definition.

    For every task this resolves the worker type, formats scopes, fills in
    ``extra`` (including Treeherder metadata and route when the task has a
    ``treeherder`` section), applies default expiry, deadline and priority,
    assembles the task definition, lets the worker implementation's payload
    builder add the payload, computes attributes, and rewrites
    ``if-dependencies`` from dependency names to dependency labels.

    Yields:
        dict: ``label``, ``description``, ``task`` (the task definition),
        ``dependencies``, ``if-dependencies``, ``soft-dependencies``,
        ``attributes`` and ``optimization``.

    Raises:
        Exception: if a Treeherder group symbol is missing from the graph
            config's group names, or an ``if-dependencies`` entry is not a
            key of ``dependencies``.
        RuntimeError: if a Treeherder symbol or group symbol is longer than
            25 characters.
    """
    for task in tasks:
        level = str(config.params["level"])

        provisioner_id, worker_type = get_worker_type(
            config.graph_config,
            task["worker-type"],
            level,
        )
        task["worker-type"] = "/".join([provisioner_id, worker_type])
        project = config.params["project"]

        routes = task.get("routes", [])
        scopes = [
            s.format(level=level, project=project) for s in task.get("scopes", [])
        ]

        # set up extra
        extra = task.get("extra", {})
        extra["parent"] = os.environ.get("TASK_ID", "")
        task_th = task.get("treeherder")
        if task_th:
            extra.setdefault("treeherder-platform", task_th["platform"])
            treeherder = extra.setdefault("treeherder", {})

            machine_platform, collection = task_th["platform"].split("/", 1)
            treeherder["machine"] = {"platform": machine_platform}
            treeherder["collection"] = {collection: True}

            group_names = config.graph_config["treeherder"]["group-names"]
            groupSymbol, symbol = split_symbol(task_th["symbol"])
            # "?" is treated as "no group": no group symbol/name is recorded.
            if groupSymbol != "?":
                treeherder["groupSymbol"] = groupSymbol
                if groupSymbol not in group_names:
                    path = os.path.join(config.path, task.get("task-from", ""))
                    raise Exception(UNKNOWN_GROUP_NAME.format(groupSymbol, path))
                treeherder["groupName"] = group_names[groupSymbol]
            treeherder["symbol"] = symbol
            if len(symbol) > 25 or len(groupSymbol) > 25:
                raise RuntimeError(
                    "Treeherder group and symbol names must not be longer than "
                    "25 characters: {} (see {})".format(
                        task_th["symbol"],
                        TC_TREEHERDER_SCHEMA_URL,
                    )
                )
            treeherder["jobKind"] = task_th["kind"]
            treeherder["tier"] = task_th["tier"]

            branch_rev = get_branch_rev(config)

            if config.params["tasks_for"].startswith("github-pull-request"):
                # In the past we used `project` for this, but that ends up being
                # set to the repository name of the _head_ repo, which is not correct
                # (and causes scope issues) if it doesn't match the name of the
                # base repo
                base_project = config.params["base_repository"].split("/")[-1]
                if base_project.endswith(".git"):
                    base_project = base_project[:-4]
                th_project_suffix = "-pr"
            else:
                base_project = config.params["project"]
                th_project_suffix = ""

            routes.append(
                "{}.v2.{}.{}.{}".format(
                    TREEHERDER_ROUTE_ROOT,
                    base_project + th_project_suffix,
                    branch_rev,
                    config.params["pushlog_id"],
                )
            )

        if "expires-after" not in task:
            task["expires-after"] = "28 days" if config.params.is_try() else "1 year"

        if "deadline-after" not in task:
            task["deadline-after"] = "1 day"

        if "priority" not in task:
            task["priority"] = get_default_priority(
                config.graph_config, config.params["project"]
            )

        tags = task.get("tags", {})
        tags.update(
            {
                "createdForUser": config.params["owner"],
                "kind": config.kind,
                "label": task["label"],
            }
        )

        task_def = {
            "provisionerId": provisioner_id,
            "workerType": worker_type,
            "routes": routes,
            "created": {"relative-datestamp": "0 seconds"},
            "deadline": {"relative-datestamp": task["deadline-after"]},
            "expires": {"relative-datestamp": task["expires-after"]},
            "scopes": scopes,
            "metadata": {
                "description": task["description"],
                "name": task["label"],
                "owner": config.params["owner"],
                "source": config.params.file_url(config.path, pretty=True),
            },
            "extra": extra,
            "tags": tags,
            "priority": task["priority"],
        }

        if task.get("requires"):
            task_def["requires"] = task["requires"]

        if task_th:
            # link back to treeherder in description; `th_project_suffix` and
            # `branch_rev` were set in the `if task_th:` branch above
            th_push_link = (
                "https://treeherder.mozilla.org/#/jobs?repo={}&revision={}".format(
                    config.params["project"] + th_project_suffix, branch_rev
                )
            )
            task_def["metadata"]["description"] += " ([Treeherder push]({}))".format(
                th_push_link
            )

        # add the payload and adjust anything else as required (e.g., scopes)
        payload_builders[task["worker"]["implementation"]].builder(
            config, task, task_def
        )

        attributes = task.get("attributes", {})
        # Resolve run-on-projects
        build_platform = attributes.get("build_platform")
        resolve_keyed_by(
            task,
            "run-on-projects",
            item_name=task["label"],
            **{"build-platform": build_platform},
        )
        attributes["run_on_projects"] = task.get("run-on-projects", ["all"])
        attributes["run_on_tasks_for"] = task.get("run-on-tasks-for", ["all"])
        # We don't want to pollute non git repos with this attribute. Moreover, target_tasks
        # already assumes the default value is ['all']
        if task.get("run-on-git-branches"):
            attributes["run_on_git_branches"] = task["run-on-git-branches"]

        attributes["always_target"] = task["always-target"]

        # Set MOZ_AUTOMATION on all generic-worker / docker-worker jobs that
        # ended up with a non-empty payload.
        if task["worker"]["implementation"] in (
            "generic-worker",
            "docker-worker",
        ):
            payload = task_def.get("payload")
            if payload:
                env = payload.setdefault("env", {})
                env["MOZ_AUTOMATION"] = "1"

        dependencies = task.get("dependencies", {})
        if_dependencies = task.get("if-dependencies", [])
        # Replace each dependency *name* by the label it maps to (in place).
        for i, dep in enumerate(if_dependencies):
            if dep not in dependencies:
                raise Exception(
                    "{label} specifies '{dep}' in if-dependencies, "
                    "but {dep} is not a dependency!".format(
                        label=task["label"], dep=dep
                    )
                )
            if_dependencies[i] = dependencies[dep]

        yield {
            "label": task["label"],
            "description": task["description"],
            "task": task_def,
            "dependencies": dependencies,
            "if-dependencies": if_dependencies,
            "soft-dependencies": task.get("soft-dependencies", []),
            "attributes": attributes,
            "optimization": task.get("optimization", None),
        }


@transforms.add
def add_github_checks(config, tasks):
    """
    For git repositories, add checks route to all tasks.

    Tasks of any other repository type are passed through unchanged.

    This will be replaced by a configurable option in the future.
    """
    # A single loop guarantees each task is yielded exactly once.  The
    # previous two-loop form fell through after the non-git pass and was only
    # correct when `tasks` was a one-shot generator.
    is_git = config.params["repository_type"] == "git"
    for task in tasks:
        if is_git:
            task["task"]["routes"].append("checks")
        yield task


@transforms.add
def chain_of_trust(config, tasks):
    """Record the docker image as a chain-of-trust input.

    Applies only to tasks whose payload enables the ``chainOfTrust`` feature
    and which have a ``docker-image`` dependency; for those,
    ``extra.chainOfTrust.inputs["docker-image"]`` is set to a task reference
    to that dependency.
    """
    for task in tasks:
        if task["task"].get("payload", {}).get("features", {}).get("chainOfTrust"):
            image = task.get("dependencies", {}).get("docker-image")
            if image:
                cot = (
                    task["task"].setdefault("extra", {}).setdefault("chainOfTrust", {})
                )
                # The placeholder names the `docker-image` dependency; an
                # empty task-reference would resolve to nothing.
                cot.setdefault("inputs", {})["docker-image"] = {
                    "task-reference": "<docker-image>"
                }
        yield task


@transforms.add
def check_task_identifiers(config, tasks):
    """Ensures that all tasks have well defined identifiers:
    ``^[a-zA-Z0-9_-]{1,38}$``

    Both ``workerType`` and ``provisionerId`` of the task definition are
    checked.

    Raises:
        Exception: if either identifier does not match the pattern.
    """
    identifier_re = re.compile(r"^[a-zA-Z0-9_-]{1,38}$")
    for task in tasks:
        for attrib in ("workerType", "provisionerId"):
            # fullmatch (rather than match) so that a trailing newline, which
            # `$` alone tolerates, is rejected as well.
            if not identifier_re.fullmatch(task["task"][attrib]):
                raise Exception(
                    "task {}.{} is not a valid identifier: {}".format(
                        task["label"], attrib, task["task"][attrib]
                    )
                )
        yield task


@transforms.add
def check_task_dependencies(config, tasks):
    """Ensures that tasks don't have more than ``MAX_DEPENDENCIES`` dependencies.

    Raises:
        Exception: if a task exceeds the limit.
    """
    for task in tasks:
        dependency_count = len(task["dependencies"])
        if dependency_count > MAX_DEPENDENCIES:
            raise Exception(
                "task {}/{} has too many dependencies ({} > {})".format(
                    config.kind,
                    task["label"],
                    dependency_count,
                    MAX_DEPENDENCIES,
                )
            )
        yield task


def check_caches_are_volumes(task):
    """Ensures that all cache paths are defined as volumes.

    Caches and volumes are the only filesystem locations whose content
    isn't defined by the Docker image itself. Some caches are optional
    depending on the job environment. We want paths that are potentially
    caches to behave as similarly as possible regardless of whether a cache
    is used. To help enforce this, we require that all paths used as caches
    be declared as Docker volumes. This check won't catch all offenders.
    But it is better than nothing.

    Raises:
        Exception: if any cache ``mount-point`` in ``task["worker"]`` is not
            listed in ``task["worker"]["volumes"]``.
    """
    volumes = set(task["worker"]["volumes"])
    paths = {c["mount-point"] for c in task["worker"].get("caches", [])}
    missing = paths - volumes

    if not missing:
        return

    raise Exception(
        "task %s (image %s) has caches that are not declared as "
        "Docker volumes: %s "
        "(have you added them as VOLUMEs in the Dockerfile?)"
        % (task["label"], task["worker"]["docker-image"], ", ".join(sorted(missing)))
    )


@transforms.add
def check_run_task_caches(config, tasks):
    """Audit for caches requiring run-task.

    run-task manages caches in certain ways. If a cache managed by run-task
    is used by a non run-task task, it could cause problems. So we audit for
    that and make sure certain cache names are exclusive to run-task.

    Every cache name must start with ``<trust-domain>-level-<level>-``.
    After that prefix, names beginning with ``checkouts`` or
    ``tooltool-cache`` are reserved: the task's first command element must
    end with ``run-task`` and the cache name must end with the suffix
    returned by ``_run_task_suffix()``.

    IF YOU ARE TEMPTED TO MAKE EXCLUSIONS TO THIS POLICY, YOU ARE LIKELY
    CONTRIBUTING TECHNICAL DEBT AND WILL HAVE TO SOLVE MANY OF THE PROBLEMS
    THAT RUN-TASK ALREADY SOLVES. THINK LONG AND HARD BEFORE DOING THAT.

    Raises:
        Exception: if a cache name violates any of the rules above.
    """
    re_reserved_caches = re.compile(r"^(checkouts|tooltool-cache)")

    cache_prefix = "{trust_domain}-level-{level}-".format(
        trust_domain=config.graph_config["trust-domain"],
        level=config.params["level"],
    )

    suffix = _run_task_suffix()

    for task in tasks:
        payload = task["task"].get("payload", {})
        command = payload.get("command") or [""]

        # The first command element may be a non-string; treat that as
        # "not run-task".
        main_command = command[0] if isinstance(command[0], str) else ""
        run_task = main_command.endswith("run-task")

        for cache in payload.get("cache", {}):
            if not cache.startswith(cache_prefix):
                raise Exception(
                    "{} is using a cache ({}) which is not appropriate "
                    "for its trust-domain and level. It should start with {}.".format(
                        task["label"], cache, cache_prefix
                    )
                )

            # Only the part after the trust-domain/level prefix is checked
            # against the reserved names.
            cache = cache[len(cache_prefix) :]

            if not re_reserved_caches.match(cache):
                continue

            if not run_task:
                raise Exception(
                    "%s is using a cache (%s) reserved for run-task "
                    "change the task to use run-task or use a different "
                    "cache name" % (task["label"], cache)
                )

            if not cache.endswith(suffix):
                raise Exception(
                    "%s is using a cache (%s) reserved for run-task "
                    "but the cache name is not dependent on the contents "
                    "of run-task; change the cache name to conform to the "
                    "naming requirements" % (task["label"], cache)
                )

        yield task