diff options
Diffstat (limited to '')
-rw-r--r-- | third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py | 453 |
1 files changed, 453 insertions, 0 deletions
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py new file mode 100644 index 0000000000..06978ff46d --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py @@ -0,0 +1,453 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +""" +Convert a job description into a task description. + +Jobs descriptions are similar to task descriptions, but they specify how to run +the job at a higher level, using a "run" field that can be interpreted by +run-using handlers in `taskcluster/taskgraph/transforms/job`. +""" + + +import copy +import json +import logging + +from voluptuous import Any, Exclusive, Extra, Optional, Required + +from taskgraph.transforms.base import TransformSequence +from taskgraph.transforms.cached_tasks import order_tasks +from taskgraph.transforms.task import task_description_schema +from taskgraph.util import path as mozpath +from taskgraph.util.python_path import import_sibling_modules +from taskgraph.util.schema import Schema, validate_schema +from taskgraph.util.taskcluster import get_artifact_prefix +from taskgraph.util.workertypes import worker_type_implementation + +logger = logging.getLogger(__name__) + +# Fetches may be accepted in other transforms and eventually passed along +# to a `job` (eg: from_deps). Defining this here allows them to re-use +# the schema and avoid duplication. +fetches_schema = { + Required("artifact"): str, + Optional("dest"): str, + Optional("extract"): bool, + Optional("verify-hash"): bool, +} + +# Schema for a build description +job_description_schema = Schema( + { + # The name of the job and the job's label. At least one must be specified, + # and the label will be generated from the name if necessary, by prepending + # the kind. + Optional("name"): str, + Optional("label"): str, + # the following fields are passed directly through to the task description, + # possibly modified by the run implementation. See + # taskcluster/taskgraph/transforms/task.py for the schema details. + Required("description"): task_description_schema["description"], + Optional("attributes"): task_description_schema["attributes"], + Optional("task-from"): task_description_schema["task-from"], + Optional("dependencies"): task_description_schema["dependencies"], + Optional("soft-dependencies"): task_description_schema["soft-dependencies"], + Optional("if-dependencies"): task_description_schema["if-dependencies"], + Optional("requires"): task_description_schema["requires"], + Optional("expires-after"): task_description_schema["expires-after"], + Optional("routes"): task_description_schema["routes"], + Optional("scopes"): task_description_schema["scopes"], + Optional("tags"): task_description_schema["tags"], + Optional("extra"): task_description_schema["extra"], + Optional("treeherder"): task_description_schema["treeherder"], + Optional("index"): task_description_schema["index"], + Optional("run-on-projects"): task_description_schema["run-on-projects"], + Optional("run-on-tasks-for"): task_description_schema["run-on-tasks-for"], + Optional("run-on-git-branches"): task_description_schema["run-on-git-branches"], + Optional("shipping-phase"): task_description_schema["shipping-phase"], + Optional("always-target"): task_description_schema["always-target"], + Exclusive("optimization", "optimization"): task_description_schema[ + "optimization" + ], + Optional("needs-sccache"): task_description_schema["needs-sccache"], + # The "when" section contains descriptions of the circumstances under which + # this task should be included in the task graph. This will be converted + # into an optimization, so it cannot be specified in a job description that + # also gives 'optimization'. + Exclusive("when", "optimization"): { + # This task only needs to be run if a file matching one of the given + # patterns has changed in the push. The patterns use the mozpack + # match function (python/mozbuild/mozpack/path.py). + Optional("files-changed"): [str], + }, + # A list of artifacts to install from 'fetch' tasks. + Optional("fetches"): { + Any("toolchain", "fetch"): [str], + str: [ + str, + fetches_schema, + ], + }, + # A description of how to run this job. + "run": { + # The key to a job implementation in a peer module to this one + "using": str, + # Base work directory used to set up the task. + Optional("workdir"): str, + # Any remaining content is verified against that job implementation's + # own schema. + Extra: object, + }, + Required("worker-type"): task_description_schema["worker-type"], + # This object will be passed through to the task description, with additions + # provided by the job's run-using function + Optional("worker"): dict, + } +) + +transforms = TransformSequence() +transforms.add_validate(job_description_schema) + + +@transforms.add +def rewrite_when_to_optimization(config, jobs): + for job in jobs: + when = job.pop("when", {}) + if not when: + yield job + continue + + files_changed = when.get("files-changed") + + # implicitly add task config directory. + files_changed.append(f"{config.path}/**") + + # "only when files changed" implies "skip if files have not changed" + job["optimization"] = {"skip-unless-changed": files_changed} + + assert "when" not in job + yield job + + +@transforms.add +def set_implementation(config, jobs): + for job in jobs: + impl, os = worker_type_implementation(config.graph_config, job["worker-type"]) + if os: + job.setdefault("tags", {})["os"] = os + if impl: + job.setdefault("tags", {})["worker-implementation"] = impl + worker = job.setdefault("worker", {}) + assert "implementation" not in worker + worker["implementation"] = impl + if os: + worker["os"] = os + yield job + + +@transforms.add +def set_label(config, jobs): + for job in jobs: + if "label" not in job: + if "name" not in job: + raise Exception("job has neither a name nor a label") + job["label"] = "{}-{}".format(config.kind, job["name"]) + if job.get("name"): + del job["name"] + yield job + + +@transforms.add +def add_resource_monitor(config, jobs): + for job in jobs: + if job.get("attributes", {}).get("resource-monitor"): + worker_implementation, worker_os = worker_type_implementation( + config.graph_config, job["worker-type"] + ) + # Normalise worker os so that linux-bitbar and similar use linux tools. + worker_os = worker_os.split("-")[0] + if "win7" in job["worker-type"]: + arch = "32" + else: + arch = "64" + job.setdefault("fetches", {}) + job["fetches"].setdefault("toolchain", []) + job["fetches"]["toolchain"].append(f"{worker_os}{arch}-resource-monitor") + + if worker_implementation == "docker-worker": + artifact_source = "/builds/worker/monitoring/resource-monitor.json" + else: + artifact_source = "monitoring/resource-monitor.json" + job["worker"].setdefault("artifacts", []) + job["worker"]["artifacts"].append( + { + "name": "public/monitoring/resource-monitor.json", + "type": "file", + "path": artifact_source, + } + ) + # Set env for output file + job["worker"].setdefault("env", {}) + job["worker"]["env"]["RESOURCE_MONITOR_OUTPUT"] = artifact_source + + yield job + + +def get_attribute(dict, key, attributes, attribute_name): + """Get `attribute_name` from the given `attributes` dict, and if there + is a corresponding value, set `key` in `dict` to that value.""" + value = attributes.get(attribute_name) + if value: + dict[key] = value + + +@transforms.add +def use_fetches(config, jobs): + artifact_names = {} + aliases = {} + extra_env = {} + + if config.kind in ("toolchain", "fetch"): + jobs = list(jobs) + for job in jobs: + run = job.get("run", {}) + label = job["label"] + get_attribute(artifact_names, label, run, "toolchain-artifact") + value = run.get(f"{config.kind}-alias") + if value: + aliases[f"{config.kind}-{value}"] = label + + for task in config.kind_dependencies_tasks.values(): + if task.kind in ("fetch", "toolchain"): + get_attribute( + artifact_names, + task.label, + task.attributes, + f"{task.kind}-artifact", + ) + get_attribute(extra_env, task.label, task.attributes, f"{task.kind}-env") + value = task.attributes.get(f"{task.kind}-alias") + if value: + aliases[f"{task.kind}-{value}"] = task.label + + artifact_prefixes = {} + for job in order_tasks(config, jobs): + artifact_prefixes[job["label"]] = get_artifact_prefix(job) + + fetches = job.pop("fetches", None) + if not fetches: + yield job + continue + + job_fetches = [] + name = job.get("name", job.get("label")) + dependencies = job.setdefault("dependencies", {}) + worker = job.setdefault("worker", {}) + env = worker.setdefault("env", {}) + prefix = get_artifact_prefix(job) + for kind in sorted(fetches): + artifacts = fetches[kind] + if kind in ("fetch", "toolchain"): + for fetch_name in sorted(artifacts): + label = f"{kind}-{fetch_name}" + label = aliases.get(label, label) + if label not in artifact_names: + raise Exception( + "Missing fetch job for {kind}-{name}: {fetch}".format( + kind=config.kind, name=name, fetch=fetch_name + ) + ) + if label in extra_env: + env.update(extra_env[label]) + + path = artifact_names[label] + + dependencies[label] = label + job_fetches.append( + { + "artifact": path, + "task": f"<{label}>", + "extract": True, + } + ) + else: + if kind not in dependencies: + raise Exception( + "{name} can't fetch {kind} artifacts because " + "it has no {kind} dependencies!".format(name=name, kind=kind) + ) + dep_label = dependencies[kind] + if dep_label in artifact_prefixes: + prefix = artifact_prefixes[dep_label] + else: + dep_tasks = [ + task + for label, task in config.kind_dependencies_tasks.items() + if label == dep_label + ] + if len(dep_tasks) != 1: + raise Exception( + "{name} can't fetch {kind} artifacts because " + "there are {tasks} with label {label} in kind dependencies!".format( + name=name, + kind=kind, + label=dependencies[kind], + tasks="no tasks" + if len(dep_tasks) == 0 + else "multiple tasks", + ) + ) + + prefix = get_artifact_prefix(dep_tasks[0]) + + def cmp_artifacts(a): + if isinstance(a, str): + return a + else: + return a["artifact"] + + for artifact in sorted(artifacts, key=cmp_artifacts): + if isinstance(artifact, str): + path = artifact + dest = None + extract = True + verify_hash = False + else: + path = artifact["artifact"] + dest = artifact.get("dest") + extract = artifact.get("extract", True) + verify_hash = artifact.get("verify-hash", False) + + fetch = { + "artifact": f"{prefix}/{path}", + "task": f"<{kind}>", + "extract": extract, + } + if dest is not None: + fetch["dest"] = dest + if verify_hash: + fetch["verify-hash"] = verify_hash + job_fetches.append(fetch) + + job_artifact_prefixes = { + mozpath.dirname(fetch["artifact"]) + for fetch in job_fetches + if not fetch["artifact"].startswith("public/") + } + if job_artifact_prefixes: + # Use taskcluster-proxy and request appropriate scope. For example, add + # 'scopes: [queue:get-artifact:path/to/*]' for 'path/to/artifact.tar.xz'. + worker["taskcluster-proxy"] = True + for prefix in sorted(job_artifact_prefixes): + scope = f"queue:get-artifact:{prefix}/*" + if scope not in job.setdefault("scopes", []): + job["scopes"].append(scope) + + env["MOZ_FETCHES"] = {"task-reference": json.dumps(job_fetches, sort_keys=True)} + + env.setdefault("MOZ_FETCHES_DIR", "fetches") + + yield job + + +@transforms.add +def make_task_description(config, jobs): + """Given a build description, create a task description""" + # import plugin modules first, before iterating over jobs + import_sibling_modules(exceptions=("common.py",)) + + for job in jobs: + # always-optimized tasks never execute, so have no workdir + if job["worker"]["implementation"] in ("docker-worker", "generic-worker"): + job["run"].setdefault("workdir", "/builds/worker") + + taskdesc = copy.deepcopy(job) + + # fill in some empty defaults to make run implementations easier + taskdesc.setdefault("attributes", {}) + taskdesc.setdefault("dependencies", {}) + taskdesc.setdefault("soft-dependencies", []) + taskdesc.setdefault("routes", []) + taskdesc.setdefault("scopes", []) + taskdesc.setdefault("extra", {}) + + # give the function for job.run.using on this worker implementation a + # chance to set up the task description. + configure_taskdesc_for_run( + config, job, taskdesc, job["worker"]["implementation"] + ) + del taskdesc["run"] + + # yield only the task description, discarding the job description + yield taskdesc + + +# A registry of all functions decorated with run_job_using +registry = {} + + +def run_job_using(worker_implementation, run_using, schema=None, defaults={}): + """Register the decorated function as able to set up a task description for + jobs with the given worker implementation and `run.using` property. If + `schema` is given, the job's run field will be verified to match it. + + The decorated function should have the signature `using_foo(config, job, taskdesc)` + and should modify the task description in-place. The skeleton of + the task description is already set up, but without a payload.""" + + def wrap(func): + for_run_using = registry.setdefault(run_using, {}) + if worker_implementation in for_run_using: + raise Exception( + "run_job_using({!r}, {!r}) already exists: {!r}".format( + run_using, + worker_implementation, + for_run_using[worker_implementation], + ) + ) + for_run_using[worker_implementation] = (func, schema, defaults) + return func + + return wrap + + +@run_job_using( + "always-optimized", "always-optimized", Schema({"using": "always-optimized"}) +) +def always_optimized(config, job, taskdesc): + pass + + +def configure_taskdesc_for_run(config, job, taskdesc, worker_implementation): + """ + Run the appropriate function for this job against the given task + description. + + This will raise an appropriate error if no function exists, or if the job's + run is not valid according to the schema. + """ + run_using = job["run"]["using"] + if run_using not in registry: + raise Exception(f"no functions for run.using {run_using!r}") + + if worker_implementation not in registry[run_using]: + raise Exception( + "no functions for run.using {!r} on {!r}".format( + run_using, worker_implementation + ) + ) + + func, schema, defaults = registry[run_using][worker_implementation] + for k, v in defaults.items(): + job["run"].setdefault(k, v) + + if schema: + validate_schema( + schema, + job["run"], + "In job.run using {!r}/{!r} for job {!r}:".format( + job["run"]["using"], worker_implementation, job["label"] + ), + ) + func(config, job, taskdesc) |