diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-12 05:35:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-12 05:35:29 +0000 |
commit | 59203c63bb777a3bacec32fb8830fba33540e809 (patch) | |
tree | 58298e711c0ff0575818c30485b44a2f21bf28a0 /third_party/python/taskcluster_taskgraph/taskgraph | |
parent | Adding upstream version 126.0.1. (diff) | |
download | firefox-59203c63bb777a3bacec32fb8830fba33540e809.tar.xz firefox-59203c63bb777a3bacec32fb8830fba33540e809.zip |
Adding upstream version 127.0.upstream/127.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/taskcluster_taskgraph/taskgraph')
52 files changed, 721 insertions, 727 deletions
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py index 81cc763230..0bd794101c 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/__init__.py @@ -2,7 +2,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. -__version__ = "6.3.0" +__version__ = "8.0.1" # Maximum number of dependencies a single task can have # https://docs.taskcluster.net/reference/platform/taskcluster-queue/references/api#createTask diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py index c5e1821546..f635250086 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/add_new_jobs.py @@ -40,7 +40,7 @@ from taskgraph.actions.util import ( ) def add_new_jobs_action(parameters, graph_config, input, task_group_id, task_id): decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels( - parameters, graph_config + parameters, graph_config, task_group_id=task_group_id ) to_run = [] diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py index 03788c6538..33a5394e68 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel.py @@ -34,9 +34,7 @@ def cancel_action(parameters, graph_config, input, task_group_id, task_id): # cannot be cancelled at this time, but it's also not running # anymore, so we can ignore this error. logger.info( - 'Task "{}" is past its deadline and cannot be cancelled.'.format( - task_id - ) + f'Task "{task_id}" is past its deadline and cannot be cancelled.' ) return raise diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py index d3e0440839..55453b7624 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/cancel_all.py @@ -43,9 +43,7 @@ def cancel_all_action(parameters, graph_config, input, task_group_id, task_id): # cannot be cancelled at this time, but it's also not running # anymore, so we can ignore this error. logger.info( - "Task {} is past its deadline and cannot be cancelled.".format( - task_id - ) + f"Task {task_id} is past its deadline and cannot be cancelled." ) return raise diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/rebuild_cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/rebuild_cached_tasks.py index 2b88e6a698..8ea2e37150 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/actions/rebuild_cached_tasks.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/rebuild_cached_tasks.py @@ -18,7 +18,7 @@ def rebuild_cached_tasks_action( parameters, graph_config, input, task_group_id, task_id ): decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels( - parameters, graph_config + parameters, graph_config, task_group_id=task_group_id ) cached_tasks = [ label diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py index 1e909d30c7..20955bd3f2 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/registry.py @@ -154,9 +154,7 @@ def register_callback_action( ], "register_callback_action must be used as decorator" if not cb_name: cb_name = name - assert cb_name not in callbacks, "callback name {} is not unique".format( - cb_name - ) + assert cb_name not in callbacks, f"callback name {cb_name} is not unique" def action_builder(parameters, graph_config, decision_task_id): if not available(parameters): @@ -165,11 +163,11 @@ def register_callback_action( actionPerm = "generic" if generic else cb_name # gather up the common decision-task-supplied data for this action - repo_param = "head_repository" repository = { - "url": parameters[repo_param], + "url": parameters["head_repository"], "project": parameters["project"], "level": parameters["level"], + "base_url": parameters["base_repository"], } revision = parameters["head_rev"] @@ -181,6 +179,9 @@ def register_callback_action( branch = parameters.get("head_ref") if branch: push["branch"] = branch + base_branch = parameters.get("base_ref") + if base_branch and branch != base_branch: + push["base_branch"] = base_branch action = { "name": name, @@ -215,13 +216,16 @@ def register_callback_action( if "/" in actionPerm: raise Exception("`/` is not allowed in action names; use `-`") + if parameters["tasks_for"].startswith("github-pull-request"): + hookId = f"in-tree-pr-action-{level}-{actionPerm}/{tcyml_hash}" + else: + hookId = f"in-tree-action-{level}-{actionPerm}/{tcyml_hash}" + rv.update( { "kind": "hook", "hookGroupId": f"project-{trustDomain}", - "hookId": "in-tree-action-{}-{}/{}".format( - level, actionPerm, tcyml_hash - ), + "hookId": hookId, "hookPayload": { # provide the decision-task parameters as context for triggerHook "decision": { @@ -297,16 +301,20 @@ def sanity_check_task_scope(callback, parameters, graph_config): actionPerm = "generic" if action.generic else action.cb_name - repo_param = "head_repository" - raw_url = parameters[repo_param] + raw_url = parameters["base_repository"] parsed_url = parse(raw_url) - expected_scope = f"assume:{parsed_url.taskcluster_role_prefix}:action:{actionPerm}" + action_scope = f"assume:{parsed_url.taskcluster_role_prefix}:action:{actionPerm}" + pr_action_scope = ( + f"assume:{parsed_url.taskcluster_role_prefix}:pr-action:{actionPerm}" + ) # the scope should appear literally; no need for a satisfaction check. The use of # get_current_scopes here calls the auth service through the Taskcluster Proxy, giving # the precise scopes available to this task. - if expected_scope not in taskcluster.get_current_scopes(): - raise ValueError(f"Expected task scope {expected_scope} for this action") + if not set((action_scope, pr_action_scope)) & set(taskcluster.get_current_scopes()): + raise ValueError( + f"Expected task scope {action_scope} or {pr_action_scope} for this action" + ) def trigger_action_callback( diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py index fd488b35fc..6c6091a47a 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/retrigger.py @@ -33,9 +33,7 @@ def _should_retrigger(task_graph, label): """ if label not in task_graph: logger.info( - "Task {} not in full taskgraph, assuming task should not be retriggered.".format( - label - ) + f"Task {label} not in full taskgraph, assuming task should not be retriggered." ) return False return task_graph[label].attributes.get("retrigger", False) @@ -67,7 +65,9 @@ def retrigger_decision_action(parameters, graph_config, input, task_group_id, ta # absolute timestamps relative to the current time. task = taskcluster.get_task_definition(task_id) task = relativize_datestamps(task) - create_task_from_def(slugid(), task, parameters["level"]) + create_task_from_def( + slugid(), task, parameters["level"], graph_config["trust-domain"] + ) @register_callback_action( @@ -144,7 +144,7 @@ def retrigger_decision_action(parameters, graph_config, input, task_group_id, ta ) def retrigger_action(parameters, graph_config, input, task_group_id, task_id): decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels( - parameters, graph_config + parameters, graph_config, task_group_id=task_group_id ) task = taskcluster.get_task_definition(task_id) @@ -155,8 +155,8 @@ def retrigger_action(parameters, graph_config, input, task_group_id, task_id): if not input.get("force", None) and not _should_retrigger(full_task_graph, label): logger.info( - "Not retriggering task {}, task should not be retrigged " - "and force not specified.".format(label) + f"Not retriggering task {label}, task should not be retrigged " + "and force not specified." ) sys.exit(1) @@ -201,14 +201,12 @@ def rerun_action(parameters, graph_config, input, task_group_id, task_id): task = taskcluster.get_task_definition(task_id) parameters = dict(parameters) decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels( - parameters, graph_config + parameters, graph_config, task_group_id=task_group_id ) label = task["metadata"]["name"] if task_id not in label_to_taskid.values(): logger.error( - "Refusing to rerun {}: taskId {} not in decision task {} label_to_taskid!".format( - label, task_id, decision_task_id - ) + f"Refusing to rerun {label}: taskId {task_id} not in decision task {decision_task_id} label_to_taskid!" ) _rerun_task(task_id, label) @@ -218,9 +216,7 @@ def _rerun_task(task_id, label): state = taskcluster.state_task(task_id) if state not in RERUN_STATES: logger.warning( - "No need to rerun {}: state '{}' not in {}!".format( - label, state, RERUN_STATES - ) + f"No need to rerun {label}: state '{state}' not in {RERUN_STATES}!" ) return taskcluster.rerun_task(task_id) @@ -261,7 +257,7 @@ def _rerun_task(task_id, label): ) def retrigger_multiple(parameters, graph_config, input, task_group_id, task_id): decision_task_id, full_task_graph, label_to_taskid = fetch_graph_and_labels( - parameters, graph_config + parameters, graph_config, task_group_id=task_group_id ) suffixes = [] diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py b/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py index cf81029da2..41e3b035de 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/actions/util.py @@ -32,8 +32,15 @@ def get_parameters(decision_task_id): return get_artifact(decision_task_id, "public/parameters.yml") -def fetch_graph_and_labels(parameters, graph_config): - decision_task_id = find_decision_task(parameters, graph_config) +def fetch_graph_and_labels(parameters, graph_config, task_group_id=None): + try: + # Look up the decision_task id in the index + decision_task_id = find_decision_task(parameters, graph_config) + except KeyError: + if not task_group_id: + raise + # Not found (e.g. from github-pull-request), fall back to the task group id. + decision_task_id = task_group_id # First grab the graph and labels generated during the initial decision task full_task_graph = get_artifact(decision_task_id, "public/full-task-graph.json") @@ -90,7 +97,7 @@ def fetch_graph_and_labels(parameters, graph_config): return (decision_task_id, full_task_graph, label_to_taskid) -def create_task_from_def(task_id, task_def, level): +def create_task_from_def(task_id, task_def, level, trust_domain): """Create a new task from a definition rather than from a label that is already in the full-task-graph. The task definition will have {relative-datestamp': '..'} rendered just like in a decision task. @@ -98,7 +105,7 @@ def create_task_from_def(task_id, task_def, level): It is useful if you want to "edit" the full_task_graph and then hand it to this function. No dependencies will be scheduled. You must handle this yourself. Seeing how create_tasks handles it might prove helpful.""" - task_def["schedulerId"] = f"gecko-level-{level}" + task_def["schedulerId"] = f"{trust_domain}-level-{level}" label = task_def["metadata"]["name"] session = get_session() create.create_task(session, task_id, label, task_def) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/config.py b/third_party/python/taskcluster_taskgraph/taskgraph/config.py index 7ea7dc7b33..ac384eab86 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/config.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/config.py @@ -40,6 +40,11 @@ graph_config_schema = Schema( description="Default 'deadline' for tasks, in relative date format. " "Eg: '1 week'", ): optionally_keyed_by("project", str), + Optional( + "task-expires-after", + description="Default 'expires-after' for level 1 tasks, in relative date format. " + "Eg: '90 days'", + ): str, Required("workers"): { Required("aliases"): { str: { @@ -62,6 +67,10 @@ graph_config_schema = Schema( "Defaults to `trust-domain`.", ): str, Optional( + "cache-pull-requests", + description="Should tasks from pull requests populate the cache", + ): bool, + Optional( "index-path-regexes", description="Regular expressions matching index paths to be summarized.", ): [str], @@ -102,28 +111,27 @@ class GraphConfig: Add the project's taskgraph directory to the python path, and register any extensions present. """ - modify_path = os.path.dirname(self.root_dir) if GraphConfig._PATH_MODIFIED: - if GraphConfig._PATH_MODIFIED == modify_path: + if GraphConfig._PATH_MODIFIED == self.root_dir: # Already modified path with the same root_dir. # We currently need to do this to enable actions to call # taskgraph_decision, e.g. relpro. return raise Exception("Can't register multiple directories on python path.") - GraphConfig._PATH_MODIFIED = modify_path - sys.path.insert(0, modify_path) + GraphConfig._PATH_MODIFIED = self.root_dir + sys.path.insert(0, self.root_dir) register_path = self["taskgraph"].get("register") if register_path: find_object(register_path)(self) @property def vcs_root(self): - if path.split(self.root_dir)[-2:] != ["taskcluster", "ci"]: + if path.split(self.root_dir)[-1:] != ["taskcluster"]: raise Exception( "Not guessing path to vcs root. " "Graph config in non-standard location." ) - return os.path.dirname(os.path.dirname(self.root_dir)) + return os.path.dirname(self.root_dir) @property def taskcluster_yml(self): diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/create.py b/third_party/python/taskcluster_taskgraph/taskgraph/create.py index deb1ac5348..e8baabb8a8 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/create.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/create.py @@ -104,7 +104,7 @@ def create_tasks(graph_config, taskgraph, label_to_taskid, params, decision_task def create_task(session, task_id, label, task_def): # create the task using 'http://taskcluster/queue', which is proxied to the queue service - # with credentials appropriate to this job. + # with credentials appropriate to this task. # Resolve timestamps now = current_json_time(datetime_format=True) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/decision.py index ed412f4473..d9eb9f3e90 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/decision.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/decision.py @@ -46,21 +46,21 @@ try_task_config_schema_v2 = Schema( ) -def full_task_graph_to_runnable_jobs(full_task_json): - runnable_jobs = {} +def full_task_graph_to_runnable_tasks(full_task_json): + runnable_tasks = {} for label, node in full_task_json.items(): if not ("extra" in node["task"] and "treeherder" in node["task"]["extra"]): continue th = node["task"]["extra"]["treeherder"] - runnable_jobs[label] = {"symbol": th["symbol"]} + runnable_tasks[label] = {"symbol": th["symbol"]} for i in ("groupName", "groupSymbol", "collection"): if i in th: - runnable_jobs[label][i] = th[i] + runnable_tasks[label][i] = th[i] if th.get("machine", {}).get("platform"): - runnable_jobs[label]["platform"] = th["machine"]["platform"] - return runnable_jobs + runnable_tasks[label]["platform"] = th["machine"]["platform"] + return runnable_tasks def taskgraph_decision(options, parameters=None): @@ -104,7 +104,7 @@ def taskgraph_decision(options, parameters=None): # write out the public/runnable-jobs.json file write_artifact( - "runnable-jobs.json", full_task_graph_to_runnable_jobs(full_task_json) + "runnable-jobs.json", full_task_graph_to_runnable_tasks(full_task_json) ) # this is just a test to check whether the from_json() function is working @@ -185,6 +185,9 @@ def get_decision_parameters(graph_config, options): # Define default filter list, as most configurations shouldn't need # custom filters. + parameters["files_changed"] = repo.get_changed_files( + rev=parameters["head_rev"], base_rev=parameters["base_rev"] + ) parameters["filters"] = [ "target_tasks_method", ] @@ -214,9 +217,9 @@ def get_decision_parameters(graph_config, options): parameters.update(PER_PROJECT_PARAMETERS[project]) except KeyError: logger.warning( - "using default project parameters; add {} to " - "PER_PROJECT_PARAMETERS in {} to customize behavior " - "for this project".format(project, __file__) + f"using default project parameters; add {project} to " + f"PER_PROJECT_PARAMETERS in {__file__} to customize behavior " + "for this project" ) parameters.update(PER_PROJECT_PARAMETERS["default"]) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/docker.py index 23897cbbee..9f849525fc 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/docker.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/docker.py @@ -18,6 +18,22 @@ except ImportError as e: from taskgraph.util import docker from taskgraph.util.taskcluster import get_artifact_url, get_session +DEPLOY_WARNING = """ +***************************************************************** +WARNING: Image is not suitable for deploying/pushing. + +To automatically tag the image the following files are required: +- {image_dir}/REGISTRY +- {image_dir}/VERSION + +The REGISTRY file contains the Docker registry hosting the image. +A default REGISTRY file may also be defined in the parent docker +directory. + +The VERSION file contains the version of the image. +***************************************************************** +""" + def get_image_digest(image_name): from taskgraph.generator import load_tasks_for_kind @@ -34,7 +50,7 @@ def get_image_digest(image_name): def load_image_by_name(image_name, tag=None): from taskgraph.generator import load_tasks_for_kind - from taskgraph.optimize import IndexSearch + from taskgraph.optimize.strategies import IndexSearch from taskgraph.parameters import Parameters params = Parameters( @@ -43,8 +59,9 @@ def load_image_by_name(image_name, tag=None): ) tasks = load_tasks_for_kind(params, "docker-image") task = tasks[f"build-docker-image-{image_name}"] + deadline = None task_id = IndexSearch().should_replace_task( - task, {}, task.optimization.get("index-search", []) + task, {}, deadline, task.optimization.get("index-search", []) ) if task_id in (True, False): @@ -52,8 +69,10 @@ def load_image_by_name(image_name, tag=None): "Could not find artifacts for a docker image " "named `{image_name}`. Local commits and other changes " "in your checkout may cause this error. Try " - "updating to a fresh checkout of mozilla-central " - "to download image.".format(image_name=image_name) + "updating to a fresh checkout of {project} " + "to download image.".format( + image_name=image_name, project=params["project"] + ) ) return False @@ -102,19 +121,18 @@ def build_image(name, tag, args=None): buf = BytesIO() docker.stream_context_tar(".", image_dir, buf, "", args) - subprocess.run( - ["docker", "image", "build", "--no-cache", "-t", tag, "-"], input=buf.getvalue() - ) + cmdargs = ["docker", "image", "build", "--no-cache", "-"] + if tag: + cmdargs.insert(-1, f"-t={tag}") + subprocess.run(cmdargs, input=buf.getvalue()) - print(f"Successfully built {name} and tagged with {tag}") + msg = f"Successfully built {name}" + if tag: + msg += f" and tagged with {tag}" + print(msg) - if tag.endswith(":latest"): - print("*" * 50) - print("WARNING: no VERSION file found in image directory.") - print("Image is not suitable for deploying/pushing.") - print("Create an image suitable for deploying/pushing by creating") - print("a VERSION file in the image directory.") - print("*" * 50) + if not tag or tag.endswith(":latest"): + print(DEPLOY_WARNING.format(image_dir=os.path.relpath(image_dir), image=name)) def load_image(url, imageName=None, imageTag=None): diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py b/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py deleted file mode 100644 index 6be6e5eeee..0000000000 --- a/third_party/python/taskcluster_taskgraph/taskgraph/files_changed.py +++ /dev/null @@ -1,91 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -""" -Support for optimizing tasks based on the set of files that have changed. -""" - - -import logging -import os - -import requests -from redo import retry - -from .util.memoize import memoize -from .util.path import match as match_path -from .util.vcs import get_repository - -logger = logging.getLogger(__name__) - - -@memoize -def get_changed_files(head_repository_url, head_rev, base_rev=None): - """ - Get the set of files changed between revisions. - Responses are cached, so multiple calls with the same arguments are OK. - """ - repo_path = os.getcwd() - repository = get_repository(repo_path) - - if repository.tool == "hg": - # TODO Use VCS version once tested enough - return _get_changed_files_json_automationrelevance( - head_repository_url, head_rev - ) - - return repository.get_changed_files(rev=head_rev, base_rev=base_rev) - - -def _get_changed_files_json_automationrelevance(head_repository_url, head_rev): - """ - Get the set of files changed in the push headed by the given revision. - """ - url = "{}/json-automationrelevance/{}".format( - head_repository_url.rstrip("/"), head_rev - ) - logger.debug("Querying version control for metadata: %s", url) - - def get_automationrelevance(): - response = requests.get(url, timeout=30) - return response.json() - - contents = retry(get_automationrelevance, attempts=10, sleeptime=10) - - logger.debug( - "{} commits influencing task scheduling:".format(len(contents["changesets"])) - ) - changed_files = set() - for c in contents["changesets"]: - desc = "" # Support empty desc - if c["desc"]: - desc = c["desc"].splitlines()[0].encode("ascii", "ignore") - logger.debug(" {cset} {desc}".format(cset=c["node"][0:12], desc=desc)) - changed_files |= set(c["files"]) - - return changed_files - - -def check(params, file_patterns): - """Determine whether any of the files changed between 2 revisions - match any of the given file patterns.""" - - head_repository_url = params.get("head_repository") - head_rev = params.get("head_rev") - if not head_repository_url or not head_rev: - logger.warning( - "Missing `head_repository` or `head_rev` parameters; " - "assuming all files have changed" - ) - return True - - base_rev = params.get("base_rev") - changed_files = get_changed_files(head_repository_url, head_rev, base_rev) - - for pattern in file_patterns: - for path in changed_files: - if match_path(path, pattern): - return True - - return False diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/generator.py b/third_party/python/taskcluster_taskgraph/taskgraph/generator.py index 4ed2a41520..d649b91706 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/generator.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/generator.py @@ -91,7 +91,7 @@ class Kind: @classmethod def load(cls, root_dir, graph_config, kind_name): - path = os.path.join(root_dir, kind_name) + path = os.path.join(root_dir, "kinds", kind_name) kind_yml = os.path.join(path, "kind.yml") if not os.path.exists(kind_yml): raise KindNotFound(kind_yml) @@ -125,13 +125,13 @@ class TaskGraphGenerator: write_artifacts=False, ): """ - @param root_dir: root directory, with subdirectories for each kind + @param root_dir: root directory containing the Taskgraph config.yml file @param parameters: parameters for this task-graph generation, or callable taking a `GraphConfig` and returning parameters @type parameters: Union[Parameters, Callable[[GraphConfig], Parameters]] """ if root_dir is None: - root_dir = "taskcluster/ci" + root_dir = "taskcluster" self.root_dir = root_dir self._parameters = parameters self._decision_task_id = decision_task_id @@ -243,7 +243,7 @@ class TaskGraphGenerator: yield kind queue.extend(kind.config.get("kind-dependencies", [])) else: - for kind_name in os.listdir(self.root_dir): + for kind_name in os.listdir(os.path.join(self.root_dir, "kinds")): try: yield Kind.load(self.root_dir, graph_config, kind_name) except KindNotFound: diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/loader/default.py b/third_party/python/taskcluster_taskgraph/taskgraph/loader/default.py index 5b2c258917..f060a1d92d 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/loader/default.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/loader/default.py @@ -11,7 +11,7 @@ logger = logging.getLogger(__name__) DEFAULT_TRANSFORMS = [ - "taskgraph.transforms.job:transforms", + "taskgraph.transforms.run:transforms", "taskgraph.transforms.task:transforms", ] @@ -20,7 +20,7 @@ def loader(kind, path, config, params, loaded_tasks): """ This default loader builds on the `transform` loader by providing sensible default transforms that the majority of simple tasks will need. - Specifically, `job` and `task` transforms will be appended to the end of the + Specifically, `run` and `task` transforms will be appended to the end of the list of transforms in the kind being loaded. """ transform_refs = config.setdefault("transforms", []) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/main.py b/third_party/python/taskcluster_taskgraph/taskgraph/main.py index 88a4e2539b..e68cd5a787 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/main.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/main.py @@ -18,6 +18,7 @@ from concurrent.futures import ProcessPoolExecutor, as_completed from pathlib import Path from textwrap import dedent from typing import Any, List +from urllib.parse import urlparse import appdirs import yaml @@ -95,7 +96,7 @@ def get_filtered_taskgraph(taskgraph, tasksregex, exclude_keys): for key in exclude_keys: obj = task attrs = key.split(".") - while attrs[0] in obj: + while obj and attrs[0] in obj: if len(attrs) == 1: del obj[attrs[0]] break @@ -120,7 +121,7 @@ def get_taskgraph_generator(root, parameters): return TaskGraphGenerator(root_dir=root, parameters=parameters) -def format_taskgraph(options, parameters, logfile=None): +def format_taskgraph(options, parameters, overrides, logfile=None): import taskgraph from taskgraph.parameters import parameters_loader @@ -138,7 +139,7 @@ def format_taskgraph(options, parameters, logfile=None): if isinstance(parameters, str): parameters = parameters_loader( parameters, - overrides={"target-kinds": options.get("target_kinds")}, + overrides=overrides, strict=False, ) @@ -172,7 +173,7 @@ def dump_output(out, path=None, params_spec=None): print(out + "\n", file=fh) -def generate_taskgraph(options, parameters, logdir): +def generate_taskgraph(options, parameters, overrides, logdir): from taskgraph.parameters import Parameters def logfile(spec): @@ -188,14 +189,16 @@ def generate_taskgraph(options, parameters, logdir): # tracebacks a little more readable and avoids additional process overhead. if len(parameters) == 1: spec = parameters[0] - out = format_taskgraph(options, spec, logfile(spec)) + out = format_taskgraph(options, spec, overrides, logfile(spec)) dump_output(out, options["output_file"]) return 0 futures = {} with ProcessPoolExecutor(max_workers=options["max_workers"]) as executor: for spec in parameters: - f = executor.submit(format_taskgraph, options, spec, logfile(spec)) + f = executor.submit( + format_taskgraph, options, spec, overrides, logfile(spec) + ) futures[f] = spec returncode = 0 @@ -293,6 +296,15 @@ def generate_taskgraph(options, parameters, logdir): "specified).", ) @argument( + "--force-local-files-changed", + default=False, + action="store_true", + help="Compute the 'files-changed' parameter from local version control, " + "even when explicitly using a parameter set that already has it defined. " + "Note that this is already the default behaviour when no parameters are " + "specified.", +) +@argument( "--no-optimize", dest="optimize", action="store_false", @@ -366,9 +378,11 @@ def show_taskgraph(options): diffdir = None output_file = options["output_file"] - if options["diff"]: + if options["diff"] or options["force_local_files_changed"]: repo = get_repository(os.getcwd()) + if options["diff"]: + assert repo is not None if not repo.working_directory_clean(): print( "abort: can't diff taskgraph with dirty working directory", @@ -392,15 +406,22 @@ def show_taskgraph(options): ) print(f"Generating {options['graph_attr']} @ {cur_rev}", file=sys.stderr) + overrides = { + "target-kinds": options.get("target_kinds"), + } parameters: List[Any[str, Parameters]] = options.pop("parameters") if not parameters: - overrides = { - "target-kinds": options.get("target_kinds"), - } parameters = [ parameters_loader(None, strict=False, overrides=overrides) ] # will use default values + # This is the default behaviour anyway, so no need to re-compute. + options["force_local_files_changed"] = False + + elif options["force_local_files_changed"]: + assert repo is not None + overrides["files-changed"] = sorted(repo.get_changed_files("AM")) + for param in parameters[:]: if isinstance(param, str) and os.path.isdir(param): parameters.remove(param) @@ -426,7 +447,7 @@ def show_taskgraph(options): # to setup its `mach` based logging. setup_logging() - ret = generate_taskgraph(options, parameters, logdir) + ret = generate_taskgraph(options, parameters, overrides, logdir) if options["diff"]: assert diffdir is not None @@ -450,7 +471,7 @@ def show_taskgraph(options): diffdir, f"{options['graph_attr']}_{base_rev_file}" ) print(f"Generating {options['graph_attr']} @ {base_rev}", file=sys.stderr) - ret |= generate_taskgraph(options, parameters, logdir) + ret |= generate_taskgraph(options, parameters, overrides, logdir) finally: repo.update(cur_rev) @@ -463,6 +484,8 @@ def show_taskgraph(options): f"--label={options['graph_attr']}@{cur_rev}", ] + non_fatal_failures = [] + for spec in parameters: base_path = os.path.join( diffdir, f"{options['graph_attr']}_{base_rev_file}" @@ -475,7 +498,20 @@ def show_taskgraph(options): base_path += f"_{params_name}" cur_path += f"_{params_name}" + # If the base or cur files are missing it means that generation + # failed. If one of them failed but not the other, the failure is + # likely due to the patch making changes to taskgraph in modules + # that don't get reloaded (safe to ignore). If both generations + # failed, there's likely a real issue. + base_missing = not os.path.isfile(base_path) + cur_missing = not os.path.isfile(cur_path) + if base_missing != cur_missing: # != is equivalent to XOR for booleans + non_fatal_failures.append(os.path.basename(base_path)) + continue + try: + # If the output file(s) are missing, this command will raise + # CalledProcessError with a returncode > 1. proc = subprocess.run( diffcmd + [base_path, cur_path], capture_output=True, @@ -500,6 +536,16 @@ def show_taskgraph(options): params_spec=spec if len(parameters) > 1 else None, ) + if non_fatal_failures: + failstr = "\n ".join(sorted(non_fatal_failures)) + print( + "WARNING: Diff skipped for the following generation{s} " + "due to failures:\n {failstr}".format( + s="s" if len(non_fatal_failures) > 1 else "", failstr=failstr + ), + file=sys.stderr, + ) + if options["format"] != "json": print( "If you were expecting differences in task bodies " @@ -661,7 +707,7 @@ def decision(options): @argument( "--root", "-r", - default="taskcluster/ci", + default="taskcluster", help="root of the taskgraph definition relative to topsrcdir", ) def action_callback(options): @@ -697,7 +743,7 @@ def action_callback(options): @argument( "--root", "-r", - default="taskcluster/ci", + default="taskcluster", help="root of the taskgraph definition relative to topsrcdir", ) @argument( @@ -835,6 +881,10 @@ def init_taskgraph(options): ) return 1 + context["repo_name"] = urlparse(repo_url).path.rsplit("/", 1)[-1] + if context["repo_name"].endswith(".git"): + context["repo_name"] = context["repo_name"][: -len(".git")] + # Generate the project. cookiecutter( options["template"], @@ -867,6 +917,11 @@ def setup_logging(): def main(args=sys.argv[1:]): setup_logging() parser = create_parser() + + if not args: + parser.print_help() + sys.exit(1) + args = parser.parse_args(args) try: return args.command(vars(args)) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/morph.py b/third_party/python/taskcluster_taskgraph/taskgraph/morph.py index bfa1560270..e4bb268ab8 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/morph.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/morph.py @@ -38,6 +38,7 @@ registered_morphs = [] def register_morph(func): registered_morphs.append(func) + return func def amend_taskgraph(taskgraph, label_to_taskid, to_add): diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py index 367b94e1de..e5477d35b7 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/base.py @@ -271,14 +271,19 @@ def replace_tasks( dependencies_of = target_task_graph.graph.links_dict() for label in target_task_graph.graph.visit_postorder(): + logger.debug(f"replace_tasks: {label}") # if we're not allowed to optimize, that's easy.. if label in do_not_optimize: + logger.debug(f"replace_tasks: {label} is in do_not_optimize") continue # if this task depends on un-replaced, un-removed tasks, do not replace if any( l not in replaced and l not in removed_tasks for l in dependencies_of[label] ): + logger.debug( + f"replace_tasks: {label} depends on an unreplaced or unremoved task" + ) continue # if the task already exists, that's an easy replacement @@ -287,6 +292,7 @@ def replace_tasks( label_to_taskid[label] = repl replaced.add(label) opt_counts["existing_tasks"] += 1 + logger.debug(f"replace_tasks: {label} replaced from existing_tasks") continue # call the optimization strategy @@ -304,14 +310,20 @@ def replace_tasks( repl = opt.should_replace_task(task, params, deadline, arg) if repl: if repl is True: + logger.debug(f"replace_tasks: {label} removed by optimization strategy") # True means remove this task; get_subgraph will catch any # problems with removed tasks being depended on removed_tasks.add(label) else: + logger.debug( + f"replace_tasks: {label} replaced by optimization strategy" + ) label_to_taskid[label] = repl replaced.add(label) opt_counts[opt_by] += 1 continue + else: + logger.debug(f"replace_tasks: {label} kept by optimization strategy") _log_optimization("replaced", opt_counts) return replaced diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py index 973b550632..5baecfe645 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/optimize/strategies.py @@ -1,8 +1,8 @@ import logging from datetime import datetime -from taskgraph import files_changed from taskgraph.optimize.base import OptimizationStrategy, register_strategy +from taskgraph.util.path import match as match_path from taskgraph.util.taskcluster import find_task_id, status_task logger = logging.getLogger(__name__) @@ -48,17 +48,23 @@ class IndexSearch(OptimizationStrategy): @register_strategy("skip-unless-changed") class SkipUnlessChanged(OptimizationStrategy): + + def check(self, files_changed, patterns): + for pattern in patterns: + for path in files_changed: + if match_path(path, pattern): + return True + return False + def should_remove_task(self, task, params, file_patterns): # pushlog_id == -1 - this is the case when run from a cron.yml job or on a git repository if params.get("repository_type") == "hg" and params.get("pushlog_id") == -1: return False - changed = files_changed.check(params, file_patterns) + changed = self.check(params["files_changed"], file_patterns) if not changed: logger.debug( - 'no files found matching a pattern in `skip-unless-changed` for "{}"'.format( - task.label - ) + f'no files found matching a pattern in `skip-unless-changed` for "{task.label}"' ) return True return False diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py b/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py index 48571d97ad..c69b201e34 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/parameters.py @@ -40,6 +40,7 @@ base_schema = Schema( Required("do_not_optimize"): [str], Required("enable_always_target"): Any(bool, [str]), Required("existing_tasks"): {str: str}, + Required("files_changed"): [str], Required("filters"): [str], Required("head_ref"): str, Required("head_repository"): str, @@ -86,6 +87,7 @@ def _get_defaults(repo_root=None): # Use fake values if no repo is detected. repo = Mock(branch="", head_rev="", tool="git") repo.get_url.return_value = "" + repo.get_changed_files.return_value = [] try: repo_url = repo.get_url() @@ -108,6 +110,7 @@ def _get_defaults(repo_root=None): "do_not_optimize": [], "enable_always_target": True, "existing_tasks": {}, + "files_changed": repo.get_changed_files("AM"), "filters": ["target_tasks_method"], "head_ref": repo.branch or repo.head_rev, "head_repository": repo_url, @@ -284,7 +287,7 @@ class Parameters(ReadOnlyDict): else: raise ParameterMismatch( "Don't know how to determine file URL for non-github" - "repo: {}".format(repo) + f"repo: {repo}" ) else: raise RuntimeError( diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task index 267b5283ea..f3a343de33 100755 --- a/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task +++ b/third_party/python/taskcluster_taskgraph/taskgraph/run-task/run-task @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -u +#!/usr/bin/env -S python3 -u # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. @@ -624,6 +624,11 @@ def git_checkout( "Must specify both ssh_key_file and ssh_known_hosts_file, if either are specified", ) + # Bypass Git's "safe directory" feature as the destination could be + # coming from a cache and therefore cloned by a different user. + args = ["git", "config", "--global", "--add", "safe.directory", Path(destination_path).as_posix()] + retry_required_command(b"vcs", args, extra_env=env) + if not os.path.exists(destination_path): # Repository doesn't already exist, needs to be cloned args = [ @@ -782,9 +787,7 @@ def hg_checkout( branch: Optional[str], revision: Optional[str], ): - if IS_MACOSX: - hg_bin = "/tools/python27-mercurial/bin/hg" - elif IS_POSIX: + if IS_MACOSX or IS_POSIX: hg_bin = "hg" elif IS_WINDOWS: # This is where OCC installs it in the AMIs. @@ -1007,7 +1010,8 @@ def install_pip_requirements(repositories): if not requirements: return - cmd = [sys.executable, "-mpip", "install"] + # TODO: Stop using system Python (#381) + cmd = [sys.executable, "-mpip", "install", "--break-system-packages"] if os.environ.get("PIP_DISABLE_REQUIRE_HASHES") != "1": cmd.append("--require-hashes") diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py index 1119a1c960..7f44b6ab60 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/target_tasks.py @@ -14,7 +14,7 @@ _target_task_methods = {} _GIT_REFS_HEADS_PREFIX = "refs/heads/" -def _target_task(name): +def register_target_task(name): def wrap(func): _target_task_methods[name] = func return func @@ -81,7 +81,7 @@ def standard_filter(task, parameters): ) -@_target_task("default") +@register_target_task("default") def target_tasks_default(full_task_graph, parameters, graph_config): """Target the tasks which have indicated they should be run on this project via the `run_on_projects` attributes.""" @@ -90,7 +90,7 @@ def target_tasks_default(full_task_graph, parameters, graph_config): ] -@_target_task("codereview") +@register_target_task("codereview") def target_tasks_codereview(full_task_graph, parameters, graph_config): """Target the tasks which have indicated they should be run on this project via the `run_on_projects` attributes.""" @@ -101,7 +101,7 @@ def target_tasks_codereview(full_task_graph, parameters, graph_config): ] -@_target_task("nothing") +@register_target_task("nothing") def target_tasks_nothing(full_task_graph, parameters, graph_config): """Select nothing, for DONTBUILD pushes""" return [] diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py index 4fa7b5fc0c..e69de29bb2 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/__init__.py @@ -1,3 +0,0 @@ -from taskgraph.transforms import ( # noqa: Added for backwards compat - notify as release_notifications, -) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py index e6fcd2400c..fda0c584fc 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/base.py @@ -147,7 +147,7 @@ class ValidateSchema: kind=config.kind, name=task["name"] ) elif "label" in task: - error = "In job {label!r}:".format(label=task["label"]) + error = "In task {label!r}:".format(label=task["label"]) elif "primary-dependency" in task: error = "In {kind} kind task for {dependency!r}:".format( kind=config.kind, dependency=task["primary-dependency"].label diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py index bdb655b97d..2c859c36f6 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/code_review.py @@ -12,12 +12,12 @@ transforms = TransformSequence() @transforms.add -def add_dependencies(config, jobs): - for job in jobs: - job.setdefault("soft-dependencies", []) - job["soft-dependencies"] += [ +def add_dependencies(config, tasks): + for task in tasks: + task.setdefault("soft-dependencies", []) + task["soft-dependencies"] += [ dep_task.label for dep_task in config.kind_dependencies_tasks.values() if dep_task.attributes.get("code-review") is True ] - yield job + yield task diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py index d0c5b9c97b..b58320092b 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/docker_image.py @@ -92,9 +92,7 @@ def fill_template(config, tasks): for p in packages: if p not in available_packages: raise Exception( - "Missing package job for {}-{}: {}".format( - config.kind, image_name, p - ) + f"Missing package job for {config.kind}-{image_name}: {p}" ) if not taskgraph.fast: @@ -119,9 +117,7 @@ def fill_template(config, tasks): digest_data += [json.dumps(args, sort_keys=True)] context_hashes[image_name] = context_hash - description = "Build the docker image {} for use by dependent tasks".format( - image_name - ) + description = f"Build the docker image {image_name} for use by dependent tasks" args["DOCKER_IMAGE_PACKAGES"] = " ".join(f"<{p}>" for p in packages) @@ -132,6 +128,8 @@ def fill_template(config, tasks): # burn more CPU once to reduce image size. zstd_level = "3" if int(config.params["level"]) == 1 else "10" + expires = config.graph_config._config.get("task-expires-after", "28 days") + # include some information that is useful in reconstructing this task # from JSON taskdesc = { @@ -142,7 +140,7 @@ def fill_template(config, tasks): "artifact_prefix": "public", }, "always-target": True, - "expires-after": "28 days" if config.params.is_try() else "1 year", + "expires-after": expires if config.params.is_try() else "1 year", "scopes": [], "run-on-projects": [], "worker-type": "images", @@ -158,9 +156,7 @@ def fill_template(config, tasks): ], "env": { "CONTEXT_TASK_ID": {"task-reference": "<decision>"}, - "CONTEXT_PATH": "public/docker-contexts/{}.tar.gz".format( - image_name - ), + "CONTEXT_PATH": f"public/docker-contexts/{image_name}.tar.gz", "HASH": context_hash, "PROJECT": config.params["project"], "IMAGE_NAME": image_name, diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py index bcb8ff38a6..0e1b739677 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/fetch.py @@ -32,11 +32,12 @@ FETCH_SCHEMA = Schema( Optional("task-from"): str, # Description of the task. Required("description"): str, + Optional("expires-after"): str, Optional("docker-image"): object, Optional( "fetch-alias", - description="An alias that can be used instead of the real fetch job name in " - "fetch stanzas for jobs.", + description="An alias that can be used instead of the real fetch task name in " + "fetch stanzas for tasks.", ): str, Optional( "artifact-prefix", @@ -78,20 +79,20 @@ transforms.add_validate(FETCH_SCHEMA) @transforms.add -def process_fetch_job(config, jobs): - # Converts fetch-url entries to the job schema. - for job in jobs: - typ = job["fetch"]["type"] - name = job["name"] - fetch = job.pop("fetch") +def process_fetch_task(config, tasks): + # Converts fetch-url entries to the run schema. + for task in tasks: + typ = task["fetch"]["type"] + name = task["name"] + fetch = task.pop("fetch") if typ not in fetch_builders: raise Exception(f"Unknown fetch type {typ} in fetch {name}") validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:") - job.update(configure_fetch(config, typ, name, fetch)) + task.update(configure_fetch(config, typ, name, fetch)) - yield job + yield task def configure_fetch(config, typ, name, fetch): @@ -103,41 +104,41 @@ def configure_fetch(config, typ, name, fetch): @transforms.add -def make_task(config, jobs): +def make_task(config, tasks): # Fetch tasks are idempotent and immutable. Have them live for # essentially forever. if config.params["level"] == "3": expires = "1000 years" else: - expires = "28 days" + expires = config.graph_config._config.get("task-expires-after", "28 days") - for job in jobs: - name = job["name"] - artifact_prefix = job.get("artifact-prefix", "public") - env = job.get("env", {}) + for task in tasks: + name = task["name"] + artifact_prefix = task.get("artifact-prefix", "public") + env = task.get("env", {}) env.update({"UPLOAD_DIR": "/builds/worker/artifacts"}) - attributes = job.get("attributes", {}) - attributes["fetch-artifact"] = path.join(artifact_prefix, job["artifact_name"]) - alias = job.get("fetch-alias") + attributes = task.get("attributes", {}) + attributes["fetch-artifact"] = path.join(artifact_prefix, task["artifact_name"]) + alias = task.get("fetch-alias") if alias: attributes["fetch-alias"] = alias - task = { + task_desc = { "attributes": attributes, "name": name, - "description": job["description"], - "expires-after": expires, + "description": task["description"], + "expires-after": task.get("expires-after", expires), "label": "fetch-%s" % name, "run-on-projects": [], "run": { "using": "run-task", "checkout": False, - "command": job["command"], + "command": task["command"], }, "worker-type": "images", "worker": { "chain-of-trust": True, - "docker-image": job.get("docker-image", {"in-tree": "fetch"}), + "docker-image": task.get("docker-image", {"in-tree": "fetch"}), "env": env, "max-run-time": 900, "artifacts": [ @@ -151,29 +152,29 @@ def make_task(config, jobs): } if "treeherder" in config.graph_config: - task["treeherder"] = { + task_desc["treeherder"] = { "symbol": join_symbol("Fetch", name), "kind": "build", "platform": "fetch/opt", "tier": 1, } - if job.get("secret", None): - task["scopes"] = ["secrets:get:" + job.get("secret")] - task["worker"]["taskcluster-proxy"] = True + if task.get("secret", None): + task_desc["scopes"] = ["secrets:get:" + task.get("secret")] + task_desc["worker"]["taskcluster-proxy"] = True if not taskgraph.fast: - cache_name = task["label"].replace(f"{config.kind}-", "", 1) + cache_name = task_desc["label"].replace(f"{config.kind}-", "", 1) # This adds the level to the index path automatically. add_optimization( config, - task, + task_desc, cache_type=CACHE_TYPE, cache_name=cache_name, - digest_data=job["digest_data"], + digest_data=task["digest_data"], ) - yield task + yield task_desc @fetch_builder( diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/from_deps.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/from_deps.py index 337d68e4ba..191ef7d56a 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/from_deps.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/from_deps.py @@ -16,10 +16,11 @@ from textwrap import dedent from voluptuous import Any, Extra, Optional, Required from taskgraph.transforms.base import TransformSequence -from taskgraph.transforms.job import fetches_schema +from taskgraph.transforms.run import fetches_schema from taskgraph.util.attributes import attrmatch from taskgraph.util.dependencies import GROUP_BY_MAP, get_dependencies from taskgraph.util.schema import Schema, validate_schema +from taskgraph.util.set_name import SET_NAME_MAP FROM_DEPS_SCHEMA = Schema( { @@ -41,12 +42,14 @@ FROM_DEPS_SCHEMA = Schema( "set-name", description=dedent( """ - When True, `from_deps` will derive a name for the generated - tasks from the name of the primary dependency. Defaults to - True. + UPDATE ME AND DOCS """.lstrip() ), - ): bool, + ): Any( + None, + *SET_NAME_MAP, + {Any(*SET_NAME_MAP): object}, + ), Optional( "with-attributes", description=dedent( @@ -170,7 +173,7 @@ def from_deps(config, tasks): groups = func(config, deps) # Split the task, one per group. - set_name = from_deps.get("set-name", True) + set_name = from_deps.get("set-name", "strip-kind") copy_attributes = from_deps.get("copy-attributes", False) unique_kinds = from_deps.get("unique-kinds", True) fetches = from_deps.get("fetches", []) @@ -203,10 +206,8 @@ def from_deps(config, tasks): primary_dep = [dep for dep in group if dep.kind == primary_kind][0] if set_name: - if primary_dep.label.startswith(primary_kind): - new_task["name"] = primary_dep.label[len(primary_kind) + 1 :] - else: - new_task["name"] = primary_dep.label + func = SET_NAME_MAP[set_name] + new_task["name"] = func(config, deps, primary_dep, primary_kind) if copy_attributes: attrs = new_task.setdefault("attributes", {}) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/__init__.py index 06978ff46d..a783a0dc13 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/__init__.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/__init__.py @@ -2,11 +2,11 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. """ -Convert a job description into a task description. +Convert a run description into a task description. -Jobs descriptions are similar to task descriptions, but they specify how to run -the job at a higher level, using a "run" field that can be interpreted by -run-using handlers in `taskcluster/taskgraph/transforms/job`. +Run descriptions are similar to task descriptions, but they specify how to run +the task at a higher level, using a "run" field that can be interpreted by +run-using handlers in `taskcluster/taskgraph/transforms/run`. """ @@ -28,7 +28,7 @@ from taskgraph.util.workertypes import worker_type_implementation logger = logging.getLogger(__name__) # Fetches may be accepted in other transforms and eventually passed along -# to a `job` (eg: from_deps). Defining this here allows them to re-use +# to a `task` (eg: from_deps). Defining this here allows them to reuse # the schema and avoid duplication. fetches_schema = { Required("artifact"): str, @@ -38,9 +38,9 @@ fetches_schema = { } # Schema for a build description -job_description_schema = Schema( +run_description_schema = Schema( { - # The name of the job and the job's label. At least one must be specified, + # The name of the task and the task's label. At least one must be specified, # and the label will be generated from the name if necessary, by prepending # the kind. Optional("name"): str, @@ -55,6 +55,7 @@ job_description_schema = Schema( Optional("soft-dependencies"): task_description_schema["soft-dependencies"], Optional("if-dependencies"): task_description_schema["if-dependencies"], Optional("requires"): task_description_schema["requires"], + Optional("deadline-after"): task_description_schema["deadline-after"], Optional("expires-after"): task_description_schema["expires-after"], Optional("routes"): task_description_schema["routes"], Optional("scopes"): task_description_schema["scopes"], @@ -73,7 +74,7 @@ job_description_schema = Schema( Optional("needs-sccache"): task_description_schema["needs-sccache"], # The "when" section contains descriptions of the circumstances under which # this task should be included in the task graph. This will be converted - # into an optimization, so it cannot be specified in a job description that + # into an optimization, so it cannot be specified in a run description that # also gives 'optimization'. Exclusive("when", "optimization"): { # This task only needs to be run if a file matching one of the given @@ -89,33 +90,33 @@ job_description_schema = Schema( fetches_schema, ], }, - # A description of how to run this job. + # A description of how to run this task. "run": { - # The key to a job implementation in a peer module to this one + # The key to a run implementation in a peer module to this one "using": str, # Base work directory used to set up the task. Optional("workdir"): str, - # Any remaining content is verified against that job implementation's + # Any remaining content is verified against that run implementation's # own schema. Extra: object, }, Required("worker-type"): task_description_schema["worker-type"], # This object will be passed through to the task description, with additions - # provided by the job's run-using function + # provided by the task's run-using function Optional("worker"): dict, } ) transforms = TransformSequence() -transforms.add_validate(job_description_schema) +transforms.add_validate(run_description_schema) @transforms.add -def rewrite_when_to_optimization(config, jobs): - for job in jobs: - when = job.pop("when", {}) +def rewrite_when_to_optimization(config, tasks): + for task in tasks: + when = task.pop("when", {}) if not when: - yield job + yield task continue files_changed = when.get("files-changed") @@ -124,63 +125,64 @@ def rewrite_when_to_optimization(config, jobs): files_changed.append(f"{config.path}/**") # "only when files changed" implies "skip if files have not changed" - job["optimization"] = {"skip-unless-changed": files_changed} + task["optimization"] = {"skip-unless-changed": files_changed} - assert "when" not in job - yield job + assert "when" not in task + yield task @transforms.add -def set_implementation(config, jobs): - for job in jobs: - impl, os = worker_type_implementation(config.graph_config, job["worker-type"]) +def set_implementation(config, tasks): + for task in tasks: + impl, os = worker_type_implementation(config.graph_config, task["worker-type"]) if os: - job.setdefault("tags", {})["os"] = os + task.setdefault("tags", {})["os"] = os if impl: - job.setdefault("tags", {})["worker-implementation"] = impl - worker = job.setdefault("worker", {}) + task.setdefault("tags", {})["worker-implementation"] = impl + worker = task.setdefault("worker", {}) assert "implementation" not in worker worker["implementation"] = impl if os: worker["os"] = os - yield job + yield task @transforms.add -def set_label(config, jobs): - for job in jobs: - if "label" not in job: - if "name" not in job: - raise Exception("job has neither a name nor a label") - job["label"] = "{}-{}".format(config.kind, job["name"]) - if job.get("name"): - del job["name"] - yield job +def set_label(config, tasks): + for task in tasks: + if "label" not in task: + if "name" not in task: + raise Exception("task has neither a name nor a label") + task["label"] = "{}-{}".format(config.kind, task["name"]) + if task.get("name"): + del task["name"] + yield task @transforms.add -def add_resource_monitor(config, jobs): - for job in jobs: - if job.get("attributes", {}).get("resource-monitor"): +def add_resource_monitor(config, tasks): + for task in tasks: + if task.get("attributes", {}).get("resource-monitor"): worker_implementation, worker_os = worker_type_implementation( - config.graph_config, job["worker-type"] + config.graph_config, task["worker-type"] ) # Normalise worker os so that linux-bitbar and similar use linux tools. - worker_os = worker_os.split("-")[0] - if "win7" in job["worker-type"]: + if worker_os: + worker_os = worker_os.split("-")[0] + if "win7" in task["worker-type"]: arch = "32" else: arch = "64" - job.setdefault("fetches", {}) - job["fetches"].setdefault("toolchain", []) - job["fetches"]["toolchain"].append(f"{worker_os}{arch}-resource-monitor") + task.setdefault("fetches", {}) + task["fetches"].setdefault("toolchain", []) + task["fetches"]["toolchain"].append(f"{worker_os}{arch}-resource-monitor") if worker_implementation == "docker-worker": artifact_source = "/builds/worker/monitoring/resource-monitor.json" else: artifact_source = "monitoring/resource-monitor.json" - job["worker"].setdefault("artifacts", []) - job["worker"]["artifacts"].append( + task["worker"].setdefault("artifacts", []) + task["worker"]["artifacts"].append( { "name": "public/monitoring/resource-monitor.json", "type": "file", @@ -188,10 +190,10 @@ def add_resource_monitor(config, jobs): } ) # Set env for output file - job["worker"].setdefault("env", {}) - job["worker"]["env"]["RESOURCE_MONITOR_OUTPUT"] = artifact_source + task["worker"].setdefault("env", {}) + task["worker"]["env"]["RESOURCE_MONITOR_OUTPUT"] = artifact_source - yield job + yield task def get_attribute(dict, key, attributes, attribute_name): @@ -203,16 +205,16 @@ def get_attribute(dict, key, attributes, attribute_name): @transforms.add -def use_fetches(config, jobs): +def use_fetches(config, tasks): artifact_names = {} aliases = {} extra_env = {} if config.kind in ("toolchain", "fetch"): - jobs = list(jobs) - for job in jobs: - run = job.get("run", {}) - label = job["label"] + tasks = list(tasks) + for task in tasks: + run = task.get("run", {}) + label = task["label"] get_attribute(artifact_names, label, run, "toolchain-artifact") value = run.get(f"{config.kind}-alias") if value: @@ -232,20 +234,20 @@ def use_fetches(config, jobs): aliases[f"{task.kind}-{value}"] = task.label artifact_prefixes = {} - for job in order_tasks(config, jobs): - artifact_prefixes[job["label"]] = get_artifact_prefix(job) + for task in order_tasks(config, tasks): + artifact_prefixes[task["label"]] = get_artifact_prefix(task) - fetches = job.pop("fetches", None) + fetches = task.pop("fetches", None) if not fetches: - yield job + yield task continue - job_fetches = [] - name = job.get("name", job.get("label")) - dependencies = job.setdefault("dependencies", {}) - worker = job.setdefault("worker", {}) + task_fetches = [] + name = task.get("name", task.get("label")) + dependencies = task.setdefault("dependencies", {}) + worker = task.setdefault("worker", {}) env = worker.setdefault("env", {}) - prefix = get_artifact_prefix(job) + prefix = get_artifact_prefix(task) for kind in sorted(fetches): artifacts = fetches[kind] if kind in ("fetch", "toolchain"): @@ -254,9 +256,7 @@ def use_fetches(config, jobs): label = aliases.get(label, label) if label not in artifact_names: raise Exception( - "Missing fetch job for {kind}-{name}: {fetch}".format( - kind=config.kind, name=name, fetch=fetch_name - ) + f"Missing fetch task for {config.kind}-{name}: {fetch_name}" ) if label in extra_env: env.update(extra_env[label]) @@ -264,7 +264,7 @@ def use_fetches(config, jobs): path = artifact_names[label] dependencies[label] = label - job_fetches.append( + task_fetches.append( { "artifact": path, "task": f"<{label}>", @@ -274,8 +274,8 @@ def use_fetches(config, jobs): else: if kind not in dependencies: raise Exception( - "{name} can't fetch {kind} artifacts because " - "it has no {kind} dependencies!".format(name=name, kind=kind) + f"{name} can't fetch {kind} artifacts because " + f"it has no {kind} dependencies!" ) dep_label = dependencies[kind] if dep_label in artifact_prefixes: @@ -293,9 +293,11 @@ def use_fetches(config, jobs): name=name, kind=kind, label=dependencies[kind], - tasks="no tasks" - if len(dep_tasks) == 0 - else "multiple tasks", + tasks=( + "no tasks" + if len(dep_tasks) == 0 + else "multiple tasks" + ), ) ) @@ -328,41 +330,43 @@ def use_fetches(config, jobs): fetch["dest"] = dest if verify_hash: fetch["verify-hash"] = verify_hash - job_fetches.append(fetch) + task_fetches.append(fetch) - job_artifact_prefixes = { + task_artifact_prefixes = { mozpath.dirname(fetch["artifact"]) - for fetch in job_fetches + for fetch in task_fetches if not fetch["artifact"].startswith("public/") } - if job_artifact_prefixes: + if task_artifact_prefixes: # Use taskcluster-proxy and request appropriate scope. For example, add # 'scopes: [queue:get-artifact:path/to/*]' for 'path/to/artifact.tar.xz'. worker["taskcluster-proxy"] = True - for prefix in sorted(job_artifact_prefixes): + for prefix in sorted(task_artifact_prefixes): scope = f"queue:get-artifact:{prefix}/*" - if scope not in job.setdefault("scopes", []): - job["scopes"].append(scope) + if scope not in task.setdefault("scopes", []): + task["scopes"].append(scope) - env["MOZ_FETCHES"] = {"task-reference": json.dumps(job_fetches, sort_keys=True)} + env["MOZ_FETCHES"] = { + "task-reference": json.dumps(task_fetches, sort_keys=True) + } env.setdefault("MOZ_FETCHES_DIR", "fetches") - yield job + yield task @transforms.add -def make_task_description(config, jobs): +def make_task_description(config, tasks): """Given a build description, create a task description""" - # import plugin modules first, before iterating over jobs + # import plugin modules first, before iterating over tasks import_sibling_modules(exceptions=("common.py",)) - for job in jobs: + for task in tasks: # always-optimized tasks never execute, so have no workdir - if job["worker"]["implementation"] in ("docker-worker", "generic-worker"): - job["run"].setdefault("workdir", "/builds/worker") + if task["worker"]["implementation"] in ("docker-worker", "generic-worker"): + task["run"].setdefault("workdir", "/builds/worker") - taskdesc = copy.deepcopy(job) + taskdesc = copy.deepcopy(task) # fill in some empty defaults to make run implementations easier taskdesc.setdefault("attributes", {}) @@ -372,27 +376,27 @@ def make_task_description(config, jobs): taskdesc.setdefault("scopes", []) taskdesc.setdefault("extra", {}) - # give the function for job.run.using on this worker implementation a + # give the function for task.run.using on this worker implementation a # chance to set up the task description. configure_taskdesc_for_run( - config, job, taskdesc, job["worker"]["implementation"] + config, task, taskdesc, task["worker"]["implementation"] ) del taskdesc["run"] - # yield only the task description, discarding the job description + # yield only the task description, discarding the task description yield taskdesc -# A registry of all functions decorated with run_job_using +# A registry of all functions decorated with run_task_using registry = {} -def run_job_using(worker_implementation, run_using, schema=None, defaults={}): +def run_task_using(worker_implementation, run_using, schema=None, defaults={}): """Register the decorated function as able to set up a task description for - jobs with the given worker implementation and `run.using` property. If - `schema` is given, the job's run field will be verified to match it. + tasks with the given worker implementation and `run.using` property. If + `schema` is given, the task's run field will be verified to match it. - The decorated function should have the signature `using_foo(config, job, taskdesc)` + The decorated function should have the signature `using_foo(config, task, taskdesc)` and should modify the task description in-place. The skeleton of the task description is already set up, but without a payload.""" @@ -400,11 +404,7 @@ def run_job_using(worker_implementation, run_using, schema=None, defaults={}): for_run_using = registry.setdefault(run_using, {}) if worker_implementation in for_run_using: raise Exception( - "run_job_using({!r}, {!r}) already exists: {!r}".format( - run_using, - worker_implementation, - for_run_using[worker_implementation], - ) + f"run_task_using({run_using!r}, {worker_implementation!r}) already exists: {for_run_using[worker_implementation]!r}" ) for_run_using[worker_implementation] = (func, schema, defaults) return func @@ -412,42 +412,40 @@ def run_job_using(worker_implementation, run_using, schema=None, defaults={}): return wrap -@run_job_using( +@run_task_using( "always-optimized", "always-optimized", Schema({"using": "always-optimized"}) ) -def always_optimized(config, job, taskdesc): +def always_optimized(config, task, taskdesc): pass -def configure_taskdesc_for_run(config, job, taskdesc, worker_implementation): +def configure_taskdesc_for_run(config, task, taskdesc, worker_implementation): """ - Run the appropriate function for this job against the given task + Run the appropriate function for this task against the given task description. - This will raise an appropriate error if no function exists, or if the job's + This will raise an appropriate error if no function exists, or if the task's run is not valid according to the schema. """ - run_using = job["run"]["using"] + run_using = task["run"]["using"] if run_using not in registry: raise Exception(f"no functions for run.using {run_using!r}") if worker_implementation not in registry[run_using]: raise Exception( - "no functions for run.using {!r} on {!r}".format( - run_using, worker_implementation - ) + f"no functions for run.using {run_using!r} on {worker_implementation!r}" ) func, schema, defaults = registry[run_using][worker_implementation] for k, v in defaults.items(): - job["run"].setdefault(k, v) + task["run"].setdefault(k, v) if schema: validate_schema( schema, - job["run"], - "In job.run using {!r}/{!r} for job {!r}:".format( - job["run"]["using"], worker_implementation, job["label"] + task["run"], + "In task.run using {!r}/{!r} for task {!r}:".format( + task["run"]["using"], worker_implementation, task["label"] ), ) - func(config, job, taskdesc) + func(config, task, taskdesc) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/common.py index 04708daf81..66466bc5f9 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/common.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/common.py @@ -2,7 +2,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. """ -Common support for various job types. These functions are all named after the +Common support for various task types. These functions are all named after the worker implementation they operate on, and take the same three parameters, for consistency. """ @@ -21,21 +21,21 @@ def get_vcsdir_name(os): return "vcs" -def add_cache(job, taskdesc, name, mount_point, skip_untrusted=False): +def add_cache(task, taskdesc, name, mount_point, skip_untrusted=False): """Adds a cache based on the worker's implementation. Args: - job (dict): Task's job description. + task (dict): Tasks object. taskdesc (dict): Target task description to modify. name (str): Name of the cache. mount_point (path): Path on the host to mount the cache. skip_untrusted (bool): Whether cache is used in untrusted environments (default: False). Only applies to docker-worker. """ - if not job["run"].get("use-caches", True): + if not task["run"].get("use-caches", True): return - worker = job["worker"] + worker = task["worker"] if worker["implementation"] == "docker-worker": taskdesc["worker"].setdefault("caches", []).append( @@ -60,7 +60,7 @@ def add_cache(job, taskdesc, name, mount_point, skip_untrusted=False): pass -def add_artifacts(config, job, taskdesc, path): +def add_artifacts(config, task, taskdesc, path): taskdesc["worker"].setdefault("artifacts", []).append( { "name": get_artifact_prefix(taskdesc), @@ -70,28 +70,28 @@ def add_artifacts(config, job, taskdesc, path): ) -def docker_worker_add_artifacts(config, job, taskdesc): +def docker_worker_add_artifacts(config, task, taskdesc): """Adds an artifact directory to the task""" - path = "{workdir}/artifacts/".format(**job["run"]) + path = "{workdir}/artifacts/".format(**task["run"]) taskdesc["worker"]["env"]["UPLOAD_DIR"] = path - add_artifacts(config, job, taskdesc, path) + add_artifacts(config, task, taskdesc, path) -def generic_worker_add_artifacts(config, job, taskdesc): +def generic_worker_add_artifacts(config, task, taskdesc): """Adds an artifact directory to the task""" # The path is the location on disk; it doesn't necessarily # mean the artifacts will be public or private; that is set via the name # attribute in add_artifacts. - add_artifacts(config, job, taskdesc, path=get_artifact_prefix(taskdesc)) + add_artifacts(config, task, taskdesc, path=get_artifact_prefix(taskdesc)) -def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False): - """Update a job/task with parameters to enable a VCS checkout. +def support_vcs_checkout(config, task, taskdesc, repo_configs, sparse=False): + """Update a task with parameters to enable a VCS checkout. This can only be used with ``run-task`` tasks, as the cache name is reserved for ``run-task`` tasks. """ - worker = job["worker"] + worker = task["worker"] is_mac = worker["os"] == "macosx" is_win = worker["os"] == "windows" is_linux = worker["os"] == "linux" @@ -102,7 +102,7 @@ def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False): checkoutdir = "./build" hgstore = "y:/hg-shared" elif is_docker: - checkoutdir = "{workdir}/checkouts".format(**job["run"]) + checkoutdir = "{workdir}/checkouts".format(**task["run"]) hgstore = f"{checkoutdir}/hg-store" else: checkoutdir = "./checkouts" @@ -130,13 +130,7 @@ def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False): if sparse: cache_name += "-sparse" - # Workers using Mercurial >= 5.8 will enable revlog-compression-zstd, which - # workers using older versions can't understand, so they can't share cache. - # At the moment, only docker workers use the newer version. - if is_docker: - cache_name += "-hg58" - - add_cache(job, taskdesc, cache_name, checkoutdir) + add_cache(task, taskdesc, cache_name, checkoutdir) env = taskdesc["worker"].setdefault("env", {}) env.update( @@ -167,5 +161,5 @@ def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False): taskdesc["scopes"].append(f"secrets:get:{repo_config.ssh_secret_name}") # only some worker platforms have taskcluster-proxy enabled - if job["worker"]["implementation"] in ("docker-worker",): + if task["worker"]["implementation"] in ("docker-worker",): taskdesc["worker"]["taskcluster-proxy"] = True diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/index_search.py index 09b48fe594..c25946980e 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/index_search.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/index_search.py @@ -12,7 +12,7 @@ phase will replace the task with the task from the other graph. from voluptuous import Required from taskgraph.transforms.base import TransformSequence -from taskgraph.transforms.job import run_job_using +from taskgraph.transforms.run import run_task_using from taskgraph.util.schema import Schema transforms = TransformSequence() @@ -29,9 +29,9 @@ run_task_schema = Schema( ) -@run_job_using("always-optimized", "index-search", schema=run_task_schema) -def fill_template(config, job, taskdesc): - run = job["run"] +@run_task_using("always-optimized", "index-search", schema=run_task_schema) +def fill_template(config, task, taskdesc): + run = task["run"] taskdesc["optimization"] = { "index-search": [index.format(**config.params) for index in run["index-search"]] } diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/run_task.py index 6337673611..c2fbef83b0 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/run_task.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/run_task.py @@ -2,7 +2,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. """ -Support for running jobs that are invoked via the `run-task` script. +Support for running tasks that are invoked via the `run-task` script. """ import dataclasses @@ -10,8 +10,8 @@ import os from voluptuous import Any, Optional, Required -from taskgraph.transforms.job import run_job_using -from taskgraph.transforms.job.common import support_vcs_checkout +from taskgraph.transforms.run import run_task_using +from taskgraph.transforms.run.common import support_vcs_checkout from taskgraph.transforms.task import taskref_or_string from taskgraph.util import path, taskcluster from taskgraph.util.schema import Schema @@ -25,7 +25,7 @@ run_task_schema = Schema( { Required("using"): "run-task", # if true, add a cache at ~worker/.cache, which is where things like pip - # tend to hide their caches. This cache is never added for level-1 jobs. + # tend to hide their caches. This cache is never added for level-1 tasks. # TODO Once bug 1526028 is fixed, this and 'use-caches' should be merged. Required("cache-dotcache"): bool, # Whether or not to use caches. @@ -58,8 +58,8 @@ run_task_schema = Schema( ) -def common_setup(config, job, taskdesc, command): - run = job["run"] +def common_setup(config, task, taskdesc, command): + run = task["run"] if run["checkout"]: repo_configs = config.repo_configs if len(repo_configs) > 1 and run["checkout"] is True: @@ -72,7 +72,7 @@ def common_setup(config, job, taskdesc, command): support_vcs_checkout( config, - job, + task, taskdesc, repo_configs=repo_configs, sparse=bool(run["sparse-profile"]), @@ -97,7 +97,7 @@ def common_setup(config, job, taskdesc, command): raise Exception( "Found `{{checkout}}` interpolation in `cwd` for task {name} " "but the task doesn't have a checkout: {cwd}".format( - cwd=run["cwd"], name=job.get("name", job.get("label")) + cwd=run["cwd"], name=task.get("name", task.get("label")) ) ) @@ -126,14 +126,14 @@ def script_url(config, script): return f"{tc_url}/api/queue/v1/task/{task_id}/artifacts/public/{script}" -@run_job_using( +@run_task_using( "docker-worker", "run-task", schema=run_task_schema, defaults=worker_defaults ) -def docker_worker_run_task(config, job, taskdesc): - run = job["run"] - worker = taskdesc["worker"] = job["worker"] +def docker_worker_run_task(config, task, taskdesc): + run = task["run"] + worker = taskdesc["worker"] = task["worker"] command = run.pop("run-task-command", ["/usr/local/bin/run-task"]) - common_setup(config, job, taskdesc, command) + common_setup(config, task, taskdesc, command) if run.get("cache-dotcache"): worker["caches"].append( @@ -158,12 +158,12 @@ def docker_worker_run_task(config, job, taskdesc): worker["command"] = command -@run_job_using( +@run_task_using( "generic-worker", "run-task", schema=run_task_schema, defaults=worker_defaults ) -def generic_worker_run_task(config, job, taskdesc): - run = job["run"] - worker = taskdesc["worker"] = job["worker"] +def generic_worker_run_task(config, task, taskdesc): + run = task["run"] + worker = taskdesc["worker"] = task["worker"] is_win = worker["os"] == "windows" is_mac = worker["os"] == "macosx" is_bitbar = worker["os"] == "linux-bitbar" @@ -177,7 +177,7 @@ def generic_worker_run_task(config, job, taskdesc): else: command = ["./run-task"] - common_setup(config, job, taskdesc, command) + common_setup(config, task, taskdesc, command) worker.setdefault("mounts", []) if run.get("cache-dotcache"): diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/toolchain.py index c9c09542ff..59e66cb973 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/job/toolchain.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/run/toolchain.py @@ -2,14 +2,14 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. """ -Support for running toolchain-building jobs via dedicated scripts +Support for running toolchain-building tasks via dedicated scripts """ from voluptuous import ALLOW_EXTRA, Any, Optional, Required import taskgraph -from taskgraph.transforms.job import configure_taskdesc_for_run, run_job_using -from taskgraph.transforms.job.common import ( +from taskgraph.transforms.run import configure_taskdesc_for_run, run_task_using +from taskgraph.transforms.run.common import ( docker_worker_add_artifacts, generic_worker_add_artifacts, get_vcsdir_name, @@ -36,12 +36,12 @@ toolchain_run_schema = Schema( # Paths/patterns pointing to files that influence the outcome of a # toolchain build. Optional("resources"): [str], - # Path to the artifact produced by the toolchain job + # Path to the artifact produced by the toolchain task Required("toolchain-artifact"): str, Optional( "toolchain-alias", - description="An alias that can be used instead of the real toolchain job name in " - "fetch stanzas for jobs.", + description="An alias that can be used instead of the real toolchain task name in " + "fetch stanzas for tasks.", ): Any(str, [str]), Optional( "toolchain-env", @@ -82,10 +82,10 @@ def get_digest_data(config, run, taskdesc): return data -def common_toolchain(config, job, taskdesc, is_docker): - run = job["run"] +def common_toolchain(config, task, taskdesc, is_docker): + run = task["run"] - worker = taskdesc["worker"] = job["worker"] + worker = taskdesc["worker"] = task["worker"] worker["chain-of-trust"] = True srcdir = get_vcsdir_name(worker["os"]) @@ -94,14 +94,14 @@ def common_toolchain(config, job, taskdesc, is_docker): # If the task doesn't have a docker-image, set a default worker.setdefault("docker-image", {"in-tree": "toolchain-build"}) - # Allow the job to specify where artifacts come from, but add + # Allow the task to specify where artifacts come from, but add # public/build if it's not there already. artifacts = worker.setdefault("artifacts", []) if not any(artifact.get("name") == "public/build" for artifact in artifacts): if is_docker: - docker_worker_add_artifacts(config, job, taskdesc) + docker_worker_add_artifacts(config, task, taskdesc) else: - generic_worker_add_artifacts(config, job, taskdesc) + generic_worker_add_artifacts(config, task, taskdesc) env = worker["env"] env.update( @@ -147,7 +147,7 @@ def common_toolchain(config, job, taskdesc, is_docker): run["command"] = command - configure_taskdesc_for_run(config, job, taskdesc, worker["implementation"]) + configure_taskdesc_for_run(config, task, taskdesc, worker["implementation"]) toolchain_defaults = { @@ -155,21 +155,21 @@ toolchain_defaults = { } -@run_job_using( +@run_task_using( "docker-worker", "toolchain-script", schema=toolchain_run_schema, defaults=toolchain_defaults, ) -def docker_worker_toolchain(config, job, taskdesc): - common_toolchain(config, job, taskdesc, is_docker=True) +def docker_worker_toolchain(config, task, taskdesc): + common_toolchain(config, task, taskdesc, is_docker=True) -@run_job_using( +@run_task_using( "generic-worker", "toolchain-script", schema=toolchain_run_schema, defaults=toolchain_defaults, ) -def generic_worker_toolchain(config, job, taskdesc): - common_toolchain(config, job, taskdesc, is_docker=False) +def generic_worker_toolchain(config, task, taskdesc): + common_toolchain(config, task, taskdesc, is_docker=False) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py index c55de78513..168b8c00c9 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task.py @@ -110,7 +110,7 @@ task_description_schema = Schema( # section of the kind (delimited by "-") all smooshed together. # Eg: "test" becomes "T", "docker-image" becomes "DI", etc. "symbol": Optional(str), - # the job kind + # the task kind # If "build" or "test" is found in the kind name, this defaults # to the appropriate value. Otherwise, defaults to "other" "kind": Optional(Any("build", "test", "other")), @@ -129,7 +129,7 @@ task_description_schema = Schema( Optional("index"): { # the name of the product this build produces "product": str, - # the names to use for this job in the TaskCluster index + # the names to use for this task in the TaskCluster index "job-name": str, # Type of gecko v2 index to use "type": str, @@ -179,7 +179,7 @@ task_description_schema = Schema( # be substituted in this string: # {level} -- the scm level of this push "worker-type": str, - # Whether the job should use sccache compiler caching. + # Whether the task should use sccache compiler caching. Required("needs-sccache"): bool, # information specific to the worker implementation that will run this task Optional("worker"): { @@ -196,7 +196,7 @@ TC_TREEHERDER_SCHEMA_URL = ( UNKNOWN_GROUP_NAME = ( - "Treeherder group {} (from {}) has no name; " "add it to taskcluster/ci/config.yml" + "Treeherder group {} (from {}) has no name; " "add it to taskcluster/config.yml" ) V2_ROUTE_TEMPLATES = [ @@ -266,7 +266,7 @@ def index_builder(name): UNSUPPORTED_INDEX_PRODUCT_ERROR = """\ The index product {product} is not in the list of configured products in -`taskcluster/ci/config.yml'. +`taskcluster/config.yml'. """ @@ -317,7 +317,7 @@ def verify_index(config, index): { # only one type is supported by any of the workers right now "type": "persistent", - # name of the cache, allowing re-use by subsequent tasks naming the + # name of the cache, allowing reuse by subsequent tasks naming the # same cache "name": str, # location in the task image where the cache will be mounted @@ -364,6 +364,9 @@ def build_docker_worker_payload(config, task, task_def): if "in-tree" in image: name = image["in-tree"] docker_image_task = "build-docker-image-" + image["in-tree"] + assert "docker-image" not in task.get( + "dependencies", () + ), "docker-image key in dependencies object is reserved" task.setdefault("dependencies", {})["docker-image"] = docker_image_task image = { @@ -487,19 +490,19 @@ def build_docker_worker_payload(config, task, task_def): # run-task knows how to validate caches. # - # To help ensure new run-task features and bug fixes don't interfere - # with existing caches, we seed the hash of run-task into cache names. - # So, any time run-task changes, we should get a fresh set of caches. - # This means run-task can make changes to cache interaction at any time - # without regards for backwards or future compatibility. + # To help ensure new run-task features and bug fixes, as well as the + # versions of tools such as mercurial or git, don't interfere with + # existing caches, we seed the underlying docker-image task id into + # cache names, for tasks using in-tree Docker images. # # But this mechanism only works for in-tree Docker images that are built # with the current run-task! For out-of-tree Docker images, we have no # way of knowing their content of run-task. So, in addition to varying # cache names by the contents of run-task, we also take the Docker image - # name into consideration. This means that different Docker images will - # never share the same cache. This is a bit unfortunate. But it is the - # safest thing to do. Fortunately, most images are defined in-tree. + # name into consideration. + # + # This means that different Docker images will never share the same + # cache. This is a bit unfortunate, but is the safest thing to do. # # For out-of-tree Docker images, we don't strictly need to incorporate # the run-task content into the cache name. However, doing so preserves @@ -520,6 +523,8 @@ def build_docker_worker_payload(config, task, task_def): out_of_tree_image.encode("utf-8") ).hexdigest() suffix += name_hash[0:12] + else: + suffix += "-<docker-image>" else: suffix = cache_version @@ -539,13 +544,15 @@ def build_docker_worker_payload(config, task, task_def): suffix=suffix, ) caches[name] = cache["mount-point"] - task_def["scopes"].append("docker-worker:cache:%s" % name) + task_def["scopes"].append( + {"task-reference": "docker-worker:cache:%s" % name} + ) # Assertion: only run-task is interested in this. if run_task: payload["env"]["TASKCLUSTER_CACHES"] = ";".join(sorted(caches.values())) - payload["cache"] = caches + payload["cache"] = {"task-reference": caches} # And send down volumes information to run-task as well. if run_task and worker.get("volumes"): @@ -752,7 +759,7 @@ def build_generic_worker_payload(config, task, task_def): schema={ # the maximum time to run, in seconds Required("max-run-time"): int, - # locale key, if this is a locale beetmover job + # locale key, if this is a locale beetmover task Optional("locale"): str, Optional("partner-public"): bool, Required("release-properties"): { @@ -1075,7 +1082,11 @@ def build_task(config, tasks): extra["parent"] = os.environ.get("TASK_ID", "") if "expires-after" not in task: - task["expires-after"] = "28 days" if config.params.is_try() else "1 year" + task["expires-after"] = ( + config.graph_config._config.get("task-expires-after", "28 days") + if config.params.is_try() + else "1 year" + ) if "deadline-after" not in task: if "task-deadline-after" in config.graph_config: @@ -1142,9 +1153,9 @@ def build_task(config, tasks): config.params["project"] + th_project_suffix, branch_rev ) ) - task_def["metadata"]["description"] += " ([Treeherder push]({}))".format( - th_push_link - ) + task_def["metadata"][ + "description" + ] += f" ([Treeherder push]({th_push_link}))" # add the payload and adjust anything else as required (e.g., scopes) payload_builders[task["worker"]["implementation"]].builder( @@ -1288,7 +1299,7 @@ def check_caches_are_volumes(task): Caches and volumes are the only filesystem locations whose content isn't defined by the Docker image itself. Some caches are optional - depending on the job environment. We want paths that are potentially + depending on the task environment. We want paths that are potentially caches to have as similar behavior regardless of whether a cache is used. To help enforce this, we require that all paths used as caches to be declared as Docker volumes. This check won't catch all offenders. @@ -1343,7 +1354,9 @@ def check_run_task_caches(config, tasks): main_command = command[0] if isinstance(command[0], str) else "" run_task = main_command.endswith("run-task") - for cache in payload.get("cache", {}): + for cache in payload.get("cache", {}).get( + "task-reference", payload.get("cache", {}) + ): if not cache.startswith(cache_prefix): raise Exception( "{} is using a cache ({}) which is not appropriate " @@ -1364,7 +1377,7 @@ def check_run_task_caches(config, tasks): "cache name" ) - if not cache.endswith(suffix): + if suffix not in cache: raise Exception( f"{task['label']} is using a cache ({cache}) reserved for run-task " "but the cache name is not dependent on the contents " diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task_context.py b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task_context.py index 5c7ed6af80..bd36d827aa 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task_context.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/transforms/task_context.py @@ -81,9 +81,9 @@ transforms.add_validate(SCHEMA) @transforms.add -def render_task(config, jobs): - for job in jobs: - sub_config = job.pop("task-context") +def render_task(config, tasks): + for task in tasks: + sub_config = task.pop("task-context") params_context = {} for var, path in sub_config.pop("from-parameters", {}).items(): if isinstance(path, str): @@ -111,11 +111,11 @@ def render_task(config, jobs): # Now that we have our combined context, we can substitute. for field in fields: - container, subfield = job, field + container, subfield = task, field while "." in subfield: f, subfield = subfield.split(".", 1) container = container[f] container[subfield] = substitute(container[subfield], **subs) - yield job + yield task diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py index ee59ba4548..261a031038 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py @@ -12,6 +12,40 @@ import tarfile DEFAULT_MTIME = 1451606400 +# Python 3.9 contains this change: +# https://github.com/python/cpython/commit/674935b8caf33e47c78f1b8e197b1b77a04992d2 +# which changes the output of tar creation compared to earlier versions. +# As this code is used to generate tar files that are meant to be deterministic +# across versions of python (specifically, it's used as part of computing the hash +# of docker images, which needs to be identical between CI (which uses python 3.8), +# and developer environments (using arbitrary versions of python, at this point, +# most probably more recent than 3.9)). +# What we do is subblass TarInfo so that if used on python >= 3.9, it reproduces the +# behavior from python < 3.9. +# Here's how it goes: +# - the behavior in python >= 3.9 is the same as python < 3.9 when the type encoded +# in the tarinfo is CHRTYPE or BLKTYPE. +# - the value of the type is only compared in the context of choosing which behavior +# to take +# - we replace the type with the same value (so that using the value has no changes) +# but that pretends to be the same as CHRTYPE so that the condition that enables the +# old behavior is taken. +class HackedType(bytes): + def __eq__(self, other): + if other == tarfile.CHRTYPE: + return True + return self == other + + +class TarInfo(tarfile.TarInfo): + @staticmethod + def _create_header(info, format, encoding, errors): + info["type"] = HackedType(info["type"]) + # ignore type checking because it looks like pyright complains because we're calling a + # non-public method + return tarfile.TarInfo._create_header(info, format, encoding, errors) # type: ignore + + def create_tar_from_files(fp, files): """Create a tar file deterministically. @@ -25,15 +59,23 @@ def create_tar_from_files(fp, files): FUTURE accept a filename argument (or create APIs to write files) """ - with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf: + # The format is explicitly set to tarfile.GNU_FORMAT, because this default format + # has been changed in Python 3.8. + with tarfile.open( + name="", mode="w", fileobj=fp, dereference=True, format=tarfile.GNU_FORMAT + ) as tf: for archive_path, f in sorted(files.items()): if isinstance(f, str): - mode = os.stat(f).st_mode + s = os.stat(f) + mode = s.st_mode + size = s.st_size f = open(f, "rb") else: mode = 0o0644 + size = len(f.read()) + f.seek(0) - ti = tarfile.TarInfo(archive_path) + ti = TarInfo(archive_path) ti.mode = mode ti.type = tarfile.REGTYPE @@ -56,9 +98,7 @@ def create_tar_from_files(fp, files): # Set mtime to a constant value. ti.mtime = DEFAULT_MTIME - f.seek(0, 2) - ti.size = f.tell() - f.seek(0, 0) + ti.size = size # tarfile wants to pass a size argument to read(). So just # wrap/buffer in a proper file object interface. tf.addfile(ti, f) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py index 974b114902..1a3baad5be 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py @@ -7,6 +7,7 @@ import hashlib import time TARGET_CACHE_INDEX = "{cache_prefix}.cache.level-{level}.{type}.{name}.hash.{digest}" +TARGET_PR_CACHE_INDEX = "{cache_prefix}.cache.pr.{type}.{name}.hash.{digest}" EXTRA_CACHE_INDEXES = [ "{cache_prefix}.cache.level-{level}.{type}.{name}.latest", "{cache_prefix}.cache.level-{level}.{type}.{name}.pushdate.{build_date_long}", @@ -53,31 +54,45 @@ def add_optimization( # We'll try to find a cached version of the toolchain at levels above and # including the current level, starting at the highest level. - # Chain-of-trust doesn't handle tasks not built on the tip of a - # pull-request, so don't look for level-1 tasks if building a pull-request. index_routes = [] min_level = int(config.params["level"]) - if config.params["tasks_for"] == "github-pull-request": - min_level = max(min_level, 3) for level in reversed(range(min_level, 4)): subs["level"] = level index_routes.append(TARGET_CACHE_INDEX.format(**subs)) - taskdesc["optimization"] = {"index-search": index_routes} + # Pull requests use a different target cache index route. This way we can + # be confident they won't be used by anything other than the pull request + # that created the cache in the first place. + if config.params["tasks_for"].startswith( + "github-pull-request" + ) and config.graph_config["taskgraph"].get("cache-pull-requests", True): + subs["head_ref"] = config.params["head_ref"] + if subs["head_ref"].startswith("refs/heads/"): + subs["head_ref"] = subs["head_ref"][11:] + index_routes.append(TARGET_PR_CACHE_INDEX.format(**subs)) + + taskdesc["optimization"] = {"index-search": index_routes} # ... and cache at the lowest level. subs["level"] = config.params["level"] - taskdesc.setdefault("routes", []).append( - f"index.{TARGET_CACHE_INDEX.format(**subs)}" - ) - # ... and add some extra routes for humans - subs["build_date_long"] = time.strftime( - "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"]) - ) - taskdesc["routes"].extend( - [f"index.{route.format(**subs)}" for route in EXTRA_CACHE_INDEXES] - ) + if config.params["tasks_for"].startswith("github-pull-request"): + if config.graph_config["taskgraph"].get("cache-pull-requests", True): + taskdesc.setdefault("routes", []).append( + f"index.{TARGET_PR_CACHE_INDEX.format(**subs)}" + ) + else: + taskdesc.setdefault("routes", []).append( + f"index.{TARGET_CACHE_INDEX.format(**subs)}" + ) + + # ... and add some extra routes for humans + subs["build_date_long"] = time.strftime( + "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"]) + ) + taskdesc["routes"].extend( + [f"index.{route.format(**subs)}" for route in EXTRA_CACHE_INDEXES] + ) taskdesc["attributes"]["cached_task"] = { "type": cache_type, diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py deleted file mode 100644 index d0e1e1079f..0000000000 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py +++ /dev/null @@ -1,79 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -""" -Utilities for generating a decision task from :file:`.taskcluster.yml`. -""" - - -import os - -import jsone -import slugid -import yaml - -from .templates import merge -from .time import current_json_time -from .vcs import find_hg_revision_push_info - - -def make_decision_task(params, root, context, head_rev=None): - """Generate a basic decision task, based on the root .taskcluster.yml""" - with open(os.path.join(root, ".taskcluster.yml"), "rb") as f: - taskcluster_yml = yaml.safe_load(f) - - if not head_rev: - head_rev = params["head_rev"] - - if params["repository_type"] == "hg": - pushlog = find_hg_revision_push_info(params["repository_url"], head_rev) - - hg_push_context = { - "pushlog_id": pushlog["pushid"], - "pushdate": pushlog["pushdate"], - "owner": pushlog["user"], - } - else: - hg_push_context = {} - - slugids = {} - - def as_slugid(name): - # https://github.com/taskcluster/json-e/issues/164 - name = name[0] - if name not in slugids: - slugids[name] = slugid.nice() - return slugids[name] - - # provide a similar JSON-e context to what mozilla-taskcluster provides: - # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml - # but with a different tasks_for and an extra `cron` section - context = merge( - { - "repository": { - "url": params["repository_url"], - "project": params["project"], - "level": params["level"], - }, - "push": merge( - { - "revision": params["head_rev"], - # remainder are fake values, but the decision task expects them anyway - "comment": " ", - }, - hg_push_context, - ), - "now": current_json_time(), - "as_slugid": as_slugid, - }, - context, - ) - - rendered = jsone.render(taskcluster_yml, context) - if len(rendered["tasks"]) != 1: - raise Exception("Expected .taskcluster.yml to only produce one cron task") - task = rendered["tasks"][0] - - task_id = task.pop("taskId") - return (task_id, task) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py index c37a69f98f..13815381ed 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py @@ -7,6 +7,7 @@ import hashlib import io import os import re +from typing import Optional from taskgraph.util.archive import create_tar_gz_from_files from taskgraph.util.memoize import memoize @@ -16,17 +17,27 @@ IMAGE_DIR = os.path.join(".", "taskcluster", "docker") from .yaml import load_yaml -def docker_image(name, by_tag=False): +def docker_image(name: str, by_tag: bool = False) -> Optional[str]: """ Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``, or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`. + + Args: + name (str): The image to build. + by_tag (bool): If True, will apply a tag based on VERSION file. + Otherwise will apply a hash based on HASH file. + Returns: + Optional[str]: Image if it can be resolved, otherwise None. """ try: with open(os.path.join(IMAGE_DIR, name, "REGISTRY")) as f: registry = f.read().strip() except OSError: - with open(os.path.join(IMAGE_DIR, "REGISTRY")) as f: - registry = f.read().strip() + try: + with open(os.path.join(IMAGE_DIR, "REGISTRY")) as f: + registry = f.read().strip() + except OSError: + return None if not by_tag: hashfile = os.path.join(IMAGE_DIR, name, "HASH") @@ -34,7 +45,7 @@ def docker_image(name, by_tag=False): with open(hashfile) as f: return f"{registry}/{name}@{f.read().strip()}" except OSError: - raise Exception(f"Failed to read HASH file {hashfile}") + return None try: with open(os.path.join(IMAGE_DIR, name, "VERSION")) as f: @@ -197,7 +208,7 @@ def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=N @memoize def image_paths(): """Return a map of image name to paths containing their Dockerfile.""" - config = load_yaml("taskcluster", "ci", "docker-image", "kind.yml") + config = load_yaml("taskcluster", "kinds", "docker-image", "kind.yml") return { k: os.path.join(IMAGE_DIR, v.get("definition", k)) for k, v in config["tasks"].items() diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py index 5d884fc318..d42b2ecef9 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py @@ -39,10 +39,7 @@ def hash_paths(base_path, patterns): raise Exception("%s did not match anything" % pattern) for path in sorted(found): h.update( - "{} {}\n".format( - hash_path(mozpath.abspath(mozpath.join(base_path, path))), - mozpath.normsep(path), - ).encode("utf-8") + f"{hash_path(mozpath.abspath(mozpath.join(base_path, path)))} {mozpath.normsep(path)}\n".encode() ) return h.hexdigest() @@ -55,4 +52,8 @@ def _find_matching_files(base_path, pattern): @memoize def _get_all_files(base_path): - return [str(path) for path in Path(base_path).rglob("*") if path.is_file()] + return [ + mozpath.normsep(str(path)) + for path in Path(base_path).rglob("*") + if path.is_file() + ] diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py index 9b0c5a44fb..00c84ba980 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py @@ -66,8 +66,8 @@ def evaluate_keyed_by( # Error out when only 'default' is specified as only alternatives, # because we don't need to by-{keyed_by} there. raise Exception( - "Keyed-by '{}' unnecessary with only value 'default' " - "found, when determining item {}".format(keyed_by, item_name) + f"Keyed-by '{keyed_by}' unnecessary with only value 'default' " + f"found, when determining item {item_name}" ) if key is None: @@ -76,22 +76,20 @@ def evaluate_keyed_by( continue else: raise Exception( - "No attribute {} and no value for 'default' found " - "while determining item {}".format(keyed_by, item_name) + f"No attribute {keyed_by} and no value for 'default' found " + f"while determining item {item_name}" ) matches = keymatch(alternatives, key) if enforce_single_match and len(matches) > 1: raise Exception( - "Multiple matching values for {} {!r} found while " - "determining item {}".format(keyed_by, key, item_name) + f"Multiple matching values for {keyed_by} {key!r} found while " + f"determining item {item_name}" ) elif matches: value = matches[0] continue raise Exception( - "No {} matching {!r} nor 'default' found while determining item {}".format( - keyed_by, key, item_name - ) + f"No {keyed_by} matching {key!r} nor 'default' found while determining item {item_name}" ) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py index 56b513e74c..a4bc50cc26 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py @@ -2,39 +2,6 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. -# Imported from -# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#923-949 - - import functools - -class memoize(dict): - """A decorator to memoize the results of function calls depending - on its arguments. - Both functions and instance methods are handled, although in the - instance method case, the results are cache in the instance itself. - """ - - def __init__(self, func): - self.func = func - functools.update_wrapper(self, func) - - def __call__(self, *args): - if args not in self: - self[args] = self.func(*args) - return self[args] - - def method_call(self, instance, *args): - name = "_%s" % self.func.__name__ - if not hasattr(instance, name): - setattr(instance, name, {}) - cache = getattr(instance, name) - if args not in cache: - cache[args] = self.func(instance, *args) - return cache[args] - - def __get__(self, instance, cls): - return functools.update_wrapper( - functools.partial(self.method_call, instance), self.func - ) +memoize = functools.lru_cache(maxsize=None) # backwards compatibility shim diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py index 6233a98a40..1973f6f7df 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py @@ -20,6 +20,12 @@ def _recurse(val, param_fns): if len(val) == 1: for param_key, param_fn in param_fns.items(): if set(val.keys()) == {param_key}: + if isinstance(val[param_key], dict): + # handle `{"task-reference": {"<foo>": "bar"}}` + return { + param_fn(key): recurse(v) + for key, v in val[param_key].items() + } return param_fn(val[param_key]) return {k: recurse(v) for k, v in val.items()} else: @@ -74,17 +80,14 @@ def resolve_task_references(label, task_def, task_id, decision_task_id, dependen task_id = dependencies[dependency] except KeyError: raise KeyError( - "task '{}' has no dependency named '{}'".format( - label, dependency - ) + f"task '{label}' has no dependency named '{dependency}'" ) - assert artifact_name.startswith( - "public/" - ), "artifact-reference only supports public artifacts, not `{}`".format( - artifact_name - ) - return get_artifact_url(task_id, artifact_name) + use_proxy = False + if not artifact_name.startswith("public/"): + use_proxy = True + + return get_artifact_url(task_id, artifact_name, use_proxy=use_proxy) return ARTIFACT_REFERENCE_PATTERN.sub(repl, val) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py index 3989f71182..02e79a3a27 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py @@ -74,7 +74,7 @@ def resolve_keyed_by( For example, given item:: - job: + task: test-platform: linux128 chunks: by-test-platform: @@ -82,10 +82,10 @@ def resolve_keyed_by( win.*: 6 default: 12 - a call to `resolve_keyed_by(item, 'job.chunks', item['thing-name'])` + a call to `resolve_keyed_by(item, 'task.chunks', item['thing-name'])` would mutate item in-place to:: - job: + task: test-platform: linux128 chunks: 12 @@ -182,7 +182,7 @@ def check_schema(schema): if not identifier_re.match(k) and not excepted(path): raise RuntimeError( "YAML schemas should use dashed lower-case identifiers, " - "not {!r} @ {}".format(k, path) + f"not {k!r} @ {path}" ) elif isinstance(k, (voluptuous.Optional, voluptuous.Required)): check_identifier(path, k.schema) @@ -191,9 +191,7 @@ def check_schema(schema): check_identifier(path, v) elif not excepted(path): raise RuntimeError( - "Unexpected type in YAML schema: {} @ {}".format( - type(k).__name__, path - ) + f"Unexpected type in YAML schema: {type(k).__name__} @ {path}" ) if isinstance(sch, collections.abc.Mapping): diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/set_name.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/set_name.py new file mode 100644 index 0000000000..4c27a9cca1 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/set_name.py @@ -0,0 +1,34 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Define a collection of set_name functions +# Note: this is stored here instead of where it is used in the `from_deps` +# transform to give consumers a chance to register their own `set_name` +# handlers before the `from_deps` schema is created. +SET_NAME_MAP = {} + + +def set_name(name, schema=None): + def wrapper(func): + assert ( + name not in SET_NAME_MAP + ), f"duplicate set_name function name {name} ({func} and {SET_NAME_MAP[name]})" + SET_NAME_MAP[name] = func + func.schema = schema + return func + + return wrapper + + +@set_name("strip-kind") +def set_name_strip_kind(config, tasks, primary_dep, primary_kind): + if primary_dep.label.startswith(primary_kind): + return primary_dep.label[len(primary_kind) + 1 :] + else: + return primary_dep.label + + +@set_name("retain-kind") +def set_name_retain_kind(config, tasks, primary_dep, primary_kind): + return primary_dep.label diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py index d695767f05..16b71b7d6a 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py @@ -14,7 +14,7 @@ def _quote(s): As a special case, if given an int, returns a string containing the int, not enclosed in quotes. """ - if type(s) == int: + if isinstance(s, int): return "%d" % s # Empty strings need to be quoted to have any significance diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py index a830a473b3..b467e98a97 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py @@ -3,10 +3,12 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. +import copy import datetime import functools import logging import os +from typing import Dict, List, Union import requests import taskcluster_urls as liburls @@ -53,9 +55,11 @@ def get_root_url(use_proxy): logger.debug( "Running in Taskcluster instance {}{}".format( os.environ["TASKCLUSTER_ROOT_URL"], - " with taskcluster-proxy" - if "TASKCLUSTER_PROXY_URL" in os.environ - else "", + ( + " with taskcluster-proxy" + if "TASKCLUSTER_PROXY_URL" in os.environ + else "" + ), ) ) return liburls.normalize_root_url(os.environ["TASKCLUSTER_ROOT_URL"]) @@ -136,22 +140,9 @@ def _handle_artifact(path, response): def get_artifact_url(task_id, path, use_proxy=False): artifact_tmpl = liburls.api( - get_root_url(False), "queue", "v1", "task/{}/artifacts/{}" + get_root_url(use_proxy), "queue", "v1", "task/{}/artifacts/{}" ) - data = artifact_tmpl.format(task_id, path) - if use_proxy: - # Until Bug 1405889 is deployed, we can't download directly - # from the taskcluster-proxy. Work around by using the /bewit - # endpoint instead. - # The bewit URL is the body of a 303 redirect, which we don't - # want to follow (which fetches a potentially large resource). - response = _do_request( - os.environ["TASKCLUSTER_PROXY_URL"] + "/bewit", - data=data, - allow_redirects=False, - ) - return response.text - return data + return artifact_tmpl.format(task_id, path) def get_artifact(task_id, path, use_proxy=False): @@ -244,6 +235,7 @@ def get_task_url(task_id, use_proxy=False): return task_tmpl.format(task_id) +@memoize def get_task_definition(task_id, use_proxy=False): response = _do_request(get_task_url(task_id, use_proxy)) return response.json() @@ -327,11 +319,7 @@ def get_purge_cache_url(provisioner_id, worker_type, use_proxy=False): def purge_cache(provisioner_id, worker_type, cache_name, use_proxy=False): """Requests a cache purge from the purge-caches service.""" if testing: - logger.info( - "Would have purged {}/{}/{}.".format( - provisioner_id, worker_type, cache_name - ) - ) + logger.info(f"Would have purged {provisioner_id}/{worker_type}/{cache_name}.") else: logger.info(f"Purging {provisioner_id}/{worker_type}/{cache_name}.") purge_cache_url = get_purge_cache_url(provisioner_id, worker_type, use_proxy) @@ -371,3 +359,40 @@ def list_task_group_incomplete_tasks(task_group_id): params = {"continuationToken": resp.get("continuationToken")} else: break + + +@memoize +def _get_deps(task_ids, use_proxy): + upstream_tasks = {} + for task_id in task_ids: + task_def = get_task_definition(task_id, use_proxy) + upstream_tasks[task_def["metadata"]["name"]] = task_id + + upstream_tasks.update(_get_deps(tuple(task_def["dependencies"]), use_proxy)) + + return upstream_tasks + + +def get_ancestors( + task_ids: Union[List[str], str], use_proxy: bool = False +) -> Dict[str, str]: + """Gets the ancestor tasks of the given task_ids as a dictionary of label -> taskid. + + Args: + task_ids (str or [str]): A single task id or a list of task ids to find the ancestors of. + use_proxy (bool): See get_root_url. + + Returns: + dict: A dict whose keys are task labels and values are task ids. + """ + upstream_tasks: Dict[str, str] = {} + + if isinstance(task_ids, str): + task_ids = [task_ids] + + for task_id in task_ids: + task_def = get_task_definition(task_id, use_proxy) + + upstream_tasks.update(_get_deps(tuple(task_def["dependencies"]), use_proxy)) + + return copy.deepcopy(upstream_tasks) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py index e511978b5f..6639e5dddd 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py @@ -73,9 +73,7 @@ def value_of(input_str): if unit not in ALIASES: raise UnknownTimeMeasurement( - "{} is not a valid time measure use one of {}".format( - unit, sorted(ALIASES.keys()) - ) + f"{unit} is not a valid time measure use one of {sorted(ALIASES.keys())}" ) return ALIASES[unit](value) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py index cff5f286cc..6bb6dbd137 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py @@ -42,22 +42,25 @@ def replace_group(treeherder_symbol, new_group): return join_symbol(new_group, symbol) -def inherit_treeherder_from_dep(job, dep_job): - """Inherit treeherder defaults from dep_job""" - treeherder = job.get("treeherder", {}) +def inherit_treeherder_from_dep(task, dep_task): + """Inherit treeherder defaults from dep_task""" + treeherder = task.get("treeherder", {}) dep_th_platform = ( - dep_job.task.get("extra", {}) + dep_task.task.get("extra", {}) .get("treeherder", {}) .get("machine", {}) .get("platform", "") ) dep_th_collection = list( - dep_job.task.get("extra", {}).get("treeherder", {}).get("collection", {}).keys() + dep_task.task.get("extra", {}) + .get("treeherder", {}) + .get("collection", {}) + .keys() )[0] treeherder.setdefault("platform", f"{dep_th_platform}/{dep_th_collection}") treeherder.setdefault( - "tier", dep_job.task.get("extra", {}).get("treeherder", {}).get("tier", 1) + "tier", dep_task.task.get("extra", {}).get("treeherder", {}).get("tier", 1) ) # Does not set symbol treeherder.setdefault("kind", "build") diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py index 2d967d2645..c2fd0d3236 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py @@ -10,9 +10,6 @@ import subprocess from abc import ABC, abstractmethod, abstractproperty from shutil import which -import requests -from redo import retry - from taskgraph.util.path import ancestors PUSHLOG_TMPL = "{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1" @@ -21,7 +18,7 @@ logger = logging.getLogger(__name__) class Repository(ABC): - # Both mercurial and git use sha1 as revision idenfiers. Luckily, both define + # Both mercurial and git use sha1 as revision identifiers. Luckily, both define # the same value as the null revision. # # https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7 @@ -519,34 +516,3 @@ def get_repository(path): return GitRepository(path) raise RuntimeError("Current directory is neither a git or hg repository") - - -def find_hg_revision_push_info(repository, revision): - """Given the parameters for this action and a revision, find the - pushlog_id of the revision.""" - pushlog_url = PUSHLOG_TMPL.format(repository, revision) - - def query_pushlog(url): - r = requests.get(pushlog_url, timeout=60) - r.raise_for_status() - return r - - r = retry( - query_pushlog, - args=(pushlog_url,), - attempts=5, - sleeptime=10, - ) - pushes = r.json()["pushes"] - if len(pushes) != 1: - raise RuntimeError( - "Unable to find a single pushlog_id for {} revision {}: {}".format( - repository, revision, pushes - ) - ) - pushid = list(pushes.keys())[0] - return { - "pushdate": pushes[pushid]["date"], - "pushid": pushid, - "user": pushes[pushid]["user"], - } diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py index e6705c16cf..b5bb0889ae 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py @@ -134,10 +134,8 @@ def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, paramet collection_keys = tuple(sorted(treeherder.get("collection", {}).keys())) if len(collection_keys) != 1: raise Exception( - "Task {} can't be in multiple treeherder collections " - "(the part of the platform after `/`): {}".format( - task.label, collection_keys - ) + f"Task {task.label} can't be in multiple treeherder collections " + f"(the part of the platform after `/`): {collection_keys}" ) platform = treeherder.get("machine", {}).get("platform") group_symbol = treeherder.get("groupSymbol") @@ -175,9 +173,7 @@ def verify_trust_domain_v2_routes( if route.startswith(route_prefix): if route in scratch_pad: raise Exception( - "conflict between {}:{} for route: {}".format( - task.label, scratch_pad[route], route - ) + f"conflict between {task.label}:{scratch_pad[route]} for route: {route}" ) else: scratch_pad[route] = task.label @@ -206,9 +202,7 @@ def verify_routes_notification_filters( route_filter = route.split(".")[-1] if route_filter not in valid_filters: raise Exception( - "{} has invalid notification filter ({})".format( - task.label, route_filter - ) + f"{task.label} has invalid notification filter ({route_filter})" ) @@ -235,12 +229,7 @@ def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config, paramete continue if tier < tiers[d]: raise Exception( - "{} (tier {}) cannot depend on {} (tier {})".format( - task.label, - printable_tier(tier), - d, - printable_tier(tiers[d]), - ) + f"{task.label} (tier {printable_tier(tier)}) cannot depend on {d} (tier {printable_tier(tiers[d])})" ) @@ -262,11 +251,7 @@ def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameter if key in scratch_pad: raise Exception( "Duplicate toolchain-alias in tasks " - "`{}`and `{}`: {}".format( - task.label, - scratch_pad[key], - key, - ) + f"`{task.label}`and `{scratch_pad[key]}`: {key}" ) else: scratch_pad[key] = task.label diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py index 141c7a16d3..a733521527 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py @@ -5,7 +5,10 @@ import os -from yaml.loader import SafeLoader +try: + from yaml import CSafeLoader as SafeLoader +except ImportError: + from yaml import SafeLoader class UnicodeLoader(SafeLoader): |