diff options
Diffstat (limited to 'third_party/python/taskcluster_taskgraph/taskgraph/create.py')
-rw-r--r-- | third_party/python/taskcluster_taskgraph/taskgraph/create.py | 132 |
1 files changed, 132 insertions, 0 deletions
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/create.py b/third_party/python/taskcluster_taskgraph/taskgraph/create.py new file mode 100644 index 0000000000..3661ac8271 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/create.py @@ -0,0 +1,132 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import concurrent.futures as futures +import json +import logging +import sys + +from slugid import nice as slugid + +from taskgraph.util.parameterization import resolve_timestamps +from taskgraph.util.taskcluster import CONCURRENCY, get_session +from taskgraph.util.time import current_json_time + +logger = logging.getLogger(__name__) + +# this is set to true for `mach taskgraph action-callback --test` +testing = False + + +def create_tasks(graph_config, taskgraph, label_to_taskid, params, decision_task_id): + taskid_to_label = {t: l for l, t in label_to_taskid.items()} + + # when running as an actual decision task, we use the decision task's + # taskId as the taskGroupId. The process that created the decision task + # helpfully placed it in this same taskGroup. If there is no $TASK_ID, + # fall back to a slugid + scheduler_id = "{}-level-{}".format(graph_config["trust-domain"], params["level"]) + + # Add the taskGroupId, schedulerId and optionally the decision task + # dependency + for task_id in taskgraph.graph.nodes: + task_def = taskgraph.tasks[task_id].task + + # if this task has no dependencies *within* this taskgraph, make it + # depend on this decision task. If it has another dependency within + # the taskgraph, then it already implicitly depends on the decision + # task. The result is that tasks do not start immediately. if this + # loop fails halfway through, none of the already-created tasks run. + if not any(t in taskgraph.tasks for t in task_def.get("dependencies", [])): + task_def.setdefault("dependencies", []).append(decision_task_id) + + task_def["taskGroupId"] = decision_task_id + task_def["schedulerId"] = scheduler_id + + # If `testing` is True, then run without parallelization + concurrency = CONCURRENCY if not testing else 1 + session = get_session() + with futures.ThreadPoolExecutor(concurrency) as e: + fs = {} + + # We can't submit a task until its dependencies have been submitted. + # So our strategy is to walk the graph and submit tasks once all + # their dependencies have been submitted. + tasklist = set(taskgraph.graph.visit_postorder()) + alltasks = tasklist.copy() + + def schedule_tasks(): + # bail out early if any futures have failed + if any(f.done() and f.exception() for f in fs.values()): + return + + to_remove = set() + new = set() + + def submit(task_id, label, task_def): + fut = e.submit(create_task, session, task_id, label, task_def) + new.add(fut) + fs[task_id] = fut + + for task_id in tasklist: + task_def = taskgraph.tasks[task_id].task + # If we haven't finished submitting all our dependencies yet, + # come back to this later. + # Some dependencies aren't in our graph, so make sure to filter + # those out + deps = set(task_def.get("dependencies", [])) & alltasks + if any((d not in fs or not fs[d].done()) for d in deps): + continue + + submit(task_id, taskid_to_label[task_id], task_def) + to_remove.add(task_id) + + # Schedule tasks as many times as task_duplicates indicates + attributes = taskgraph.tasks[task_id].attributes + for i in range(1, attributes.get("task_duplicates", 1)): + # We use slugid() since we want a distinct task id + submit(slugid(), taskid_to_label[task_id], task_def) + tasklist.difference_update(to_remove) + + # as each of those futures complete, try to schedule more tasks + for f in futures.as_completed(new): + schedule_tasks() + + # start scheduling tasks and run until everything is scheduled + schedule_tasks() + + # check the result of each future, raising an exception if it failed + for f in futures.as_completed(fs.values()): + f.result() + + +def create_task(session, task_id, label, task_def): + # create the task using 'http://taskcluster/queue', which is proxied to the queue service + # with credentials appropriate to this job. + + # Resolve timestamps + now = current_json_time(datetime_format=True) + task_def = resolve_timestamps(now, task_def) + + if testing: + json.dump( + [task_id, task_def], + sys.stdout, + sort_keys=True, + indent=4, + separators=(",", ": "), + ) + # add a newline + print("") + return + + logger.info(f"Creating task with taskId {task_id} for {label}") + res = session.put(f"http://taskcluster/queue/v1/task/{task_id}", json=task_def) + if res.status_code != 200: + try: + logger.error(res.json()["message"]) + except Exception: + logger.error(res.text) + res.raise_for_status() |