Diffstat (limited to 'testing/web-platform/tests/tools/ci/tc/decision.py')
-rw-r--r--  testing/web-platform/tests/tools/ci/tc/decision.py  |  404
1 file changed, 404 insertions(+), 0 deletions(-)
diff --git a/testing/web-platform/tests/tools/ci/tc/decision.py b/testing/web-platform/tests/tools/ci/tc/decision.py
new file mode 100644
index 0000000000..0a6d03ab6c
--- /dev/null
+++ b/testing/web-platform/tests/tools/ci/tc/decision.py
@@ -0,0 +1,404 @@
+# mypy: allow-untyped-defs
+
+import argparse
+import json
+import logging
+import os
+import re
+import subprocess
+from collections import OrderedDict
+
+import taskcluster
+
+from . import taskgraph
+
+
+here = os.path.abspath(os.path.dirname(__file__))
+
+
+logging.basicConfig()
+logger = logging.getLogger()
+
+
+def get_triggers(event):
+    # Work out whether the event comes from a pull request and, for pushes, which branch it is for
+ ref_prefix = "refs/heads/"
+ is_pr = "pull_request" in event
+ branch = None
+ if not is_pr and "ref" in event:
+ branch = event["ref"]
+ if branch.startswith(ref_prefix):
+ branch = branch[len(ref_prefix):]
+
+ return is_pr, branch
+
+
+def fetch_event_data(queue):
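+    # Look up this decision task in the queue and return the serialized GitHub
+    # event stored under its extra.github_event property.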
+ try:
+ task_id = os.environ["TASK_ID"]
+ except KeyError:
+ logger.warning("Missing TASK_ID environment variable")
+ # For example under local testing
+ return None
+
+ task_data = queue.task(task_id)
+
+ return task_data.get("extra", {}).get("github_event")
+
+
+def filter_triggers(event, all_tasks):
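+    # Keep only tasks whose "trigger" matches the event: pull-request triggers for
+    # PR events, or branch triggers matching the pushed branch (a trailing "*" is a wildcard).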
+ is_pr, branch = get_triggers(event)
+ triggered = OrderedDict()
+ for name, task in all_tasks.items():
+ if "trigger" in task:
+ if is_pr and "pull-request" in task["trigger"]:
+ triggered[name] = task
+ elif branch is not None and "branch" in task["trigger"]:
+ for trigger_branch in task["trigger"]["branch"]:
+ if (trigger_branch == branch or
+ trigger_branch.endswith("*") and branch.startswith(trigger_branch[:-1])):
+ triggered[name] = task
+ logger.info("Triggers match tasks:\n * %s" % "\n * ".join(triggered.keys()))
+ return triggered
+
+
+def get_run_jobs(event):
+ from tools.ci import jobs
+    if "pull_request" in event:
+        revish = "%s..%s" % (event["pull_request"]["base"]["sha"],
+                             event["pull_request"]["head"]["sha"])
+    else:
+        revish = "%s..%s" % (event["before"], event["after"])
+    logger.info("Looking for changes in range %s" % revish)
+    paths = jobs.get_paths(revish=revish)
+    logger.info("Found changes in paths:\n%s" % "\n".join(paths))
+ path_jobs = jobs.get_jobs(paths)
+ all_jobs = path_jobs | get_extra_jobs(event)
+ logger.info("Including jobs:\n * %s" % "\n * ".join(all_jobs))
+ return all_jobs
+
+
+def get_extra_jobs(event):
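+    # Extra jobs can be requested with a "tc-jobs:" line in the first commit message
+    # of a push, or in the pull request body; only the first matching line is used.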
+ body = None
+ jobs = set()
+ if "commits" in event and event["commits"]:
+ body = event["commits"][0]["message"]
+ elif "pull_request" in event:
+ body = event["pull_request"]["body"]
+
+ if not body:
+ return jobs
+
+ regexp = re.compile(r"\s*tc-jobs:(.*)$")
+
+ for line in body.splitlines():
+ m = regexp.match(line)
+ if m:
+ items = m.group(1)
+ for item in items.split(","):
+ jobs.add(item.strip())
+ break
+ return jobs
+
+
+def filter_excluded_users(tasks, event):
+ # Some users' pull requests are excluded from tasks,
+ # such as pull requests from automated exports.
+ try:
+ submitter = event["pull_request"]["user"]["login"]
+ except KeyError:
+ # Just ignore excluded users if the
+ # username cannot be pulled from the event.
+ logger.debug("Unable to read username from event. Continuing.")
+ return
+
+ excluded_tasks = []
+    # Iterate over a copy of the items because removing entries from the
+    # dict while iterating over it would raise an error.
+ for name, task in list(tasks.items()):
+ if submitter in task.get("exclude-users", []):
+ excluded_tasks.append(name)
+ tasks.pop(name) # removing excluded task
+ if excluded_tasks:
+ logger.info(
+ f"Tasks excluded for user {submitter}:\n * " +
+ "\n * ".join(excluded_tasks)
+ )
+
+
+def filter_schedule_if(event, tasks):
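+    # Tasks with no "schedule-if" rule are always kept; tasks with a "run-job" rule are
+    # kept when "all" or any listed job is among the jobs computed from the changed paths.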
+ scheduled = OrderedDict()
+ run_jobs = None
+ for name, task in tasks.items():
+ if "schedule-if" in task:
+ if "run-job" in task["schedule-if"]:
+ if run_jobs is None:
+ run_jobs = get_run_jobs(event)
+ if "all" in run_jobs or any(item in run_jobs for item in task["schedule-if"]["run-job"]):
+ scheduled[name] = task
+ else:
+ scheduled[name] = task
+ logger.info("Scheduling rules match tasks:\n * %s" % "\n * ".join(scheduled.keys()))
+ return scheduled
+
+
+def get_fetch_rev(event):
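+    # Return (fetch_ref, head_sha, merge_sha) for the revision that tasks should fetch;
+    # merge_sha is None for branch pushes.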
+ is_pr, _ = get_triggers(event)
+ if is_pr:
+        # Resolve the head and merge revisions so that all non-decision tasks are pinned to specific commits
+ rv = ["refs/pull/%s/merge" % event["pull_request"]["number"]]
+ # For every PR GitHub maintains a 'head' branch with commits from the
+ # PR, and a 'merge' branch containing a merge commit between the base
+ # branch and the PR.
+ for ref_type in ["head", "merge"]:
+ ref = "refs/pull/%s/%s" % (event["pull_request"]["number"], ref_type)
+ sha = None
+ try:
+ output = subprocess.check_output(["git", "ls-remote", "origin", ref])
+ except subprocess.CalledProcessError:
+ import traceback
+ logger.error(traceback.format_exc())
+ logger.error("Failed to get commit sha1 for %s" % ref)
+ else:
+ if not output:
+ logger.error("Failed to get commit for %s" % ref)
+ else:
+ sha = output.decode("utf-8").split()[0]
+ rv.append(sha)
+ rv = tuple(rv)
+ else:
+ # For a branch push we have a ref and a head but no merge SHA
+ rv = (event["ref"], event["after"], None)
+ assert len(rv) == 3
+ return rv
+
+
+def build_full_command(event, task):
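+    # Build the shell command the worker runs: fetch the repo via start.sh, install any
+    # extra apt packages, then invoke run_tc.py with the task's options and command.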
+ fetch_ref, head_sha, merge_sha = get_fetch_rev(event)
+ cmd_args = {
+ "task_name": task["name"],
+ "repo_url": event["repository"]["clone_url"],
+ "fetch_ref": fetch_ref,
+ "task_cmd": task["command"],
+ "install_str": "",
+ }
+
+ options = task.get("options", {})
+ options_args = []
+ options_args.append("--ref=%s" % fetch_ref)
+ if head_sha is not None:
+ options_args.append("--head-rev=%s" % head_sha)
+ if merge_sha is not None:
+ options_args.append("--merge-rev=%s" % merge_sha)
+ if options.get("oom-killer"):
+ options_args.append("--oom-killer")
+ if options.get("xvfb"):
+ options_args.append("--xvfb")
+ if not options.get("hosts"):
+ options_args.append("--no-hosts")
+ else:
+ options_args.append("--hosts")
+ # Check out the expected SHA unless it is overridden (e.g. to base_head).
+ if options.get("checkout"):
+ options_args.append("--checkout=%s" % options["checkout"])
+ for browser in options.get("browser", []):
+ options_args.append("--browser=%s" % browser)
+ if options.get("channel"):
+ options_args.append("--channel=%s" % options["channel"])
+ if options.get("install-certificates"):
+ options_args.append("--install-certificates")
+
+ cmd_args["options_str"] = " ".join(str(item) for item in options_args)
+
+ install_packages = task.get("install")
+ if install_packages:
+ install_items = ["apt update -qqy"]
+ install_items.extend("apt install -qqy %s" % item
+ for item in install_packages)
+ cmd_args["install_str"] = "\n".join("sudo %s;" % item for item in install_items)
+
+ return ["/bin/bash",
+ "--login",
+ "-xc",
+ """
+~/start.sh \
+ %(repo_url)s \
+ %(fetch_ref)s;
+%(install_str)s
+cd web-platform-tests;
+./tools/ci/run_tc.py %(options_str)s -- %(task_cmd)s;
+""" % cmd_args]
+
+
+def get_owner(event):
+ if "pusher" in event:
+ pusher = event.get("pusher", {}).get("email", "")
+ if pusher and "@" in pusher:
+ return pusher
+ return "web-platform-tests@users.noreply.github.com"
+
+
+def create_tc_task(event, task, taskgroup_id, depends_on_ids, env_extra=None):
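+    # Build the Taskcluster task definition that will later be passed to queue.createTask,
+    # and return it together with a newly generated task id.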
+ command = build_full_command(event, task)
+ task_id = taskcluster.slugId()
+ task_data = {
+ "taskGroupId": taskgroup_id,
+ "created": taskcluster.fromNowJSON(""),
+ "deadline": taskcluster.fromNowJSON(task["deadline"]),
+ "provisionerId": task["provisionerId"],
+ "schedulerId": task["schedulerId"],
+ "workerType": task["workerType"],
+ "metadata": {
+ "name": task["name"],
+ "description": task.get("description", ""),
+ "owner": get_owner(event),
+ "source": event["repository"]["clone_url"]
+ },
+ "payload": {
+ "artifacts": task.get("artifacts"),
+ "command": command,
+ "image": task.get("image"),
+ "maxRunTime": task.get("maxRunTime"),
+ "env": task.get("env", {}),
+ },
+ "extra": {
+ "github_event": json.dumps(event)
+ },
+ "routes": ["checks"]
+ }
+ if "extra" in task:
+ task_data["extra"].update(task["extra"])
+ if env_extra:
+ task_data["payload"]["env"].update(env_extra)
+ if depends_on_ids:
+ task_data["dependencies"] = depends_on_ids
+ task_data["requires"] = task.get("requires", "all-completed")
+ return task_id, task_data
+
+
+def get_artifact_data(artifact, task_id_map):
+    task_id, _ = task_id_map[artifact["task"]]
+ return {
+ "task": task_id,
+ "glob": artifact["glob"],
+ "dest": artifact["dest"],
+ "extract": artifact.get("extract", False)
+ }
+
+
+def build_task_graph(event, all_tasks, tasks):
+ task_id_map = OrderedDict()
+ taskgroup_id = os.environ.get("TASK_ID", taskcluster.slugId())
+
+ def add_task(task_name, task):
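+        # Recursively create any dependencies first so that their task ids are
+        # available when this task is defined.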
+ depends_on_ids = []
+ if "depends-on" in task:
+ for depends_name in task["depends-on"]:
+ if depends_name not in task_id_map:
+ add_task(depends_name,
+ all_tasks[depends_name])
+ depends_on_ids.append(task_id_map[depends_name][0])
+ env_extra = {}
+ if "download-artifacts" in task:
+ env_extra["TASK_ARTIFACTS"] = json.dumps(
+ [get_artifact_data(artifact, task_id_map)
+ for artifact in task["download-artifacts"]])
+
+ task_id, task_data = create_tc_task(event, task, taskgroup_id, depends_on_ids,
+ env_extra=env_extra)
+ task_id_map[task_name] = (task_id, task_data)
+
+ for task_name, task in tasks.items():
+ if task_name == "sink-task":
+ # sink-task will be created below at the end of the ordered dict,
+ # so that it can depend on all other tasks.
+ continue
+ add_task(task_name, task)
+
+ # GitHub branch protection for pull requests needs us to name explicit
+ # required tasks - which doesn't suffice when using a dynamic task graph.
+ # To work around this we declare a sink task that depends on all the other
+ # tasks completing, and checks if they have succeeded. We can then
+ # make the sink task the sole required task for pull requests.
+ sink_task = tasks.get("sink-task")
+ if sink_task:
+ logger.info("Scheduling sink-task")
+ depends_on_ids = [x[0] for x in task_id_map.values()]
+ sink_task["command"] += " {}".format(" ".join(depends_on_ids))
+ task_id_map["sink-task"] = create_tc_task(
+ event, sink_task, taskgroup_id, depends_on_ids)
+ else:
+ logger.info("sink-task is not scheduled")
+
+ return task_id_map
+
+
+def create_tasks(queue, task_id_map):
+ for (task_id, task_data) in task_id_map.values():
+ queue.createTask(task_id, task_data)
+
+
+def get_event(queue, event_path):
+ if event_path is not None:
+ try:
+ with open(event_path) as f:
+ event_str = f.read()
+ except OSError:
+ logger.error("Missing event file at path %s" % event_path)
+ raise
+ elif "TASK_EVENT" in os.environ:
+ event_str = os.environ["TASK_EVENT"]
+ else:
+ event_str = fetch_event_data(queue)
+ if not event_str:
+ raise ValueError("Can't find GitHub event definition; for local testing pass --event-path")
+ try:
+ return json.loads(event_str)
+ except ValueError:
+ logger.error("Event was not valid JSON")
+ raise
+
+
+def decide(event):
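+    # Load the full task set and successively filter it by trigger, schedule-if rules,
+    # and excluded users, then build the task graph for the remaining tasks.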
+ all_tasks = taskgraph.load_tasks_from_path(os.path.join(here, "tasks", "test.yml"))
+
+ triggered_tasks = filter_triggers(event, all_tasks)
+ scheduled_tasks = filter_schedule_if(event, triggered_tasks)
+ filter_excluded_users(scheduled_tasks, event)
+
+ logger.info("UNSCHEDULED TASKS:\n %s" % "\n ".join(sorted(set(all_tasks.keys()) -
+ set(scheduled_tasks.keys()))))
+ logger.info("SCHEDULED TASKS:\n %s" % "\n ".join(sorted(scheduled_tasks.keys())))
+
+ task_id_map = build_task_graph(event, all_tasks, scheduled_tasks)
+ return task_id_map
+
+
+def get_parser():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--event-path",
+ help="Path to file containing serialized GitHub event")
+ parser.add_argument("--dry-run", action="store_true",
+ help="Don't actually create the tasks, just output the tasks that "
+ "would be created")
+ parser.add_argument("--tasks-path",
+ help="Path to file in which to write payload for all scheduled tasks")
+ return parser
+
+
+def run(venv, **kwargs):
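+    # Entry point for the decision task; venv is accepted to match the calling
+    # convention but is not used here.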
+    queue = taskcluster.Queue({"rootUrl": os.environ["TASKCLUSTER_PROXY_URL"]})
+ event = get_event(queue, event_path=kwargs["event_path"])
+
+ task_id_map = decide(event)
+
+ try:
+ if not kwargs["dry_run"]:
+ create_tasks(queue, task_id_map)
+ else:
+ print(json.dumps(task_id_map, indent=2))
+ finally:
+ if kwargs["tasks_path"]:
+ with open(kwargs["tasks_path"], "w") as f:
+ json.dump(task_id_map, f, indent=2)