Diffstat (limited to 'testing/web-platform/tests/tools/ci/tc/decision.py')
-rw-r--r-- | testing/web-platform/tests/tools/ci/tc/decision.py | 404 |
1 file changed, 404 insertions, 0 deletions
diff --git a/testing/web-platform/tests/tools/ci/tc/decision.py b/testing/web-platform/tests/tools/ci/tc/decision.py
new file mode 100644
index 0000000000..0a6d03ab6c
--- /dev/null
+++ b/testing/web-platform/tests/tools/ci/tc/decision.py
@@ -0,0 +1,404 @@
+# mypy: allow-untyped-defs
+
+import argparse
+import json
+import logging
+import os
+import re
+import subprocess
+from collections import OrderedDict
+
+import taskcluster
+
+from . import taskgraph
+
+
+here = os.path.abspath(os.path.dirname(__file__))
+
+
+logging.basicConfig()
+logger = logging.getLogger()
+
+
+def get_triggers(event):
+    # Set some variables that we use to get the commits on the current branch
+    ref_prefix = "refs/heads/"
+    is_pr = "pull_request" in event
+    branch = None
+    if not is_pr and "ref" in event:
+        branch = event["ref"]
+        if branch.startswith(ref_prefix):
+            branch = branch[len(ref_prefix):]
+
+    return is_pr, branch
+
+
+def fetch_event_data(queue):
+    try:
+        task_id = os.environ["TASK_ID"]
+    except KeyError:
+        logger.warning("Missing TASK_ID environment variable")
+        # For example under local testing
+        return None
+
+    task_data = queue.task(task_id)
+
+    return task_data.get("extra", {}).get("github_event")
+
+
+def filter_triggers(event, all_tasks):
+    is_pr, branch = get_triggers(event)
+    triggered = OrderedDict()
+    for name, task in all_tasks.items():
+        if "trigger" in task:
+            if is_pr and "pull-request" in task["trigger"]:
+                triggered[name] = task
+            elif branch is not None and "branch" in task["trigger"]:
+                for trigger_branch in task["trigger"]["branch"]:
+                    if (trigger_branch == branch or
+                        (trigger_branch.endswith("*") and
+                         branch.startswith(trigger_branch[:-1]))):
+                        triggered[name] = task
+    logger.info("Triggers match tasks:\n * %s" % "\n * ".join(triggered.keys()))
+    return triggered
+
+
+def get_run_jobs(event):
+    from tools.ci import jobs
+    if "pull_request" in event:
+        revish = "%s..%s" % (event["pull_request"]["base"]["sha"],
+                             event["pull_request"]["head"]["sha"])
+    else:
+        revish = "%s..%s" % (event["before"], event["after"])
+    logger.info("Looking for changes in range %s" % revish)
+    paths = jobs.get_paths(revish=revish)
+    logger.info("Found changes in paths:\n%s" % "\n".join(paths))
+    path_jobs = jobs.get_jobs(paths)
+    all_jobs = path_jobs | get_extra_jobs(event)
+    logger.info("Including jobs:\n * %s" % "\n * ".join(all_jobs))
+    return all_jobs
+
+
+def get_extra_jobs(event):
+    body = None
+    jobs = set()
+    if "commits" in event and event["commits"]:
+        body = event["commits"][0]["message"]
+    elif "pull_request" in event:
+        body = event["pull_request"]["body"]
+
+    if not body:
+        return jobs
+
+    regexp = re.compile(r"\s*tc-jobs:(.*)$")
+
+    for line in body.splitlines():
+        m = regexp.match(line)
+        if m:
+            items = m.group(1)
+            for item in items.split(","):
+                jobs.add(item.strip())
+            break
+    return jobs
+
+
+def filter_excluded_users(tasks, event):
+    # Some users' pull requests are excluded from tasks,
+    # such as pull requests from automated exports.
+    try:
+        submitter = event["pull_request"]["user"]["login"]
+    except KeyError:
+        # Just ignore excluded users if the
+        # username cannot be pulled from the event.
+        logger.debug("Unable to read username from event. Continuing.")
+        return
+
+    excluded_tasks = []
+    # A separate list of items for tasks is needed to iterate over
+    # because removing an item during iteration will raise an error.
+    for name, task in list(tasks.items()):
+        if submitter in task.get("exclude-users", []):
+            excluded_tasks.append(name)
+            tasks.pop(name)  # removing excluded task
+    if excluded_tasks:
+        logger.info(
+            f"Tasks excluded for user {submitter}:\n * " +
+            "\n * ".join(excluded_tasks)
+        )
+
+
+def filter_schedule_if(event, tasks):
+    scheduled = OrderedDict()
+    run_jobs = None
+    for name, task in tasks.items():
+        if "schedule-if" in task:
+            if "run-job" in task["schedule-if"]:
+                if run_jobs is None:
+                    run_jobs = get_run_jobs(event)
+                if "all" in run_jobs or any(item in run_jobs
+                                            for item in task["schedule-if"]["run-job"]):
+                    scheduled[name] = task
+        else:
+            scheduled[name] = task
+    logger.info("Scheduling rules match tasks:\n * %s" % "\n * ".join(scheduled.keys()))
+    return scheduled
+
+
+def get_fetch_rev(event):
+    is_pr, _ = get_triggers(event)
+    if is_pr:
+        # Try to get the actual rev so that all non-decision tasks are pinned to that
+        rv = ["refs/pull/%s/merge" % event["pull_request"]["number"]]
+        # For every PR GitHub maintains a 'head' branch with commits from the
+        # PR, and a 'merge' branch containing a merge commit between the base
+        # branch and the PR.
+        for ref_type in ["head", "merge"]:
+            ref = "refs/pull/%s/%s" % (event["pull_request"]["number"], ref_type)
+            sha = None
+            try:
+                output = subprocess.check_output(["git", "ls-remote", "origin", ref])
+            except subprocess.CalledProcessError:
+                import traceback
+                logger.error(traceback.format_exc())
+                logger.error("Failed to get commit sha1 for %s" % ref)
+            else:
+                if not output:
+                    logger.error("Failed to get commit for %s" % ref)
+                else:
+                    sha = output.decode("utf-8").split()[0]
+            rv.append(sha)
+        rv = tuple(rv)
+    else:
+        # For a branch push we have a ref and a head but no merge SHA
+        rv = (event["ref"], event["after"], None)
+    assert len(rv) == 3
+    return rv
+
+
+def build_full_command(event, task):
+    fetch_ref, head_sha, merge_sha = get_fetch_rev(event)
+    cmd_args = {
+        "task_name": task["name"],
+        "repo_url": event["repository"]["clone_url"],
+        "fetch_ref": fetch_ref,
+        "task_cmd": task["command"],
+        "install_str": "",
+    }
+
+    options = task.get("options", {})
+    options_args = []
+    options_args.append("--ref=%s" % fetch_ref)
+    if head_sha is not None:
+        options_args.append("--head-rev=%s" % head_sha)
+    if merge_sha is not None:
+        options_args.append("--merge-rev=%s" % merge_sha)
+    if options.get("oom-killer"):
+        options_args.append("--oom-killer")
+    if options.get("xvfb"):
+        options_args.append("--xvfb")
+    if not options.get("hosts"):
+        options_args.append("--no-hosts")
+    else:
+        options_args.append("--hosts")
+    # Check out the expected SHA unless it is overridden (e.g. to base_head).
+    if options.get("checkout"):
+        options_args.append("--checkout=%s" % options["checkout"])
+    for browser in options.get("browser", []):
+        options_args.append("--browser=%s" % browser)
+    if options.get("channel"):
+        options_args.append("--channel=%s" % options["channel"])
+    if options.get("install-certificates"):
+        options_args.append("--install-certificates")
+
+    cmd_args["options_str"] = " ".join(str(item) for item in options_args)
+
+    install_packages = task.get("install")
+    if install_packages:
+        install_items = ["apt update -qqy"]
+        install_items.extend("apt install -qqy %s" % item
+                             for item in install_packages)
+        cmd_args["install_str"] = "\n".join("sudo %s;" % item for item in install_items)
+
+    return ["/bin/bash",
+            "--login",
+            "-xc",
+            """
+~/start.sh \
+  %(repo_url)s \
+  %(fetch_ref)s;
+%(install_str)s
+cd web-platform-tests;
+./tools/ci/run_tc.py %(options_str)s -- %(task_cmd)s;
+""" % cmd_args]
+
+
+def get_owner(event):
+    if "pusher" in event:
+        pusher = event.get("pusher", {}).get("email", "")
+        if pusher and "@" in pusher:
+            return pusher
+    return "web-platform-tests@users.noreply.github.com"
+
+
+def create_tc_task(event, task, taskgroup_id, depends_on_ids, env_extra=None):
+    command = build_full_command(event, task)
+    task_id = taskcluster.slugId()
+    task_data = {
+        "taskGroupId": taskgroup_id,
+        "created": taskcluster.fromNowJSON(""),
+        "deadline": taskcluster.fromNowJSON(task["deadline"]),
+        "provisionerId": task["provisionerId"],
+        "schedulerId": task["schedulerId"],
+        "workerType": task["workerType"],
+        "metadata": {
+            "name": task["name"],
+            "description": task.get("description", ""),
+            "owner": get_owner(event),
+            "source": event["repository"]["clone_url"]
+        },
+        "payload": {
+            "artifacts": task.get("artifacts"),
+            "command": command,
+            "image": task.get("image"),
+            "maxRunTime": task.get("maxRunTime"),
+            "env": task.get("env", {}),
+        },
+        "extra": {
+            "github_event": json.dumps(event)
+        },
+        "routes": ["checks"]
+    }
+    if "extra" in task:
+        task_data["extra"].update(task["extra"])
+    if env_extra:
+        task_data["payload"]["env"].update(env_extra)
+    if depends_on_ids:
+        task_data["dependencies"] = depends_on_ids
+        task_data["requires"] = task.get("requires", "all-completed")
+    return task_id, task_data
+
+
+def get_artifact_data(artifact, task_id_map):
+    task_id, data = task_id_map[artifact["task"]]
+    return {
+        "task": task_id,
+        "glob": artifact["glob"],
+        "dest": artifact["dest"],
+        "extract": artifact.get("extract", False)
+    }
+
+
+def build_task_graph(event, all_tasks, tasks):
+    task_id_map = OrderedDict()
+    taskgroup_id = os.environ.get("TASK_ID", taskcluster.slugId())
+
+    def add_task(task_name, task):
+        depends_on_ids = []
+        if "depends-on" in task:
+            for depends_name in task["depends-on"]:
+                if depends_name not in task_id_map:
+                    add_task(depends_name,
+                             all_tasks[depends_name])
+                depends_on_ids.append(task_id_map[depends_name][0])
+        env_extra = {}
+        if "download-artifacts" in task:
+            env_extra["TASK_ARTIFACTS"] = json.dumps(
+                [get_artifact_data(artifact, task_id_map)
+                 for artifact in task["download-artifacts"]])
+
+        task_id, task_data = create_tc_task(event, task, taskgroup_id, depends_on_ids,
+                                            env_extra=env_extra)
+        task_id_map[task_name] = (task_id, task_data)
+
+    for task_name, task in tasks.items():
+        if task_name == "sink-task":
+            # sink-task will be created below at the end of the ordered dict,
+            # so that it can depend on all other tasks.
+            continue
+        add_task(task_name, task)
+
+    # GitHub branch protection for pull requests needs us to name explicit
+    # required tasks, which doesn't suffice when using a dynamic task graph.
+    # To work around this we declare a sink task that depends on all the
+    # other tasks completing, and checks if they have succeeded. We can then
+    # make the sink task the sole required task for pull requests.
+    sink_task = tasks.get("sink-task")
+    if sink_task:
+        logger.info("Scheduling sink-task")
+        depends_on_ids = [x[0] for x in task_id_map.values()]
+        sink_task["command"] += " {}".format(" ".join(depends_on_ids))
+        task_id_map["sink-task"] = create_tc_task(
+            event, sink_task, taskgroup_id, depends_on_ids)
+    else:
+        logger.info("sink-task is not scheduled")
+
+    return task_id_map
+
+
+def create_tasks(queue, task_id_map):
+    for (task_id, task_data) in task_id_map.values():
+        queue.createTask(task_id, task_data)
+
+
+def get_event(queue, event_path):
+    if event_path is not None:
+        try:
+            with open(event_path) as f:
+                event_str = f.read()
+        except OSError:
+            logger.error("Missing event file at path %s" % event_path)
+            raise
+    elif "TASK_EVENT" in os.environ:
+        event_str = os.environ["TASK_EVENT"]
+    else:
+        event_str = fetch_event_data(queue)
+    if not event_str:
+        raise ValueError("Can't find GitHub event definition; for local testing pass --event-path")
+    try:
+        return json.loads(event_str)
+    except ValueError:
+        logger.error("Event was not valid JSON")
+        raise
+
+
+def decide(event):
+    all_tasks = taskgraph.load_tasks_from_path(os.path.join(here, "tasks", "test.yml"))
+
+    triggered_tasks = filter_triggers(event, all_tasks)
+    scheduled_tasks = filter_schedule_if(event, triggered_tasks)
+    filter_excluded_users(scheduled_tasks, event)
+
+    logger.info("UNSCHEDULED TASKS:\n %s" % "\n ".join(sorted(set(all_tasks.keys()) -
+                                                              set(scheduled_tasks.keys()))))
+    logger.info("SCHEDULED TASKS:\n %s" % "\n ".join(sorted(scheduled_tasks.keys())))
+
+    task_id_map = build_task_graph(event, all_tasks, scheduled_tasks)
+    return task_id_map
+
+
+def get_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--event-path",
+                        help="Path to file containing serialized GitHub event")
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Don't actually create the tasks, just output the tasks that "
+                        "would be created")
+    parser.add_argument("--tasks-path",
+                        help="Path to file in which to write payload for all scheduled tasks")
+    return parser
+
+
+def run(venv, **kwargs):
+    queue = taskcluster.Queue({'rootUrl': os.environ['TASKCLUSTER_PROXY_URL']})
+    event = get_event(queue, event_path=kwargs["event_path"])
+
+    task_id_map = decide(event)
+
+    try:
+        if not kwargs["dry_run"]:
+            create_tasks(queue, task_id_map)
+        else:
+            print(json.dumps(task_id_map, indent=2))
+    finally:
+        if kwargs["tasks_path"]:
+            with open(kwargs["tasks_path"], "w") as f:
+                json.dump(task_id_map, f, indent=2)
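
The branch triggers in filter_triggers treat a trailing "*" as a prefix wildcard, so a task triggered on "epochs/*" matches a push to epochs/daily. A minimal sketch of that behaviour, assuming the module is importable as tools.ci.tc.decision (with the taskcluster dependency installed); the task names and trigger values here are made up for illustration:

    from tools.ci.tc import decision

    # Hypothetical task definitions; only the "trigger" keys matter here.
    all_tasks = {
        "daily-checks": {"trigger": {"branch": ["epochs/*"]}},
        "pr-lint": {"trigger": {"pull-request": None}},
    }

    # A push event carries "ref" but no "pull_request" key.
    push_event = {"ref": "refs/heads/epochs/daily"}
    assert decision.get_triggers(push_event) == (False, "epochs/daily")

    # "epochs/*" matches via the prefix rule; the pull-request-only task
    # is skipped because the event is not a PR.
    triggered = decision.filter_triggers(push_event, all_tasks)
    assert list(triggered) == ["daily-checks"]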
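For local testing, get_event can read a hand-written event via --event-path, and --dry-run prints the task payloads instead of creating them. A sketch of a minimal push event covering the fields the code above reads; the SHAs, URL, email, and job names are placeholders, and the tc-jobs: line shows the syntax get_extra_jobs scans for in the head commit message:

    import json

    # Placeholder values throughout; only the keys read by decision.py matter.
    event = {
        "ref": "refs/heads/master",
        "before": "0" * 40,
        "after": "1" * 40,
        "repository": {"clone_url": "https://github.com/web-platform-tests/wpt.git"},
        "pusher": {"email": "user@example.com"},
        "commits": [{"message": "Some change\n\ntc-jobs:lint,update_built"}],
    }

    with open("event.json", "w") as f:
        json.dump(event, f)

    # The file can then be passed to the decision task together with
    # --dry-run; the exact invocation depends on how the project wires
    # run() into its command-line tooling.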