testing/web-platform/tests/tools/ci/tc/download.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111

# mypy: allow-untyped-defs

import argparse
import os
import logging

import requests

import github


logging.basicConfig()
logger = logging.getLogger("tc-download")

# The root URL of the Taskcluster deployment from which to download wpt reports
# (after https://bugzilla.mozilla.org/show_bug.cgi?id=1574668 lands, this will
# be https://community-tc.services.mozilla.com)
TASKCLUSTER_ROOT_URL = 'https://taskcluster.net'


def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--ref", action="store", default="master",
                        help="Branch (in the GitHub repository) or commit to fetch logs for")
    parser.add_argument("--artifact-name", action="store", default="wpt_report.json.gz",
                        help="Log type to fetch")
    parser.add_argument("--repo-name", action="store", default="web-platform-tests/wpt",
                        help="GitHub repo name in the format owner/repo. "
                        "This must be the repo from which the Taskcluster run was scheduled "
                        "(for PRs this is the repo into which the PR would merge)")
    parser.add_argument("--token-file", action="store",
                        help="File containing GitHub token")
    parser.add_argument("--out-dir", action="store", default=".",
                        help="Path to save the logfiles")
    return parser


def get_json(url, key=None):
    resp = requests.get(url)
    resp.raise_for_status()
    data = resp.json()
    if key:
        data = data[key]
    return data


def get(url, dest, name):
    resp = requests.get(url)
    resp.raise_for_status()
    path = os.path.join(dest, name)
    with open(path, "w") as f:
        f.write(resp.content)
    return path


def run(*args, **kwargs):
    if not os.path.exists(kwargs["out_dir"]):
        os.mkdir(kwargs["out_dir"])

    if kwargs["token_file"]:
        with open(kwargs["token_file"]) as f:
            gh = github.Github(f.read().strip())
    else:
        gh = github.Github()

    repo = gh.get_repo(kwargs["repo_name"])
    commit = repo.get_commit(kwargs["ref"])
    statuses = commit.get_statuses()
    taskgroups = set()

    for status in statuses:
        if not status.context.startswith("Taskcluster "):
            continue
        if status.state == "pending":
            continue
        taskgroup_id = status.target_url.rsplit("/", 1)[1]
        taskgroups.add(taskgroup_id)

    if not taskgroups:
        logger.error("No complete Taskcluster runs found for ref %s" % kwargs["ref"])
        return 1

    for taskgroup in taskgroups:
        if TASKCLUSTER_ROOT_URL == 'https://taskcluster.net':
            # NOTE: this condition can be removed after November 9, 2019
            taskgroup_url = "https://queue.taskcluster.net/v1/task-group/%s/list"
            artifacts_list_url = "https://queue.taskcluster.net/v1/task/%s/artifacts"
        else:
            taskgroup_url = TASKCLUSTER_ROOT_URL + "/api/queue/v1/task-group/%s/list"
            artifacts_list_url = TASKCLUSTER_ROOT_URL + "/api/queue/v1/task/%s/artifacts"
        tasks = get_json(taskgroup_url % taskgroup, "tasks")
        for task in tasks:
            task_id = task["status"]["taskId"]
            url = artifacts_list_url % (task_id,)
            for artifact in get_json(url, "artifacts"):
                if artifact["name"].endswith(kwargs["artifact_name"]):
                    filename = "%s-%s-%s" % (task["task"]["metadata"]["name"],
                                             task_id,
                                             kwargs["artifact_name"])
                    path = get("%s/%s" % (url, artifact["name"]), kwargs["out_dir"], filename)
                    logger.info(path)


def main():
    kwargs = get_parser().parse_args()

    run(None, vars(kwargs))


if __name__ == "__main__":
    main()  # type: ignore