# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Simple script for downloading structured logs from treeherder.

For the moment this is specialised to work with web-platform-tests
logs; in due course it should move somewhere generic and get hooked up
to mach or similar"""

import argparse
import os
# Fix: `urlparse` is the Python 2 module name; on Python 3 the function
# lives in urllib.parse.
from urllib.parse import urljoin

import requests

treeherder_base = "https://treeherder.mozilla.org/"

# Interpretation of the "job" list from
# https://github.com/mozilla/treeherder-service/blob/master/treeherder/webapp/api/utils.py#L18


def create_parser():
    """Build the argument parser: positional branch name and commit hash."""
    parser = argparse.ArgumentParser()
    parser.add_argument("branch", action="store", help="Branch on which jobs ran")
    parser.add_argument("commit", action="store", help="Commit hash for push")
    return parser


def download(url, prefix, dest, force_suffix=True):
    """Stream the file at *url* into a log file under *dest*.

    :param url: URL of the raw log to fetch.
    :param prefix: Filename prefix (typically the platform name); may be
        empty.
    :param dest: Destination directory; ``None`` means the current
        directory.
    :param force_suffix: When True (the default) always append a numeric
        suffix (``prefix-1.log``, ``prefix-2.log``, ...); when False, try
        the bare ``prefix.log`` first and only add a suffix on collision.
    :raises requests.HTTPError: if the server returns an error status.
    """
    if dest is None:
        dest = "."

    if prefix and not force_suffix:
        name = os.path.join(dest, prefix + ".log")
    else:
        name = None

    # Probe for a filename that does not already exist.
    counter = 0
    while not name or os.path.exists(name):
        counter += 1
        sep = "" if not prefix else "-"
        name = os.path.join(dest, prefix + sep + str(counter) + ".log")

    # Fix: close the streamed response when done, and fail loudly on an
    # HTTP error instead of silently writing the error page to disk.
    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()
        with open(name, "wb") as f:
            for chunk in resp.iter_content(1024):
                f.write(chunk)


def fetch_json(url, params=None):
    """GET *url* with JSON Accept headers and return the decoded body.

    :param url: Endpoint URL.
    :param params: Optional dict of query-string parameters.
    :raises requests.HTTPError: on a non-2xx response.
    """
    headers = {
        "Accept": "application/json",
        "User-Agent": "wpt-fetchlogs",
    }
    response = requests.get(url=url, params=params, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()


def get_blobber_url(branch, job):
    """Return the raw-log URL for *job*, or ``None`` if none is found.

    :param branch: Unused; kept for backward compatibility with existing
        callers.
    :param job: A job dict from the treeherder jobs API; only
        ``job["job_guid"]`` is read.
    """
    job_guid = job["job_guid"]
    artifact_url = urljoin(treeherder_base, "/api/jobdetail/")
    artifact_params = {
        "job_guid": job_guid,
    }
    job_data = fetch_json(artifact_url, params=artifact_params)

    if job_data:
        # Fix: the original `except Exception` swallowed every error,
        # including genuine bugs; only the lookup failures the traversal
        # can produce are treated as "no log available".
        try:
            for item in job_data["results"]:
                if item["value"] == "wpt_raw.log" or item["value"] == "log_raw.log":
                    return item["url"]
        except (KeyError, TypeError):
            return None
    return None


def get_structured_logs(branch, commit, dest=None):
    """Download the raw wpt logs for every matching job in a push.

    :param branch: Branch (treeherder project) the jobs ran on.
    :param commit: Revision hash identifying the push.
    :param dest: Directory to write logs into; ``None`` means the current
        directory.
    :raises ValueError: if no result set exists for *commit* on *branch*.
    """
    resultset_url = urljoin(treeherder_base, "/api/project/%s/resultset/" % branch)
    resultset_params = {
        "revision": commit,
    }
    revision_data = fetch_json(resultset_url, params=resultset_params)
    results = revision_data["results"]
    # Fix: an unknown revision used to raise a bare IndexError here.
    if not results:
        raise ValueError(
            "No result set found for revision %s on branch %s" % (commit, branch)
        )
    result_set = results[0]["id"]

    jobs_url = urljoin(treeherder_base, "/api/project/%s/jobs/" % branch)
    jobs_params = {
        "result_set_id": result_set,
        "count": 2000,
        "exclusion_profile": "false",
    }
    job_data = fetch_json(jobs_url, params=jobs_params)

    tasks = []
    for result in job_data["results"]:
        job_type_name = result["job_type_name"]
        # Explicit parentheses: either the legacy buildbot name or a
        # taskcluster test-* job whose name mentions web-platform-tests.
        if job_type_name.startswith("W3C Web Platform") or (
            job_type_name.startswith("test-")
            and "-web-platform-tests-" in job_type_name
        ):
            url = get_blobber_url(branch, result)
            if url:
                prefix = result["platform"]  # platform
                tasks.append((url, prefix, None))

    for task in tasks:
        download(*task)


def main():
    """Entry point: parse arguments and fetch the logs."""
    parser = create_parser()
    args = parser.parse_args()
    get_structured_logs(args.branch, args.commit)


if __name__ == "__main__":
    main()