diff options
Diffstat (limited to 'testing/web-platform/manifestdownload.py')
-rw-r--r-- | testing/web-platform/manifestdownload.py | 233 |
1 files changed, 233 insertions, 0 deletions
diff --git a/testing/web-platform/manifestdownload.py b/testing/web-platform/manifestdownload.py new file mode 100644 index 0000000000..cc26e89736 --- /dev/null +++ b/testing/web-platform/manifestdownload.py @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import os +import tarfile +from datetime import datetime, timedelta + +import mozversioncontrol +import requests +import six + +try: + from cStringIO import StringIO as BytesIO +except ImportError: + from io import BytesIO + +HEADERS = {"User-Agent": "wpt manifest download"} + + +def get(logger, url, **kwargs): + logger.debug(url) + if "headers" not in kwargs: + kwargs["headers"] = HEADERS + return requests.get(url, **kwargs) + + +def abs_path(path): + return os.path.abspath(os.path.expanduser(path)) + + +def get_commits(logger, repo_root): + try: + repo = mozversioncontrol.get_repository_object(repo_root) + except mozversioncontrol.InvalidRepoPath: + logger.warning("No VCS found for path %s" % repo_root) + return [] + + # The base_ref doesn't actually return a ref, sadly + base_rev = repo.base_ref + if repo.name == "git": + logger.debug("Found git repo") + logger.debug("Base rev is %s" % base_rev) + if not repo.has_git_cinnabar: + logger.error("git cinnabar not found") + return [] + changeset_iter = ( + repo._run("cinnabar", "git2hg", rev).strip() + for rev in repo._run( + "log", + "--format=%H", + "-n50", + base_rev, + "testing/web-platform/tests", + "testing/web-platform/mozilla/tests", + ).splitlines() + ) + else: + logger.debug("Found hg repo") + logger.debug("Base rev is %s" % base_rev) + changeset_iter = repo._run( + "log", + "-fl50", + "--template={node}\n", + "-r", + base_rev, + "testing/web-platform/tests", + "testing/web-platform/mozilla/tests", + ).splitlines() + return changeset_iter + + +def should_download(logger, manifest_paths, rebuild_time=timedelta(days=5)): + # TODO: Improve logic for when to download. Maybe if x revisions behind? + for manifest_path in manifest_paths: + if not os.path.exists(manifest_path): + return True + mtime = datetime.fromtimestamp(os.path.getmtime(manifest_path)) + if mtime < datetime.now() - rebuild_time: + return True + if os.path.getsize(manifest_path) == 0: + return True + + logger.info("Skipping manifest download because existing file is recent") + return False + + +def taskcluster_url(logger, commits): + artifact_path = "/artifacts/public/manifests.tar.gz" + + repos = { + "mozilla-central": "mozilla-central", + "integration/autoland": "autoland", + "releases/mozilla-esr115": "mozilla-esr115", + } + cset_url = ( + "https://hg.mozilla.org/{repo}/json-pushes?" + "changeset={changeset}&version=2&tipsonly=1" + ) + + tc_url = ( + "https://firefox-ci-tc.services.mozilla.com/api/index/v1/" + "task/gecko.v2.{name}." + "revision.{changeset}.source.manifest-upload" + ) + + default = ( + "https://firefox-ci-tc.services.mozilla.com/api/index/v1/" + "task/gecko.v2.mozilla-central.latest.source.manifest-upload" + artifact_path + ) + + for revision in commits: + req = None + + if revision == 40 * "0": + continue + + for repo_path, index_name in six.iteritems(repos): + try: + req_headers = HEADERS.copy() + req_headers.update({"Accept": "application/json"}) + req = get( + logger, + cset_url.format(changeset=revision, repo=repo_path), + headers=req_headers, + ) + req.raise_for_status() + except requests.exceptions.RequestException: + if req is not None and req.status_code == 404: + # The API returns a 404 if it can't find a changeset for the revision. + logger.debug("%s not found in %s" % (revision, repo_path)) + continue + else: + return default + + result = req.json() + + pushes = result["pushes"] + if not pushes: + logger.debug("Error reading response; 'pushes' key not found") + continue + [cset] = next(iter(pushes.values()))["changesets"] + + tc_index_url = tc_url.format(changeset=cset, name=index_name) + try: + req = get(logger, tc_index_url) + except requests.exceptions.RequestException: + return default + + if req.status_code == 200: + return tc_index_url + artifact_path + + logger.info( + "Can't find a commit-specific manifest so just using the most " "recent one" + ) + + return default + + +def download_manifest(logger, test_paths, commits_func, url_func, force=False): + manifest_paths = [ + (item["manifest_path"] if isinstance(item, dict) else item.manifest_path) + for item in test_paths.values() + ] + + if not force and not should_download(logger, manifest_paths): + return True + + commits = commits_func() + + url = url_func(logger, commits) + if not url: + logger.warning("No generated manifest found") + return False + + logger.info("Downloading manifest from %s" % url) + try: + req = get(logger, url) + except Exception: + logger.warning("Downloading pregenerated manifest failed") + return False + + if req.status_code != 200: + logger.warning( + "Downloading pregenerated manifest failed; got " + "HTTP status %d" % req.status_code + ) + return False + + tar = tarfile.open(mode="r:gz", fileobj=BytesIO(req.content)) + for paths in six.itervalues(test_paths): + manifest_rel_path = ( + paths["manifest_rel_path"] + if isinstance(paths, dict) + else paths.manifest_rel_path + ) + manifest_path = ( + paths["manifest_path"] if isinstance(paths, dict) else paths.manifest_path + ) + + try: + member = tar.getmember(manifest_rel_path.replace(os.path.sep, "/")) + except KeyError: + logger.warning("Failed to find downloaded manifest %s" % manifest_rel_path) + else: + try: + logger.debug("Unpacking %s to %s" % (member.name, manifest_path)) + src = tar.extractfile(member) + with open(manifest_path, "wb") as dest: + dest.write(src.read()) + src.close() + except IOError: + import traceback + + logger.warning( + "Failed to decompress %s:\n%s" + % (manifest_rel_path, traceback.format_exc()) + ) + return False + + os.utime(manifest_path, None) + + return True + + +def download_from_taskcluster(logger, repo_root, test_paths, force=False): + return download_manifest( + logger, + test_paths, + lambda: get_commits(logger, repo_root), + taskcluster_url, + force, + ) |