diff options
Diffstat (limited to 'testing/talos/talos/heavy.py')
-rw-r--r-- | testing/talos/talos/heavy.py | 146 |
1 files changed, 146 insertions, 0 deletions
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Downloads Heavy profiles from TaskCluster.
"""
import datetime
import functools
import os
import tarfile
from email.utils import parsedate

import requests
from mozlog import get_proxy_logger
from requests.adapters import HTTPAdapter

LOG = get_proxy_logger()
TC_LINK = (
    "https://index.taskcluster.net/v1/task/garbage.heavyprofile/"
    "artifacts/public/today-%s.tgz"
)

# Timeout, in seconds, applied to every HTTP request issued by this module.
_TIMEOUT = 20


class ProgressBar(object):
    """Log a progress message each time another 10% of the work is done.

    ``size`` is the expected number of ``incr()`` calls and ``template`` is
    a %-format string that receives the current percentage.  The object is
    usable as a context manager; entering and leaving it has no side effect.
    """

    def __init__(self, size, template="\r%d%%"):
        self.size = size
        self.current = 0
        self.tens = 0
        self.template = template

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Never swallow an exception raised inside the ``with`` body.
        return False

    def incr(self):
        """Record one completed step, logging when a new 10% mark is crossed.

        Calls made once ``size`` steps were already recorded are ignored.
        """
        # ``>=`` (rather than ``==``) keeps the guard effective for a zero
        # or non-integer size.
        if self.current >= self.size:
            return
        # Count the step first so that the last one reports 100%.
        self.current += 1
        percent = float(self.current) / float(self.size) * 100
        tens = int(percent // 10)
        if tens > self.tens:
            LOG.info(self.template % percent)
            self.tens = tens


def follow_redirects(url, max=3):
    """Follow HTTP 303 redirects starting at ``url`` using HEAD requests.

    Returns a ``(location, last_modified)`` tuple: the last URL requested
    and the ``Last-Modified`` header of its response as a naive
    ``datetime`` (``None`` when the header is missing or unparsable).

    Raises ``ValueError`` if a 303 is still returned after ``max``
    redirects.
    """
    location = url
    current = 0
    page = requests.head(url, timeout=_TIMEOUT)
    while page.status_code == 303 and current < max:
        current += 1
        location = page.headers["Location"]
        page = requests.head(location, timeout=_TIMEOUT)
    if page.status_code == 303 and current == max:
        raise ValueError("Max redirects Reached")

    last_modified = page.headers.get("Last-Modified", None)
    if last_modified is not None:
        # parsedate() returns None when it cannot parse the header value.
        parsed = parsedate(last_modified)
        if parsed is None:
            last_modified = None
        else:
            last_modified = datetime.datetime(*parsed[:6])
    return location, last_modified


def _recursive_mtime(path):
    """Return the most recent mtime of ``path`` and everything below it."""
    newest = os.path.getmtime(path)
    for root, dirs, files in os.walk(path):
        for element in dirs + files:
            newest = max(newest, os.path.getmtime(os.path.join(root, element)))
    return newest


def profile_age(profile_dir, last_modified=None):
    """Return how many whole days ``profile_dir`` is older than a reference.

    The reference is ``last_modified`` when given, otherwise the current
    local time.  The profile's own date is the newest mtime found in its
    directory tree.
    """
    if last_modified is None:
        last_modified = datetime.datetime.now()

    # NOTE(review): fromtimestamp() yields naive local time, whereas a
    # ``last_modified`` built by follow_redirects() comes straight from the
    # header fields with no timezone adjustment -- confirm whether the
    # resulting skew of a few hours matters here.
    profile_ts = _recursive_mtime(profile_dir)
    profile_ts = datetime.datetime.fromtimestamp(profile_ts)
    return (last_modified - profile_ts).days


def _fetch_archive(url, archive_file):
    """Stream ``url`` into ``archive_file``, logging download progress."""
    LOG.info("Downloading from %r" % url)
    session = requests.Session()
    try:
        session.mount("https://", HTTPAdapter(max_retries=5))
        req = session.get(url, stream=True, timeout=_TIMEOUT)
        try:
            req.raise_for_status()

            # The header may be absent; fall back to a single progress step.
            total_length = int(req.headers.get("content-length", 0))

            # XXX implement Range to resume download on disconnects
            template = "Download progress %d%%"
            # Floor division keeps the number of steps an integer.
            size = total_length // 1024 + 1
            with open(archive_file, "wb") as f:
                with ProgressBar(size=size, template=template) as bar:
                    for chunk in req.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                        bar.incr()
        finally:
            req.close()
    finally:
        session.close()


def _extract_archive(archive_file, target):
    """Extract the gzipped tarball ``archive_file`` into ``target``."""
    LOG.info("Extracting profile in %r" % target)
    template = "Extraction progress %d%%"

    with tarfile.open(archive_file, "r:gz") as tar:
        LOG.info("Checking the tarball content...")
        size = len(list(tar))
        with ProgressBar(size=size, template=template) as bar:

            def _extract(self, *args, **kw):
                bar.incr()
                return self.old(*args, **kw)

            # Wrap extract() so each extracted member advances the bar.
            # NOTE(review): this relies on extractall() calling
            # self.extract() once per member, which is an implementation
            # detail of tarfile -- confirm on the targeted Python versions.
            tar.old = tar.extract
            tar.extract = functools.partial(_extract, tar)
            # NOTE(review): member paths are not validated before
            # extraction; this presumes the archive is trusted -- confirm.
            tar.extractall(target)


def download_profile(name, profiles_dir=None):
    """Make sure the heavy profile ``name`` is available locally.

    Profiles live under ``profiles_dir`` (``~/.mozilla/profiles`` by
    default) and the downloaded archive is kept in its ``.cache``
    sub-directory.  If a local copy exists and is less than 7 days older
    than the remote archive, nothing is downloaded.

    Returns the path of the profile directory.
    """
    if profiles_dir is None:
        profiles_dir = os.path.join(os.path.expanduser("~"), ".mozilla", "profiles")
    profiles_dir = os.path.abspath(profiles_dir)
    if not os.path.exists(profiles_dir):
        os.makedirs(profiles_dir)

    target = os.path.join(profiles_dir, name)
    url = TC_LINK % name
    cache_dir = os.path.join(profiles_dir, ".cache")
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    archive_file = os.path.join(cache_dir, "today-%s.tgz" % name)

    url, last_modified = follow_redirects(url)
    if os.path.exists(target):
        age = profile_age(target, last_modified)
        if age < 7:
            # profile is not older than a week, we're good
            LOG.info("Local copy of %r is fresh enough" % name)
            LOG.info("%d days old" % age)
            return target

    _fetch_archive(url, archive_file)
    _extract_archive(archive_file, target)
    LOG.info("Profile downloaded.")
    return target