summaryrefslogtreecommitdiffstats
path: root/testing/talos/talos/heavy.py
diff options
context:
space:
mode:
Diffstat (limited to 'testing/talos/talos/heavy.py')
-rw-r--r--testing/talos/talos/heavy.py146
1 files changed, 146 insertions, 0 deletions
diff --git a/testing/talos/talos/heavy.py b/testing/talos/talos/heavy.py
new file mode 100644
index 0000000000..982a08f05a
--- /dev/null
+++ b/testing/talos/talos/heavy.py
@@ -0,0 +1,146 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Downloads Heavy profiles from TaskCluster.
+"""
+import datetime
+import functools
+import os
+import tarfile
+from email.utils import parsedate
+
+import requests
+from mozlog import get_proxy_logger
+from requests.adapters import HTTPAdapter
+
# Logger used by every helper in this module.
LOG = get_proxy_logger()

# URL template for a profile archive; "%s" receives the profile name.
TC_LINK = (
    "https://index.taskcluster.net/v1/task/"
    "garbage.heavyprofile/artifacts/public/"
    "today-%s.tgz"
)
+
+
class ProgressBar(object):
    """Count steps towards ``size`` and log each time a new 10% mark is hit.

    Usable as a context manager: entering yields the bar itself and leaving
    does nothing (exceptions raised in the ``with`` body propagate).

    :param size: total number of expected steps. May be zero or fractional;
        the bar simply stops counting once ``size`` has been reached.
    :param template: %-format string that receives the percentage reached.
    """

    def __init__(self, size, template="\r%d%%"):
        self.size = size
        # Number of steps counted so far.
        self.current = 0
        # Highest ten-percent mark that has already been logged.
        self.tens = 0
        self.template = template

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Never swallow an exception raised inside the ``with`` body.
        return False

    def incr(self):
        """Count one step; log the percentage when a new 10% mark is crossed."""
        # ">=" rather than "==": with a fractional size the counter is never
        # exactly equal to it and would otherwise run past 100%. This also
        # makes a zero (or negative) size a no-op instead of a division by 0.
        if self.current >= self.size:
            return

        # Count first so the value reported is the progress actually made and
        # the final step reports 100%.
        self.current += 1
        percent = min(float(self.current) / float(self.size) * 100, 100.0)
        tens = int(percent // 10)
        if tens > self.tens:
            LOG.info(self.template % percent)
            self.tens = tens
+
+
def follow_redirects(url, max=3):
    """Resolve ``url`` through HTTP 303 redirects using HEAD requests.

    Only responses with status 303 are followed; any other status ends the
    chain and that response is treated as the final one.

    :param url: address to start from.
    :param max: maximum number of 303 redirects to follow.
    :returns: ``(location, last_modified)`` where ``location`` is the last URL
        requested and ``last_modified`` is the final response's
        ``Last-Modified`` header as a naive *local-time* ``datetime``, or
        ``None`` when the header is missing or cannot be parsed.
    :raises ValueError: if the response is still a 303 once ``max`` redirects
        have been followed.
    """
    # Local import: keeps this function independent of the module's import
    # block, which only pulls in ``parsedate``.
    from email.utils import mktime_tz, parsedate_tz

    # Same timeout (seconds) as the archive download, so a stalled server
    # cannot block the caller indefinitely.
    timeout = 20

    location = url
    hops = 0
    page = requests.head(location, timeout=timeout)
    while page.status_code == 303 and hops < max:
        hops += 1
        location = page.headers["Location"]
        page = requests.head(location, timeout=timeout)
    if page.status_code == 303:
        raise ValueError("Max redirects Reached")

    last_modified = None
    header = page.headers.get("Last-Modified")
    if header is not None:
        # parsedate_tz() returns None for a malformed date instead of raising.
        parsed = parsedate_tz(header)
        if parsed is not None:
            # Honour the zone given in the header and convert to local time:
            # profile_age() compares this value against naive local datetimes
            # (datetime.now() / datetime.fromtimestamp()).
            last_modified = datetime.datetime.fromtimestamp(mktime_tz(parsed))
    return location, last_modified
+
+
def _recursive_mtime(path):
    """Return the most recent modification time found at or below ``path``.

    :param path: directory (or file) to inspect.
    :returns: the largest ``os.path.getmtime`` value among ``path`` itself and
        every directory and file reached by ``os.walk(path)``.
    :raises OSError: if ``path`` itself cannot be stat'ed.
    """
    newest = os.path.getmtime(path)
    for root, dirs, files in os.walk(path):
        for element in dirs + files:
            try:
                mtime = os.path.getmtime(os.path.join(root, element))
            except OSError:
                # getmtime() follows symlinks, so a dangling link (or an
                # entry removed while walking) cannot be stat'ed; skip it
                # rather than abort the whole scan.
                continue
            if mtime > newest:
                newest = mtime
    return newest
+
+
def profile_age(profile_dir, last_modified=None):
    """Return the age of ``profile_dir`` in days relative to ``last_modified``.

    :param profile_dir: directory whose newest modification time is used.
    :param last_modified: reference ``datetime``; the current local time is
        used when it is ``None``.
    :returns: the ``days`` component of ``last_modified`` minus the newest
        modification time found under ``profile_dir``.
    """
    reference = datetime.datetime.now() if last_modified is None else last_modified
    newest_change = datetime.datetime.fromtimestamp(_recursive_mtime(profile_dir))
    return (reference - newest_change).days
+
+
def _download_archive(url, archive_file):
    """Stream ``url`` into ``archive_file`` while logging download progress."""
    session = requests.Session()
    session.mount("https://", HTTPAdapter(max_retries=5))
    try:
        req = session.get(url, stream=True, timeout=20)
        try:
            req.raise_for_status()

            # Content-Length may be absent; fall back to 0 instead of
            # failing on int(None).
            total_length = int(req.headers.get("content-length", 0))
            # Whole number of 1 KiB chunks expected. Floor division keeps
            # this an integer on Python 3 as well as Python 2.
            size = total_length // 1024 + 1

            # XXX implement Range to resume download on disconnects
            template = "Download progress %d%%"
            with open(archive_file, "wb") as f:
                with ProgressBar(size=size, template=template) as bar:
                    for chunk in req.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                        bar.incr()
        finally:
            # Release the streamed connection even if the transfer failed.
            req.close()
    finally:
        session.close()


def _extract_archive(archive_file, target):
    """Unpack the gzipped tarball ``archive_file`` into ``target`` with progress."""
    template = "Extraction progress %d%%"
    with tarfile.open(archive_file, "r:gz") as tar:
        LOG.info("Checking the tarball content...")
        members = tar.getmembers()
        with ProgressBar(size=len(members), template=template) as bar:

            def _tracked():
                # Tick the bar as extractall() pulls each member.
                for member in members:
                    bar.incr()
                    yield member

            # Progress is counted by wrapping the ``members`` iterable rather
            # than patching ``tar.extract``: which method extractall() calls
            # per member is an implementation detail of tarfile.
            #
            # NOTE(review): extractall() writes wherever the member paths
            # point and the archive comes from the network; consider
            # validating member names (or using an extraction filter where
            # the running Python provides one).
            tar.extractall(target, members=_tracked())


def download_profile(name, profiles_dir=None):
    """Make sure a recent copy of the heavy profile ``name`` exists locally.

    The archive URL is built from ``TC_LINK`` and resolved with
    ``follow_redirects``. An existing local copy is reused when
    ``profile_age`` reports it as less than 7 days old; otherwise the archive
    is downloaded to ``<profiles_dir>/.cache/today-<name>.tgz`` and extracted
    into ``<profiles_dir>/<name>``.

    :param name: profile name, substituted into the archive URL and used as
        the name of the target directory.
    :param profiles_dir: directory holding the profiles; defaults to
        ``~/.mozilla/profiles``. Created if missing.
    :returns: path of the directory containing the profile.
    """
    if profiles_dir is None:
        profiles_dir = os.path.join(os.path.expanduser("~"), ".mozilla", "profiles")
    profiles_dir = os.path.abspath(profiles_dir)
    cache_dir = os.path.join(profiles_dir, ".cache")
    for directory in (profiles_dir, cache_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    target = os.path.join(profiles_dir, name)
    archive_file = os.path.join(cache_dir, "today-%s.tgz" % name)

    url, last_modified = follow_redirects(TC_LINK % name)
    if os.path.exists(target):
        age = profile_age(target, last_modified)
        if age < 7:
            # profile is not older than a week, we're good
            LOG.info("Local copy of %r is fresh enough" % name)
            LOG.info("%d days old" % age)
            return target

    LOG.info("Downloading from %r" % url)
    _download_archive(url, archive_file)

    LOG.info("Extracting profile in %r" % target)
    _extract_archive(archive_file, target)
    LOG.info("Profile downloaded.")
    return target