4 files changed, 522 insertions, 0 deletions
diff --git a/third_party/python/dlmanager/dlmanager/__init__.py b/third_party/python/dlmanager/dlmanager/__init__.py
new file mode 100644
index 0000000000..0890af484a
--- /dev/null
+++ b/third_party/python/dlmanager/dlmanager/__init__.py
@@ -0,0 +1,18 @@
+import logging
+
+__version__ = "0.1.1"
+
+
+try:  # logging.NullHandler exists on Python 2.7+
+    from logging import NullHandler
+except ImportError:
+    class NullHandler(logging.Handler):  # fallback that discards records
+        def emit(self, record):
+            pass
+
+# Attach a no-op handler to avoid "No handler found" warnings.
+logging.getLogger(__name__).addHandler(NullHandler())
+
+# Public API, re-exported so callers can import from the package root.
+from dlmanager.manager import Download, DownloadInterrupt, DownloadManager  # noqa
+from dlmanager.persist_limit import PersistLimit  # noqa
diff --git a/third_party/python/dlmanager/dlmanager/fs.py b/third_party/python/dlmanager/dlmanager/fs.py
new file mode 100644
index 0000000000..8908b5efce
--- /dev/null
+++ b/third_party/python/dlmanager/dlmanager/fs.py
@@ -0,0 +1,116 @@
+import errno
+import logging
+import os
+import shutil
+import stat
+import time
+
+"""
+File system utilities, copied from mozfile.
+"""
+
+LOG = logging.getLogger(__name__)  # reports operations that get retried
+
+
+def _call_windows_retry(func, args=(), retry_max=5, retry_delay=0.5):
+    """
+    Call ``func(*args)``, retrying when it fails with a transient error.
+
+    On Windows, other processes (explorer, virus scanners, etc) may keep
+    a handle open on a path, which shows up as a spurious OSError.
+
+    :param func: the callable to invoke; its return value is discarded
+    :param args: positional arguments passed to *func*
+    :param retry_max: number of retries allowed after the first failure
+    :param retry_delay: seconds to sleep before each retry
+    :raises OSError: for any other errno, or once retries are exhausted
+    """
+    # errno values: http://docs.python.org/2/library/errno.html#module-errno
+    transient = (errno.EACCES, errno.ENOTEMPTY)
+    attempts = 0
+    while True:
+        try:
+            func(*args)
+            return
+        except OSError as exc:
+            if exc.errno not in transient or attempts == retry_max:
+                raise
+            attempts += 1
+            LOG.info('%s() failed for "%s". Reason: %s (%s). Retrying...',
+                     func.__name__, args, exc.strerror, exc.errno)
+            time.sleep(retry_delay)
+
+
+def remove(path):
+    """Removes the specified file, link, or directory tree.
+
+    This is a replacement for shutil.rmtree that works better under
+    windows. It does the following things:
+
+     - make the path writable for the current user before removing it
+     - retry operations on some known errors due to various things keeping
+       a handle on file paths - like explorer, virus scanners, etc. The
+       known errors are errno.EACCES and errno.ENOTEMPTY, and it will
+       retry up to 5 times with a delay of 0.5 seconds between each
+       attempt.
+
+    Note that no error will be raised if the given path does not exist.
+
+    :param path: path to be removed
+    """
+
+    def _call_with_windows_retry(*args, **kwargs):
+        try:
+            _call_windows_retry(*args, **kwargs)
+        except OSError as e:
+            # The file or directory to be removed doesn't exist anymore
+            if e.errno != errno.ENOENT:
+                raise
+
+    def _update_permissions(path):
+        """Sets specified permissions depending on filetype"""
+        if os.path.islink(path):
+            # Symlinks are left alone: they should already be removable
+            return
+
+        stats = os.stat(path)
+
+        if os.path.isfile(path):
+            mode = stats.st_mode | stat.S_IWUSR
+        elif os.path.isdir(path):
+            mode = stats.st_mode | stat.S_IWUSR | stat.S_IXUSR
+        else:
+            # Not supported type
+            return
+
+        _call_with_windows_retry(os.chmod, (path, mode))
+
+    # lexists(), not exists(): a dangling symlink must be removed too
+    if not os.path.lexists(path):
+        return
+
+    if os.path.isfile(path) or os.path.islink(path):
+        # Verify the file or link is read/write for the current user
+        _update_permissions(path)
+        _call_with_windows_retry(os.remove, (path,))
+
+    elif os.path.isdir(path):
+        # Verify the directory is read/write/execute for the current user
+        _update_permissions(path)
+
+        # We're ensuring that every nested item has writable permission.
+        for root, dirs, files in os.walk(path):
+            for entry in dirs + files:
+                _update_permissions(os.path.join(root, entry))
+        _call_with_windows_retry(shutil.rmtree, (path,))
+
+
+def move(src, dst):
+    """
+    Move the file or directory *src* to *dst*.
+
+    Drop-in replacement for shutil.move that copes better with windows:
+    the move is retried on some known errors caused by various things
+    keeping a handle on file paths.
+    """
+    _call_windows_retry(func=shutil.move, args=(src, dst))
diff --git a/third_party/python/dlmanager/dlmanager/manager.py b/third_party/python/dlmanager/dlmanager/manager.py
new file mode 100644
index 0000000000..3dce3b7838
--- /dev/null
+++ b/third_party/python/dlmanager/dlmanager/manager.py
@@ -0,0 +1,323 @@
+import os
+import requests
+import six
+import sys
+import tempfile
+import threading
+
+from contextlib import closing
+from six.moves.urllib.parse import urlparse
+
+from dlmanager import fs
+from dlmanager.persist_limit import PersistLimit
+
+
+class DownloadInterrupt(Exception):
+    """Raised by Download.raise_if_error() for a canceled download."""
+
+
+class Download(object):
+    """
+    Download is responsible for downloading one file in the background.
+
+    Example of use: ::
+
+      dl = Download(url, dest)
+      dl.start()
+      dl.wait() # this will block until completion / cancel / error
+
+    If a download fails or is canceled, the temporary dest is removed from
+    the disk.
+
+    Usually, Downloads are created by using :meth:`DownloadManager.download`.
+
+    :param url: the url of the file to download
+    :param dest: the local file path destination
+    :param finished_callback: a callback that will be called in the thread
+                              when the thread work is done. Takes the download
+                              instance as a parameter.
+    :param chunk_size: size of the chunk that will be read. The thread can
+                        not be stopped while we are reading that chunk size.
+    :param session: a requests.Session instance that will do the real
+                    downloading work. If None, `requests` module is used.
+    :param progress: A callable to report the progress (default to None).
+                     see :meth:`set_progress`.
+    """
+    def __init__(self, url, dest, finished_callback=None,
+                 chunk_size=16 * 1024, session=None, progress=None):
+        self.thread = threading.Thread(
+            target=self._download,
+            args=(url, dest, finished_callback, chunk_size,
+                  session or requests)
+        )
+        self._lock = threading.Lock()
+        self.__url = url
+        self.__dest = dest
+        self.__progress = progress
+        self.__canceled = False
+        self.__error = None
+
+    def start(self):
+        """Start the thread that will do the download."""
+        self.thread.start()
+
+    def cancel(self):
+        """Cancel a previously started download."""
+        # only sets a flag; the worker thread checks it between chunks
+        self.__canceled = True
+
+    def is_canceled(self):
+        """Returns True if we canceled this download."""
+        return self.__canceled
+
+    def is_running(self):
+        """Returns True if the downloading thread is running."""
+        return self.thread.is_alive()
+
+    def wait(self, raise_if_error=True):
+        """
+        Block until the downloading thread is finished.
+
+        :param raise_if_error: if True (the default), :meth:`raise_if_error`
+                               will be called and raise an error if any.
+        """
+        while self.thread.is_alive():
+            try:
+                # in case of exception here (like KeyboardInterrupt),
+                # cancel the task.
+                self.thread.join(0.02)
+            except BaseException:
+                self.cancel()
+                raise
+        # this will raise exception that may happen inside the thread.
+        if raise_if_error:
+            self.raise_if_error()
+
+    def error(self):
+        """
+        Returns None or a tuple of three values (type, value, traceback)
+        that give information about the exception.
+        """
+        return self.__error
+
+    def raise_if_error(self):
+        """
+        Raise an error if any. If the download was canceled, raise
+        :class:`DownloadInterrupt`.
+        """
+        if self.__error:
+            six.reraise(*self.__error)
+        if self.__canceled:
+            raise DownloadInterrupt()
+
+    def set_progress(self, progress):
+        """
+        set a callable to report the progress of the download, or None to
+        disable any report.
+
+        The callable must take three parameters (download, current, total).
+        Note that this method is thread safe, you can call it during a
+        download.
+        """
+        with self._lock:
+            self.__progress = progress
+
+    def get_dest(self):
+        """Returns the dest."""
+        return self.__dest
+
+    def get_url(self):
+        """Returns the url."""
+        return self.__url
+
+    def _update_progress(self, current, total):
+        """Call the progress callable, if any, while holding the lock."""
+        with self._lock:
+            if self.__progress:
+                self.__progress(self, current, total)
+
+    def _download(self, url, dest, finished_callback, chunk_size, session):
+        """
+        Thread target: stream *url* into a temporary file next to *dest*,
+        then move it to *dest*. Any failure is stored for :meth:`error`.
+        """
+        # save the file under a temporary name
+        # this allow to not use a broken file in case things went really bad
+        # while downloading the file (ie the python interpreter is killed
+        # abruptly)
+        temp = None
+        bytes_so_far = 0
+        try:
+            with closing(session.get(url, stream=True)) as response:
+                # fail on HTTP errors before saving an error page to disk
+                response.raise_for_status()
+                total_size = response.headers.get('Content-length', '').strip()
+                total_size = int(total_size) if total_size else None
+                self._update_progress(bytes_so_far, total_size)
+                # we use NamedTemporaryFile as raw open() call was causing
+                # issues on windows - see:
+                # https://bugzilla.mozilla.org/show_bug.cgi?id=1185756
+                with tempfile.NamedTemporaryFile(
+                        delete=False,
+                        suffix='.tmp',
+                        dir=os.path.dirname(dest)) as temp:
+                    for chunk in response.iter_content(chunk_size):
+                        if self.is_canceled():
+                            break
+                        if chunk:
+                            temp.write(chunk)
+                        bytes_so_far += len(chunk)
+                        self._update_progress(bytes_so_far, total_size)
+        except BaseException:
+            self.__error = sys.exc_info()
+        try:
+            if temp is None:
+                pass  # not even opened the temp file, nothing to do
+            elif self.is_canceled() or self.__error:
+                fs.remove(temp.name)
+            else:
+                # if all goes well, then rename the file to the real dest
+                fs.remove(dest)  # just in case it already existed
+                fs.move(temp.name, dest)
+        except Exception:
+            # a cleanup/move failure must be visible to wait(); keep the
+            # original download error if there was one.
+            if self.__error is None:
+                self.__error = sys.exc_info()
+        finally:
+            if finished_callback:
+                finished_callback(self)
+
+
+class DownloadManager(object):
+    """
+    DownloadManager is responsible for starting and managing downloads inside
+    a given directory. It will download a file only if a given filename
+    is not already there.
+
+    Note that background downloads need to be stopped. For example, if
+    you have an exception while a download is occurring, python will only
+    exit when the download will finish. To get rid of that, there is a
+    possible idiom: ::
+
+      def download_things(manager):
+          # do things with the manager
+          manager.download(url1, f1)
+          manager.download(url2, f2)
+          ...
+
+      manager = DownloadManager(destdir)
+      try:
+          download_things(manager)
+      finally:
+          # ensure we cancel all background downloads to ask the end
+          # of possible remaining threads
+          manager.cancel()
+
+    :param destdir: a directory where files are downloaded. It will be created
+                    if it does not exist.
+    :param session: a requests session. If None, one will be created for you.
+    :param persist_limit: an instance of :class:`PersistLimit`, to allow
+                          limiting the size of the download dir. Defaults
+                          to None, meaning no limit.
+    """
+    def __init__(self, destdir, session=None, persist_limit=None):
+        self.destdir = destdir
+        self.session = session or requests.Session()
+        self._downloads = {}
+        self._lock = threading.Lock()
+        self.persist_limit = persist_limit or PersistLimit(0)
+        self.persist_limit.register_dir_content(self.destdir)
+
+        # if persist folder does not exist, create it
+        if not os.path.isdir(destdir):
+            os.makedirs(destdir)
+
+    def get_dest(self, fname):
+        """Returns the path of *fname* inside the download directory."""
+        return os.path.join(self.destdir, fname)
+
+    def cancel(self, cancel_if=None):
+        """
+        Cancel downloads, if any.
+
+        if cancel_if is given, it must be a callable that take the download
+        instance as parameter, and return True if the download needs to be
+        canceled.
+
+        Note that download threads won't be stopped directly.
+        """
+        with self._lock:
+            for download in six.itervalues(self._downloads):
+                if cancel_if is None or cancel_if(download):
+                    if download.is_running():
+                        download.cancel()
+
+    def wait(self, raise_if_error=True):
+        """Wait for all downloads to finish (see :meth:`Download.wait`)."""
+        # snapshot under the lock: finishing downloads delete themselves
+        with self._lock:
+            downloads = list(self._downloads.values())
+        for download in downloads:
+            download.wait(raise_if_error=raise_if_error)
+
+    def download(self, url, fname=None, progress=None):
+        """
+        Returns a started :class:`Download` instance, or None if fname is
+        already present in destdir.
+
+        if a download is already running for the given fname, it is just
+        returned. Else the download is created, started and returned.
+
+        :param url: url of the file to download.
+        :param fname: name to give for the downloaded file. If None, it will
+                      be the name extracted in the url.
+        :param progress: a callable to report the download progress, or None.
+                         See :meth:`Download.set_progress`.
+        """
+        if fname is None:
+            fname = urlparse(url).path.split('/')[-1]
+        dest = self.get_dest(fname)
+        # lookup and creation share one critical section (no duplicates)
+        with self._lock:
+            # if we are downloading, returns the instance
+            if dest in self._downloads:
+                dl = self._downloads[dest]
+                if progress:
+                    dl.set_progress(progress)
+                return dl
+            if os.path.exists(dest):
+                return None
+            # else create the download (will be automatically removed of
+            # the list on completion) start it, and returns that.
+            download = Download(url, dest,
+                                session=self.session,
+                                finished_callback=self._download_finished,
+                                progress=progress)
+            self._downloads[dest] = download
+            download.start()
+            self._download_started(download)
+            return download
+
+    def _download_started(self, dl):
+        """
+        Useful when sub-classing. Report the start event of a download.
+
+        :param dl: The :class:`Download` instance.
+        """
+        pass
+
+    def _download_finished(self, dl):
+        """
+        Useful when sub-classing. Report the end of a download.
+
+        Note that this is executed in the download thread. Also, you should
+        make sure to call the base implementation.
+
+        :param dl: The :class:`Download` instance.
+        """
+        with self._lock:
+            dest = dl.get_dest()
+            del self._downloads[dest]
+            self.persist_limit.register_file(dest)
+            self.persist_limit.remove_old_files()
diff --git a/third_party/python/dlmanager/dlmanager/persist_limit.py b/third_party/python/dlmanager/dlmanager/persist_limit.py
new file mode 100644
index 0000000000..03a1829f70
--- /dev/null
+++ b/third_party/python/dlmanager/dlmanager/persist_limit.py
@@ -0,0 +1,65 @@
+import os
+import stat
+
+from collections import namedtuple
+from glob import glob
+
+from dlmanager import fs
+
+
+File = namedtuple('File', ('path', 'stat'))
+
+
+class PersistLimit(object):
+    """
+    Keep a list of files, removing the oldest ones when the size_limit
+    is reached.
+
+    The access time of a file is used to determine the oldest, e.g. the
+    last time a file was read.
+
+    :param size_limit: the size limit in bytes. A value of 0 means no limit.
+    :param file_limit: even if the size limit is reached, this force
+                       to keep at least *file_limit* files.
+    """
+    def __init__(self, size_limit, file_limit=5):
+        self.size_limit = size_limit
+        self.file_limit = file_limit
+        self.files = []
+        self._files_size = 0
+
+    def register_file(self, path):
+        """Register a single file, replacing any previous entry for *path*."""
+        try:
+            fstat = os.stat(path)
+        except OSError:
+            # file does not exist probably, just skip it
+            # note this happens when background downloads are canceled
+            return
+        if stat.S_ISREG(fstat.st_mode):
+            # drop a stale entry first so the size is not counted twice
+            for old in [f for f in self.files if f.path == path]:
+                self.files.remove(old)
+                self._files_size -= old.stat.st_size
+            self.files.append(File(path=path, stat=fstat))
+            self._files_size += fstat.st_size
+
+    def register_dir_content(self, directory, pattern="*"):
+        """
+        Register every files in a directory that match *pattern*.
+        """
+        for path in glob(os.path.join(directory, pattern)):
+            self.register_file(path)
+
+    def remove_old_files(self):
+        """Remove the least recently accessed files while over the limits."""
+        # a non-positive limit disables the cleanup entirely
+        if self.size_limit <= 0 or self.file_limit <= 0:
+            return
+        # sort by access time, oldest first
+        files = sorted(self.files, key=lambda f: f.stat.st_atime)
+        while len(files) > self.file_limit and \
+                self._files_size >= self.size_limit:
+            f = files.pop(0)
+            fs.remove(f.path)
+            self._files_size -= f.stat.st_size
+        self.files = files