1 files changed, 1271 insertions, 0 deletions
diff --git a/python/mozbuild/mozpack/files.py b/python/mozbuild/mozpack/files.py
new file mode 100644
index 0000000000..691c248b02
--- /dev/null
+++ b/python/mozbuild/mozpack/files.py
@@ -0,0 +1,1271 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import bisect
+import codecs
+import errno
+import inspect
+import os
+import platform
+import shutil
+import stat
+import subprocess
+import uuid
+from collections import OrderedDict
+from io import BytesIO
+from itertools import chain, takewhile
+from tarfile import TarFile, TarInfo
+from tempfile import NamedTemporaryFile, mkstemp
+
+import six
+from jsmin import JavascriptMinify
+
+import mozbuild.makeutil as makeutil
+import mozpack.path as mozpath
+from mozbuild.preprocessor import Preprocessor
+from mozbuild.util import FileAvoidWrite, ensure_unicode, memoize
+from mozpack.chrome.manifest import ManifestEntry, ManifestInterfaces
+from mozpack.errors import ErrorMessage, errors
+from mozpack.executables import elfhack, is_executable, may_elfhack, may_strip, strip
+from mozpack.mozjar import JarReader
+
+try:
+    import hglib
+except ImportError:
+    hglib = None
+
+
+# For clean builds, copying files on win32 using CopyFile through ctypes is
+# ~2x as fast as using shutil.copyfile. Other platforms use shutil.copyfile.
+if platform.system() != "Windows":
+    _copyfile = shutil.copyfile
+else:
+    import ctypes
+
+    _kernel32 = ctypes.windll.kernel32
+    _CopyFileA = _kernel32.CopyFileA  # used when both paths are `str`
+    _CopyFileW = _kernel32.CopyFileW  # used when both paths are six.text_type
+
+    def _copyfile(src, dest):
+        # False indicates `dest` should be overwritten if it exists already.
+        if isinstance(src, six.text_type) and isinstance(dest, six.text_type):
+            _CopyFileW(src, dest, False)  # NOTE(review): result is not checked
+        elif isinstance(src, str) and isinstance(dest, str):
+            _CopyFileA(src, dest, False)  # NOTE(review): result is not checked
+        else:  # the two paths are not both of the same string type
+            raise TypeError("mismatched path types!")
+
+
+def _open(path, mode="r"):
+    """Open ``path``; on Python 3 every text-mode handle uses UTF-8."""
+    wants_text = "b" not in mode
+    if wants_text and six.PY3:
+        return open(path, mode, encoding="utf-8")
+    return open(path, mode)  # binary mode, or Python 2's plain open()
+
+
+class Dest(object):
+    """
+    Helper interface for BaseFile.copy. The interface works as follows:
+      - read() and write() can be used to sequentially read/write from the underlying file.
+      - a call to read() after a write() closes the file, re-opens it and reads from its start.
+      - a call to write() after a read() closes the file, re-opens it (emptying it) and writes.
+    """
+
+    def __init__(self, path):
+        self.file = None  # currently open binary handle, or None
+        self.mode = None  # "r", "w", or None when no handle is open
+        self.path = ensure_unicode(path)
+
+    @property
+    def name(self):
+        return self.path  # same value as ``path``
+
+    def read(self, length=-1):
+        if self.mode != "r":
+            self.close()  # flush/release a handle left open by write()
+            self.file = _open(self.path, mode="rb")
+            self.mode = "r"
+        return self.file.read(length)
+
+    def write(self, data):
+        if self.mode != "w":
+            self.close()  # release the read handle before truncating
+            self.file = _open(self.path, mode="wb")
+            self.mode = "w"
+        return self.file.write(six.ensure_binary(data))
+
+    def exists(self):
+        return os.path.exists(self.path)
+
+    def close(self):
+        if self.mode:  # no-op when nothing is open; state is reset first
+            handle, self.file, self.mode = self.file, None, None
+            handle.close()
+
+
+class BaseFile(object):
+    """
+    Base interface and helper for file copying. Derived class may implement
+    their own copy function, or rely on BaseFile.copy using the open() member
+    function and/or the path property.
+    """
+
+    @staticmethod
+    def is_older(first, second):
+        """
+        Compares the modification time (in whole milliseconds) of two files, and
+        returns whether ``first`` is older than, or as old as, ``second``.
+        """
+        # os.path.getmtime returns a result in seconds with precision up to
+        # the microsecond. But microsecond is too precise because
+        # shutil.copystat only copies milliseconds, and seconds is not
+        # enough precision.
+        return int(os.path.getmtime(first) * 1000) <= int(
+            os.path.getmtime(second) * 1000
+        )
+
+    @staticmethod
+    def any_newer(dest, inputs):
+        """
+        Compares the modification time of ``dest`` to multiple input files, and
+        returns whether any of the ``inputs`` is newer (has a later mtime) than
+        ``dest``. An input that no longer exists counts as newer.
+        """
+        # os.path.getmtime returns a result in seconds with precision up to
+        # the microsecond. But microsecond is too precise because
+        # shutil.copystat only copies milliseconds, and seconds is not
+        # enough precision.
+        dest_mtime = int(os.path.getmtime(dest) * 1000)
+        for input in inputs:
+            try:
+                src_mtime = int(os.path.getmtime(input) * 1000)
+            except OSError as e:
+                if e.errno == errno.ENOENT:
+                    # If an input file was removed, we should update.
+                    return True
+                raise  # any other stat failure is propagated
+            if dest_mtime < src_mtime:
+                return True
+        return False
+
+    @staticmethod
+    def normalize_mode(mode):
+        # Normalize a file mode; the returned value is built as follows:
+        # - keep file type (e.g. S_IFREG)
+        ret = stat.S_IFMT(mode)
+        # - expand user read and execute permissions to everyone
+        if mode & 0o0400:
+            ret |= 0o0444
+        if mode & 0o0100:
+            ret |= 0o0111
+        # - keep user write permissions
+        if mode & 0o0200:
+            ret |= 0o0200
+        # - leave away sticky bit, setuid, setgid
+        return ret
+
+    def copy(self, dest, skip_if_older=True):
+        """
+        Copy the BaseFile content to the destination given as a string or a
+        Dest instance. Avoids replacing existing files if the BaseFile content
+        matches that of the destination, or in case of plain files, if the
+        destination is newer than the original file. This latter behaviour is
+        disabled when skip_if_older is False.
+        Returns whether a copy was actually performed (True) or not (False).
+        """
+        if isinstance(dest, six.string_types):
+            dest = Dest(dest)
+        else:
+            assert isinstance(dest, Dest)
+
+        can_skip_content_check = False
+        if not dest.exists():
+            can_skip_content_check = True  # nothing to compare against
+        elif getattr(self, "path", None) and getattr(dest, "path", None):
+            if skip_if_older and BaseFile.is_older(self.path, dest.path):
+                return False  # destination is at least as recent as the source
+            elif os.path.getsize(self.path) != os.path.getsize(dest.path):
+                can_skip_content_check = True  # sizes differ, so contents do too
+
+        if can_skip_content_check:
+            if getattr(self, "path", None) and getattr(dest, "path", None):
+                # The destination directory must exist, or CopyFile will fail.
+                destdir = os.path.dirname(dest.path)
+                try:
+                    os.makedirs(destdir)
+                except OSError as e:
+                    if e.errno != errno.EEXIST:
+                        raise
+                _copyfile(self.path, dest.path)
+                shutil.copystat(self.path, dest.path)
+            else:
+                # Ensure the file is always created
+                if not dest.exists():
+                    dest.write(b"")
+                shutil.copyfileobj(self.open(), dest)
+            return True
+
+        # Compare source and destination chunk by chunk (32 KiB per read).
+        src = self.open()
+        accumulated_src_content = []
+        while True:
+            dest_content = dest.read(32768)
+            src_content = src.read(32768)
+            accumulated_src_content.append(src_content)
+            if len(dest_content) == len(src_content) == 0:
+                break  # both exhausted without any difference found
+            # If the read content differs between origin and destination,
+            # write what was read up to now, and copy the remainder.
+            if six.ensure_binary(dest_content) != six.ensure_binary(src_content):
+                dest.write(b"".join(accumulated_src_content))
+                shutil.copyfileobj(src, dest)
+                break
+        if hasattr(self, "path") and hasattr(dest, "path"):
+            shutil.copystat(self.path, dest.path)
+        return True  # NOTE(review): also True when the contents were identical
+
+    def open(self):
+        """
+        Return a file-like object allowing to read() the content of the
+        associated file. This is meant to be overloaded in subclasses to return
+        a custom file-like object.
+        """
+        assert self.path is not None
+        return open(self.path, "rb")
+
+    def read(self):
+        raise NotImplementedError("BaseFile.read() not implemented. Bug 1170329.")
+
+    def size(self):
+        """Returns size of the entry.
+
+        Derived classes are highly encouraged to override this with a more
+        optimal implementation.
+        """
+        return len(self.read())  # relies on a subclass implementing read()
+
+    @property
+    def mode(self):
+        """
+        Return the file's unix mode, or None if it has no meaning.
+        """
+        return None
+
+    def inputs(self):
+        """
+        Return an iterable of the input file paths that impact this output file.
+        """
+        raise NotImplementedError("BaseFile.inputs() not implemented.")
+
+
+class File(BaseFile):
+    """
+    A plain file on disk, identified by its path.
+    """
+
+    def __init__(self, path):
+        self.path = ensure_unicode(path)
+
+    @property
+    def mode(self):
+        """
+        Normalized unix mode taken from os.stat() of the path; None on Windows.
+        """
+        on_windows = platform.system() == "Windows"
+        if on_windows:
+            return None
+        assert self.path is not None
+        return self.normalize_mode(os.stat(self.path).st_mode)
+
+    def read(self):
+        """Read the whole file in binary mode and return what was read."""
+        with open(self.path, "rb") as source:
+            return source.read()
+
+    def size(self):
+        return os.stat(self.path).st_size  # taken from stat, no read needed
+
+    def inputs(self):
+        return (self.path,)  # the file itself is its only input
+
+
+class ExecutableFile(File):
+    """
+    File class for executable and library files on OS/2, OS/X and ELF systems.
+    (see mozpack.executables.is_executable documentation).
+    """
+
+    def __init__(self, path):
+        File.__init__(self, path)
+
+    def copy(self, dest, skip_if_older=True):
+        real_dest = dest
+        if not isinstance(dest, six.string_types):
+            fd, dest = mkstemp()  # work on a temporary path instead
+            os.close(fd)
+            os.remove(dest)  # only the name is kept; File.copy creates the file
+        assert isinstance(dest, six.string_types)
+        # If File.copy didn't actually copy because dest is newer, check the
+        # file sizes. If dest is smaller, it means it is already stripped and
+        # elfhacked, so we can skip.
+        if not File.copy(self, dest, skip_if_older) and os.path.getsize(
+            self.path
+        ) > os.path.getsize(dest):
+            return False
+        try:
+            if may_strip(dest):
+                strip(dest)
+            if may_elfhack(dest):
+                elfhack(dest)
+        except ErrorMessage:
+            os.remove(dest)  # drop the copied file, then let the error through
+            raise
+
+        if real_dest != dest:  # a temporary path was used for a non-string dest
+            f = File(dest)
+            ret = f.copy(real_dest, skip_if_older)
+            os.remove(dest)
+            return ret
+        return True
+
+
+class AbsoluteSymlinkFile(File):
+    """File class that is copied by symlinking (if available).
+
+    This class only works if the target path is absolute.
+    """
+
+    def __init__(self, path):
+        if not os.path.isabs(path):
+            raise ValueError("Symlink target not absolute: %s" % path)
+
+        File.__init__(self, path)
+
+    def copy(self, dest, skip_if_older=True):
+        assert isinstance(dest, six.string_types)  # Dest instances not accepted
+
+        # The logic in this function is complicated by the fact that symlinks
+        # aren't universally supported. So, where symlinks aren't supported, we
+        # fall back to file copying. Keep in mind that symlink support is
+        # per-filesystem, not per-OS.
+
+        # Handle the simple case where symlinks are definitely not supported by
+        # falling back to file copy.
+        if not hasattr(os, "symlink"):
+            return File.copy(self, dest, skip_if_older=skip_if_older)
+
+        # Always verify the symlink target path exists.
+        if not os.path.exists(self.path):
+            errors.fatal("Symlink target path does not exist: %s" % self.path)
+
+        st = None  # stays None when dest does not exist
+
+        try:
+            st = os.lstat(dest)
+        except OSError as ose:
+            if ose.errno != errno.ENOENT:
+                raise
+
+        # If the dest is a symlink pointing to us, we have nothing to do.
+        # If it's the wrong symlink, the filesystem must support symlinks,
+        # so we replace with a proper symlink.
+        if st and stat.S_ISLNK(st.st_mode):
+            link = os.readlink(dest)
+            if link == self.path:
+                return False
+
+            os.remove(dest)
+            os.symlink(self.path, dest)
+            return True
+
+        # If the destination doesn't exist, we try to create a symlink. If that
+        # fails, we fall back to copy code.
+        if not st:
+            try:
+                os.symlink(self.path, dest)
+                return True
+            except OSError:
+                return File.copy(self, dest, skip_if_older=skip_if_older)
+
+        # Now the complicated part. If the destination exists, we could be
+        # replacing a file with a symlink. Or, the filesystem may not support
+        # symlinks. We want to minimize I/O overhead for performance reasons,
+        # so we keep the existing destination file around as long as possible.
+        # A lot of the system calls would be eliminated if we cached whether
+        # symlinks are supported. However, even if we performed a single
+        # up-front test of whether the root of the destination directory
+        # supports symlinks, there's no guarantee that all operations for that
+        # dest (or source) would be on the same filesystem and would support
+        # symlinks.
+        #
+        # Our strategy is to attempt to create a new symlink with a random
+        # name. If that fails, we fall back to copy mode. If that works, we
+        # remove the old destination and move the newly-created symlink into
+        # its place.
+
+        temp_dest = os.path.join(os.path.dirname(dest), str(uuid.uuid4()))
+        try:
+            os.symlink(self.path, temp_dest)
+        # TODO Figure out exactly how symlink creation fails and only trap
+        # that.
+        except EnvironmentError:
+            return File.copy(self, dest, skip_if_older=skip_if_older)
+
+        # If removing the original file fails, don't forget to clean up the
+        # temporary symlink.
+        try:
+            os.remove(dest)
+        except EnvironmentError:
+            os.remove(temp_dest)
+            raise
+
+        os.rename(temp_dest, dest)  # move the new symlink into place
+        return True
+
+
+class HardlinkFile(File):
+    """File class that is copied by hard linking (if available)
+
+    This is similar to the AbsoluteSymlinkFile, but with hard links. The symlink
+    implementation requires paths to be absolute, because they are resolved at
+    read time, which makes relative paths messy. Hard links resolve paths at
+    link-creation time, so relative paths are fine.
+    """
+
+    def copy(self, dest, skip_if_older=True):
+        assert isinstance(dest, six.string_types)  # Dest instances not accepted
+
+        if not hasattr(os, "link"):  # no hard link support: plain copy
+            return super(HardlinkFile, self).copy(dest, skip_if_older=skip_if_older)
+
+        try:
+            path_st = os.stat(self.path)
+        except OSError as e:
+            if e.errno == errno.ENOENT:
+                # NOTE(review): if fatal() returns, path_st is unbound below.
+                errors.fatal("Hard link target path does not exist: %s" % self.path)
+            else:
+                raise
+
+        st = None  # stays None when dest does not exist
+        try:
+            st = os.lstat(dest)
+        except OSError as e:
+            if e.errno != errno.ENOENT:
+                raise
+
+        if st:
+            # The dest already points to the right place.
+            if st.st_dev == path_st.st_dev and st.st_ino == path_st.st_ino:
+                return False
+            # The dest exists and it points to the wrong place
+            os.remove(dest)
+
+        # At this point, either the dest used to exist and we just deleted it,
+        # or it never existed. We can now safely create the hard link.
+        try:
+            os.link(self.path, dest)
+        except OSError:
+            # If we can't hard link, fall back to copying
+            return super(HardlinkFile, self).copy(dest, skip_if_older=skip_if_older)
+        return True
+
+
+class ExistingFile(BaseFile):
+    """
+    File class that represents a file that may exist but whose content comes
+    from elsewhere.
+
+    The purpose of this class is to account for files that are installed via
+    external means. It is typically only used in manifests or in registries to
+    account for files.
+
+    copy() never writes anything, because nothing is known about the source
+    file/data; it always returns None.
+
+    Instances are either required or optional. For a required instance,
+    copy() reports a fatal error when the destination does not exist; for an
+    optional one, copy() checks nothing.
+    """
+
+    def __init__(self, required):
+        self.required = required
+
+    def copy(self, dest, skip_if_older=True):
+        # ``skip_if_older`` is accepted for interface parity and is not used.
+        if not isinstance(dest, six.string_types):
+            assert isinstance(dest, Dest)
+        else:
+            dest = Dest(dest)
+
+        # Optional files need no check; required ones must already be there.
+        if self.required and not dest.exists():
+            errors.fatal("Required existing file doesn't exist: %s" % dest.path)
+        return None
+
+    def inputs(self):
+        return ()  # no known source, hence no inputs
+
+
+class PreprocessedFile(BaseFile):
+    """
+    File class for a file that is preprocessed. PreprocessedFile.copy() runs the
+    preprocessor on it to create the output and, if set, the dependency file.
+    """
+
+    def __init__(
+        self,
+        path,
+        depfile_path,
+        marker,
+        defines,
+        extra_depends=None,
+        silence_missing_directive_warnings=False,
+    ):
+        self.path = ensure_unicode(path)
+        self.depfile = ensure_unicode(depfile_path)
+        self.marker = marker  # passed to Preprocessor(marker=...)
+        self.defines = defines  # passed to Preprocessor(defines=...)
+        self.extra_depends = list(extra_depends or [])
+        self.silence_missing_directive_warnings = silence_missing_directive_warnings
+
+    def inputs(self):
+        pp = Preprocessor(defines=self.defines, marker=self.marker)
+        pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings)
+
+        # Run the preprocessor with its output discarded, only to learn includes.
+        with _open(self.path, "r") as input:
+            with _open(os.devnull, "w") as output:
+                pp.processFile(input=input, output=output)
+
+        # This always yields at least self.path.
+        return pp.includes
+
+    def copy(self, dest, skip_if_older=True):
+        """
+        Invokes the preprocessor to create the destination file.
+        """
+        if isinstance(dest, six.string_types):
+            dest = Dest(dest)
+        else:
+            assert isinstance(dest, Dest)
+
+        # We have to account for the case where the destination exists and is a
+        # symlink to something. Since we know the preprocessor is certainly not
+        # going to create a symlink, we can just remove the existing one. If the
+        # destination is not a symlink, we leave it alone, since we're going to
+        # overwrite its contents anyway.
+        # If symlinks aren't supported at all, we can skip this step. Windows is
+        # skipped too (NOTE(review): AbsoluteSymlinkFile has no Windows comment to cite).
+        if hasattr(os, "symlink") and platform.system() != "Windows":
+            if os.path.islink(dest.path):
+                os.remove(dest.path)
+
+        pp_deps = set(self.extra_depends)
+
+        # If a dependency file was specified, and it exists, add any
+        # dependencies from that file to our list.
+        if self.depfile and os.path.exists(self.depfile):
+            target = mozpath.normpath(dest.name)
+            with _open(self.depfile, "rt") as fileobj:
+                for rule in makeutil.read_dep_makefile(fileobj):
+                    if target in rule.targets():
+                        pp_deps.update(rule.dependencies())
+
+        skip = False
+        if dest.exists() and skip_if_older:
+            # If a dependency file was specified, and it doesn't exist,
+            # assume that the preprocessor needs to be rerun. That will
+            # regenerate the dependency file.
+            if self.depfile and not os.path.exists(self.depfile):
+                skip = False
+            else:
+                skip = not BaseFile.any_newer(dest.path, pp_deps)
+
+        if skip:
+            return False  # dest is up to date with all known dependencies
+
+        deps_out = None
+        if self.depfile:
+            deps_out = FileAvoidWrite(self.depfile)
+        pp = Preprocessor(defines=self.defines, marker=self.marker)
+        pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings)
+
+        with _open(self.path, "r") as input:
+            pp.processFile(input=input, output=dest, depfile=deps_out)
+
+        dest.close()
+        if self.depfile:
+            deps_out.close()
+
+        return True
+
+
+class GeneratedFile(BaseFile):
+    """
+    In-memory file content; a function given as content is called on first use.
+    """
+
+    def __init__(self, content):
+        self._content = content
+
+    @property
+    def content(self):
+        producer = self._content
+        self._content = producer() if inspect.isfunction(producer) else producer
+        return six.ensure_binary(self._content)
+
+    @content.setter
+    def content(self, content):
+        self._content = content
+
+    def open(self):
+        return BytesIO(self.content)  # a fresh in-memory stream on every call
+
+    def read(self):
+        return self.content  # the content, converted by six.ensure_binary
+
+    def size(self):
+        return len(self.content)  # length of the converted content
+
+    def inputs(self):
+        return ()  # nothing on disk feeds this file
+
+
+class DeflatedFile(BaseFile):
+    """
+    A member of a jar archive, wrapped around its JarFileReader. Copying it
+    with DeflatedFile.copy() effectively extracts the member from the jar.
+    """
+
+    def __init__(self, file):
+        from mozpack.mozjar import JarFileReader
+        assert isinstance(file, JarFileReader)
+        self.file = file
+
+    def open(self):
+        reader = self.file
+        reader.seek(0)  # rewind, so every open() starts at the beginning
+        return reader
+
+
+class ExtractedTarFile(GeneratedFile):
+    """
+    A tar archive member whose data is read into memory at construction time.
+    """
+
+    def __init__(self, tar, info):
+        assert isinstance(info, TarInfo)
+        assert isinstance(tar, TarFile)
+        member_data = tar.extractfile(info).read()
+        GeneratedFile.__init__(self, member_data)
+        self._unix_mode = self.normalize_mode(info.mode)
+
+    @property
+    def mode(self):
+        return self._unix_mode  # normalized once, from the TarInfo mode
+
+    def read(self):
+        return self.content  # the data read in __init__
+
+
+class ManifestFile(BaseFile):
+    """
+    A manifest file assembled from individual manifest entries. Entries are
+    registered with add() and dropped with remove(); when the manifest is
+    serialized they are rebased on the base path given at creation.
+    Example:
+        There is a manifest entry "content foobar foobar/content/" relative
+        to "foobar/chrome". When packaging, the entry will be stored in
+        jar:foobar/omni.ja!/chrome/chrome.manifest, which means the entry
+        will have to be relative to "chrome" instead of "foobar/chrome". This
+        doesn't really matter when serializing the entry, since this base path
+        is not written out, but it matters when moving the entry at the same
+        time, e.g. to jar:foobar/omni.ja!/chrome.manifest, which we don't do
+        currently but could in the future.
+    """
+
+    def __init__(self, base, entries=None):
+        self._base = base
+        # ManifestInterfaces entries are kept apart from all other entries.
+        self._entries, self._interfaces = [], []
+        for initial in entries or ():
+            self.add(initial)
+
+    def add(self, entry):
+        """
+        Register ``entry`` with the manifest. Rebasing is deferred until open()
+        so that a later remove() can be given the entry unchanged.
+        """
+        assert isinstance(entry, ManifestEntry)
+        is_interface = isinstance(entry, ManifestInterfaces)
+        # Pick the list matching the entry kind, then store the entry in it.
+        bucket = self._interfaces if is_interface else self._entries
+        bucket.append(entry)
+
+    def remove(self, entry):
+        """
+        Drop ``entry`` from the list add() stores that kind of entry in.
+        """
+        assert isinstance(entry, ManifestEntry)
+        is_interface = isinstance(entry, ManifestInterfaces)
+        bucket = self._interfaces if is_interface else self._entries
+        # list.remove raises ValueError when the entry is not present.
+        bucket.remove(entry)
+
+    def open(self):
+        """
+        Serialize the manifest and return it as a readable, in-memory
+        file-like object.
+        """
+        # One line per entry, rebased on the manifest base; interfaces go last.
+        ordered = chain(self._entries, self._interfaces)
+        lines = ["%s\n" % entry.rebase(self._base) for entry in ordered]
+        serialized = "".join(lines)
+        return BytesIO(six.ensure_binary(serialized))
+
+    def __iter__(self):
+        """
+        Iterate over every entry: regular entries first, then interfaces.
+        """
+        return chain(self._entries, self._interfaces)
+
+    def isempty(self):
+        """
+        Return True when the manifest holds no entry at all.
+        """
+        return not (self._entries or self._interfaces)
+
+
+class MinifiedCommentStripped(BaseFile):
+    """
+    Wrapper around a BaseFile instance whose content is minified by dropping
+    every line that starts with a #.
+    """
+
+    def __init__(self, file):
+        assert isinstance(file, BaseFile)
+        self._file = file
+
+    def open(self):
+        """
+        Return a file-like object allowing to read() the content of the
+        wrapped file without its comment lines.
+        """
+        kept = []
+        for raw in self._file.open().readlines():
+            text = six.ensure_text(raw)
+            if not text.startswith("#"):
+                kept.append(text)
+        return BytesIO(six.ensure_binary("".join(kept)))
+
+
+class MinifiedJavaScript(BaseFile):
+    """
+    File class for minifying JavaScript files, optionally verifying the result.
+    """
+
+    def __init__(self, file, verify_command=None):
+        assert isinstance(file, BaseFile)
+        self._file = file
+        self._verify_command = verify_command  # argv prefix, or None to skip
+
+    def open(self):
+        output = six.StringIO()  # the minifier writes its text here
+        minify = JavascriptMinify(
+            codecs.getreader("utf-8")(self._file.open()), output, quote_chars="'\"`"
+        )
+        minify.minify()
+        output.seek(0)
+        output_source = six.ensure_binary(output.getvalue())
+        output = BytesIO(output_source)
+
+        if not self._verify_command:
+            return output  # no verification requested
+
+        input_source = self._file.open().read()  # original source, read again
+
+        # fh1 holds the original source, fh2 the minified one.
+        with NamedTemporaryFile("wb+") as fh1, NamedTemporaryFile("wb+") as fh2:
+            fh1.write(input_source)
+            fh2.write(output_source)
+            fh1.flush()
+            fh2.flush()
+            try:
+                args = list(self._verify_command)
+                args.extend([fh1.name, fh2.name])
+                subprocess.check_output(
+                    args, stderr=subprocess.STDOUT, universal_newlines=True
+                )
+            except subprocess.CalledProcessError as e:
+                errors.warn(
+                    "JS minification verification failed for %s:"
+                    % (getattr(self._file, "path", "<unknown>"))
+                )
+                # Prefix each line with "Warning:" so mozharness doesn't
+                # think these error messages are real errors.
+                for line in e.output.splitlines():
+                    errors.warn(line)
+
+                return self._file.open()  # rejected: serve the original source
+
+        return output
+
+
+class BaseFinder(object):
+    def __init__(
+        self, base, minify=False, minify_js=False, minify_js_verify_command=None
+    ):
+        """
+        Initializes the instance with a reference base directory.
+
+        The optional minify argument specifies whether minification of code
+        should occur. minify_js is an additional option to control minification
+        of JavaScript. It requires minify to be True.
+
+        minify_js_verify_command can be used to optionally verify the results
+        of JavaScript minification. If defined, it is expected to be an iterable
+        that will constitute the first arguments to a called process which will
+        receive the filenames of the original and minified JavaScript files.
+        The invoked process can then verify the results. If minification is
+        rejected, the process exits with a non-0 exit code and the original
+        JavaScript source is used. An example value for this argument is
+        ('/path/to/js', '/path/to/verify/script.js').
+        """
+        if minify_js and not minify:
+            raise ValueError("minify_js requires minify.")
+
+        self.base = base
+        self._minify = minify
+        self._minify_js = minify_js
+        self._minify_js_verify_command = minify_js_verify_command
+
+    def find(self, pattern):
+        """
+        Yield path, BaseFile_instance pairs for all files under the base
+        directory and its subdirectories that match the given pattern. See the
+        mozpack.path.match documentation for a description of the handled
+        patterns. Leading slashes in the pattern are ignored.
+        """
+        while pattern.startswith("/"):
+            pattern = pattern[1:]
+        for p, f in self._find(pattern):
+            yield p, self._minify_file(p, f)
+
+    def get(self, path):
+        """Obtain a single file.
+
+        Where ``find`` is tailored towards matching multiple files, this method
+        is used for retrieving a single file. Use this method when performance
+        is critical.
+
+        Returns a ``BaseFile`` if exactly one file matches or ``None`` otherwise.
+        """
+        files = list(self.find(path))  # this base implementation relies on find()
+        if len(files) != 1:
+            return None
+        return files[0][1]
+
+    def __iter__(self):
+        """
+        Iterates over all files under the base directory (excluding files
+        starting with a '.' and files at any level under a directory starting
+        with a '.').
+            for path, file in finder:
+                ...
+        """
+        return self.find("")
+
+    def __contains__(self, pattern):
+        raise RuntimeError(
+            "'in' operator forbidden for %s. Use contains()." % self.__class__.__name__
+        )
+
+    def contains(self, pattern):
+        """
+        Return whether some files under the base directory match the given
+        pattern. See the mozpack.path.match documentation for a description of
+        the handled patterns.
+        """
+        return any(self.find(pattern))  # True once find() yields a first pair
+
+    def _minify_file(self, path, file):
+        """
+        Return an appropriate MinifiedSomething wrapper for the given BaseFile
+        instance (file), according to the file type (determined by the given
+        path), if the FileFinder was created with minification enabled.
+        Otherwise, just return the given BaseFile instance.
+        """
+        if not self._minify or isinstance(file, ExecutableFile):
+            return file  # minification disabled, or an executable
+
+        if path.endswith((".ftl", ".properties")):
+            return MinifiedCommentStripped(file)
+
+        if self._minify_js and path.endswith((".js", ".jsm")):
+            return MinifiedJavaScript(file, self._minify_js_verify_command)
+
+        return file
+
+    def _find_helper(self, pattern, files, file_getter):
+        """Generic implementation of _find.
+
+        A few *Finder implementations share logic for returning results.
+        This function implements the custom logic.
+
+        The ``file_getter`` argument is a callable that receives a path
+        that is known to exist. The callable should return a ``BaseFile``
+        instance.
+        """
+        if "*" in pattern:  # wildcard: test every known path
+            for p in files:
+                if mozpath.match(p, pattern):
+                    yield p, file_getter(p)
+        elif pattern == "":  # empty pattern: everything
+            for p in files:
+                yield p, file_getter(p)
+        elif pattern in files:  # exact path of a known file
+            yield pattern, file_getter(pattern)
+        else:  # otherwise: paths for which basedir() gives back the pattern
+            for p in files:
+                if mozpath.basedir(p, [pattern]) == pattern:
+                    yield p, file_getter(p)
+
+
+class FileFinder(BaseFinder):
+    """
+    Finder yielding File/ExecutableFile instances for paths on the file system.
+    """
+
+    def __init__(
+        self,
+        base,
+        find_executables=False,
+        ignore=(),
+        ignore_broken_symlinks=False,
+        find_dotfiles=False,
+        **kargs
+    ):
+        """
+        Create a FileFinder for files under the given base directory.
+
+        ``find_executables`` makes the finder guess whether each file is an
+        executable, in which case an ExecutableFile is returned for it. Leave
+        it off when not needed: this can speed up the finder significantly.
+
+        ``ignore`` is an iterable of patterns matched against paths relative
+        to ``base`` with ``mozpath.match()``. An entry naming a directory
+        hides all files under it; an entry naming a file hides that file.
+
+        ``ignore_broken_symlinks`` is passed by the packager to work around
+        the build system not cleaning up stale files in some common cases
+        (bug 1297381). ``find_dotfiles`` makes directory scans also return
+        entries whose name starts with a '.'. Remaining keyword arguments
+        are forwarded to BaseFinder.
+        """
+        BaseFinder.__init__(self, base, **kargs)
+        self.ignore = ignore
+        self.ignore_broken_symlinks = ignore_broken_symlinks
+        self.find_executables = find_executables
+        self.find_dotfiles = find_dotfiles
+
+    def _find(self, pattern):
+        """
+        Actual implementation of FileFinder.find(): glob patterns go to
+        _find_glob(), existing directories to _find_dir(), anything else to
+        get(), i.e. explicitly requested paths are served even if dotted.
+        """
+        if "*" in pattern:
+            return self._find_glob("", mozpath.split(pattern))
+        if os.path.isdir(os.path.join(self.base, pattern)):
+            return self._find_dir(pattern)
+        found = self.get(pattern)
+        if not found:
+            return ()
+        return ((pattern, found),)
+
+    def _find_dir(self, path):
+        """
+        Actual implementation of FileFinder.find() when the given pattern
+        corresponds to an existing directory under the base directory.
+        Entries starting with a '.' found under ``path`` are skipped unless
+        find_dotfiles is set; leafs of ``path`` itself starting with a '.'
+        are not ignored. A directory matching ``ignore`` yields nothing.
+        """
+        if any(mozpath.match(path, ignored) for ignored in self.ignore):
+            return
+
+        # Sorting makes the output deterministic; otherwise it would likely
+        # depend on filesystem implementation details such as inode ordering.
+        for entry in sorted(os.listdir(os.path.join(self.base, path))):
+            hidden = entry.startswith(".")
+            if hidden and (entry in (".", "..") or not self.find_dotfiles):
+                continue
+            for sub_path, sub_file in self._find(mozpath.join(path, entry)):
+                yield sub_path, sub_file
+
+    def get(self, path):
+        """
+        Return the File (or ExecutableFile, when find_executables is set and
+        is_executable() says so) for ``path`` under the base directory.
+        Return None if nothing exists there, if it is a broken symlink while
+        ignore_broken_symlinks is set, or if it matches an ignore pattern.
+        """
+        srcpath = os.path.join(self.base, path)
+        # lexists() is true for broken symlinks; exists() follows the link.
+        if not os.path.lexists(srcpath):
+            return None
+        if self.ignore_broken_symlinks and not os.path.exists(srcpath):
+            return None
+        if any(mozpath.match(path, ignored) for ignored in self.ignore):
+            return None
+
+        if self.find_executables and is_executable(srcpath):
+            return ExecutableFile(srcpath)
+        return File(srcpath)
+
+    def _find_glob(self, base, pattern):
+        """
+        Actual implementation of FileFinder.find() for patterns containing
+        globbing components ('*' or '**'). ``base`` is the directory reached so
+        far, ``pattern`` the list of components still to match. Equivalent to:
+            for p, f in self:
+                if mozpath.match(p, pattern):
+                    yield p, f
+        but avoids scanning the entire tree.
+        """
+        if not pattern:
+            for sub_path, sub_file in self._find(base):
+                yield sub_path, sub_file
+            return
+        head, rest = pattern[0], pattern[1:]
+        if head == "**":
+            full_pattern = mozpath.join(*pattern)
+            for sub_path, sub_file in self._find(base):
+                if mozpath.match(sub_path, full_pattern):
+                    yield sub_path, sub_file
+        elif "*" not in head:
+            for sub_path, sub_file in self._find_glob(mozpath.join(base, head), rest):
+                yield sub_path, sub_file
+        elif os.path.exists(os.path.join(self.base, base)) and not any(
+            mozpath.match(base, ignored) for ignored in self.ignore
+        ):
+            # Sorted for the same reason as in _find_dir().
+            for entry in sorted(os.listdir(os.path.join(self.base, base))):
+                if entry.startswith(".") and not head.startswith("."):
+                    continue
+                if mozpath.match(entry, head):
+                    child = mozpath.join(base, entry)
+                    for sub_path, sub_file in self._find_glob(child, rest):
+                        yield sub_path, sub_file
+
+
+class JarFinder(BaseFinder):
+    """
+    Finder exposing the entries of a JarReader as DeflatedFile instances.
+    """
+
+    def __init__(self, base, reader, **kargs):
+        """
+        Create a JarFinder for the entries of the given JarReader. ``base``
+        only serves as an indication of the Jar file location; remaining
+        keyword arguments are forwarded to BaseFinder.
+        """
+        assert isinstance(reader, JarReader)
+        BaseFinder.__init__(self, base, **kargs)
+        self._files = OrderedDict((entry.filename, entry) for entry in reader)
+
+    def _find(self, pattern):
+        """Implement JarFinder.find() on top of _find_helper()."""
+
+        def as_deflated(name):
+            return DeflatedFile(self._files[name])
+
+        return self._find_helper(pattern, self._files, as_deflated)
+
+
+class TarFinder(BaseFinder):
+    """
+    Finder exposing the regular files of a TarFile as ExtractedTarFile instances.
+    """
+
+    def __init__(self, base, tar, **kargs):
+        """
+        Create a TarFinder for the members of the given TarFile for which
+        ``isfile()`` is true. ``base`` only serves as an indication of the Tar
+        file location; remaining keyword arguments are forwarded to BaseFinder.
+        """
+        assert isinstance(tar, TarFile)
+        self._tar = tar
+        BaseFinder.__init__(self, base, **kargs)
+        self._files = OrderedDict((info.name, info) for info in tar if info.isfile())
+
+    def _find(self, pattern):
+        """Implement TarFinder.find() on top of _find_helper()."""
+
+        def as_extracted(name):
+            return ExtractedTarFile(self._tar, self._files[name])
+
+        return self._find_helper(pattern, self._files, as_extracted)
+
+
+class ComposedFinder(BaseFinder):
+    """
+    Composes multiple File Finders in some sort of virtual file system,
+    built from a dictionary associating paths to ``*Finder`` instances.
+    All files are fetched in advance, which could be optimized.
+    """
+
+    def __init__(self, finders):
+        """
+        Register every finder's files under its base path, in sorted base
+        order, first dropping whatever the registry already holds for a base.
+        """
+        # Can't import globally, because of the dependency of mozpack.copier
+        # on this module.
+        from mozpack.copier import FileRegistry
+
+        registry = self.files = FileRegistry()
+        for base, finder in sorted(six.iteritems(finders)):
+            if registry.contains(base):
+                registry.remove(base)
+            for path, found in finder.find(""):
+                registry.add(mozpath.join(base, path), found)
+
+    def find(self, pattern):
+        """Yield a ``(path, file)`` pair per registered path matching ``pattern``."""
+        for path in self.files.match(pattern):
+            yield path, self.files[path]
+
+
+class MercurialFile(BaseFile):
+    """BaseFile whose content is fetched from Mercurial on construction."""
+
+    def __init__(self, client, rev, path):
+        raw_path, raw_rev = six.ensure_binary(path), six.ensure_binary(rev)
+        self._content = client.cat([raw_path], rev=raw_rev)
+
+    def open(self):
+        """Return a fresh in-memory binary stream over the content."""
+        return BytesIO(six.ensure_binary(self._content))
+
+    def read(self):
+        return self._content
+
+
+class MercurialRevisionFinder(BaseFinder):
+    """A finder that operates on a specific Mercurial revision."""
+
+    def __init__(self, repo, rev=".", recognize_repo_paths=False, **kwargs):
+        """Create a finder attached to a specific revision in a repository.
+
+        If no revision is given, open the parent of the working directory.
+
+        ``recognize_repo_paths`` makes ``.get()`` expect full paths that
+        include the repo's path instead of paths relative to the base
+        directory, and makes ``.find()`` unusable. It exists to support the
+        moz.build reader, which uses absolute paths; the reader should
+        eventually be rewritten to use relative paths and this hack should
+        be removed (TODO bug 1171069).
+
+        Raises an Exception if the hglib package is not available.
+        """
+        if not hglib:
+            raise Exception("hglib package not found")
+
+        super(MercurialRevisionFinder, self).__init__(base=repo, **kwargs)
+
+        self._root = mozpath.normpath(repo).rstrip("/")
+        self._recognize_repo_paths = recognize_repo_paths
+
+        # We change directories here otherwise we have to deal with relative paths.
+        oldcwd = os.getcwd()
+        os.chdir(self._root)
+        try:
+            self._client = hglib.open(path=repo, encoding=b"utf-8")
+        finally:
+            os.chdir(oldcwd)
+        self._rev = rev if rev is not None else "."
+
+        # Immediately populate the list of files in the repo since nearly every
+        # operation requires this list. Values are filled in lazily by _get().
+        files_cmd = [b"files", b"--rev", six.ensure_binary(self._rev)]
+        out = self._client.rawcommand(files_cmd)
+        self._files = OrderedDict()
+        for relpath in out.splitlines():
+            # Mercurial may use \ as path separator on Windows. So use normpath().
+            self._files[six.ensure_text(mozpath.normpath(relpath))] = None
+
+    def _find(self, pattern):
+        """Implement find(); unusable in ``recognize_repo_paths`` mode."""
+        if self._recognize_repo_paths:
+            raise NotImplementedError("cannot use find with recognize_repo_path")
+
+        return self._find_helper(pattern, self._files, self._get)
+
+    def get(self, path):
+        """Return the MercurialFile for ``path``, or None if it is unknown.
+
+        In ``recognize_repo_paths`` mode, ``path`` must be located under the
+        repository root (ValueError otherwise) and is made relative to it.
+        """
+        path = mozpath.normpath(path)
+        if self._recognize_repo_paths:
+            # Compare whole path components: a bare startswith(root) would
+            # also accept siblings such as "<root>-old/file".
+            if path != self._root and not path.startswith(self._root + "/"):
+                raise ValueError(
+                    "lookups in recognize_repo_paths mode must be "
+                    "prefixed with repo path: %s" % path
+                )
+            path = path[len(self._root) + 1 :]
+
+        try:
+            return self._get(path)
+        except KeyError:
+            return None
+
+    def _get(self, path):
+        """Return the MercurialFile for ``path``; KeyError if it is unknown."""
+        # Files are created lazily: tens of thousands of MercurialFile
+        # instances for every file in the repo would be inefficient.
+        f = self._files[path]
+        if f is None:
+            f = MercurialFile(self._client, self._rev, path)
+            self._files[path] = f
+        return f
+
+
+class FileListFinder(BaseFinder):
+    """Finder for a literal list of file names."""
+
+    def __init__(self, files):
+        """files must be a sorted list."""
+        self._files = files
+
+    @memoize
+    def _match(self, pattern):
+        """Return a sorted list of all files matching the given pattern."""
+        # _find_helper() is deliberately not used: it is not tuned for
+        # performance the way this class wants to be.
+        known = self._files
+        # Everything before the first wildcard component is a literal prefix.
+        # Because the list is sorted, candidates form one contiguous run that
+        # bisect locates and that ends at the first path lacking the prefix.
+        literal = takewhile(lambda part: "*" not in part, pattern.split("/"))
+        prefix = "/".join(literal)
+        matches = []
+        index = bisect.bisect_left(known, prefix)
+        while index < len(known) and known[index].startswith(prefix):
+            candidate = known[index]
+            index += 1
+            # Skip hidden files while scanning.
+            hidden = "/." in candidate[len(prefix) :]
+            if not hidden and mozpath.match(candidate, pattern):
+                matches.append(candidate)
+        return matches
+
+    def find(self, pattern):
+        """Yield ``(path, File(path))`` for each file matching ``pattern``."""
+        # Leading and trailing slashes in the pattern are not significant.
+        for path in self._match(pattern.strip("/")):
+            yield path, File(path)