diff options
Diffstat (limited to 'python/mozbuild/mozpack/files.py')
-rw-r--r-- | python/mozbuild/mozpack/files.py | 1271 |
1 files changed, 1271 insertions, 0 deletions
diff --git a/python/mozbuild/mozpack/files.py b/python/mozbuild/mozpack/files.py new file mode 100644 index 0000000000..691c248b02 --- /dev/null +++ b/python/mozbuild/mozpack/files.py @@ -0,0 +1,1271 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import bisect +import codecs +import errno +import inspect +import os +import platform +import shutil +import stat +import subprocess +import uuid +from collections import OrderedDict +from io import BytesIO +from itertools import chain, takewhile +from tarfile import TarFile, TarInfo +from tempfile import NamedTemporaryFile, mkstemp + +import six +from jsmin import JavascriptMinify + +import mozbuild.makeutil as makeutil +import mozpack.path as mozpath +from mozbuild.preprocessor import Preprocessor +from mozbuild.util import FileAvoidWrite, ensure_unicode, memoize +from mozpack.chrome.manifest import ManifestEntry, ManifestInterfaces +from mozpack.errors import ErrorMessage, errors +from mozpack.executables import elfhack, is_executable, may_elfhack, may_strip, strip +from mozpack.mozjar import JarReader + +try: + import hglib +except ImportError: + hglib = None + + +# For clean builds, copying files on win32 using CopyFile through ctypes is +# ~2x as fast as using shutil.copyfile. +if platform.system() != "Windows": + _copyfile = shutil.copyfile +else: + import ctypes + + _kernel32 = ctypes.windll.kernel32 + _CopyFileA = _kernel32.CopyFileA + _CopyFileW = _kernel32.CopyFileW + + def _copyfile(src, dest): + # False indicates `dest` should be overwritten if it exists already. + if isinstance(src, six.text_type) and isinstance(dest, six.text_type): + _CopyFileW(src, dest, False) + elif isinstance(src, str) and isinstance(dest, str): + _CopyFileA(src, dest, False) + else: + raise TypeError("mismatched path types!") + + +# Helper function; ensures we always open files with the correct encoding when +# opening them in text mode. +def _open(path, mode="r"): + if six.PY3 and "b" not in mode: + return open(path, mode, encoding="utf-8") + return open(path, mode) + + +class Dest(object): + """ + Helper interface for BaseFile.copy. The interface works as follows: + - read() and write() can be used to sequentially read/write from the underlying file. + - a call to read() after a write() will re-open the underlying file and read from it. + - a call to write() after a read() will re-open the underlying file, emptying it, and write to it. + """ + + def __init__(self, path): + self.file = None + self.mode = None + self.path = ensure_unicode(path) + + @property + def name(self): + return self.path + + def read(self, length=-1): + if self.mode != "r": + self.file = _open(self.path, mode="rb") + self.mode = "r" + return self.file.read(length) + + def write(self, data): + if self.mode != "w": + self.file = _open(self.path, mode="wb") + self.mode = "w" + to_write = six.ensure_binary(data) + return self.file.write(to_write) + + def exists(self): + return os.path.exists(self.path) + + def close(self): + if self.mode: + self.mode = None + self.file.close() + self.file = None + + +class BaseFile(object): + """ + Base interface and helper for file copying. Derived class may implement + their own copy function, or rely on BaseFile.copy using the open() member + function and/or the path property. + """ + + @staticmethod + def is_older(first, second): + """ + Compares the modification time of two files, and returns whether the + ``first`` file is older than the ``second`` file. + """ + # os.path.getmtime returns a result in seconds with precision up to + # the microsecond. But microsecond is too precise because + # shutil.copystat only copies milliseconds, and seconds is not + # enough precision. + return int(os.path.getmtime(first) * 1000) <= int( + os.path.getmtime(second) * 1000 + ) + + @staticmethod + def any_newer(dest, inputs): + """ + Compares the modification time of ``dest`` to multiple input files, and + returns whether any of the ``inputs`` is newer (has a later mtime) than + ``dest``. + """ + # os.path.getmtime returns a result in seconds with precision up to + # the microsecond. But microsecond is too precise because + # shutil.copystat only copies milliseconds, and seconds is not + # enough precision. + dest_mtime = int(os.path.getmtime(dest) * 1000) + for input in inputs: + try: + src_mtime = int(os.path.getmtime(input) * 1000) + except OSError as e: + if e.errno == errno.ENOENT: + # If an input file was removed, we should update. + return True + raise + if dest_mtime < src_mtime: + return True + return False + + @staticmethod + def normalize_mode(mode): + # Normalize file mode: + # - keep file type (e.g. S_IFREG) + ret = stat.S_IFMT(mode) + # - expand user read and execute permissions to everyone + if mode & 0o0400: + ret |= 0o0444 + if mode & 0o0100: + ret |= 0o0111 + # - keep user write permissions + if mode & 0o0200: + ret |= 0o0200 + # - leave away sticky bit, setuid, setgid + return ret + + def copy(self, dest, skip_if_older=True): + """ + Copy the BaseFile content to the destination given as a string or a + Dest instance. Avoids replacing existing files if the BaseFile content + matches that of the destination, or in case of plain files, if the + destination is newer than the original file. This latter behaviour is + disabled when skip_if_older is False. + Returns whether a copy was actually performed (True) or not (False). + """ + if isinstance(dest, six.string_types): + dest = Dest(dest) + else: + assert isinstance(dest, Dest) + + can_skip_content_check = False + if not dest.exists(): + can_skip_content_check = True + elif getattr(self, "path", None) and getattr(dest, "path", None): + if skip_if_older and BaseFile.is_older(self.path, dest.path): + return False + elif os.path.getsize(self.path) != os.path.getsize(dest.path): + can_skip_content_check = True + + if can_skip_content_check: + if getattr(self, "path", None) and getattr(dest, "path", None): + # The destination directory must exist, or CopyFile will fail. + destdir = os.path.dirname(dest.path) + try: + os.makedirs(destdir) + except OSError as e: + if e.errno != errno.EEXIST: + raise + _copyfile(self.path, dest.path) + shutil.copystat(self.path, dest.path) + else: + # Ensure the file is always created + if not dest.exists(): + dest.write(b"") + shutil.copyfileobj(self.open(), dest) + return True + + src = self.open() + accumulated_src_content = [] + while True: + dest_content = dest.read(32768) + src_content = src.read(32768) + accumulated_src_content.append(src_content) + if len(dest_content) == len(src_content) == 0: + break + # If the read content differs between origin and destination, + # write what was read up to now, and copy the remainder. + if six.ensure_binary(dest_content) != six.ensure_binary(src_content): + dest.write(b"".join(accumulated_src_content)) + shutil.copyfileobj(src, dest) + break + if hasattr(self, "path") and hasattr(dest, "path"): + shutil.copystat(self.path, dest.path) + return True + + def open(self): + """ + Return a file-like object allowing to read() the content of the + associated file. This is meant to be overloaded in subclasses to return + a custom file-like object. + """ + assert self.path is not None + return open(self.path, "rb") + + def read(self): + raise NotImplementedError("BaseFile.read() not implemented. Bug 1170329.") + + def size(self): + """Returns size of the entry. + + Derived classes are highly encouraged to override this with a more + optimal implementation. + """ + return len(self.read()) + + @property + def mode(self): + """ + Return the file's unix mode, or None if it has no meaning. + """ + return None + + def inputs(self): + """ + Return an iterable of the input file paths that impact this output file. + """ + raise NotImplementedError("BaseFile.inputs() not implemented.") + + +class File(BaseFile): + """ + File class for plain files. + """ + + def __init__(self, path): + self.path = ensure_unicode(path) + + @property + def mode(self): + """ + Return the file's unix mode, as returned by os.stat().st_mode. + """ + if platform.system() == "Windows": + return None + assert self.path is not None + mode = os.stat(self.path).st_mode + return self.normalize_mode(mode) + + def read(self): + """Return the contents of the file.""" + with open(self.path, "rb") as fh: + return fh.read() + + def size(self): + return os.stat(self.path).st_size + + def inputs(self): + return (self.path,) + + +class ExecutableFile(File): + """ + File class for executable and library files on OS/2, OS/X and ELF systems. + (see mozpack.executables.is_executable documentation). + """ + + def __init__(self, path): + File.__init__(self, path) + + def copy(self, dest, skip_if_older=True): + real_dest = dest + if not isinstance(dest, six.string_types): + fd, dest = mkstemp() + os.close(fd) + os.remove(dest) + assert isinstance(dest, six.string_types) + # If File.copy didn't actually copy because dest is newer, check the + # file sizes. If dest is smaller, it means it is already stripped and + # elfhacked, so we can skip. + if not File.copy(self, dest, skip_if_older) and os.path.getsize( + self.path + ) > os.path.getsize(dest): + return False + try: + if may_strip(dest): + strip(dest) + if may_elfhack(dest): + elfhack(dest) + except ErrorMessage: + os.remove(dest) + raise + + if real_dest != dest: + f = File(dest) + ret = f.copy(real_dest, skip_if_older) + os.remove(dest) + return ret + return True + + +class AbsoluteSymlinkFile(File): + """File class that is copied by symlinking (if available). + + This class only works if the target path is absolute. + """ + + def __init__(self, path): + if not os.path.isabs(path): + raise ValueError("Symlink target not absolute: %s" % path) + + File.__init__(self, path) + + def copy(self, dest, skip_if_older=True): + assert isinstance(dest, six.string_types) + + # The logic in this function is complicated by the fact that symlinks + # aren't universally supported. So, where symlinks aren't supported, we + # fall back to file copying. Keep in mind that symlink support is + # per-filesystem, not per-OS. + + # Handle the simple case where symlinks are definitely not supported by + # falling back to file copy. + if not hasattr(os, "symlink"): + return File.copy(self, dest, skip_if_older=skip_if_older) + + # Always verify the symlink target path exists. + if not os.path.exists(self.path): + errors.fatal("Symlink target path does not exist: %s" % self.path) + + st = None + + try: + st = os.lstat(dest) + except OSError as ose: + if ose.errno != errno.ENOENT: + raise + + # If the dest is a symlink pointing to us, we have nothing to do. + # If it's the wrong symlink, the filesystem must support symlinks, + # so we replace with a proper symlink. + if st and stat.S_ISLNK(st.st_mode): + link = os.readlink(dest) + if link == self.path: + return False + + os.remove(dest) + os.symlink(self.path, dest) + return True + + # If the destination doesn't exist, we try to create a symlink. If that + # fails, we fall back to copy code. + if not st: + try: + os.symlink(self.path, dest) + return True + except OSError: + return File.copy(self, dest, skip_if_older=skip_if_older) + + # Now the complicated part. If the destination exists, we could be + # replacing a file with a symlink. Or, the filesystem may not support + # symlinks. We want to minimize I/O overhead for performance reasons, + # so we keep the existing destination file around as long as possible. + # A lot of the system calls would be eliminated if we cached whether + # symlinks are supported. However, even if we performed a single + # up-front test of whether the root of the destination directory + # supports symlinks, there's no guarantee that all operations for that + # dest (or source) would be on the same filesystem and would support + # symlinks. + # + # Our strategy is to attempt to create a new symlink with a random + # name. If that fails, we fall back to copy mode. If that works, we + # remove the old destination and move the newly-created symlink into + # its place. + + temp_dest = os.path.join(os.path.dirname(dest), str(uuid.uuid4())) + try: + os.symlink(self.path, temp_dest) + # TODO Figure out exactly how symlink creation fails and only trap + # that. + except EnvironmentError: + return File.copy(self, dest, skip_if_older=skip_if_older) + + # If removing the original file fails, don't forget to clean up the + # temporary symlink. + try: + os.remove(dest) + except EnvironmentError: + os.remove(temp_dest) + raise + + os.rename(temp_dest, dest) + return True + + +class HardlinkFile(File): + """File class that is copied by hard linking (if available) + + This is similar to the AbsoluteSymlinkFile, but with hard links. The symlink + implementation requires paths to be absolute, because they are resolved at + read time, which makes relative paths messy. Hard links resolve paths at + link-creation time, so relative paths are fine. + """ + + def copy(self, dest, skip_if_older=True): + assert isinstance(dest, six.string_types) + + if not hasattr(os, "link"): + return super(HardlinkFile, self).copy(dest, skip_if_older=skip_if_older) + + try: + path_st = os.stat(self.path) + except OSError as e: + if e.errno == errno.ENOENT: + errors.fatal("Hard link target path does not exist: %s" % self.path) + else: + raise + + st = None + try: + st = os.lstat(dest) + except OSError as e: + if e.errno != errno.ENOENT: + raise + + if st: + # The dest already points to the right place. + if st.st_dev == path_st.st_dev and st.st_ino == path_st.st_ino: + return False + # The dest exists and it points to the wrong place + os.remove(dest) + + # At this point, either the dest used to exist and we just deleted it, + # or it never existed. We can now safely create the hard link. + try: + os.link(self.path, dest) + except OSError: + # If we can't hard link, fall back to copying + return super(HardlinkFile, self).copy(dest, skip_if_older=skip_if_older) + return True + + +class ExistingFile(BaseFile): + """ + File class that represents a file that may exist but whose content comes + from elsewhere. + + This purpose of this class is to account for files that are installed via + external means. It is typically only used in manifests or in registries to + account for files. + + When asked to copy, this class does nothing because nothing is known about + the source file/data. + + Instances of this class come in two flavors: required and optional. If an + existing file is required, it must exist during copy() or an error is + raised. + """ + + def __init__(self, required): + self.required = required + + def copy(self, dest, skip_if_older=True): + if isinstance(dest, six.string_types): + dest = Dest(dest) + else: + assert isinstance(dest, Dest) + + if not self.required: + return + + if not dest.exists(): + errors.fatal("Required existing file doesn't exist: %s" % dest.path) + + def inputs(self): + return () + + +class PreprocessedFile(BaseFile): + """ + File class for a file that is preprocessed. PreprocessedFile.copy() runs + the preprocessor on the file to create the output. + """ + + def __init__( + self, + path, + depfile_path, + marker, + defines, + extra_depends=None, + silence_missing_directive_warnings=False, + ): + self.path = ensure_unicode(path) + self.depfile = ensure_unicode(depfile_path) + self.marker = marker + self.defines = defines + self.extra_depends = list(extra_depends or []) + self.silence_missing_directive_warnings = silence_missing_directive_warnings + + def inputs(self): + pp = Preprocessor(defines=self.defines, marker=self.marker) + pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings) + + with _open(self.path, "r") as input: + with _open(os.devnull, "w") as output: + pp.processFile(input=input, output=output) + + # This always yields at least self.path. + return pp.includes + + def copy(self, dest, skip_if_older=True): + """ + Invokes the preprocessor to create the destination file. + """ + if isinstance(dest, six.string_types): + dest = Dest(dest) + else: + assert isinstance(dest, Dest) + + # We have to account for the case where the destination exists and is a + # symlink to something. Since we know the preprocessor is certainly not + # going to create a symlink, we can just remove the existing one. If the + # destination is not a symlink, we leave it alone, since we're going to + # overwrite its contents anyway. + # If symlinks aren't supported at all, we can skip this step. + # See comment in AbsoluteSymlinkFile about Windows. + if hasattr(os, "symlink") and platform.system() != "Windows": + if os.path.islink(dest.path): + os.remove(dest.path) + + pp_deps = set(self.extra_depends) + + # If a dependency file was specified, and it exists, add any + # dependencies from that file to our list. + if self.depfile and os.path.exists(self.depfile): + target = mozpath.normpath(dest.name) + with _open(self.depfile, "rt") as fileobj: + for rule in makeutil.read_dep_makefile(fileobj): + if target in rule.targets(): + pp_deps.update(rule.dependencies()) + + skip = False + if dest.exists() and skip_if_older: + # If a dependency file was specified, and it doesn't exist, + # assume that the preprocessor needs to be rerun. That will + # regenerate the dependency file. + if self.depfile and not os.path.exists(self.depfile): + skip = False + else: + skip = not BaseFile.any_newer(dest.path, pp_deps) + + if skip: + return False + + deps_out = None + if self.depfile: + deps_out = FileAvoidWrite(self.depfile) + pp = Preprocessor(defines=self.defines, marker=self.marker) + pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings) + + with _open(self.path, "r") as input: + pp.processFile(input=input, output=dest, depfile=deps_out) + + dest.close() + if self.depfile: + deps_out.close() + + return True + + +class GeneratedFile(BaseFile): + """ + File class for content with no previous existence on the filesystem. + """ + + def __init__(self, content): + self._content = content + + @property + def content(self): + if inspect.isfunction(self._content): + self._content = self._content() + return six.ensure_binary(self._content) + + @content.setter + def content(self, content): + self._content = content + + def open(self): + return BytesIO(self.content) + + def read(self): + return self.content + + def size(self): + return len(self.content) + + def inputs(self): + return () + + +class DeflatedFile(BaseFile): + """ + File class for members of a jar archive. DeflatedFile.copy() effectively + extracts the file from the jar archive. + """ + + def __init__(self, file): + from mozpack.mozjar import JarFileReader + + assert isinstance(file, JarFileReader) + self.file = file + + def open(self): + self.file.seek(0) + return self.file + + +class ExtractedTarFile(GeneratedFile): + """ + File class for members of a tar archive. Contents of the underlying file + are extracted immediately and stored in memory. + """ + + def __init__(self, tar, info): + assert isinstance(info, TarInfo) + assert isinstance(tar, TarFile) + GeneratedFile.__init__(self, tar.extractfile(info).read()) + self._unix_mode = self.normalize_mode(info.mode) + + @property + def mode(self): + return self._unix_mode + + def read(self): + return self.content + + +class ManifestFile(BaseFile): + """ + File class for a manifest file. It takes individual manifest entries (using + the add() and remove() member functions), and adjusts them to be relative + to the base path for the manifest, given at creation. + Example: + There is a manifest entry "content foobar foobar/content/" relative + to "foobar/chrome". When packaging, the entry will be stored in + jar:foobar/omni.ja!/chrome/chrome.manifest, which means the entry + will have to be relative to "chrome" instead of "foobar/chrome". This + doesn't really matter when serializing the entry, since this base path + is not written out, but it matters when moving the entry at the same + time, e.g. to jar:foobar/omni.ja!/chrome.manifest, which we don't do + currently but could in the future. + """ + + def __init__(self, base, entries=None): + self._base = base + self._entries = [] + self._interfaces = [] + for e in entries or []: + self.add(e) + + def add(self, entry): + """ + Add the given entry to the manifest. Entries are rebased at open() time + instead of add() time so that they can be more easily remove()d. + """ + assert isinstance(entry, ManifestEntry) + if isinstance(entry, ManifestInterfaces): + self._interfaces.append(entry) + else: + self._entries.append(entry) + + def remove(self, entry): + """ + Remove the given entry from the manifest. + """ + assert isinstance(entry, ManifestEntry) + if isinstance(entry, ManifestInterfaces): + self._interfaces.remove(entry) + else: + self._entries.remove(entry) + + def open(self): + """ + Return a file-like object allowing to read() the serialized content of + the manifest. + """ + content = "".join( + "%s\n" % e.rebase(self._base) + for e in chain(self._entries, self._interfaces) + ) + return BytesIO(six.ensure_binary(content)) + + def __iter__(self): + """ + Iterate over entries in the manifest file. + """ + return chain(self._entries, self._interfaces) + + def isempty(self): + """ + Return whether there are manifest entries to write + """ + return len(self._entries) + len(self._interfaces) == 0 + + +class MinifiedCommentStripped(BaseFile): + """ + File class for content minified by stripping comments. This wraps around a + BaseFile instance, and removes lines starting with a # from its content. + """ + + def __init__(self, file): + assert isinstance(file, BaseFile) + self._file = file + + def open(self): + """ + Return a file-like object allowing to read() the minified content of + the underlying file. + """ + content = "".join( + l + for l in [six.ensure_text(s) for s in self._file.open().readlines()] + if not l.startswith("#") + ) + return BytesIO(six.ensure_binary(content)) + + +class MinifiedJavaScript(BaseFile): + """ + File class for minifying JavaScript files. + """ + + def __init__(self, file, verify_command=None): + assert isinstance(file, BaseFile) + self._file = file + self._verify_command = verify_command + + def open(self): + output = six.StringIO() + minify = JavascriptMinify( + codecs.getreader("utf-8")(self._file.open()), output, quote_chars="'\"`" + ) + minify.minify() + output.seek(0) + output_source = six.ensure_binary(output.getvalue()) + output = BytesIO(output_source) + + if not self._verify_command: + return output + + input_source = self._file.open().read() + + with NamedTemporaryFile("wb+") as fh1, NamedTemporaryFile("wb+") as fh2: + fh1.write(input_source) + fh2.write(output_source) + fh1.flush() + fh2.flush() + + try: + args = list(self._verify_command) + args.extend([fh1.name, fh2.name]) + subprocess.check_output( + args, stderr=subprocess.STDOUT, universal_newlines=True + ) + except subprocess.CalledProcessError as e: + errors.warn( + "JS minification verification failed for %s:" + % (getattr(self._file, "path", "<unknown>")) + ) + # Prefix each line with "Warning:" so mozharness doesn't + # think these error messages are real errors. + for line in e.output.splitlines(): + errors.warn(line) + + return self._file.open() + + return output + + +class BaseFinder(object): + def __init__( + self, base, minify=False, minify_js=False, minify_js_verify_command=None + ): + """ + Initializes the instance with a reference base directory. + + The optional minify argument specifies whether minification of code + should occur. minify_js is an additional option to control minification + of JavaScript. It requires minify to be True. + + minify_js_verify_command can be used to optionally verify the results + of JavaScript minification. If defined, it is expected to be an iterable + that will constitute the first arguments to a called process which will + receive the filenames of the original and minified JavaScript files. + The invoked process can then verify the results. If minification is + rejected, the process exits with a non-0 exit code and the original + JavaScript source is used. An example value for this argument is + ('/path/to/js', '/path/to/verify/script.js'). + """ + if minify_js and not minify: + raise ValueError("minify_js requires minify.") + + self.base = base + self._minify = minify + self._minify_js = minify_js + self._minify_js_verify_command = minify_js_verify_command + + def find(self, pattern): + """ + Yield path, BaseFile_instance pairs for all files under the base + directory and its subdirectories that match the given pattern. See the + mozpack.path.match documentation for a description of the handled + patterns. + """ + while pattern.startswith("/"): + pattern = pattern[1:] + for p, f in self._find(pattern): + yield p, self._minify_file(p, f) + + def get(self, path): + """Obtain a single file. + + Where ``find`` is tailored towards matching multiple files, this method + is used for retrieving a single file. Use this method when performance + is critical. + + Returns a ``BaseFile`` if at most one file exists or ``None`` otherwise. + """ + files = list(self.find(path)) + if len(files) != 1: + return None + return files[0][1] + + def __iter__(self): + """ + Iterates over all files under the base directory (excluding files + starting with a '.' and files at any level under a directory starting + with a '.'). + for path, file in finder: + ... + """ + return self.find("") + + def __contains__(self, pattern): + raise RuntimeError( + "'in' operator forbidden for %s. Use contains()." % self.__class__.__name__ + ) + + def contains(self, pattern): + """ + Return whether some files under the base directory match the given + pattern. See the mozpack.path.match documentation for a description of + the handled patterns. + """ + return any(self.find(pattern)) + + def _minify_file(self, path, file): + """ + Return an appropriate MinifiedSomething wrapper for the given BaseFile + instance (file), according to the file type (determined by the given + path), if the FileFinder was created with minification enabled. + Otherwise, just return the given BaseFile instance. + """ + if not self._minify or isinstance(file, ExecutableFile): + return file + + if path.endswith((".ftl", ".properties")): + return MinifiedCommentStripped(file) + + if self._minify_js and path.endswith((".js", ".jsm")): + return MinifiedJavaScript(file, self._minify_js_verify_command) + + return file + + def _find_helper(self, pattern, files, file_getter): + """Generic implementation of _find. + + A few *Finder implementations share logic for returning results. + This function implements the custom logic. + + The ``file_getter`` argument is a callable that receives a path + that is known to exist. The callable should return a ``BaseFile`` + instance. + """ + if "*" in pattern: + for p in files: + if mozpath.match(p, pattern): + yield p, file_getter(p) + elif pattern == "": + for p in files: + yield p, file_getter(p) + elif pattern in files: + yield pattern, file_getter(pattern) + else: + for p in files: + if mozpath.basedir(p, [pattern]) == pattern: + yield p, file_getter(p) + + +class FileFinder(BaseFinder): + """ + Helper to get appropriate BaseFile instances from the file system. + """ + + def __init__( + self, + base, + find_executables=False, + ignore=(), + ignore_broken_symlinks=False, + find_dotfiles=False, + **kargs + ): + """ + Create a FileFinder for files under the given base directory. + + The find_executables argument determines whether the finder needs to + try to guess whether files are executables. Disabling this guessing + when not necessary can speed up the finder significantly. + + ``ignore`` accepts an iterable of patterns to ignore. Entries are + strings that match paths relative to ``base`` using + ``mozpath.match()``. This means if an entry corresponds + to a directory, all files under that directory will be ignored. If + an entry corresponds to a file, that particular file will be ignored. + ``ignore_broken_symlinks`` is passed by the packager to work around an + issue with the build system not cleaning up stale files in some common + cases. See bug 1297381. + """ + BaseFinder.__init__(self, base, **kargs) + self.find_dotfiles = find_dotfiles + self.find_executables = find_executables + self.ignore = ignore + self.ignore_broken_symlinks = ignore_broken_symlinks + + def _find(self, pattern): + """ + Actual implementation of FileFinder.find(), dispatching to specialized + member functions depending on what kind of pattern was given. + Note all files with a name starting with a '.' are ignored when + scanning directories, but are not ignored when explicitely requested. + """ + if "*" in pattern: + return self._find_glob("", mozpath.split(pattern)) + elif os.path.isdir(os.path.join(self.base, pattern)): + return self._find_dir(pattern) + else: + f = self.get(pattern) + return ((pattern, f),) if f else () + + def _find_dir(self, path): + """ + Actual implementation of FileFinder.find() when the given pattern + corresponds to an existing directory under the base directory. + Ignores file names starting with a '.' under the given path. If the + path itself has leafs starting with a '.', they are not ignored. + """ + for p in self.ignore: + if mozpath.match(path, p): + return + + # The sorted makes the output idempotent. Otherwise, we are + # likely dependent on filesystem implementation details, such as + # inode ordering. + for p in sorted(os.listdir(os.path.join(self.base, path))): + if p.startswith("."): + if p in (".", ".."): + continue + if not self.find_dotfiles: + continue + for p_, f in self._find(mozpath.join(path, p)): + yield p_, f + + def get(self, path): + srcpath = os.path.join(self.base, path) + if not os.path.lexists(srcpath): + return None + + if self.ignore_broken_symlinks and not os.path.exists(srcpath): + return None + + for p in self.ignore: + if mozpath.match(path, p): + return None + + if self.find_executables and is_executable(srcpath): + return ExecutableFile(srcpath) + else: + return File(srcpath) + + def _find_glob(self, base, pattern): + """ + Actual implementation of FileFinder.find() when the given pattern + contains globbing patterns ('*' or '**'). This is meant to be an + equivalent of: + for p, f in self: + if mozpath.match(p, pattern): + yield p, f + but avoids scanning the entire tree. + """ + if not pattern: + for p, f in self._find(base): + yield p, f + elif pattern[0] == "**": + for p, f in self._find(base): + if mozpath.match(p, mozpath.join(*pattern)): + yield p, f + elif "*" in pattern[0]: + if not os.path.exists(os.path.join(self.base, base)): + return + + for p in self.ignore: + if mozpath.match(base, p): + return + + # See above comment w.r.t. sorted() and idempotent behavior. + for p in sorted(os.listdir(os.path.join(self.base, base))): + if p.startswith(".") and not pattern[0].startswith("."): + continue + if mozpath.match(p, pattern[0]): + for p_, f in self._find_glob(mozpath.join(base, p), pattern[1:]): + yield p_, f + else: + for p, f in self._find_glob(mozpath.join(base, pattern[0]), pattern[1:]): + yield p, f + + +class JarFinder(BaseFinder): + """ + Helper to get appropriate DeflatedFile instances from a JarReader. + """ + + def __init__(self, base, reader, **kargs): + """ + Create a JarFinder for files in the given JarReader. The base argument + is used as an indication of the Jar file location. + """ + assert isinstance(reader, JarReader) + BaseFinder.__init__(self, base, **kargs) + self._files = OrderedDict((f.filename, f) for f in reader) + + def _find(self, pattern): + """ + Actual implementation of JarFinder.find(), dispatching to specialized + member functions depending on what kind of pattern was given. + """ + return self._find_helper( + pattern, self._files, lambda x: DeflatedFile(self._files[x]) + ) + + +class TarFinder(BaseFinder): + """ + Helper to get files from a TarFile. + """ + + def __init__(self, base, tar, **kargs): + """ + Create a TarFinder for files in the given TarFile. The base argument + is used as an indication of the Tar file location. + """ + assert isinstance(tar, TarFile) + self._tar = tar + BaseFinder.__init__(self, base, **kargs) + self._files = OrderedDict((f.name, f) for f in tar if f.isfile()) + + def _find(self, pattern): + """ + Actual implementation of TarFinder.find(), dispatching to specialized + member functions depending on what kind of pattern was given. + """ + return self._find_helper( + pattern, self._files, lambda x: ExtractedTarFile(self._tar, self._files[x]) + ) + + +class ComposedFinder(BaseFinder): + """ + Composes multiple File Finders in some sort of virtual file system. + + A ComposedFinder is initialized from a dictionary associating paths + to `*Finder instances.` + + Note this could be optimized to be smarter than getting all the files + in advance. + """ + + def __init__(self, finders): + # Can't import globally, because of the dependency of mozpack.copier + # on this module. + from mozpack.copier import FileRegistry + + self.files = FileRegistry() + + for base, finder in sorted(six.iteritems(finders)): + if self.files.contains(base): + self.files.remove(base) + for p, f in finder.find(""): + self.files.add(mozpath.join(base, p), f) + + def find(self, pattern): + for p in self.files.match(pattern): + yield p, self.files[p] + + +class MercurialFile(BaseFile): + """File class for holding data from Mercurial.""" + + def __init__(self, client, rev, path): + self._content = client.cat( + [six.ensure_binary(path)], rev=six.ensure_binary(rev) + ) + + def open(self): + return BytesIO(six.ensure_binary(self._content)) + + def read(self): + return self._content + + +class MercurialRevisionFinder(BaseFinder): + """A finder that operates on a specific Mercurial revision.""" + + def __init__(self, repo, rev=".", recognize_repo_paths=False, **kwargs): + """Create a finder attached to a specific revision in a repository. + + If no revision is given, open the parent of the working directory. + + ``recognize_repo_paths`` will enable a mode where ``.get()`` will + recognize full paths that include the repo's path. Typically Finder + instances are "bound" to a base directory and paths are relative to + that directory. This mode changes that. When this mode is activated, + ``.find()`` will not work! This mode exists to support the moz.build + reader, which uses absolute paths instead of relative paths. The reader + should eventually be rewritten to use relative paths and this hack + should be removed (TODO bug 1171069). + """ + if not hglib: + raise Exception("hglib package not found") + + super(MercurialRevisionFinder, self).__init__(base=repo, **kwargs) + + self._root = mozpath.normpath(repo).rstrip("/") + self._recognize_repo_paths = recognize_repo_paths + + # We change directories here otherwise we have to deal with relative + # paths. + oldcwd = os.getcwd() + os.chdir(self._root) + try: + self._client = hglib.open(path=repo, encoding=b"utf-8") + finally: + os.chdir(oldcwd) + self._rev = rev if rev is not None else "." + self._files = OrderedDict() + + # Immediately populate the list of files in the repo since nearly every + # operation requires this list. + out = self._client.rawcommand( + [ + b"files", + b"--rev", + six.ensure_binary(self._rev), + ] + ) + for relpath in out.splitlines(): + # Mercurial may use \ as path separator on Windows. So use + # normpath(). + self._files[six.ensure_text(mozpath.normpath(relpath))] = None + + def _find(self, pattern): + if self._recognize_repo_paths: + raise NotImplementedError("cannot use find with recognize_repo_path") + + return self._find_helper(pattern, self._files, self._get) + + def get(self, path): + path = mozpath.normpath(path) + if self._recognize_repo_paths: + if not path.startswith(self._root): + raise ValueError( + "lookups in recognize_repo_paths mode must be " + "prefixed with repo path: %s" % path + ) + path = path[len(self._root) + 1 :] + + try: + return self._get(path) + except KeyError: + return None + + def _get(self, path): + # We lazy populate self._files because potentially creating tens of + # thousands of MercurialFile instances for every file in the repo is + # inefficient. + f = self._files[path] + if not f: + f = MercurialFile(self._client, self._rev, path) + self._files[path] = f + + return f + + +class FileListFinder(BaseFinder): + """Finder for a literal list of file names.""" + + def __init__(self, files): + """files must be a sorted list.""" + self._files = files + + @memoize + def _match(self, pattern): + """Return a sorted list of all files matching the given pattern.""" + # We don't use the utility _find_helper method because it's not tuned + # for performance in the way that we would like this class to be. That's + # a possible avenue for refactoring here. + ret = [] + # We do this as an optimization to figure out where in the sorted list + # to search and where to stop searching. + components = pattern.split("/") + prefix = "/".join(takewhile(lambda s: "*" not in s, components)) + start = bisect.bisect_left(self._files, prefix) + for i in six.moves.range(start, len(self._files)): + f = self._files[i] + if not f.startswith(prefix): + break + # Skip hidden files while scanning. + if "/." in f[len(prefix) :]: + continue + if mozpath.match(f, pattern): + ret.append(f) + return ret + + def find(self, pattern): + pattern = pattern.strip("/") + for path in self._match(pattern): + yield path, File(path) |