summaryrefslogtreecommitdiffstats
path: root/python/mozbuild/mozpack/copier.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--python/mozbuild/mozpack/copier.py604
1 files changed, 604 insertions, 0 deletions
diff --git a/python/mozbuild/mozpack/copier.py b/python/mozbuild/mozpack/copier.py
new file mode 100644
index 0000000000..bf222abbca
--- /dev/null
+++ b/python/mozbuild/mozpack/copier.py
@@ -0,0 +1,604 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import concurrent.futures as futures
+import errno
+import os
+import stat
+import sys
+from collections import Counter, OrderedDict, defaultdict
+
+import mozpack.path as mozpath
+import six
+from mozpack.errors import errors
+from mozpack.files import BaseFile, Dest
+
+
+class FileRegistry(object):
+ """
+ Generic container to keep track of a set of BaseFile instances. It
+ preserves the order under which the files are added, but doesn't keep
+ track of empty directories (directories are not stored at all).
+ The paths associated with the BaseFile instances are relative to an
+ unspecified (virtual) root directory.
+
+ registry = FileRegistry()
+ registry.add('foo/bar', file_instance)
+ """
+
+ def __init__(self):
+ self._files = OrderedDict()
+ self._required_directories = Counter()
+ self._partial_paths_cache = {}
+
+ def _partial_paths(self, path):
+ """
+ Turn "foo/bar/baz/zot" into ["foo/bar/baz", "foo/bar", "foo"].
+ """
+ dir_name = path.rpartition("/")[0]
+ if not dir_name:
+ return []
+
+ partial_paths = self._partial_paths_cache.get(dir_name)
+ if partial_paths:
+ return partial_paths
+
+ partial_paths = [dir_name] + self._partial_paths(dir_name)
+
+ self._partial_paths_cache[dir_name] = partial_paths
+ return partial_paths
+
+ def add(self, path, content):
+ """
+ Add a BaseFile instance to the container, under the given path.
+ """
+ assert isinstance(content, BaseFile)
+ if path in self._files:
+ return errors.error("%s already added" % path)
+ if self._required_directories[path] > 0:
+ return errors.error("Can't add %s: it is a required directory" % path)
+ # Check whether any parent of the given path is already stored
+ partial_paths = self._partial_paths(path)
+ for partial_path in partial_paths:
+ if partial_path in self._files:
+ return errors.error("Can't add %s: %s is a file" % (path, partial_path))
+ self._files[path] = content
+ self._required_directories.update(partial_paths)
+
+ def match(self, pattern):
+ """
+ Return the list of paths, stored in the container, matching the
+ given pattern. See the mozpack.path.match documentation for a
+ description of the handled patterns.
+ """
+ if "*" in pattern:
+ return [p for p in self.paths() if mozpath.match(p, pattern)]
+ if pattern == "":
+ return self.paths()
+ if pattern in self._files:
+ return [pattern]
+ return [p for p in self.paths() if mozpath.basedir(p, [pattern]) == pattern]
+
+ def remove(self, pattern):
+ """
+ Remove paths matching the given pattern from the container. See the
+ mozpack.path.match documentation for a description of the handled
+ patterns.
+ """
+ items = self.match(pattern)
+ if not items:
+ return errors.error(
+ "Can't remove %s: %s"
+ % (pattern, "not matching anything previously added")
+ )
+ for i in items:
+ del self._files[i]
+ self._required_directories.subtract(self._partial_paths(i))
+
+ def paths(self):
+ """
+ Return all paths stored in the container, in the order they were added.
+ """
+ return list(self._files)
+
+ def __len__(self):
+ """
+ Return number of paths stored in the container.
+ """
+ return len(self._files)
+
+ def __contains__(self, pattern):
+ raise RuntimeError(
+ "'in' operator forbidden for %s. Use contains()." % self.__class__.__name__
+ )
+
+ def contains(self, pattern):
+ """
+ Return whether the container contains paths matching the given
+ pattern. See the mozpack.path.match documentation for a description of
+ the handled patterns.
+ """
+ return len(self.match(pattern)) > 0
+
+ def __getitem__(self, path):
+ """
+ Return the BaseFile instance stored in the container for the given
+ path.
+ """
+ return self._files[path]
+
+ def __iter__(self):
+ """
+ Iterate over all (path, BaseFile instance) pairs from the container.
+ for path, file in registry:
+ (...)
+ """
+ return six.iteritems(self._files)
+
+ def required_directories(self):
+ """
+ Return the set of directories required by the paths in the container,
+ in no particular order. The returned directories are relative to an
+ unspecified (virtual) root directory (and do not include said root
+ directory).
+ """
+ return set(k for k, v in self._required_directories.items() if v > 0)
+
+ def output_to_inputs_tree(self):
+ """
+ Return a dictionary mapping each output path to the set of its
+ required input paths.
+
+ All paths are normalized.
+ """
+ tree = {}
+ for output, file in self:
+ output = mozpath.normpath(output)
+ tree[output] = set(mozpath.normpath(f) for f in file.inputs())
+ return tree
+
+ def input_to_outputs_tree(self):
+ """
+ Return a dictionary mapping each input path to the set of
+ impacted output paths.
+
+ All paths are normalized.
+ """
+ tree = defaultdict(set)
+ for output, file in self:
+ output = mozpath.normpath(output)
+ for input in file.inputs():
+ input = mozpath.normpath(input)
+ tree[input].add(output)
+ return dict(tree)
+
+
+class FileRegistrySubtree(object):
+ """A proxy class to give access to a subtree of an existing FileRegistry.
+
+ Note this doesn't implement the whole FileRegistry interface."""
+
+ def __new__(cls, base, registry):
+ if not base:
+ return registry
+ return object.__new__(cls)
+
+ def __init__(self, base, registry):
+ self._base = base
+ self._registry = registry
+
+ def _get_path(self, path):
+ # mozpath.join will return a trailing slash if path is empty, and we
+ # don't want that.
+ return mozpath.join(self._base, path) if path else self._base
+
+ def add(self, path, content):
+ return self._registry.add(self._get_path(path), content)
+
+ def match(self, pattern):
+ return [
+ mozpath.relpath(p, self._base)
+ for p in self._registry.match(self._get_path(pattern))
+ ]
+
+ def remove(self, pattern):
+ return self._registry.remove(self._get_path(pattern))
+
+ def paths(self):
+ return [p for p, f in self]
+
+ def __len__(self):
+ return len(self.paths())
+
+ def contains(self, pattern):
+ return self._registry.contains(self._get_path(pattern))
+
+ def __getitem__(self, path):
+ return self._registry[self._get_path(path)]
+
+ def __iter__(self):
+ for p, f in self._registry:
+ if mozpath.basedir(p, [self._base]):
+ yield mozpath.relpath(p, self._base), f
+
+
+class FileCopyResult(object):
+ """Represents results of a FileCopier.copy operation."""
+
+ def __init__(self):
+ self.updated_files = set()
+ self.existing_files = set()
+ self.removed_files = set()
+ self.removed_directories = set()
+
+ @property
+ def updated_files_count(self):
+ return len(self.updated_files)
+
+ @property
+ def existing_files_count(self):
+ return len(self.existing_files)
+
+ @property
+ def removed_files_count(self):
+ return len(self.removed_files)
+
+ @property
+ def removed_directories_count(self):
+ return len(self.removed_directories)
+
+
+class FileCopier(FileRegistry):
+ """
+ FileRegistry with the ability to copy the registered files to a separate
+ directory.
+ """
+
+ def copy(
+ self,
+ destination,
+ skip_if_older=True,
+ remove_unaccounted=True,
+ remove_all_directory_symlinks=True,
+ remove_empty_directories=True,
+ ):
+ """
+ Copy all registered files to the given destination path. The given
+ destination can be an existing directory, or not exist at all. It
+ can't be e.g. a file.
+ The copy process acts a bit like rsync: files are not copied when they
+ don't need to (see mozpack.files for details on file.copy).
+
+ By default, files in the destination directory that aren't
+ registered are removed and empty directories are deleted. In
+ addition, all directory symlinks in the destination directory
+ are deleted: this is a conservative approach to ensure that we
+ never accidently write files into a directory that is not the
+ destination directory. In the worst case, we might have a
+ directory symlink in the object directory to the source
+ directory.
+
+ To disable removing of unregistered files, pass
+ remove_unaccounted=False. To disable removing empty
+ directories, pass remove_empty_directories=False. In rare
+ cases, you might want to maintain directory symlinks in the
+ destination directory (at least those that are not required to
+ be regular directories): pass
+ remove_all_directory_symlinks=False. Exercise caution with
+ this flag: you almost certainly do not want to preserve
+ directory symlinks.
+
+ Returns a FileCopyResult that details what changed.
+ """
+ assert isinstance(destination, six.string_types)
+ assert not os.path.exists(destination) or os.path.isdir(destination)
+
+ result = FileCopyResult()
+ have_symlinks = hasattr(os, "symlink")
+ destination = os.path.normpath(destination)
+
+ # We create the destination directory specially. We can't do this as
+ # part of the loop doing mkdir() below because that loop munges
+ # symlinks and permissions and parent directories of the destination
+ # directory may have their own weird schema. The contract is we only
+ # manage children of destination, not its parents.
+ try:
+ os.makedirs(destination)
+ except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
+
+ # Because we could be handling thousands of files, code in this
+ # function is optimized to minimize system calls. We prefer CPU time
+ # in Python over possibly I/O bound filesystem calls to stat() and
+ # friends.
+
+ required_dirs = set([destination])
+ required_dirs |= set(
+ os.path.normpath(os.path.join(destination, d))
+ for d in self.required_directories()
+ )
+
+ # Ensure destination directories are in place and proper.
+ #
+ # The "proper" bit is important. We need to ensure that directories
+ # have appropriate permissions or we will be unable to discover
+ # and write files. Furthermore, we need to verify directories aren't
+ # symlinks.
+ #
+ # Symlinked directories (a symlink whose target is a directory) are
+ # incompatible with us because our manifest talks in terms of files,
+ # not directories. If we leave symlinked directories unchecked, we
+ # would blindly follow symlinks and this might confuse file
+ # installation. For example, if an existing directory is a symlink
+ # to directory X and we attempt to install a symlink in this directory
+ # to a file in directory X, we may create a recursive symlink!
+ for d in sorted(required_dirs, key=len):
+ try:
+ os.mkdir(d)
+ except OSError as error:
+ if error.errno != errno.EEXIST:
+ raise
+
+ # We allow the destination to be a symlink because the caller
+ # is responsible for managing the destination and we assume
+ # they know what they are doing.
+ if have_symlinks and d != destination:
+ st = os.lstat(d)
+ if stat.S_ISLNK(st.st_mode):
+ # While we have remove_unaccounted, it doesn't apply
+ # to directory symlinks because if it did, our behavior
+ # could be very wrong.
+ os.remove(d)
+ os.mkdir(d)
+
+ if not os.access(d, os.W_OK):
+ umask = os.umask(0o077)
+ os.umask(umask)
+ os.chmod(d, 0o777 & ~umask)
+
+ if isinstance(remove_unaccounted, FileRegistry):
+ existing_files = set(
+ os.path.normpath(os.path.join(destination, p))
+ for p in remove_unaccounted.paths()
+ )
+ existing_dirs = set(
+ os.path.normpath(os.path.join(destination, p))
+ for p in remove_unaccounted.required_directories()
+ )
+ existing_dirs |= {os.path.normpath(destination)}
+ else:
+ # While we have remove_unaccounted, it doesn't apply to empty
+ # directories because it wouldn't make sense: an empty directory
+ # is empty, so removing it should have no effect.
+ existing_dirs = set()
+ existing_files = set()
+ for root, dirs, files in os.walk(destination):
+ # We need to perform the same symlink detection as above.
+ # os.walk() doesn't follow symlinks into directories by
+ # default, so we need to check dirs (we can't wait for root).
+ if have_symlinks:
+ filtered = []
+ for d in dirs:
+ full = os.path.join(root, d)
+ st = os.lstat(full)
+ if stat.S_ISLNK(st.st_mode):
+ # This directory symlink is not a required
+ # directory: any such symlink would have been
+ # removed and a directory created above.
+ if remove_all_directory_symlinks:
+ os.remove(full)
+ result.removed_files.add(os.path.normpath(full))
+ else:
+ existing_files.add(os.path.normpath(full))
+ else:
+ filtered.append(d)
+
+ dirs[:] = filtered
+
+ existing_dirs.add(os.path.normpath(root))
+
+ for d in dirs:
+ existing_dirs.add(os.path.normpath(os.path.join(root, d)))
+
+ for f in files:
+ existing_files.add(os.path.normpath(os.path.join(root, f)))
+
+ # Now we reconcile the state of the world against what we want.
+ dest_files = set()
+
+ # Install files.
+ #
+ # Creating/appending new files on Windows/NTFS is slow. So we use a
+ # thread pool to speed it up significantly. The performance of this
+ # loop is so critical to common build operations on Linux that the
+ # overhead of the thread pool is worth avoiding, so we have 2 code
+ # paths. We also employ a low water mark to prevent thread pool
+ # creation if number of files is too small to benefit.
+ copy_results = []
+ if sys.platform == "win32" and len(self) > 100:
+ with futures.ThreadPoolExecutor(4) as e:
+ fs = []
+ for p, f in self:
+ destfile = os.path.normpath(os.path.join(destination, p))
+ fs.append((destfile, e.submit(f.copy, destfile, skip_if_older)))
+
+ copy_results = [(path, f.result) for path, f in fs]
+ else:
+ for p, f in self:
+ destfile = os.path.normpath(os.path.join(destination, p))
+ copy_results.append((destfile, f.copy(destfile, skip_if_older)))
+
+ for destfile, copy_result in copy_results:
+ dest_files.add(destfile)
+ if copy_result:
+ result.updated_files.add(destfile)
+ else:
+ result.existing_files.add(destfile)
+
+ # Remove files no longer accounted for.
+ if remove_unaccounted:
+ for f in existing_files - dest_files:
+ # Windows requires write access to remove files.
+ if os.name == "nt" and not os.access(f, os.W_OK):
+ # It doesn't matter what we set permissions to since we
+ # will remove this file shortly.
+ os.chmod(f, 0o600)
+
+ os.remove(f)
+ result.removed_files.add(f)
+
+ if not remove_empty_directories:
+ return result
+
+ # Figure out which directories can be removed. This is complicated
+ # by the fact we optionally remove existing files. This would be easy
+ # if we walked the directory tree after installing files. But, we're
+ # trying to minimize system calls.
+
+ # Start with the ideal set.
+ remove_dirs = existing_dirs - required_dirs
+
+ # Then don't remove directories if we didn't remove unaccounted files
+ # and one of those files exists.
+ if not remove_unaccounted:
+ parents = set()
+ pathsep = os.path.sep
+ for f in existing_files:
+ path = f
+ while True:
+ # All the paths are normalized and relative by this point,
+ # so os.path.dirname would only do extra work.
+ dirname = path.rpartition(pathsep)[0]
+ if dirname in parents:
+ break
+ parents.add(dirname)
+ path = dirname
+ remove_dirs -= parents
+
+ # Remove empty directories that aren't required.
+ for d in sorted(remove_dirs, key=len, reverse=True):
+ try:
+ try:
+ os.rmdir(d)
+ except OSError as e:
+ if e.errno in (errno.EPERM, errno.EACCES):
+ # Permissions may not allow deletion. So ensure write
+ # access is in place before attempting to rmdir again.
+ os.chmod(d, 0o700)
+ os.rmdir(d)
+ else:
+ raise
+ except OSError as e:
+ # If remove_unaccounted is a # FileRegistry, then we have a
+ # list of directories that may not be empty, so ignore rmdir
+ # ENOTEMPTY errors for them.
+ if (
+ isinstance(remove_unaccounted, FileRegistry)
+ and e.errno == errno.ENOTEMPTY
+ ):
+ continue
+ raise
+ result.removed_directories.add(d)
+
+ return result
+
+
+class Jarrer(FileRegistry, BaseFile):
+ """
+ FileRegistry with the ability to copy and pack the registered files as a
+ jar file. Also acts as a BaseFile instance, to be copied with a FileCopier.
+ """
+
+ def __init__(self, compress=True):
+ """
+ Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
+ for details on the compress argument.
+ """
+ self.compress = compress
+ self._preload = []
+ self._compress_options = {} # Map path to compress boolean option.
+ FileRegistry.__init__(self)
+
+ def add(self, path, content, compress=None):
+ FileRegistry.add(self, path, content)
+ if compress is not None:
+ self._compress_options[path] = compress
+
+ def copy(self, dest, skip_if_older=True):
+ """
+ Pack all registered files in the given destination jar. The given
+ destination jar may be a path to jar file, or a Dest instance for
+ a jar file.
+ If the destination jar file exists, its (compressed) contents are used
+ instead of the registered BaseFile instances when appropriate.
+ """
+
+ class DeflaterDest(Dest):
+ """
+ Dest-like class, reading from a file-like object initially, but
+ switching to a Deflater object if written to.
+
+ dest = DeflaterDest(original_file)
+ dest.read() # Reads original_file
+ dest.write(data) # Creates a Deflater and write data there
+ dest.read() # Re-opens the Deflater and reads from it
+ """
+
+ def __init__(self, orig=None, compress=True):
+ self.mode = None
+ self.deflater = orig
+ self.compress = compress
+
+ def read(self, length=-1):
+ if self.mode != "r":
+ assert self.mode is None
+ self.mode = "r"
+ return self.deflater.read(length)
+
+ def write(self, data):
+ if self.mode != "w":
+ from mozpack.mozjar import Deflater
+
+ self.deflater = Deflater(self.compress)
+ self.mode = "w"
+ self.deflater.write(data)
+
+ def exists(self):
+ return self.deflater is not None
+
+ if isinstance(dest, six.string_types):
+ dest = Dest(dest)
+ assert isinstance(dest, Dest)
+
+ from mozpack.mozjar import JarReader, JarWriter
+
+ try:
+ old_jar = JarReader(fileobj=dest)
+ except Exception:
+ old_jar = []
+
+ old_contents = dict([(f.filename, f) for f in old_jar])
+
+ with JarWriter(fileobj=dest, compress=self.compress) as jar:
+ for path, file in self:
+ compress = self._compress_options.get(path, self.compress)
+ if path in old_contents:
+ deflater = DeflaterDest(old_contents[path], compress)
+ else:
+ deflater = DeflaterDest(compress=compress)
+ file.copy(deflater, skip_if_older)
+ jar.add(path, deflater.deflater, mode=file.mode, compress=compress)
+ if self._preload:
+ jar.preload(self._preload)
+
+ def open(self):
+ raise RuntimeError("unsupported")
+
+ def preload(self, paths):
+ """
+ Add the given set of paths to the list of preloaded files. See
+ mozpack.mozjar.JarWriter documentation for details on jar preloading.
+ """
+ self._preload.extend(paths)