summaryrefslogtreecommitdiffstats
path: root/python/mozbuild/mozpack/copier.py
blob: c042e5432f28d743589ee0ffbfaa0122a350ce20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import concurrent.futures as futures
import errno
import os
import stat
import sys
from collections import Counter, OrderedDict, defaultdict

import six

import mozpack.path as mozpath
from mozpack.errors import errors
from mozpack.files import BaseFile, Dest


class FileRegistry(object):
    """
    Ordered collection of BaseFile instances, keyed by path.

    Files are remembered in the order in which they were added. Directories
    are never stored as entries of their own (so empty directories cannot be
    represented); only a count of how many registered files need each parent
    directory is maintained. All paths are relative to an unspecified
    (virtual) root directory.

        registry = FileRegistry()
        registry.add('foo/bar', file_instance)
    """

    def __init__(self):
        # path -> BaseFile, in insertion order.
        self._files = OrderedDict()
        # directory -> number of registered files located below it.
        self._required_directories = Counter()
        # directory -> list of that directory followed by all its ancestors.
        self._partial_paths_cache = {}

    def _partial_paths(self, path):
        """
        Return the ancestors of the given path, deepest first, e.g.
        "foo/bar/baz/zot" gives ["foo/bar/baz", "foo/bar", "foo"].

        Results are memoized per parent directory.
        """
        parent = path.rpartition("/")[0]
        if not parent:
            return []

        ancestors = self._partial_paths_cache.get(parent)
        if not ancestors:
            ancestors = [parent]
            ancestors.extend(self._partial_paths(parent))
            self._partial_paths_cache[parent] = ancestors
        return ancestors

    def add(self, path, content):
        """
        Register a BaseFile instance under the given path.

        An error is reported when the path is already registered, when it
        is currently needed as a directory, or when one of its parents is
        registered as a file.
        """
        assert isinstance(content, BaseFile)
        if path in self._files:
            return errors.error("%s already added" % path)
        if self._required_directories[path] > 0:
            return errors.error("Can't add %s: it is a required directory" % path)
        # A parent of the new path must not already be stored as a file.
        ancestors = self._partial_paths(path)
        blocking = next((a for a in ancestors if a in self._files), None)
        if blocking is not None:
            return errors.error("Can't add %s: %s is a file" % (path, blocking))
        self._files[path] = content
        self._required_directories.update(ancestors)

    def match(self, pattern):
        """
        Return the list of registered paths matching the given pattern.
        See the mozpack.path.match documentation for a description of the
        handled patterns.
        """
        if "*" in pattern:
            # Wildcard pattern: defer to mozpath.match for each path.
            return [
                candidate
                for candidate in self.paths()
                if mozpath.match(candidate, pattern)
            ]
        if pattern == "":
            # The empty pattern selects everything.
            return self.paths()
        if pattern in self._files:
            # Exact file name.
            return [pattern]
        # Otherwise the pattern is treated as a directory prefix.
        return [
            candidate
            for candidate in self.paths()
            if mozpath.basedir(candidate, [pattern]) == pattern
        ]

    def remove(self, pattern):
        """
        Unregister every path matching the given pattern. See the
        mozpack.path.match documentation for a description of the handled
        patterns. An error is reported when nothing matches.
        """
        matched = self.match(pattern)
        if not matched:
            return errors.error(
                "Can't remove %s: %s"
                % (pattern, "not matching anything previously added")
            )
        for path in matched:
            del self._files[path]
            # Release the parent directories this file was keeping alive.
            self._required_directories.subtract(self._partial_paths(path))

    def paths(self):
        """
        Return a list of all registered paths, in the order they were added.
        """
        return list(self._files.keys())

    def __len__(self):
        """
        Return the number of registered paths.
        """
        return len(self._files)

    def __contains__(self, pattern):
        # Deliberately unsupported: callers must go through contains().
        raise RuntimeError(
            "'in' operator forbidden for %s. Use contains()." % self.__class__.__name__
        )

    def contains(self, pattern):
        """
        Return whether at least one registered path matches the given
        pattern. See the mozpack.path.match documentation for a description
        of the handled patterns.
        """
        return bool(self.match(pattern))

    def __getitem__(self, path):
        """
        Return the BaseFile instance registered under the given path.
        """
        return self._files[path]

    def __iter__(self):
        """
        Iterate over the registered (path, BaseFile instance) pairs.
            for path, file in registry:
                (...)
        """
        return six.iteritems(self._files)

    def required_directories(self):
        """
        Return the set of directories needed by the registered paths, in no
        particular order. The directories are relative to an unspecified
        (virtual) root directory, which itself is not part of the result.
        """
        return {
            directory
            for directory, count in self._required_directories.items()
            if count > 0
        }

    def output_to_inputs_tree(self):
        """
        Return a dictionary mapping each (normalized) output path to the
        set of (normalized) input paths it requires.
        """
        mapping = {}
        for destination, entry in self:
            key = mozpath.normpath(destination)
            mapping[key] = {mozpath.normpath(src) for src in entry.inputs()}
        return mapping

    def input_to_outputs_tree(self):
        """
        Return a dictionary mapping each (normalized) input path to the set
        of (normalized) output paths it has an impact on.
        """
        mapping = {}
        for destination, entry in self:
            normalized_output = mozpath.normpath(destination)
            for src in entry.inputs():
                key = mozpath.normpath(src)
                mapping.setdefault(key, set()).add(normalized_output)
        return mapping


class FileRegistrySubtree(object):
    """Proxy exposing the part of an existing FileRegistry located under a
    given base directory.

    Paths handed to and returned by the proxy are relative to that base.
    Only part of the FileRegistry interface is implemented."""

    def __new__(cls, base, registry):
        # Without a base there is nothing to proxy: hand back the registry
        # itself instead of creating a wrapper around it.
        return object.__new__(cls) if base else registry

    def __init__(self, base, registry):
        self._base = base
        self._registry = registry

    def _get_path(self, path):
        """Translate a subtree-relative path into a path in the registry."""
        if not path:
            # mozpath.join would leave a trailing slash for an empty path,
            # which is not wanted here.
            return self._base
        return mozpath.join(self._base, path)

    def add(self, path, content):
        full_path = self._get_path(path)
        return self._registry.add(full_path, content)

    def match(self, pattern):
        relative = []
        for found in self._registry.match(self._get_path(pattern)):
            relative.append(mozpath.relpath(found, self._base))
        return relative

    def remove(self, pattern):
        full_pattern = self._get_path(pattern)
        return self._registry.remove(full_pattern)

    def paths(self):
        return [relative for relative, _ in self]

    def __len__(self):
        return len(self.paths())

    def contains(self, pattern):
        full_pattern = self._get_path(pattern)
        return self._registry.contains(full_pattern)

    def __getitem__(self, path):
        full_path = self._get_path(path)
        return self._registry[full_path]

    def __iter__(self):
        # Only entries located under the base directory are exposed, with
        # their paths rewritten relative to it.
        for full_path, entry in self._registry:
            if not mozpath.basedir(full_path, [self._base]):
                continue
            yield mozpath.relpath(full_path, self._base), entry


class FileCopyResult(object):
    """Outcome of a FileCopier.copy operation.

    Each attribute is a set of paths; the matching ``*_count`` property
    gives the number of entries in that set."""

    def __init__(self):
        # Destination files that were written during the copy.
        self.updated_files = set()
        # Destination files that were left as they were.
        self.existing_files = set()
        # Files deleted from the destination.
        self.removed_files = set()
        # Directories deleted from the destination.
        self.removed_directories = set()

    @staticmethod
    def _size(paths):
        """Return the number of entries in the given collection."""
        return len(paths)

    @property
    def updated_files_count(self):
        """Number of updated files."""
        return self._size(self.updated_files)

    @property
    def existing_files_count(self):
        """Number of files left untouched."""
        return self._size(self.existing_files)

    @property
    def removed_files_count(self):
        """Number of removed files."""
        return self._size(self.removed_files)

    @property
    def removed_directories_count(self):
        """Number of removed directories."""
        return self._size(self.removed_directories)


class FileCopier(FileRegistry):
    """
    FileRegistry with the ability to copy the registered files to a separate
    directory.
    """

    def copy(
        self,
        destination,
        skip_if_older=True,
        remove_unaccounted=True,
        remove_all_directory_symlinks=True,
        remove_empty_directories=True,
    ):
        """
        Copy all registered files to the given destination path. The given
        destination can be an existing directory, or not exist at all. It
        can't be e.g. a file.
        The copy process acts a bit like rsync: files are not copied when they
        don't need to (see mozpack.files for details on file.copy).

        By default, files in the destination directory that aren't
        registered are removed and empty directories are deleted. In
        addition, all directory symlinks in the destination directory
        are deleted: this is a conservative approach to ensure that we
        never accidentally write files into a directory that is not the
        destination directory. In the worst case, we might have a
        directory symlink in the object directory to the source
        directory.

        To disable removing of unregistered files, pass
        remove_unaccounted=False. To disable removing empty
        directories, pass remove_empty_directories=False. In rare
        cases, you might want to maintain directory symlinks in the
        destination directory (at least those that are not required to
        be regular directories): pass
        remove_all_directory_symlinks=False. Exercise caution with
        this flag: you almost certainly do not want to preserve
        directory symlinks.

        remove_unaccounted may also be a FileRegistry instance. In that
        case the destination directory is not walked: the paths and
        required directories of that registry are taken as the files and
        directories existing in the destination.

        skip_if_older is handed over unchanged to the copy method of each
        registered file.

        Returns a FileCopyResult that details what changed. Any exception
        raised while copying a file is propagated to the caller, whether or
        not the copy went through the thread pool.
        """
        assert isinstance(destination, six.string_types)
        assert not os.path.exists(destination) or os.path.isdir(destination)

        result = FileCopyResult()
        have_symlinks = hasattr(os, "symlink")
        destination = os.path.normpath(destination)

        # We create the destination directory specially. We can't do this as
        # part of the loop doing mkdir() below because that loop munges
        # symlinks and permissions and parent directories of the destination
        # directory may have their own weird schema. The contract is we only
        # manage children of destination, not its parents.
        try:
            os.makedirs(destination)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # Because we could be handling thousands of files, code in this
        # function is optimized to minimize system calls. We prefer CPU time
        # in Python over possibly I/O bound filesystem calls to stat() and
        # friends.

        required_dirs = {destination}
        required_dirs |= set(
            os.path.normpath(os.path.join(destination, d))
            for d in self.required_directories()
        )

        # Ensure destination directories are in place and proper.
        #
        # The "proper" bit is important. We need to ensure that directories
        # have appropriate permissions or we will be unable to discover
        # and write files. Furthermore, we need to verify directories aren't
        # symlinks.
        #
        # Symlinked directories (a symlink whose target is a directory) are
        # incompatible with us because our manifest talks in terms of files,
        # not directories. If we leave symlinked directories unchecked, we
        # would blindly follow symlinks and this might confuse file
        # installation. For example, if an existing directory is a symlink
        # to directory X and we attempt to install a symlink in this directory
        # to a file in directory X, we may create a recursive symlink!
        #
        # Sorting by length guarantees that a parent directory is handled
        # before any of its children.
        for d in sorted(required_dirs, key=len):
            try:
                os.mkdir(d)
            except OSError as error:
                if error.errno != errno.EEXIST:
                    raise

            # We allow the destination to be a symlink because the caller
            # is responsible for managing the destination and we assume
            # they know what they are doing.
            if have_symlinks and d != destination:
                st = os.lstat(d)
                if stat.S_ISLNK(st.st_mode):
                    # While we have remove_unaccounted, it doesn't apply
                    # to directory symlinks because if it did, our behavior
                    # could be very wrong.
                    os.remove(d)
                    os.mkdir(d)

            if not os.access(d, os.W_OK):
                # os.umask() can only be read by setting it, so set a
                # throwaway value and immediately restore the original.
                umask = os.umask(0o077)
                os.umask(umask)
                os.chmod(d, 0o777 & ~umask)

        if isinstance(remove_unaccounted, FileRegistry):
            existing_files = set(
                os.path.normpath(os.path.join(destination, p))
                for p in remove_unaccounted.paths()
            )
            existing_dirs = set(
                os.path.normpath(os.path.join(destination, p))
                for p in remove_unaccounted.required_directories()
            )
            existing_dirs |= {os.path.normpath(destination)}
        else:
            # While we have remove_unaccounted, it doesn't apply to empty
            # directories because it wouldn't make sense: an empty directory
            # is empty, so removing it should have no effect.
            existing_dirs = set()
            existing_files = set()
            for root, dirs, files in os.walk(destination):
                # We need to perform the same symlink detection as above.
                # os.walk() doesn't follow symlinks into directories by
                # default, so we need to check dirs (we can't wait for root).
                if have_symlinks:
                    filtered = []
                    for d in dirs:
                        full = os.path.join(root, d)
                        st = os.lstat(full)
                        if stat.S_ISLNK(st.st_mode):
                            # This directory symlink is not a required
                            # directory: any such symlink would have been
                            # removed and a directory created above.
                            if remove_all_directory_symlinks:
                                os.remove(full)
                                result.removed_files.add(os.path.normpath(full))
                            else:
                                existing_files.add(os.path.normpath(full))
                        else:
                            filtered.append(d)

                    # In-place update so that os.walk() only descends into
                    # the directories that were kept.
                    dirs[:] = filtered

                existing_dirs.add(os.path.normpath(root))

                for d in dirs:
                    existing_dirs.add(os.path.normpath(os.path.join(root, d)))

                for f in files:
                    existing_files.add(os.path.normpath(os.path.join(root, f)))

        # Now we reconcile the state of the world against what we want.
        dest_files = set()

        # Install files.
        #
        # Creating/appending new files on Windows/NTFS is slow. So we use a
        # thread pool to speed it up significantly. The performance of this
        # loop is so critical to common build operations on Linux that the
        # overhead of the thread pool is worth avoiding, so we have 2 code
        # paths. We also employ a low water mark to prevent thread pool
        # creation if number of files is too small to benefit.
        copy_results = []
        if sys.platform == "win32" and len(self) > 100:
            with futures.ThreadPoolExecutor(4) as executor:
                fs = []
                for p, f in self:
                    destfile = os.path.normpath(os.path.join(destination, p))
                    fs.append(
                        (destfile, executor.submit(f.copy, destfile, skip_if_older))
                    )

            # Future.result must be *called*: this yields the actual return
            # value of file.copy and re-raises any exception that happened
            # in a worker thread. Storing the bound method instead would
            # make every file look updated and hide copy failures.
            copy_results = [(path, future.result()) for path, future in fs]
        else:
            for p, f in self:
                destfile = os.path.normpath(os.path.join(destination, p))
                copy_results.append((destfile, f.copy(destfile, skip_if_older)))

        for destfile, copy_result in copy_results:
            dest_files.add(destfile)
            if copy_result:
                result.updated_files.add(destfile)
            else:
                result.existing_files.add(destfile)

        # Remove files no longer accounted for.
        if remove_unaccounted:
            for f in existing_files - dest_files:
                # Windows requires write access to remove files.
                if os.name == "nt" and not os.access(f, os.W_OK):
                    # It doesn't matter what we set permissions to since we
                    # will remove this file shortly.
                    os.chmod(f, 0o600)

                os.remove(f)
                result.removed_files.add(f)

        if not remove_empty_directories:
            return result

        # Figure out which directories can be removed. This is complicated
        # by the fact we optionally remove existing files. This would be easy
        # if we walked the directory tree after installing files. But, we're
        # trying to minimize system calls.

        # Start with the ideal set.
        remove_dirs = existing_dirs - required_dirs

        # Then don't remove directories if we didn't remove unaccounted files
        # and one of those files exists.
        if not remove_unaccounted:
            parents = set()
            pathsep = os.path.sep
            for f in existing_files:
                path = f
                while True:
                    # All the paths are normalized by this point, so
                    # os.path.dirname would only do extra work. The walk up
                    # stops at the first ancestor that was already recorded.
                    dirname = path.rpartition(pathsep)[0]
                    if dirname in parents:
                        break
                    parents.add(dirname)
                    path = dirname
            remove_dirs -= parents

        # Remove empty directories that aren't required. Longest paths go
        # first so that children are removed before their parents.
        for d in sorted(remove_dirs, key=len, reverse=True):
            try:
                try:
                    os.rmdir(d)
                except OSError as e:
                    if e.errno in (errno.EPERM, errno.EACCES):
                        # Permissions may not allow deletion. So ensure write
                        # access is in place before attempting to rmdir again.
                        os.chmod(d, 0o700)
                        os.rmdir(d)
                    else:
                        raise
            except OSError as e:
                # If remove_unaccounted is a FileRegistry, then we have a
                # list of directories that may not be empty, so ignore rmdir
                # ENOTEMPTY errors for them.
                if (
                    isinstance(remove_unaccounted, FileRegistry)
                    and e.errno == errno.ENOTEMPTY
                ):
                    continue
                raise
            result.removed_directories.add(d)

        return result


class Jarrer(FileRegistry, BaseFile):
    """
    FileRegistry able to pack its registered files into a jar file. It is
    also a BaseFile, which allows it to be copied by a FileCopier.
    """

    def __init__(self, compress=True):
        """
        Create a Jarrer instance. See mozpack.mozjar.JarWriter documentation
        for details on the compress argument.
        """
        FileRegistry.__init__(self)
        self.compress = compress
        # Paths to hand over to JarWriter.preload when packing.
        self._preload = []
        # Per-path compress option, overriding self.compress.
        self._compress_options = {}

    def add(self, path, content, compress=None):
        """
        Register the given BaseFile instance under the given path. When
        compress is not None, it is used for that path instead of the
        compress value given at construction time.
        """
        FileRegistry.add(self, path, content)
        if compress is None:
            return
        self._compress_options[path] = compress

    def copy(self, dest, skip_if_older=True):
        """
        Pack all registered files in the given destination jar, which may be
        given either as a path to a jar file or as a Dest instance for a jar
        file.
        When the destination jar already exists, its (compressed) contents
        are used instead of the registered BaseFile instances when
        appropriate.
        """

        class DeflaterDest(Dest):
            """
            Dest-like class that initially reads from a file-like object and
            switches over to a Deflater object as soon as it is written to.

                dest = DeflaterDest(original_file)
                dest.read()      # Reads original_file
                dest.write(data) # Creates a Deflater and write data there
                dest.read()      # Re-opens the Deflater and reads from it
            """

            def __init__(self, orig=None, compress=True):
                self.compress = compress
                self.deflater = orig
                self.mode = None

            def read(self, length=-1):
                if self.mode != "r":
                    assert self.mode is None
                    self.mode = "r"
                source = self.deflater
                return source.read(length)

            def write(self, data):
                if self.mode != "w":
                    # First write: drop the original content and start a
                    # fresh Deflater.
                    from mozpack.mozjar import Deflater

                    self.deflater = Deflater(self.compress)
                    self.mode = "w"
                self.deflater.write(data)

            def exists(self):
                return self.deflater is not None

        if isinstance(dest, six.string_types):
            dest = Dest(dest)
        assert isinstance(dest, Dest)

        from mozpack.mozjar import JarReader, JarWriter

        # Best effort: when the destination can't be read as a jar, carry on
        # as if it had no previous content.
        try:
            old_jar = JarReader(fileobj=dest)
        except Exception:
            old_jar = []

        old_contents = {member.filename: member for member in old_jar}

        with JarWriter(fileobj=dest, compress=self.compress) as jar:
            for path, file in self:
                compress = self._compress_options.get(path, self.compress)
                # Entries absent from the old jar start without any original
                # content (None).
                deflater = DeflaterDest(old_contents.get(path), compress)
                file.copy(deflater, skip_if_older)
                jar.add(path, deflater.deflater, mode=file.mode, compress=compress)
            if self._preload:
                jar.preload(self._preload)

    def open(self):
        raise RuntimeError("unsupported")

    def preload(self, paths):
        """
        Append the given paths to the list of preloaded files. See
        mozpack.mozjar.JarWriter documentation for details on jar preloading.
        """
        self._preload.extend(paths)