Diffstat
-rw-r--r-- | toolkit/mozapps/installer/find-dupes.py | 148
1 file changed, 148 insertions, 0 deletions
diff --git a/toolkit/mozapps/installer/find-dupes.py b/toolkit/mozapps/installer/find-dupes.py
new file mode 100644
index 0000000000..1931481b7c
--- /dev/null
+++ b/toolkit/mozapps/installer/find-dupes.py
@@ -0,0 +1,148 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import sys
+import hashlib
+import functools
+from mozbuild.preprocessor import Preprocessor
+from mozbuild.util import DefinesAction
+from mozpack.packager.unpack import UnpackFinder
+from mozpack.files import DeflatedFile
+from collections import OrderedDict
+from io import StringIO
+import argparse
+import buildconfig
+
+"""
+Find files duplicated in a given packaged directory, independently of its
+package format.
+"""
+
+
+def normalize_osx_path(p):
+    """
+    Strips the first 3 elements of an OSX app path
+
+    >>> normalize_osx_path('Nightly.app/foo/bar/baz')
+    'baz'
+    """
+    bits = p.split("/")
+    if len(bits) > 3 and bits[0].endswith(".app"):
+        return "/".join(bits[3:])
+    return p
+
+
+def is_l10n_file(path):
+    return (
+        "/locale/" in path
+        or "/localization/" in path
+        or path.startswith("localization/")
+    )
+
+
+def normalize_path(p):
+    return normalize_osx_path(p)
+
+
+def find_dupes(source, allowed_dupes, bail=True):
+    chunk_size = 1024 * 10
+    allowed_dupes = set(allowed_dupes)
+    checksums = OrderedDict()
+    for p, f in UnpackFinder(source):
+        checksum = hashlib.sha1()
+        content_size = 0
+        for buf in iter(functools.partial(f.open().read, chunk_size), b""):
+            checksum.update(buf)
+            content_size += len(buf)
+        m = checksum.digest()
+        if m not in checksums:
+            if isinstance(f, DeflatedFile):
+                compressed = f.file.compressed_size
+            else:
+                compressed = content_size
+            checksums[m] = (content_size, compressed, [])
+        checksums[m][2].append(p)
+    total = 0
+    total_compressed = 0
+    num_dupes = 0
+    unexpected_dupes = []
+    for m, (size, compressed, paths) in sorted(
+        checksums.items(), key=lambda x: x[1][1]
+    ):
+        if len(paths) > 1:
+            _compressed = " (%d compressed)" % compressed if compressed != size else ""
+            _times = " (%d times)" % (len(paths) - 1) if len(paths) > 2 else ""
+            print("Duplicates {} bytes{}{}:".format(size, _compressed, _times))
+            print("".join("  %s\n" % p for p in paths))
+            total += (len(paths) - 1) * size
+            total_compressed += (len(paths) - 1) * compressed
+            num_dupes += 1
+
+            for p in paths:
+                if not is_l10n_file(p) and normalize_path(p) not in allowed_dupes:
+                    unexpected_dupes.append(p)
+
+    if num_dupes:
+        total_compressed = (
+            "%d compressed" % total_compressed
+            if total_compressed != total
+            else "uncompressed"
+        )
+        print(
+            "WARNING: Found {} duplicated files taking {} bytes ({})".format(
+                num_dupes, total, total_compressed
+            )
+        )
+
+    if unexpected_dupes:
+        errortype = "ERROR" if bail else "WARNING"
+        print("{}: The following duplicated files are not allowed:".format(errortype))
+        print("\n".join(unexpected_dupes))
+        if bail:
+            sys.exit(1)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Find duplicate files in directory.")
+    parser.add_argument(
+        "--warning",
+        "-w",
+        action="store_true",
+        help="Only warn about duplicates, do not exit with an error",
+    )
+    parser.add_argument(
+        "--file",
+        "-f",
+        action="append",
+        dest="dupes_files",
+        default=[],
+        help="Add exceptions to the duplicate list from this file",
+    )
+    parser.add_argument("-D", action=DefinesAction)
+    parser.add_argument("-U", action="append", default=[])
+    parser.add_argument("directory", help="The directory to check for duplicates in")
+
+    args = parser.parse_args()
+
+    allowed_dupes = []
+    for filename in args.dupes_files:
+        pp = Preprocessor()
+        pp.context.update(buildconfig.defines["ALLDEFINES"])
+        if args.D:
+            pp.context.update(args.D)
+        for undefine in args.U:
+            if undefine in pp.context:
+                del pp.context[undefine]
+        pp.out = StringIO()
+        pp.do_filter("substitution")
+        pp.do_include(filename)
+        allowed_dupes.extend(
+            [line.partition("#")[0].rstrip() for line in pp.out.getvalue().splitlines()]
+        )
+
+    find_dupes(args.directory, bail=not args.warning, allowed_dupes=allowed_dupes)
+
+
+if __name__ == "__main__":
+    main()
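For context (commentary, not part of the patch): find_dupes() above hashes every file yielded by UnpackFinder and groups paths by SHA-1 digest, so any digest that maps to more than one path is a duplicate. A minimal, stdlib-only sketch of that same grouping technique, applied to an ordinary directory tree rather than a packaged build, follows; the name find_dupes_in_dir and its defaults are illustrative, not taken from the patch.

import hashlib
import os
from collections import defaultdict


def find_dupes_in_dir(root, chunk_size=10 * 1024):
    """Group files under `root` by the SHA-1 of their contents (illustrative sketch)."""
    groups = defaultdict(list)  # digest -> list of relative paths
    for dirpath, _dirs, files in os.walk(root):
        for name in files:
            path = os.path.join(dirpath, name)
            digest = hashlib.sha1()
            with open(path, "rb") as fh:
                # Read in chunks so large files need not fit in memory.
                for buf in iter(lambda: fh.read(chunk_size), b""):
                    digest.update(buf)
            groups[digest.digest()].append(os.path.relpath(path, root))
    # Digests shared by more than one path are duplicates.
    return {d: paths for d, paths in groups.items() if len(paths) > 1}


if __name__ == "__main__":
    for paths in find_dupes_in_dir(".").values():
        print("Duplicates:", ", ".join(sorted(paths)))

The real script layers Mozilla-specific policy on top of this grouping: localization files are exempt, macOS .app path prefixes are normalized away, and any remaining duplicate must appear in the preprocessed exception files passed with -f, otherwise the script exits non-zero unless --warning is given.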