summaryrefslogtreecommitdiffstats
path: root/toolkit/mozapps/installer/find-dupes.py
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/mozapps/installer/find-dupes.py')
-rw-r--r--toolkit/mozapps/installer/find-dupes.py153
1 files changed, 153 insertions, 0 deletions
diff --git a/toolkit/mozapps/installer/find-dupes.py b/toolkit/mozapps/installer/find-dupes.py
new file mode 100644
index 0000000000..903dfd3b74
--- /dev/null
+++ b/toolkit/mozapps/installer/find-dupes.py
@@ -0,0 +1,153 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, unicode_literals, print_function
+
+import sys
+import hashlib
+import re
+import os
+import functools
+from mozbuild.preprocessor import Preprocessor
+from mozbuild.util import DefinesAction
+from mozpack.packager.unpack import UnpackFinder
+from mozpack.files import DeflatedFile
+from collections import OrderedDict
+import six
+from six import StringIO
+import argparse
+import buildconfig
+
+"""
+Find files duplicated in a given packaged directory, independently of its
+package format.
+"""
+
+
+def normalize_osx_path(p):
+ """
+ Strips the first 3 elements of an OSX app path
+
+ >>> normalize_osx_path('Nightly.app/foo/bar/baz')
+ 'baz'
+ """
+ bits = p.split("/")
+ if len(bits) > 3 and bits[0].endswith(".app"):
+ return "/".join(bits[3:])
+ return p
+
+
+def is_l10n_file(path):
+ return (
+ "/locale/" in path
+ or "/localization/" in path
+ or path.startswith("localization/")
+ )
+
+
+def normalize_path(p):
+ return normalize_osx_path(p)
+
+
+def find_dupes(source, allowed_dupes, bail=True):
+ md5_chunk_size = 1024 * 10
+ allowed_dupes = set(allowed_dupes)
+ md5s = OrderedDict()
+ for p, f in UnpackFinder(source):
+ md5 = hashlib.md5()
+ content_size = 0
+ for buf in iter(functools.partial(f.open().read, md5_chunk_size), b""):
+ md5.update(six.ensure_binary(buf))
+ content_size += len(six.ensure_binary(buf))
+ m = md5.digest()
+ if m not in md5s:
+ if isinstance(f, DeflatedFile):
+ compressed = f.file.compressed_size
+ else:
+ compressed = content_size
+ md5s[m] = (content_size, compressed, [])
+ md5s[m][2].append(p)
+ total = 0
+ total_compressed = 0
+ num_dupes = 0
+ unexpected_dupes = []
+ for m, (size, compressed, paths) in sorted(
+ six.iteritems(md5s), key=lambda x: x[1][1]
+ ):
+ if len(paths) > 1:
+ _compressed = " (%d compressed)" % compressed if compressed != size else ""
+ _times = " (%d times)" % (len(paths) - 1) if len(paths) > 2 else ""
+ print("Duplicates {} bytes{}{}:".format(size, _compressed, _times))
+ print("".join(" %s\n" % p for p in paths))
+ total += (len(paths) - 1) * size
+ total_compressed += (len(paths) - 1) * compressed
+ num_dupes += 1
+
+ for p in paths:
+ if not is_l10n_file(p) and normalize_path(p) not in allowed_dupes:
+ unexpected_dupes.append(p)
+
+ if num_dupes:
+ total_compressed = (
+ "%d compressed" % total_compressed
+ if total_compressed != total
+ else "uncompressed"
+ )
+ print(
+ "WARNING: Found {} duplicated files taking {} bytes ({})".format(
+ num_dupes, total, total_compressed
+ )
+ )
+
+ if unexpected_dupes:
+ errortype = "ERROR" if bail else "WARNING"
+ print("{}: The following duplicated files are not allowed:".format(errortype))
+ print("\n".join(unexpected_dupes))
+ if bail:
+ sys.exit(1)
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Find duplicate files in directory.")
+ parser.add_argument(
+ "--warning",
+ "-w",
+ action="store_true",
+ help="Only warn about duplicates, do not exit with an error",
+ )
+ parser.add_argument(
+ "--file",
+ "-f",
+ action="append",
+ dest="dupes_files",
+ default=[],
+ help="Add exceptions to the duplicate list from this file",
+ )
+ parser.add_argument("-D", action=DefinesAction)
+ parser.add_argument("-U", action="append", default=[])
+ parser.add_argument("directory", help="The directory to check for duplicates in")
+
+ args = parser.parse_args()
+
+ allowed_dupes = []
+ for filename in args.dupes_files:
+ pp = Preprocessor()
+ pp.context.update(buildconfig.defines["ALLDEFINES"])
+ if args.D:
+ pp.context.update(args.D)
+ for undefine in args.U:
+ if undefine in pp.context:
+ del pp.context[undefine]
+ pp.out = StringIO()
+ pp.do_filter("substitution")
+ pp.do_include(filename)
+ allowed_dupes.extend(
+ [line.partition("#")[0].rstrip() for line in pp.out.getvalue().splitlines()]
+ )
+
+ find_dupes(args.directory, bail=not args.warning, allowed_dupes=allowed_dupes)
+
+
+if __name__ == "__main__":
+ main()