From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- python/mozbuild/mozpack/mozjar.py | 842 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 842 insertions(+) create mode 100644 python/mozbuild/mozpack/mozjar.py (limited to 'python/mozbuild/mozpack/mozjar.py') diff --git a/python/mozbuild/mozpack/mozjar.py b/python/mozbuild/mozpack/mozjar.py new file mode 100644 index 0000000000..6500ebfcec --- /dev/null +++ b/python/mozbuild/mozpack/mozjar.py @@ -0,0 +1,842 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import os +import struct +import zlib +from collections import OrderedDict +from io import BytesIO, UnsupportedOperation +from zipfile import ZIP_DEFLATED, ZIP_STORED + +import six + +import mozpack.path as mozpath +from mozbuild.util import ensure_bytes + +JAR_STORED = ZIP_STORED +JAR_DEFLATED = ZIP_DEFLATED +MAX_WBITS = 15 + + +class JarReaderError(Exception): + """Error type for Jar reader errors.""" + + +class JarWriterError(Exception): + """Error type for Jar writer errors.""" + + +class JarStruct(object): + """ + Helper used to define ZIP archive raw data structures. Data structures + handled by this helper all start with a magic number, defined in + subclasses MAGIC field as a 32-bits unsigned integer, followed by data + structured as described in subclasses STRUCT field. + + The STRUCT field contains a list of (name, type) pairs where name is a + field name, and the type can be one of 'uint32', 'uint16' or one of the + field names. In the latter case, the field is considered to be a string + buffer with a length given in that field. + For example, + + .. 
def _init_data(self, data):
    """
    Populate the instance by decoding ``data`` field by field, in the
    order given by self.STRUCT.

    The first 32-bits value of ``data`` must equal self.MAGIC, otherwise
    JarReaderError is raised. Fields whose only role is to give the length
    of another field are decoded but not stored in the instance.
    """
    assert data is not None
    self.signature, consumed = JarStruct.get_data("uint32", data)
    if self.signature != self.MAGIC:
        raise JarReaderError("Bad magic")
    cursor = consumed
    # Decoded values of the length-carrying fields, keyed by field name.
    # They default to 0 until the corresponding field has been read.
    lengths = {field: 0 for field in self.size_fields}
    for name, kind in six.iteritems(self.STRUCT):
        if kind in JarStruct.TYPE_MAPPING:
            # Fixed-width integer field.
            value, width = JarStruct.get_data(kind, data[cursor:])
        else:
            # Variable-length buffer: `kind` names the field that holds
            # its length, which has already been decoded.
            width = lengths[kind]
            value = data[cursor : cursor + width]
            if isinstance(value, memoryview):
                value = value.tobytes()
        if name in lengths:
            lengths[name] = value
        else:
            self._values[name] = value
        cursor += width
def __init__(self, header, data):
    """
    Set up a reader for one archive member.

    ``header`` is the member's local file header and ``data`` is a buffer
    holding the member's data. Only the deflated and stored compression
    methods are accepted.
    """
    method = header["compression"]
    assert method in [JAR_DEFLATED, JAR_STORED]
    self._data = data
    # Mirror the header fields that describe the payload.
    self.compressed_size = header["compressed_size"]
    self.uncompressed_size = header["uncompressed_size"]
    self.crc32 = header["crc32"]
    self.filename = six.ensure_text(header["filename"])
    # `compress` keeps the raw method value, `compressed` is its boolean
    # form (anything but stored counts as compressed).
    self.compress = method
    self.compressed = method != JAR_STORED
class JarReader(object):
    """
    Class with methods to read Jar files. Can open standard jar files as well
    as Mozilla jar files (see further details in the JarWriter documentation).
    """

    def __init__(self, file=None, fileobj=None, data=None):
        """
        Open a Jar archive and locate its End of Central Directory record.

        The archive contents are taken, in order of precedence, from
        ``fileobj`` (read in full through its read() method), from ``file``
        (a path, opened in binary mode) or from ``data`` (a bytes-like
        object).

        Raises JarReaderError when the contents are too short to hold an
        End of Central Directory record or when no such record is found.
        """
        if fileobj:
            data = fileobj.read()
        elif file:
            # Use a context manager so the file handle is closed right away
            # instead of being left to the garbage collector.
            with open(file, "rb") as handle:
                data = handle.read()
        self._data = memoryview(data)
        total = len(self._data)
        # Anything shorter than a comment-less End of Central Directory
        # record cannot be an archive. Reject it up front: a negative slice
        # start beyond the beginning of the buffer is clamped, so the scan
        # below would never hit its stop condition on such input.
        if total < CDIR_END_SIZE:
            raise JarReaderError("Not a jar?")
        # The End of Central Directory Record has a variable size because of
        # comments it may contain, so scan for it from the end of the file.
        offset = -CDIR_END_SIZE
        while True:
            signature = JarStruct.get_data("uint32", self._data[offset:])[0]
            if signature == JarCdirEnd.MAGIC:
                break
            if offset == -total:
                raise JarReaderError("Not a jar?")
            offset -= 1
        self._cdir_end = JarCdirEnd(self._data[offset:])

    def close(self):
        """
        Free some resources associated with the Jar.
        """
        del self._data

    @property
    def compression(self):
        """
        Return the highest compression method value used by the entries of
        the archive, or JAR_STORED when the archive has no entries.
        """
        entries = self.entries
        if not entries:
            return JAR_STORED
        return max(f["compression"] for f in six.itervalues(entries))

    @property
    def entries(self):
        """
        Return an ordered dict of central directory entries, indexed by
        filename, in the order they appear in the Jar archive central
        directory. Directory entries are skipped.

        The result is computed on first access and cached. As a side effect,
        the name of the last entry located within the preloaded data is
        recorded for the last_preloaded property.
        """
        if hasattr(self, "_entries"):
            return self._entries
        preload = 0
        if self.is_optimized:
            # Optimized archives start with the amount of preloaded data.
            preload = JarStruct.get_data("uint32", self._data)[0]
        entries = OrderedDict()
        offset = self._cdir_end["cdir_offset"]
        for _ in six.moves.xrange(self._cdir_end["cdir_entries"]):
            entry = JarCdirEntry(self._data[offset:])
            offset += entry.size
            # Creator host system. 0 is MSDOS, 3 is Unix
            host = entry["creator_version"] >> 8
            # External attributes values depend on host above. On Unix the
            # higher bits are the stat.st_mode value. On MSDOS, the lower bits
            # are the FAT attributes.
            xattr = entry["external_attr"]
            # Skip directories
            if (host == 0 and xattr & 0x10) or (
                host == 3 and xattr & (0o040000 << 16)
            ):
                continue
            filename = six.ensure_text(entry["filename"])
            entries[filename] = entry
            if entry["offset"] < preload:
                self._last_preloaded = filename
        self._entries = entries
        return entries

    @property
    def is_optimized(self):
        """
        Return whether the jar archive is optimized.
        """
        # In optimized jars, the central directory is at the beginning of the
        # file, after a single 32-bits value, which is the length of data
        # preloaded.
        return self._cdir_end["cdir_offset"] == JarStruct.TYPE_MAPPING["uint32"][1]

    @property
    def last_preloaded(self):
        """
        Return the name of the last file that is set to be preloaded, or
        None when there is none.
        See JarWriter documentation for more details on preloading.
        """
        if hasattr(self, "_last_preloaded"):
            return self._last_preloaded
        self._last_preloaded = None
        # Reading the entries sets self._last_preloaded when applicable.
        self.entries
        return self._last_preloaded

    def _getreader(self, entry):
        """
        Helper to create a JarFileReader corresponding to the given central
        directory entry.

        Raises JarReaderError when a field present in both the central
        directory entry and the local file header has different values.
        """
        header = JarLocalFileHeader(self._data[entry["offset"] :])
        for key, value in entry:
            if key in header and header[key] != value:
                raise JarReaderError(
                    "Central directory and file header "
                    "mismatch. Corrupted archive?"
                )
        # The file data immediately follows the local file header.
        return JarFileReader(header, self._data[entry["offset"] + header.size :])

    def __iter__(self):
        """
        Iterate over all files in the Jar archive, in the form of
        JarFileReaders.
            for file in jarReader:
                ...
        """
        for entry in six.itervalues(self.entries):
            yield self._getreader(entry)

    def __getitem__(self, name):
        """
        Get a JarFileReader for the given file name.
        """
        return self._getreader(self.entries[name])

    def __contains__(self, name):
        """
        Return whether the given file name appears in the Jar archive.
        """
        return name in self.entries
+ + Standard jar archives are laid out like the following: + - Local file header 1 + - File data 1 + - Local file header 2 + - File data 2 + - (...) + - Central directory entry pointing at Local file header 1 + - Central directory entry pointing at Local file header 2 + - (...) + - End of central directory, pointing at first central directory + entry. + + Jar archives optimized for Gecko are laid out like the following: + - 32-bits unsigned integer giving the amount of data to preload. + - Central directory entry pointing at Local file header 1 + - Central directory entry pointing at Local file header 2 + - (...) + - End of central directory, pointing at first central directory + entry. + - Local file header 1 + - File data 1 + - Local file header 2 + - File data 2 + - (...) + - End of central directory, pointing at first central directory + entry. + + The duplication of the End of central directory is to accomodate some + Zip reading tools that want an end of central directory structure to + follow the central directory entries. + """ + offset = 0 + headers = {} + preload_size = 0 + # Prepare central directory entries + for entry, content in six.itervalues(self._contents): + header = JarLocalFileHeader() + for name in entry.STRUCT: + if name in header: + header[name] = entry[name] + entry["offset"] = offset + offset += len(content) + header.size + if six.ensure_text(entry["filename"]) == self._last_preloaded: + preload_size = offset + headers[entry] = header + # Prepare end of central directory + end = JarCdirEnd() + end["disk_entries"] = len(self._contents) + end["cdir_entries"] = end["disk_entries"] + end["cdir_size"] = six.moves.reduce( + lambda x, y: x + y[0].size, self._contents.values(), 0 + ) + # On optimized archives, store the preloaded size and the central + # directory entries, followed by the first end of central directory. 
+ if preload_size: + end["cdir_offset"] = 4 + offset = end["cdir_size"] + end["cdir_offset"] + end.size + preload_size += offset + self._data.write(struct.pack("