# This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. from __future__ import absolute_import, print_function, unicode_literals from io import ( BytesIO, UnsupportedOperation, ) import struct import subprocess import zlib import os import six from zipfile import ( ZIP_STORED, ZIP_DEFLATED, ) from collections import OrderedDict import mozpack.path as mozpath from mozbuild.util import ( memoize, ensure_bytes, ) JAR_STORED = ZIP_STORED JAR_DEFLATED = ZIP_DEFLATED JAR_BROTLI = 0x81 MAX_WBITS = 15 class JarReaderError(Exception): """Error type for Jar reader errors.""" class JarWriterError(Exception): """Error type for Jar writer errors.""" class JarStruct(object): """ Helper used to define ZIP archive raw data structures. Data structures handled by this helper all start with a magic number, defined in subclasses MAGIC field as a 32-bits unsigned integer, followed by data structured as described in subclasses STRUCT field. The STRUCT field contains a list of (name, type) pairs where name is a field name, and the type can be one of 'uint32', 'uint16' or one of the field names. In the latter case, the field is considered to be a string buffer with a length given in that field. For example, STRUCT = [ ('version', 'uint32'), ('filename_size', 'uint16'), ('filename', 'filename_size') ] describes a structure with a 'version' 32-bits unsigned integer field, followed by a 'filename_size' 16-bits unsigned integer field, followed by a filename_size-long string buffer 'filename'. Fields that are used as other fields size are not stored in objects. In the above example, an instance of such subclass would only have two attributes: obj['version'] obj['filename'] filename_size would be obtained with len(obj['filename']). JarStruct subclasses instances can be either initialized from existing data (deserialized), or with empty fields. """ TYPE_MAPPING = {"uint32": (b"I", 4), "uint16": (b"H", 2)} def __init__(self, data=None): """ Create an instance from the given data. Data may be omitted to create an instance with empty fields. """ assert self.MAGIC and isinstance(self.STRUCT, OrderedDict) self.size_fields = set( t for t in six.itervalues(self.STRUCT) if t not in JarStruct.TYPE_MAPPING ) self._values = {} if data: self._init_data(data) else: self._init_empty() def _init_data(self, data): """ Initialize an instance from data, following the data structure described in self.STRUCT. The self.MAGIC signature is expected at data[:4]. """ assert data is not None self.signature, size = JarStruct.get_data("uint32", data) if self.signature != self.MAGIC: raise JarReaderError("Bad magic") offset = size # For all fields used as other fields sizes, keep track of their value # separately. sizes = dict((t, 0) for t in self.size_fields) for name, t in six.iteritems(self.STRUCT): if t in JarStruct.TYPE_MAPPING: value, size = JarStruct.get_data(t, data[offset:]) else: size = sizes[t] value = data[offset : offset + size] if isinstance(value, memoryview): value = value.tobytes() if name not in sizes: self._values[name] = value else: sizes[name] = value offset += size def _init_empty(self): """ Initialize an instance with empty fields. """ self.signature = self.MAGIC for name, t in six.iteritems(self.STRUCT): if name in self.size_fields: continue self._values[name] = 0 if t in JarStruct.TYPE_MAPPING else "" @staticmethod def get_data(type, data): """ Deserialize a single field of given type (must be one of JarStruct.TYPE_MAPPING) at the given offset in the given data. """ assert type in JarStruct.TYPE_MAPPING assert data is not None format, size = JarStruct.TYPE_MAPPING[type] data = data[:size] if isinstance(data, memoryview): data = data.tobytes() return struct.unpack(b"<" + format, data)[0], size def serialize(self): """ Serialize the data structure according to the data structure definition from self.STRUCT. """ serialized = struct.pack(b"" % ( self.__class__.__name__, " ".join("%s=%s" % (n, v) for n, v in self), ) class JarCdirEnd(JarStruct): """ End of central directory record. """ MAGIC = 0x06054B50 STRUCT = OrderedDict( [ ("disk_num", "uint16"), ("cdir_disk", "uint16"), ("disk_entries", "uint16"), ("cdir_entries", "uint16"), ("cdir_size", "uint32"), ("cdir_offset", "uint32"), ("comment_size", "uint16"), ("comment", "comment_size"), ] ) CDIR_END_SIZE = JarCdirEnd().size class JarCdirEntry(JarStruct): """ Central directory file header """ MAGIC = 0x02014B50 STRUCT = OrderedDict( [ ("creator_version", "uint16"), ("min_version", "uint16"), ("general_flag", "uint16"), ("compression", "uint16"), ("lastmod_time", "uint16"), ("lastmod_date", "uint16"), ("crc32", "uint32"), ("compressed_size", "uint32"), ("uncompressed_size", "uint32"), ("filename_size", "uint16"), ("extrafield_size", "uint16"), ("filecomment_size", "uint16"), ("disknum", "uint16"), ("internal_attr", "uint16"), ("external_attr", "uint32"), ("offset", "uint32"), ("filename", "filename_size"), ("extrafield", "extrafield_size"), ("filecomment", "filecomment_size"), ] ) class JarLocalFileHeader(JarStruct): """ Local file header """ MAGIC = 0x04034B50 STRUCT = OrderedDict( [ ("min_version", "uint16"), ("general_flag", "uint16"), ("compression", "uint16"), ("lastmod_time", "uint16"), ("lastmod_date", "uint16"), ("crc32", "uint32"), ("compressed_size", "uint32"), ("uncompressed_size", "uint32"), ("filename_size", "uint16"), ("extra_field_size", "uint16"), ("filename", "filename_size"), ("extra_field", "extra_field_size"), ] ) class JarFileReader(object): """ File-like class for use by JarReader to give access to individual files within a Jar archive. """ def __init__(self, header, data): """ Initialize a JarFileReader. header is the local file header corresponding to the file in the jar archive, data a buffer containing the file data. """ assert header["compression"] in [JAR_DEFLATED, JAR_STORED, JAR_BROTLI] self._data = data # Copy some local file header fields. for name in ["compressed_size", "uncompressed_size", "crc32"]: setattr(self, name, header[name]) self.filename = six.ensure_text(header["filename"]) self.compressed = header["compression"] != JAR_STORED self.compress = header["compression"] def read(self, length=-1): """ Read some amount of uncompressed data. """ return self.uncompressed_data.read(length) def readlines(self): """ Return a list containing all the lines of data in the uncompressed data. """ return self.read().splitlines(True) def __iter__(self): """ Iterator, to support the "for line in fileobj" constructs. """ return iter(self.readlines()) def seek(self, pos, whence=os.SEEK_SET): """ Change the current position in the uncompressed data. Subsequent reads will start from there. """ return self.uncompressed_data.seek(pos, whence) def close(self): """ Free the uncompressed data buffer. """ self.uncompressed_data.close() @property def compressed_data(self): """ Return the raw compressed data. """ return self._data[: self.compressed_size] @property def uncompressed_data(self): """ Return the uncompressed data. """ if hasattr(self, "_uncompressed_data"): return self._uncompressed_data data = self.compressed_data if self.compress == JAR_STORED: data = data.tobytes() elif self.compress == JAR_BROTLI: data = Brotli.decompress(data.tobytes()) elif self.compress == JAR_DEFLATED: data = zlib.decompress(data.tobytes(), -MAX_WBITS) else: assert False # Can't be another value per __init__ if len(data) != self.uncompressed_size: raise JarReaderError("Corrupted file? %s" % self.filename) self._uncompressed_data = BytesIO(data) return self._uncompressed_data class JarReader(object): """ Class with methods to read Jar files. Can open standard jar files as well as Mozilla jar files (see further details in the JarWriter documentation). """ def __init__(self, file=None, fileobj=None, data=None): """ Opens the given file as a Jar archive. Use the given file-like object if one is given instead of opening the given file name. """ if fileobj: data = fileobj.read() elif file: data = open(file, "rb").read() self._data = memoryview(data) # The End of Central Directory Record has a variable size because of # comments it may contain, so scan for it from the end of the file. offset = -CDIR_END_SIZE while True: signature = JarStruct.get_data("uint32", self._data[offset:])[0] if signature == JarCdirEnd.MAGIC: break if offset == -len(self._data): raise JarReaderError("Not a jar?") offset -= 1 self._cdir_end = JarCdirEnd(self._data[offset:]) def close(self): """ Free some resources associated with the Jar. """ del self._data @property def compression(self): entries = self.entries if not entries: return JAR_STORED return max(f["compression"] for f in six.itervalues(entries)) @property def entries(self): """ Return an ordered dict of central directory entries, indexed by filename, in the order they appear in the Jar archive central directory. Directory entries are skipped. """ if hasattr(self, "_entries"): return self._entries preload = 0 if self.is_optimized: preload = JarStruct.get_data("uint32", self._data)[0] entries = OrderedDict() offset = self._cdir_end["cdir_offset"] for e in six.moves.xrange(self._cdir_end["cdir_entries"]): entry = JarCdirEntry(self._data[offset:]) offset += entry.size # Creator host system. 0 is MSDOS, 3 is Unix host = entry["creator_version"] >> 8 # External attributes values depend on host above. On Unix the # higher bits are the stat.st_mode value. On MSDOS, the lower bits # are the FAT attributes. xattr = entry["external_attr"] # Skip directories if (host == 0 and xattr & 0x10) or (host == 3 and xattr & (0o040000 << 16)): continue entries[six.ensure_text(entry["filename"])] = entry if entry["offset"] < preload: self._last_preloaded = six.ensure_text(entry["filename"]) self._entries = entries return entries @property def is_optimized(self): """ Return whether the jar archive is optimized. """ # In optimized jars, the central directory is at the beginning of the # file, after a single 32-bits value, which is the length of data # preloaded. return self._cdir_end["cdir_offset"] == JarStruct.TYPE_MAPPING["uint32"][1] @property def last_preloaded(self): """ Return the name of the last file that is set to be preloaded. See JarWriter documentation for more details on preloading. """ if hasattr(self, "_last_preloaded"): return self._last_preloaded self._last_preloaded = None self.entries return self._last_preloaded def _getreader(self, entry): """ Helper to create a JarFileReader corresponding to the given central directory entry. """ header = JarLocalFileHeader(self._data[entry["offset"] :]) for key, value in entry: if key in header and header[key] != value: raise JarReaderError( "Central directory and file header " + "mismatch. Corrupted archive?" ) return JarFileReader(header, self._data[entry["offset"] + header.size :]) def __iter__(self): """ Iterate over all files in the Jar archive, in the form of JarFileReaders. for file in jarReader: ... """ for entry in six.itervalues(self.entries): yield self._getreader(entry) def __getitem__(self, name): """ Get a JarFileReader for the given file name. """ return self._getreader(self.entries[name]) def __contains__(self, name): """ Return whether the given file name appears in the Jar archive. """ return name in self.entries class JarWriter(object): """ Class with methods to write Jar files. Can write more-or-less standard jar archives as well as jar archives optimized for Gecko. See the documentation for the close() member function for a description of both layouts. """ def __init__(self, file=None, fileobj=None, compress=True, compress_level=9): """ Initialize a Jar archive in the given file. Use the given file-like object if one is given instead of opening the given file name. The compress option determines the default behavior for storing data in the jar archive. The optimize options determines whether the jar archive should be optimized for Gecko or not. ``compress_level`` defines the zlib compression level. It must be a value between 0 and 9 and defaults to 9, the highest and slowest level of compression. """ if fileobj: self._data = fileobj else: self._data = open(file, "wb") if compress is True: compress = JAR_DEFLATED self._compress = compress self._compress_level = compress_level self._contents = OrderedDict() self._last_preloaded = None def __enter__(self): """ Context manager __enter__ method for JarWriter. """ return self def __exit__(self, type, value, tb): """ Context manager __exit__ method for JarWriter. """ self.finish() def finish(self): """ Flush and close the Jar archive. Standard jar archives are laid out like the following: - Local file header 1 - File data 1 - Local file header 2 - File data 2 - (...) - Central directory entry pointing at Local file header 1 - Central directory entry pointing at Local file header 2 - (...) - End of central directory, pointing at first central directory entry. Jar archives optimized for Gecko are laid out like the following: - 32-bits unsigned integer giving the amount of data to preload. - Central directory entry pointing at Local file header 1 - Central directory entry pointing at Local file header 2 - (...) - End of central directory, pointing at first central directory entry. - Local file header 1 - File data 1 - Local file header 2 - File data 2 - (...) - End of central directory, pointing at first central directory entry. The duplication of the End of central directory is to accomodate some Zip reading tools that want an end of central directory structure to follow the central directory entries. """ offset = 0 headers = {} preload_size = 0 # Prepare central directory entries for entry, content in six.itervalues(self._contents): header = JarLocalFileHeader() for name in entry.STRUCT: if name in header: header[name] = entry[name] entry["offset"] = offset offset += len(content) + header.size if six.ensure_text(entry["filename"]) == self._last_preloaded: preload_size = offset headers[entry] = header # Prepare end of central directory end = JarCdirEnd() end["disk_entries"] = len(self._contents) end["cdir_entries"] = end["disk_entries"] end["cdir_size"] = six.moves.reduce( lambda x, y: x + y[0].size, self._contents.values(), 0 ) # On optimized archives, store the preloaded size and the central # directory entries, followed by the first end of central directory. if preload_size: end["cdir_offset"] = 4 offset = end["cdir_size"] + end["cdir_offset"] + end.size preload_size += offset self._data.write(struct.pack("