224 lines
7.4 KiB
Python
224 lines
7.4 KiB
Python
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
# TODO: Eventually consolidate with mozpack.pkg module. This is kept separate
|
|
# for now because of the vast difference in API, and to avoid churn for the
|
|
# users of this module (docker images, macos SDK artifacts) when changes are
|
|
# necessary in mozpack.pkg
|
|
import bz2
|
|
import concurrent.futures
|
|
import io
|
|
import lzma
|
|
import os
|
|
import struct
|
|
import zlib
|
|
from collections import deque, namedtuple
|
|
from xml.etree.ElementTree import XML
|
|
|
|
|
|
class ZlibFile:
|
|
def __init__(self, fileobj):
|
|
self.fileobj = fileobj
|
|
self.decompressor = zlib.decompressobj()
|
|
self.buf = b""
|
|
|
|
def read(self, length):
|
|
cutoff = min(length, len(self.buf))
|
|
result = self.buf[:cutoff]
|
|
self.buf = self.buf[cutoff:]
|
|
while len(result) < length:
|
|
buf = self.fileobj.read(io.DEFAULT_BUFFER_SIZE)
|
|
if not buf:
|
|
break
|
|
buf = self.decompressor.decompress(buf)
|
|
cutoff = min(length - len(result), len(buf))
|
|
result += buf[:cutoff]
|
|
self.buf += buf[cutoff:]
|
|
return result
|
|
|
|
|
|
def unxar(fileobj):
|
|
magic = fileobj.read(4)
|
|
if magic != b"xar!":
|
|
raise Exception("Not a XAR?")
|
|
|
|
header_size = fileobj.read(2)
|
|
header_size = struct.unpack(">H", header_size)[0]
|
|
if header_size > 64:
|
|
raise Exception(
|
|
f"Don't know how to handle a {header_size} bytes XAR header size"
|
|
)
|
|
header_size -= 6 # what we've read so far.
|
|
header = fileobj.read(header_size)
|
|
if len(header) != header_size:
|
|
raise Exception("Failed to read XAR header")
|
|
(
|
|
version,
|
|
compressed_toc_len,
|
|
uncompressed_toc_len,
|
|
checksum_type,
|
|
) = struct.unpack(">HQQL", header[:22])
|
|
if version != 1:
|
|
raise Exception(f"XAR version {version} not supported")
|
|
toc = fileobj.read(compressed_toc_len)
|
|
base = fileobj.tell()
|
|
if len(toc) != compressed_toc_len:
|
|
raise Exception("Failed to read XAR TOC")
|
|
toc = zlib.decompress(toc)
|
|
if len(toc) != uncompressed_toc_len:
|
|
raise Exception("Corrupted XAR?")
|
|
toc = XML(toc).find("toc")
|
|
queue = deque(toc.findall("file"))
|
|
while queue:
|
|
f = queue.pop()
|
|
queue.extend(f.iterfind("file"))
|
|
if f.find("type").text != "file":
|
|
continue
|
|
filename = f.find("name").text
|
|
data = f.find("data")
|
|
length = int(data.find("length").text)
|
|
size = int(data.find("size").text)
|
|
offset = int(data.find("offset").text)
|
|
encoding = data.find("encoding").get("style")
|
|
fileobj.seek(base + offset, os.SEEK_SET)
|
|
content = Take(fileobj, length)
|
|
if encoding == "application/octet-stream":
|
|
if length != size:
|
|
raise Exception(f"{length} != {size}")
|
|
elif encoding == "application/x-bzip2":
|
|
content = bz2.BZ2File(content)
|
|
elif encoding == "application/x-gzip":
|
|
# Despite the encoding saying gzip, it is in fact, a raw zlib stream.
|
|
content = ZlibFile(content)
|
|
else:
|
|
raise Exception(f"XAR encoding {encoding} not supported")
|
|
|
|
yield filename, content
|
|
|
|
|
|
class Pbzx:
|
|
def __init__(self, fileobj):
|
|
magic = fileobj.read(4)
|
|
if magic != b"pbzx":
|
|
raise Exception("Not a PBZX payload?")
|
|
# The first thing in the file looks like the size of each
|
|
# decompressed chunk except the last one. It should match
|
|
# decompressed_size in all cases except last, but we don't
|
|
# check.
|
|
chunk_size = fileobj.read(8)
|
|
chunk_size = struct.unpack(">Q", chunk_size)[0]
|
|
# Not using mozbuild.util.cpu_count() because this file is used standalone
|
|
# to generate system symbols.
|
|
executor = concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count())
|
|
self.chunk_getter = executor.map(self._uncompress_chunk, self._chunker(fileobj))
|
|
self._init_one_chunk()
|
|
|
|
@staticmethod
|
|
def _chunker(fileobj):
|
|
while True:
|
|
header = fileobj.read(16)
|
|
if header == b"":
|
|
break
|
|
if len(header) != 16:
|
|
raise Exception("Corrupted PBZX payload?")
|
|
decompressed_size, compressed_size = struct.unpack(">QQ", header)
|
|
chunk = fileobj.read(compressed_size)
|
|
yield decompressed_size, compressed_size, chunk
|
|
|
|
@staticmethod
|
|
def _uncompress_chunk(data):
|
|
decompressed_size, compressed_size, chunk = data
|
|
if compressed_size != decompressed_size:
|
|
chunk = lzma.decompress(chunk)
|
|
if len(chunk) != decompressed_size:
|
|
raise Exception("Corrupted PBZX payload?")
|
|
return chunk
|
|
|
|
def _init_one_chunk(self):
|
|
self.offset = 0
|
|
self.chunk = next(self.chunk_getter, "")
|
|
|
|
def read(self, length=None):
|
|
if length == 0:
|
|
return b""
|
|
if length and len(self.chunk) >= self.offset + length:
|
|
start = self.offset
|
|
self.offset += length
|
|
return self.chunk[start : self.offset]
|
|
else:
|
|
result = self.chunk[self.offset :]
|
|
self._init_one_chunk()
|
|
if self.chunk:
|
|
# XXX: suboptimal if length is larger than the chunk size
|
|
result += self.read(None if length is None else length - len(result))
|
|
return result
|
|
|
|
|
|
class Take:
|
|
"""
|
|
File object wrapper that allows to read at most a certain length.
|
|
"""
|
|
|
|
def __init__(self, fileobj, limit):
|
|
self.fileobj = fileobj
|
|
self.limit = limit
|
|
|
|
def read(self, length=None):
|
|
if length is None:
|
|
length = self.limit
|
|
else:
|
|
length = min(length, self.limit)
|
|
result = self.fileobj.read(length)
|
|
self.limit -= len(result)
|
|
return result
|
|
|
|
|
|
CpioInfo = namedtuple("CpioInfo", ["mode", "nlink", "dev", "ino"])
|
|
|
|
|
|
def uncpio(fileobj):
|
|
while True:
|
|
magic = fileobj.read(6)
|
|
# CPIO payloads in mac pkg files are using the portable ASCII format.
|
|
if magic != b"070707":
|
|
if magic.startswith(b"0707"):
|
|
raise Exception("Unsupported CPIO format")
|
|
raise Exception("Not a CPIO header")
|
|
header = fileobj.read(70)
|
|
(
|
|
dev,
|
|
ino,
|
|
mode,
|
|
uid,
|
|
gid,
|
|
nlink,
|
|
rdev,
|
|
mtime,
|
|
namesize,
|
|
filesize,
|
|
) = struct.unpack(">6s6s6s6s6s6s6s11s6s11s", header)
|
|
dev = int(dev, 8)
|
|
ino = int(ino, 8)
|
|
mode = int(mode, 8)
|
|
nlink = int(nlink, 8)
|
|
namesize = int(namesize, 8)
|
|
filesize = int(filesize, 8)
|
|
name = fileobj.read(namesize)
|
|
if name[-1] != 0:
|
|
raise Exception("File name is not NUL terminated")
|
|
name = name[:-1]
|
|
if name == b"TRAILER!!!":
|
|
break
|
|
|
|
if b"/../" in name or name.startswith(b"../") or name == b"..":
|
|
raise Exception(".. is forbidden in file name")
|
|
if name.startswith(b"."):
|
|
name = name[1:]
|
|
if name.startswith(b"/"):
|
|
name = name[1:]
|
|
content = Take(fileobj, filesize)
|
|
yield name, CpioInfo(mode=mode, nlink=nlink, dev=dev, ino=ino), content
|
|
# Ensure the content is totally consumed
|
|
while content.read(4096):
|
|
pass
|