summaryrefslogtreecommitdiffstats
path: root/python/mozbuild/mozpack/archive.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--python/mozbuild/mozpack/archive.py153
1 files changed, 153 insertions, 0 deletions
diff --git a/python/mozbuild/mozpack/archive.py b/python/mozbuild/mozpack/archive.py
new file mode 100644
index 0000000000..89bf14b179
--- /dev/null
+++ b/python/mozbuild/mozpack/archive.py
@@ -0,0 +1,153 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import bz2
+import gzip
+import stat
+import tarfile
+
+from .files import BaseFile, File
+
+# 2016-01-01T00:00:00+0000
+DEFAULT_MTIME = 1451606400
+
+
+# Python 3.9 contains this change:
+# https://github.com/python/cpython/commit/674935b8caf33e47c78f1b8e197b1b77a04992d2
+# which changes the output of tar creation compared to earlier versions.
+# This code is used to generate tar files that are meant to be deterministic
+# across versions of python (specifically, it's used as part of computing the hash
+# of docker images, which needs to be identical between CI (which uses python 3.8)
+# and developer environments (using arbitrary versions of python, at this point
+# most probably more recent than 3.9)).
+# What we do is subclass TarInfo so that, if used on python >= 3.9, it reproduces the
+# behavior from python < 3.9.
+# Here's how it goes:
+# - the behavior in python >= 3.9 is the same as python < 3.9 when the type encoded
+# in the tarinfo is CHRTYPE or BLKTYPE.
+# - the value of the type is only compared in the context of choosing which behavior
+# to take
+# - we replace the type with the same value (so that using the value has no changes)
+# but that pretends to be the same as CHRTYPE so that the condition that enables the
+# old behavior is taken.
class HackedType(bytes):
    """Tar type flag that always compares equal to ``tarfile.CHRTYPE``.

    The instance keeps the byte value it was built from, so using it as data
    yields the same bytes as the flag it wraps. Only ``==`` is overridden: a
    comparison against ``tarfile.CHRTYPE`` always succeeds, and every other
    comparison is the regular ``bytes`` comparison.

    Because ``__eq__`` is overridden without ``__hash__``, instances are
    unhashable.
    """

    def __eq__(self, other):
        if other == tarfile.CHRTYPE:
            return True
        # Delegate explicitly to bytes: writing ``self == other`` here would
        # dispatch straight back to this method and recurse until
        # RecursionError.
        return bytes.__eq__(self, other)
+
+
class TarInfo(tarfile.TarInfo):
    """``tarfile.TarInfo`` variant that builds headers with a ``HackedType`` type flag."""

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Wrap ``info["type"]`` in ``HackedType`` and defer to the stock builder.

        ``info`` is updated in place; the remaining arguments are forwarded
        unchanged to ``tarfile.TarInfo._create_header``, whose result is
        returned.
        """
        original_type = info["type"]
        info["type"] = HackedType(original_type)
        build_header = tarfile.TarInfo._create_header
        return build_header(info, format, encoding, errors)
+
+
def create_tar_from_files(fp, files):
    """Create a tar file deterministically.

    Receives a dict mapping names of files in the archive to local filesystem
    paths or ``mozpack.files.BaseFile`` instances. Values that are not
    ``BaseFile`` instances are wrapped in ``File``.

    The files are archived, in sorted order of archive name, and written to
    the passed file handle opened for writing.

    Every entry is stored as a regular file with uid/gid 0, empty user and
    group names, and an mtime of ``DEFAULT_MTIME``. The handle obtained from
    each file's ``open()`` is closed once the entry has been written.

    Raises ``ValueError`` if a file's mode has the setuid or setgid bit set.

    FUTURE accept a filename argument (or create APIs to write files)
    """
    # The format is explicitly set to tarfile.GNU_FORMAT, because the default
    # format has been changed in Python 3.8.
    with tarfile.open(
        name="", mode="w", fileobj=fp, dereference=True, format=tarfile.GNU_FORMAT
    ) as tf:
        for archive_path, f in sorted(files.items()):
            if not isinstance(f, BaseFile):
                f = File(f)

            ti = TarInfo(archive_path)
            ti.mode = f.mode or 0o0644
            ti.type = tarfile.REGTYPE

            # NOTE(review): the type was forced to REGTYPE just above, so this
            # guard cannot fire as written; kept to preserve the stated
            # "regular files only" contract.
            if not ti.isreg():
                raise ValueError("not a regular file: %s" % f)

            # Disallow setuid and setgid bits. This is an arbitrary restriction.
            # However, since we set uid/gid to root:root, setuid and setgid
            # would be a glaring security hole if the archive were
            # uncompressed as root.
            if ti.mode & (stat.S_ISUID | stat.S_ISGID):
                raise ValueError("cannot add file with setuid or setgid set: %s" % f)

            # Set uid, gid, username, and group as deterministic values.
            ti.uid = 0
            ti.gid = 0
            ti.uname = ""
            ti.gname = ""

            # Set mtime to a constant value.
            ti.mtime = DEFAULT_MTIME

            ti.size = f.size()
            # tarfile wants to pass a size argument to read(), so hand it a
            # proper file object. Close that object as soon as the entry is
            # written instead of leaving it to the garbage collector.
            fh = f.open()
            try:
                tf.addfile(ti, fh)
            finally:
                fh.close()
+
+
def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
    """Create a tar.gz file deterministically from files.

    Thin wrapper over ``create_tar_from_files`` that layers gzip compression
    on top of the tar stream.

    ``fp`` should be a file handle opened for writing in binary mode. By the
    time this function returns, all data has been written to that handle.
    """
    # The gzip header carries a modification time (the 4-byte MTIME field).
    # Pin it to a known value so that the output is deterministic.
    with gzip.GzipFile(
        filename=filename or "",
        mode="wb",
        fileobj=fp,
        compresslevel=compresslevel,
        mtime=DEFAULT_MTIME,
    ) as gz_stream:
        create_tar_from_files(gz_stream, files)
+
+
+class _BZ2Proxy(object):
+ """File object that proxies writes to a bz2 compressor."""
+
+ def __init__(self, fp, compresslevel=9):
+ self.fp = fp
+ self.compressor = bz2.BZ2Compressor(compresslevel)
+ self.pos = 0
+
+ def tell(self):
+ return self.pos
+
+ def write(self, data):
+ data = self.compressor.compress(data)
+ self.pos += len(data)
+ self.fp.write(data)
+
+ def close(self):
+ data = self.compressor.flush()
+ self.pos += len(data)
+ self.fp.write(data)
+
+
def create_tar_bz2_from_files(fp, files, compresslevel=9):
    """Create a tar.bz2 file deterministically from files.

    Thin wrapper over ``create_tar_from_files`` that layers bzip2 compression
    on top of the tar stream written to ``fp``.

    This is the bzip2 counterpart of ``create_tar_gz_from_files()``.
    """
    bz2_sink = _BZ2Proxy(fp, compresslevel=compresslevel)
    create_tar_from_files(bz2_sink, files)
    # Flush the data still buffered inside the compressor out to ``fp``.
    bz2_sink.close()