diff options
Diffstat (limited to 'third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py')
-rw-r--r-- | third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py | 52 |
1 files changed, 46 insertions, 6 deletions
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py index ee59ba4548..261a031038 100644 --- a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py @@ -12,6 +12,40 @@ import tarfile DEFAULT_MTIME = 1451606400 +# Python 3.9 contains this change: +# https://github.com/python/cpython/commit/674935b8caf33e47c78f1b8e197b1b77a04992d2 +# which changes the output of tar creation compared to earlier versions. +# As this code is used to generate tar files that are meant to be deterministic +# across versions of python (specifically, it's used as part of computing the hash +# of docker images, which needs to be identical between CI (which uses python 3.8), +# and developer environments (using arbitrary versions of python, at this point, +# most probably more recent than 3.9)). +# What we do is subblass TarInfo so that if used on python >= 3.9, it reproduces the +# behavior from python < 3.9. +# Here's how it goes: +# - the behavior in python >= 3.9 is the same as python < 3.9 when the type encoded +# in the tarinfo is CHRTYPE or BLKTYPE. +# - the value of the type is only compared in the context of choosing which behavior +# to take +# - we replace the type with the same value (so that using the value has no changes) +# but that pretends to be the same as CHRTYPE so that the condition that enables the +# old behavior is taken. +class HackedType(bytes): + def __eq__(self, other): + if other == tarfile.CHRTYPE: + return True + return self == other + + +class TarInfo(tarfile.TarInfo): + @staticmethod + def _create_header(info, format, encoding, errors): + info["type"] = HackedType(info["type"]) + # ignore type checking because it looks like pyright complains because we're calling a + # non-public method + return tarfile.TarInfo._create_header(info, format, encoding, errors) # type: ignore + + def create_tar_from_files(fp, files): """Create a tar file deterministically. @@ -25,15 +59,23 @@ def create_tar_from_files(fp, files): FUTURE accept a filename argument (or create APIs to write files) """ - with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf: + # The format is explicitly set to tarfile.GNU_FORMAT, because this default format + # has been changed in Python 3.8. + with tarfile.open( + name="", mode="w", fileobj=fp, dereference=True, format=tarfile.GNU_FORMAT + ) as tf: for archive_path, f in sorted(files.items()): if isinstance(f, str): - mode = os.stat(f).st_mode + s = os.stat(f) + mode = s.st_mode + size = s.st_size f = open(f, "rb") else: mode = 0o0644 + size = len(f.read()) + f.seek(0) - ti = tarfile.TarInfo(archive_path) + ti = TarInfo(archive_path) ti.mode = mode ti.type = tarfile.REGTYPE @@ -56,9 +98,7 @@ def create_tar_from_files(fp, files): # Set mtime to a constant value. ti.mtime = DEFAULT_MTIME - f.seek(0, 2) - ti.size = f.tell() - f.seek(0, 0) + ti.size = size # tarfile wants to pass a size argument to read(). So just # wrap/buffer in a proper file object interface. tf.addfile(ti, f) |