summary refs log tree commit diff stats
path: root/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py')
-rw-r--r-- third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py 52
1 files changed, 46 insertions, 6 deletions
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
index ee59ba4548..261a031038 100644
--- a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
@@ -12,6 +12,40 @@ import tarfile
DEFAULT_MTIME = 1451606400
+# Python 3.9 contains this change:
+# https://github.com/python/cpython/commit/674935b8caf33e47c78f1b8e197b1b77a04992d2
+# which changes the output of tar creation compared to earlier versions.
+# This code is used to generate tar files that are meant to be deterministic
+# across versions of Python (specifically, it's used as part of computing the hash
+# of docker images, which needs to be identical between CI, which uses Python 3.8,
+# and developer environments, which use arbitrary versions of Python that are, at
+# this point, most probably more recent than 3.9).
+# To achieve that, we subclass TarInfo so that, when used on Python >= 3.9, it
+# reproduces the behavior from Python < 3.9.
+# Here's how it goes:
+# - the behavior in python >= 3.9 is the same as python < 3.9 when the type encoded
+# in the tarinfo is CHRTYPE or BLKTYPE.
+# - the value of the type is only compared against other values when choosing which
+# behavior to take.
+# - we replace the type with an object that holds the same bytes (so that writing the
+# value into the header is unchanged) but that compares equal to CHRTYPE, so that the
+# condition that enables the old behavior is satisfied.
+class HackedType(bytes):
+    """Tar type flag that additionally compares equal to ``tarfile.CHRTYPE``.
+
+    The instance holds the same bytes as the type flag it wraps, so using it
+    as a value is unchanged; only ``==`` is overridden. Because ``__eq__`` is
+    overridden without ``__hash__``, instances are unhashable.
+    """
+
+    def __eq__(self, other):
+        """Return True for ``tarfile.CHRTYPE``, else compare as plain bytes."""
+        if other == tarfile.CHRTYPE:
+            return True
+        # Delegate explicitly to bytes: ``self == other`` would dispatch back
+        # to this very method and recurse until RecursionError.
+        return bytes.__eq__(self, other)
+
+
+class TarInfo(tarfile.TarInfo):
+    """``tarfile.TarInfo`` variant that wraps the header type in ``HackedType``."""
+
+    @staticmethod
+    def _create_header(info, format, encoding, errors):
+        """Build the header block via ``tarfile.TarInfo._create_header``.
+
+        ``info["type"]`` is replaced in place with a ``HackedType`` holding
+        the same value before delegating to the base implementation.
+        """
+        info.update(type=HackedType(info["type"]))
+        # Type checking is ignored here because pyright appears to complain
+        # about accessing a non-public method.
+        base_create_header = tarfile.TarInfo._create_header  # type: ignore
+        return base_create_header(info, format, encoding, errors)
+
+
def create_tar_from_files(fp, files):
"""Create a tar file deterministically.
@@ -25,15 +59,23 @@ def create_tar_from_files(fp, files):
FUTURE accept a filename argument (or create APIs to write files)
"""
- with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf:
+ # The format is explicitly set to tarfile.GNU_FORMAT because the default format
+ # changed from GNU_FORMAT to PAX_FORMAT in Python 3.8.
+ with tarfile.open(
+ name="", mode="w", fileobj=fp, dereference=True, format=tarfile.GNU_FORMAT
+ ) as tf:
for archive_path, f in sorted(files.items()):
if isinstance(f, str):
- mode = os.stat(f).st_mode
+ s = os.stat(f)
+ mode = s.st_mode
+ size = s.st_size
f = open(f, "rb")
else:
mode = 0o0644
+ size = len(f.read())
+ f.seek(0)
- ti = tarfile.TarInfo(archive_path)
+ ti = TarInfo(archive_path)
ti.mode = mode
ti.type = tarfile.REGTYPE
@@ -56,9 +98,7 @@ def create_tar_from_files(fp, files):
# Set mtime to a constant value.
ti.mtime = DEFAULT_MTIME
- f.seek(0, 2)
- ti.size = f.tell()
- f.seek(0, 0)
+ ti.size = size
# tarfile wants to pass a size argument to read(). So just
# wrap/buffer in a proper file object interface.
tf.addfile(ti, f)