summaryrefslogtreecommitdiffstats
path: root/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py237
1 files changed, 237 insertions, 0 deletions
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py
new file mode 100644
index 0000000000..c37a69f98f
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py
@@ -0,0 +1,237 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import hashlib
+import io
+import os
+import re
+
+from taskgraph.util.archive import create_tar_gz_from_files
+from taskgraph.util.memoize import memoize
+
+IMAGE_DIR = os.path.join(".", "taskcluster", "docker")
+
+from .yaml import load_yaml
+
+
def docker_image(name, by_tag=False):
    """Resolve an in-tree prebuilt docker image reference.

    Returns ``<registry>/<repository>@sha256:<digest>`` by default, or
    ``<registry>/<repository>:<tag>`` when ``by_tag`` is True.

    Raises ``Exception`` if the image's HASH file is unreadable; a missing
    VERSION file falls back to the ``latest`` tag.
    """
    # Prefer a per-image REGISTRY file; fall back to the shared one.
    try:
        with open(os.path.join(IMAGE_DIR, name, "REGISTRY")) as fh:
            registry = fh.read().strip()
    except OSError:
        with open(os.path.join(IMAGE_DIR, "REGISTRY")) as fh:
            registry = fh.read().strip()

    if by_tag:
        try:
            with open(os.path.join(IMAGE_DIR, name, "VERSION")) as fh:
                tag = fh.read().strip()
        except OSError:
            tag = "latest"
        return f"{registry}/{name}:{tag}"

    hashfile = os.path.join(IMAGE_DIR, name, "HASH")
    try:
        with open(hashfile) as fh:
            digest = fh.read().strip()
    except OSError:
        raise Exception(f"Failed to read HASH file {hashfile}")
    return f"{registry}/{name}@{digest}"
+
+
class VoidWriter:
    """A write-only file-like sink that discards all data.

    Lets callers stream a tarball purely for its side effects (e.g. hashing)
    without materializing the archive anywhere.
    """

    def write(self, buf):
        # Deliberately a no-op: the bytes are thrown away.
        return None
+
+
def generate_context_hash(topsrcdir, image_path, args=None):
    """Return the sha256 hex digest of the docker build context for an image.

    Streams the context tarball into a discarding writer, so only the hash
    is produced and no archive is written to disk.
    """
    sink = VoidWriter()
    return stream_context_tar(topsrcdir, image_path, sink, args=args)
+
+
class HashingWriter:
    """Proxy file object that sha256-hashes all data written through it.

    Every ``write`` both updates the running digest and forwards the bytes
    unchanged to the wrapped writer.
    """

    def __init__(self, writer):
        self._writer = writer
        self._hash = hashlib.sha256()

    def write(self, buf):
        # Hash first, then forward, matching the order data is observed.
        self._hash.update(buf)
        self._writer.write(buf)

    def hexdigest(self):
        """Return the hex digest of everything written so far."""
        return self._hash.hexdigest()
+
+
def create_context_tar(topsrcdir, context_dir, out_path, args=None):
    """Write a gzipped docker context tarball to ``out_path``.

    The directory ``context_dir`` (which must contain a Dockerfile) is
    assembled into the archive. The Dockerfile is scanned for special
    comment directives that influence context generation:

    * ``# %include <path>`` — the repo-relative path is added to the
      context under ``topsrcdir/``; directories are added recursively.
    * ``# %ARG <name>`` — occurrences of ``$<name>`` on later lines are
      substituted with the value from ``args`` (VOLUME lines excepted).

    Returns the SHA-256 hex digest of the created archive.
    """
    image_name = os.path.basename(out_path)
    with open(out_path, "wb") as out_fh:
        return stream_context_tar(
            topsrcdir,
            context_dir,
            out_fh,
            image_name=image_name,
            args=args,
        )
+
+
# Directory (inside this package) holding the run-task support files.
RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task")
# Map of in-archive path -> on-disk path for the run-task support files
# pulled into a context by "# %include-run-task".
RUN_TASK_FILES = {
    f"run-task/{path}": os.path.join(RUN_TASK_ROOT, path)
    for path in [
        "run-task",
        "fetch-content",
        "hgrc",
        "robustcheckout.py",
    ]
}
# Dockerfile lines injected in place of "# %include-run-task". The original
# list was missing a comma after the robustcheckout.py entry, silently
# fusing two COPY lines into one element via implicit string concatenation;
# the joined Dockerfile text is unchanged, but each line is now a distinct
# element.
RUN_TASK_SNIPPET = [
    "COPY run-task/run-task /usr/local/bin/run-task\n",
    "COPY run-task/fetch-content /usr/local/bin/fetch-content\n",
    "COPY run-task/robustcheckout.py /usr/local/mercurial/robustcheckout.py\n",
    "COPY run-task/hgrc /etc/mercurial/hgrc.d/mozilla.rc\n",
]
+
+
def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None):
    """Like create_context_tar, but streams the tar file to the `out_file` file
    object.

    Returns the SHA-256 hex digest of the streamed archive.
    """
    archive_files = {}
    replace = []
    content = []

    topsrcdir = os.path.abspath(topsrcdir)
    context_dir = os.path.join(topsrcdir, context_dir)

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            archive_path = source_path[len(context_dir) + 1 :]
            # Buffer the file contents instead of keeping an open file
            # handle around: the original code opened every file here and
            # never closed it, leaking one descriptor per context file.
            # NOTE(review): we keep passing file *objects* (not paths)
            # because the archive writer presumably treats the two
            # differently (e.g. permission normalization) — confirm against
            # create_tar_gz_from_files before switching to bare paths.
            with open(source_path, "rb") as source_fh:
                archive_files[archive_path] = io.BytesIO(source_fh.read())

    # Parse Dockerfile for special syntax of extra files to include.
    content = []
    with open(os.path.join(context_dir, "Dockerfile")) as fh:
        for line in fh:
            # "# %ARG <name>" declares a substitution variable; later
            # occurrences of $<name> are replaced with args[<name>].
            if line.startswith("# %ARG"):
                p = line[len("# %ARG ") :].strip()
                if not args or p not in args:
                    raise Exception(f"missing argument: {p}")
                replace.append((re.compile(rf"\${p}\b"), args[p]))
                continue

            for regexp, s in replace:
                line = re.sub(regexp, s, line)

            content.append(line)

            if not line.startswith("# %include"):
                continue

            # "# %include-run-task" injects the run-task support files and
            # the COPY lines that install them.
            if line.strip() == "# %include-run-task":
                content.extend(RUN_TASK_SNIPPET)
                archive_files.update(RUN_TASK_FILES)
                continue

            p = line[len("# %include ") :].strip()
            if os.path.isabs(p):
                raise Exception("extra include path cannot be absolute: %s" % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits. A bare startswith()
            # would also accept sibling paths such as "<topsrcdir>-evil",
            # so require either topsrcdir itself or a path separator after
            # the prefix.
            if fs_path != topsrcdir and not fs_path.startswith(topsrcdir + os.sep):
                raise Exception("extra include path outside topsrcdir: %s" % p)

            if not os.path.exists(fs_path):
                raise Exception("extra include path does not exist: %s" % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        rel = source_path[len(fs_path) + 1 :]
                        archive_path = os.path.join("topsrcdir", p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join("topsrcdir", p)
                archive_files[archive_path] = fs_path

    # The (possibly rewritten) Dockerfile replaces whatever was on disk.
    archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8"))

    writer = HashingWriter(out_file)
    create_tar_gz_from_files(writer, archive_files, image_name)
    return writer.hexdigest()
+
+
@memoize
def image_paths():
    """Return a map of image name to paths containing their Dockerfile.

    Image definitions come from the docker-image kind configuration; an
    explicit ``definition`` key overrides the default directory name.
    """
    config = load_yaml("taskcluster", "ci", "docker-image", "kind.yml")
    paths = {}
    for name, task in config["tasks"].items():
        definition = task.get("definition", name)
        paths[name] = os.path.join(IMAGE_DIR, definition)
    return paths
+
+
def image_path(name):
    """Return the directory holding the Dockerfile for image ``name``.

    Falls back to ``IMAGE_DIR/<name>`` when the image is not declared in
    the docker-image kind configuration.
    """
    return image_paths().get(name, os.path.join(IMAGE_DIR, name))
+
+
@memoize
def parse_volumes(image):
    """Parse VOLUME entries from a Dockerfile for an image.

    Returns the set of volume paths (decoded as UTF-8). Raises ValueError
    on the JSON-array form of VOLUME, which is not supported here.
    """
    dockerfile = os.path.join(image_path(image), "Dockerfile")
    volumes = set()

    with open(dockerfile, "rb") as fh:
        for raw_line in fh:
            stripped = raw_line.strip()
            # We assume VOLUME definitions don't use %ARGS.
            if not stripped.startswith(b"VOLUME "):
                continue

            value = stripped.split(None, 1)[1]
            if value.startswith(b"["):
                raise ValueError(
                    "cannot parse array syntax for VOLUME; "
                    "convert to multiple entries"
                )

            volumes.update(part.decode("utf-8") for part in value.split())

    return volumes