author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
commit     36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree       105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/python/taskcluster_taskgraph/taskgraph/util
parent     Initial commit. (diff)
Adding upstream version 115.7.0esr.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/taskcluster_taskgraph/taskgraph/util')
24 files changed, 3100 insertions, 0 deletions
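
For orientation, the archive helpers added below can be exercised roughly as follows. This is an illustrative sketch, not part of the commit; the input paths in `files` are hypothetical placeholders and must point at real files for the snippet to run.

    import io
    from taskgraph.util.archive import create_tar_gz_from_files

    # Hypothetical inputs: archive member names mapped to local file paths.
    files = {
        "docs/README.md": "/tmp/README.md",
        "src/app.py": "/tmp/app.py",
    }

    buf = io.BytesIO()
    # Fixed uid/gid/mtime inside the helper make the output byte-for-byte
    # reproducible for identical inputs.
    create_tar_gz_from_files(buf, files, filename="context.tar.gz")
    print(len(buf.getvalue()), "bytes written")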
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/__init__.py
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
new file mode 100644
index 0000000000..ee59ba4548
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/archive.py
@@ -0,0 +1,86 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import gzip
+import os
+import stat
+import tarfile
+
+# 2016-01-01T00:00:00+0000
+DEFAULT_MTIME = 1451606400
+
+
+def create_tar_from_files(fp, files):
+    """Create a tar file deterministically.
+
+    Receives a dict mapping names of files in the archive to local filesystem
+    paths or ``mozpack.files.BaseFile`` instances.
+
+    The files will be archived and written to the passed file handle opened
+    for writing.
+
+    Only regular files can be written.
+
+    FUTURE accept a filename argument (or create APIs to write files)
+    """
+    with tarfile.open(name="", mode="w", fileobj=fp, dereference=True) as tf:
+        for archive_path, f in sorted(files.items()):
+            if isinstance(f, str):
+                mode = os.stat(f).st_mode
+                f = open(f, "rb")
+            else:
+                mode = 0o0644
+
+            ti = tarfile.TarInfo(archive_path)
+            ti.mode = mode
+            ti.type = tarfile.REGTYPE
+
+            if not ti.isreg():
+                raise ValueError("not a regular file: %s" % f)
+
+            # Disallow setuid and setgid bits. This is an arbitrary restriction.
+            # However, since we set uid/gid to root:root, setuid and setgid
+            # would be a glaring security hole if the archive were
+            # uncompressed as root.
+            if ti.mode & (stat.S_ISUID | stat.S_ISGID):
+                raise ValueError("cannot add file with setuid or setgid set: " "%s" % f)
+
+            # Set uid, gid, username, and group as deterministic values.
+            ti.uid = 0
+            ti.gid = 0
+            ti.uname = ""
+            ti.gname = ""
+
+            # Set mtime to a constant value.
+            ti.mtime = DEFAULT_MTIME
+
+            f.seek(0, 2)
+            ti.size = f.tell()
+            f.seek(0, 0)
+            # tarfile wants to pass a size argument to read(). So just
+            # wrap/buffer in a proper file object interface.
+            tf.addfile(ti, f)
+
+
+def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
+    """Create a tar.gz file deterministically from files.
+
+    This is a glorified wrapper around ``create_tar_from_files`` that
+    adds gzip compression.
+
+    The passed file handle should be opened for writing in binary mode.
+    When the function returns, all data has been written to the handle.
+    """
+    # Offset 3-7 in the gzip header contains an mtime. Pin it to a known
+    # value so output is deterministic.
+    gf = gzip.GzipFile(
+        filename=filename or "",
+        mode="wb",
+        fileobj=fp,
+        compresslevel=compresslevel,
+        mtime=DEFAULT_MTIME,
+    )
+    with gf:
+        create_tar_from_files(gf, files)
diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
new file mode 100644
index 0000000000..cf6f11c573
--- /dev/null
+++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/attributes.py
@@ -0,0 +1,84 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0.
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import re + + +def attrmatch(attributes, **kwargs): + """Determine whether the given set of task attributes matches. The + conditions are given as keyword arguments, where each keyword names an + attribute. The keyword value can be a literal, a set, or a callable. A + literal must match the attribute exactly. Given a set, the attribute value + must be in the set. A callable is called with the attribute value. If an + attribute is specified as a keyword argument but not present in the + attributes, the result is False.""" + for kwkey, kwval in kwargs.items(): + if kwkey not in attributes: + return False + attval = attributes[kwkey] + if isinstance(kwval, set): + if attval not in kwval: + return False + elif callable(kwval): + if not kwval(attval): + return False + elif kwval != attributes[kwkey]: + return False + return True + + +def keymatch(attributes, target): + """Determine if any keys in attributes are a match to target, then return + a list of matching values. First exact matches will be checked. Failing + that, regex matches and finally a default key. + """ + # exact match + if target in attributes: + return [attributes[target]] + + # regular expression match + matches = [v for k, v in attributes.items() if re.match(k + "$", target)] + if matches: + return matches + + # default + if "default" in attributes: + return [attributes["default"]] + + return [] + + +def _match_run_on(key, run_on): + """ + Determine whether the given parameter is included in the corresponding `run-on-attribute`. + """ + if "all" in run_on: + return True + return key in run_on + + +match_run_on_projects = _match_run_on +match_run_on_tasks_for = _match_run_on + + +def match_run_on_git_branches(git_branch, run_on_git_branches): + """ + Determine whether the given project is included in the `run-on-git-branches` parameter. + Allows 'all'. + """ + if "all" in run_on_git_branches: + return True + + for expected_git_branch_pattern in run_on_git_branches: + if re.match(expected_git_branch_pattern, git_branch): + return True + + return False + + +def sorted_unique_list(*args): + """Join one or more lists, and return a sorted list of unique members""" + combined = set().union(*args) + return sorted(combined) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py new file mode 100644 index 0000000000..974b114902 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/cached_tasks.py @@ -0,0 +1,86 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import hashlib +import time + +TARGET_CACHE_INDEX = "{cache_prefix}.cache.level-{level}.{type}.{name}.hash.{digest}" +EXTRA_CACHE_INDEXES = [ + "{cache_prefix}.cache.level-{level}.{type}.{name}.latest", + "{cache_prefix}.cache.level-{level}.{type}.{name}.pushdate.{build_date_long}", +] + + +def add_optimization( + config, taskdesc, cache_type, cache_name, digest=None, digest_data=None +): + """ + Allow the results of this task to be cached. This adds index routes to the + task so it can be looked up for future runs, and optimization hints so that + cached artifacts can be found. Exactly one of `digest` and `digest_data` + must be passed. 
+ + :param TransformConfig config: The configuration for the kind being transformed. + :param dict taskdesc: The description of the current task. + :param str cache_type: The type of task result being cached. + :param str cache_name: The name of the object being cached. + :param digest: A unique string identifying this version of the artifacts + being generated. Typically this will be the hash of inputs to the task. + :type digest: bytes or None + :param digest_data: A list of bytes representing the inputs of this task. + They will be concatenated and hashed to create the digest for this + task. + :type digest_data: list of bytes or None + """ + if (digest is None) == (digest_data is None): + raise Exception("Must pass exactly one of `digest` and `digest_data`.") + if digest is None: + digest = hashlib.sha256("\n".join(digest_data).encode("utf-8")).hexdigest() + + if "cached-task-prefix" in config.graph_config["taskgraph"]: + cache_prefix = config.graph_config["taskgraph"]["cached-task-prefix"] + else: + cache_prefix = config.graph_config["trust-domain"] + + subs = { + "cache_prefix": cache_prefix, + "type": cache_type, + "name": cache_name, + "digest": digest, + } + + # We'll try to find a cached version of the toolchain at levels above and + # including the current level, starting at the highest level. + # Chain-of-trust doesn't handle tasks not built on the tip of a + # pull-request, so don't look for level-1 tasks if building a pull-request. + index_routes = [] + min_level = int(config.params["level"]) + if config.params["tasks_for"] == "github-pull-request": + min_level = max(min_level, 3) + for level in reversed(range(min_level, 4)): + subs["level"] = level + index_routes.append(TARGET_CACHE_INDEX.format(**subs)) + + taskdesc["optimization"] = {"index-search": index_routes} + + # ... and cache at the lowest level. + subs["level"] = config.params["level"] + taskdesc.setdefault("routes", []).append( + f"index.{TARGET_CACHE_INDEX.format(**subs)}" + ) + + # ... and add some extra routes for humans + subs["build_date_long"] = time.strftime( + "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"]) + ) + taskdesc["routes"].extend( + [f"index.{route.format(**subs)}" for route in EXTRA_CACHE_INDEXES] + ) + + taskdesc["attributes"]["cached_task"] = { + "type": cache_type, + "name": cache_name, + "digest": digest, + } diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py new file mode 100644 index 0000000000..d0e1e1079f --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/decision.py @@ -0,0 +1,79 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Utilities for generating a decision task from :file:`.taskcluster.yml`. 
+""" + + +import os + +import jsone +import slugid +import yaml + +from .templates import merge +from .time import current_json_time +from .vcs import find_hg_revision_push_info + + +def make_decision_task(params, root, context, head_rev=None): + """Generate a basic decision task, based on the root .taskcluster.yml""" + with open(os.path.join(root, ".taskcluster.yml"), "rb") as f: + taskcluster_yml = yaml.safe_load(f) + + if not head_rev: + head_rev = params["head_rev"] + + if params["repository_type"] == "hg": + pushlog = find_hg_revision_push_info(params["repository_url"], head_rev) + + hg_push_context = { + "pushlog_id": pushlog["pushid"], + "pushdate": pushlog["pushdate"], + "owner": pushlog["user"], + } + else: + hg_push_context = {} + + slugids = {} + + def as_slugid(name): + # https://github.com/taskcluster/json-e/issues/164 + name = name[0] + if name not in slugids: + slugids[name] = slugid.nice() + return slugids[name] + + # provide a similar JSON-e context to what mozilla-taskcluster provides: + # https://docs.taskcluster.net/reference/integrations/mozilla-taskcluster/docs/taskcluster-yml + # but with a different tasks_for and an extra `cron` section + context = merge( + { + "repository": { + "url": params["repository_url"], + "project": params["project"], + "level": params["level"], + }, + "push": merge( + { + "revision": params["head_rev"], + # remainder are fake values, but the decision task expects them anyway + "comment": " ", + }, + hg_push_context, + ), + "now": current_json_time(), + "as_slugid": as_slugid, + }, + context, + ) + + rendered = jsone.render(taskcluster_yml, context) + if len(rendered["tasks"]) != 1: + raise Exception("Expected .taskcluster.yml to only produce one cron task") + task = rendered["tasks"][0] + + task_id = task.pop("taskId") + return (task_id, task) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py new file mode 100644 index 0000000000..4b211cc4b3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/docker.py @@ -0,0 +1,342 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import hashlib +import io +import json +import os +import re +import sys +import urllib.parse + +import requests_unixsocket + +from taskgraph.util.archive import create_tar_gz_from_files +from taskgraph.util.memoize import memoize + +IMAGE_DIR = os.path.join(".", "taskcluster", "docker") + +from .yaml import load_yaml + + +def docker_url(path, **kwargs): + docker_socket = os.environ.get("DOCKER_SOCKET", "/var/run/docker.sock") + return urllib.parse.urlunparse( + ( + "http+unix", + urllib.parse.quote(docker_socket, safe=""), + path, + "", + urllib.parse.urlencode(kwargs), + "", + ) + ) + + +def post_to_docker(tar, api_path, **kwargs): + """POSTs a tar file to a given docker API path. + + The tar argument can be anything that can be passed to requests.post() + as data (e.g. iterator or file object). + The extra keyword arguments are passed as arguments to the docker API. 
+ """ + req = requests_unixsocket.Session().post( + docker_url(api_path, **kwargs), + data=tar, + stream=True, + headers={"Content-Type": "application/x-tar"}, + ) + if req.status_code != 200: + message = req.json().get("message") + if not message: + message = f"docker API returned HTTP code {req.status_code}" + raise Exception(message) + status_line = {} + + buf = b"" + for content in req.iter_content(chunk_size=None): + if not content: + continue + # Sometimes, a chunk of content is not a complete json, so we cumulate + # with leftovers from previous iterations. + buf += content + try: + data = json.loads(buf) + except Exception: + continue + buf = b"" + # data is sometimes an empty dict. + if not data: + continue + # Mimic how docker itself presents the output. This code was tested + # with API version 1.18 and 1.26. + if "status" in data: + if "id" in data: + if sys.stderr.isatty(): + total_lines = len(status_line) + line = status_line.setdefault(data["id"], total_lines) + n = total_lines - line + if n > 0: + # Move the cursor up n lines. + sys.stderr.write(f"\033[{n}A") + # Clear line and move the cursor to the beginning of it. + sys.stderr.write("\033[2K\r") + sys.stderr.write( + "{}: {} {}\n".format( + data["id"], data["status"], data.get("progress", "") + ) + ) + if n > 1: + # Move the cursor down n - 1 lines, which, considering + # the carriage return on the last write, gets us back + # where we started. + sys.stderr.write(f"\033[{n - 1}B") + else: + status = status_line.get(data["id"]) + # Only print status changes. + if status != data["status"]: + sys.stderr.write("{}: {}\n".format(data["id"], data["status"])) + status_line[data["id"]] = data["status"] + else: + status_line = {} + sys.stderr.write("{}\n".format(data["status"])) + elif "stream" in data: + sys.stderr.write(data["stream"]) + elif "aux" in data: + sys.stderr.write(repr(data["aux"])) + elif "error" in data: + sys.stderr.write("{}\n".format(data["error"])) + # Sadly, docker doesn't give more than a plain string for errors, + # so the best we can do to propagate the error code from the command + # that failed is to parse the error message... + errcode = 1 + m = re.search(r"returned a non-zero code: (\d+)", data["error"]) + if m: + errcode = int(m.group(1)) + sys.exit(errcode) + else: + raise NotImplementedError(repr(data)) + sys.stderr.flush() + + +def docker_image(name, by_tag=False): + """ + Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``, + or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`. 
+ """ + try: + with open(os.path.join(IMAGE_DIR, name, "REGISTRY")) as f: + registry = f.read().strip() + except OSError: + with open(os.path.join(IMAGE_DIR, "REGISTRY")) as f: + registry = f.read().strip() + + if not by_tag: + hashfile = os.path.join(IMAGE_DIR, name, "HASH") + try: + with open(hashfile) as f: + return f"{registry}/{name}@{f.read().strip()}" + except OSError: + raise Exception(f"Failed to read HASH file {hashfile}") + + try: + with open(os.path.join(IMAGE_DIR, name, "VERSION")) as f: + tag = f.read().strip() + except OSError: + tag = "latest" + return f"{registry}/{name}:{tag}" + + +class VoidWriter: + """A file object with write capabilities that does nothing with the written + data.""" + + def write(self, buf): + pass + + +def generate_context_hash(topsrcdir, image_path, args=None): + """Generates a sha256 hash for context directory used to build an image.""" + + return stream_context_tar(topsrcdir, image_path, VoidWriter(), args=args) + + +class HashingWriter: + """A file object with write capabilities that hashes the written data at + the same time it passes down to a real file object.""" + + def __init__(self, writer): + self._hash = hashlib.sha256() + self._writer = writer + + def write(self, buf): + self._hash.update(buf) + self._writer.write(buf) + + def hexdigest(self): + return self._hash.hexdigest() + + +def create_context_tar(topsrcdir, context_dir, out_path, args=None): + """Create a context tarball. + + A directory ``context_dir`` containing a Dockerfile will be assembled into + a gzipped tar file at ``out_path``. + + We also scan the source Dockerfile for special syntax that influences + context generation. + + If a line in the Dockerfile has the form ``# %include <path>``, + the relative path specified on that line will be matched against + files in the source repository and added to the context under the + path ``topsrcdir/``. If an entry is a directory, we add all files + under that directory. + + If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences of + the string ``$<name>`` in subsequent lines are replaced with the value + found in the ``args`` argument. Exception: this doesn't apply to VOLUME + definitions. + + Returns the SHA-256 hex digest of the created archive. 
+ """ + with open(out_path, "wb") as fh: + return stream_context_tar( + topsrcdir, + context_dir, + fh, + image_name=os.path.basename(out_path), + args=args, + ) + + +RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task") +RUN_TASK_FILES = { + f"run-task/{path}": os.path.join(RUN_TASK_ROOT, path) + for path in [ + "run-task", + "fetch-content", + "hgrc", + "robustcheckout.py", + ] +} +RUN_TASK_SNIPPET = [ + "COPY run-task/run-task /usr/local/bin/run-task\n", + "COPY run-task/fetch-content /usr/local/bin/fetch-content\n", + "COPY run-task/robustcheckout.py /usr/local/mercurial/robustcheckout.py\n" + "COPY run-task/hgrc /etc/mercurial/hgrc.d/mozilla.rc\n", +] + + +def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None): + """Like create_context_tar, but streams the tar file to the `out_file` file + object.""" + archive_files = {} + replace = [] + content = [] + + topsrcdir = os.path.abspath(topsrcdir) + context_dir = os.path.join(topsrcdir, context_dir) + + for root, dirs, files in os.walk(context_dir): + for f in files: + source_path = os.path.join(root, f) + archive_path = source_path[len(context_dir) + 1 :] + archive_files[archive_path] = open(source_path, "rb") + + # Parse Dockerfile for special syntax of extra files to include. + content = [] + with open(os.path.join(context_dir, "Dockerfile")) as fh: + for line in fh: + if line.startswith("# %ARG"): + p = line[len("# %ARG ") :].strip() + if not args or p not in args: + raise Exception(f"missing argument: {p}") + replace.append((re.compile(rf"\${p}\b"), args[p])) + continue + + for regexp, s in replace: + line = re.sub(regexp, s, line) + + content.append(line) + + if not line.startswith("# %include"): + continue + + if line.strip() == "# %include-run-task": + content.extend(RUN_TASK_SNIPPET) + archive_files.update(RUN_TASK_FILES) + continue + + p = line[len("# %include ") :].strip() + if os.path.isabs(p): + raise Exception("extra include path cannot be absolute: %s" % p) + + fs_path = os.path.normpath(os.path.join(topsrcdir, p)) + # Check for filesystem traversal exploits. 
+ if not fs_path.startswith(topsrcdir): + raise Exception("extra include path outside topsrcdir: %s" % p) + + if not os.path.exists(fs_path): + raise Exception("extra include path does not exist: %s" % p) + + if os.path.isdir(fs_path): + for root, dirs, files in os.walk(fs_path): + for f in files: + source_path = os.path.join(root, f) + rel = source_path[len(fs_path) + 1 :] + archive_path = os.path.join("topsrcdir", p, rel) + archive_files[archive_path] = source_path + else: + archive_path = os.path.join("topsrcdir", p) + archive_files[archive_path] = fs_path + + archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8")) + + writer = HashingWriter(out_file) + create_tar_gz_from_files(writer, archive_files, image_name) + return writer.hexdigest() + + +@memoize +def image_paths(): + """Return a map of image name to paths containing their Dockerfile.""" + config = load_yaml("taskcluster", "ci", "docker-image", "kind.yml") + return { + k: os.path.join(IMAGE_DIR, v.get("definition", k)) + for k, v in config["tasks"].items() + } + + +def image_path(name): + paths = image_paths() + if name in paths: + return paths[name] + return os.path.join(IMAGE_DIR, name) + + +@memoize +def parse_volumes(image): + """Parse VOLUME entries from a Dockerfile for an image.""" + volumes = set() + + path = image_path(image) + + with open(os.path.join(path, "Dockerfile"), "rb") as fh: + for line in fh: + line = line.strip() + # We assume VOLUME definitions don't use %ARGS. + if not line.startswith(b"VOLUME "): + continue + + v = line.split(None, 1)[1] + if v.startswith(b"["): + raise ValueError( + "cannot parse array syntax for VOLUME; " + "convert to multiple entries" + ) + + volumes |= {volume.decode("utf-8") for volume in v.split()} + + return volumes diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py new file mode 100644 index 0000000000..bf786e92e4 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/hash.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import hashlib +from pathlib import Path + +from taskgraph.util import path as mozpath +from taskgraph.util.memoize import memoize + + +@memoize +def hash_path(path): + """Hash a single file. + + Returns the SHA-256 hash in hex form. + """ + with open(path, "rb") as fh: + return hashlib.sha256(fh.read()).hexdigest() + + +def _find_files(base_path): + for path in Path(base_path).rglob("*"): + if path.is_file(): + yield str(path) + + +def hash_paths(base_path, patterns): + """ + Give a list of path patterns, return a digest of the contents of all + the corresponding files, similarly to git tree objects or mercurial + manifests. + + Each file is hashed. The list of all hashes and file paths is then + itself hashed to produce the result. 
+ """ + h = hashlib.sha256() + + found = set() + for pattern in patterns: + files = _find_files(base_path) + matches = [path for path in files if mozpath.match(path, pattern)] + if matches: + found.update(matches) + else: + raise Exception("%s did not match anything" % pattern) + for path in sorted(found): + h.update( + "{} {}\n".format( + hash_path(mozpath.abspath(mozpath.join(base_path, path))), + mozpath.normsep(path), + ).encode("utf-8") + ) + return h.hexdigest() diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py new file mode 100644 index 0000000000..9b0c5a44fb --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/keyed_by.py @@ -0,0 +1,97 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +from .attributes import keymatch + + +def evaluate_keyed_by( + value, item_name, attributes, defer=None, enforce_single_match=True +): + """ + For values which can either accept a literal value, or be keyed by some + attributes, perform that lookup and return the result. + + For example, given item:: + + by-test-platform: + macosx-10.11/debug: 13 + win.*: 6 + default: 12 + + a call to `evaluate_keyed_by(item, 'thing-name', {'test-platform': 'linux96')` + would return `12`. + + Items can be nested as deeply as desired:: + + by-test-platform: + win.*: + by-project: + ash: .. + cedar: .. + linux: 13 + default: 12 + + Args: + value (str): Name of the value to perform evaluation on. + item_name (str): Used to generate useful error messages. + attributes (dict): Dictionary of attributes used to lookup 'by-<key>' with. + defer (list): + Allows evaluating a by-* entry at a later time. In the example + above it's possible that the project attribute hasn't been set yet, + in which case we'd want to stop before resolving that subkey and + then call this function again later. This can be accomplished by + setting `defer=["project"]` in this example. + enforce_single_match (bool): + If True (default), each task may only match a single arm of the + evaluation. + """ + while True: + if not isinstance(value, dict) or len(value) != 1: + return value + value_key = next(iter(value)) + if not value_key.startswith("by-"): + return value + + keyed_by = value_key[3:] # strip off 'by-' prefix + + if defer and keyed_by in defer: + return value + + key = attributes.get(keyed_by) + alternatives = next(iter(value.values())) + + if len(alternatives) == 1 and "default" in alternatives: + # Error out when only 'default' is specified as only alternatives, + # because we don't need to by-{keyed_by} there. 
+ raise Exception( + "Keyed-by '{}' unnecessary with only value 'default' " + "found, when determining item {}".format(keyed_by, item_name) + ) + + if key is None: + if "default" in alternatives: + value = alternatives["default"] + continue + else: + raise Exception( + "No attribute {} and no value for 'default' found " + "while determining item {}".format(keyed_by, item_name) + ) + + matches = keymatch(alternatives, key) + if enforce_single_match and len(matches) > 1: + raise Exception( + "Multiple matching values for {} {!r} found while " + "determining item {}".format(keyed_by, key, item_name) + ) + elif matches: + value = matches[0] + continue + + raise Exception( + "No {} matching {!r} nor 'default' found while determining item {}".format( + keyed_by, key, item_name + ) + ) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py new file mode 100644 index 0000000000..56b513e74c --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/memoize.py @@ -0,0 +1,40 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +# Imported from +# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#923-949 + + +import functools + + +class memoize(dict): + """A decorator to memoize the results of function calls depending + on its arguments. + Both functions and instance methods are handled, although in the + instance method case, the results are cache in the instance itself. + """ + + def __init__(self, func): + self.func = func + functools.update_wrapper(self, func) + + def __call__(self, *args): + if args not in self: + self[args] = self.func(*args) + return self[args] + + def method_call(self, instance, *args): + name = "_%s" % self.func.__name__ + if not hasattr(instance, name): + setattr(instance, name, {}) + cache = getattr(instance, name) + if args not in cache: + cache[args] = self.func(instance, *args) + return cache[args] + + def __get__(self, instance, cls): + return functools.update_wrapper( + functools.partial(self.method_call, instance), self.func + ) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py new file mode 100644 index 0000000000..6233a98a40 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/parameterization.py @@ -0,0 +1,97 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +import re + +from taskgraph.util.taskcluster import get_artifact_url +from taskgraph.util.time import json_time_from_now + +TASK_REFERENCE_PATTERN = re.compile("<([^>]+)>") +ARTIFACT_REFERENCE_PATTERN = re.compile("<([^/]+)/([^>]+)>") + + +def _recurse(val, param_fns): + def recurse(val): + if isinstance(val, list): + return [recurse(v) for v in val] + elif isinstance(val, dict): + if len(val) == 1: + for param_key, param_fn in param_fns.items(): + if set(val.keys()) == {param_key}: + return param_fn(val[param_key]) + return {k: recurse(v) for k, v in val.items()} + else: + return val + + return recurse(val) + + +def resolve_timestamps(now, task_def): + """Resolve all instances of `{'relative-datestamp': '..'}` in the given task definition""" + return _recurse( + task_def, + { + "relative-datestamp": lambda v: json_time_from_now(v, now), + }, + ) + + +def resolve_task_references(label, task_def, task_id, decision_task_id, dependencies): + """Resolve all instances of ``{'task-reference': '..<..>..'} `` + and ``{'artifact-reference`: '..<dependency/artifact/path>..'}`` + in the given task definition, using the given dependencies. + """ + + def task_reference(val): + def repl(match): + key = match.group(1) + if key == "self": + return task_id + elif key == "decision": + return decision_task_id + try: + return dependencies[key] + except KeyError: + # handle escaping '<' + if key == "<": + return key + raise KeyError(f"task '{label}' has no dependency named '{key}'") + + return TASK_REFERENCE_PATTERN.sub(repl, val) + + def artifact_reference(val): + def repl(match): + dependency, artifact_name = match.group(1, 2) + + if dependency == "self": + raise KeyError(f"task '{label}' can't reference artifacts of self") + elif dependency == "decision": + task_id = decision_task_id + else: + try: + task_id = dependencies[dependency] + except KeyError: + raise KeyError( + "task '{}' has no dependency named '{}'".format( + label, dependency + ) + ) + + assert artifact_name.startswith( + "public/" + ), "artifact-reference only supports public artifacts, not `{}`".format( + artifact_name + ) + return get_artifact_url(task_id, artifact_name) + + return ARTIFACT_REFERENCE_PATTERN.sub(repl, val) + + return _recurse( + task_def, + { + "task-reference": task_reference, + "artifact-reference": artifact_reference, + }, + ) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py new file mode 100644 index 0000000000..728b648ac1 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/path.py @@ -0,0 +1,172 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Like :py:mod:`os.path`, with a reduced set of functions, and with normalized path +separators (always use forward slashes). +Also contains a few additional utilities not found in :py:mod:`os.path`. +""" + +# Imported from +# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozpack/path.py + + +import os +import posixpath +import re + + +def normsep(path): + """ + Normalize path separators, by using forward slashes instead of whatever + :py:const:`os.sep` is. 
+ """ + if os.sep != "/": + path = path.replace(os.sep, "/") + if os.altsep and os.altsep != "/": + path = path.replace(os.altsep, "/") + return path + + +def relpath(path, start): + rel = normsep(os.path.relpath(path, start)) + return "" if rel == "." else rel + + +def realpath(path): + return normsep(os.path.realpath(path)) + + +def abspath(path): + return normsep(os.path.abspath(path)) + + +def join(*paths): + return normsep(os.path.join(*paths)) + + +def normpath(path): + return posixpath.normpath(normsep(path)) + + +def dirname(path): + return posixpath.dirname(normsep(path)) + + +def commonprefix(paths): + return posixpath.commonprefix([normsep(path) for path in paths]) + + +def basename(path): + return os.path.basename(path) + + +def splitext(path): + return posixpath.splitext(normsep(path)) + + +def split(path): + """ + Return the normalized path as a list of its components. + + ``split('foo/bar/baz')`` returns ``['foo', 'bar', 'baz']`` + """ + return normsep(path).split("/") + + +def basedir(path, bases): + """ + Given a list of directories (`bases`), return which one contains the given + path. If several matches are found, the deepest base directory is returned. + + ``basedir('foo/bar/baz', ['foo', 'baz', 'foo/bar'])`` returns ``'foo/bar'`` + (`'foo'` and `'foo/bar'` both match, but `'foo/bar'` is the deepest match) + """ + path = normsep(path) + bases = [normsep(b) for b in bases] + if path in bases: + return path + for b in sorted(bases, reverse=True): + if b == "" or path.startswith(b + "/"): + return b + + +re_cache = {} +# Python versions < 3.7 return r'\/' for re.escape('/'). +if re.escape("/") == "/": + MATCH_STAR_STAR_RE = re.compile(r"(^|/)\\\*\\\*/") + MATCH_STAR_STAR_END_RE = re.compile(r"(^|/)\\\*\\\*$") +else: + MATCH_STAR_STAR_RE = re.compile(r"(^|\\\/)\\\*\\\*\\\/") + MATCH_STAR_STAR_END_RE = re.compile(r"(^|\\\/)\\\*\\\*$") + + +def match(path, pattern): + """ + Return whether the given path matches the given pattern. + An asterisk can be used to match any string, including the null string, in + one part of the path: + + ``foo`` matches ``*``, ``f*`` or ``fo*o`` + + However, an asterisk matching a subdirectory may not match the null string: + + ``foo/bar`` does *not* match ``foo/*/bar`` + + If the pattern matches one of the ancestor directories of the path, the + patch is considered matching: + + ``foo/bar`` matches ``foo`` + + Two adjacent asterisks can be used to match files and zero or more + directories and subdirectories. + + ``foo/bar`` matches ``foo/**/bar``, or ``**/bar`` + """ + if not pattern: + return True + if pattern not in re_cache: + p = re.escape(pattern) + p = MATCH_STAR_STAR_RE.sub(r"\1(?:.+/)?", p) + p = MATCH_STAR_STAR_END_RE.sub(r"(?:\1.+)?", p) + p = p.replace(r"\*", "[^/]*") + "(?:/.*)?$" + re_cache[pattern] = re.compile(p) + return re_cache[pattern].match(path) is not None + + +def rebase(oldbase, base, relativepath): + """ + Return `relativepath` relative to `base` instead of `oldbase`. + """ + if base == oldbase: + return relativepath + if len(base) < len(oldbase): + assert basedir(oldbase, [base]) == base + relbase = relpath(oldbase, base) + result = join(relbase, relativepath) + else: + assert basedir(base, [oldbase]) == oldbase + relbase = relpath(base, oldbase) + result = relpath(relativepath, relbase) + result = normpath(result) + if relativepath.endswith("/") and not result.endswith("/"): + result += "/" + return result + + +def ancestors(path): + """Emit the parent directories of a path. 
+ + Args: + path (str): Path to emit parents of. + + Yields: + str: Path of parent directory. + """ + while path: + yield path + newpath = os.path.dirname(path) + if newpath == path: + break + path = newpath diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py new file mode 100644 index 0000000000..3eb61dfbf3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/python_path.py @@ -0,0 +1,52 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import inspect +import os + + +def find_object(path): + """ + Find a Python object given a path of the form <modulepath>:<objectpath>. + Conceptually equivalent to + + def find_object(modulepath, objectpath): + import <modulepath> as mod + return mod.<objectpath> + """ + if path.count(":") != 1: + raise ValueError(f'python path {path!r} does not have the form "module:object"') + + modulepath, objectpath = path.split(":") + obj = __import__(modulepath) + for a in modulepath.split(".")[1:]: + obj = getattr(obj, a) + for a in objectpath.split("."): + obj = getattr(obj, a) + return obj + + +def import_sibling_modules(exceptions=None): + """ + Import all Python modules that are siblings of the calling module. + + Args: + exceptions (list): A list of file names to exclude (caller and + __init__.py are implicitly excluded). + """ + frame = inspect.stack()[1] + mod = inspect.getmodule(frame[0]) + + name = os.path.basename(mod.__file__) + excs = {"__init__.py", name} + if exceptions: + excs.update(exceptions) + + modpath = mod.__name__ + if not name.startswith("__init__.py"): + modpath = modpath.rsplit(".", 1)[0] + + for f in os.listdir(os.path.dirname(mod.__file__)): + if f.endswith(".py") and f not in excs: + __import__(modpath + "." + f[:-3]) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py new file mode 100644 index 0000000000..55d74f479a --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/readonlydict.py @@ -0,0 +1,22 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +# Imported from +# https://searchfox.org/mozilla-central/rev/c3ebaf6de2d481c262c04bb9657eaf76bf47e2ac/python/mozbuild/mozbuild/util.py#115-127 + + +class ReadOnlyDict(dict): + """A read-only dictionary.""" + + def __init__(self, *args, **kwargs): + dict.__init__(self, *args, **kwargs) + + def __delitem__(self, key): + raise Exception("Object does not support deletion.") + + def __setitem__(self, key, value): + raise Exception("Object does not support assignment.") + + def update(self, *args, **kwargs): + raise Exception("Object does not support update.") diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py new file mode 100644 index 0000000000..3989f71182 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/schema.py @@ -0,0 +1,260 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import collections +import pprint +import re + +import voluptuous + +import taskgraph + +from .keyed_by import evaluate_keyed_by + + +def validate_schema(schema, obj, msg_prefix): + """ + Validate that object satisfies schema. If not, generate a useful exception + beginning with msg_prefix. + """ + if taskgraph.fast: + return + try: + schema(obj) + except voluptuous.MultipleInvalid as exc: + msg = [msg_prefix] + for error in exc.errors: + msg.append(str(error)) + raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) + + +def optionally_keyed_by(*arguments): + """ + Mark a schema value as optionally keyed by any of a number of fields. The + schema is the last argument, and the remaining fields are taken to be the + field names. For example: + + 'some-value': optionally_keyed_by( + 'test-platform', 'build-platform', + Any('a', 'b', 'c')) + + The resulting schema will allow nesting of `by-test-platform` and + `by-build-platform` in either order. + """ + schema = arguments[-1] + fields = arguments[:-1] + + def validator(obj): + if isinstance(obj, dict) and len(obj) == 1: + k, v = list(obj.items())[0] + if k.startswith("by-") and k[len("by-") :] in fields: + res = {} + for kk, vv in v.items(): + try: + res[kk] = validator(vv) + except voluptuous.Invalid as e: + e.prepend([k, kk]) + raise + return res + return Schema(schema)(obj) + + return validator + + +def resolve_keyed_by( + item, field, item_name, defer=None, enforce_single_match=True, **extra_values +): + """ + For values which can either accept a literal value, or be keyed by some + other attribute of the item, perform that lookup and replacement in-place + (modifying `item` directly). The field is specified using dotted notation + to traverse dictionaries. + + For example, given item:: + + job: + test-platform: linux128 + chunks: + by-test-platform: + macosx-10.11/debug: 13 + win.*: 6 + default: 12 + + a call to `resolve_keyed_by(item, 'job.chunks', item['thing-name'])` + would mutate item in-place to:: + + job: + test-platform: linux128 + chunks: 12 + + The `item_name` parameter is used to generate useful error messages. + + If extra_values are supplied, they represent additional values available + for reference from by-<field>. + + Items can be nested as deeply as the schema will allow:: + + chunks: + by-test-platform: + win.*: + by-project: + ash: .. + cedar: .. + linux: 13 + default: 12 + + Args: + item (dict): Object being evaluated. + field (str): Name of the key to perform evaluation on. + item_name (str): Used to generate useful error messages. + defer (list): + Allows evaluating a by-* entry at a later time. In the example + above it's possible that the project attribute hasn't been set yet, + in which case we'd want to stop before resolving that subkey and + then call this function again later. This can be accomplished by + setting `defer=["project"]` in this example. + enforce_single_match (bool): + If True (default), each task may only match a single arm of the + evaluation. + extra_values (kwargs): + If supplied, represent additional values available + for reference from by-<field>. + + Returns: + dict: item which has also been modified in-place. + """ + # find the field, returning the item unchanged if anything goes wrong + container, subfield = item, field + while "." 
in subfield: + f, subfield = subfield.split(".", 1) + if f not in container: + return item + container = container[f] + if not isinstance(container, dict): + return item + + if subfield not in container: + return item + + container[subfield] = evaluate_keyed_by( + value=container[subfield], + item_name=f"`{field}` in `{item_name}`", + defer=defer, + enforce_single_match=enforce_single_match, + attributes=dict(item, **extra_values), + ) + + return item + + +# Schemas for YAML files should use dashed identifiers by default. If there are +# components of the schema for which there is a good reason to use another format, +# they can be excepted here. +EXCEPTED_SCHEMA_IDENTIFIERS = [ + # upstream-artifacts and artifact-map are handed directly to scriptWorker, + # which expects interCaps + "upstream-artifacts", + "artifact-map", +] + + +def check_schema(schema): + identifier_re = re.compile(r"^\$?[a-z][a-z0-9-]*$") + + def excepted(item): + for esi in EXCEPTED_SCHEMA_IDENTIFIERS: + if isinstance(esi, str): + if f"[{esi!r}]" in item: + return True + elif esi(item): + return True + return False + + def iter(path, sch): + def check_identifier(path, k): + if k in (str,) or k in (str, voluptuous.Extra): + pass + elif isinstance(k, voluptuous.NotIn): + pass + elif isinstance(k, str): + if not identifier_re.match(k) and not excepted(path): + raise RuntimeError( + "YAML schemas should use dashed lower-case identifiers, " + "not {!r} @ {}".format(k, path) + ) + elif isinstance(k, (voluptuous.Optional, voluptuous.Required)): + check_identifier(path, k.schema) + elif isinstance(k, (voluptuous.Any, voluptuous.All)): + for v in k.validators: + check_identifier(path, v) + elif not excepted(path): + raise RuntimeError( + "Unexpected type in YAML schema: {} @ {}".format( + type(k).__name__, path + ) + ) + + if isinstance(sch, collections.abc.Mapping): + for k, v in sch.items(): + child = f"{path}[{k!r}]" + check_identifier(child, k) + iter(child, v) + elif isinstance(sch, (list, tuple)): + for i, v in enumerate(sch): + iter(f"{path}[{i}]", v) + elif isinstance(sch, voluptuous.Any): + for v in sch.validators: + iter(path, v) + + iter("schema", schema.schema) + + +class Schema(voluptuous.Schema): + """ + Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks + in the process. + """ + + def __init__(self, *args, check=True, **kwargs): + super().__init__(*args, **kwargs) + + self.check = check + if not taskgraph.fast and self.check: + check_schema(self) + + def extend(self, *args, **kwargs): + schema = super().extend(*args, **kwargs) + + if self.check: + check_schema(schema) + # We want twice extend schema to be checked too. 
+ schema.__class__ = Schema + return schema + + def _compile(self, schema): + if taskgraph.fast: + return + return super()._compile(schema) + + def __getitem__(self, item): + return self.schema[item] + + +OptimizationSchema = voluptuous.Any( + # always run this task (default) + None, + # search the index for the given index namespaces, and replace this task if found + # the search occurs in order, with the first match winning + {"index-search": [str]}, + # skip this task if none of the given file patterns match + {"skip-unless-changed": [str]}, +) + +# shortcut for a string where task references are allowed +taskref_or_string = voluptuous.Any( + str, + {voluptuous.Required("task-reference"): str}, + {voluptuous.Required("artifact-reference"): str}, +) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py new file mode 100644 index 0000000000..d695767f05 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/shell.py @@ -0,0 +1,40 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +import re + +SHELL_QUOTE_RE = re.compile(r"[\\\t\r\n \'\"#<>&|`(){}$;\*\?]") + + +def _quote(s): + """Given a string, returns a version that can be used literally on a shell + command line, enclosing it with single quotes if necessary. + + As a special case, if given an int, returns a string containing the int, + not enclosed in quotes. + """ + if type(s) == int: + return "%d" % s + + # Empty strings need to be quoted to have any significance + if s and not SHELL_QUOTE_RE.search(s) and not s.startswith("~"): + return s + + # Single quoted strings can contain any characters unescaped except the + # single quote itself, which can't even be escaped, so the string needs to + # be closed, an escaped single quote added, and reopened. + t = type(s) + return t("'%s'") % s.replace(t("'"), t("'\\''")) + + +def quote(*strings): + """Given one or more strings, returns a quoted string that can be used + literally on a shell command line. + + >>> quote('a', 'b') + "a b" + >>> quote('a b', 'c') + "'a b' c" + """ + return " ".join(_quote(s) for s in strings) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py new file mode 100644 index 0000000000..a830a473b3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskcluster.py @@ -0,0 +1,373 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import datetime +import functools +import logging +import os + +import requests +import taskcluster_urls as liburls +from requests.packages.urllib3.util.retry import Retry + +from taskgraph.task import Task +from taskgraph.util import yaml +from taskgraph.util.memoize import memoize + +logger = logging.getLogger(__name__) + +# this is set to true for `mach taskgraph action-callback --test` +testing = False + +# Default rootUrl to use if none is given in the environment; this should point +# to the production Taskcluster deployment used for CI. 
+PRODUCTION_TASKCLUSTER_ROOT_URL = None + +# the maximum number of parallel Taskcluster API calls to make +CONCURRENCY = 50 + + +@memoize +def get_root_url(use_proxy): + """Get the current TASKCLUSTER_ROOT_URL. + + When running in a task, this must come from $TASKCLUSTER_ROOT_URL; when run + on the command line, a default may be provided that points to the + production deployment of Taskcluster. If use_proxy is set, this attempts to + get TASKCLUSTER_PROXY_URL instead, failing if it is not set. + """ + if use_proxy: + try: + return liburls.normalize_root_url(os.environ["TASKCLUSTER_PROXY_URL"]) + except KeyError: + if "TASK_ID" not in os.environ: + raise RuntimeError( + "taskcluster-proxy is not available when not executing in a task" + ) + else: + raise RuntimeError("taskcluster-proxy is not enabled for this task") + + if "TASKCLUSTER_ROOT_URL" in os.environ: + logger.debug( + "Running in Taskcluster instance {}{}".format( + os.environ["TASKCLUSTER_ROOT_URL"], + " with taskcluster-proxy" + if "TASKCLUSTER_PROXY_URL" in os.environ + else "", + ) + ) + return liburls.normalize_root_url(os.environ["TASKCLUSTER_ROOT_URL"]) + + if "TASK_ID" in os.environ: + raise RuntimeError("$TASKCLUSTER_ROOT_URL must be set when running in a task") + + if PRODUCTION_TASKCLUSTER_ROOT_URL is None: + raise RuntimeError( + "Could not detect Taskcluster instance, set $TASKCLUSTER_ROOT_URL" + ) + + logger.debug("Using default TASKCLUSTER_ROOT_URL") + return liburls.normalize_root_url(PRODUCTION_TASKCLUSTER_ROOT_URL) + + +def requests_retry_session( + retries, + backoff_factor=0.1, + status_forcelist=(500, 502, 503, 504), + concurrency=CONCURRENCY, + session=None, +): + session = session or requests.Session() + retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + ) + + # Default HTTPAdapter uses 10 connections. Mount custom adapter to increase + # that limit. Connections are established as needed, so using a large value + # should not negatively impact performance. + http_adapter = requests.adapters.HTTPAdapter( + pool_connections=concurrency, + pool_maxsize=concurrency, + max_retries=retry, + ) + session.mount("http://", http_adapter) + session.mount("https://", http_adapter) + + return session + + +@memoize +def get_session(): + return requests_retry_session(retries=5) + + +def _do_request(url, method=None, **kwargs): + if method is None: + method = "post" if kwargs else "get" + + session = get_session() + if method == "get": + kwargs["stream"] = True + + response = getattr(session, method)(url, **kwargs) + + if response.status_code >= 400: + # Consume content before raise_for_status, so that the connection can be + # reused. + response.content + response.raise_for_status() + return response + + +def _handle_artifact(path, response): + if path.endswith(".json"): + return response.json() + if path.endswith(".yml"): + return yaml.load_stream(response.text) + response.raw.read = functools.partial(response.raw.read, decode_content=True) + return response.raw + + +def get_artifact_url(task_id, path, use_proxy=False): + artifact_tmpl = liburls.api( + get_root_url(False), "queue", "v1", "task/{}/artifacts/{}" + ) + data = artifact_tmpl.format(task_id, path) + if use_proxy: + # Until Bug 1405889 is deployed, we can't download directly + # from the taskcluster-proxy. Work around by using the /bewit + # endpoint instead. 
+ # The bewit URL is the body of a 303 redirect, which we don't + # want to follow (which fetches a potentially large resource). + response = _do_request( + os.environ["TASKCLUSTER_PROXY_URL"] + "/bewit", + data=data, + allow_redirects=False, + ) + return response.text + return data + + +def get_artifact(task_id, path, use_proxy=False): + """ + Returns the artifact with the given path for the given task id. + + If the path ends with ".json" or ".yml", the content is deserialized as, + respectively, json or yaml, and the corresponding python data (usually + dict) is returned. + For other types of content, a file-like object is returned. + """ + response = _do_request(get_artifact_url(task_id, path, use_proxy)) + return _handle_artifact(path, response) + + +def list_artifacts(task_id, use_proxy=False): + response = _do_request(get_artifact_url(task_id, "", use_proxy).rstrip("/")) + return response.json()["artifacts"] + + +def get_artifact_prefix(task): + prefix = None + if isinstance(task, dict): + prefix = task.get("attributes", {}).get("artifact_prefix") + elif isinstance(task, Task): + prefix = task.attributes.get("artifact_prefix") + else: + raise Exception(f"Can't find artifact-prefix of non-task: {task}") + return prefix or "public/build" + + +def get_artifact_path(task, path): + return f"{get_artifact_prefix(task)}/{path}" + + +def get_index_url(index_path, use_proxy=False, multiple=False): + index_tmpl = liburls.api(get_root_url(use_proxy), "index", "v1", "task{}/{}") + return index_tmpl.format("s" if multiple else "", index_path) + + +def find_task_id(index_path, use_proxy=False): + try: + response = _do_request(get_index_url(index_path, use_proxy)) + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + raise KeyError(f"index path {index_path} not found") + raise + return response.json()["taskId"] + + +def get_artifact_from_index(index_path, artifact_path, use_proxy=False): + full_path = index_path + "/artifacts/" + artifact_path + response = _do_request(get_index_url(full_path, use_proxy)) + return _handle_artifact(full_path, response) + + +def list_tasks(index_path, use_proxy=False): + """ + Returns a list of task_ids where each task_id is indexed under a path + in the index. Results are sorted by expiration date from oldest to newest. + """ + results = [] + data = {} + while True: + response = _do_request( + get_index_url(index_path, use_proxy, multiple=True), json=data + ) + response = response.json() + results += response["tasks"] + if response.get("continuationToken"): + data = {"continuationToken": response.get("continuationToken")} + else: + break + + # We can sort on expires because in the general case + # all of these tasks should be created with the same expires time so they end up in + # order from earliest to latest action. If more correctness is needed, consider + # fetching each task and sorting on the created date. 
+ results.sort(key=lambda t: parse_time(t["expires"])) + return [t["taskId"] for t in results] + + +def parse_time(timestamp): + """Turn a "JSON timestamp" as used in TC APIs into a datetime""" + return datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ") + + +def get_task_url(task_id, use_proxy=False): + task_tmpl = liburls.api(get_root_url(use_proxy), "queue", "v1", "task/{}") + return task_tmpl.format(task_id) + + +def get_task_definition(task_id, use_proxy=False): + response = _do_request(get_task_url(task_id, use_proxy)) + return response.json() + + +def cancel_task(task_id, use_proxy=False): + """Cancels a task given a task_id. In testing mode, just logs that it would + have cancelled.""" + if testing: + logger.info(f"Would have cancelled {task_id}.") + else: + _do_request(get_task_url(task_id, use_proxy) + "/cancel", json={}) + + +def status_task(task_id, use_proxy=False): + """Gets the status of a task given a task_id. + + In testing mode, just logs that it would have retrieved status. + + Args: + task_id (str): A task id. + use_proxy (bool): Whether to use taskcluster-proxy (default: False) + + Returns: + dict: A dictionary object as defined here: + https://docs.taskcluster.net/docs/reference/platform/queue/api#status + """ + if testing: + logger.info(f"Would have gotten status for {task_id}.") + else: + resp = _do_request(get_task_url(task_id, use_proxy) + "/status") + status = resp.json().get("status", {}) + return status + + +def state_task(task_id, use_proxy=False): + """Gets the state of a task given a task_id. + + In testing mode, just logs that it would have retrieved state. This is a subset of the + data returned by :func:`status_task`. + + Args: + task_id (str): A task id. + use_proxy (bool): Whether to use taskcluster-proxy (default: False) + + Returns: + str: The state of the task, one of + ``pending, running, completed, failed, exception, unknown``. + """ + if testing: + logger.info(f"Would have gotten state for {task_id}.") + else: + status = status_task(task_id, use_proxy=use_proxy).get("state") or "unknown" + return status + + +def rerun_task(task_id): + """Reruns a task given a task_id. In testing mode, just logs that it would + have reran.""" + if testing: + logger.info(f"Would have rerun {task_id}.") + else: + _do_request(get_task_url(task_id, use_proxy=True) + "/rerun", json={}) + + +def get_current_scopes(): + """Get the current scopes. 
This only makes sense in a task with the Taskcluster + proxy enabled, where it returns the actual scopes accorded to the task.""" + auth_url = liburls.api(get_root_url(True), "auth", "v1", "scopes/current") + resp = _do_request(auth_url) + return resp.json().get("scopes", []) + + +def get_purge_cache_url(provisioner_id, worker_type, use_proxy=False): + url_tmpl = liburls.api( + get_root_url(use_proxy), "purge-cache", "v1", "purge-cache/{}/{}" + ) + return url_tmpl.format(provisioner_id, worker_type) + + +def purge_cache(provisioner_id, worker_type, cache_name, use_proxy=False): + """Requests a cache purge from the purge-caches service.""" + if testing: + logger.info( + "Would have purged {}/{}/{}.".format( + provisioner_id, worker_type, cache_name + ) + ) + else: + logger.info(f"Purging {provisioner_id}/{worker_type}/{cache_name}.") + purge_cache_url = get_purge_cache_url(provisioner_id, worker_type, use_proxy) + _do_request(purge_cache_url, json={"cacheName": cache_name}) + + +def send_email(address, subject, content, link, use_proxy=False): + """Sends an email using the notify service""" + logger.info(f"Sending email to {address}.") + url = liburls.api(get_root_url(use_proxy), "notify", "v1", "email") + _do_request( + url, + json={ + "address": address, + "subject": subject, + "content": content, + "link": link, + }, + ) + + +def list_task_group_incomplete_tasks(task_group_id): + """Generate the incomplete tasks in a task group""" + params = {} + while True: + url = liburls.api( + get_root_url(False), + "queue", + "v1", + f"task-group/{task_group_id}/list", + ) + resp = _do_request(url, method="get", params=params).json() + for task in [t["status"] for t in resp["tasks"]]: + if task["state"] in ["running", "pending", "unscheduled"]: + yield task["taskId"] + if resp.get("continuationToken"): + params = {"continuationToken": resp.get("continuationToken")} + else: + break diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py new file mode 100644 index 0000000000..7b545595ef --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/taskgraph.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Tools for interacting with existing taskgraphs. 
+""" + + +from taskgraph.util.taskcluster import find_task_id, get_artifact + + +def find_decision_task(parameters, graph_config): + """Given the parameters for this action, find the taskId of the decision + task""" + if parameters.get("repository_type", "hg") == "hg": + return find_task_id( + "{}.v2.{}.pushlog-id.{}.decision".format( + graph_config["trust-domain"], + parameters["project"], + parameters["pushlog_id"], + ) + ) + elif parameters["repository_type"] == "git": + return find_task_id( + "{}.v2.{}.revision.{}.taskgraph.decision".format( + graph_config["trust-domain"], + parameters["project"], + parameters["head_rev"], + ) + ) + else: + raise Exception( + "Unknown repository_type {}!".format(parameters["repository_type"]) + ) + + +def find_existing_tasks_from_previous_kinds( + full_task_graph, previous_graph_ids, rebuild_kinds +): + """Given a list of previous decision/action taskIds and kinds to ignore + from the previous graphs, return a dictionary of labels-to-taskids to use + as ``existing_tasks`` in the optimization step.""" + existing_tasks = {} + for previous_graph_id in previous_graph_ids: + label_to_taskid = get_artifact(previous_graph_id, "public/label-to-taskid.json") + kind_labels = { + t.label + for t in full_task_graph.tasks.values() + if t.attributes["kind"] not in rebuild_kinds + } + for label in set(label_to_taskid.keys()).intersection(kind_labels): + existing_tasks[label] = label_to_taskid[label] + return existing_tasks diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py new file mode 100644 index 0000000000..465e4a43de --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/templates.py @@ -0,0 +1,50 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import copy + + +def merge_to(source, dest): + """ + Merge dict and arrays (override scalar values) + + Keys from source override keys from dest, and elements from lists in source + are appended to lists in dest. + + :param dict source: to copy from + :param dict dest: to copy to (modified in place) + """ + + for key, value in source.items(): + # Override mismatching or empty types + if type(value) != type(dest.get(key)): # noqa + dest[key] = source[key] + continue + + # Merge dict + if isinstance(value, dict): + merge_to(value, dest[key]) + continue + + if isinstance(value, list): + dest[key] = dest[key] + source[key] + continue + + dest[key] = source[key] + + return dest + + +def merge(*objects): + """ + Merge the given objects, using the semantics described for merge_to, with + objects later in the list taking precedence. From an inheritance + perspective, "parents" should be listed before "children". + + Returns the result without modifying any arguments. + """ + if len(objects) == 1: + return copy.deepcopy(objects[0]) + return merge_to(objects[-1], merge(*objects[:-1])) diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py new file mode 100644 index 0000000000..e511978b5f --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/time.py @@ -0,0 +1,115 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Python port of the ms.js node module this is not a direct port some things are +# more complicated or less precise and we lean on time delta here. + + +import datetime +import re + +PATTERN = re.compile(r"((?:\d+)?\.?\d+) *([a-z]+)") + + +def seconds(value): + return datetime.timedelta(seconds=int(value)) + + +def minutes(value): + return datetime.timedelta(minutes=int(value)) + + +def hours(value): + return datetime.timedelta(hours=int(value)) + + +def days(value): + return datetime.timedelta(days=int(value)) + + +def months(value): + # See warning in years(), below + return datetime.timedelta(days=int(value) * 30) + + +def years(value): + # Warning here "years" are vague don't use this for really sensitive date + # computation the idea is to give you a absolute amount of time in the + # future which is not the same thing as "precisely on this date next year" + return datetime.timedelta(days=int(value) * 365) + + +ALIASES = {} +ALIASES["seconds"] = ALIASES["second"] = ALIASES["s"] = seconds +ALIASES["minutes"] = ALIASES["minute"] = ALIASES["min"] = minutes +ALIASES["hours"] = ALIASES["hour"] = ALIASES["h"] = hours +ALIASES["days"] = ALIASES["day"] = ALIASES["d"] = days +ALIASES["months"] = ALIASES["month"] = ALIASES["mo"] = months +ALIASES["years"] = ALIASES["year"] = ALIASES["y"] = years + + +class InvalidString(Exception): + pass + + +class UnknownTimeMeasurement(Exception): + pass + + +def value_of(input_str): + """ + Convert a string to a json date in the future + :param str input_str: (ex: 1d, 2d, 6years, 2 seconds) + :returns: Unit given in seconds + """ + + matches = PATTERN.search(input_str) + + if matches is None or len(matches.groups()) < 2: + raise InvalidString(f"'{input_str}' is invalid string") + + value, unit = matches.groups() + + if unit not in ALIASES: + raise UnknownTimeMeasurement( + "{} is not a valid time measure use one of {}".format( + unit, sorted(ALIASES.keys()) + ) + ) + + return ALIASES[unit](value) + + +def json_time_from_now(input_str, now=None, datetime_format=False): + """ + :param str input_str: Input string (see value of) + :param datetime now: Optionally set the definition of `now` + :param boolean datetime_format: Set `True` to get a `datetime` output + :returns: JSON string representation of time in future. + """ + + if now is None: + now = datetime.datetime.utcnow() + + time = now + value_of(input_str) + + if datetime_format is True: + return time + else: + # Sorta a big hack but the json schema validator for date does not like the + # ISO dates until 'Z' (for timezone) is added... + # Microseconds are excluded (see bug 1381801) + return time.isoformat(timespec="milliseconds") + "Z" + + +def current_json_time(datetime_format=False): + """ + :param boolean datetime_format: Set `True` to get a `datetime` output + :returns: JSON string representation of the current time. 
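These helpers are what allow task definitions to express durations such as "1 year" or "30 minutes" and turn them into queue-friendly timestamps. A small, deterministic sketch:

    import datetime

    from taskgraph.util.time import json_time_from_now, value_of

    assert value_of("2 hours") == datetime.timedelta(hours=2)
    assert value_of("30s") == datetime.timedelta(seconds=30)

    # Pin "now" so the resulting string is reproducible.
    now = datetime.datetime(2023, 1, 1)
    assert json_time_from_now("1 day", now=now) == "2023-01-02T00:00:00.000Z"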
+ """ + if datetime_format is True: + return datetime.datetime.utcnow() + else: + # Microseconds are excluded (see bug 1381801) + return datetime.datetime.utcnow().isoformat(timespec="milliseconds") + "Z" diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py new file mode 100644 index 0000000000..9d0c032a1b --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/treeherder.py @@ -0,0 +1,64 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import re + +_JOINED_SYMBOL_RE = re.compile(r"([^(]*)\(([^)]*)\)$") + + +def split_symbol(treeherder_symbol): + """Split a symbol expressed as grp(sym) into its two parts. If no group is + given, the returned group is '?'""" + groupSymbol = "?" + symbol = treeherder_symbol + if "(" in symbol: + match = _JOINED_SYMBOL_RE.match(symbol) + if match: + groupSymbol, symbol = match.groups() + else: + raise Exception(f"`{symbol}` is not a valid treeherder symbol.") + return groupSymbol, symbol + + +def join_symbol(group, symbol): + """Perform the reverse of split_symbol, combining the given group and + symbol. If the group is '?', then it is omitted.""" + if group == "?": + return symbol + return f"{group}({symbol})" + + +def add_suffix(treeherder_symbol, suffix): + """Add a suffix to a treeherder symbol that may contain a group.""" + group, symbol = split_symbol(treeherder_symbol) + symbol += str(suffix) + return join_symbol(group, symbol) + + +def replace_group(treeherder_symbol, new_group): + """Add a suffix to a treeherder symbol that may contain a group.""" + _, symbol = split_symbol(treeherder_symbol) + return join_symbol(new_group, symbol) + + +def inherit_treeherder_from_dep(job, dep_job): + """Inherit treeherder defaults from dep_job""" + treeherder = job.get("treeherder", {}) + + dep_th_platform = ( + dep_job.task.get("extra", {}) + .get("treeherder", {}) + .get("machine", {}) + .get("platform", "") + ) + dep_th_collection = list( + dep_job.task.get("extra", {}).get("treeherder", {}).get("collection", {}).keys() + )[0] + treeherder.setdefault("platform", f"{dep_th_platform}/{dep_th_collection}") + treeherder.setdefault( + "tier", dep_job.task.get("extra", {}).get("treeherder", {}).get("tier", 1) + ) + # Does not set symbol + treeherder.setdefault("kind", "build") + return treeherder diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py new file mode 100644 index 0000000000..ba1d909019 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/vcs.py @@ -0,0 +1,539 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import logging +import os +import re +import subprocess +from abc import ABC, abstractmethod, abstractproperty +from shutil import which + +import requests +from redo import retry + +from taskgraph.util.path import ancestors + +PUSHLOG_TMPL = "{}/json-pushes?version=2&changeset={}&tipsonly=1&full=1" + +logger = logging.getLogger(__name__) + + +class Repository(ABC): + # Both mercurial and git use sha1 as revision idenfiers. Luckily, both define + # the same value as the null revision. 
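The treeherder symbol helpers are pure string manipulation, so they can be exercised directly; for example:

    from taskgraph.util.treeherder import add_suffix, join_symbol, split_symbol

    assert split_symbol("B(sym)") == ("B", "sym")
    assert split_symbol("sym") == ("?", "sym")     # no group defaults to '?'
    assert join_symbol("?", "sym") == "sym"        # the '?' group is omitted
    assert add_suffix("B(sym)", "-1") == "B(sym-1)"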
+ # + # https://github.com/git/git/blob/dc04167d378fb29d30e1647ff6ff51dd182bc9a3/t/oid-info/hash-info#L7 + # https://www.mercurial-scm.org/repo/hg-stable/file/82efc31bd152/mercurial/node.py#l30 + NULL_REVISION = "0000000000000000000000000000000000000000" + + def __init__(self, path): + self.path = path + self.binary = which(self.tool) + if self.binary is None: + raise OSError(f"{self.tool} not found!") + self._valid_diff_filter = ("m", "a", "d") + + self._env = os.environ.copy() + + def run(self, *args: str, **kwargs): + return_codes = kwargs.pop("return_codes", []) + cmd = (self.binary,) + args + + try: + return subprocess.check_output( + cmd, cwd=self.path, env=self._env, encoding="utf-8", **kwargs + ) + except subprocess.CalledProcessError as e: + if e.returncode in return_codes: + return "" + raise + + @abstractproperty + def tool(self) -> str: + """Version control system being used, either 'hg' or 'git'.""" + + @abstractproperty + def head_rev(self) -> str: + """Hash of HEAD revision.""" + + @abstractproperty + def base_rev(self): + """Hash of revision the current topic branch is based on.""" + + @abstractproperty + def branch(self): + """Current branch or bookmark the checkout has active.""" + + @abstractproperty + def all_remote_names(self): + """Name of all configured remote repositories.""" + + @abstractproperty + def default_remote_name(self): + """Name the VCS defines for the remote repository when cloning + it for the first time. This name may not exist anymore if users + changed the default configuration, for instance.""" + + @abstractproperty + def remote_name(self): + """Name of the remote repository.""" + + def _get_most_suitable_remote(self, remote_instructions): + remotes = self.all_remote_names + if len(remotes) == 1: + return remotes[0] + + if self.default_remote_name in remotes: + return self.default_remote_name + + first_remote = remotes[0] + logger.warning( + f"Unable to determine which remote repository to use between: {remotes}. " + f'Arbitrarily using the first one "{first_remote}". Please set an ' + f"`{self.default_remote_name}` remote if the arbitrarily selected one " + f"is not right. To do so: {remote_instructions}" + ) + + return first_remote + + @abstractproperty + def default_branch(self): + """Name of the default branch.""" + + @abstractmethod + def get_url(self, remote=None): + """Get URL of the upstream repository.""" + + @abstractmethod + def get_commit_message(self, revision=None): + """Commit message of specified revision or current commit.""" + + @abstractmethod + def get_changed_files(self, diff_filter, mode="unstaged", rev=None, base_rev=None): + """Return a list of files that are changed in: + * either this repository's working copy, + * or at a given revision (``rev``) + * or between 2 revisions (``base_rev`` and ``rev``) + + ``diff_filter`` controls which kinds of modifications are returned. + It is a string which may only contain the following characters: + + A - Include files that were added + D - Include files that were deleted + M - Include files that were modified + + By default, all three will be included. + + ``mode`` can be one of 'unstaged', 'staged' or 'all'. Only has an + effect on git. Defaults to 'unstaged'. + + ``rev`` is a specifier for which changesets to consider for + changes. The exact meaning depends on the vcs system being used. + + ``base_rev`` specifies the range of changesets. This parameter cannot + be used without ``rev``. The range includes ``rev`` but excludes + ``base_rev``. 
+ """ + + @abstractmethod + def get_outgoing_files(self, diff_filter, upstream): + """Return a list of changed files compared to upstream. + + ``diff_filter`` works the same as `get_changed_files`. + ``upstream`` is a remote ref to compare against. If unspecified, + this will be determined automatically. If there is no remote ref, + a MissingUpstreamRepo exception will be raised. + """ + + @abstractmethod + def working_directory_clean(self, untracked=False, ignored=False): + """Determine if the working directory is free of modifications. + + Returns True if the working directory does not have any file + modifications. False otherwise. + + By default, untracked and ignored files are not considered. If + ``untracked`` or ``ignored`` are set, they influence the clean check + to factor these file classes into consideration. + """ + + @abstractmethod + def update(self, ref): + """Update the working directory to the specified reference.""" + + @abstractmethod + def find_latest_common_revision(self, base_ref_or_rev, head_rev): + """Find the latest revision that is common to both the given + ``head_rev`` and ``base_ref_or_rev``""" + + @abstractmethod + def does_revision_exist_locally(self, revision): + """Check whether this revision exists in the local repository. + + If this function returns an unexpected value, then make sure + the revision was fetched from the remote repository.""" + + +class HgRepository(Repository): + tool = "hg" + default_remote_name = "default" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._env["HGPLAIN"] = "1" + + @property + def head_rev(self): + return self.run("log", "-r", ".", "-T", "{node}").strip() + + @property + def base_rev(self): + return self.run("log", "-r", "last(ancestors(.) and public())", "-T", "{node}") + + @property + def branch(self): + bookmarks_fn = os.path.join(self.path, ".hg", "bookmarks.current") + if os.path.exists(bookmarks_fn): + with open(bookmarks_fn) as f: + bookmark = f.read() + return bookmark or None + + return None + + @property + def all_remote_names(self): + remotes = self.run("paths", "--quiet").splitlines() + if not remotes: + raise RuntimeError("No remotes defined") + return remotes + + @property + def remote_name(self): + return self._get_most_suitable_remote( + "Edit .hg/hgrc and add:\n\n[paths]\ndefault = $URL", + ) + + @property + def default_branch(self): + # Mercurial recommends keeping "default" + # https://www.mercurial-scm.org/wiki/StandardBranching#Don.27t_use_a_name_other_than_default_for_your_main_development_branch + return "default" + + def get_url(self, remote="default"): + return self.run("path", "-T", "{url}", remote).strip() + + def get_commit_message(self, revision=None): + revision = revision or self.head_rev + return self.run("log", "-r", ".", "-T", "{desc}") + + def _format_diff_filter(self, diff_filter, for_status=False): + df = diff_filter.lower() + assert all(f in self._valid_diff_filter for f in df) + + # When looking at the changes in the working directory, the hg status + # command uses 'd' for files that have been deleted with a non-hg + # command, and 'r' for files that have been `hg rm`ed. Use both. 
+ return df.replace("d", "dr") if for_status else df + + def _files_template(self, diff_filter): + template = "" + df = self._format_diff_filter(diff_filter) + if "a" in df: + template += "{file_adds % '{file}\\n'}" + if "d" in df: + template += "{file_dels % '{file}\\n'}" + if "m" in df: + template += "{file_mods % '{file}\\n'}" + return template + + def get_changed_files( + self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None + ): + if rev is None: + if base_rev is not None: + raise ValueError("Cannot specify `base_rev` without `rev`") + # Use --no-status to print just the filename. + df = self._format_diff_filter(diff_filter, for_status=True) + return self.run("status", "--no-status", f"-{df}").splitlines() + else: + template = self._files_template(diff_filter) + revision_argument = rev if base_rev is None else f"{base_rev}~-1::{rev}" + return self.run("log", "-r", revision_argument, "-T", template).splitlines() + + def get_outgoing_files(self, diff_filter="ADM", upstream=None): + template = self._files_template(diff_filter) + + if not upstream: + return self.run( + "log", "-r", "draft() and ancestors(.)", "--template", template + ).split() + + return self.run( + "outgoing", + "-r", + ".", + "--quiet", + "--template", + template, + upstream, + return_codes=(1,), + ).split() + + def working_directory_clean(self, untracked=False, ignored=False): + args = ["status", "--modified", "--added", "--removed", "--deleted"] + if untracked: + args.append("--unknown") + if ignored: + args.append("--ignored") + + # If output is empty, there are no entries of requested status, which + # means we are clean. + return not len(self.run(*args).strip()) + + def update(self, ref): + return self.run("update", "--check", ref) + + def find_latest_common_revision(self, base_ref_or_rev, head_rev): + return self.run( + "log", + "-r", + f"last(ancestors('{base_ref_or_rev}') and ancestors('{head_rev}'))", + "--template", + "{node}", + ).strip() + + def does_revision_exist_locally(self, revision): + try: + return self.run("log", "-r", revision).strip() != "" + except subprocess.CalledProcessError as e: + # Error code 255 comes with the message: + # "abort: unknown revision $REVISION" + if e.returncode == 255: + return False + raise + + +class GitRepository(Repository): + tool = "git" + default_remote_name = "origin" + + _LS_REMOTE_PATTERN = re.compile(r"ref:\s+refs/heads/(?P<branch_name>\S+)\s+HEAD") + + @property + def head_rev(self): + return self.run("rev-parse", "--verify", "HEAD").strip() + + @property + def base_rev(self): + refs = self.run( + "rev-list", "HEAD", "--topo-order", "--boundary", "--not", "--remotes" + ).splitlines() + if refs: + return refs[-1][1:] # boundary starts with a prefix `-` + return self.head_rev + + @property + def branch(self): + return self.run("branch", "--show-current").strip() or None + + @property + def all_remote_names(self): + remotes = self.run("remote").splitlines() + if not remotes: + raise RuntimeError("No remotes defined") + return remotes + + @property + def remote_name(self): + try: + remote_branch_name = self.run( + "rev-parse", "--verify", "--abbrev-ref", "--symbolic-full-name", "@{u}" + ).strip() + return remote_branch_name.split("/")[0] + except subprocess.CalledProcessError as e: + # Error code 128 comes with the message: + # "fatal: no upstream configured for branch $BRANCH" + if e.returncode != 128: + raise + + return self._get_most_suitable_remote("`git remote add origin $URL`") + + @property + def default_branch(self): + try: + # this one works if 
the current repo was cloned from an existing + # repo elsewhere + return self._get_default_branch_from_cloned_metadata() + except (subprocess.CalledProcessError, RuntimeError): + pass + + try: + # This call works if you have (network) access to the repo + return self._get_default_branch_from_remote_query() + except (subprocess.CalledProcessError, RuntimeError): + pass + + # this one is the last resort in case the remote is not accessible and + # the local repo is where `git init` was made + return self._guess_default_branch() + + def _get_default_branch_from_remote_query(self): + # This function requires network access to the repo + remote_name = self.remote_name + output = self.run("ls-remote", "--symref", remote_name, "HEAD") + matches = self._LS_REMOTE_PATTERN.search(output) + if not matches: + raise RuntimeError( + f'Could not find the default branch of remote repository "{remote_name}". ' + "Got: {output}" + ) + + branch_name = matches.group("branch_name") + return f"{remote_name}/{branch_name}" + + def _get_default_branch_from_cloned_metadata(self): + return self.run("rev-parse", "--abbrev-ref", f"{self.remote_name}/HEAD").strip() + + def _guess_default_branch(self): + branches = [ + line.strip() + for line in self.run( + "branch", "--all", "--no-color", "--format=%(refname)" + ).splitlines() + for candidate_branch in ("main", "master", "branches/default/tip") + if line.strip().endswith(candidate_branch) + ] + + if len(branches) == 1: + return branches[0] + + raise RuntimeError(f"Unable to find default branch. Got: {branches}") + + def get_url(self, remote="origin"): + return self.run("remote", "get-url", remote).strip() + + def get_commit_message(self, revision=None): + revision = revision or self.head_rev + return self.run("log", "-n1", "--format=%B") + + def get_changed_files( + self, diff_filter="ADM", mode="unstaged", rev=None, base_rev=None + ): + assert all(f.lower() in self._valid_diff_filter for f in diff_filter) + + if rev is None: + if base_rev is not None: + raise ValueError("Cannot specify `base_rev` without `rev`") + cmd = ["diff"] + if mode == "staged": + cmd.append("--cached") + elif mode == "all": + cmd.append("HEAD") + else: + revision_argument = ( + f"{rev}~1..{rev}" if base_rev is None else f"{base_rev}..{rev}" + ) + cmd = ["log", "--format=format:", revision_argument] + + cmd.append("--name-only") + cmd.append("--diff-filter=" + diff_filter.upper()) + + files = self.run(*cmd).splitlines() + return [f for f in files if f] + + def get_outgoing_files(self, diff_filter="ADM", upstream=None): + assert all(f.lower() in self._valid_diff_filter for f in diff_filter) + + not_condition = upstream if upstream else "--remotes" + + files = self.run( + "log", + "--name-only", + f"--diff-filter={diff_filter.upper()}", + "--oneline", + "--pretty=format:", + "HEAD", + "--not", + not_condition, + ).splitlines() + return [f for f in files if f] + + def working_directory_clean(self, untracked=False, ignored=False): + args = ["status", "--porcelain"] + + # Even in --porcelain mode, behavior is affected by the + # ``status.showUntrackedFiles`` option, which means we need to be + # explicit about how to treat untracked files. + if untracked: + args.append("--untracked-files=all") + else: + args.append("--untracked-files=no") + + if ignored: + args.append("--ignored") + + # If output is empty, there are no entries of requested status, which + # means we are clean. 
+ return not len(self.run(*args).strip()) + + def update(self, ref): + self.run("checkout", ref) + + def find_latest_common_revision(self, base_ref_or_rev, head_rev): + return self.run("merge-base", base_ref_or_rev, head_rev).strip() + + def does_revision_exist_locally(self, revision): + try: + return self.run("cat-file", "-t", revision).strip() == "commit" + except subprocess.CalledProcessError as e: + # Error code 128 comes with the message: + # "git cat-file: could not get object info" + if e.returncode == 128: + return False + raise + + +def get_repository(path): + """Get a repository object for the repository at `path`. + If `path` is not a known VCS repository, raise an exception. + """ + for path in ancestors(path): + if os.path.isdir(os.path.join(path, ".hg")): + return HgRepository(path) + elif os.path.exists(os.path.join(path, ".git")): + return GitRepository(path) + + raise RuntimeError("Current directory is neither a git or hg repository") + + +def find_hg_revision_push_info(repository, revision): + """Given the parameters for this action and a revision, find the + pushlog_id of the revision.""" + pushlog_url = PUSHLOG_TMPL.format(repository, revision) + + def query_pushlog(url): + r = requests.get(pushlog_url, timeout=60) + r.raise_for_status() + return r + + r = retry( + query_pushlog, + args=(pushlog_url,), + attempts=5, + sleeptime=10, + ) + pushes = r.json()["pushes"] + if len(pushes) != 1: + raise RuntimeError( + "Unable to find a single pushlog_id for {} revision {}: {}".format( + repository, revision, pushes + ) + ) + pushid = list(pushes.keys())[0] + return { + "pushdate": pushes[pushid]["date"], + "pushid": pushid, + "user": pushes[pushid]["user"], + } diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py new file mode 100644 index 0000000000..5911914f13 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/verify.py @@ -0,0 +1,283 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
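get_repository is the usual entry point to the vcs module: it walks up from the given path until it finds a .hg or .git directory and returns the matching wrapper. A minimal sketch, run from inside any checkout:

    from taskgraph.util.vcs import get_repository

    repo = get_repository(".")
    print(repo.tool)                      # "hg" or "git"
    print(repo.head_rev)                  # hash of the current revision
    print(repo.get_changed_files("ADM"))  # added/deleted/modified, unstaged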
+ + +import logging +import sys +from abc import ABC, abstractmethod + +import attr + +from taskgraph.config import GraphConfig +from taskgraph.parameters import Parameters +from taskgraph.taskgraph import TaskGraph +from taskgraph.util.attributes import match_run_on_projects +from taskgraph.util.treeherder import join_symbol + +logger = logging.getLogger(__name__) + + +@attr.s(frozen=True) +class Verification(ABC): + func = attr.ib() + + @abstractmethod + def verify(self, **kwargs) -> None: + pass + + +@attr.s(frozen=True) +class InitialVerification(Verification): + """Verification that doesn't depend on any generation state.""" + + def verify(self): + self.func() + + +@attr.s(frozen=True) +class GraphVerification(Verification): + """Verification for a TaskGraph object.""" + + run_on_projects = attr.ib(default=None) + + def verify( + self, graph: TaskGraph, graph_config: GraphConfig, parameters: Parameters + ): + if self.run_on_projects and not match_run_on_projects( + parameters["project"], self.run_on_projects + ): + return + + scratch_pad = {} + graph.for_each_task( + self.func, + scratch_pad=scratch_pad, + graph_config=graph_config, + parameters=parameters, + ) + self.func( + None, + graph, + scratch_pad=scratch_pad, + graph_config=graph_config, + parameters=parameters, + ) + + +@attr.s(frozen=True) +class ParametersVerification(Verification): + """Verification for a set of parameters.""" + + def verify(self, parameters: Parameters): + self.func(parameters) + + +@attr.s(frozen=True) +class KindsVerification(Verification): + """Verification for kinds.""" + + def verify(self, kinds: dict): + self.func(kinds) + + +@attr.s(frozen=True) +class VerificationSequence: + """ + Container for a sequence of verifications over a TaskGraph. Each + verification is represented as a callable taking (task, taskgraph, + scratch_pad), called for each task in the taskgraph, and one more + time with no task but with the taskgraph and the same scratch_pad + that was passed for each task. + """ + + _verifications = attr.ib(factory=dict) + _verification_types = { + "graph": GraphVerification, + "initial": InitialVerification, + "kinds": KindsVerification, + "parameters": ParametersVerification, + } + + def __call__(self, name, *args, **kwargs): + for verification in self._verifications.get(name, []): + verification.verify(*args, **kwargs) + + def add(self, name, **kwargs): + cls = self._verification_types.get(name, GraphVerification) + + def wrap(func): + self._verifications.setdefault(name, []).append(cls(func, **kwargs)) + return func + + return wrap + + +verifications = VerificationSequence() + + +@verifications.add("full_task_graph") +def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function verifies that tuple + (collection.keys(), machine.platform, groupSymbol, symbol) is unique + for a target task graph. 
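Checks are registered with the decorator shown above and later driven by calling the sequence with the name of the verification point. A self-contained sketch using the simplest variant, an "initial" verification that takes no generation state (the check itself is a placeholder):

    from taskgraph.util.verify import verifications

    @verifications.add("initial")
    def check_prerequisites():
        # Hypothetical check; runs before any graph state exists.
        pass

    # The generation code would drive it with something like:
    verifications("initial")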
+ """ + if task is None: + return + task_dict = task.task + if "extra" in task_dict: + extra = task_dict["extra"] + if "treeherder" in extra: + treeherder = extra["treeherder"] + + collection_keys = tuple(sorted(treeherder.get("collection", {}).keys())) + if len(collection_keys) != 1: + raise Exception( + "Task {} can't be in multiple treeherder collections " + "(the part of the platform after `/`): {}".format( + task.label, collection_keys + ) + ) + platform = treeherder.get("machine", {}).get("platform") + group_symbol = treeherder.get("groupSymbol") + symbol = treeherder.get("symbol") + + key = (platform, collection_keys[0], group_symbol, symbol) + if key in scratch_pad: + raise Exception( + "Duplicate treeherder platform and symbol in tasks " + "`{}`and `{}`: {} {}".format( + task.label, + scratch_pad[key], + f"{platform}/{collection_keys[0]}", + join_symbol(group_symbol, symbol), + ) + ) + else: + scratch_pad[key] = task.label + + +@verifications.add("full_task_graph") +def verify_trust_domain_v2_routes( + task, taskgraph, scratch_pad, graph_config, parameters +): + """ + This function ensures that any two tasks have distinct ``index.{trust-domain}.v2`` routes. + """ + if task is None: + return + route_prefix = "index.{}.v2".format(graph_config["trust-domain"]) + task_dict = task.task + routes = task_dict.get("routes", []) + + for route in routes: + if route.startswith(route_prefix): + if route in scratch_pad: + raise Exception( + "conflict between {}:{} for route: {}".format( + task.label, scratch_pad[route], route + ) + ) + else: + scratch_pad[route] = task.label + + +@verifications.add("full_task_graph") +def verify_routes_notification_filters( + task, taskgraph, scratch_pad, graph_config, parameters +): + """ + This function ensures that only understood filters for notifications are + specified. + + See: https://docs.taskcluster.net/reference/core/taskcluster-notify/docs/usage + """ + if task is None: + return + route_prefix = "notify." + valid_filters = ("on-any", "on-completed", "on-failed", "on-exception") + task_dict = task.task + routes = task_dict.get("routes", []) + + for route in routes: + if route.startswith(route_prefix): + # Get the filter of the route + route_filter = route.split(".")[-1] + if route_filter not in valid_filters: + raise Exception( + "{} has invalid notification filter ({})".format( + task.label, route_filter + ) + ) + + +@verifications.add("full_task_graph") +def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config, parameters): + tiers = scratch_pad + if task is not None: + tiers[task.label] = ( + task.task.get("extra", {}).get("treeherder", {}).get("tier", sys.maxsize) + ) + else: + + def printable_tier(tier): + if tier == sys.maxsize: + return "unknown" + return tier + + for task in taskgraph.tasks.values(): + tier = tiers[task.label] + for d in task.dependencies.values(): + if taskgraph[d].task.get("workerType") == "always-optimized": + continue + if "dummy" in taskgraph[d].kind: + continue + if tier < tiers[d]: + raise Exception( + "{} (tier {}) cannot depend on {} (tier {})".format( + task.label, + printable_tier(tier), + d, + printable_tier(tiers[d]), + ) + ) + + +@verifications.add("full_task_graph") +def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function verifies that toolchain aliases are not reused. 
+ """ + if task is None: + return + attributes = task.attributes + if "toolchain-alias" in attributes: + keys = attributes["toolchain-alias"] + if not keys: + keys = [] + elif isinstance(keys, str): + keys = [keys] + for key in keys: + if key in scratch_pad: + raise Exception( + "Duplicate toolchain-alias in tasks " + "`{}`and `{}`: {}".format( + task.label, + scratch_pad[key], + key, + ) + ) + else: + scratch_pad[key] = task.label + + +@verifications.add("optimized_task_graph") +def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function ensures that always-optimized tasks have been optimized. + """ + if task is None: + return + if task.task.get("workerType") == "always-optimized": + raise Exception(f"Could not optimize the task {task.label!r}") diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py new file mode 100644 index 0000000000..d71f7e06a3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/workertypes.py @@ -0,0 +1,75 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import attr + +from .keyed_by import evaluate_keyed_by +from .memoize import memoize + + +@attr.s +class _BuiltinWorkerType: + provisioner = attr.ib(str) + worker_type = attr.ib(str) + + @property + def implementation(self): + """ + Since the list of built-in worker-types is small and fixed, we can get + away with punning the implementation name (in + `taskgraph.transforms.task`) and the worker_type. + """ + return self.worker_type + + +_BUILTIN_TYPES = { + "always-optimized": _BuiltinWorkerType("invalid", "always-optimized"), + "succeed": _BuiltinWorkerType("built-in", "succeed"), +} + + +@memoize +def worker_type_implementation(graph_config, worker_type): + """Get the worker implementation and OS for the given workerType, where the + OS represents the host system, not the target OS, in the case of + cross-compiles.""" + if worker_type in _BUILTIN_TYPES: + # For the built-in worker-types, we use an `implementation that matches + # the worker-type. + return _BUILTIN_TYPES[worker_type].implementation, None + worker_config = evaluate_keyed_by( + {"by-worker-type": graph_config["workers"]["aliases"]}, + "worker-types.yml", + {"worker-type": worker_type}, + ) + return worker_config["implementation"], worker_config.get("os") + + +@memoize +def get_worker_type(graph_config, alias, level): + """ + Get the worker type based, evaluating aliases from the graph config. 
+ """ + if alias in _BUILTIN_TYPES: + builtin_type = _BUILTIN_TYPES[alias] + return builtin_type.provisioner, builtin_type.worker_type + + level = str(level) + worker_config = evaluate_keyed_by( + {"by-alias": graph_config["workers"]["aliases"]}, + "graph_config.workers.aliases", + {"alias": alias}, + ) + provisioner = evaluate_keyed_by( + worker_config["provisioner"], + alias, + {"level": level}, + ).format(level=level) + worker_type = evaluate_keyed_by( + worker_config["worker-type"], + alias, + {"level": level}, + ).format(level=level, alias=alias) + return provisioner, worker_type diff --git a/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py new file mode 100644 index 0000000000..141c7a16d3 --- /dev/null +++ b/third_party/python/taskcluster_taskgraph/taskgraph/util/yaml.py @@ -0,0 +1,36 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import os + +from yaml.loader import SafeLoader + + +class UnicodeLoader(SafeLoader): + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + +UnicodeLoader.add_constructor("tag:yaml.org,2002:str", UnicodeLoader.construct_yaml_str) + + +def load_stream(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = UnicodeLoader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + + +def load_yaml(*parts): + """Convenience function to load a YAML file in the given path. This is + useful for loading kind configuration files from the kind path.""" + filename = os.path.join(*parts) + with open(filename, "rb") as f: + return load_stream(f) |
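load_yaml is a thin convenience wrapper: the path segments are joined with os.path.join, the file is opened in binary mode, and only the first YAML document is returned, parsed with the safe loader defined above. A small sketch with a made-up kind file path:

    from taskgraph.util.yaml import load_yaml

    # Hypothetical kind configuration; any readable YAML file works here.
    kind_config = load_yaml("taskcluster", "ci", "build", "kind.yml")
    print(kind_config.get("kind-dependencies", []))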