Diffstat
28 files changed, 5119 insertions, 0 deletions
diff --git a/taskcluster/taskgraph/util/__init__.py b/taskcluster/taskgraph/util/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/taskcluster/taskgraph/util/__init__.py diff --git a/taskcluster/taskgraph/util/attributes.py b/taskcluster/taskgraph/util/attributes.py new file mode 100644 index 0000000000..6a446d6fa7 --- /dev/null +++ b/taskcluster/taskgraph/util/attributes.py @@ -0,0 +1,163 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import re + +import six + + +INTEGRATION_PROJECTS = { + "autoland", +} + +TRUNK_PROJECTS = INTEGRATION_PROJECTS | {"mozilla-central", "comm-central"} + +RELEASE_PROJECTS = { + "mozilla-central", + "mozilla-beta", + "mozilla-release", + "mozilla-esr78", + "comm-central", + "comm-beta", + "comm-esr78", + "oak", +} + +RELEASE_PROMOTION_PROJECTS = { + "jamun", + "maple", + "try", + "try-comm-central", +} | RELEASE_PROJECTS + +TEMPORARY_PROJECTS = set( + { + # When using a "Disposeabel Project Branch" you can specify your branch here. e.g.: + # 'oak', + } +) + +ALL_PROJECTS = RELEASE_PROMOTION_PROJECTS | TRUNK_PROJECTS | TEMPORARY_PROJECTS + +RUN_ON_PROJECT_ALIASES = { + # key is alias, value is lambda to test it against + "all": lambda project: True, + "integration": lambda project: project in INTEGRATION_PROJECTS, + "release": lambda project: project in RELEASE_PROJECTS, + "trunk": lambda project: project in TRUNK_PROJECTS, +} + +_COPYABLE_ATTRIBUTES = ( + "accepted-mar-channel-ids", + "artifact_map", + "artifact_prefix", + "build_platform", + "build_type", + "l10n_chunk", + "locale", + "mar-channel-id", + "nightly", + "required_signoffs", + "shippable", + "shipping_phase", + "shipping_product", + "signed", + "stub-installer", + "update-channel", +) + + +def attrmatch(attributes, **kwargs): + """Determine whether the given set of task attributes matches. The + conditions are given as keyword arguments, where each keyword names an + attribute. The keyword value can be a literal, a set, or a callable. A + literal must match the attribute exactly. Given a set, the attribute value + must be in the set. A callable is called with the attribute value. If an + attribute is specified as a keyword argument but not present in the + attributes, the result is False.""" + for kwkey, kwval in six.iteritems(kwargs): + if kwkey not in attributes: + return False + attval = attributes[kwkey] + if isinstance(kwval, set): + if attval not in kwval: + return False + elif callable(kwval): + if not kwval(attval): + return False + elif kwval != attributes[kwkey]: + return False + return True + + +def keymatch(attributes, target): + """Determine if any keys in attributes are a match to target, then return + a list of matching values. First exact matches will be checked. Failing + that, regex matches and finally a default key. 
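+
+    Illustrative example (hypothetical keys and values): with
+    attributes={"linux.*": 1, "default": 0}, keymatch(attributes, "linux64")
+    returns [1] via the regex branch, while an unmatched target such as
+    "win32" falls back to [0] from the "default" key.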
+ """ + # exact match + if target in attributes: + return [attributes[target]] + + # regular expression match + matches = [v for k, v in six.iteritems(attributes) if re.match(k + "$", target)] + if matches: + return matches + + # default + if "default" in attributes: + return [attributes["default"]] + + return [] + + +def match_run_on_projects(project, run_on_projects): + """Determine whether the given project is included in the `run-on-projects` + parameter, applying expansions for things like "integration" mentioned in + the attribute documentation.""" + aliases = RUN_ON_PROJECT_ALIASES.keys() + run_aliases = set(aliases) & set(run_on_projects) + if run_aliases: + if any(RUN_ON_PROJECT_ALIASES[alias](project) for alias in run_aliases): + return True + + return project in run_on_projects + + +def match_run_on_hg_branches(hg_branch, run_on_hg_branches): + """Determine whether the given project is included in the `run-on-hg-branches` + parameter. Allows 'all'.""" + if "all" in run_on_hg_branches: + return True + + for expected_hg_branch_pattern in run_on_hg_branches: + if re.match(expected_hg_branch_pattern, hg_branch): + return True + + return False + + +def copy_attributes_from_dependent_job(dep_job, denylist=()): + return { + attr: dep_job.attributes[attr] + for attr in _COPYABLE_ATTRIBUTES + if attr in dep_job.attributes and attr not in denylist + } + + +def sorted_unique_list(*args): + """Join one or more lists, and return a sorted list of unique members""" + combined = set().union(*args) + return sorted(combined) + + +def release_level(project): + """ + Whether this is a staging release or not. + + :return six.text_type: One of "production" or "staging". + """ + return "production" if project in RELEASE_PROJECTS else "staging" diff --git a/taskcluster/taskgraph/util/backstop.py b/taskcluster/taskgraph/util/backstop.py new file mode 100644 index 0000000000..e89cd53ef0 --- /dev/null +++ b/taskcluster/taskgraph/util/backstop.py @@ -0,0 +1,81 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +from requests import HTTPError + +from taskgraph.util.taskcluster import ( + find_task_id, + get_artifact, + status_task, +) + + +BACKSTOP_PUSH_INTERVAL = 20 +BACKSTOP_TIME_INTERVAL = 60 * 4 # minutes +BACKSTOP_INDEX = "gecko.v2.{project}.latest.taskgraph.backstop" + + +def is_backstop( + params, push_interval=BACKSTOP_PUSH_INTERVAL, time_interval=BACKSTOP_TIME_INTERVAL +): + """Determines whether the given parameters represent a backstop push. + + Args: + push_interval (int): Number of pushes + time_interval (int): Minutes between forced schedules. + Use 0 to disable. + Returns: + bool: True if this is a backtop, otherwise False. + """ + # In case this is being faked on try. + if params.get("backstop", False): + return True + + project = params["project"] + pushid = int(params["pushlog_id"]) + pushdate = int(params["pushdate"]) + + if project == "try": + return False + elif project != "autoland": + return True + + # On every Nth push, want to run all tasks. + if pushid % push_interval == 0: + return True + + if time_interval <= 0: + return False + + # We also want to ensure we run all tasks at least once per N minutes. 
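+    # Illustrative timing with the defaults above: at a BACKSTOP_TIME_INTERVAL
+    # of 4 hours, a push landing 5 hours after the previous backstop's
+    # pushdate becomes a backstop even if its push id is not a multiple of
+    # push_interval.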
+ index = BACKSTOP_INDEX.format(project=project) + + try: + last_backstop_id = find_task_id(index) + except KeyError: + # Index wasn't found, implying there hasn't been a backstop push yet. + return True + + if status_task(last_backstop_id) in ("failed", "exception"): + # If the last backstop failed its decision task, make this a backstop. + return True + + try: + last_pushdate = get_artifact(last_backstop_id, "public/parameters.yml")[ + "pushdate" + ] + except HTTPError as e: + # If the last backstop decision task exists in the index, but + # parameters.yml isn't available yet, it means the decision task is + # still running. If that's the case, we can be pretty sure the time + # component will not cause a backstop, so just return False. + if e.response.status_code == 404: + return False + raise + + if (pushdate - last_pushdate) / 60 >= time_interval: + return True + return False diff --git a/taskcluster/taskgraph/util/bugbug.py b/taskcluster/taskgraph/util/bugbug.py new file mode 100644 index 0000000000..8fc3c07610 --- /dev/null +++ b/taskcluster/taskgraph/util/bugbug.py @@ -0,0 +1,126 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import os +import json +import sys +import time + +import requests +from mozbuild.util import memoize + +from taskgraph.util.taskcluster import requests_retry_session + +try: + # TODO(py3): use time.monotonic() + from time import monotonic +except ImportError: + from time import time as monotonic + +BUGBUG_BASE_URL = "https://bugbug.herokuapp.com" +RETRY_TIMEOUT = 9 * 60 # seconds +RETRY_INTERVAL = 10 # seconds + +# Preset confidence thresholds. +CT_LOW = 0.7 +CT_MEDIUM = 0.8 +CT_HIGH = 0.9 + +GROUP_TRANSLATIONS = { + "testing/web-platform/tests": "", + "testing/web-platform/mozilla/tests": "/_mozilla", +} + + +def translate_group(group): + for prefix, value in GROUP_TRANSLATIONS.items(): + if group.startswith(prefix): + return group.replace(prefix, value) + + return group + + +class BugbugTimeoutException(Exception): + pass + + +@memoize +def get_session(): + s = requests.Session() + s.headers.update({"X-API-KEY": "gecko-taskgraph"}) + return requests_retry_session(retries=5, session=s) + + +def _write_perfherder_data(lower_is_better): + if os.environ.get("MOZ_AUTOMATION", "0") == "1": + perfherder_data = { + "framework": {"name": "build_metrics"}, + "suites": [ + { + "name": suite, + "value": value, + "lowerIsBetter": True, + "shouldAlert": False, + "subtests": [], + } + for suite, value in lower_is_better.items() + ], + } + print( + "PERFHERDER_DATA: {}".format(json.dumps(perfherder_data)), file=sys.stderr + ) + + +@memoize +def push_schedules(branch, rev): + url = BUGBUG_BASE_URL + "/push/{branch}/{rev}/schedules".format( + branch=branch, rev=rev + ) + start = monotonic() + session = get_session() + + # On try there is no fallback and pulling is slower, so we allow bugbug more + # time to compute the results. + # See https://github.com/mozilla/bugbug/issues/1673. 
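+    # Rough polling budget with the module defaults (illustrative): the 540s
+    # RETRY_TIMEOUT grows to 720s on try, i.e. up to 72 polls at a 10s
+    # RETRY_INTERVAL before BugbugTimeoutException is raised.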
+ timeout = RETRY_TIMEOUT + if branch == "try": + timeout += int(timeout / 3) + + attempts = timeout / RETRY_INTERVAL + i = 0 + while i < attempts: + r = session.get(url) + r.raise_for_status() + + if r.status_code != 202: + break + + time.sleep(RETRY_INTERVAL) + i += 1 + end = monotonic() + + _write_perfherder_data( + lower_is_better={ + "bugbug_push_schedules_time": end - start, + "bugbug_push_schedules_retries": i, + } + ) + + data = r.json() + if r.status_code == 202: + raise BugbugTimeoutException( + "Timed out waiting for result from '{}'".format(url) + ) + + if "groups" in data: + data["groups"] = {translate_group(k): v for k, v in data["groups"].items()} + + if "config_groups" in data: + data["config_groups"] = { + translate_group(k): v for k, v in data["config_groups"].items() + } + + return data diff --git a/taskcluster/taskgraph/util/cached_tasks.py b/taskcluster/taskgraph/util/cached_tasks.py new file mode 100644 index 0000000000..980ef0ef79 --- /dev/null +++ b/taskcluster/taskgraph/util/cached_tasks.py @@ -0,0 +1,82 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import hashlib +import time + +import six + + +TARGET_CACHE_INDEX = "{trust_domain}.cache.level-{level}.{type}.{name}.hash.{digest}" +EXTRA_CACHE_INDEXES = [ + "{trust_domain}.cache.level-{level}.{type}.{name}.latest", + "{trust_domain}.cache.level-{level}.{type}.{name}.pushdate.{build_date_long}", +] + + +def add_optimization( + config, taskdesc, cache_type, cache_name, digest=None, digest_data=None +): + """ + Allow the results of this task to be cached. This adds index routes to the + task so it can be looked up for future runs, and optimization hints so that + cached artifacts can be found. Exactly one of `digest` and `digest_data` + must be passed. + + :param TransformConfig config: The configuration for the kind being transformed. + :param dict taskdesc: The description of the current task. + :param str cache_type: The type of task result being cached. + :param str cache_name: The name of the object being cached. + :param digest: A unique string indentifying this version of the artifacts + being generated. Typically this will be the hash of inputs to the task. + :type digest: bytes or None + :param digest_data: A list of bytes representing the inputs of this task. + They will be concatenated and hashed to create the digest for this + task. + :type digest_data: list of bytes or None + """ + if (digest is None) == (digest_data is None): + raise Exception("Must pass exactly one of `digest` and `digest_data`.") + if digest is None: + digest = hashlib.sha256(six.ensure_binary("\n".join(digest_data))).hexdigest() + + subs = { + "trust_domain": config.graph_config["trust-domain"], + "type": cache_type, + "name": cache_name, + "digest": digest, + } + + # We'll try to find a cached version of the toolchain at levels above + # and including the current level, starting at the highest level. + index_routes = [] + for level in reversed(range(int(config.params["level"]), 4)): + subs["level"] = level + index_routes.append(TARGET_CACHE_INDEX.format(**subs)) + taskdesc["optimization"] = {"index-search": index_routes} + + # ... and cache at the lowest level. + taskdesc.setdefault("routes", []).append( + "index.{}".format(TARGET_CACHE_INDEX.format(**subs)) + ) + + # ... 
and add some extra routes for humans + subs["build_date_long"] = time.strftime( + "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(config.params["build_date"]) + ) + taskdesc["routes"].extend( + ["index.{}".format(route.format(**subs)) for route in EXTRA_CACHE_INDEXES] + ) + + taskdesc["attributes"]["cached_task"] = { + "type": cache_type, + "name": cache_name, + "digest": digest, + } + + # Allow future pushes to find this task before it completes + # Implementation in morphs + taskdesc["attributes"]["eager_indexes"] = [TARGET_CACHE_INDEX.format(**subs)] diff --git a/taskcluster/taskgraph/util/chunking.py b/taskcluster/taskgraph/util/chunking.py new file mode 100644 index 0000000000..ecf8098f01 --- /dev/null +++ b/taskcluster/taskgraph/util/chunking.py @@ -0,0 +1,270 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +"""Utility functions to handle test chunking.""" + +import json +import logging +import os +from abc import ABCMeta, abstractmethod + +import six +from manifestparser import TestManifest +from manifestparser.filters import chunk_by_runtime +from mozbuild.util import memoize +from moztest.resolve import ( + TEST_SUITES, + TestResolver, + TestManifestLoader, +) + +from taskgraph import GECKO +from taskgraph.util.bugbug import BugbugTimeoutException, push_schedules + +logger = logging.getLogger(__name__) +here = os.path.abspath(os.path.dirname(__file__)) +resolver = TestResolver.from_environment(cwd=here, loader_cls=TestManifestLoader) + + +def guess_mozinfo_from_task(task): + """Attempt to build a mozinfo dict from a task definition. + + This won't be perfect and many values used in the manifests will be missing. But + it should cover most of the major ones and be "good enough" for chunking in the + taskgraph. + + Args: + task (dict): A task definition. + + Returns: + A dict that can be used as a mozinfo replacement. 
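+        For a hypothetical task built on "linux64-asan/opt" this would
+        include, for example, {"asan": True, "bits": 64, "os": "linux",
+        "processor": "x86_64", "toolkit": "gtk"} (illustrative, not
+        exhaustive).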
+ """ + info = { + "asan": "asan" in task["build-attributes"]["build_platform"], + "bits": 32 if "32" in task["build-attributes"]["build_platform"] else 64, + "ccov": "ccov" in task["build-attributes"]["build_platform"], + "debug": task["build-attributes"]["build_type"] == "debug", + "e10s": task["attributes"]["e10s"], + "fission": task["attributes"].get("unittest_variant") == "fission", + "headless": "-headless" in task["test-name"], + "tsan": "tsan" in task["build-attributes"]["build_platform"], + "webrender": task.get("webrender", False), + } + for platform in ("android", "linux", "mac", "win"): + if platform in task["build-attributes"]["build_platform"]: + info["os"] = platform + break + else: + raise ValueError( + "{} is not a known platform!".format( + task["build-attributes"]["build_platform"] + ) + ) + + info["appname"] = "fennec" if info["os"] == "android" else "firefox" + + # guess processor + if "aarch64" in task["build-attributes"]["build_platform"]: + info["processor"] = "aarch64" + elif info["os"] == "android" and "arm" in task["test-platform"]: + info["processor"] = "arm" + elif info["bits"] == 32: + info["processor"] = "x86" + else: + info["processor"] = "x86_64" + + # guess toolkit + if info["os"] == "android": + info["toolkit"] = "android" + elif info["os"] == "win": + info["toolkit"] = "windows" + elif info["os"] == "mac": + info["toolkit"] = "cocoa" + else: + info["toolkit"] = "gtk" + + return info + + +@memoize +def get_runtimes(platform, suite_name): + if not suite_name or not platform: + raise TypeError("suite_name and platform cannot be empty.") + + base = os.path.join(GECKO, "testing", "runtimes", "manifest-runtimes-{}.json") + for key in ("android", "windows"): + if key in platform: + path = base.format(key) + break + else: + path = base.format("unix") + + if not os.path.exists(path): + raise IOError("manifest runtime file at {} not found.".format(path)) + + with open(path, "r") as fh: + return json.load(fh)[suite_name] + + +def chunk_manifests(suite, platform, chunks, manifests): + """Run the chunking algorithm. + + Args: + platform (str): Platform used to find runtime info. + chunks (int): Number of chunks to split manifests into. + manifests(list): Manifests to chunk. + + Returns: + A list of length `chunks` where each item contains a list of manifests + that run in that chunk. + """ + manifests = set(manifests) + + if "web-platform-tests" not in suite: + runtimes = { + k: v for k, v in get_runtimes(platform, suite).items() if k in manifests + } + return [ + c[1] + for c in chunk_by_runtime(None, chunks, runtimes).get_chunked_manifests( + manifests + ) + ] + + # Keep track of test paths for each chunk, and the runtime information. + chunked_manifests = [[] for _ in range(chunks)] + + # Spread out the test manifests evenly across all chunks. + for index, key in enumerate(sorted(manifests)): + chunked_manifests[index % chunks].append(key) + + # One last sort by the number of manifests. Chunk size should be more or less + # equal in size. + chunked_manifests.sort(key=lambda x: len(x)) + + # Return just the chunked test paths. + return chunked_manifests + + +@six.add_metaclass(ABCMeta) +class BaseManifestLoader(object): + def __init__(self, params): + self.params = params + + @abstractmethod + def get_manifests(self, flavor, subsuite, mozinfo): + """Compute which manifests should run for the given flavor, subsuite and mozinfo. 
+ + This function returns skipped manifests separately so that more balanced + chunks can be achieved by only considering "active" manifests in the + chunking algorithm. + + Args: + flavor (str): The suite to run. Values are defined by the 'build_flavor' key + in `moztest.resolve.TEST_SUITES`. + subsuite (str): The subsuite to run or 'undefined' to denote no subsuite. + mozinfo (frozenset): Set of data in the form of (<key>, <value>) used + for filtering. + + Returns: + A tuple of two manifest lists. The first is the set of active manifests (will + run at least one test. The second is a list of skipped manifests (all tests are + skipped). + """ + pass + + +class DefaultLoader(BaseManifestLoader): + """Load manifests using metadata from the TestResolver.""" + + @memoize + def get_tests(self, suite): + suite_definition = TEST_SUITES[suite] + return list( + resolver.resolve_tests( + flavor=suite_definition["build_flavor"], + subsuite=suite_definition.get("kwargs", {}).get( + "subsuite", "undefined" + ), + ) + ) + + @memoize + def get_manifests(self, suite, mozinfo): + mozinfo = dict(mozinfo) + # Compute all tests for the given suite/subsuite. + tests = self.get_tests(suite) + + if "web-platform-tests" in suite: + manifests = set() + for t in tests: + manifests.add(t["manifest"]) + return {"active": list(manifests), "skipped": []} + + manifests = set(chunk_by_runtime.get_manifest(t) for t in tests) + + # Compute the active tests. + m = TestManifest() + m.tests = tests + tests = m.active_tests(disabled=False, exists=False, **mozinfo) + active = set(chunk_by_runtime.get_manifest(t) for t in tests) + skipped = manifests - active + return {"active": list(active), "skipped": list(skipped)} + + +class BugbugLoader(DefaultLoader): + """Load manifests using metadata from the TestResolver, and then + filter them based on a query to bugbug.""" + + CONFIDENCE_THRESHOLD = 0.5 + + def __init__(self, *args, **kwargs): + super(BugbugLoader, self).__init__(*args, **kwargs) + self.timedout = False + + @memoize + def get_manifests(self, suite, mozinfo): + manifests = super(BugbugLoader, self).get_manifests(suite, mozinfo) + + # Don't prune any manifests if we're on a backstop push or there was a timeout. + if self.params["backstop"] or self.timedout: + return manifests + + try: + data = push_schedules(self.params["project"], self.params["head_rev"]) + except BugbugTimeoutException: + logger.warning("Timed out waiting for bugbug, loading all test manifests.") + self.timedout = True + return self.get_manifests(suite, mozinfo) + + bugbug_manifests = { + m + for m, c in data.get("groups", {}).items() + if c >= self.CONFIDENCE_THRESHOLD + } + + manifests["active"] = list(set(manifests["active"]) & bugbug_manifests) + manifests["skipped"] = list(set(manifests["skipped"]) & bugbug_manifests) + return manifests + + +manifest_loaders = { + "bugbug": BugbugLoader, + "default": DefaultLoader, +} + +_loader_cache = {} + + +def get_manifest_loader(name, params): + # Ensure we never create more than one instance of the same loader type for + # performance reasons. 
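+    # e.g. repeated calls to get_manifest_loader("default", params) return the
+    # same DefaultLoader instance; the loader name is the only cache key.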
+ if name in _loader_cache: + return _loader_cache[name] + + loader = manifest_loaders[name](dict(params)) + _loader_cache[name] = loader + return loader diff --git a/taskcluster/taskgraph/util/declarative_artifacts.py b/taskcluster/taskgraph/util/declarative_artifacts.py new file mode 100644 index 0000000000..821200e71d --- /dev/null +++ b/taskcluster/taskgraph/util/declarative_artifacts.py @@ -0,0 +1,70 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, unicode_literals + +import re + +from taskgraph.util.scriptworker import generate_beetmover_upstream_artifacts + + +_ARTIFACT_ID_PER_PLATFORM = { + "android-aarch64-opt": "geckoview-default-arm64-v8a", + "android-api-16-opt": "geckoview-default-armeabi-v7a", + "android-x86-opt": "geckoview-default-x86", + "android-x86_64-opt": "geckoview-default-x86_64", + "android-geckoview-fat-aar-opt": "geckoview-default", + "android-aarch64-shippable": "geckoview{update_channel}-arm64-v8a", + "android-api-16-shippable": "geckoview{update_channel}-armeabi-v7a", + "android-x86-shippable": "geckoview{update_channel}-x86", + "android-x86_64-shippable": "geckoview{update_channel}-x86_64", + "android-geckoview-fat-aar-shippable": "geckoview{update_channel}", +} + + +def get_geckoview_upstream_artifacts(config, job, platform=""): + if not platform: + platform = job["attributes"]["build_platform"] + upstream_artifacts = generate_beetmover_upstream_artifacts( + config, + job, + platform="", + **get_geckoview_template_vars( + config, platform, job["attributes"].get("update-channel") + ) + ) + return [ + {key: value for key, value in upstream_artifact.items() if key != "locale"} + for upstream_artifact in upstream_artifacts + ] + + +def get_geckoview_template_vars(config, platform, update_channel): + version_groups = re.match(r"(\d+).(\d+).*", config.params["version"]) + if version_groups: + major_version, minor_version = version_groups.groups() + + return { + "artifact_id": get_geckoview_artifact_id( + config, + platform, + update_channel, + ), + "build_date": config.params["moz_build_date"], + "major_version": major_version, + "minor_version": minor_version, + } + + +def get_geckoview_artifact_id(config, platform, update_channel=None): + if update_channel == "release": + update_channel = "" + elif update_channel is not None: + update_channel = "-{}".format(update_channel) + else: + # For shippable builds, mozharness defaults to using + # "nightly-{project}" for the update channel. For other builds, the + # update channel is not set, but the value is not substituted. + update_channel = "-nightly-{}".format(config.params["project"]) + return _ARTIFACT_ID_PER_PLATFORM[platform].format(update_channel=update_channel) diff --git a/taskcluster/taskgraph/util/docker.py b/taskcluster/taskgraph/util/docker.py new file mode 100644 index 0000000000..0642e4faff --- /dev/null +++ b/taskcluster/taskgraph/util/docker.py @@ -0,0 +1,356 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import, print_function, unicode_literals + +import hashlib +import io +import json +import os +import re +import requests +import requests_unixsocket +import six +import sys + +from six.moves.urllib.parse import quote, urlencode, urlunparse +from six.moves.collections_abc import Mapping + +from mozbuild.util import memoize +from mozpack.files import GeneratedFile +from mozpack.archive import create_tar_gz_from_files +from .. import GECKO + +from .yaml import load_yaml + + +IMAGE_DIR = os.path.join(GECKO, "taskcluster", "docker") + + +def docker_url(path, **kwargs): + docker_socket = os.environ.get("DOCKER_SOCKET", "/var/run/docker.sock") + return urlunparse( + ("http+unix", quote(docker_socket, safe=""), path, "", urlencode(kwargs), "") + ) + + +def post_to_docker(tar, api_path, **kwargs): + """POSTs a tar file to a given docker API path. + + The tar argument can be anything that can be passed to requests.post() + as data (e.g. iterator or file object). + The extra keyword arguments are passed as arguments to the docker API. + """ + # requests-unixsocket doesn't honor requests timeouts + # See https://github.com/msabramo/requests-unixsocket/issues/44 + # We have some large docker images that trigger the default timeout, + # so we increase the requests-unixsocket timeout here. + session = requests.Session() + session.mount( + requests_unixsocket.DEFAULT_SCHEME, + requests_unixsocket.UnixAdapter(timeout=120), + ) + req = session.post( + docker_url(api_path, **kwargs), + data=tar, + stream=True, + headers={"Content-Type": "application/x-tar"}, + ) + if req.status_code != 200: + message = req.json().get("message") + if not message: + message = "docker API returned HTTP code {}".format(req.status_code) + raise Exception(message) + status_line = {} + + buf = b"" + for content in req.iter_content(chunk_size=None): + if not content: + continue + # Sometimes, a chunk of content is not a complete json, so we cumulate + # with leftovers from previous iterations. + buf += content + try: + data = json.loads(buf) + except Exception: + continue + buf = b"" + # data is sometimes an empty dict. + if not data: + continue + # Mimick how docker itself presents the output. This code was tested + # with API version 1.18 and 1.26. + if "status" in data: + if "id" in data: + if sys.stderr.isatty(): + total_lines = len(status_line) + line = status_line.setdefault(data["id"], total_lines) + n = total_lines - line + if n > 0: + # Move the cursor up n lines. + sys.stderr.write("\033[{}A".format(n)) + # Clear line and move the cursor to the beginning of it. + sys.stderr.write("\033[2K\r") + sys.stderr.write( + "{}: {} {}\n".format( + data["id"], data["status"], data.get("progress", "") + ) + ) + if n > 1: + # Move the cursor down n - 1 lines, which, considering + # the carriage return on the last write, gets us back + # where we started. + sys.stderr.write("\033[{}B".format(n - 1)) + else: + status = status_line.get(data["id"]) + # Only print status changes. 
+ if status != data["status"]: + sys.stderr.write("{}: {}\n".format(data["id"], data["status"])) + status_line[data["id"]] = data["status"] + else: + status_line = {} + sys.stderr.write("{}\n".format(data["status"])) + elif "stream" in data: + sys.stderr.write(data["stream"]) + elif "aux" in data: + sys.stderr.write(repr(data["aux"])) + elif "error" in data: + sys.stderr.write("{}\n".format(data["error"])) + # Sadly, docker doesn't give more than a plain string for errors, + # so the best we can do to propagate the error code from the command + # that failed is to parse the error message... + errcode = 1 + m = re.search(r"returned a non-zero code: (\d+)", data["error"]) + if m: + errcode = int(m.group(1)) + sys.exit(errcode) + else: + raise NotImplementedError(repr(data)) + sys.stderr.flush() + + +def docker_image(name, by_tag=False): + """ + Resolve in-tree prebuilt docker image to ``<registry>/<repository>@sha256:<digest>``, + or ``<registry>/<repository>:<tag>`` if `by_tag` is `True`. + """ + try: + with open(os.path.join(IMAGE_DIR, name, "REGISTRY")) as f: + registry = f.read().strip() + except IOError: + with open(os.path.join(IMAGE_DIR, "REGISTRY")) as f: + registry = f.read().strip() + + if not by_tag: + hashfile = os.path.join(IMAGE_DIR, name, "HASH") + try: + with open(hashfile) as f: + return "{}/{}@{}".format(registry, name, f.read().strip()) + except IOError: + raise Exception("Failed to read HASH file {}".format(hashfile)) + + try: + with open(os.path.join(IMAGE_DIR, name, "VERSION")) as f: + tag = f.read().strip() + except IOError: + tag = "latest" + return "{}/{}:{}".format(registry, name, tag) + + +class VoidWriter(object): + """A file object with write capabilities that does nothing with the written + data.""" + + def write(self, buf): + pass + + +def generate_context_hash(topsrcdir, image_path, image_name, args): + """Generates a sha256 hash for context directory used to build an image.""" + + return stream_context_tar( + topsrcdir, image_path, VoidWriter(), image_name, args=args + ) + + +class HashingWriter(object): + """A file object with write capabilities that hashes the written data at + the same time it passes down to a real file object.""" + + def __init__(self, writer): + self._hash = hashlib.sha256() + self._writer = writer + + def write(self, buf): + self._hash.update(buf) + self._writer.write(buf) + + def hexdigest(self): + return six.ensure_text(self._hash.hexdigest()) + + +def create_context_tar(topsrcdir, context_dir, out_path, image_name, args): + """Create a context tarball. + + A directory ``context_dir`` containing a Dockerfile will be assembled into + a gzipped tar file at ``out_path``. + + We also scan the source Dockerfile for special syntax that influences + context generation. + + If a line in the Dockerfile has the form ``# %include <path>``, + the relative path specified on that line will be matched against + files in the source repository and added to the context under the + path ``topsrcdir/``. If an entry is a directory, we add all files + under that directory. + + If a line in the Dockerfile has the form ``# %ARG <name>``, occurrences of + the string ``$<name>`` in subsequent lines are replaced with the value + found in the ``args`` argument. Exception: this doesn't apply to VOLUME + definitions. + + Returns the SHA-256 hex digest of the created archive. 
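+
+    A minimal call might look like this (hypothetical paths and arguments):
+
+        create_context_tar("/src", "taskcluster/docker/base", "ctx.tar.gz",
+                           "base", {"DOCKER_IMAGE_PARENT": "ubuntu:18.04"})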
+ """ + with open(out_path, "wb") as fh: + return stream_context_tar( + topsrcdir, + context_dir, + fh, + image_name=image_name, + args=args, + ) + + +def stream_context_tar(topsrcdir, context_dir, out_file, image_name, args): + """Like create_context_tar, but streams the tar file to the `out_file` file + object.""" + archive_files = {} + replace = [] + content = [] + + context_dir = os.path.join(topsrcdir, context_dir) + + for root, dirs, files in os.walk(context_dir): + for f in files: + source_path = os.path.join(root, f) + archive_path = source_path[len(context_dir) + 1 :] + archive_files[archive_path] = source_path + + # Parse Dockerfile for special syntax of extra files to include. + with io.open(os.path.join(context_dir, "Dockerfile"), "r") as fh: + for line in fh: + if line.startswith("# %ARG"): + p = line[len("# %ARG ") :].strip() + if not args or p not in args: + raise Exception("missing argument: {}".format(p)) + replace.append((re.compile(r"\${}\b".format(p)), args[p])) + continue + + for regexp, s in replace: + line = re.sub(regexp, s, line) + + content.append(line) + + if not line.startswith("# %include"): + continue + + p = line[len("# %include ") :].strip() + if os.path.isabs(p): + raise Exception("extra include path cannot be absolute: %s" % p) + + fs_path = os.path.normpath(os.path.join(topsrcdir, p)) + # Check for filesystem traversal exploits. + if not fs_path.startswith(topsrcdir): + raise Exception("extra include path outside topsrcdir: %s" % p) + + if not os.path.exists(fs_path): + raise Exception("extra include path does not exist: %s" % p) + + if os.path.isdir(fs_path): + for root, dirs, files in os.walk(fs_path): + for f in files: + source_path = os.path.join(root, f) + rel = source_path[len(fs_path) + 1 :] + archive_path = os.path.join("topsrcdir", p, rel) + archive_files[archive_path] = source_path + else: + archive_path = os.path.join("topsrcdir", p) + archive_files[archive_path] = fs_path + + archive_files["Dockerfile"] = GeneratedFile( + b"".join(six.ensure_binary(s) for s in content) + ) + + writer = HashingWriter(out_file) + create_tar_gz_from_files(writer, archive_files, "{}.tar".format(image_name)) + return writer.hexdigest() + + +class ImagePathsMap(Mapping): + """ImagePathsMap contains the mapping of Docker image names to their + context location in the filesystem. The register function allows Thunderbird + to define additional images under comm/taskcluster. + """ + + def __init__(self, config_path, image_dir=IMAGE_DIR): + config = load_yaml(GECKO, config_path) + self.__update_image_paths(config["jobs"], image_dir) + + def __getitem__(self, key): + return self.__dict__[key] + + def __iter__(self): + return iter(self.__dict__) + + def __len__(self): + return len(self.__dict__) + + def __update_image_paths(self, jobs, image_dir): + self.__dict__.update( + { + k: os.path.join(image_dir, v.get("definition", k)) + for k, v in jobs.items() + } + ) + + def register(self, jobs_config_path, image_dir): + """Register additional image_paths. 
In this case, there is no 'jobs' + key in the loaded YAML as this file is loaded via jobs-from in kind.yml.""" + jobs = load_yaml(GECKO, jobs_config_path) + self.__update_image_paths(jobs, image_dir) + + +image_paths = ImagePathsMap("taskcluster/ci/docker-image/kind.yml") + + +def image_path(name): + if name in image_paths: + return image_paths[name] + return os.path.join(IMAGE_DIR, name) + + +@memoize +def parse_volumes(image): + """Parse VOLUME entries from a Dockerfile for an image.""" + volumes = set() + + path = image_path(image) + + with open(os.path.join(path, "Dockerfile"), "rb") as fh: + for line in fh: + line = line.strip() + # We assume VOLUME definitions don't use %ARGS. + if not line.startswith(b"VOLUME "): + continue + + v = line.split(None, 1)[1] + if v.startswith(b"["): + raise ValueError( + "cannot parse array syntax for VOLUME; " + "convert to multiple entries" + ) + + volumes |= set([six.ensure_text(v) for v in v.split()]) + + return volumes diff --git a/taskcluster/taskgraph/util/hash.py b/taskcluster/taskgraph/util/hash.py new file mode 100644 index 0000000000..04b946be71 --- /dev/null +++ b/taskcluster/taskgraph/util/hash.py @@ -0,0 +1,58 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals +from mozbuild.util import memoize +import mozpack.path as mozpath +from mozversioncontrol import get_repository_object +import hashlib +import io +import six + + +@memoize +def hash_path(path): + """Hash a single file. + + Returns the SHA-256 hash in hex form. + """ + with io.open(path, mode="rb") as fh: + return hashlib.sha256(fh.read()).hexdigest() + + +@memoize +def get_file_finder(base_path): + return get_repository_object(base_path).get_tracked_files_finder() + + +def hash_paths(base_path, patterns): + """ + Give a list of path patterns, return a digest of the contents of all + the corresponding files, similarly to git tree objects or mercurial + manifests. + + Each file is hashed. The list of all hashes and file paths is then + itself hashed to produce the result. + """ + finder = get_file_finder(base_path) + h = hashlib.sha256() + files = {} + for pattern in patterns: + found = list(finder.find(pattern)) + if found: + files.update(found) + else: + raise Exception("%s did not match anything" % pattern) + for path in sorted(files.keys()): + if path.endswith((".pyc", ".pyd", ".pyo")): + continue + h.update( + six.ensure_binary( + "{} {}\n".format( + hash_path(mozpath.abspath(mozpath.join(base_path, path))), + mozpath.normsep(path), + ) + ) + ) + return h.hexdigest() diff --git a/taskcluster/taskgraph/util/hg.py b/taskcluster/taskgraph/util/hg.py new file mode 100644 index 0000000000..65dd412354 --- /dev/null +++ b/taskcluster/taskgraph/util/hg.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import, print_function, unicode_literals + +import logging + +import requests +import six +import subprocess +from redo import retry + +from mozbuild.util import memoize + +logger = logging.getLogger(__name__) + +PUSHLOG_CHANGESET_TMPL = ( + "{repository}/json-pushes?version=2&changeset={revision}&tipsonly=1" +) +PUSHLOG_PUSHES_TMPL = ( + "{repository}/json-pushes/?version=2&startID={push_id_start}&endID={push_id_end}" +) + + +def _query_pushlog(url): + response = retry( + requests.get, + attempts=5, + sleeptime=10, + args=(url,), + kwargs={"timeout": 60, "headers": {"User-Agent": "TaskCluster"}}, + ) + + return response.json()["pushes"] + + +def find_hg_revision_push_info(repository, revision): + """Given the parameters for this action and a revision, find the + pushlog_id of the revision.""" + url = PUSHLOG_CHANGESET_TMPL.format(repository=repository, revision=revision) + + pushes = _query_pushlog(url) + + if len(pushes) != 1: + raise RuntimeError( + "Found {} pushlog_ids, expected 1, for {} revision {}: {}".format( + len(pushes), repository, revision, pushes + ) + ) + + pushid = list(pushes.keys())[0] + return { + "pushdate": pushes[pushid]["date"], + "pushid": pushid, + "user": pushes[pushid]["user"], + } + + +@memoize +def get_push_data(repository, project, push_id_start, push_id_end): + url = PUSHLOG_PUSHES_TMPL.format( + repository=repository, + push_id_start=push_id_start - 1, + push_id_end=push_id_end, + ) + + try: + pushes = _query_pushlog(url) + + return { + push_id: pushes[str(push_id)] + for push_id in range(push_id_start, push_id_end + 1) + } + + # In the event of request times out, requests will raise a TimeoutError. + except requests.exceptions.Timeout: + logger.warning("json-pushes timeout") + + # In the event of a network problem (e.g. DNS failure, refused connection, etc), + # requests will raise a ConnectionError. + except requests.exceptions.ConnectionError: + logger.warning("json-pushes connection error") + + # In the event of the rare invalid HTTP response(e.g 404, 401), + # requests will raise an HTTPError exception + except requests.exceptions.HTTPError: + logger.warning("Bad Http response") + + # When we get invalid JSON (i.e. 500 error), it results in a ValueError (bug 1313426) + except ValueError as error: + logger.warning("Invalid JSON, possible server error: {}".format(error)) + + # We just print the error out as a debug message if we failed to catch the exception above + except requests.exceptions.RequestException as error: + logger.warning(error) + + return None + + +def get_hg_revision_branch(root, revision): + """Given the parameters for a revision, find the hg_branch (aka + relbranch) of the revision.""" + return six.ensure_text( + subprocess.check_output( + [ + "hg", + "identify", + "-T", + "{branch}", + "--rev", + revision, + ], + cwd=root, + universal_newlines=True, + ) + ) + + +# For these functions, we assume that run-task has correctly checked out the +# revision indicated by GECKO_HEAD_REF, so all that remains is to see what the +# current revision is. Mercurial refers to that as `.`. 
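+# For example (illustrative path), calculate_head_rev("/builds/worker/checkouts/gecko")
+# shells out to `hg log -r . -T {node}` and returns the current changeset hash
+# as text.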
+def get_hg_commit_message(root): + return six.ensure_text( + subprocess.check_output(["hg", "log", "-r", ".", "-T", "{desc}"], cwd=root) + ) + + +def calculate_head_rev(root): + return six.ensure_text( + subprocess.check_output(["hg", "log", "-r", ".", "-T", "{node}"], cwd=root) + ) diff --git a/taskcluster/taskgraph/util/keyed_by.py b/taskcluster/taskgraph/util/keyed_by.py new file mode 100644 index 0000000000..c86447f75f --- /dev/null +++ b/taskcluster/taskgraph/util/keyed_by.py @@ -0,0 +1,89 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +from .attributes import keymatch + + +def evaluate_keyed_by(value, item_name, attributes, defer=None): + """ + For values which can either accept a literal value, or be keyed by some + attributes, perform that lookup and return the result. + + For example, given item:: + + by-test-platform: + macosx-10.11/debug: 13 + win.*: 6 + default: 12 + + a call to `evaluate_keyed_by(item, 'thing-name', {'test-platform': 'linux96')` + would return `12`. + + The `item_name` parameter is used to generate useful error messages. + Items can be nested as deeply as desired:: + + by-test-platform: + win.*: + by-project: + ash: .. + cedar: .. + linux: 13 + default: 12 + + The `defer` parameter allows evaluating a by-* entry at a later time. In the + example above it's possible that the project attribute hasn't been set + yet, in which case we'd want to stop before resolving that subkey and then + call this function again later. This can be accomplished by setting + `defer=["project"]` in this example. + """ + while True: + if not isinstance(value, dict) or len(value) != 1: + return value + value_key = next(iter(value)) + if not value_key.startswith("by-"): + return value + + keyed_by = value_key[3:] # strip off 'by-' prefix + + if defer and keyed_by in defer: + return value + + key = attributes.get(keyed_by) + alternatives = next(iter(value.values())) + + if len(alternatives) == 1 and "default" in alternatives: + # Error out when only 'default' is specified as only alternatives, + # because we don't need to by-{keyed_by} there. + raise Exception( + "Keyed-by '{}' unnecessary with only value 'default' " + "found, when determining item {}".format(keyed_by, item_name) + ) + + if key is None: + if "default" in alternatives: + value = alternatives["default"] + continue + else: + raise Exception( + "No attribute {} and no value for 'default' found " + "while determining item {}".format(keyed_by, item_name) + ) + + matches = keymatch(alternatives, key) + if len(matches) > 1: + raise Exception( + "Multiple matching values for {} {!r} found while " + "determining item {}".format(keyed_by, key, item_name) + ) + elif matches: + value = matches[0] + continue + + raise Exception( + "No {} matching {!r} nor 'default' found while determining item {}".format( + keyed_by, key, item_name + ) + ) diff --git a/taskcluster/taskgraph/util/parameterization.py b/taskcluster/taskgraph/util/parameterization.py new file mode 100644 index 0000000000..0486773605 --- /dev/null +++ b/taskcluster/taskgraph/util/parameterization.py @@ -0,0 +1,107 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
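+# Illustrative example of what this module resolves: a payload entry
+# {"task-reference": "<build>"} is replaced by the taskId of the "build"
+# dependency, and {"artifact-reference": "<build/public/target.zip>"} by the
+# artifact URL for that taskId (dependency and artifact names are
+# hypothetical).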
+ +from __future__ import absolute_import, print_function, unicode_literals + +import re + +import six + +from taskgraph.util.time import json_time_from_now +from taskgraph.util.taskcluster import get_artifact_url + +TASK_REFERENCE_PATTERN = re.compile("<([^>]+)>") +ARTIFACT_REFERENCE_PATTERN = re.compile("<([^/]+)/([^>]+)>") + + +def _recurse(val, param_fns): + def recurse(val): + if isinstance(val, list): + return [recurse(v) for v in val] + elif isinstance(val, dict): + if len(val) == 1: + for param_key, param_fn in param_fns.items(): + if set(six.iterkeys(val)) == {param_key}: + return param_fn(val[param_key]) + return {k: recurse(v) for k, v in six.iteritems(val)} + else: + return val + + return recurse(val) + + +def resolve_timestamps(now, task_def): + """Resolve all instances of `{'relative-datestamp': '..'}` in the given task definition""" + return _recurse( + task_def, + { + "relative-datestamp": lambda v: json_time_from_now(v, now), + }, + ) + + +def resolve_task_references(label, task_def, task_id, decision_task_id, dependencies): + """Resolve all instances of + {'task-reference': '..<..>..'} + and + {'artifact-reference`: '..<dependency/artifact/path>..'} + in the given task definition, using the given dependencies + + """ + + def task_reference(val): + def repl(match): + key = match.group(1) + if key == "self": + return task_id + elif key == "decision": + return decision_task_id + try: + return dependencies[key] + except KeyError: + # handle escaping '<' + if key == "<": + return key + raise KeyError( + "task '{}' has no dependency named '{}'".format(label, key) + ) + + return TASK_REFERENCE_PATTERN.sub(repl, val) + + def artifact_reference(val): + def repl(match): + dependency, artifact_name = match.group(1, 2) + + if dependency == "self": + raise KeyError( + "task '{}' can't reference artifacts of self".format(label) + ) + elif dependency == "decision": + task_id = decision_task_id + else: + try: + task_id = dependencies[dependency] + except KeyError: + raise KeyError( + "task '{}' has no dependency named '{}'".format( + label, dependency + ) + ) + + assert artifact_name.startswith( + "public/" + ), "artifact-reference only supports public artifacts, not `{}`".format( + artifact_name + ) + return get_artifact_url(task_id, artifact_name) + + return ARTIFACT_REFERENCE_PATTERN.sub(repl, val) + + return _recurse( + task_def, + { + "task-reference": task_reference, + "artifact-reference": artifact_reference, + }, + ) diff --git a/taskcluster/taskgraph/util/partials.py b/taskcluster/taskgraph/util/partials.py new file mode 100644 index 0000000000..50a64d89de --- /dev/null +++ b/taskcluster/taskgraph/util/partials.py @@ -0,0 +1,301 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import, print_function, unicode_literals + +import logging + +import requests +import six + +import redo +from taskgraph.util.scriptworker import ( + BALROG_SCOPE_ALIAS_TO_PROJECT, + BALROG_SERVER_SCOPES, +) + +logger = logging.getLogger(__name__) + +PLATFORM_RENAMES = { + "windows2012-32": "win32", + "windows2012-64": "win64", + "windows2012-aarch64": "win64-aarch64", + "osx-cross": "macosx64", + "osx": "macosx64", +} + +BALROG_PLATFORM_MAP = { + "linux": ["Linux_x86-gcc3"], + "linux32": ["Linux_x86-gcc3"], + "linux64": ["Linux_x86_64-gcc3"], + "linux64-asan-reporter": ["Linux_x86_64-gcc3-asan"], + "macosx64": [ + "Darwin_x86_64-gcc3-u-i386-x86_64", + "Darwin_x86-gcc3-u-i386-x86_64", + "Darwin_aarch64-gcc3", + "Darwin_x86-gcc3", + "Darwin_x86_64-gcc3", + ], + "win32": ["WINNT_x86-msvc", "WINNT_x86-msvc-x86", "WINNT_x86-msvc-x64"], + "win64": ["WINNT_x86_64-msvc", "WINNT_x86_64-msvc-x64"], + "win64-asan-reporter": ["WINNT_x86_64-msvc-x64-asan"], + "win64-aarch64": [ + "WINNT_aarch64-msvc-aarch64", + ], +} + +FTP_PLATFORM_MAP = { + "Darwin_x86-gcc3": "mac", + "Darwin_x86-gcc3-u-i386-x86_64": "mac", + "Darwin_x86_64-gcc3": "mac", + "Darwin_x86_64-gcc3-u-i386-x86_64": "mac", + "Darwin_aarch64-gcc3": "mac", + "Linux_x86-gcc3": "linux-i686", + "Linux_x86_64-gcc3": "linux-x86_64", + "Linux_x86_64-gcc3-asan": "linux-x86_64-asan-reporter", + "WINNT_x86_64-msvc-x64-asan": "win64-asan-reporter", + "WINNT_x86-msvc": "win32", + "WINNT_x86-msvc-x64": "win32", + "WINNT_x86-msvc-x86": "win32", + "WINNT_x86_64-msvc": "win64", + "WINNT_x86_64-msvc-x64": "win64", + "WINNT_aarch64-msvc-aarch64": "win64-aarch64", +} + + +def get_balrog_platform_name(platform): + """Convert build platform names into balrog platform names. + + Remove known values instead to catch aarch64 and other platforms + that may be added. 
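+
+    For example (illustrative inputs), "windows2012-64-shippable" becomes
+    "win64", while an unlisted platform such as "linux64-aarch64" is returned
+    unchanged.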
+ """ + removals = ["-devedition", "-shippable"] + for remove in removals: + platform = platform.replace(remove, "") + return PLATFORM_RENAMES.get(platform, platform) + + +def _sanitize_platform(platform): + platform = get_balrog_platform_name(platform) + if platform not in BALROG_PLATFORM_MAP: + return platform + return BALROG_PLATFORM_MAP[platform][0] + + +def get_builds(release_history, platform, locale): + """Examine cached balrog release history and return the list of + builds we need to generate diffs from""" + platform = _sanitize_platform(platform) + return release_history.get(platform, {}).get(locale, {}) + + +def get_partials_artifacts_from_params(release_history, platform, locale): + platform = _sanitize_platform(platform) + return [ + (artifact, details.get("previousVersion", None)) + for artifact, details in release_history.get(platform, {}) + .get(locale, {}) + .items() + ] + + +def get_partials_info_from_params(release_history, platform, locale): + platform = _sanitize_platform(platform) + + artifact_map = {} + for k in release_history.get(platform, {}).get(locale, {}): + details = release_history[platform][locale][k] + attributes = ("buildid", "previousBuildNumber", "previousVersion") + artifact_map[k] = { + attr: details[attr] for attr in attributes if attr in details + } + return artifact_map + + +def _retry_on_http_errors(url, verify, params, errors): + if params: + params_str = "&".join("=".join([k, str(v)]) for k, v in six.iteritems(params)) + else: + params_str = "" + logger.info("Connecting to %s?%s", url, params_str) + for _ in redo.retrier(sleeptime=5, max_sleeptime=30, attempts=10): + try: + req = requests.get(url, verify=verify, params=params, timeout=10) + req.raise_for_status() + return req + except requests.HTTPError as e: + if e.response.status_code in errors: + logger.exception( + "Got HTTP %s trying to reach %s", e.response.status_code, url + ) + else: + raise + else: + raise + + +def get_sorted_releases(product, branch): + """Returns a list of release names from Balrog. + :param product: product name, AKA appName + :param branch: branch name, e.g. mozilla-central + :return: a sorted list of release names, most recent first. + """ + url = "{}/releases".format(_get_balrog_api_root(branch)) + params = { + "product": product, + # Adding -nightly-2 (2 stands for the beginning of build ID + # based on date) should filter out release and latest blobs. + # This should be changed to -nightly-3 in 3000 ;) + "name_prefix": "{}-{}-nightly-2".format(product, branch), + "names_only": True, + } + req = _retry_on_http_errors(url=url, verify=True, params=params, errors=[500]) + releases = req.json()["names"] + releases = sorted(releases, reverse=True) + return releases + + +def get_release_builds(release, branch): + url = "{}/releases/{}".format(_get_balrog_api_root(branch), release) + req = _retry_on_http_errors(url=url, verify=True, params=None, errors=[500]) + return req.json() + + +def _get_balrog_api_root(branch): + # Query into the scopes scriptworker uses to make sure we check against the same balrog server + # That our jobs would use. 
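+    # In practice (per the branches below), any project whose resolved scope
+    # is "balrog:server:dep" is pointed at the staging host, and everything
+    # else uses the production aus5.mozilla.org endpoint.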
+ scope = None + for alias, projects in BALROG_SCOPE_ALIAS_TO_PROJECT: + if branch in projects and alias in BALROG_SERVER_SCOPES: + scope = BALROG_SERVER_SCOPES[alias] + break + else: + scope = BALROG_SERVER_SCOPES["default"] + + if scope == "balrog:server:dep": + return "https://stage.balrog.nonprod.cloudops.mozgcp.net/api/v1" + else: + return "https://aus5.mozilla.org/api/v1" + + +def find_localtest(fileUrls): + for channel in fileUrls: + if "-localtest" in channel: + return channel + + +def populate_release_history( + product, branch, maxbuilds=4, maxsearch=10, partial_updates=None +): + # Assuming we are using release branches when we know the list of previous + # releases in advance + if partial_updates: + return _populate_release_history( + product, branch, partial_updates=partial_updates + ) + else: + return _populate_nightly_history( + product, branch, maxbuilds=maxbuilds, maxsearch=maxsearch + ) + + +def _populate_nightly_history(product, branch, maxbuilds=4, maxsearch=10): + """Find relevant releases in Balrog + Not all releases have all platforms and locales, due + to Taskcluster migration. + + Args: + product (str): capitalized product name, AKA appName, e.g. Firefox + branch (str): branch name (mozilla-central) + maxbuilds (int): Maximum number of historical releases to populate + maxsearch(int): Traverse at most this many releases, to avoid + working through the entire history. + Returns: + json object based on data from balrog api + + results = { + 'platform1': { + 'locale1': { + 'buildid1': mar_url, + 'buildid2': mar_url, + 'buildid3': mar_url, + }, + 'locale2': { + 'target.partial-1.mar': {'buildid1': 'mar_url'}, + } + }, + 'platform2': { + } + } + """ + last_releases = get_sorted_releases(product, branch) + + partial_mar_tmpl = "target.partial-{}.mar" + + builds = dict() + for release in last_releases[:maxsearch]: + # maxbuilds in all categories, don't make any more queries + full = len(builds) > 0 and all( + len(builds[platform][locale]) >= maxbuilds + for platform in builds + for locale in builds[platform] + ) + if full: + break + history = get_release_builds(release, branch) + + for platform in history["platforms"]: + if "alias" in history["platforms"][platform]: + continue + if platform not in builds: + builds[platform] = dict() + for locale in history["platforms"][platform]["locales"]: + if locale not in builds[platform]: + builds[platform][locale] = dict() + if len(builds[platform][locale]) >= maxbuilds: + continue + buildid = history["platforms"][platform]["locales"][locale]["buildID"] + url = history["platforms"][platform]["locales"][locale]["completes"][0][ + "fileUrl" + ] + nextkey = len(builds[platform][locale]) + 1 + builds[platform][locale][partial_mar_tmpl.format(nextkey)] = { + "buildid": buildid, + "mar_url": url, + } + return builds + + +def _populate_release_history(product, branch, partial_updates): + builds = dict() + for version, release in six.iteritems(partial_updates): + prev_release_blob = "{product}-{version}-build{build_number}".format( + product=product, version=version, build_number=release["buildNumber"] + ) + partial_mar_key = "target-{version}.partial.mar".format(version=version) + history = get_release_builds(prev_release_blob, branch) + # use one of the localtest channels to avoid relying on bouncer + localtest = find_localtest(history["fileUrls"]) + url_pattern = history["fileUrls"][localtest]["completes"]["*"] + + for platform in history["platforms"]: + if "alias" in history["platforms"][platform]: + continue + if platform not in 
builds: + builds[platform] = dict() + for locale in history["platforms"][platform]["locales"]: + if locale not in builds[platform]: + builds[platform][locale] = dict() + buildid = history["platforms"][platform]["locales"][locale]["buildID"] + url = url_pattern.replace( + "%OS_FTP%", FTP_PLATFORM_MAP[platform] + ).replace("%LOCALE%", locale) + builds[platform][locale][partial_mar_key] = { + "buildid": buildid, + "mar_url": url, + "previousVersion": version, + "previousBuildNumber": release["buildNumber"], + "product": product, + } + return builds diff --git a/taskcluster/taskgraph/util/partners.py b/taskcluster/taskgraph/util/partners.py new file mode 100644 index 0000000000..1d820bf55e --- /dev/null +++ b/taskcluster/taskgraph/util/partners.py @@ -0,0 +1,557 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +from copy import deepcopy +import json +import logging +import os +from redo import retry +import requests +import xml.etree.ElementTree as ET + +from taskgraph.util.attributes import release_level +from taskgraph.util.schema import resolve_keyed_by +import six +import yaml + +# Suppress chatty requests logging +logging.getLogger("requests").setLevel(logging.WARNING) + +log = logging.getLogger(__name__) + +GITHUB_API_ENDPOINT = "https://api.github.com/graphql" + +""" +LOGIN_QUERY, MANIFEST_QUERY, and REPACK_CFG_QUERY are all written to the Github v4 API, +which users GraphQL. See https://developer.github.com/v4/ +""" + +LOGIN_QUERY = """query { + viewer { + login + name + } +} +""" + +# Returns the contents of default.xml from a manifest repository +MANIFEST_QUERY = """query { + repository(owner:"%(owner)s", name:"%(repo)s") { + object(expression: "master:%(file)s") { + ... on Blob { + text + } + } + } +} +""" +# Example response: +# { +# "data": { +# "repository": { +# "object": { +# "text": "<?xml version=\"1.0\" ?>\n<manifest>\n " + +# "<remote fetch=\"git@github.com:mozilla-partners/\" name=\"mozilla-partners\"/>\n " + +# "<remote fetch=\"git@github.com:mozilla/\" name=\"mozilla\"/>\n\n " + +# "<project name=\"repack-scripts\" path=\"scripts\" remote=\"mozilla-partners\" " + +# "revision=\"master\"/>\n <project name=\"build-tools\" path=\"scripts/tools\" " + +# "remote=\"mozilla\" revision=\"master\"/>\n <project name=\"mozilla-EME-free\" " + +# "path=\"partners/mozilla-EME-free\" remote=\"mozilla-partners\" " + +# "revision=\"master\"/>\n</manifest>\n" +# } +# } +# } +# } + +# Returns the contents of desktop/*/repack.cfg for a partner repository +REPACK_CFG_QUERY = """query{ + repository(owner:"%(owner)s", name:"%(repo)s") { + object(expression: "%(revision)s:desktop/"){ + ... on Tree { + entries { + name + object { + ... on Tree { + entries { + name + object { + ... 
on Blob { + text + } + } + } + } + } + } + } + } + } +} +""" +# Example response: +# { +# "data": { +# "repository": { +# "object": { +# "entries": [ +# { +# "name": "mozilla-EME-free", +# "object": { +# "entries": [ +# { +# "name": "distribution", +# "object": {} +# }, +# { +# "name": "repack.cfg", +# "object": { +# "text": "aus=\"mozilla-EMEfree\"\ndist_id=\"mozilla-EMEfree\"\n" + +# "dist_version=\"1.0\"\nlinux-i686=true\nlinux-x86_64=true\n" + +# " locales=\"ach af de en-US\"\nmac=true\nwin32=true\nwin64=true\n" + +# "output_dir=\"%(platform)s-EME-free/%(locale)s\"\n\n" + +# "# Upload params\nbucket=\"net-mozaws-prod-delivery-firefox\"\n" + +# "upload_to_candidates=true\n" +# } +# } +# ] +# } +# } +# ] +# } +# } +# } +# } + +# Map platforms in repack.cfg into their equivalents in taskcluster +TC_PLATFORM_PER_FTP = { + "linux-i686": "linux-shippable", + "linux-x86_64": "linux64-shippable", + "mac": "macosx64-shippable", + "win32": "win32-shippable", + "win64": "win64-shippable", + "win64-aarch64": "win64-aarch64-shippable", +} + +TASKCLUSTER_PROXY_SECRET_ROOT = "http://taskcluster/secrets/v1/secret" + +LOCALES_FILE = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), + "browser", + "locales", + "l10n-changesets.json", +) + +# cache data at the module level +partner_configs = {} + + +def get_token(params): + """We use a Personal Access Token from Github to lookup partner config. No extra scopes are + needed on the token to read public repositories, but need the 'repo' scope to see private + repositories. This is not fine grained and also grants r/w access, but is revoked at the repo + level. + """ + + # Allow for local taskgraph debugging + if os.environ.get("GITHUB_API_TOKEN"): + return os.environ["GITHUB_API_TOKEN"] + + # The 'usual' method - via taskClusterProxy for decision tasks + url = "{secret_root}/project/releng/gecko/build/level-{level}/partner-github-api".format( + secret_root=TASKCLUSTER_PROXY_SECRET_ROOT, **params + ) + try: + resp = retry( + requests.get, + attempts=2, + sleeptime=10, + args=(url,), + kwargs={"timeout": 60, "headers": ""}, + ) + j = resp.json() + return j["secret"]["key"] + except (requests.ConnectionError, ValueError, KeyError): + raise RuntimeError("Could not get Github API token to lookup partner data") + + +def query_api(query, token): + """ Make a query with a Github auth header, returning the json """ + headers = {"Authorization": "bearer %s" % token} + r = requests.post(GITHUB_API_ENDPOINT, json={"query": query}, headers=headers) + r.raise_for_status() + + j = r.json() + if "errors" in j: + raise RuntimeError("Github query error - %s", j["errors"]) + return j + + +def check_login(token): + log.debug("Checking we have a valid login") + query_api(LOGIN_QUERY, token) + + +def get_repo_params(repo): + """ Parse the organisation and repo name from an https or git url for a repo """ + if repo.startswith("https"): + # eg https://github.com/mozilla-partners/mozilla-EME-free + return repo.rsplit("/", 2)[-2:] + elif repo.startswith("git@"): + # eg git@github.com:mozilla-partners/mailru.git + repo = repo.replace(".git", "") + return repo.split(":")[-1].split("/") + + +def get_partners(manifestRepo, token): + """Given the url to a manifest repository, retrieve the default.xml and parse it into a + list of partner repos. 
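+
+    An illustrative sketch of the returned mapping, keyed by partner project
+    name (the values shown are hypothetical):
+
+        {
+            "mozilla-EME-free": {
+                "owner": "mozilla-partners",
+                "repo": "mozilla-EME-free",
+                "revision": "master",
+            },
+        }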
+ """ + log.debug("Querying for manifest default.xml in %s", manifestRepo) + owner, repo = get_repo_params(manifestRepo) + query = MANIFEST_QUERY % {"owner": owner, "repo": repo, "file": "default.xml"} + raw_manifest = query_api(query, token) + log.debug("Raw manifest: %s", raw_manifest) + if not raw_manifest["data"]["repository"]: + raise RuntimeError( + "Couldn't load partner manifest at %s, insufficient permissions ?" + % manifestRepo + ) + e = ET.fromstring(raw_manifest["data"]["repository"]["object"]["text"]) + + remotes = {} + partners = {} + for child in e: + if child.tag == "remote": + name = child.attrib["name"] + url = child.attrib["fetch"] + remotes[name] = url + log.debug("Added remote %s at %s", name, url) + elif child.tag == "project": + # we don't need to check any code repos + if "scripts" in child.attrib["path"]: + continue + owner, _ = get_repo_params(remotes[child.attrib["remote"]] + "_") + partner_url = { + "owner": owner, + "repo": child.attrib["name"], + "revision": child.attrib["revision"], + } + partners[child.attrib["name"]] = partner_url + log.debug( + "Added partner %s at revision %s" + % (partner_url["repo"], partner_url["revision"]) + ) + return partners + + +def parse_config(data): + """Parse a single repack.cfg file into a python dictionary. + data is contents of the file, in "foo=bar\nbaz=buzz" style. We do some translation on + locales and platforms data, otherwise passthrough + """ + ALLOWED_KEYS = ( + "locales", + "platforms", + "upload_to_candidates", + "repack_stub_installer", + "publish_to_releases", + ) + config = {"platforms": []} + for l in data.splitlines(): + if "=" in l: + l = str(l) + key, value = l.split("=", 1) + value = value.strip("'\"").rstrip("'\"") + if key in TC_PLATFORM_PER_FTP.keys(): + if value.lower() == "true": + config["platforms"].append(TC_PLATFORM_PER_FTP[key]) + continue + if key not in ALLOWED_KEYS: + continue + if key == "locales": + # a list please + value = value.split(" ") + config[key] = value + return config + + +def get_repack_configs(repackRepo, token): + """ For a partner repository, retrieve all the repack.cfg files and parse them into a dict """ + log.debug("Querying for configs in %s", repackRepo) + query = REPACK_CFG_QUERY % repackRepo + raw_configs = query_api(query, token) + raw_configs = raw_configs["data"]["repository"]["object"]["entries"] + + configs = {} + for sub_config in raw_configs: + name = sub_config["name"] + for file in sub_config["object"].get("entries", []): + if file["name"] != "repack.cfg": + continue + configs[name] = parse_config(file["object"]["text"]) + return configs + + +def get_attribution_config(manifestRepo, token): + log.debug("Querying for manifest attribution_config.yml in %s", manifestRepo) + owner, repo = get_repo_params(manifestRepo) + query = MANIFEST_QUERY % { + "owner": owner, + "repo": repo, + "file": "attribution_config.yml", + } + raw_manifest = query_api(query, token) + if not raw_manifest["data"]["repository"]: + raise RuntimeError( + "Couldn't load partner manifest at %s, insufficient permissions ?" 
+ % manifestRepo + ) + # no file has been set up, gracefully continue + if raw_manifest["data"]["repository"]["object"] is None: + log.debug("No attribution_config.yml file found") + return {} + + return yaml.safe_load(raw_manifest["data"]["repository"]["object"]["text"]) + + +def get_partner_config_by_url(manifest_url, kind, token, partner_subset=None): + """Retrieve partner data starting from the manifest url, which points to a repository + containing a default.xml that is intended to be drive the Google tool 'repo'. It + descends into each partner repo to lookup and parse the repack.cfg file(s). + + If partner_subset is a list of sub_config names only return data for those. + + Supports caching data by kind to avoid repeated requests, relying on the related kinds for + partner repacking, signing, repackage, repackage signing all having the same kind prefix. + """ + if not manifest_url: + raise RuntimeError("Manifest url for {} not defined".format(kind)) + if kind not in partner_configs: + log.info("Looking up data for %s from %s", kind, manifest_url) + check_login(token) + if kind == "release-partner-attribution": + partner_configs[kind] = get_attribution_config(manifest_url, token) + else: + partners = get_partners(manifest_url, token) + + partner_configs[kind] = {} + for partner, partner_url in partners.items(): + if partner_subset and partner not in partner_subset: + continue + partner_configs[kind][partner] = get_repack_configs(partner_url, token) + + return partner_configs[kind] + + +def check_if_partners_enabled(config, tasks): + if ( + ( + config.params["release_enable_partner_repack"] + and config.kind.startswith("release-partner-repack") + ) + or ( + config.params["release_enable_partner_attribution"] + and config.kind.startswith("release-partner-attribution") + ) + or ( + config.params["release_enable_emefree"] + and config.kind.startswith("release-eme-free-") + ) + ): + for task in tasks: + yield task + + +def get_partner_config_by_kind(config, kind): + """Retrieve partner data starting from the manifest url, which points to a repository + containing a default.xml that is intended to be drive the Google tool 'repo'. It + descends into each partner repo to lookup and parse the repack.cfg file(s). + + Supports caching data by kind to avoid repeated requests, relying on the related kinds for + partner repacking, signing, repackage, repackage signing all having the same kind prefix. 
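+
+    A sketch of typical usage (see `get_repack_ids_by_platform` below):
+
+        kind_config = get_partner_config_by_kind(config, config.kind)
+
+    For a kind named e.g. "release-partner-repack-signing" (name illustrative),
+    this resolves to the "release-partner-repack" entry of
+    `config.params["release_partner_config"]`, filtered down to the partners in
+    `config.params["release_partners"]` when that parameter is set.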
+ """ + partner_subset = config.params["release_partners"] + partner_configs = config.params["release_partner_config"] or {} + + # TODO eme-free should be a partner; we shouldn't care about per-kind + for k in partner_configs: + if kind.startswith(k): + kind_config = partner_configs[k] + break + else: + return {} + # if we're only interested in a subset of partners we remove the rest + if partner_subset: + if kind.startswith("release-partner-repack"): + # TODO - should be fatal to have an unknown partner in partner_subset + for partner in [p for p in kind_config.keys() if p not in partner_subset]: + del kind_config[partner] + elif kind.startswith("release-partner-attribution") and isinstance( + kind_config, dict + ): + all_configs = deepcopy(kind_config.get("configs", [])) + kind_config["configs"] = [] + for this_config in all_configs: + if this_config["campaign"] in partner_subset: + kind_config["configs"].append(this_config) + return kind_config + + +def _fix_subpartner_locales(orig_config, all_locales): + subpartner_config = deepcopy(orig_config) + # Get an ordered list of subpartner locales that is a subset of all_locales + subpartner_config["locales"] = sorted( + list(set(orig_config["locales"]) & set(all_locales)) + ) + return subpartner_config + + +def fix_partner_config(orig_config): + pc = {} + with open(LOCALES_FILE, "r") as fh: + all_locales = list(json.load(fh).keys()) + # l10n-changesets.json doesn't include en-US, but the repack list does + if "en-US" not in all_locales: + all_locales.append("en-US") + for kind, kind_config in six.iteritems(orig_config): + if kind == "release-partner-attribution": + pc[kind] = {} + if kind_config: + pc[kind] = {"defaults": kind_config["defaults"]} + for config in kind_config["configs"]: + # Make sure our locale list is a subset of all_locales + pc[kind].setdefault("configs", []).append( + _fix_subpartner_locales(config, all_locales) + ) + else: + for partner, partner_config in six.iteritems(kind_config): + for subpartner, subpartner_config in six.iteritems(partner_config): + # get rid of empty subpartner configs + if not subpartner_config: + continue + # Make sure our locale list is a subset of all_locales + pc.setdefault(kind, {}).setdefault(partner, {})[ + subpartner + ] = _fix_subpartner_locales(subpartner_config, all_locales) + return pc + + +# seems likely this exists elsewhere already +def get_ftp_platform(platform): + if platform.startswith("win32"): + return "win32" + elif platform.startswith("win64-aarch64"): + return "win64-aarch64" + elif platform.startswith("win64"): + return "win64" + elif platform.startswith("macosx"): + return "mac" + elif platform.startswith("linux-"): + return "linux-i686" + elif platform.startswith("linux64"): + return "linux-x86_64" + else: + raise ValueError("Unimplemented platform {}".format(platform)) + + +# Ugh +def locales_per_build_platform(build_platform, locales): + if build_platform.startswith("mac"): + exclude = ["ja"] + else: + exclude = ["ja-JP-mac"] + return [locale for locale in locales if locale not in exclude] + + +def get_partner_url_config(parameters, graph_config): + partner_url_config = deepcopy(graph_config["partner-urls"]) + substitutions = { + "release-product": parameters["release_product"], + "release-level": release_level(parameters["project"]), + "release-type": parameters["release_type"], + } + resolve_keyed_by( + partner_url_config, + "release-eme-free-repack", + "eme-free manifest_url", + **substitutions + ) + resolve_keyed_by( + partner_url_config, + "release-partner-repack", 
+ "partner manifest url", + **substitutions + ) + resolve_keyed_by( + partner_url_config, + "release-partner-attribution", + "partner attribution url", + **substitutions + ) + return partner_url_config + + +def get_repack_ids_by_platform(config, build_platform): + partner_config = get_partner_config_by_kind(config, config.kind) + combinations = [] + for partner, subconfigs in partner_config.items(): + for sub_config_name, sub_config in subconfigs.items(): + if build_platform not in sub_config.get("platforms", []): + continue + locales = locales_per_build_platform( + build_platform, sub_config.get("locales", []) + ) + for locale in locales: + combinations.append("{}/{}/{}".format(partner, sub_config_name, locale)) + return sorted(combinations) + + +def get_partners_to_be_published(config): + # hardcoded kind because release-bouncer-aliases doesn't match otherwise + partner_config = get_partner_config_by_kind(config, "release-partner-repack") + partners = [] + for partner, subconfigs in partner_config.items(): + for sub_config_name, sub_config in subconfigs.items(): + if sub_config.get("publish_to_releases"): + partners.append((partner, sub_config_name, sub_config["platforms"])) + return partners + + +def apply_partner_priority(config, jobs): + priority = None + # Reduce the priority of the partner repack jobs because they don't block QE. Meanwhile + # leave EME-free jobs alone because they do, and they'll get the branch priority like the rest + # of the release. Only bother with this in production, not on staging releases on try. + # medium is the same as mozilla-central, see taskcluster/ci/config.yml. ie higher than + # integration branches because we don't want to wait a lot for the graph to be done, but + # for multiple releases the partner tasks always wait for non-partner. + if ( + config.kind.startswith( + ("release-partner-repack", "release-partner-attribution") + ) + and config.params.release_level() == "production" + ): + priority = "medium" + for job in jobs: + if priority: + job["priority"] = priority + yield job + + +def generate_attribution_code(defaults, partner): + params = { + "medium": defaults["medium"], + "source": defaults["source"], + "campaign": partner["campaign"], + "content": partner["content"], + } + if partner.get("variation"): + params["variation"] = partner["variation"] + if partner.get("experiment"): + params["experiment"] = partner["experiment"] + + code = six.moves.urllib.parse.urlencode(params) + return code diff --git a/taskcluster/taskgraph/util/perfile.py b/taskcluster/taskgraph/util/perfile.py new file mode 100644 index 0000000000..5944593e26 --- /dev/null +++ b/taskcluster/taskgraph/util/perfile.py @@ -0,0 +1,103 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import itertools +import json +import logging +import math + +from mozbuild.util import memoize +from mozpack.path import match as mozpackmatch +from taskgraph import files_changed +import taskgraph +from .. import GECKO + +logger = logging.getLogger(__name__) + + +@memoize +def perfile_number_of_chunks(is_try, try_task_config, head_repository, head_rev, type): + if taskgraph.fast and not is_try: + # When iterating on taskgraph changes, the exact number of chunks that + # test-verify runs usually isn't important, so skip it when going fast. 
+ return 3 + tests_per_chunk = 10.0 + if type.startswith("test-coverage"): + tests_per_chunk = 30.0 + + if type.startswith("test-verify-wpt") or type.startswith("test-coverage-wpt"): + file_patterns = [ + "testing/web-platform/tests/**", + "testing/web-platform/mozilla/tests/**", + ] + elif type.startswith("test-verify-gpu") or type.startswith("test-coverage-gpu"): + file_patterns = [ + "**/*webgl*/**/test_*", + "**/dom/canvas/**/test_*", + "**/gfx/tests/**/test_*", + "**/devtools/canvasdebugger/**/browser_*", + "**/reftest*/**", + ] + elif type.startswith("test-verify") or type.startswith("test-coverage"): + file_patterns = [ + "**/test_*", + "**/browser_*", + "**/crashtest*/**", + "js/src/tests/test/**", + "js/src/tests/non262/**", + "js/src/tests/test262/**", + ] + else: + # Returning 0 means no tests to run, this captures non test-verify tasks + return 1 + + changed_files = set() + if try_task_config: + suite_to_paths = json.loads(try_task_config) + specified_files = itertools.chain.from_iterable(suite_to_paths.values()) + changed_files.update(specified_files) + + if is_try: + changed_files.update(files_changed.get_locally_changed_files(GECKO)) + else: + changed_files.update(files_changed.get_changed_files(head_repository, head_rev)) + + test_count = 0 + for pattern in file_patterns: + for path in changed_files: + # TODO: consider running tests if a manifest changes + if path.endswith(".list") or path.endswith(".ini"): + continue + if path.endswith("^headers^"): + continue + + if mozpackmatch(path, pattern): + gpu = False + if type == "test-verify-e10s" or type == "test-coverage-e10s": + # file_patterns for test-verify will pick up some gpu tests, lets ignore + # in the case of reftest, we will not have any in the regular case + gpu_dirs = [ + "dom/canvas", + "gfx/tests", + "devtools/canvasdebugger", + "webgl", + ] + for gdir in gpu_dirs: + if len(path.split(gdir)) > 1: + gpu = True + + if not gpu: + test_count += 1 + + chunks = test_count / tests_per_chunk + chunks = int(math.ceil(chunks)) + + # Never return 0 chunks on try, so that per-file tests can be pushed to try with + # an explicit path, and also so "empty" runs can be checked on try. + if is_try and chunks == 0: + chunks = 1 + + return chunks diff --git a/taskcluster/taskgraph/util/platforms.py b/taskcluster/taskgraph/util/platforms.py new file mode 100644 index 0000000000..8f68eb8809 --- /dev/null +++ b/taskcluster/taskgraph/util/platforms.py @@ -0,0 +1,61 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +from __future__ import absolute_import, print_function, unicode_literals + +import re + +from .attributes import keymatch + +# platform family is extracted from build platform by taking the alphabetic prefix +# and then translating win -> windows +_platform_re = re.compile(r"^[a-z]*") +_renames = {"win": "windows"} + + +_archive_formats = { + "linux": ".tar.bz2", + "macosx": ".tar.gz", + "windows": ".zip", +} + +_executable_extension = { + "linux": "", + "macosx": "", + "windows": ".exe", +} + +_architectures = { + r"linux\b.*": "x86", + r"linux64\b.*": "x86_64", + r"macosx64\b.*": "macos-x86_64-aarch64", + r"win32\b.*": "x86", + r"win64\b(?!-aarch64).*": "x86_64", + r"win64-aarch64\b.*": "aarch64", +} + + +def platform_family(build_platform): + """Given a build platform, return the platform family (linux, macosx, etc.)""" + family = _platform_re.match(build_platform).group(0) + return _renames.get(family, family) + + +def archive_format(build_platform): + """Given a build platform, return the archive format used on the platform.""" + return _archive_formats[platform_family(build_platform)] + + +def executable_extension(build_platform): + """Given a build platform, return the executable extension used on the platform.""" + return _executable_extension[platform_family(build_platform)] + + +def architecture(build_platform): + matches = keymatch(_architectures, build_platform) + if len(matches) == 1: + return matches[0] + raise Exception( + "Could not determine architecture of platform `{}`.".format(build_platform) + ) diff --git a/taskcluster/taskgraph/util/python_path.py b/taskcluster/taskgraph/util/python_path.py new file mode 100644 index 0000000000..c8bc11c5dd --- /dev/null +++ b/taskcluster/taskgraph/util/python_path.py @@ -0,0 +1,56 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import inspect +import os + + +def find_object(path): + """ + Find a Python object given a path of the form <modulepath>:<objectpath>. + Conceptually equivalent to + + def find_object(modulepath, objectpath): + import <modulepath> as mod + return mod.<objectpath> + """ + if path.count(":") != 1: + raise ValueError( + 'python path {!r} does not have the form "module:object"'.format(path) + ) + + modulepath, objectpath = path.split(":") + obj = __import__(modulepath) + for a in modulepath.split(".")[1:]: + obj = getattr(obj, a) + for a in objectpath.split("."): + obj = getattr(obj, a) + return obj + + +def import_sibling_modules(exceptions=None): + """ + Import all Python modules that are siblings of the calling module. + + Args: + exceptions (list): A list of file names to exclude (caller and + __init__.py are implicitly excluded). + """ + frame = inspect.stack()[1] + mod = inspect.getmodule(frame[0]) + + name = os.path.basename(mod.__file__) + excs = set(["__init__.py", name]) + if exceptions: + excs.update(exceptions) + + modpath = mod.__name__ + if not name.startswith("__init__.py"): + modpath = modpath.rsplit(".", 1)[0] + + for f in os.listdir(os.path.dirname(mod.__file__)): + if f.endswith(".py") and f not in excs: + __import__(modpath + "." 
+ f[:-3]) diff --git a/taskcluster/taskgraph/util/schema.py b/taskcluster/taskgraph/util/schema.py new file mode 100644 index 0000000000..a095132a94 --- /dev/null +++ b/taskcluster/taskgraph/util/schema.py @@ -0,0 +1,228 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import re +import pprint +import collections +import voluptuous + +from six import text_type, iteritems + +import taskgraph + +from .keyed_by import evaluate_keyed_by + + +def validate_schema(schema, obj, msg_prefix): + """ + Validate that object satisfies schema. If not, generate a useful exception + beginning with msg_prefix. + """ + if taskgraph.fast: + return + try: + schema(obj) + except voluptuous.MultipleInvalid as exc: + msg = [msg_prefix] + for error in exc.errors: + msg.append(str(error)) + raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) + + +def optionally_keyed_by(*arguments): + """ + Mark a schema value as optionally keyed by any of a number of fields. The + schema is the last argument, and the remaining fields are taken to be the + field names. For example: + + 'some-value': optionally_keyed_by( + 'test-platform', 'build-platform', + Any('a', 'b', 'c')) + + The resulting schema will allow nesting of `by-test-platform` and + `by-build-platform` in either order. + """ + schema = arguments[-1] + fields = arguments[:-1] + + def validator(obj): + if isinstance(obj, dict) and len(obj) == 1: + k, v = list(obj.items())[0] + if k.startswith("by-") and k[len("by-") :] in fields: + res = {} + for kk, vv in v.items(): + try: + res[kk] = validator(vv) + except voluptuous.Invalid as e: + e.prepend([k, kk]) + raise + return res + return Schema(schema)(obj) + + return validator + + +def resolve_keyed_by(item, field, item_name, defer=None, **extra_values): + """ + For values which can either accept a literal value, or be keyed by some + other attribute of the item, perform that lookup and replacement in-place + (modifying `item` directly). The field is specified using dotted notation + to traverse dictionaries. + + For example, given item:: + + job: + test-platform: linux128 + chunks: + by-test-platform: + macosx-10.11/debug: 13 + win.*: 6 + default: 12 + + a call to `resolve_keyed_by(item, 'job.chunks', item['thing-name'])` + would mutate item in-place to:: + + job: + test-platform: linux128 + chunks: 12 + + The `item_name` parameter is used to generate useful error messages. + + If extra_values are supplied, they represent additional values available + for reference from by-<field>. + + Items can be nested as deeply as the schema will allow:: + + chunks: + by-test-platform: + win.*: + by-project: + ash: .. + cedar: .. + linux: 13 + default: 12 + + The `defer` parameter allows evaluating a by-* entry at a later time. In the + example above it's possible that the project attribute hasn't been set + yet, in which case we'd want to stop before resolving that subkey and then + call this function again later. This can be accomplished by setting + `defer=["project"]` in this example. + """ + # find the field, returning the item unchanged if anything goes wrong + container, subfield = item, field + while "." 
in subfield: + f, subfield = subfield.split(".", 1) + if f not in container: + return item + container = container[f] + if not isinstance(container, dict): + return item + + if subfield not in container: + return item + + container[subfield] = evaluate_keyed_by( + value=container[subfield], + item_name="`{}` in `{}`".format(field, item_name), + defer=defer, + attributes=dict(item, **extra_values), + ) + + return item + + +# Schemas for YAML files should use dashed identifiers by default. If there are +# components of the schema for which there is a good reason to use another format, +# they can be whitelisted here. +WHITELISTED_SCHEMA_IDENTIFIERS = [ + # upstream-artifacts are handed directly to scriptWorker, which expects interCaps + lambda path: "[{!r}]".format("upstream-artifacts") in path, + lambda path: ( + "[{!r}]".format("test_name") in path + or "[{!r}]".format("json_location") in path + or "[{!r}]".format("video_location") in path + ), +] + + +def check_schema(schema): + identifier_re = re.compile("^[a-z][a-z0-9-]*$") + + def whitelisted(path): + return any(f(path) for f in WHITELISTED_SCHEMA_IDENTIFIERS) + + def iter(path, sch): + def check_identifier(path, k): + if k in (text_type, text_type, voluptuous.Extra): + pass + elif isinstance(k, voluptuous.NotIn): + pass + elif isinstance(k, text_type): + if not identifier_re.match(k) and not whitelisted(path): + raise RuntimeError( + "YAML schemas should use dashed lower-case identifiers, " + "not {!r} @ {}".format(k, path) + ) + elif isinstance(k, (voluptuous.Optional, voluptuous.Required)): + check_identifier(path, k.schema) + elif isinstance(k, (voluptuous.Any, voluptuous.All)): + for v in k.validators: + check_identifier(path, v) + elif not whitelisted(path): + raise RuntimeError( + "Unexpected type in YAML schema: {} @ {}".format( + type(k).__name__, path + ) + ) + + if isinstance(sch, collections.Mapping): + for k, v in iteritems(sch): + child = "{}[{!r}]".format(path, k) + check_identifier(child, k) + iter(child, v) + elif isinstance(sch, (list, tuple)): + for i, v in enumerate(sch): + iter("{}[{}]".format(path, i), v) + elif isinstance(sch, voluptuous.Any): + for v in sch.validators: + iter(path, v) + + iter("schema", schema.schema) + + +class Schema(voluptuous.Schema): + """ + Operates identically to voluptuous.Schema, but applying some taskgraph-specific checks + in the process. + """ + + def __init__(self, *args, **kwargs): + super(Schema, self).__init__(*args, **kwargs) + if not taskgraph.fast: + check_schema(self) + + def extend(self, *args, **kwargs): + schema = super(Schema, self).extend(*args, **kwargs) + check_schema(schema) + # We want twice extend schema to be checked too. + schema.__class__ = Schema + return schema + + def _compile(self, schema): + if taskgraph.fast: + return + return super(Schema, self)._compile(schema) + + def __getitem__(self, item): + return self.schema[item] + + +# shortcut for a string where task references are allowed +taskref_or_string = voluptuous.Any( + text_type, + {voluptuous.Required("task-reference"): text_type}, + {voluptuous.Required("artifact-reference"): text_type}, +) diff --git a/taskcluster/taskgraph/util/scriptworker.py b/taskcluster/taskgraph/util/scriptworker.py new file mode 100644 index 0000000000..af94432295 --- /dev/null +++ b/taskcluster/taskgraph/util/scriptworker.py @@ -0,0 +1,819 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +"""Make scriptworker.cot.verify more user friendly by making scopes dynamic. + +Scriptworker uses certain scopes to determine which sets of credentials to use. +Certain scopes are restricted by branch in chain of trust verification, and are +checked again at the script level. This file provides functions to adjust +these scopes automatically by project; this makes pushing to try, forking a +project branch, and merge day uplifts more user friendly. + +In the future, we may adjust scopes by other settings as well, e.g. different +scopes for `push-to-candidates` rather than `push-to-releases`, even if both +happen on mozilla-beta and mozilla-release. + +Additional configuration is found in the :ref:`graph config <taskgraph-graph-config>`. +""" +from __future__ import absolute_import, print_function, unicode_literals +import functools +import json +import os +import itertools +from copy import deepcopy +from datetime import datetime + +import jsone + +from mozbuild.util import memoize + +from .schema import resolve_keyed_by +from .taskcluster import get_artifact_prefix +from .yaml import load_yaml + +# constants {{{1 +"""Map signing scope aliases to sets of projects. + +Currently m-c and DevEdition on m-b use nightly signing; Beta on m-b and m-r +use release signing. These data structures aren't set-up to handle different +scopes on the same repo, so we use a different set of them for DevEdition, and +callers are responsible for using the correct one (by calling the appropriate +helper below). More context on this in https://bugzilla.mozilla.org/show_bug.cgi?id=1358601. + +We will need to add esr support at some point. Eventually we want to add +nuance so certain m-b and m-r tasks use dep or nightly signing, and we only +release sign when we have a signed-off set of candidate builds. This current +approach works for now, though. + +This is a list of list-pairs, for ordering. +""" +SIGNING_SCOPE_ALIAS_TO_PROJECT = [ + [ + "all-nightly-branches", + set( + [ + "mozilla-central", + "comm-central", + "oak", + ] + ), + ], + [ + "all-release-branches", + set( + [ + "mozilla-beta", + "mozilla-release", + "mozilla-esr78", + "comm-beta", + "comm-esr78", + ] + ), + ], +] + +"""Map the signing scope aliases to the actual scopes. +""" +SIGNING_CERT_SCOPES = { + "all-release-branches": "signing:cert:release-signing", + "all-nightly-branches": "signing:cert:nightly-signing", + "default": "signing:cert:dep-signing", +} + +DEVEDITION_SIGNING_SCOPE_ALIAS_TO_PROJECT = [ + [ + "beta", + set( + [ + "mozilla-beta", + ] + ), + ] +] + +DEVEDITION_SIGNING_CERT_SCOPES = { + "beta": "signing:cert:nightly-signing", + "default": "signing:cert:dep-signing", +} + +"""Map beetmover scope aliases to sets of projects. +""" +BEETMOVER_SCOPE_ALIAS_TO_PROJECT = [ + [ + "all-nightly-branches", + set( + [ + "mozilla-central", + "comm-central", + "oak", + ] + ), + ], + [ + "all-release-branches", + set( + [ + "mozilla-beta", + "mozilla-release", + "mozilla-esr78", + "comm-beta", + "comm-esr78", + ] + ), + ], +] + +"""Map the beetmover scope aliases to the actual scopes. +""" +BEETMOVER_BUCKET_SCOPES = { + "all-release-branches": "beetmover:bucket:release", + "all-nightly-branches": "beetmover:bucket:nightly", + "default": "beetmover:bucket:dep", +} + +"""Map the beetmover tasks aliases to the actual action scopes. 
+""" +BEETMOVER_ACTION_SCOPES = { + "nightly": "beetmover:action:push-to-nightly", + "nightly-oak": "beetmover:action:push-to-nightly", + "default": "beetmover:action:push-to-candidates", +} + + +"""Known balrog actions.""" +BALROG_ACTIONS = ( + "submit-locale", + "submit-toplevel", + "schedule", + "v2-submit-locale", + "v2-submit-toplevel", +) + +"""Map balrog scope aliases to sets of projects. + +This is a list of list-pairs, for ordering. +""" +BALROG_SCOPE_ALIAS_TO_PROJECT = [ + [ + "nightly", + set( + [ + "mozilla-central", + "comm-central", + "oak", + ] + ), + ], + [ + "beta", + set( + [ + "mozilla-beta", + "comm-beta", + ] + ), + ], + [ + "release", + set( + [ + "mozilla-release", + "comm-esr78", + ] + ), + ], + [ + "esr78", + set( + [ + "mozilla-esr78", + ] + ), + ], +] + +"""Map the balrog scope aliases to the actual scopes. +""" +BALROG_SERVER_SCOPES = { + "nightly": "balrog:server:nightly", + "aurora": "balrog:server:aurora", + "beta": "balrog:server:beta", + "release": "balrog:server:release", + "esr78": "balrog:server:esr", + "default": "balrog:server:dep", +} + + +""" The list of the release promotion phases which we send notifications for +""" +RELEASE_NOTIFICATION_PHASES = ("promote", "push", "ship") + + +def add_scope_prefix(config, scope): + """ + Prepends the scriptworker scope prefix from the :ref:`graph config + <taskgraph-graph-config>`. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + scope (string): The suffix of the scope + + Returns: + string: the scope to use. + """ + return "{prefix}:{scope}".format( + prefix=config.graph_config["scriptworker"]["scope-prefix"], + scope=scope, + ) + + +def with_scope_prefix(f): + """ + Wraps a function, calling :py:func:`add_scope_prefix` on the result of + calling the wrapped function. + + Args: + f (callable): A function that takes a ``config`` and some keyword + arguments, and returns a scope suffix. + + Returns: + callable: the wrapped function + """ + + @functools.wraps(f) + def wrapper(config, **kwargs): + scope_or_scopes = f(config, **kwargs) + if isinstance(scope_or_scopes, list): + return map(functools.partial(add_scope_prefix, config), scope_or_scopes) + else: + return add_scope_prefix(config, scope_or_scopes) + + return wrapper + + +# scope functions {{{1 +@with_scope_prefix +def get_scope_from_project(config, alias_to_project_map, alias_to_scope_map): + """Determine the restricted scope from `config.params['project']`. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + alias_to_project_map (list of lists): each list pair contains the + alias and the set of projects that match. This is ordered. + alias_to_scope_map (dict): the alias alias to scope + + Returns: + string: the scope to use. + """ + for alias, projects in alias_to_project_map: + if config.params["project"] in projects and alias in alias_to_scope_map: + return alias_to_scope_map[alias] + return alias_to_scope_map["default"] + + +@with_scope_prefix +def get_scope_from_release_type(config, release_type_to_scope_map): + """Determine the restricted scope from `config.params['target_tasks_method']`. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + release_type_to_scope_map (dict): the maps release types to scopes + + Returns: + string: the scope to use. 
+ """ + return release_type_to_scope_map.get( + config.params["release_type"], release_type_to_scope_map["default"] + ) + + +def get_phase_from_target_method(config, alias_to_tasks_map, alias_to_phase_map): + """Determine the phase from `config.params['target_tasks_method']`. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + alias_to_tasks_map (list of lists): each list pair contains the + alias and the set of target methods that match. This is ordered. + alias_to_phase_map (dict): the alias to phase map + + Returns: + string: the phase to use. + """ + for alias, tasks in alias_to_tasks_map: + if ( + config.params["target_tasks_method"] in tasks + and alias in alias_to_phase_map + ): + return alias_to_phase_map[alias] + return alias_to_phase_map["default"] + + +@with_scope_prefix +def get_balrog_action_scope(config, action="submit"): + assert action in BALROG_ACTIONS + return "balrog:action:{}".format(action) + + +get_signing_cert_scope = functools.partial( + get_scope_from_project, + alias_to_project_map=SIGNING_SCOPE_ALIAS_TO_PROJECT, + alias_to_scope_map=SIGNING_CERT_SCOPES, +) + +get_devedition_signing_cert_scope = functools.partial( + get_scope_from_project, + alias_to_project_map=DEVEDITION_SIGNING_SCOPE_ALIAS_TO_PROJECT, + alias_to_scope_map=DEVEDITION_SIGNING_CERT_SCOPES, +) + +get_beetmover_bucket_scope = functools.partial( + get_scope_from_project, + alias_to_project_map=BEETMOVER_SCOPE_ALIAS_TO_PROJECT, + alias_to_scope_map=BEETMOVER_BUCKET_SCOPES, +) + +get_beetmover_action_scope = functools.partial( + get_scope_from_release_type, + release_type_to_scope_map=BEETMOVER_ACTION_SCOPES, +) + +get_balrog_server_scope = functools.partial( + get_scope_from_project, + alias_to_project_map=BALROG_SCOPE_ALIAS_TO_PROJECT, + alias_to_scope_map=BALROG_SERVER_SCOPES, +) + +cached_load_yaml = memoize(load_yaml) + + +# release_config {{{1 +def get_release_config(config): + """Get the build number and version for a release task. + + Currently only applies to beetmover tasks. + + Args: + config (TransformConfig): The configuration for the kind being transformed. + + Returns: + dict: containing both `build_number` and `version`. This can be used to + update `task.payload`. 
+ """ + release_config = {} + + partial_updates = os.environ.get("PARTIAL_UPDATES", "") + if partial_updates != "" and config.kind in ( + "release-bouncer-sub", + "release-bouncer-check", + "release-update-verify-config", + "release-secondary-update-verify-config", + "release-balrog-submit-toplevel", + "release-secondary-balrog-submit-toplevel", + ): + partial_updates = json.loads(partial_updates) + release_config["partial_versions"] = ", ".join( + [ + "{}build{}".format(v, info["buildNumber"]) + for v, info in partial_updates.items() + ] + ) + if release_config["partial_versions"] == "{}": + del release_config["partial_versions"] + + release_config["version"] = config.params["version"] + release_config["appVersion"] = config.params["app_version"] + + release_config["next_version"] = config.params["next_version"] + release_config["build_number"] = config.params["build_number"] + return release_config + + +def get_signing_cert_scope_per_platform(build_platform, is_shippable, config): + if "devedition" in build_platform: + return get_devedition_signing_cert_scope(config) + elif is_shippable: + return get_signing_cert_scope(config) + else: + return add_scope_prefix(config, "signing:cert:dep-signing") + + +# generate_beetmover_upstream_artifacts {{{1 +def generate_beetmover_upstream_artifacts( + config, job, platform, locale=None, dependencies=None, **kwargs +): + """Generate the upstream artifacts for beetmover, using the artifact map. + + Currently only applies to beetmover tasks. + + Args: + job (dict): The current job being generated + dependencies (list): A list of the job's dependency labels. + platform (str): The current build platform + locale (str): The current locale being beetmoved. + + Returns: + list: A list of dictionaries conforming to the upstream_artifacts spec. + """ + base_artifact_prefix = get_artifact_prefix(job) + resolve_keyed_by( + job, + "attributes.artifact_map", + "artifact map", + **{ + "release-type": config.params["release_type"], + "platform": platform, + } + ) + map_config = deepcopy(cached_load_yaml(job["attributes"]["artifact_map"])) + upstream_artifacts = list() + + if not locale: + locales = map_config["default_locales"] + elif isinstance(locale, list): + locales = locale + else: + locales = [locale] + + if not dependencies: + if job.get("dependencies"): + dependencies = job["dependencies"].keys() + elif job.get("primary-dependency"): + dependencies = [job["primary-dependency"].kind] + else: + raise Exception("Unsupported type of dependency. Got job: {}".format(job)) + + for locale, dep in itertools.product(locales, dependencies): + paths = list() + + for filename in map_config["mapping"]: + if dep not in map_config["mapping"][filename]["from"]: + continue + if locale != "en-US" and not map_config["mapping"][filename]["all_locales"]: + continue + if ( + "only_for_platforms" in map_config["mapping"][filename] + and platform + not in map_config["mapping"][filename]["only_for_platforms"] + ): + continue + if ( + "not_for_platforms" in map_config["mapping"][filename] + and platform in map_config["mapping"][filename]["not_for_platforms"] + ): + continue + if "partials_only" in map_config["mapping"][filename]: + continue + # The next time we look at this file it might be a different locale. 
+ file_config = deepcopy(map_config["mapping"][filename]) + resolve_keyed_by( + file_config, + "source_path_modifier", + "source path modifier", + locale=locale, + ) + + kwargs["locale"] = locale + + paths.append( + os.path.join( + base_artifact_prefix, + jsone.render(file_config["source_path_modifier"], kwargs), + jsone.render(filename, kwargs), + ) + ) + + if job.get("dependencies") and getattr( + job["dependencies"][dep], "release_artifacts", None + ): + paths = [ + path + for path in paths + if path in job["dependencies"][dep].release_artifacts + ] + + if not paths: + continue + + upstream_artifacts.append( + { + "taskId": {"task-reference": "<{}>".format(dep)}, + "taskType": map_config["tasktype_map"].get(dep), + "paths": sorted(paths), + "locale": locale, + } + ) + + upstream_artifacts.sort(key=lambda u: u["paths"]) + return upstream_artifacts + + +# generate_beetmover_artifact_map {{{1 +def generate_beetmover_artifact_map(config, job, **kwargs): + """Generate the beetmover artifact map. + + Currently only applies to beetmover tasks. + + Args: + config (): Current taskgraph configuration. + job (dict): The current job being generated + Common kwargs: + platform (str): The current build platform + locale (str): The current locale being beetmoved. + + Returns: + list: A list of dictionaries containing source->destination + maps for beetmover. + """ + platform = kwargs.get("platform", "") + resolve_keyed_by( + job, + "attributes.artifact_map", + job["label"], + **{ + "release-type": config.params["release_type"], + "platform": platform, + } + ) + map_config = deepcopy(cached_load_yaml(job["attributes"]["artifact_map"])) + base_artifact_prefix = map_config.get( + "base_artifact_prefix", get_artifact_prefix(job) + ) + + artifacts = list() + + dependencies = job["dependencies"].keys() + + if kwargs.get("locale"): + if isinstance(kwargs["locale"], list): + locales = kwargs["locale"] + else: + locales = [kwargs["locale"]] + else: + locales = map_config["default_locales"] + + resolve_keyed_by(map_config, "s3_bucket_paths", job["label"], platform=platform) + + for locale, dep in sorted(itertools.product(locales, dependencies)): + paths = dict() + for filename in map_config["mapping"]: + # Relevancy checks + if dep not in map_config["mapping"][filename]["from"]: + # We don't get this file from this dependency. + continue + if locale != "en-US" and not map_config["mapping"][filename]["all_locales"]: + # This locale either doesn't produce or shouldn't upload this file. + continue + if ( + "only_for_platforms" in map_config["mapping"][filename] + and platform + not in map_config["mapping"][filename]["only_for_platforms"] + ): + # This platform either doesn't produce or shouldn't upload this file. + continue + if ( + "not_for_platforms" in map_config["mapping"][filename] + and platform in map_config["mapping"][filename]["not_for_platforms"] + ): + # This platform either doesn't produce or shouldn't upload this file. + continue + if "partials_only" in map_config["mapping"][filename]: + continue + + # deepcopy because the next time we look at this file the locale will differ. 
+ file_config = deepcopy(map_config["mapping"][filename]) + + for field in [ + "destinations", + "locale_prefix", + "source_path_modifier", + "update_balrog_manifest", + "pretty_name", + "checksums_path", + ]: + resolve_keyed_by( + file_config, field, job["label"], locale=locale, platform=platform + ) + + # This format string should ideally be in the configuration file, + # but this would mean keeping variable names in sync between code + config. + destinations = [ + "{s3_bucket_path}/{dest_path}/{locale_prefix}{filename}".format( + s3_bucket_path=bucket_path, + dest_path=dest_path, + locale_prefix=file_config["locale_prefix"], + filename=file_config.get("pretty_name", filename), + ) + for dest_path, bucket_path in itertools.product( + file_config["destinations"], map_config["s3_bucket_paths"] + ) + ] + # Creating map entries + # Key must be artifact path, to avoid trampling duplicates, such + # as public/build/target.apk and public/build/en-US/target.apk + key = os.path.join( + base_artifact_prefix, + file_config["source_path_modifier"], + filename, + ) + + paths[key] = { + "destinations": destinations, + } + if file_config.get("checksums_path"): + paths[key]["checksums_path"] = file_config["checksums_path"] + + # optional flag: balrog manifest + if file_config.get("update_balrog_manifest"): + paths[key]["update_balrog_manifest"] = True + if file_config.get("balrog_format"): + paths[key]["balrog_format"] = file_config["balrog_format"] + + if not paths: + # No files for this dependency/locale combination. + continue + + # Render all variables for the artifact map + platforms = deepcopy(map_config.get("platform_names", {})) + if platform: + for key in platforms.keys(): + resolve_keyed_by(platforms, key, job["label"], platform=platform) + + upload_date = datetime.fromtimestamp(config.params["build_date"]) + + kwargs.update( + { + "locale": locale, + "version": config.params["version"], + "branch": config.params["project"], + "build_number": config.params["build_number"], + "year": upload_date.year, + "month": upload_date.strftime("%m"), # zero-pad the month + "upload_date": upload_date.strftime("%Y-%m-%d-%H-%M-%S"), + } + ) + kwargs.update(**platforms) + paths = jsone.render(paths, kwargs) + artifacts.append( + { + "taskId": {"task-reference": "<{}>".format(dep)}, + "locale": locale, + "paths": paths, + } + ) + + return artifacts + + +# generate_beetmover_partials_artifact_map {{{1 +def generate_beetmover_partials_artifact_map(config, job, partials_info, **kwargs): + """Generate the beetmover partials artifact map. + + Currently only applies to beetmover tasks. + + Args: + config (): Current taskgraph configuration. + job (dict): The current job being generated + partials_info (dict): Current partials and information about them in a dict + Common kwargs: + platform (str): The current build platform + locale (str): The current locale being beetmoved. + + Returns: + list: A list of dictionaries containing source->destination + maps for beetmover. 
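+
+    A sketch of a single entry in that list (task and file names are
+    hypothetical; the real keys and destinations come from the artifact map
+    referenced by `job["attributes"]["artifact_map"]`):
+
+        {
+            "taskId": {"task-reference": "<partials-signing>"},
+            "locale": "en-US",
+            "paths": {
+                "public/build/target.partial.mar": {
+                    "destinations": [...],
+                    "from_buildid": "20210101010101",
+                },
+            },
+        }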
+ """ + platform = kwargs.get("platform", "") + resolve_keyed_by( + job, + "attributes.artifact_map", + "artifact map", + **{ + "release-type": config.params["release_type"], + "platform": platform, + } + ) + map_config = deepcopy(cached_load_yaml(job["attributes"]["artifact_map"])) + base_artifact_prefix = map_config.get( + "base_artifact_prefix", get_artifact_prefix(job) + ) + + artifacts = list() + dependencies = job["dependencies"].keys() + + if kwargs.get("locale"): + locales = [kwargs["locale"]] + else: + locales = map_config["default_locales"] + + resolve_keyed_by( + map_config, "s3_bucket_paths", "s3_bucket_paths", platform=platform + ) + + platforms = deepcopy(map_config.get("platform_names", {})) + if platform: + for key in platforms.keys(): + resolve_keyed_by(platforms, key, key, platform=platform) + upload_date = datetime.fromtimestamp(config.params["build_date"]) + + for locale, dep in itertools.product(locales, dependencies): + paths = dict() + for filename in map_config["mapping"]: + # Relevancy checks + if dep not in map_config["mapping"][filename]["from"]: + # We don't get this file from this dependency. + continue + if locale != "en-US" and not map_config["mapping"][filename]["all_locales"]: + # This locale either doesn't produce or shouldn't upload this file. + continue + if "partials_only" not in map_config["mapping"][filename]: + continue + # deepcopy because the next time we look at this file the locale will differ. + file_config = deepcopy(map_config["mapping"][filename]) + + for field in [ + "destinations", + "locale_prefix", + "source_path_modifier", + "update_balrog_manifest", + "from_buildid", + "pretty_name", + "checksums_path", + ]: + resolve_keyed_by( + file_config, field, field, locale=locale, platform=platform + ) + + # This format string should ideally be in the configuration file, + # but this would mean keeping variable names in sync between code + config. 
+ destinations = [ + "{s3_bucket_path}/{dest_path}/{locale_prefix}{filename}".format( + s3_bucket_path=bucket_path, + dest_path=dest_path, + locale_prefix=file_config["locale_prefix"], + filename=file_config.get("pretty_name", filename), + ) + for dest_path, bucket_path in itertools.product( + file_config["destinations"], map_config["s3_bucket_paths"] + ) + ] + # Creating map entries + # Key must be artifact path, to avoid trampling duplicates, such + # as public/build/target.apk and public/build/en-US/target.apk + key = os.path.join( + base_artifact_prefix, + file_config["source_path_modifier"], + filename, + ) + partials_paths = {} + for pname, info in partials_info.items(): + partials_paths[key] = { + "destinations": destinations, + } + if file_config.get("checksums_path"): + partials_paths[key]["checksums_path"] = file_config[ + "checksums_path" + ] + + # optional flag: balrog manifest + if file_config.get("update_balrog_manifest"): + partials_paths[key]["update_balrog_manifest"] = True + if file_config.get("balrog_format"): + partials_paths[key]["balrog_format"] = file_config[ + "balrog_format" + ] + # optional flag: from_buildid + if file_config.get("from_buildid"): + partials_paths[key]["from_buildid"] = file_config["from_buildid"] + + # render buildid + kwargs.update( + { + "partial": pname, + "from_buildid": info["buildid"], + "previous_version": info.get("previousVersion"), + "buildid": str(config.params["moz_build_date"]), + "locale": locale, + "version": config.params["version"], + "branch": config.params["project"], + "build_number": config.params["build_number"], + "year": upload_date.year, + "month": upload_date.strftime("%m"), # zero-pad the month + "upload_date": upload_date.strftime("%Y-%m-%d-%H-%M-%S"), + } + ) + kwargs.update(**platforms) + paths.update(jsone.render(partials_paths, kwargs)) + + if not paths: + continue + + artifacts.append( + { + "taskId": {"task-reference": "<{}>".format(dep)}, + "locale": locale, + "paths": paths, + } + ) + + artifacts.sort(key=lambda a: sorted(a["paths"].items())) + return artifacts diff --git a/taskcluster/taskgraph/util/signed_artifacts.py b/taskcluster/taskgraph/util/signed_artifacts.py new file mode 100644 index 0000000000..0a215e152e --- /dev/null +++ b/taskcluster/taskgraph/util/signed_artifacts.py @@ -0,0 +1,196 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +""" +Defines artifacts to sign before repackage. 
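+
+Each specification produced by `generate_specifications_of_artifacts_to_sign`
+below is a dict of the form
+
+    {"artifacts": [<artifact paths>], "formats": [<signing formats>]}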
+""" + +from __future__ import absolute_import, print_function, unicode_literals +from taskgraph.util.taskcluster import get_artifact_path +from taskgraph.util.declarative_artifacts import get_geckoview_upstream_artifacts + + +LANGPACK_SIGN_PLATFORMS = { # set + "linux64-shippable", + "linux64-devedition", + "macosx64-shippable", + "macosx64-devedition", +} + + +def is_partner_kind(kind): + if kind and kind.startswith(("release-partner", "release-eme-free")): + return True + + +def is_notarization_kind(kind): + if kind and "notarization" in kind: + return True + + +def generate_specifications_of_artifacts_to_sign( + config, job, keep_locale_template=True, kind=None, dep_kind=None +): + build_platform = job["attributes"].get("build_platform") + use_stub = job["attributes"].get("stub-installer") + # Get locales to know if we want to sign ja-JP-mac langpack + locales = job["attributes"].get("chunk_locales", []) + if kind == "release-source-signing": + artifacts_specifications = [ + { + "artifacts": [get_artifact_path(job, "source.tar.xz")], + "formats": ["autograph_gpg"], + } + ] + elif "android" in build_platform: + artifacts_specifications = [ + { + "artifacts": get_geckoview_artifacts_to_sign(config, job), + "formats": ["autograph_gpg"], + } + ] + # XXX: Mars aren't signed here (on any platform) because internals will be + # signed at after this stage of the release + elif "macosx" in build_platform: + if is_notarization_kind(dep_kind): + # This task is notarization part 3: download signed bits, + # and staple notarization. + artifacts_specifications = [ + { + "artifacts": [ + get_artifact_path(job, "{locale}/target.tar.gz"), + get_artifact_path(job, "{locale}/target.pkg"), + ], + "formats": [], + } + ] + langpack_formats = [] + else: + # This task is either depsigning, or notarization part 1: + # download unsigned bits, and sign. 
If notarization part 1, + # submit for notarization and create a uuid_manifest.json + if is_partner_kind(kind): + extension = "tar.gz" + else: + extension = "dmg" + artifacts_specifications = [ + { + "artifacts": [ + get_artifact_path(job, "{{locale}}/target.{}".format(extension)) + ], + "formats": ["macapp", "autograph_widevine", "autograph_omnija"], + } + ] + langpack_formats = ["autograph_langpack"] + + if "ja-JP-mac" in locales and build_platform in LANGPACK_SIGN_PLATFORMS: + artifacts_specifications += [ + { + "artifacts": [ + get_artifact_path(job, "ja-JP-mac/target.langpack.xpi") + ], + "formats": langpack_formats, + } + ] + elif "win" in build_platform: + artifacts_specifications = [ + { + "artifacts": [ + get_artifact_path(job, "{locale}/setup.exe"), + ], + "formats": ["autograph_authenticode"], + }, + { + "artifacts": [ + get_artifact_path(job, "{locale}/target.zip"), + ], + "formats": [ + "autograph_authenticode", + "autograph_widevine", + "autograph_omnija", + ], + }, + ] + + if use_stub: + artifacts_specifications[0]["artifacts"] += [ + get_artifact_path(job, "{locale}/setup-stub.exe") + ] + elif "linux" in build_platform: + artifacts_specifications = [ + { + "artifacts": [get_artifact_path(job, "{locale}/target.tar.bz2")], + "formats": ["autograph_gpg", "autograph_widevine", "autograph_omnija"], + } + ] + if build_platform in LANGPACK_SIGN_PLATFORMS: + artifacts_specifications += [ + { + "artifacts": [ + get_artifact_path(job, "{locale}/target.langpack.xpi") + ], + "formats": ["autograph_langpack"], + } + ] + else: + raise Exception("Platform not implemented for signing") + + if not keep_locale_template: + artifacts_specifications = _strip_locale_template(artifacts_specifications) + + if is_partner_kind(kind): + artifacts_specifications = _strip_widevine_for_partners( + artifacts_specifications + ) + + return artifacts_specifications + + +def _strip_locale_template(artifacts_without_locales): + for spec in artifacts_without_locales: + for index, artifact in enumerate(spec["artifacts"]): + stripped_artifact = artifact.format(locale="") + stripped_artifact = stripped_artifact.replace("//", "/") + spec["artifacts"][index] = stripped_artifact + + return artifacts_without_locales + + +def _strip_widevine_for_partners(artifacts_specifications): + """Partner repacks should not resign that's previously signed for fear of breaking partial + updates + """ + for spec in artifacts_specifications: + if "autograph_widevine" in spec["formats"]: + spec["formats"].remove("autograph_widevine") + if "autograph_omnija" in spec["formats"]: + spec["formats"].remove("autograph_omnija") + + return artifacts_specifications + + +def get_signed_artifacts(input, formats, behavior=None): + """ + Get the list of signed artifacts for the given input and formats. 
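+
+    For example (a sketch; the path and behavior value are illustrative), a
+    notarized mac build:
+
+        get_signed_artifacts("public/build/target.dmg", ["autograph_gpg"],
+                             behavior="mac_notarize")
+        # -> {"public/build/target.tar.gz",
+        #     "public/build/target.pkg",
+        #     "public/build/target.dmg.asc"}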
+ """ + artifacts = set() + if input.endswith(".dmg"): + artifacts.add(input.replace(".dmg", ".tar.gz")) + if behavior and behavior != "mac_sign": + artifacts.add(input.replace(".dmg", ".pkg")) + else: + artifacts.add(input) + if "autograph_gpg" in formats: + artifacts.add("{}.asc".format(input)) + + return artifacts + + +def get_geckoview_artifacts_to_sign(config, job): + upstream_artifacts = get_geckoview_upstream_artifacts(config, job) + return [ + path + for upstream_artifact in upstream_artifacts + for path in upstream_artifact["paths"] + if not path.endswith(".md5") and not path.endswith(".sha1") + ] diff --git a/taskcluster/taskgraph/util/taskcluster.py b/taskcluster/taskgraph/util/taskcluster.py new file mode 100644 index 0000000000..ae6315d2f7 --- /dev/null +++ b/taskcluster/taskgraph/util/taskcluster.py @@ -0,0 +1,373 @@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import os +import datetime +import functools +import requests +import six +import logging +import taskcluster_urls as liburls +from mozbuild.util import memoize +from requests.packages.urllib3.util.retry import Retry +from taskcluster import Hooks +from taskgraph.task import Task +from taskgraph.util import yaml + +logger = logging.getLogger(__name__) + +# this is set to true for `mach taskgraph action-callback --test` +testing = False + +# Default rootUrl to use if none is given in the environment; this should point +# to the production Taskcluster deployment used for CI. +PRODUCTION_TASKCLUSTER_ROOT_URL = "https://firefox-ci-tc.services.mozilla.com" + +# the maximum number of parallel Taskcluster API calls to make +CONCURRENCY = 50 + + +@memoize +def get_root_url(use_proxy): + """Get the current TASKCLUSTER_ROOT_URL. When running in a task, this must + come from $TASKCLUSTER_ROOT_URL; when run on the command line, we apply a + defualt that points to the production deployment of Taskcluster. If use_proxy + is set, this attempts to get TASKCLUSTER_PROXY_URL instead, failing if it + is not set.""" + if use_proxy: + try: + return six.ensure_text(os.environ["TASKCLUSTER_PROXY_URL"]) + except KeyError: + if "TASK_ID" not in os.environ: + raise RuntimeError( + "taskcluster-proxy is not available when not executing in a task" + ) + else: + raise RuntimeError("taskcluster-proxy is not enabled for this task") + + if "TASKCLUSTER_ROOT_URL" not in os.environ: + if "TASK_ID" in os.environ: + raise RuntimeError( + "$TASKCLUSTER_ROOT_URL must be set when running in a task" + ) + else: + logger.debug("Using default TASKCLUSTER_ROOT_URL (Firefox CI production)") + return PRODUCTION_TASKCLUSTER_ROOT_URL + logger.debug( + "Running in Taskcluster instance {}{}".format( + os.environ["TASKCLUSTER_ROOT_URL"], + " with taskcluster-proxy" if "TASKCLUSTER_PROXY_URL" in os.environ else "", + ) + ) + return six.ensure_text(os.environ["TASKCLUSTER_ROOT_URL"]) + + +def requests_retry_session( + retries, + backoff_factor=0.1, + status_forcelist=(500, 502, 504), + concurrency=CONCURRENCY, + session=None, +): + session = session or requests.Session() + retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + ) + + # Default HTTPAdapter uses 10 connections. Mount custom adapter to increase + # that limit. 
Connections are established as needed, so using a large value + # should not negatively impact performance. + http_adapter = requests.adapters.HTTPAdapter( + pool_connections=concurrency, + pool_maxsize=concurrency, + max_retries=retry, + ) + session.mount("http://", http_adapter) + session.mount("https://", http_adapter) + + return session + + +@memoize +def get_session(): + return requests_retry_session(retries=5) + + +def _do_request(url, method=None, **kwargs): + if method is None: + method = "post" if kwargs else "get" + + session = get_session() + if method == "get": + kwargs["stream"] = True + response = getattr(session, method)(url, **kwargs) + + if response.status_code >= 400: + # Consume content before raise_for_status, so that the connection can be + # reused. + response.content + response.raise_for_status() + return response + + +def _handle_artifact(path, response): + if path.endswith(".json"): + return response.json() + if path.endswith(".yml"): + return yaml.load_stream(response.text) + response.raw.read = functools.partial(response.raw.read, decode_content=True) + return response.raw + + +def get_artifact_url(task_id, path, use_proxy=False): + artifact_tmpl = liburls.api( + get_root_url(False), "queue", "v1", "task/{}/artifacts/{}" + ) + data = six.ensure_text(artifact_tmpl.format(task_id, path)) + if use_proxy: + # Until Bug 1405889 is deployed, we can't download directly + # from the taskcluster-proxy. Work around by using the /bewit + # endpoint instead. + # The bewit URL is the body of a 303 redirect, which we don't + # want to follow (which fetches a potentially large resource). + response = _do_request( + os.environ["TASKCLUSTER_PROXY_URL"] + "/bewit", + data=data, + allow_redirects=False, + ) + return six.ensure_text(response.text) + return data + + +def get_artifact(task_id, path, use_proxy=False): + """ + Returns the artifact with the given path for the given task id. + + If the path ends with ".json" or ".yml", the content is deserialized as, + respectively, json or yaml, and the corresponding python data (usually + dict) is returned. + For other types of content, a file-like object is returned. 
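A short usage sketch of get_artifact, with a made-up task id:

    # JSON and YAML artifacts come back already parsed...
    label_to_taskid = get_artifact("abc123TaskIdxyz", "public/label-to-taskid.json")

    # ...while anything else is a streaming file-like object.
    fetched = get_artifact("abc123TaskIdxyz", "public/build/target.tar.gz")
    with open("target.tar.gz", "wb") as f:
        f.write(fetched.read())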
+ """ + response = _do_request(get_artifact_url(task_id, path, use_proxy)) + return _handle_artifact(path, response) + + +def list_artifacts(task_id, use_proxy=False): + response = _do_request(get_artifact_url(task_id, "", use_proxy).rstrip("/")) + return response.json()["artifacts"] + + +def get_artifact_prefix(task): + prefix = None + if isinstance(task, dict): + prefix = task.get("attributes", {}).get("artifact_prefix") + elif isinstance(task, Task): + prefix = task.attributes.get("artifact_prefix") + else: + raise Exception("Can't find artifact-prefix of non-task: {}".format(task)) + return prefix or "public/build" + + +def get_artifact_path(task, path): + return "{}/{}".format(get_artifact_prefix(task), path) + + +def get_index_url(index_path, use_proxy=False, multiple=False): + index_tmpl = liburls.api(get_root_url(use_proxy), "index", "v1", "task{}/{}") + return index_tmpl.format("s" if multiple else "", index_path) + + +def find_task_id(index_path): + try: + response = _do_request(get_index_url(index_path)) + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + raise KeyError("index path {} not found".format(index_path)) + raise + return response.json()["taskId"] + + +def get_artifact_from_index(index_path, artifact_path, use_proxy=False): + full_path = index_path + "/artifacts/" + artifact_path + response = _do_request(get_index_url(full_path, use_proxy)) + return _handle_artifact(full_path, response) + + +def list_tasks(index_path, use_proxy=False): + """ + Returns a list of task_ids where each task_id is indexed under a path + in the index. Results are sorted by expiration date from oldest to newest. + """ + results = [] + data = {} + while True: + response = _do_request( + get_index_url(index_path, use_proxy, multiple=True), json=data + ) + response = response.json() + results += response["tasks"] + if response.get("continuationToken"): + data = {"continuationToken": response.get("continuationToken")} + else: + break + + # We can sort on expires because in the general case + # all of these tasks should be created with the same expires time so they end up in + # order from earliest to latest action. If more correctness is needed, consider + # fetching each task and sorting on the created date. + results.sort(key=lambda t: parse_time(t["expires"])) + return [t["taskId"] for t in results] + + +def insert_index(index_path, task_id, data=None, use_proxy=False): + index_url = get_index_url(index_path, use_proxy=use_proxy) + + # Find task expiry. + expires = get_task_definition(task_id, use_proxy=use_proxy)["expires"] + + response = _do_request( + index_url, + method="put", + json={ + "taskId": task_id, + "rank": 0, + "data": data or {}, + "expires": expires, + }, + ) + return response + + +def parse_time(timestamp): + """Turn a "JSON timestamp" as used in TC APIs into a datetime""" + return datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ") + + +def get_task_url(task_id, use_proxy=False): + task_tmpl = liburls.api(get_root_url(use_proxy), "queue", "v1", "task/{}") + return task_tmpl.format(task_id) + + +def get_task_definition(task_id, use_proxy=False): + response = _do_request(get_task_url(task_id, use_proxy)) + return response.json() + + +def cancel_task(task_id, use_proxy=False): + """Cancels a task given a task_id. 
In testing mode, just logs that it would + have cancelled.""" + if testing: + logger.info("Would have cancelled {}.".format(task_id)) + else: + _do_request(get_task_url(task_id, use_proxy) + "/cancel", json={}) + + +def status_task(task_id, use_proxy=False): + """Gets the status of a task given a task_id. In testing mode, just logs that it would + have retrieved status.""" + if testing: + logger.info("Would have gotten status for {}.".format(task_id)) + else: + resp = _do_request(get_task_url(task_id, use_proxy) + "/status") + status = resp.json().get("status", {}).get("state") or "unknown" + return status + + +def rerun_task(task_id): + """Reruns a task given a task_id. In testing mode, just logs that it would + have reran.""" + if testing: + logger.info("Would have rerun {}.".format(task_id)) + else: + _do_request(get_task_url(task_id, use_proxy=True) + "/rerun", json={}) + + +def trigger_hook(hook_group_id, hook_id, hook_payload): + hooks = Hooks({"rootUrl": get_root_url(True)}) + response = hooks.triggerHook(hook_group_id, hook_id, hook_payload) + + logger.info( + "Task seen here: {}/tasks/{}".format( + get_root_url(os.environ.get("TASKCLUSTER_PROXY_URL")), + response["status"]["taskId"], + ) + ) + + +def get_current_scopes(): + """Get the current scopes. This only makes sense in a task with the Taskcluster + proxy enabled, where it returns the actual scopes accorded to the task.""" + auth_url = liburls.api(get_root_url(True), "auth", "v1", "scopes/current") + resp = _do_request(auth_url) + return resp.json().get("scopes", []) + + +def get_purge_cache_url(provisioner_id, worker_type, use_proxy=False): + url_tmpl = liburls.api( + get_root_url(use_proxy), "purge-cache", "v1", "purge-cache/{}/{}" + ) + return url_tmpl.format(provisioner_id, worker_type) + + +def purge_cache(provisioner_id, worker_type, cache_name, use_proxy=False): + """Requests a cache purge from the purge-caches service.""" + if testing: + logger.info( + "Would have purged {}/{}/{}.".format( + provisioner_id, worker_type, cache_name + ) + ) + else: + logger.info("Purging {}/{}/{}.".format(provisioner_id, worker_type, cache_name)) + purge_cache_url = get_purge_cache_url(provisioner_id, worker_type, use_proxy) + _do_request(purge_cache_url, json={"cacheName": cache_name}) + + +def send_email(address, subject, content, link, use_proxy=False): + """Sends an email using the notify service""" + logger.info("Sending email to {}.".format(address)) + url = liburls.api(get_root_url(use_proxy), "notify", "v1", "email") + _do_request( + url, + json={ + "address": address, + "subject": subject, + "content": content, + "link": link, + }, + ) + + +def list_task_group_tasks(task_group_id): + """Generate the tasks in a task group""" + params = {} + while True: + url = liburls.api( + get_root_url(False), + "queue", + "v1", + "task-group/{}/list".format(task_group_id), + ) + resp = _do_request(url, method="get", params=params).json() + for task in resp["tasks"]: + yield task + if resp.get("continuationToken"): + params = {"continuationToken": resp.get("continuationToken")} + else: + break + + +def list_task_group_incomplete_task_ids(task_group_id): + states = ("running", "pending", "unscheduled") + for task in [t["status"] for t in list_task_group_tasks(task_group_id)]: + if task["state"] in states: + yield task["taskId"] diff --git a/taskcluster/taskgraph/util/taskgraph.py b/taskcluster/taskgraph/util/taskgraph.py new file mode 100644 index 0000000000..735bfbb15a --- /dev/null +++ b/taskcluster/taskgraph/util/taskgraph.py @@ -0,0 +1,58 
@@ +# -*- coding: utf-8 -*- + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Tools for interacting with existing taskgraphs. +""" + +from __future__ import absolute_import, print_function, unicode_literals + +import six + +from taskgraph.util.taskcluster import ( + find_task_id, + get_artifact, +) + + +def find_decision_task(parameters, graph_config): + """Given the parameters for this action, find the taskId of the decision + task""" + head_rev_param = "{}head_rev".format(graph_config["project-repo-param-prefix"]) + return find_task_id( + "{}.v2.{}.revision.{}.taskgraph.decision".format( + graph_config["trust-domain"], + parameters["project"], + parameters[head_rev_param], + ) + ) + + +def find_existing_tasks(previous_graph_ids): + existing_tasks = {} + for previous_graph_id in previous_graph_ids: + label_to_taskid = get_artifact(previous_graph_id, "public/label-to-taskid.json") + existing_tasks.update(label_to_taskid) + return existing_tasks + + +def find_existing_tasks_from_previous_kinds( + full_task_graph, previous_graph_ids, rebuild_kinds +): + """Given a list of previous decision/action taskIds and kinds to ignore + from the previous graphs, return a dictionary of labels-to-taskids to use + as ``existing_tasks`` in the optimization step.""" + existing_tasks = find_existing_tasks(previous_graph_ids) + kind_labels = { + t.label + for t in six.itervalues(full_task_graph.tasks) + if t.attributes["kind"] not in rebuild_kinds + } + return { + label: taskid + for (label, taskid) in existing_tasks.items() + if label in kind_labels + } diff --git a/taskcluster/taskgraph/util/templates.py b/taskcluster/taskgraph/util/templates.py new file mode 100644 index 0000000000..d0cfb251c3 --- /dev/null +++ b/taskcluster/taskgraph/util/templates.py @@ -0,0 +1,60 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import copy + + +def merge_to(source, dest): + """ + Merge dict and arrays (override scalar values) + + Keys from source override keys from dest, and elements from lists in source + are appended to lists in dest. + + :param dict source: to copy from + :param dict dest: to copy to (modified in place) + """ + + for key, value in source.items(): + if ( + isinstance(value, dict) + and len(value) == 1 + and list(value)[0].startswith("by-") + ): + # Do not merge by-* values as this is likely to confuse someone + dest[key] = value + continue + + # Override mismatching or empty types + if type(value) != type(dest.get(key)): # noqa + dest[key] = value + continue + + # Merge dict + if isinstance(value, dict): + merge_to(value, dest[key]) + continue + + if isinstance(value, list): + dest[key] = dest[key] + value + continue + + dest[key] = value + + return dest + + +def merge(*objects): + """ + Merge the given objects, using the semantics described for merge_to, with + objects later in the list taking precedence. From an inheritance + perspective, "parents" should be listed before "children". + + Returns the result without modifying any arguments. 
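A small sketch of the merge semantics described above, with invented values (later arguments win, nested dicts merge, lists concatenate):

    a = {"env": {"FOO": "1"}, "chunks": [1], "tier": 2}
    b = {"env": {"BAR": "2"}, "chunks": [2], "tier": 1}

    merge(a, b)
    # -> {"env": {"FOO": "1", "BAR": "2"}, "chunks": [1, 2], "tier": 1}
    # Neither a nor b is modified.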
+ """ + if len(objects) == 1: + return copy.deepcopy(objects[0]) + return merge_to(objects[-1], merge(*objects[:-1])) diff --git a/taskcluster/taskgraph/util/time.py b/taskcluster/taskgraph/util/time.py new file mode 100644 index 0000000000..184d92f0c0 --- /dev/null +++ b/taskcluster/taskgraph/util/time.py @@ -0,0 +1,118 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Python port of the ms.js node module this is not a direct port some things are +# more complicated or less precise and we lean on time delta here. + +from __future__ import absolute_import, print_function, unicode_literals + +import re +import datetime + +PATTERN = re.compile("((?:\d+)?\.?\d+) *([a-z]+)") + + +def seconds(value): + return datetime.timedelta(seconds=int(value)) + + +def minutes(value): + return datetime.timedelta(minutes=int(value)) + + +def hours(value): + return datetime.timedelta(hours=int(value)) + + +def days(value): + return datetime.timedelta(days=int(value)) + + +def months(value): + # See warning in years(), below + return datetime.timedelta(days=int(value) * 30) + + +def years(value): + # Warning here "years" are vague don't use this for really sensitive date + # computation the idea is to give you a absolute amount of time in the + # future which is not the same thing as "precisely on this date next year" + return datetime.timedelta(days=int(value) * 365) + + +ALIASES = {} +ALIASES["seconds"] = ALIASES["second"] = ALIASES["s"] = seconds +ALIASES["minutes"] = ALIASES["minute"] = ALIASES["min"] = minutes +ALIASES["hours"] = ALIASES["hour"] = ALIASES["h"] = hours +ALIASES["days"] = ALIASES["day"] = ALIASES["d"] = days +ALIASES["months"] = ALIASES["month"] = ALIASES["mo"] = months +ALIASES["years"] = ALIASES["year"] = ALIASES["y"] = years + + +class InvalidString(Exception): + pass + + +class UnknownTimeMeasurement(Exception): + pass + + +def value_of(input_str): + """ + Convert a string to a json date in the future + :param str input_str: (ex: 1d, 2d, 6years, 2 seconds) + :returns: Unit given in seconds + """ + + matches = PATTERN.search(input_str) + + if matches is None or len(matches.groups()) < 2: + raise InvalidString("'{}' is invalid string".format(input_str)) + + value, unit = matches.groups() + + if unit not in ALIASES: + raise UnknownTimeMeasurement( + "{} is not a valid time measure use one of {}".format( + unit, sorted(ALIASES.keys()) + ) + ) + + return ALIASES[unit](value) + + +def json_time_from_now(input_str, now=None, datetime_format=False): + """ + :param str input_str: Input string (see value of) + :param datetime now: Optionally set the definition of `now` + :param boolean datetime_format: Set `True` to get a `datetime` output + :returns: JSON string representation of time in future. + """ + + if now is None: + now = datetime.datetime.utcnow() + + time = now + value_of(input_str) + + if datetime_format is True: + return time + else: + # Sorta a big hack but the json schema validator for date does not like the + # ISO dates until 'Z' (for timezone) is added... + # the [:23] ensures only whole seconds or milliseconds are included, + # not microseconds (see bug 1381801) + return time.isoformat()[:23] + "Z" + + +def current_json_time(datetime_format=False): + """ + :param boolean datetime_format: Set `True` to get a `datetime` output + :returns: JSON string representation of the current time. 
+ """ + if datetime_format is True: + return datetime.datetime.utcnow() + else: + # the [:23] ensures only whole seconds or milliseconds are included, + # not microseconds (see bug 1381801) + return datetime.datetime.utcnow().isoformat()[:23] + "Z" diff --git a/taskcluster/taskgraph/util/treeherder.py b/taskcluster/taskgraph/util/treeherder.py new file mode 100644 index 0000000000..9e7429ef64 --- /dev/null +++ b/taskcluster/taskgraph/util/treeherder.py @@ -0,0 +1,67 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals +import re + +_JOINED_SYMBOL_RE = re.compile(r"([^(]*)\(([^)]*)\)$") + + +def split_symbol(treeherder_symbol): + """Split a symbol expressed as grp(sym) into its two parts. If no group is + given, the returned group is '?'""" + groupSymbol = "?" + symbol = treeherder_symbol + if "(" in symbol: + match = _JOINED_SYMBOL_RE.match(symbol) + if match: + groupSymbol, symbol = match.groups() + else: + raise Exception("`{}` is not a valid treeherder symbol.".format(symbol)) + return groupSymbol, symbol + + +def join_symbol(group, symbol): + """Perform the reverse of split_symbol, combining the given group and + symbol. If the group is '?', then it is omitted.""" + if group == "?": + return symbol + return "{}({})".format(group, symbol) + + +def add_suffix(treeherder_symbol, suffix): + """Add a suffix to a treeherder symbol that may contain a group.""" + group, symbol = split_symbol(treeherder_symbol) + symbol += str(suffix) + return join_symbol(group, symbol) + + +def replace_group(treeherder_symbol, new_group): + """Add a suffix to a treeherder symbol that may contain a group.""" + _, symbol = split_symbol(treeherder_symbol) + return join_symbol(new_group, symbol) + + +def inherit_treeherder_from_dep(job, dep_job): + """Inherit treeherder defaults from dep_job""" + treeherder = job.get("treeherder", {}) + + dep_th_platform = ( + dep_job.task.get("extra", {}) + .get("treeherder", {}) + .get("machine", {}) + .get("platform", "") + ) + dep_th_collection = list( + dep_job.task.get("extra", {}).get("treeherder", {}).get("collection", {}).keys() + )[0] + treeherder.setdefault( + "platform", "{}/{}".format(dep_th_platform, dep_th_collection) + ) + treeherder.setdefault( + "tier", dep_job.task.get("extra", {}).get("treeherder", {}).get("tier", 1) + ) + # Does not set symbol + treeherder.setdefault("kind", "build") + return treeherder diff --git a/taskcluster/taskgraph/util/verify.py b/taskcluster/taskgraph/util/verify.py new file mode 100644 index 0000000000..a1d24718c9 --- /dev/null +++ b/taskcluster/taskgraph/util/verify.py @@ -0,0 +1,454 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import logging +import re +import os + +import attr +import six + +from .. 
import GECKO +from .treeherder import join_symbol +from taskgraph.util.attributes import match_run_on_projects, RELEASE_PROJECTS + +from taskgraph.util.attributes import ALL_PROJECTS, RUN_ON_PROJECT_ALIASES + +logger = logging.getLogger(__name__) +doc_base_path = os.path.join(GECKO, "taskcluster", "docs") + + +@attr.s(frozen=True) +class Verification(object): + verify = attr.ib() + run_on_projects = attr.ib() + + +@attr.s(frozen=True) +class VerificationSequence(object): + """ + Container for a sequence of verifications over a TaskGraph. Each + verification is represented as a callable taking (task, taskgraph, + scratch_pad), called for each task in the taskgraph, and one more + time with no task but with the taskgraph and the same scratch_pad + that was passed for each task. + """ + + _verifications = attr.ib(factory=dict) + + def __call__(self, graph_name, graph, graph_config, parameters): + for verification in self._verifications.get(graph_name, []): + if not match_run_on_projects( + parameters["project"], verification.run_on_projects + ): + continue + scratch_pad = {} + graph.for_each_task( + verification.verify, + scratch_pad=scratch_pad, + graph_config=graph_config, + parameters=parameters, + ) + verification.verify( + None, + graph, + scratch_pad=scratch_pad, + graph_config=graph_config, + parameters=parameters, + ) + return graph_name, graph + + def add(self, graph_name, run_on_projects={"all"}): + def wrap(func): + self._verifications.setdefault(graph_name, []).append( + Verification(func, run_on_projects) + ) + return func + + return wrap + + +verifications = VerificationSequence() + + +@attr.s(frozen=True) +class DocPaths(object): + _paths = attr.ib(factory=list) + + def get_files(self, filename): + rv = [] + for p in self._paths: + doc_path = os.path.join(p, filename) + if os.path.exists(doc_path): + rv.append(doc_path) + return rv + + def add(self, path): + """ + Projects that make use of Firefox's taskgraph can extend it with + their own task kinds by registering additional paths for documentation. + documentation_paths.add() needs to be called by the project's Taskgraph + registration function. See taskgraph.config. + """ + self._paths.append(path) + + +documentation_paths = DocPaths() +documentation_paths.add(doc_base_path) + + +def verify_docs(filename, identifiers, appearing_as): + """ + Look for identifiers of the type appearing_as in the files + returned by documentation_paths.get_files(). Firefox will have + a single file in a list, but projects such as Thunderbird can have + documentation in another location and may return multiple files. + """ + # We ignore identifiers starting with '_' for the sake of tests. 
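A hedged sketch of how an additional check could be registered with the sequence above (the check, the kind name, and the threshold are invented):

    @verifications.add("full_task_graph")
    def verify_example(task, taskgraph, scratch_pad, graph_config, parameters):
        # Called once per task, then one final time with task=None.
        if task is not None:
            scratch_pad[task.kind] = scratch_pad.get(task.kind, 0) + 1
            return
        if scratch_pad.get("docker-image", 0) > 100:
            raise Exception("Unexpectedly many docker-image tasks")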
+ # Strings starting with "_" are ignored for doc verification + # hence they can be used for faking test values + doc_files = documentation_paths.get_files(filename) + doctext = "".join([open(d).read() for d in doc_files]) + + if appearing_as == "inline-literal": + expression_list = [ + "``" + identifier + "``" + for identifier in identifiers + if not identifier.startswith("_") + ] + elif appearing_as == "heading": + expression_list = [ + "\n" + identifier + "\n(?:(?:(?:-+\n)+)|(?:(?:.+\n)+))" + for identifier in identifiers + if not identifier.startswith("_") + ] + else: + raise Exception("appearing_as = `{}` not defined".format(appearing_as)) + + for expression, identifier in zip(expression_list, identifiers): + match_group = re.search(expression, doctext) + if not match_group: + raise Exception( + "{}: `{}` missing from doc file: `{}`".format( + appearing_as, identifier, filename + ) + ) + + +@verifications.add("full_task_graph") +def verify_task_graph_symbol(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function verifies that tuple + (collection.keys(), machine.platform, groupSymbol, symbol) is unique + for a target task graph. + """ + if task is None: + return + task_dict = task.task + if "extra" in task_dict: + extra = task_dict["extra"] + if "treeherder" in extra: + treeherder = extra["treeherder"] + + collection_keys = tuple(sorted(treeherder.get("collection", {}).keys())) + if len(collection_keys) != 1: + raise Exception( + "Task {} can't be in multiple treeherder collections " + "(the part of the platform after `/`): {}".format( + task.label, collection_keys + ) + ) + platform = treeherder.get("machine", {}).get("platform") + group_symbol = treeherder.get("groupSymbol") + symbol = treeherder.get("symbol") + + key = (platform, collection_keys[0], group_symbol, symbol) + if key in scratch_pad: + raise Exception( + "Duplicate treeherder platform and symbol in tasks " + "`{}`and `{}`: {} {}".format( + task.label, + scratch_pad[key], + "{}/{}".format(platform, collection_keys[0]), + join_symbol(group_symbol, symbol), + ) + ) + else: + scratch_pad[key] = task.label + + +@verifications.add("full_task_graph") +def verify_trust_domain_v2_routes( + task, taskgraph, scratch_pad, graph_config, parameters +): + """ + This function ensures that any two tasks have distinct ``index.{trust-domain}.v2`` routes. + """ + if task is None: + return + route_prefix = "index.{}.v2".format(graph_config["trust-domain"]) + task_dict = task.task + routes = task_dict.get("routes", []) + + for route in routes: + if route.startswith(route_prefix): + if route in scratch_pad: + raise Exception( + "conflict between {}:{} for route: {}".format( + task.label, scratch_pad[route], route + ) + ) + else: + scratch_pad[route] = task.label + + +@verifications.add("full_task_graph") +def verify_routes_notification_filters( + task, taskgraph, scratch_pad, graph_config, parameters +): + """ + This function ensures that only understood filters for notifications are + specified. + + See: https://firefox-ci-tc.services.mozilla.com/docs/manual/using/task-notifications + """ + if task is None: + return + route_prefix = "notify." 
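For reference, a sketch of the kind of route this check inspects (the address is made up); only the trailing filter segment is compared against the allowed values:

    routes = [
        "notify.email.release-mgmt@example.com.on-failed",   # accepted
        "notify.email.release-mgmt@example.com.on-retry",    # rejected: unknown filter
    ]
    # route.split(".")[-1] -> "on-failed" / "on-retry"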
+ valid_filters = ("on-any", "on-completed", "on-failed", "on-exception") + task_dict = task.task + routes = task_dict.get("routes", []) + + for route in routes: + if route.startswith(route_prefix): + # Get the filter of the route + route_filter = route.split(".")[-1] + if route_filter not in valid_filters: + raise Exception( + "{} has invalid notification filter ({})".format( + task.label, route_filter + ) + ) + + +@verifications.add("full_task_graph") +def verify_dependency_tiers(task, taskgraph, scratch_pad, graph_config, parameters): + tiers = scratch_pad + if task is not None: + tiers[task.label] = ( + task.task.get("extra", {}).get("treeherder", {}).get("tier", six.MAXSIZE) + ) + else: + + def printable_tier(tier): + if tier == six.MAXSIZE: + return "unknown" + return tier + + for task in six.itervalues(taskgraph.tasks): + tier = tiers[task.label] + for d in six.itervalues(task.dependencies): + if taskgraph[d].task.get("workerType") == "always-optimized": + continue + if "dummy" in taskgraph[d].kind: + continue + if tier < tiers[d]: + raise Exception( + "{} (tier {}) cannot depend on {} (tier {})".format( + task.label, + printable_tier(tier), + d, + printable_tier(tiers[d]), + ) + ) + + +@verifications.add("full_task_graph") +def verify_required_signoffs(task, taskgraph, scratch_pad, graph_config, parameters): + """ + Task with required signoffs can't be dependencies of tasks with less + required signoffs. + """ + all_required_signoffs = scratch_pad + if task is not None: + all_required_signoffs[task.label] = set( + task.attributes.get("required_signoffs", []) + ) + else: + + def printable_signoff(signoffs): + if len(signoffs) == 1: + return "required signoff {}".format(*signoffs) + elif signoffs: + return "required signoffs {}".format(", ".join(signoffs)) + else: + return "no required signoffs" + + for task in six.itervalues(taskgraph.tasks): + required_signoffs = all_required_signoffs[task.label] + for d in six.itervalues(task.dependencies): + if required_signoffs < all_required_signoffs[d]: + raise Exception( + "{} ({}) cannot depend on {} ({})".format( + task.label, + printable_signoff(required_signoffs), + d, + printable_signoff(all_required_signoffs[d]), + ) + ) + + +@verifications.add("full_task_graph") +def verify_toolchain_alias(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function verifies that toolchain aliases are not reused. + """ + if task is None: + return + attributes = task.attributes + if "toolchain-alias" in attributes: + key = attributes["toolchain-alias"] + if key in scratch_pad: + raise Exception( + "Duplicate toolchain-alias in tasks " + "`{}`and `{}`: {}".format( + task.label, + scratch_pad[key], + key, + ) + ) + else: + scratch_pad[key] = task.label + + +@verifications.add("optimized_task_graph") +def verify_always_optimized(task, taskgraph, scratch_pad, graph_config, parameters): + """ + This function ensures that always-optimized tasks have been optimized. 
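The required-signoffs comparison above relies on Python's strict-subset semantics for sets; a standalone illustration (the signoff name is an example):

    assert set() < {"mar-signing"}                    # fewer signoffs than the dependency: rejected
    assert not ({"mar-signing"} < {"mar-signing"})    # equal signoffs: allowed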
+ """ + if task is None: + return + if task.task.get("workerType") == "always-optimized": + raise Exception("Could not optimize the task {!r}".format(task.label)) + + +@verifications.add("full_task_graph", run_on_projects=RELEASE_PROJECTS) +def verify_shippable_no_sccache(task, taskgraph, scratch_pad, graph_config, parameters): + if task and task.attributes.get("shippable"): + if task.task.get("payload", {}).get("env", {}).get("USE_SCCACHE"): + raise Exception("Shippable job {} cannot use sccache".format(task.label)) + + +@verifications.add("full_task_graph") +def verify_test_packaging(task, taskgraph, scratch_pad, graph_config, parameters): + if task is None: + # In certain cases there are valid reasons for tests to be missing, + # don't error out when that happens. + missing_tests_allowed = any( + ( + # user specified `--target-kind` + parameters.get("target-kind") is not None, + # manifest scheduling is enabled + parameters["test_manifest_loader"] != "default", + ) + ) + + exceptions = [] + for task in six.itervalues(taskgraph.tasks): + if task.kind == "build" and not task.attributes.get( + "skip-verify-test-packaging" + ): + build_env = task.task.get("payload", {}).get("env", {}) + package_tests = build_env.get("MOZ_AUTOMATION_PACKAGE_TESTS") + shippable = task.attributes.get("shippable", False) + build_has_tests = scratch_pad.get(task.label) + + if package_tests != "1": + # Shippable builds should always package tests. + if shippable: + exceptions.append( + "Build job {} is shippable and does not specify " + "MOZ_AUTOMATION_PACKAGE_TESTS=1 in the " + "environment.".format(task.label) + ) + + # Build tasks in the scratch pad have tests dependent on + # them, so we need to package tests during build. + if build_has_tests: + exceptions.append( + "Build job {} has tests dependent on it and does not specify " + "MOZ_AUTOMATION_PACKAGE_TESTS=1 in the environment".format( + task.label + ) + ) + else: + # Build tasks that aren't in the scratch pad have no + # dependent tests, so we shouldn't package tests. + # With the caveat that we expect shippable jobs to always + # produce tests. + if not build_has_tests and not shippable: + # If we have not generated all task kinds, we can't verify that + # there are no dependent tests. + if not missing_tests_allowed: + exceptions.append( + "Build job {} has no tests, but specifies " + "MOZ_AUTOMATION_PACKAGE_TESTS={} in the environment. " + "Unset MOZ_AUTOMATION_PACKAGE_TESTS in the task definition " + "to fix.".format(task.label, package_tests) + ) + if exceptions: + raise Exception("\n".join(exceptions)) + return + if task.kind == "test": + build_task = taskgraph[task.dependencies["build"]] + scratch_pad[build_task.label] = 1 + + +@verifications.add("full_task_graph") +def verify_run_known_projects(task, taskgraph, scratch_pad, graph_config, parameters): + """Validates the inputs in run-on-projects. + + We should never let 'try' (or 'try-comm-central') be in run-on-projects even though it + is valid because it is not considered for try pushes. While here we also validate for + other unknown projects or typos. + """ + if task and task.attributes.get("run_on_projects"): + projects = set(task.attributes["run_on_projects"]) + if {"try", "try-comm-central"} & set(projects): + raise Exception( + "In task {}: using try in run-on-projects is invalid; use try " + "selectors to select this task on try".format(task.label) + ) + # try isn't valid, but by the time we get here its not an available project anyway. 
+ valid_projects = ALL_PROJECTS | set(RUN_ON_PROJECT_ALIASES.keys()) + invalid_projects = projects - valid_projects + if invalid_projects: + raise Exception( + "Task '{}' has an invalid run-on-projects value: " + "{}".format(task.label, invalid_projects) + ) + + +@verifications.add("full_task_graph") +def verify_local_toolchains(task, taskgraph, scratch_pad, graph_config, parameters): + """ + Toolchains that are used for local development need to be built on a + level-3 branch to installable via `mach bootstrap`. We ensure here that all + such tasks run on at least trunk projects, even if they aren't pulled in as + a dependency of other tasks in the graph. + + There is code in `mach artifact toolchain` that verifies that anything + installed via `mach bootstrap` has the attribute set. + """ + if task and task.attributes.get("local-toolchain"): + run_on_projects = task.attributes.get("run_on_projects", []) + if not any(alias in run_on_projects for alias in ["all", "trunk"]): + raise Exception( + "Toolchain {} used for local development is not built on trunk. {}".format( + task.label, run_on_projects + ) + ) diff --git a/taskcluster/taskgraph/util/workertypes.py b/taskcluster/taskgraph/util/workertypes.py new file mode 100644 index 0000000000..6a9f571a69 --- /dev/null +++ b/taskcluster/taskgraph/util/workertypes.py @@ -0,0 +1,95 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +from mozbuild.util import memoize + +from .keyed_by import evaluate_keyed_by +from .attributes import keymatch + +WORKER_TYPES = { + "gce/gecko-1-b-linux": ("docker-worker", "linux"), + "gce/gecko-2-b-linux": ("docker-worker", "linux"), + "gce/gecko-3-b-linux": ("docker-worker", "linux"), + "invalid/invalid": ("invalid", None), + "invalid/always-optimized": ("always-optimized", None), + "scriptworker-prov-v1/signing-linux-v1": ("scriptworker-signing", None), + "scriptworker-k8s/gecko-3-shipit": ("shipit", None), + "scriptworker-k8s/gecko-1-shipit": ("shipit", None), +} + + +@memoize +def _get(graph_config, alias, level, release_level): + """Get the configuration for this worker_type alias: {provisioner, + worker-type, implementation, os}""" + level = str(level) + + # handle the legacy (non-alias) format + if "/" in alias: + alias = alias.format(level=level) + provisioner, worker_type = alias.split("/", 1) + try: + implementation, os = WORKER_TYPES[alias] + return { + "provisioner": provisioner, + "worker-type": worker_type, + "implementation": implementation, + "os": os, + } + except KeyError: + return { + "provisioner": provisioner, + "worker-type": worker_type, + } + + matches = keymatch(graph_config["workers"]["aliases"], alias) + if len(matches) > 1: + raise KeyError("Multiple matches for worker-type alias " + alias) + elif not matches: + raise KeyError("No matches for worker-type alias " + alias) + worker_config = matches[0].copy() + + worker_config["provisioner"] = evaluate_keyed_by( + worker_config["provisioner"], + "worker-type alias {} field provisioner".format(alias), + {"level": level}, + ).format( + **{ + "trust-domain": graph_config["trust-domain"], + "level": level, + "alias": alias, + } + ) + worker_config["worker-type"] = evaluate_keyed_by( + worker_config["worker-type"], + "worker-type alias {} field worker-type".format(alias), + {"level": level, "release-level": 
release_level}, + ).format( + **{ + "trust-domain": graph_config["trust-domain"], + "level": level, + "alias": alias, + } + ) + + return worker_config + + +def worker_type_implementation(graph_config, worker_type): + """Get the worker implementation and OS for the given workerType, where the + OS represents the host system, not the target OS, in the case of + cross-compiles.""" + worker_config = _get(graph_config, worker_type, "1", "staging") + return worker_config["implementation"], worker_config.get("os") + + +def get_worker_type(graph_config, worker_type, level, release_level): + """ + Get the worker type provisioner and worker-type, optionally evaluating + aliases from the graph config. + """ + worker_config = _get(graph_config, worker_type, level, release_level) + return worker_config["provisioner"], worker_config["worker-type"] diff --git a/taskcluster/taskgraph/util/yaml.py b/taskcluster/taskgraph/util/yaml.py new file mode 100644 index 0000000000..9f1015c541 --- /dev/null +++ b/taskcluster/taskgraph/util/yaml.py @@ -0,0 +1,37 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals + +import os + +from yaml.loader import SafeLoader + + +class UnicodeLoader(SafeLoader): + def construct_yaml_str(self, node): + return self.construct_scalar(node) + + +UnicodeLoader.add_constructor("tag:yaml.org,2002:str", UnicodeLoader.construct_yaml_str) + + +def load_stream(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = UnicodeLoader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + + +def load_yaml(*parts): + """Convenience function to load a YAML file in the given path. This is + useful for loading kind configuration files from the kind path.""" + filename = os.path.join(*parts) + with open(filename, "rb") as f: + return load_stream(f) |
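Finally, a short usage sketch for the YAML helpers above (the kind configuration shown is invented):

    from taskgraph.util.yaml import load_stream, load_yaml

    load_stream("loader: taskgraph.loader.transform:loader\ntransforms: []\n")
    # -> {"loader": "taskgraph.loader.transform:loader", "transforms": []}

    # load_yaml joins its arguments into a path and parses that file, e.g. a kind.yml:
    #     load_yaml(kind_path, "kind.yml")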