summaryrefslogtreecommitdiffstats
path: root/taskcluster/taskgraph/util/chunking.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--taskcluster/taskgraph/util/chunking.py270
1 files changed, 270 insertions, 0 deletions
diff --git a/taskcluster/taskgraph/util/chunking.py b/taskcluster/taskgraph/util/chunking.py
new file mode 100644
index 0000000000..ecf8098f01
--- /dev/null
+++ b/taskcluster/taskgraph/util/chunking.py
@@ -0,0 +1,270 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
"""Utility functions to handle test chunking."""

# The docstring must be the first statement for it to become the module's
# ``__doc__``; ``from __future__`` imports are allowed to follow it.
from __future__ import absolute_import, print_function, unicode_literals
+
+import json
+import logging
+import os
+from abc import ABCMeta, abstractmethod
+
+import six
+from manifestparser import TestManifest
+from manifestparser.filters import chunk_by_runtime
+from mozbuild.util import memoize
+from moztest.resolve import (
+ TEST_SUITES,
+ TestResolver,
+ TestManifestLoader,
+)
+
+from taskgraph import GECKO
+from taskgraph.util.bugbug import BugbugTimeoutException, push_schedules
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Absolute path of the directory that contains this file.
here = os.path.abspath(os.path.dirname(__file__))

# Shared resolver used by the loaders below. It is created once at import
# time, rooted at this module's directory.
resolver = TestResolver.from_environment(
    cwd=here,
    loader_cls=TestManifestLoader,
)
+
+
def guess_mozinfo_from_task(task):
    """Approximate a mozinfo dict from the fields of a task definition.

    The result is a best-effort guess: many values that manifests can refer
    to are not derivable from a task, but the major ones are covered well
    enough for chunking in the taskgraph.

    Args:
        task (dict): A task definition.

    Returns:
        A dict that can be used as a mozinfo replacement.

    Raises:
        ValueError: If the build platform contains none of the known
            operating system names.
    """
    build_platform = task["build-attributes"]["build_platform"]

    mozinfo = {
        "asan": "asan" in build_platform,
        "bits": 32 if "32" in build_platform else 64,
        "ccov": "ccov" in build_platform,
        "debug": task["build-attributes"]["build_type"] == "debug",
        "e10s": task["attributes"]["e10s"],
        "fission": task["attributes"].get("unittest_variant") == "fission",
        "headless": "-headless" in task["test-name"],
        "tsan": "tsan" in build_platform,
        "webrender": task.get("webrender", False),
    }

    # The first known OS name found in the build platform wins.
    os_name = next(
        (
            candidate
            for candidate in ("android", "linux", "mac", "win")
            if candidate in build_platform
        ),
        None,
    )
    if os_name is None:
        raise ValueError("{} is not a known platform!".format(build_platform))
    mozinfo["os"] = os_name

    mozinfo["appname"] = "fennec" if os_name == "android" else "firefox"

    # Guess the processor. The test platform is only consulted for android.
    if "aarch64" in build_platform:
        processor = "aarch64"
    elif os_name == "android" and "arm" in task["test-platform"]:
        processor = "arm"
    else:
        processor = "x86" if mozinfo["bits"] == 32 else "x86_64"
    mozinfo["processor"] = processor

    # Guess the toolkit; anything that is not android/win/mac is assumed gtk.
    toolkits = {"android": "android", "win": "windows", "mac": "cocoa"}
    mozinfo["toolkit"] = toolkits.get(os_name, "gtk")

    return mozinfo
+
+
@memoize
def get_runtimes(platform, suite_name):
    """Load the recorded manifest runtimes of a suite for a platform.

    The data is read from one of the ``manifest-runtimes-<key>.json`` files
    under ``testing/runtimes`` in the GECKO checkout. Platforms containing
    "android" or "windows" use their dedicated file; every other platform
    uses the "unix" file.

    Args:
        platform (str): Platform used to select the runtime file.
        suite_name (str): Key of the suite to look up inside that file.

    Returns:
        The value stored under ``suite_name`` in the selected JSON file.

    Raises:
        TypeError: If ``platform`` or ``suite_name`` is empty.
        IOError: If the selected runtime file does not exist.
        KeyError: If the file has no entry for ``suite_name``.
    """
    if not (suite_name and platform):
        raise TypeError("suite_name and platform cannot be empty.")

    template = os.path.join(
        GECKO, "testing", "runtimes", "manifest-runtimes-{}.json"
    )
    # The first matching key wins; fall back to the unix data otherwise.
    file_key = next(
        (key for key in ("android", "windows") if key in platform), "unix"
    )
    path = template.format(file_key)

    if not os.path.exists(path):
        raise IOError("manifest runtime file at {} not found.".format(path))

    with open(path, "r") as fh:
        all_runtimes = json.load(fh)
    return all_runtimes[suite_name]
+
+
def chunk_manifests(suite, platform, chunks, manifests):
    """Run the chunking algorithm.

    Two strategies are used depending on the suite:

    * Suites whose name contains "web-platform-tests" are spread round-robin
      (in sorted order) across the chunks, without any runtime data.
    * Every other suite is chunked by `chunk_by_runtime`, using the runtimes
      recorded for ``platform`` and ``suite``.

    Duplicate entries in ``manifests`` are collapsed before chunking.

    Args:
        suite (str): Name of the test suite being chunked.
        platform (str): Platform used to find runtime info.
        chunks (int): Number of chunks to split manifests into.
        manifests (list): Manifests to chunk.

    Returns:
        A list of length `chunks` where each item contains a list of manifests
        that run in that chunk.
    """
    manifests = set(manifests)

    if "web-platform-tests" not in suite:
        # Only keep runtime data for the manifests we were asked to chunk.
        runtimes = {
            k: v for k, v in get_runtimes(platform, suite).items() if k in manifests
        }
        chunker = chunk_by_runtime(None, chunks, runtimes)
        # Keep only the second element of each item (presumably the chunk's
        # manifests, paired with its runtime -- confirm against manifestparser).
        return [c[1] for c in chunker.get_chunked_manifests(manifests)]

    # One (initially empty) list of manifests per chunk.
    chunked_manifests = [[] for _ in range(chunks)]

    # Spread out the test manifests evenly across all chunks.
    for index, key in enumerate(sorted(manifests)):
        chunked_manifests[index % chunks].append(key)

    # One last sort by the number of manifests. Chunk size should be more or less
    # equal in size.
    chunked_manifests.sort(key=len)

    # Return just the chunked test paths.
    return chunked_manifests
+
+
@six.add_metaclass(ABCMeta)
class BaseManifestLoader(object):
    """Abstract base class for objects that compute the manifests to run.

    Args:
        params (dict): Parameters, stored as ``self.params`` for subclasses
            to consult.
    """

    def __init__(self, params):
        self.params = params

    @abstractmethod
    def get_manifests(self, flavor, subsuite, mozinfo):
        """Compute which manifests should run for the given suite and mozinfo.

        This function returns skipped manifests separately so that more balanced
        chunks can be achieved by only considering "active" manifests in the
        chunking algorithm.

        NOTE(review): the loaders defined in this module override this method
        as ``get_manifests(self, suite, mozinfo)`` and return a dict, not a
        tuple. The ``flavor``/``subsuite`` parameters declared here look
        stale; the signature is left as-is to avoid changing the interface.

        Args:
            flavor (str): The suite to run. Values are defined by the 'build_flavor' key
                in `moztest.resolve.TEST_SUITES`.
            subsuite (str): The subsuite to run or 'undefined' to denote no subsuite.
            mozinfo (frozenset): Set of data in the form of (<key>, <value>) used
                for filtering.

        Returns:
            A dict with two keys, as returned by the loaders in this module:
            "active" is the list of manifests that will run at least one
            test, and "skipped" is the list of manifests in which all tests
            are skipped.
        """
+
+
class DefaultLoader(BaseManifestLoader):
    """Load manifests using metadata from the TestResolver."""

    @memoize
    def get_tests(self, suite):
        """Resolve all tests of ``suite`` through the module-level resolver.

        Args:
            suite (str): A key of `moztest.resolve.TEST_SUITES`.

        Returns:
            A list of the tests resolved for the suite's build flavor and
            subsuite ('undefined' when the suite declares no subsuite).
        """
        definition = TEST_SUITES[suite]
        flavor = definition["build_flavor"]
        subsuite = definition.get("kwargs", {}).get("subsuite", "undefined")
        return list(resolver.resolve_tests(flavor=flavor, subsuite=subsuite))

    @memoize
    def get_manifests(self, suite, mozinfo):
        """Split the manifests of ``suite`` into active and skipped ones.

        Args:
            suite (str): A key of `moztest.resolve.TEST_SUITES`.
            mozinfo: Iterable of (<key>, <value>) pairs used for filtering;
                it is converted to a dict and passed on as keyword arguments.

        Returns:
            A dict with an "active" list (manifests with at least one active
            test) and a "skipped" list (all remaining manifests).
        """
        filter_values = dict(mozinfo)
        # Every test known for the given suite/subsuite.
        suite_tests = self.get_tests(suite)

        if "web-platform-tests" in suite:
            # No mozinfo filtering for these suites: every manifest is active.
            wpt_manifests = {test["manifest"] for test in suite_tests}
            return {"active": list(wpt_manifests), "skipped": []}

        all_manifests = {chunk_by_runtime.get_manifest(test) for test in suite_tests}

        # Let manifestparser decide which tests are active for this mozinfo.
        manifest = TestManifest()
        manifest.tests = suite_tests
        active_tests = manifest.active_tests(
            disabled=False, exists=False, **filter_values
        )
        active = {chunk_by_runtime.get_manifest(test) for test in active_tests}

        return {"active": list(active), "skipped": list(all_manifests - active)}
+
+
class BugbugLoader(DefaultLoader):
    """Load manifests using metadata from the TestResolver, and then
    filter them based on a query to bugbug."""

    # Manifests bugbug reports with a confidence below this are pruned.
    CONFIDENCE_THRESHOLD = 0.5

    def __init__(self, *args, **kwargs):
        super(BugbugLoader, self).__init__(*args, **kwargs)
        # Set once a bugbug query has timed out; no pruning happens afterwards.
        self.timedout = False

    @memoize
    def get_manifests(self, suite, mozinfo):
        """Return the manifests of ``suite`` restricted to bugbug's selection.

        Args:
            suite (str): A key of `moztest.resolve.TEST_SUITES`.
            mozinfo: Iterable of (<key>, <value>) pairs used for filtering.

        Returns:
            A dict with "active" and "skipped" manifest lists. On a backstop
            push, or once a bugbug query has timed out, this is the
            unfiltered result of `DefaultLoader.get_manifests`. Otherwise
            both lists only keep manifests that bugbug reported with a
            confidence of at least `CONFIDENCE_THRESHOLD`.
        """
        manifests = super(BugbugLoader, self).get_manifests(suite, mozinfo)

        # Don't prune any manifests if we're on a backstop push or there was a timeout.
        if self.params["backstop"] or self.timedout:
            return manifests

        try:
            data = push_schedules(self.params["project"], self.params["head_rev"])
        except BugbugTimeoutException:
            logger.warning("Timed out waiting for bugbug, loading all test manifests.")
            self.timedout = True
            # The unfiltered manifests are already loaded; no need to re-enter
            # this method to get them.
            return manifests

        bugbug_manifests = {
            m
            for m, c in data.get("groups", {}).items()
            if c >= self.CONFIDENCE_THRESHOLD
        }

        # Filter into a copy: `manifests` comes from a memoized method, so
        # mutating it in place would alter the cached unfiltered result.
        filtered = dict(manifests)
        filtered["active"] = list(set(manifests["active"]) & bugbug_manifests)
        filtered["skipped"] = list(set(manifests["skipped"]) & bugbug_manifests)
        return filtered
+
+
# Registry mapping a loader name to the class that implements it.
manifest_loaders = {
    "bugbug": BugbugLoader,
    "default": DefaultLoader,
}

# Loader instances created so far, keyed by loader name.
_loader_cache = {}


def get_manifest_loader(name, params):
    """Return the shared loader instance registered under ``name``.

    For performance reasons at most one instance is created per loader name.
    The instance is built from a copy of ``params`` on the first request;
    later requests return that same instance and ignore ``params``.

    Args:
        name (str): A key of `manifest_loaders`.
        params: Parameters handed (as a dict copy) to the loader class.

    Returns:
        The loader instance for ``name``.

    Raises:
        KeyError: If ``name`` is not a registered loader.
    """
    try:
        return _loader_cache[name]
    except KeyError:
        pass

    loader_cls = manifest_loaders[name]
    loader = _loader_cache[name] = loader_cls(dict(params))
    return loader