summaryrefslogtreecommitdiffstats
path: root/taskcluster/gecko_taskgraph/transforms/test/chunk.py
diff options
context:
space:
mode:
Diffstat (limited to 'taskcluster/gecko_taskgraph/transforms/test/chunk.py')
-rw-r--r--taskcluster/gecko_taskgraph/transforms/test/chunk.py262
1 files changed, 262 insertions, 0 deletions
diff --git a/taskcluster/gecko_taskgraph/transforms/test/chunk.py b/taskcluster/gecko_taskgraph/transforms/test/chunk.py
new file mode 100644
index 0000000000..f6442e3755
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/transforms/test/chunk.py
@@ -0,0 +1,262 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import json
+
+import taskgraph
+from taskgraph.transforms.base import TransformSequence
+from taskgraph.util.attributes import keymatch
+from taskgraph.util.treeherder import join_symbol, split_symbol
+
+from gecko_taskgraph.util.attributes import is_try
+from gecko_taskgraph.util.chunking import (
+ DefaultLoader,
+ chunk_manifests,
+ get_manifest_loader,
+ get_runtimes,
+ guess_mozinfo_from_task,
+)
+from gecko_taskgraph.util.copy_task import copy_task
+from gecko_taskgraph.util.perfile import perfile_number_of_chunks
+
+# Target length of one chunk. resolve_dynamic_chunks() caps every manifest's
+# runtime at this value and divides the summed runtime by it to obtain the
+# number of chunks.
+DYNAMIC_CHUNK_DURATION = 20 * 60 # seconds
+"""The approximate time each test chunk should take to run."""
+
+
+# Scaling factors applied to the summed runtime before it is converted into a
+# chunk count. resolve_dynamic_chunks() passes this table to keymatch() with
+# the key "<test-platform>-<test-name>" and raises if more than one entry
+# matches. NOTE(review): the keys look like regular expressions; the exact
+# matching rules live in keymatch() -- confirm there.
+DYNAMIC_CHUNK_MULTIPLIER = {
+ # Desktop xpcshell tests run in parallel. Reduce the total runtime to
+ # compensate.
+ "^(?!android).*-xpcshell.*": 0.2,
+}
+"""A multiplication factor to tweak the total duration per platform / suite."""
+
+
+# Transform sequence for this module; each function below registers itself on
+# it through the @transforms.add decorator.
+transforms = TransformSequence()
+
+
+@transforms.add
+def set_test_verify_chunks(config, tasks):
+ """Set the number of chunks we use for test-verify."""
+ for task in tasks:
+ if any(task["suite"].startswith(s) for s in ("test-verify", "test-coverage")):
+ env = config.params.get("try_task_config", {}) or {}
+ env = env.get("templates", {}).get("env", {})
+ task["chunks"] = perfile_number_of_chunks(
+ is_try(config.params),
+ env.get("MOZHARNESS_TEST_PATHS", ""),
+ config.params.get("head_repository", ""),
+ config.params.get("head_rev", ""),
+ task["test-name"],
+ )
+
+ # limit the number of chunks we run for test-verify mode because
+ # test-verify is comprehensive and takes a lot of time, if we have
+ # >30 tests changed, this is probably an import of external tests,
+ # or a patch renaming/moving files in bulk
+ maximum_number_verify_chunks = 3
+ if task["chunks"] > maximum_number_verify_chunks:
+ task["chunks"] = maximum_number_verify_chunks
+
+ yield task
+
+
+@transforms.add
+def set_test_manifests(config, tasks):
+ """Determine the set of test manifests that should run in this task."""
+
+ for task in tasks:
+ # When a task explicitly requests no 'test_manifest_loader', test
+ # resolving will happen at test runtime rather than in the taskgraph.
+ if "test-manifest-loader" in task and task["test-manifest-loader"] is None:
+ yield task
+ continue
+
+ # Set 'tests_grouped' to "1", so we can differentiate between suites that are
+ # chunked at the test runtime and those that are chunked in the taskgraph.
+ task.setdefault("tags", {})["tests_grouped"] = "1"
+
+ if taskgraph.fast:
+ # We want to avoid evaluating manifests when taskgraph.fast is set. But
+ # manifests are required for dynamic chunking. Just set the number of
+ # chunks to one in this case.
+ if task["chunks"] == "dynamic":
+ task["chunks"] = 1
+ yield task
+ continue
+
+ manifests = task.get("test-manifests")
+ if manifests:
+ if isinstance(manifests, list):
+ task["test-manifests"] = {"active": manifests, "skipped": []}
+ yield task
+ continue
+
+ mozinfo = guess_mozinfo_from_task(
+ task, config.params.get("head_repository", "")
+ )
+
+ loader_name = task.pop(
+ "test-manifest-loader", config.params["test_manifest_loader"]
+ )
+ loader = get_manifest_loader(loader_name, config.params)
+
+ task["test-manifests"] = loader.get_manifests(
+ task["suite"],
+ frozenset(mozinfo.items()),
+ )
+
+ # When scheduling with test paths, we often find manifests scheduled but all tests
+ # are skipped on a given config. This will remove the task from the task set if
+ # no manifests have active tests for the given task/config
+ mh_test_paths = {}
+ if "MOZHARNESS_TEST_PATHS" in config.params.get("try_task_config", {}).get(
+ "env", {}
+ ):
+ mh_test_paths = json.loads(
+ config.params["try_task_config"]["env"]["MOZHARNESS_TEST_PATHS"]
+ )
+
+ if task["attributes"]["unittest_suite"] in mh_test_paths.keys():
+ input_paths = mh_test_paths[task["attributes"]["unittest_suite"]]
+ remaining_manifests = []
+
+ # if we have web-platform tests incoming, just yield task
+ for m in input_paths:
+ if m.startswith("testing/web-platform/tests/"):
+ if not isinstance(loader, DefaultLoader):
+ task["chunks"] = "dynamic"
+ yield task
+ break
+
+ # input paths can exist in other directories (i.e. [../../dir/test.js])
+ # we need to look for all [active] manifests that include tests in the path
+ for m in input_paths:
+ if [tm for tm in task["test-manifests"]["active"] if tm.startswith(m)]:
+ remaining_manifests.append(m)
+
+ # look in the 'other' manifests
+ for m in input_paths:
+ man = m
+ for tm in task["test-manifests"]["other_dirs"]:
+ matched_dirs = [
+ dp
+ for dp in task["test-manifests"]["other_dirs"].get(tm)
+ if dp.startswith(man)
+ ]
+ if matched_dirs:
+ if tm not in task["test-manifests"]["active"]:
+ continue
+ if m not in remaining_manifests:
+ remaining_manifests.append(m)
+
+ if remaining_manifests == []:
+ continue
+
+ # The default loader loads all manifests. If we use a non-default
+ # loader, we'll only run some subset of manifests and the hardcoded
+ # chunk numbers will no longer be valid. Dynamic chunking should yield
+ # better results.
+ if not isinstance(loader, DefaultLoader):
+ task["chunks"] = "dynamic"
+
+ yield task
+
+
+@transforms.add
+def resolve_dynamic_chunks(config, tasks):
+ """Determine how many chunks are needed to handle the given set of manifests."""
+
+ for task in tasks:
+ if task["chunks"] != "dynamic":
+ yield task
+ continue
+
+ if not task.get("test-manifests"):
+ raise Exception(
+ "{} must define 'test-manifests' to use dynamic chunking!".format(
+ task["test-name"]
+ )
+ )
+
+ runtimes = {
+ m: r
+ for m, r in get_runtimes(task["test-platform"], task["suite"]).items()
+ if m in task["test-manifests"]["active"]
+ }
+
+ # Truncate runtimes that are above the desired chunk duration. They
+ # will be assigned to a chunk on their own and the excess duration
+ # shouldn't cause additional chunks to be needed.
+ times = [min(DYNAMIC_CHUNK_DURATION, r) for r in runtimes.values()]
+ avg = round(sum(times) / len(times), 2) if times else 0
+ total = sum(times)
+
+ # If there are manifests missing from the runtimes data, fill them in
+ # with the average of all present manifests.
+ missing = [m for m in task["test-manifests"]["active"] if m not in runtimes]
+ total += avg * len(missing)
+
+ # Apply any chunk multipliers if found.
+ key = "{}-{}".format(task["test-platform"], task["test-name"])
+ matches = keymatch(DYNAMIC_CHUNK_MULTIPLIER, key)
+ if len(matches) > 1:
+ raise Exception(
+ "Multiple matching values for {} found while "
+ "determining dynamic chunk multiplier!".format(key)
+ )
+ elif matches:
+ total = total * matches[0]
+
+ chunks = int(round(total / DYNAMIC_CHUNK_DURATION))
+
+ # Make sure we never exceed the number of manifests, nor have a chunk
+ # length of 0.
+ task["chunks"] = min(chunks, len(task["test-manifests"]["active"])) or 1
+ yield task
+
+
+@transforms.add
+def split_chunks(config, tasks):
+ """Based on the 'chunks' key, split tests up into chunks by duplicating
+ them and assigning 'this-chunk' appropriately and updating the treeherder
+ symbol.
+ """
+
+ for task in tasks:
+ # If test-manifests are set, chunk them ahead of time to avoid running
+ # the algorithm more than once.
+ chunked_manifests = None
+ if "test-manifests" in task:
+ manifests = task["test-manifests"]
+ chunked_manifests = chunk_manifests(
+ task["suite"],
+ task["test-platform"],
+ task["chunks"],
+ manifests["active"],
+ )
+
+ # Add all skipped manifests to the first chunk of backstop pushes
+ # so they still show up in the logs. They won't impact runtime much
+ # and this way tools like ActiveData are still aware that they
+ # exist.
+ if config.params["backstop"] and manifests["active"]:
+ chunked_manifests[0].extend(manifests["skipped"])
+
+ for i in range(task["chunks"]):
+ this_chunk = i + 1
+
+ # copy the test and update with the chunk number
+ chunked = copy_task(task)
+ chunked["this-chunk"] = this_chunk
+
+ if chunked_manifests is not None:
+ chunked["test-manifests"] = sorted(chunked_manifests[i])
+
+ group, symbol = split_symbol(chunked["treeherder-symbol"])
+ if task["chunks"] > 1 or not symbol:
+ # add the chunk number to the TH symbol
+ symbol += str(this_chunk)
+ chunked["treeherder-symbol"] = join_symbol(group, symbol)
+
+ yield chunked