# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import json

import taskgraph
from taskgraph.transforms.base import TransformSequence
from taskgraph.util.attributes import keymatch
from taskgraph.util.treeherder import join_symbol, split_symbol

from gecko_taskgraph.util.attributes import is_try
from gecko_taskgraph.util.chunking import (
    DefaultLoader,
    chunk_manifests,
    get_manifest_loader,
    get_runtimes,
    guess_mozinfo_from_task,
)
from gecko_taskgraph.util.copy_task import copy_task
from gecko_taskgraph.util.perfile import perfile_number_of_chunks

DYNAMIC_CHUNK_DURATION = 20 * 60  # seconds
"""The approximate time each test chunk should take to run."""

DYNAMIC_CHUNK_MULTIPLIER = {
    # Desktop xpcshell tests run in parallel. Reduce the total runtime to
    # compensate.
    "^(?!android).*-xpcshell.*": 0.2,
}
"""A multiplication factor to tweak the total duration per platform / suite."""

transforms = TransformSequence()


@transforms.add
def set_test_verify_chunks(config, tasks):
    """Set the number of chunks we use for test-verify."""
    for task in tasks:
        if any(task["suite"].startswith(s) for s in ("test-verify", "test-coverage")):
            env = config.params.get("try_task_config", {}) or {}
            env = env.get("templates", {}).get("env", {})
            task["chunks"] = perfile_number_of_chunks(
                is_try(config.params),
                env.get("MOZHARNESS_TEST_PATHS", ""),
                config.params.get("head_repository", ""),
                config.params.get("head_rev", ""),
                task["test-name"],
            )

            # Limit the number of chunks we run in test-verify mode because
            # test-verify is comprehensive and takes a lot of time. If more
            # than 30 tests changed, this is probably an import of external
            # tests or a patch renaming/moving files in bulk.
            maximum_number_verify_chunks = 3
            if task["chunks"] > maximum_number_verify_chunks:
                task["chunks"] = maximum_number_verify_chunks

        yield task

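# A sketch of the data flow through the transforms below (the manifest names
# here are illustrative, not real): after set_test_manifests runs, a task
# carries at least 'active' and 'skipped' keys (and, for some loaders,
# 'other_dirs'), e.g.
#
#     task["test-manifests"] = {
#         "active": ["dom/tests/mochitest.toml", ...],
#         "skipped": ["dom/tests/disabled.toml", ...],
#     }
#
# which resolve_dynamic_chunks and split_chunks then consume.
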
@transforms.add
def set_test_manifests(config, tasks):
    """Determine the set of test manifests that should run in this task."""
    for task in tasks:
        # When a task explicitly requests no 'test_manifest_loader', test
        # resolving will happen at test runtime rather than in the taskgraph.
        if "test-manifest-loader" in task and task["test-manifest-loader"] is None:
            yield task
            continue

        # Set 'tests_grouped' to "1", so we can differentiate between suites
        # that are chunked at the test runtime and those that are chunked in
        # the taskgraph.
        task.setdefault("tags", {})["tests_grouped"] = "1"

        if taskgraph.fast:
            # We want to avoid evaluating manifests when taskgraph.fast is
            # set. But manifests are required for dynamic chunking. Just set
            # the number of chunks to one in this case.
            if task["chunks"] == "dynamic":
                task["chunks"] = 1
            yield task
            continue

        manifests = task.get("test-manifests")
        if manifests:
            if isinstance(manifests, list):
                task["test-manifests"] = {"active": manifests, "skipped": []}
            yield task
            continue

        mozinfo = guess_mozinfo_from_task(
            task, config.params.get("head_repository", "")
        )

        loader_name = task.pop(
            "test-manifest-loader", config.params["test_manifest_loader"]
        )
        loader = get_manifest_loader(loader_name, config.params)

        task["test-manifests"] = loader.get_manifests(
            task["suite"],
            frozenset(mozinfo.items()),
        )

        # When scheduling with test paths, we often find manifests scheduled
        # but all tests are skipped on a given config. Remove the task from
        # the task set if no manifests have active tests for the given
        # task/config.
        mh_test_paths = {}
        if "MOZHARNESS_TEST_PATHS" in config.params.get("try_task_config", {}).get(
            "env", {}
        ):
            mh_test_paths = json.loads(
                config.params["try_task_config"]["env"]["MOZHARNESS_TEST_PATHS"]
            )

        if task["attributes"]["unittest_suite"] in mh_test_paths.keys():
            input_paths = mh_test_paths[task["attributes"]["unittest_suite"]]
            remaining_manifests = []

            # If there are web-platform tests incoming, just yield the task.
            for m in input_paths:
                if m.startswith("testing/web-platform/tests/"):
                    if not isinstance(loader, DefaultLoader):
                        task["chunks"] = "dynamic"
                    yield task
                    break

            # Input paths can exist in other directories (e.g.
            # [../../dir/test.js]), so we need to look for all [active]
            # manifests that include tests in the path.
            for m in input_paths:
                if [tm for tm in task["test-manifests"]["active"] if tm.startswith(m)]:
                    remaining_manifests.append(m)

            # Look in the 'other' manifests.
            for m in input_paths:
                man = m
                for tm in task["test-manifests"]["other_dirs"]:
                    matched_dirs = [
                        dp
                        for dp in task["test-manifests"]["other_dirs"].get(tm)
                        if dp.startswith(man)
                    ]
                    if matched_dirs:
                        if tm not in task["test-manifests"]["active"]:
                            continue
                        if m not in remaining_manifests:
                            remaining_manifests.append(m)

            if remaining_manifests == []:
                continue

        # The default loader loads all manifests. If we use a non-default
        # loader, we'll only run some subset of manifests and the hardcoded
        # chunk numbers will no longer be valid. Dynamic chunking should
        # yield better results.
        if not isinstance(loader, DefaultLoader):
            task["chunks"] = "dynamic"

        yield task


@transforms.add
def resolve_dynamic_chunks(config, tasks):
    """Determine how many chunks are needed to handle the given set of
    manifests."""
    for task in tasks:
        if task["chunks"] != "dynamic":
            yield task
            continue

        if not task.get("test-manifests"):
            raise Exception(
                "{} must define 'test-manifests' to use dynamic chunking!".format(
                    task["test-name"]
                )
            )

        runtimes = {
            m: r
            for m, r in get_runtimes(task["test-platform"], task["suite"]).items()
            if m in task["test-manifests"]["active"]
        }

        # Truncate runtimes that are above the desired chunk duration. They
        # will be assigned to a chunk on their own and the excess duration
        # shouldn't cause additional chunks to be needed.
        times = [min(DYNAMIC_CHUNK_DURATION, r) for r in runtimes.values()]
        avg = round(sum(times) / len(times), 2) if times else 0
        total = sum(times)

        # If there are manifests missing from the runtimes data, fill them in
        # with the average of all present manifests.
        missing = [m for m in task["test-manifests"]["active"] if m not in runtimes]
        total += avg * len(missing)

        # Apply any chunk multipliers if found.
        key = "{}-{}".format(task["test-platform"], task["test-name"])
        matches = keymatch(DYNAMIC_CHUNK_MULTIPLIER, key)
        if len(matches) > 1:
            raise Exception(
                "Multiple matching values for {} found while "
                "determining dynamic chunk multiplier!".format(key)
            )
        elif matches:
            total = total * matches[0]

        chunks = int(round(total / DYNAMIC_CHUNK_DURATION))

        # Make sure we never exceed the number of manifests, nor have a chunk
        # length of 0.
        task["chunks"] = min(chunks, len(task["test-manifests"]["active"])) or 1
        yield task

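# Worked example of the arithmetic in resolve_dynamic_chunks, with made-up
# manifest names and runtimes (in seconds):
#
#     runtimes = {"a.toml": 300, "b.toml": 2400, "c.toml": 600}
#
# The 2400s runtime is truncated to DYNAMIC_CHUNK_DURATION (1200s), giving
# times = [300, 1200, 600], so total = 2100 and avg = 700.0. A fourth active
# manifest with no recorded runtime contributes the average, bringing the
# total to 2800. Assuming no multiplier matches, chunks =
# int(round(2800 / 1200)) = 2, which is already <= the 4 active manifests,
# so the task is split into 2 chunks.
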
@transforms.add
def split_chunks(config, tasks):
    """Based on the 'chunks' key, split tests up into chunks by duplicating
    them, assigning 'this-chunk' appropriately and updating the treeherder
    symbol.
    """
    for task in tasks:
        # If test-manifests are set, chunk them ahead of time to avoid
        # running the algorithm more than once.
        chunked_manifests = None
        if "test-manifests" in task:
            manifests = task["test-manifests"]
            chunked_manifests = chunk_manifests(
                task["suite"],
                task["test-platform"],
                task["chunks"],
                manifests["active"],
            )

            # Add all skipped manifests to the first chunk of backstop pushes
            # so they still show up in the logs. They won't impact runtime
            # much and this way tools like ActiveData are still aware that
            # they exist.
            if config.params["backstop"] and manifests["active"]:
                chunked_manifests[0].extend(manifests["skipped"])

        for i in range(task["chunks"]):
            this_chunk = i + 1

            # Copy the test and update it with the chunk number.
            chunked = copy_task(task)
            chunked["this-chunk"] = this_chunk

            if chunked_manifests is not None:
                chunked["test-manifests"] = sorted(chunked_manifests[i])

            group, symbol = split_symbol(chunked["treeherder-symbol"])
            if task["chunks"] > 1 or not symbol:
                # Add the chunk number to the treeherder symbol.
                symbol += str(this_chunk)
            chunked["treeherder-symbol"] = join_symbol(group, symbol)

            yield chunked
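
# An illustration of split_chunks' effect (the symbols are hypothetical): a
# task with chunks=2 and treeherder-symbol "M(mda)" is duplicated into two
# tasks with this-chunk set to 1 and 2 and symbols "M(mda1)" and "M(mda2)",
# while a task with chunks=1 and a non-empty symbol passes through unchanged.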