diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /taskcluster/gecko_taskgraph/optimize | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'taskcluster/gecko_taskgraph/optimize')
-rw-r--r-- | taskcluster/gecko_taskgraph/optimize/__init__.py | 287 | ||||
-rw-r--r-- | taskcluster/gecko_taskgraph/optimize/backstop.py | 47 | ||||
-rw-r--r-- | taskcluster/gecko_taskgraph/optimize/bugbug.py | 321 | ||||
-rw-r--r-- | taskcluster/gecko_taskgraph/optimize/schema.py | 60 | ||||
-rw-r--r-- | taskcluster/gecko_taskgraph/optimize/strategies.py | 136 |
5 files changed, 851 insertions, 0 deletions
diff --git a/taskcluster/gecko_taskgraph/optimize/__init__.py b/taskcluster/gecko_taskgraph/optimize/__init__.py new file mode 100644 index 0000000000..c2d3fdf839 --- /dev/null +++ b/taskcluster/gecko_taskgraph/optimize/__init__.py @@ -0,0 +1,287 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +""" +The objective of optimization is to remove as many tasks from the graph as +possible, as efficiently as possible, thereby delivering useful results as +quickly as possible. For example, ideally if only a test script is modified in +a push, then the resulting graph contains only the corresponding test suite +task. + +See ``taskcluster/docs/optimization.rst`` for more information. +""" + +from taskgraph.optimize.base import Alias, All, Any, Not, register_strategy, registry +from taskgraph.util.python_path import import_sibling_modules + +# Use the gecko_taskgraph version of 'skip-unless-changed' for now. +registry.pop("skip-unless-changed", None) + +# Trigger registration in sibling modules. +import_sibling_modules() + + +def split_bugbug_arg(arg, substrategies): + """Split args for bugbug based strategies. + + Many bugbug based optimizations require passing an empty dict by reference + to communicate to downstream strategies. This function passes the provided + arg to the first (non bugbug) strategies and a shared empty dict to the + bugbug strategy and all substrategies after it. + """ + from gecko_taskgraph.optimize.bugbug import BugBugPushSchedules + + index = [ + i + for i, strategy in enumerate(substrategies) + if isinstance(strategy, BugBugPushSchedules) + ][0] + + return [arg] * index + [{}] * (len(substrategies) - index) + + +# Register composite strategies. +register_strategy("build", args=("skip-unless-schedules",))(Alias) +register_strategy("test", args=("skip-unless-schedules",))(Alias) +register_strategy("test-inclusive", args=("skip-unless-schedules",))(Alias) +register_strategy("test-verify", args=("skip-unless-schedules",))(Alias) +register_strategy("upload-symbols", args=("never",))(Alias) +register_strategy("reprocess-symbols", args=("never",))(Alias) + + +# Strategy overrides used to tweak the default strategies. These are referenced +# by the `optimize_strategies` parameter. + + +class project: + """Strategies that should be applied per-project.""" + + autoland = { + "test": Any( + # This `Any` strategy implements bi-modal behaviour. It allows different + # strategies on expanded pushes vs regular pushes. + # This first `All` handles "expanded" pushes. + All( + # There are three substrategies in this `All`, the first two act as barriers + # that help determine when to apply the third: + # 1. On backstop pushes, `skip-unless-backstop` returns False. Therefore + # the overall composite strategy is False and we don't optimize. + # 2. On regular pushes, `Not('skip-unless-expanded')` returns False. Therefore + # the overall composite strategy is False and we don't optimize. + # 3. On expanded pushes, the third strategy will determine whether or + # not to optimize each individual task. + # The barrier strategies. + "skip-unless-backstop", + Not("skip-unless-expanded"), + # The actual test strategy applied to "expanded" pushes. + Any( + "skip-unless-schedules", + "bugbug-reduced-manifests-fallback-last-10-pushes", + "platform-disperse", + split_args=split_bugbug_arg, + ), + ), + # This second `All` handles regular (aka not expanded or backstop) + # pushes. + All( + # There are two substrategies in this `All`, the first acts as a barrier + # that determines when to apply the second: + # 1. On expanded pushes (which includes backstops), `skip-unless-expanded` + # returns False. Therefore the overall composite strategy is False and we + # don't optimize. + # 2. On regular pushes, the second strategy will determine whether or + # not to optimize each individual task. + # The barrier strategy. + "skip-unless-expanded", + # The actual test strategy applied to regular pushes. + Any( + "skip-unless-schedules", + "bugbug-reduced-manifests-fallback-low", + "platform-disperse", + split_args=split_bugbug_arg, + ), + ), + ), + "build": All( + "skip-unless-expanded", + Any( + "skip-unless-schedules", + "bugbug-reduced-fallback", + split_args=split_bugbug_arg, + ), + ), + } + """Strategy overrides that apply to autoland.""" + + +class experimental: + """Experimental strategies either under development or used as benchmarks. + + These run as "shadow-schedulers" on each autoland push (tier 3) and/or can be used + with `./mach try auto`. E.g: + + ./mach try auto --strategy relevant_tests + """ + + bugbug_tasks_medium = { + "test": Any( + "skip-unless-schedules", "bugbug-tasks-medium", split_args=split_bugbug_arg + ), + } + """Doesn't limit platforms, medium confidence threshold.""" + + bugbug_tasks_high = { + "test": Any( + "skip-unless-schedules", "bugbug-tasks-high", split_args=split_bugbug_arg + ), + } + """Doesn't limit platforms, high confidence threshold.""" + + bugbug_debug_disperse = { + "test": Any( + "skip-unless-schedules", + "bugbug-low", + "platform-debug", + "platform-disperse", + split_args=split_bugbug_arg, + ), + } + """Restricts tests to debug platforms.""" + + bugbug_disperse_low = { + "test": Any( + "skip-unless-schedules", + "bugbug-low", + "platform-disperse", + split_args=split_bugbug_arg, + ), + } + """Disperse tests across platforms, low confidence threshold.""" + + bugbug_disperse_medium = { + "test": Any( + "skip-unless-schedules", + "bugbug-medium", + "platform-disperse", + split_args=split_bugbug_arg, + ), + } + """Disperse tests across platforms, medium confidence threshold.""" + + bugbug_disperse_reduced_medium = { + "test": Any( + "skip-unless-schedules", + "bugbug-reduced-manifests", + "platform-disperse", + split_args=split_bugbug_arg, + ), + } + """Disperse tests across platforms, medium confidence threshold with reduced tasks.""" + + bugbug_reduced_manifests_config_selection_low = { + "test": Any( + "skip-unless-schedules", + "bugbug-reduced-manifests-config-selection-low", + split_args=split_bugbug_arg, + ), + } + """Choose configs selected by bugbug, low confidence threshold with reduced tasks.""" + + bugbug_reduced_manifests_config_selection_medium = { + "test": Any( + "skip-unless-schedules", + "bugbug-reduced-manifests-config-selection", + split_args=split_bugbug_arg, + ), + } + """Choose configs selected by bugbug, medium confidence threshold with reduced tasks.""" + + bugbug_disperse_medium_no_unseen = { + "test": Any( + "skip-unless-schedules", + "bugbug-medium", + "platform-disperse-no-unseen", + split_args=split_bugbug_arg, + ), + } + """Disperse tests across platforms (no modified for unseen configurations), medium confidence + threshold.""" + + bugbug_disperse_medium_only_one = { + "test": Any( + "skip-unless-schedules", + "bugbug-medium", + "platform-disperse-only-one", + split_args=split_bugbug_arg, + ), + } + """Disperse tests across platforms (one platform per group), medium confidence threshold.""" + + bugbug_disperse_high = { + "test": Any( + "skip-unless-schedules", + "bugbug-high", + "platform-disperse", + split_args=split_bugbug_arg, + ), + } + """Disperse tests across platforms, high confidence threshold.""" + + bugbug_reduced = { + "test": Any( + "skip-unless-schedules", "bugbug-reduced", split_args=split_bugbug_arg + ), + } + """Use the reduced set of tasks (and no groups) chosen by bugbug.""" + + bugbug_reduced_high = { + "test": Any( + "skip-unless-schedules", "bugbug-reduced-high", split_args=split_bugbug_arg + ), + } + """Use the reduced set of tasks (and no groups) chosen by bugbug, high + confidence threshold.""" + + relevant_tests = { + "test": Any("skip-unless-schedules", "skip-unless-has-relevant-tests"), + } + """Runs task containing tests in the same directories as modified files.""" + + +class ExperimentalOverride: + """Overrides dictionaries that are stored in a container with new values. + + This can be used to modify all strategies in a collection the same way, + presumably with strategies affecting kinds of tasks tangential to the + current context. + + Args: + base (object): A container class supporting attribute access. + overrides (dict): Values to update any accessed dictionaries with. + """ + + def __init__(self, base, overrides): + self.base = base + self.overrides = overrides + + def __getattr__(self, name): + val = getattr(self.base, name).copy() + for name, strategy in self.overrides.items(): + if isinstance(strategy, str) and strategy.startswith("base:"): + strategy = val[strategy[len("base:") :]] + + val[name] = strategy + return val + + +tryselect = ExperimentalOverride( + experimental, + { + "build": Any( + "skip-unless-schedules", "bugbug-reduced", split_args=split_bugbug_arg + ), + "test-verify": "base:test", + "upload-symbols": Alias("always"), + "reprocess-symbols": Alias("always"), + }, +) diff --git a/taskcluster/gecko_taskgraph/optimize/backstop.py b/taskcluster/gecko_taskgraph/optimize/backstop.py new file mode 100644 index 0000000000..7b0c86222b --- /dev/null +++ b/taskcluster/gecko_taskgraph/optimize/backstop.py @@ -0,0 +1,47 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +from taskgraph.optimize.base import All, OptimizationStrategy, register_strategy + +from gecko_taskgraph.util.backstop import BACKSTOP_PUSH_INTERVAL + + +@register_strategy("skip-unless-backstop") +class SkipUnlessBackstop(OptimizationStrategy): + """Always removes tasks except on backstop pushes.""" + + def should_remove_task(self, task, params, _): + return not params["backstop"] + + +class SkipUnlessPushInterval(OptimizationStrategy): + """Always removes tasks except every N pushes. + + Args: + push_interval (int): Number of pushes + """ + + def __init__(self, push_interval, remove_on_projects=None): + self.push_interval = push_interval + + @property + def description(self): + return f"skip-unless-push-interval-{self.push_interval}" + + def should_remove_task(self, task, params, _): + # On every Nth push, want to run all tasks. + return int(params["pushlog_id"]) % self.push_interval != 0 + + +# Strategy to run tasks on "expanded" pushes, currently defined as pushes that +# are half the backstop interval. The 'All' composite strategy means that the +# "backstop" strategy will prevent "expanded" from applying on backstop pushes. +register_strategy( + "skip-unless-expanded", + args=( + "skip-unless-backstop", + SkipUnlessPushInterval(BACKSTOP_PUSH_INTERVAL / 2), + ), +)(All) diff --git a/taskcluster/gecko_taskgraph/optimize/bugbug.py b/taskcluster/gecko_taskgraph/optimize/bugbug.py new file mode 100644 index 0000000000..d8603560ef --- /dev/null +++ b/taskcluster/gecko_taskgraph/optimize/bugbug.py @@ -0,0 +1,321 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +from collections import defaultdict +from fnmatch import fnmatch + +from taskgraph.optimize.base import OptimizationStrategy, register_strategy, registry + +from gecko_taskgraph.util.bugbug import ( + CT_HIGH, + CT_LOW, + CT_MEDIUM, + BugbugTimeoutException, + push_schedules, +) +from gecko_taskgraph.util.hg import get_push_data + +FALLBACK = "skip-unless-has-relevant-tests" + + +def merge_bugbug_replies(data, new_data): + """Merge a bugbug reply (stored in the `new_data` argument) into another (stored + in the `data` argument). + """ + for key, value in new_data.items(): + if isinstance(value, dict): + if key not in data: + data[key] = {} + + if len(value) == 0: + continue + + dict_value = next(iter(value.values())) + if isinstance(dict_value, list): + for name, configs in value.items(): + if name not in data[key]: + data[key][name] = set() + + data[key][name].update(configs) + else: + for name, confidence in value.items(): + if name not in data[key] or data[key][name] < confidence: + data[key][name] = confidence + elif isinstance(value, list): + if key not in data: + data[key] = set() + + data[key].update(value) + + +@register_strategy("bugbug-low", args=(CT_LOW,)) +@register_strategy("bugbug-medium", args=(CT_MEDIUM,)) +@register_strategy("bugbug-high", args=(CT_HIGH,)) +@register_strategy("bugbug-tasks-medium", args=(CT_MEDIUM, True)) +@register_strategy("bugbug-tasks-high", args=(CT_HIGH, True)) +@register_strategy("bugbug-reduced", args=(CT_MEDIUM, True, True)) +@register_strategy("bugbug-reduced-fallback", args=(CT_MEDIUM, True, True, FALLBACK)) +@register_strategy("bugbug-reduced-high", args=(CT_HIGH, True, True)) +@register_strategy("bugbug-reduced-manifests", args=(CT_MEDIUM, False, True)) +@register_strategy( + "bugbug-reduced-manifests-config-selection-low", + args=(CT_LOW, False, True, None, 1, True), +) +@register_strategy( + "bugbug-reduced-manifests-config-selection", + args=(CT_MEDIUM, False, True, None, 1, True), +) +@register_strategy( + "bugbug-reduced-manifests-fallback-low", args=(CT_LOW, False, True, FALLBACK) +) +@register_strategy( + "bugbug-reduced-manifests-fallback", args=(CT_MEDIUM, False, True, FALLBACK) +) +@register_strategy( + "bugbug-reduced-manifests-fallback-last-10-pushes", + args=(0.3, False, True, FALLBACK, 10), +) +class BugBugPushSchedules(OptimizationStrategy): + """Query the 'bugbug' service to retrieve relevant tasks and manifests. + + Args: + confidence_threshold (float): The minimum confidence threshold (in + range [0, 1]) needed for a task to be scheduled. + tasks_only (bool): Whether or not to only use tasks and no groups + (default: False) + use_reduced_tasks (bool): Whether or not to use the reduced set of tasks + provided by the bugbug service (default: False). + fallback (str): The fallback strategy to use if there + was a failure in bugbug (default: None) + num_pushes (int): The number of pushes to consider for the selection + (default: 1). + select_configs (bool): Whether to select configurations for manifests + too (default: False). + """ + + def __init__( + self, + confidence_threshold, + tasks_only=False, + use_reduced_tasks=False, + fallback=None, + num_pushes=1, + select_configs=False, + ): + self.confidence_threshold = confidence_threshold + self.use_reduced_tasks = use_reduced_tasks + self.fallback = fallback + self.tasks_only = tasks_only + self.num_pushes = num_pushes + self.select_configs = select_configs + self.timedout = False + + def should_remove_task(self, task, params, importance): + project = params["project"] + + if project not in ("autoland", "try"): + return False + + current_push_id = int(params["pushlog_id"]) + + rev = params["head_rev"] + + if self.timedout: + return registry[self.fallback].should_remove_task(task, params, importance) + + data = {} + + start_push_id = current_push_id - self.num_pushes + 1 + if self.num_pushes != 1: + push_data = get_push_data( + params["head_repository"], project, start_push_id, current_push_id - 1 + ) + + for push_id in range(start_push_id, current_push_id + 1): + if push_id == current_push_id: + rev = params["head_rev"] + else: + rev = push_data[push_id]["changesets"][-1] + + try: + new_data = push_schedules(params["project"], rev) + merge_bugbug_replies(data, new_data) + except BugbugTimeoutException: + if not self.fallback: + raise + + self.timedout = True + return self.should_remove_task(task, params, importance) + + key = "reduced_tasks" if self.use_reduced_tasks else "tasks" + tasks = { + task + for task, confidence in data.get(key, {}).items() + if confidence >= self.confidence_threshold + } + + test_manifests = task.attributes.get("test_manifests") + if test_manifests is None or self.tasks_only: + if data.get("known_tasks") and task.label not in data["known_tasks"]: + return False + + if task.label not in tasks: + return True + + return False + + # If a task contains more than one group, use the max confidence. + groups = data.get("groups", {}) + confidences = [c for g, c in groups.items() if g in test_manifests] + if not confidences or max(confidences) < self.confidence_threshold: + return True + + # If the task configuration doesn't match the ones selected by bugbug for + # the manifests, optimize out. + if self.select_configs: + selected_groups = [ + g + for g, c in groups.items() + if g in test_manifests and c > self.confidence_threshold + ] + + config_groups = data.get("config_groups", defaultdict(list)) + + # Configurations returned by bugbug are in a format such as + # `test-windows10-64/opt-*-e10s`, while task labels are like + # test-windows10-64-qr/opt-mochitest-browser-chrome-e10s-6. + # In order to match the strings, we need to ignore the chunk number + # from the task label. + parts = task.label.split("-") + label_without_chunk_number = "-".join( + parts[:-1] if parts[-1].isdigit() else parts + ) + + if not any( + fnmatch(label_without_chunk_number, config) + for group in selected_groups + for config in config_groups[group] + ): + return True + + # Store group importance so future optimizers can access it. + for manifest in test_manifests: + if manifest not in groups: + continue + + confidence = groups[manifest] + if confidence >= CT_HIGH: + importance[manifest] = "high" + elif confidence >= CT_MEDIUM: + importance[manifest] = "medium" + elif confidence >= CT_LOW: + importance[manifest] = "low" + else: + importance[manifest] = "lowest" + + return False + + +@register_strategy("platform-debug") +class SkipUnlessDebug(OptimizationStrategy): + """Only run debug platforms.""" + + def should_remove_task(self, task, params, arg): + return ( + "build_type" in task.attributes and task.attributes["build_type"] != "debug" + ) + + +@register_strategy("platform-disperse") +@register_strategy("platform-disperse-no-unseen", args=(None, 0)) +@register_strategy( + "platform-disperse-only-one", + args=( + { + "high": 1, + "medium": 1, + "low": 1, + "lowest": 0, + }, + 0, + ), +) +class DisperseGroups(OptimizationStrategy): + """Disperse groups across test configs. + + Each task has an associated 'importance' dict passed in via the arg. This + is of the form `{<group>: <importance>}`. + + Where 'group' is a test group id (usually a path to a manifest), and 'importance' is + one of `{'lowest', 'low', 'medium', 'high'}`. + + Each importance value has an associated 'count' as defined in + `self.target_counts`. It guarantees that 'manifest' will run in at least + 'count' different configurations (assuming there are enough tasks + containing 'manifest'). + + On configurations that haven't been seen before, we'll increase the target + count by `self.unseen_modifier` to increase the likelihood of scheduling a + task on that configuration. + + Args: + target_counts (dict): Override DEFAULT_TARGET_COUNTS with custom counts. This + is a dict mapping the importance value ('lowest', 'low', etc) to the + minimum number of configurations manifests with this value should run + on. + + unseen_modifier (int): Override DEFAULT_UNSEEN_MODIFIER to a custom + value. This is the amount we'll increase 'target_count' by for unseen + configurations. + """ + + DEFAULT_TARGET_COUNTS = { + "high": 3, + "medium": 2, + "low": 1, + "lowest": 0, + } + DEFAULT_UNSEEN_MODIFIER = 1 + + def __init__(self, target_counts=None, unseen_modifier=DEFAULT_UNSEEN_MODIFIER): + self.target_counts = self.DEFAULT_TARGET_COUNTS.copy() + if target_counts: + self.target_counts.update(target_counts) + self.unseen_modifier = unseen_modifier + + self.count = defaultdict(int) + self.seen_configurations = set() + + def should_remove_task(self, task, params, importance): + test_manifests = task.attributes.get("test_manifests") + test_platform = task.attributes.get("test_platform") + + if not importance or not test_manifests or not test_platform: + return False + + # Build the test configuration key. + key = test_platform + if "unittest_variant" in task.attributes: + key += "-" + task.attributes["unittest_variant"] + + important_manifests = set(test_manifests) & set(importance) + for manifest in important_manifests: + target_count = self.target_counts[importance[manifest]] + + # If this configuration hasn't been seen before, increase the + # likelihood of scheduling the task. + if key not in self.seen_configurations: + target_count += self.unseen_modifier + + if self.count[manifest] < target_count: + # Update manifest counts and seen configurations. + self.seen_configurations.add(key) + for manifest in important_manifests: + self.count[manifest] += 1 + return False + + # Should remove task because all manifests have reached their + # importance count (or there were no important manifests). + return True diff --git a/taskcluster/gecko_taskgraph/optimize/schema.py b/taskcluster/gecko_taskgraph/optimize/schema.py new file mode 100644 index 0000000000..a7f878cf60 --- /dev/null +++ b/taskcluster/gecko_taskgraph/optimize/schema.py @@ -0,0 +1,60 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import logging + +import voluptuous +from mozbuild import schedules + +logger = logging.getLogger(__name__) + + +default_optimizations = ( + # always run this task (default) + None, + # always optimize this task + {"always": None}, + # optimize strategy aliases for build kind + {"build": list(schedules.ALL_COMPONENTS)}, + # search the index for the given index namespaces, and replace this task if found + # the search occurs in order, with the first match winning + {"index-search": [str]}, + # never optimize this task + {"never": None}, + # skip the task except for every Nth push + {"skip-unless-expanded": None}, + {"skip-unless-backstop": None}, + # skip this task if none of the given file patterns match + {"skip-unless-changed": [str]}, + # skip this task if unless the change files' SCHEDULES contains any of these components + {"skip-unless-schedules": list(schedules.ALL_COMPONENTS)}, + # optimize strategy aliases for the test kind + {"test": list(schedules.ALL_COMPONENTS)}, + {"test-inclusive": list(schedules.ALL_COMPONENTS)}, + # optimize strategy alias for test-verify tasks + {"test-verify": list(schedules.ALL_COMPONENTS)}, + # optimize strategy alias for upload-symbols tasks + {"upload-symbols": None}, + # optimize strategy alias for reprocess-symbols tasks + {"reprocess-symbols": None}, +) + +OptimizationSchema = voluptuous.Any(*default_optimizations) + + +def set_optimization_schema(schema_tuple): + """Sets OptimizationSchema so it can be imported by the task transform. + This function is called by projects that extend Firefox's taskgraph. + It should be called by the project's taskgraph:register function before + any transport or job runner code is imported. + + :param tuple schema_tuple: Tuple of possible optimization strategies + """ + global OptimizationSchema + if OptimizationSchema.validators == default_optimizations: + logger.info("OptimizationSchema updated.") + OptimizationSchema = voluptuous.Any(*schema_tuple) + else: + raise Exception("Can only call set_optimization_schema once.") diff --git a/taskcluster/gecko_taskgraph/optimize/strategies.py b/taskcluster/gecko_taskgraph/optimize/strategies.py new file mode 100644 index 0000000000..2e520c4750 --- /dev/null +++ b/taskcluster/gecko_taskgraph/optimize/strategies.py @@ -0,0 +1,136 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +import logging +from datetime import datetime + +import mozpack.path as mozpath +from mozbuild.base import MozbuildObject +from mozbuild.util import memoize +from taskgraph.optimize.base import OptimizationStrategy, register_strategy +from taskgraph.util.taskcluster import find_task_id + +from gecko_taskgraph import files_changed +from gecko_taskgraph.util.taskcluster import status_task + +logger = logging.getLogger(__name__) + + +@register_strategy("index-search") +class IndexSearch(OptimizationStrategy): + + # A task with no dependencies remaining after optimization will be replaced + # if artifacts exist for the corresponding index_paths. + # Otherwise, we're in one of the following cases: + # - the task has un-optimized dependencies + # - the artifacts have expired + # - some changes altered the index_paths and new artifacts need to be + # created. + # In every of those cases, we need to run the task to create or refresh + # artifacts. + + fmt = "%Y-%m-%dT%H:%M:%S.%fZ" + + def should_replace_task(self, task, params, deadline, index_paths): + "Look for a task with one of the given index paths" + for index_path in index_paths: + try: + task_id = find_task_id(index_path) + status = status_task(task_id) + # status can be `None` if we're in `testing` mode + # (e.g. test-action-callback) + if not status or status.get("state") in ("exception", "failed"): + continue + + if deadline and datetime.strptime( + status["expires"], self.fmt + ) < datetime.strptime(deadline, self.fmt): + continue + + return task_id + except KeyError: + # 404 will end up here and go on to the next index path + pass + + return False + + +@register_strategy("skip-unless-changed") +class SkipUnlessChanged(OptimizationStrategy): + def should_remove_task(self, task, params, file_patterns): + # pushlog_id == -1 - this is the case when run from a cron.yml job + if params.get("pushlog_id") == -1: + return False + + changed = files_changed.check(params, file_patterns) + if not changed: + logger.debug( + "no files found matching a pattern in `skip-unless-changed` for " + + task.label + ) + return True + return False + + +@register_strategy("skip-unless-schedules") +class SkipUnlessSchedules(OptimizationStrategy): + @memoize + def scheduled_by_push(self, repository, revision): + changed_files = files_changed.get_changed_files(repository, revision) + + mbo = MozbuildObject.from_environment() + # the decision task has a sparse checkout, so, mozbuild_reader will use + # a MercurialRevisionFinder with revision '.', which should be the same + # as `revision`; in other circumstances, it will use a default reader + rdr = mbo.mozbuild_reader(config_mode="empty") + + components = set() + for p, m in rdr.files_info(changed_files).items(): + components |= set(m["SCHEDULES"].components) + + return components + + def should_remove_task(self, task, params, conditions): + if params.get("pushlog_id") == -1: + return False + + scheduled = self.scheduled_by_push( + params["head_repository"], params["head_rev"] + ) + conditions = set(conditions) + # if *any* of the condition components are scheduled, do not optimize + if conditions & scheduled: + return False + + return True + + +@register_strategy("skip-unless-has-relevant-tests") +class SkipUnlessHasRelevantTests(OptimizationStrategy): + """Optimizes tasks that don't run any tests that were + in child directories of a modified file. + """ + + @memoize + def get_changed_dirs(self, repo, rev): + changed = map(mozpath.dirname, files_changed.get_changed_files(repo, rev)) + # Filter out empty directories (from files modified in the root). + # Otherwise all tasks would be scheduled. + return {d for d in changed if d} + + def should_remove_task(self, task, params, _): + if not task.attributes.get("test_manifests"): + return True + + for d in self.get_changed_dirs(params["head_repository"], params["head_rev"]): + for t in task.attributes["test_manifests"]: + if t.startswith(d): + logger.debug( + "{} runs a test path ({}) contained by a modified file ({})".format( + task.label, t, d + ) + ) + return False + return True |