summaryrefslogtreecommitdiffstats
path: root/taskcluster/gecko_taskgraph/optimize
diff options
context:
space:
mode:
Diffstat (limited to 'taskcluster/gecko_taskgraph/optimize')
-rw-r--r-- taskcluster/gecko_taskgraph/optimize/__init__.py 287
-rw-r--r-- taskcluster/gecko_taskgraph/optimize/backstop.py 47
-rw-r--r-- taskcluster/gecko_taskgraph/optimize/bugbug.py 323
-rw-r--r-- taskcluster/gecko_taskgraph/optimize/schema.py 60
-rw-r--r-- taskcluster/gecko_taskgraph/optimize/strategies.py 136
5 files changed, 853 insertions, 0 deletions
diff --git a/taskcluster/gecko_taskgraph/optimize/__init__.py b/taskcluster/gecko_taskgraph/optimize/__init__.py
new file mode 100644
index 0000000000..c2d3fdf839
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/__init__.py
@@ -0,0 +1,287 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+The objective of optimization is to remove as many tasks from the graph as
+possible, as efficiently as possible, thereby delivering useful results as
+quickly as possible. For example, ideally if only a test script is modified in
+a push, then the resulting graph contains only the corresponding test suite
+task.
+
+See ``taskcluster/docs/optimization.rst`` for more information.
+"""
+
+from taskgraph.optimize.base import Alias, All, Any, Not, register_strategy, registry
+from taskgraph.util.python_path import import_sibling_modules
+
+# Use the gecko_taskgraph version of 'skip-unless-changed' for now.
+registry.pop("skip-unless-changed", None)
+
+# Trigger registration in sibling modules.
+import_sibling_modules()
+
+
+def split_bugbug_arg(arg, substrategies):
+    """Build the per-substrategy argument list for bugbug composites.
+
+    Substrategies that come before the first bugbug strategy receive ``arg``
+    unchanged. The bugbug strategy and everything after it receive one and the
+    same empty dict object, which they use to pass data to each other by
+    reference.
+
+    Raises:
+        IndexError: if none of ``substrategies`` is a ``BugBugPushSchedules``.
+    """
+    # Imported lazily, inside the function, rather than at module import time.
+    from gecko_taskgraph.optimize.bugbug import BugBugPushSchedules
+
+    bugbug_positions = [
+        position
+        for position, candidate in enumerate(substrategies)
+        if isinstance(candidate, BugBugPushSchedules)
+    ]
+    first_bugbug = bugbug_positions[0]
+
+    # A single dict instance, deliberately repeated (not copied) so that every
+    # strategy from the bugbug one onwards sees the same object.
+    shared = {}
+    leading = [arg] * first_bugbug
+    trailing = [shared] * (len(substrategies) - first_bugbug)
+    return leading + trailing
+
+
+# Register composite strategies.
+# Each call registers `Alias` under a task-kind name; the single entry in
+# `args` is the name of the strategy that the alias stands for. The build and
+# test kinds alias "skip-unless-schedules", the symbol kinds alias "never".
+register_strategy("build", args=("skip-unless-schedules",))(Alias)
+register_strategy("test", args=("skip-unless-schedules",))(Alias)
+register_strategy("test-inclusive", args=("skip-unless-schedules",))(Alias)
+register_strategy("test-verify", args=("skip-unless-schedules",))(Alias)
+register_strategy("upload-symbols", args=("never",))(Alias)
+register_strategy("reprocess-symbols", args=("never",))(Alias)
+
+
+# Strategy overrides used to tweak the default strategies. These are referenced
+# by the `optimize_strategies` parameter.
+
+
+class project:
+ """Strategies that should be applied per-project.
+
+ Each attribute is a dict keyed by strategy alias (e.g. ``test``, ``build``)
+ whose value is the composite strategy to use in place of the default
+ registration for that alias.
+ """
+
+ autoland = {
+ "test": Any(
+ # This `Any` strategy implements bi-modal behaviour. It allows different
+ # strategies on expanded pushes vs regular pushes.
+ # This first `All` handles "expanded" pushes.
+ All(
+ # There are three substrategies in this `All`, the first two act as barriers
+ # that help determine when to apply the third:
+ # 1. On backstop pushes, `skip-unless-backstop` returns False. Therefore
+ # the overall composite strategy is False and we don't optimize.
+ # 2. On regular pushes, `Not('skip-unless-expanded')` returns False. Therefore
+ # the overall composite strategy is False and we don't optimize.
+ # 3. On expanded pushes, the third strategy will determine whether or
+ # not to optimize each individual task.
+ # The barrier strategies.
+ "skip-unless-backstop",
+ Not("skip-unless-expanded"),
+ # The actual test strategy applied to "expanded" pushes.
+ # NOTE: `split_bugbug_arg` requires a bugbug strategy among these
+ # substrategies; the ones listed before it get the task's own arg.
+ Any(
+ "skip-unless-schedules",
+ "bugbug-reduced-manifests-fallback-last-10-pushes",
+ "platform-disperse",
+ split_args=split_bugbug_arg,
+ ),
+ ),
+ # This second `All` handles regular (aka not expanded or backstop)
+ # pushes.
+ All(
+ # There are two substrategies in this `All`, the first acts as a barrier
+ # that determines when to apply the second:
+ # 1. On expanded pushes (which includes backstops), `skip-unless-expanded`
+ # returns False. Therefore the overall composite strategy is False and we
+ # don't optimize.
+ # 2. On regular pushes, the second strategy will determine whether or
+ # not to optimize each individual task.
+ # The barrier strategy.
+ "skip-unless-expanded",
+ # The actual test strategy applied to regular pushes.
+ Any(
+ "skip-unless-schedules",
+ "bugbug-reduced-manifests-fallback-low",
+ "platform-disperse",
+ split_args=split_bugbug_arg,
+ ),
+ ),
+ ),
+ # Builds use the same barrier as regular test pushes: `skip-unless-expanded`
+ # returns False on expanded pushes, so builds are not optimized there.
+ "build": All(
+ "skip-unless-expanded",
+ Any(
+ "skip-unless-schedules",
+ "bugbug-reduced-fallback",
+ split_args=split_bugbug_arg,
+ ),
+ ),
+ }
+ """Strategy overrides that apply to autoland."""
+
+
+class experimental:
+ """Experimental strategies either under development or used as benchmarks.
+
+ These run as "shadow-schedulers" on each autoland push (tier 3) and/or can be used
+ with `./mach try auto`. E.g:
+
+ ./mach try auto --strategy relevant_tests
+
+ Every attribute is a dict that overrides only the ``test`` alias. All of
+ them except ``relevant_tests`` combine ``skip-unless-schedules`` with a
+ bugbug strategy and use ``split_bugbug_arg`` to hand out the arguments.
+ """
+
+ # The order of substrategies inside each `Any` matters: `split_bugbug_arg`
+ # gives the task's own arg to everything before the first bugbug strategy
+ # and a shared dict to the bugbug strategy and everything after it.
+
+ bugbug_tasks_medium = {
+ "test": Any(
+ "skip-unless-schedules", "bugbug-tasks-medium", split_args=split_bugbug_arg
+ ),
+ }
+ """Doesn't limit platforms, medium confidence threshold."""
+
+ bugbug_tasks_high = {
+ "test": Any(
+ "skip-unless-schedules", "bugbug-tasks-high", split_args=split_bugbug_arg
+ ),
+ }
+ """Doesn't limit platforms, high confidence threshold."""
+
+ bugbug_debug_disperse = {
+ "test": Any(
+ "skip-unless-schedules",
+ "bugbug-low",
+ "platform-debug",
+ "platform-disperse",
+ split_args=split_bugbug_arg,
+ ),
+ }
+ """Restricts tests to debug platforms."""
+
+ bugbug_disperse_low = {
+ "test": Any(
+ "skip-unless-schedules",
+ "bugbug-low",
+ "platform-disperse",
+ split_args=split_bugbug_arg,
+ ),
+ }
+ """Disperse tests across platforms, low confidence threshold."""
+
+ bugbug_disperse_medium = {
+ "test": Any(
+ "skip-unless-schedules",
+ "bugbug-medium",
+ "platform-disperse",
+ split_args=split_bugbug_arg,
+ ),
+ }
+ """Disperse tests across platforms, medium confidence threshold."""
+
+ bugbug_disperse_reduced_medium = {
+ "test": Any(
+ "skip-unless-schedules",
+ "bugbug-reduced-manifests",
+ "platform-disperse",
+ split_args=split_bugbug_arg,
+ ),
+ }
+ """Disperse tests across platforms, medium confidence threshold with reduced tasks."""
+
+ bugbug_reduced_manifests_config_selection_low = {
+ "test": Any(
+ "skip-unless-schedules",
+ "bugbug-reduced-manifests-config-selection-low",
+ split_args=split_bugbug_arg,
+ ),
+ }
+ """Choose configs selected by bugbug, low confidence threshold with reduced tasks."""
+
+ bugbug_reduced_manifests_config_selection_medium = {
+ "test": Any(
+ "skip-unless-schedules",
+ "bugbug-reduced-manifests-config-selection",
+ split_args=split_bugbug_arg,
+ ),
+ }
+ """Choose configs selected by bugbug, medium confidence threshold with reduced tasks."""
+
+ # "platform-disperse-no-unseen" is the disperse strategy registered with an
+ # unseen-configuration modifier of 0.
+ bugbug_disperse_medium_no_unseen = {
+ "test": Any(
+ "skip-unless-schedules",
+ "bugbug-medium",
+ "platform-disperse-no-unseen",
+ split_args=split_bugbug_arg,
+ ),
+ }
+ """Disperse tests across platforms (no modified for unseen configurations), medium confidence
+ threshold."""
+
+ bugbug_disperse_medium_only_one = {
+ "test": Any(
+ "skip-unless-schedules",
+ "bugbug-medium",
+ "platform-disperse-only-one",
+ split_args=split_bugbug_arg,
+ ),
+ }
+ """Disperse tests across platforms (one platform per group), medium confidence threshold."""
+
+ bugbug_disperse_high = {
+ "test": Any(
+ "skip-unless-schedules",
+ "bugbug-high",
+ "platform-disperse",
+ split_args=split_bugbug_arg,
+ ),
+ }
+ """Disperse tests across platforms, high confidence threshold."""
+
+ bugbug_reduced = {
+ "test": Any(
+ "skip-unless-schedules", "bugbug-reduced", split_args=split_bugbug_arg
+ ),
+ }
+ """Use the reduced set of tasks (and no groups) chosen by bugbug."""
+
+ bugbug_reduced_high = {
+ "test": Any(
+ "skip-unless-schedules", "bugbug-reduced-high", split_args=split_bugbug_arg
+ ),
+ }
+ """Use the reduced set of tasks (and no groups) chosen by bugbug, high
+ confidence threshold."""
+
+ # No bugbug strategy here, hence no `split_args`: `split_bugbug_arg` would
+ # fail without a bugbug substrategy to split on.
+ relevant_tests = {
+ "test": Any("skip-unless-schedules", "skip-unless-has-relevant-tests"),
+ }
+ """Runs task containing tests in the same directories as modified files."""
+
+
+class ExperimentalOverride:
+    """Overrides dictionaries that are stored in a container with new values.
+
+    This can be used to modify all strategies in a collection the same way,
+    presumably with strategies affecting kinds of tasks tangential to the
+    current context.
+
+    Attribute access is forwarded to ``base``. The dict found there is
+    shallow-copied (the original is never mutated) and then updated with
+    ``overrides``. An override value of the form ``"base:<key>"`` is replaced
+    by the value stored under ``<key>`` in the copied dict at that point, so
+    it can see overrides that were applied earlier in iteration order.
+
+    Args:
+        base (object): A container class supporting attribute access.
+        overrides (dict): Values to update any accessed dictionaries with.
+    """
+
+    # Attributes that live on the wrapper itself and must never be forwarded.
+    _OWN_ATTRIBUTES = frozenset({"base", "overrides"})
+
+    def __init__(self, base, overrides):
+        self.base = base
+        self.overrides = overrides
+
+    def __getattr__(self, name):
+        """Return a copy of ``base.<name>`` with the overrides applied.
+
+        Raises:
+            AttributeError: if ``base`` has no such attribute, or if the
+                wrapper's own ``base``/``overrides`` have not been set yet.
+            KeyError: if a ``"base:<key>"`` override names a missing key.
+        """
+        # `__getattr__` only runs after normal lookup has failed. If `base` or
+        # `overrides` is what is missing (an instance created without
+        # `__init__`, e.g. while being copied or unpickled), forwarding would
+        # call back into this method forever.
+        if name in self._OWN_ATTRIBUTES:
+            raise AttributeError(name)
+
+        val = getattr(self.base, name).copy()
+        for key, strategy in self.overrides.items():
+            if isinstance(strategy, str) and strategy.startswith("base:"):
+                strategy = val[strategy[len("base:") :]]
+
+            val[key] = strategy
+        return val
+
+
+# View of `experimental` for try pushes: every strategy dict read through
+# `tryselect` is a copy of the `experimental` one with the entries below added
+# or replaced. "base:test" makes `test-verify` reuse whatever the accessed
+# dict holds under "test".
+tryselect = ExperimentalOverride(
+ experimental,
+ {
+ "build": Any(
+ "skip-unless-schedules", "bugbug-reduced", split_args=split_bugbug_arg
+ ),
+ "test-verify": "base:test",
+ "upload-symbols": Alias("always"),
+ "reprocess-symbols": Alias("always"),
+ },
+)
diff --git a/taskcluster/gecko_taskgraph/optimize/backstop.py b/taskcluster/gecko_taskgraph/optimize/backstop.py
new file mode 100644
index 0000000000..7b0c86222b
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/backstop.py
@@ -0,0 +1,47 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from taskgraph.optimize.base import All, OptimizationStrategy, register_strategy
+
+from gecko_taskgraph.util.backstop import BACKSTOP_PUSH_INTERVAL
+
+
+@register_strategy("skip-unless-backstop")
+class SkipUnlessBackstop(OptimizationStrategy):
+    """Remove every task unless the push is flagged as a backstop."""
+
+    def should_remove_task(self, task, params, _):
+        # The decision rests solely on the `backstop` parameter; the task and
+        # the strategy argument are ignored.
+        is_backstop = params["backstop"]
+        return not is_backstop
+
+
+class SkipUnlessPushInterval(OptimizationStrategy):
+    """Remove every task except on each Nth push.
+
+    Args:
+        push_interval (int): Number of pushes between pushes on which tasks
+            are kept.
+        remove_on_projects: Accepted but not used by this class.
+    """
+
+    def __init__(self, push_interval, remove_on_projects=None):
+        self.push_interval = push_interval
+
+    @property
+    def description(self):
+        return f"skip-unless-push-interval-{self.push_interval}"
+
+    def should_remove_task(self, task, params, _):
+        # Tasks are kept only when the pushlog id is an exact multiple of the
+        # interval.
+        push_number = int(params["pushlog_id"])
+        on_interval = push_number % self.push_interval == 0
+        return not on_interval
+
+
+# Strategy to run tasks on "expanded" pushes, currently defined as pushes that
+# are half the backstop interval. The 'All' composite strategy means that the
+# "backstop" strategy will prevent "expanded" from applying on backstop pushes.
+#
+# Floor division keeps the interval an integer, as `SkipUnlessPushInterval`
+# documents. True division would pass a float, producing a description such as
+# "skip-unless-push-interval-10.0" and, for an odd backstop interval, a
+# fractional modulus that only ever matches on the backstop pushes themselves.
+register_strategy(
+    "skip-unless-expanded",
+    args=(
+        "skip-unless-backstop",
+        SkipUnlessPushInterval(BACKSTOP_PUSH_INTERVAL // 2),
+    ),
+)(All)
diff --git a/taskcluster/gecko_taskgraph/optimize/bugbug.py b/taskcluster/gecko_taskgraph/optimize/bugbug.py
new file mode 100644
index 0000000000..1ae717b53c
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/bugbug.py
@@ -0,0 +1,323 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from collections import defaultdict
+from fnmatch import fnmatch
+from urllib.parse import urlsplit
+
+from taskgraph.optimize.base import OptimizationStrategy, register_strategy, registry
+
+from gecko_taskgraph.util.bugbug import (
+ CT_HIGH,
+ CT_LOW,
+ CT_MEDIUM,
+ BugbugTimeoutException,
+ push_schedules,
+)
+from gecko_taskgraph.util.hg import get_push_data
+
+FALLBACK = "skip-unless-has-relevant-tests"
+
+
+def merge_bugbug_replies(data, new_data):
+    """Fold the bugbug reply ``new_data`` into the accumulated reply ``data``.
+
+    ``data`` is modified in place. Per top-level key of ``new_data``:
+
+    * dict whose values are lists: each list is merged into a set stored
+      under the same name;
+    * dict with any other value type: the larger value per name is kept;
+    * list: merged into a set;
+    * anything else: ignored.
+
+    The kind of a dict is decided from its first value only.
+    """
+    for key, incoming in new_data.items():
+        if isinstance(incoming, dict):
+            merged = data.setdefault(key, {})
+            if not incoming:
+                # The key is still recorded, with an empty dict.
+                continue
+
+            sample = next(iter(incoming.values()))
+            if isinstance(sample, list):
+                for name, configs in incoming.items():
+                    merged.setdefault(name, set()).update(configs)
+            else:
+                for name, confidence in incoming.items():
+                    if name not in merged or merged[name] < confidence:
+                        merged[name] = confidence
+        elif isinstance(incoming, list):
+            data.setdefault(key, set()).update(incoming)
+
+
+@register_strategy("bugbug-low", args=(CT_LOW,))
+@register_strategy("bugbug-medium", args=(CT_MEDIUM,))
+@register_strategy("bugbug-high", args=(CT_HIGH,))
+@register_strategy("bugbug-tasks-medium", args=(CT_MEDIUM, True))
+@register_strategy("bugbug-tasks-high", args=(CT_HIGH, True))
+@register_strategy("bugbug-reduced", args=(CT_MEDIUM, True, True))
+@register_strategy("bugbug-reduced-fallback", args=(CT_MEDIUM, True, True, FALLBACK))
+@register_strategy("bugbug-reduced-high", args=(CT_HIGH, True, True))
+@register_strategy("bugbug-reduced-manifests", args=(CT_MEDIUM, False, True))
+@register_strategy(
+    "bugbug-reduced-manifests-config-selection-low",
+    args=(CT_LOW, False, True, None, 1, True),
+)
+@register_strategy(
+    "bugbug-reduced-manifests-config-selection",
+    args=(CT_MEDIUM, False, True, None, 1, True),
+)
+@register_strategy(
+    "bugbug-reduced-manifests-fallback-low", args=(CT_LOW, False, True, FALLBACK)
+)
+@register_strategy(
+    "bugbug-reduced-manifests-fallback", args=(CT_MEDIUM, False, True, FALLBACK)
+)
+@register_strategy(
+    "bugbug-reduced-manifests-fallback-last-10-pushes",
+    args=(0.3, False, True, FALLBACK, 10),
+)
+class BugBugPushSchedules(OptimizationStrategy):
+    """Query the 'bugbug' service to retrieve relevant tasks and manifests.
+
+    Args:
+        confidence_threshold (float): The minimum confidence threshold (in
+            range [0, 1]) needed for a task to be scheduled.
+        tasks_only (bool): Whether or not to only use tasks and no groups
+            (default: False)
+        use_reduced_tasks (bool): Whether or not to use the reduced set of tasks
+            provided by the bugbug service (default: False).
+        fallback (str): The fallback strategy to use if there
+            was a failure in bugbug (default: None)
+        num_pushes (int): The number of pushes to consider for the selection
+            (default: 1).
+        select_configs (bool): Whether to select configurations for manifests
+            too (default: False).
+    """
+
+    def __init__(
+        self,
+        confidence_threshold,
+        tasks_only=False,
+        use_reduced_tasks=False,
+        fallback=None,
+        num_pushes=1,
+        select_configs=False,
+    ):
+        self.confidence_threshold = confidence_threshold
+        self.use_reduced_tasks = use_reduced_tasks
+        self.fallback = fallback
+        self.tasks_only = tasks_only
+        self.num_pushes = num_pushes
+        self.select_configs = select_configs
+        # Set once bugbug has timed out; from then on the fallback strategy is
+        # consulted instead of querying bugbug again.
+        self.timedout = False
+
+    def should_remove_task(self, task, params, importance):
+        """Decide whether ``task`` can be dropped based on bugbug's reply.
+
+        Args:
+            task: The task under consideration; its ``label`` and
+                ``attributes`` are read.
+            params: Decision parameters; ``project``, ``pushlog_id``,
+                ``head_repository`` and ``head_rev`` are read.
+            importance (dict): Filled in with ``{manifest: level}`` for the
+                manifests of tasks that are kept, so later strategies can
+                read it.
+
+        Returns:
+            bool: True if the task should be removed.
+
+        Raises:
+            BugbugTimeoutException: if bugbug times out and no fallback
+                strategy was configured.
+        """
+        project = params["project"]
+
+        # Only autoland and try are handled; elsewhere nothing is removed.
+        if project not in ("autoland", "try"):
+            return False
+
+        current_push_id = int(params["pushlog_id"])
+
+        branch = urlsplit(params["head_repository"]).path.strip("/")
+
+        if self.timedout:
+            return registry[self.fallback].should_remove_task(task, params, importance)
+
+        data = {}
+
+        start_push_id = current_push_id - self.num_pushes + 1
+        if self.num_pushes != 1:
+            # Earlier pushes are only looked up when more than the current
+            # push is being considered.
+            push_data = get_push_data(
+                params["head_repository"], project, start_push_id, current_push_id - 1
+            )
+
+        for push_id in range(start_push_id, current_push_id + 1):
+            if push_id == current_push_id:
+                rev = params["head_rev"]
+            else:
+                rev = push_data[push_id]["changesets"][-1]
+
+            try:
+                new_data = push_schedules(branch, rev)
+                merge_bugbug_replies(data, new_data)
+            except BugbugTimeoutException:
+                if not self.fallback:
+                    raise
+
+                # Remember the timeout and re-enter, which takes the fallback
+                # branch above.
+                self.timedout = True
+                return self.should_remove_task(task, params, importance)
+
+        key = "reduced_tasks" if self.use_reduced_tasks else "tasks"
+        tasks = {
+            label
+            for label, confidence in data.get(key, {}).items()
+            if confidence >= self.confidence_threshold
+        }
+
+        test_manifests = task.attributes.get("test_manifests")
+        if test_manifests is None or self.tasks_only:
+            # Tasks bugbug does not know about are always kept.
+            if data.get("known_tasks") and task.label not in data["known_tasks"]:
+                return False
+
+            if task.label not in tasks:
+                return True
+
+            return False
+
+        # If a task contains more than one group, use the max confidence.
+        groups = data.get("groups", {})
+        confidences = [c for g, c in groups.items() if g in test_manifests]
+        if not confidences or max(confidences) < self.confidence_threshold:
+            return True
+
+        # If the task configuration doesn't match the ones selected by bugbug for
+        # the manifests, optimize out.
+        if self.select_configs:
+            # `>=` matches the threshold checks above: a group sitting exactly
+            # on the threshold passed them and must be selectable here too.
+            selected_groups = [
+                g
+                for g, c in groups.items()
+                if g in test_manifests and c >= self.confidence_threshold
+            ]
+
+            # The merged reply stores a plain dict here, so a selected group
+            # may be absent from it; treat that as "no configurations".
+            config_groups = data.get("config_groups", {})
+
+            # Configurations returned by bugbug are in a format such as
+            # `test-windows10-64/opt-*-e10s`, while task labels are like
+            # test-windows10-64-qr/opt-mochitest-browser-chrome-e10s-6.
+            # In order to match the strings, we need to ignore the chunk number
+            # from the task label.
+            parts = task.label.split("-")
+            label_without_chunk_number = "-".join(
+                parts[:-1] if parts[-1].isdigit() else parts
+            )
+
+            if not any(
+                fnmatch(label_without_chunk_number, config)
+                for group in selected_groups
+                for config in config_groups.get(group, ())
+            ):
+                return True
+
+        # Store group importance so future optimizers can access it.
+        for manifest in test_manifests:
+            if manifest not in groups:
+                continue
+
+            confidence = groups[manifest]
+            if confidence >= CT_HIGH:
+                importance[manifest] = "high"
+            elif confidence >= CT_MEDIUM:
+                importance[manifest] = "medium"
+            elif confidence >= CT_LOW:
+                importance[manifest] = "low"
+            else:
+                importance[manifest] = "lowest"
+
+        return False
+
+
+@register_strategy("platform-debug")
+class SkipUnlessDebug(OptimizationStrategy):
+    """Keep only tasks whose build type is debug (or unspecified)."""
+
+    def should_remove_task(self, task, params, arg):
+        attributes = task.attributes
+        # Tasks that do not declare a build type are never removed here.
+        if "build_type" not in attributes:
+            return False
+        return attributes["build_type"] != "debug"
+
+
+@register_strategy("platform-disperse")
+@register_strategy("platform-disperse-no-unseen", args=(None, 0))
+@register_strategy(
+    "platform-disperse-only-one",
+    args=(
+        {
+            "high": 1,
+            "medium": 1,
+            "low": 1,
+            "lowest": 0,
+        },
+        0,
+    ),
+)
+class DisperseGroups(OptimizationStrategy):
+    """Spread test groups over several test configurations.
+
+    The strategy argument is an 'importance' dict of the form
+    `{<group>: <importance>}`, where 'group' is a test group id (usually a
+    path to a manifest) and 'importance' is one of
+    `{'lowest', 'low', 'medium', 'high'}`.
+
+    Every importance level maps to a 'count' in `self.target_counts`. A task
+    is kept as long as at least one of its important manifests has been kept
+    fewer than 'count' times so far, so each manifest runs in at least 'count'
+    configurations (provided enough tasks contain it).
+
+    For a configuration that has not been kept before, the target count is
+    raised by `self.unseen_modifier`, which makes scheduling a task on that
+    configuration more likely.
+
+    Args:
+        target_counts (dict): Entries that override DEFAULT_TARGET_COUNTS.
+            Maps an importance value ('lowest', 'low', etc) to the minimum
+            number of configurations that manifests of that importance
+            should run on.
+
+        unseen_modifier (int): Replaces DEFAULT_UNSEEN_MODIFIER, the amount
+            added to the target count for unseen configurations.
+    """
+
+    DEFAULT_TARGET_COUNTS = {
+        "high": 3,
+        "medium": 2,
+        "low": 1,
+        "lowest": 0,
+    }
+    DEFAULT_UNSEEN_MODIFIER = 1
+
+    def __init__(self, target_counts=None, unseen_modifier=DEFAULT_UNSEEN_MODIFIER):
+        # Work on a private copy so the class-level defaults stay untouched.
+        counts = self.DEFAULT_TARGET_COUNTS.copy()
+        if target_counts:
+            counts.update(target_counts)
+        self.target_counts = counts
+        self.unseen_modifier = unseen_modifier
+
+        # Per-manifest number of kept tasks, and the configurations kept so far.
+        self.count = defaultdict(int)
+        self.seen_configurations = set()
+
+    def should_remove_task(self, task, params, importance):
+        attributes = task.attributes
+        test_manifests = attributes.get("test_manifests")
+        test_platform = attributes.get("test_platform")
+
+        # Without importance data, manifests or a platform there is nothing to
+        # disperse, so the task is kept.
+        if not (importance and test_manifests and test_platform):
+            return False
+
+        # The configuration is the platform, plus the variant when present.
+        config_key = test_platform
+        if "unittest_variant" in attributes:
+            config_key += "-" + attributes["unittest_variant"]
+
+        # Unseen configurations get a raised target for every manifest.
+        bonus = 0 if config_key in self.seen_configurations else self.unseen_modifier
+
+        important_manifests = set(test_manifests) & set(importance)
+        for manifest in important_manifests:
+            quota = self.target_counts[importance[manifest]] + bonus
+            if self.count[manifest] >= quota:
+                continue
+
+            # This manifest still needs runs: keep the task and credit every
+            # important manifest it contains.
+            self.seen_configurations.add(config_key)
+            for credited in important_manifests:
+                self.count[credited] += 1
+            return False
+
+        # Every manifest has reached its target count (or none was important).
+        return True
diff --git a/taskcluster/gecko_taskgraph/optimize/schema.py b/taskcluster/gecko_taskgraph/optimize/schema.py
new file mode 100644
index 0000000000..a7f878cf60
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/schema.py
@@ -0,0 +1,60 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+
+import voluptuous
+from mozbuild import schedules
+
+logger = logging.getLogger(__name__)
+
+
+# The optimization values accepted by default. Each entry is either None or a
+# single-key dict mapping a strategy name to the schema of its argument.
+default_optimizations = (
+ # always run this task (default)
+ None,
+ # always optimize this task
+ {"always": None},
+ # optimize strategy aliases for build kind
+ {"build": list(schedules.ALL_COMPONENTS)},
+ # search the index for the given index namespaces, and replace this task if found
+ # the search occurs in order, with the first match winning
+ {"index-search": [str]},
+ # never optimize this task
+ {"never": None},
+ # skip the task except for every Nth push
+ {"skip-unless-expanded": None},
+ {"skip-unless-backstop": None},
+ # skip this task if none of the given file patterns match
+ {"skip-unless-changed": [str]},
+ # skip this task unless the changed files' SCHEDULES contains any of these components
+ {"skip-unless-schedules": list(schedules.ALL_COMPONENTS)},
+ # optimize strategy aliases for the test kind
+ {"test": list(schedules.ALL_COMPONENTS)},
+ {"test-inclusive": list(schedules.ALL_COMPONENTS)},
+ # optimize strategy alias for test-verify tasks
+ {"test-verify": list(schedules.ALL_COMPONENTS)},
+ # optimize strategy alias for upload-symbols tasks
+ {"upload-symbols": None},
+ # optimize strategy alias for reprocess-symbols tasks
+ {"reprocess-symbols": None},
+)
+
+# Schema in effect; `set_optimization_schema` may rebind it once.
+OptimizationSchema = voluptuous.Any(*default_optimizations)
+
+
+def set_optimization_schema(schema_tuple):
+    """Replace the module-level OptimizationSchema with a custom one.
+
+    Intended for projects that extend Firefox's taskgraph. It should be
+    called from the project's taskgraph:register function, before any
+    transport or job runner code is imported, so the task transform imports
+    the updated schema.
+
+    :param tuple schema_tuple: Tuple of possible optimization strategies
+    :raises Exception: if the schema no longer holds the default validators,
+        i.e. it was already replaced.
+    """
+    global OptimizationSchema
+    # Guard first: only the pristine default schema may be swapped out.
+    if OptimizationSchema.validators != default_optimizations:
+        raise Exception("Can only call set_optimization_schema once.")
+
+    logger.info("OptimizationSchema updated.")
+    OptimizationSchema = voluptuous.Any(*schema_tuple)
diff --git a/taskcluster/gecko_taskgraph/optimize/strategies.py b/taskcluster/gecko_taskgraph/optimize/strategies.py
new file mode 100644
index 0000000000..2e520c4750
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/strategies.py
@@ -0,0 +1,136 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+from datetime import datetime
+
+import mozpack.path as mozpath
+from mozbuild.base import MozbuildObject
+from mozbuild.util import memoize
+from taskgraph.optimize.base import OptimizationStrategy, register_strategy
+from taskgraph.util.taskcluster import find_task_id
+
+from gecko_taskgraph import files_changed
+from gecko_taskgraph.util.taskcluster import status_task
+
+logger = logging.getLogger(__name__)
+
+
+@register_strategy("index-search")
+class IndexSearch(OptimizationStrategy):
+
+    # A task that has no dependencies left after optimization is replaced when
+    # artifacts exist for one of its index_paths. When that is not the case,
+    # one of the following holds:
+    #  - the task has un-optimized dependencies
+    #  - the artifacts have expired
+    #  - some changes altered the index_paths and new artifacts need to be
+    #    created.
+    # In each of those cases the task has to run to create or refresh the
+    # artifacts.
+
+    # Timestamp format used for both the task expiry and the deadline.
+    fmt = "%Y-%m-%dT%H:%M:%S.%fZ"
+
+    def should_replace_task(self, task, params, deadline, index_paths):
+        "Look for a task with one of the given index paths"
+        for index_path in index_paths:
+            try:
+                task_id = find_task_id(index_path)
+                status = status_task(task_id)
+
+                # status can be `None` if we're in `testing` mode
+                # (e.g. test-action-callback)
+                if not status:
+                    continue
+                if status.get("state") in ("exception", "failed"):
+                    continue
+
+                if deadline:
+                    # Skip a candidate that expires before the deadline.
+                    expires_at = datetime.strptime(status["expires"], self.fmt)
+                    needed_until = datetime.strptime(deadline, self.fmt)
+                    if expires_at < needed_until:
+                        continue
+
+                return task_id
+            except KeyError:
+                # 404 will end up here and go on to the next index path
+                continue
+
+        return False
+
+
+@register_strategy("skip-unless-changed")
+class SkipUnlessChanged(OptimizationStrategy):
+    """Remove a task when no changed file matches its file patterns."""
+
+    def should_remove_task(self, task, params, file_patterns):
+        # pushlog_id == -1 - this is the case when run from a cron.yml job
+        if params.get("pushlog_id") == -1:
+            return False
+
+        if files_changed.check(params, file_patterns):
+            return False
+
+        logger.debug(
+            "no files found matching a pattern in `skip-unless-changed` for "
+            + task.label
+        )
+        return True
+
+
+@register_strategy("skip-unless-schedules")
+class SkipUnlessSchedules(OptimizationStrategy):
+    """Remove a task unless the push schedules one of its components."""
+
+    @memoize
+    def scheduled_by_push(self, repository, revision):
+        """Return the set of SCHEDULES components touched by the push."""
+        changed_files = files_changed.get_changed_files(repository, revision)
+
+        # the decision task has a sparse checkout, so, mozbuild_reader will use
+        # a MercurialRevisionFinder with revision '.', which should be the same
+        # as `revision`; in other circumstances, it will use a default reader
+        reader = MozbuildObject.from_environment().mozbuild_reader(config_mode="empty")
+
+        # Only the per-file metadata matters here, not the file paths.
+        components = set()
+        for metadata in reader.files_info(changed_files).values():
+            components.update(metadata["SCHEDULES"].components)
+
+        return components
+
+    def should_remove_task(self, task, params, conditions):
+        # pushlog_id == -1 is treated as "never remove".
+        if params.get("pushlog_id") == -1:
+            return False
+
+        scheduled = self.scheduled_by_push(
+            params["head_repository"], params["head_rev"]
+        )
+        # if *any* of the condition components are scheduled, do not optimize
+        return not (set(conditions) & scheduled)
+
+
+@register_strategy("skip-unless-has-relevant-tests")
+class SkipUnlessHasRelevantTests(OptimizationStrategy):
+    """Optimizes tasks that don't run any tests located under the
+    directory of a modified file.
+    """
+
+    @memoize
+    def get_changed_dirs(self, repo, rev):
+        """Return the set of directories containing files changed by the push."""
+        changed_files = files_changed.get_changed_files(repo, rev)
+        # Files modified in the root have an empty directory name. Those are
+        # dropped, otherwise all tasks would be scheduled.
+        return {
+            directory for directory in map(mozpath.dirname, changed_files) if directory
+        }
+
+    def should_remove_task(self, task, params, _):
+        manifests = task.attributes.get("test_manifests")
+        if not manifests:
+            return True
+
+        changed_dirs = self.get_changed_dirs(
+            params["head_repository"], params["head_rev"]
+        )
+        for d in changed_dirs:
+            for t in manifests:
+                # NOTE(review): a plain prefix test, so `a/b` also matches a
+                # manifest under `a/bc/` - confirm whether that is intended.
+                if not t.startswith(d):
+                    continue
+                logger.debug(
+                    "{} runs a test path ({}) contained by a modified file ({})".format(
+                        task.label, t, d
+                    )
+                )
+                return False
+        return True