5 files changed, 851 insertions, 0 deletions
diff --git a/taskcluster/gecko_taskgraph/optimize/__init__.py b/taskcluster/gecko_taskgraph/optimize/__init__.py
new file mode 100644
index 0000000000..c2d3fdf839
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/__init__.py
@@ -0,0 +1,287 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+The objective of optimization is to remove as many tasks from the graph as
+possible, as efficiently as possible, thereby delivering useful results as
+quickly as possible.  For example, ideally if only a test script is modified in
+a push, then the resulting graph contains only the corresponding test suite
+task.
+
+See ``taskcluster/docs/optimization.rst`` for more information.
+"""
+
+from taskgraph.optimize.base import Alias, All, Any, Not, register_strategy, registry
+from taskgraph.util.python_path import import_sibling_modules
+
+# Drop any registered 'skip-unless-changed'; use the gecko_taskgraph version for now.
+registry.pop("skip-unless-changed", None)
+
+# Import the sibling modules so that their `register_strategy` calls run.
+import_sibling_modules()
+
+
+def split_bugbug_arg(arg, substrategies):
+    """Build the per-substrategy argument list for bugbug composites.
+
+    Substrategies ahead of the first bugbug strategy get ``arg`` unchanged.
+    The bugbug strategy and everything after it get one shared, initially
+    empty dict, which bugbug based optimizations use to pass data by
+    reference to the strategies downstream of them.
+    """
+    from gecko_taskgraph.optimize.bugbug import BugBugPushSchedules
+
+    bugbug_positions = [
+        pos
+        for pos, candidate in enumerate(substrategies)
+        if isinstance(candidate, BugBugPushSchedules)
+    ]
+    first = bugbug_positions[0]
+    return [arg] * first + [{}] * (len(substrategies) - first)
+
+
+# Register the default alias strategies (the overrides below tweak these).
+register_strategy("build", args=("skip-unless-schedules",))(Alias)
+register_strategy("test", args=("skip-unless-schedules",))(Alias)
+register_strategy("test-inclusive", args=("skip-unless-schedules",))(Alias)
+register_strategy("test-verify", args=("skip-unless-schedules",))(Alias)
+register_strategy("upload-symbols", args=("never",))(Alias)
+register_strategy("reprocess-symbols", args=("never",))(Alias)
+
+
+# Strategy overrides used to tweak the default strategies. These are referenced
+# by the `optimize_strategies` parameter.
+
+
+class project:
+    """Strategy overrides applied per project, one attribute per project."""
+
+    autoland = {
+        "test": Any(
+            # This `Any` strategy implements bi-modal behaviour. It allows different
+            # strategies on expanded pushes vs regular pushes.
+            # This first `All` handles "expanded" pushes.
+            All(
+                # There are three substrategies in this `All`, the first two act as barriers
+                # that help determine when to apply the third:
+                # 1. On backstop pushes, `skip-unless-backstop` returns False. Therefore
+                #    the overall composite strategy is False and we don't optimize.
+                # 2. On regular pushes, `Not('skip-unless-expanded')` returns False. Therefore
+                #    the overall composite strategy is False and we don't optimize.
+                # 3. On expanded pushes, the third strategy will determine whether or
+                #    not to optimize each individual task.
+                # The barrier strategies.
+                "skip-unless-backstop",
+                Not("skip-unless-expanded"),
+                # The actual test strategy applied to "expanded" pushes.
+                Any(
+                    "skip-unless-schedules",
+                    "bugbug-reduced-manifests-fallback-last-10-pushes",
+                    "platform-disperse",
+                    split_args=split_bugbug_arg,
+                ),
+            ),
+            # This second `All` handles regular (aka not expanded or backstop)
+            # pushes.
+            All(
+                # There are two substrategies in this `All`, the first acts as a barrier
+                # that determines when to apply the second:
+                # 1. On expanded pushes (which includes backstops), `skip-unless-expanded`
+                #    returns False. Therefore the overall composite strategy is False and we
+                #    don't optimize.
+                # 2. On regular pushes, the second strategy will determine whether or
+                #    not to optimize each individual task.
+                # The barrier strategy.
+                "skip-unless-expanded",
+                # The actual test strategy applied to regular pushes.
+                Any(
+                    "skip-unless-schedules",
+                    "bugbug-reduced-manifests-fallback-low",
+                    "platform-disperse",
+                    split_args=split_bugbug_arg,
+                ),
+            ),
+        ),
+        "build": All(
+            "skip-unless-expanded",
+            Any(
+                "skip-unless-schedules",
+                "bugbug-reduced-fallback",
+                split_args=split_bugbug_arg,
+            ),
+        ),
+    }
+    """Strategy overrides that apply to autoland."""
+
+
+class experimental:
+    """Experimental strategies either under development or used as benchmarks.
+
+    These run as "shadow-schedulers" on each autoland push (tier 3) and/or can be used
+    with `./mach try auto`.  E.g:
+
+        ./mach try auto --strategy relevant_tests
+    """
+
+    bugbug_tasks_medium = {
+        "test": Any(
+            "skip-unless-schedules", "bugbug-tasks-medium", split_args=split_bugbug_arg
+        ),
+    }
+    """Doesn't limit platforms, medium confidence threshold."""
+
+    bugbug_tasks_high = {
+        "test": Any(
+            "skip-unless-schedules", "bugbug-tasks-high", split_args=split_bugbug_arg
+        ),
+    }
+    """Doesn't limit platforms, high confidence threshold."""
+
+    bugbug_debug_disperse = {
+        "test": Any(
+            "skip-unless-schedules",
+            "bugbug-low",
+            "platform-debug",
+            "platform-disperse",
+            split_args=split_bugbug_arg,
+        ),
+    }
+    """Restricts tests to debug platforms."""
+
+    bugbug_disperse_low = {
+        "test": Any(
+            "skip-unless-schedules",
+            "bugbug-low",
+            "platform-disperse",
+            split_args=split_bugbug_arg,
+        ),
+    }
+    """Disperse tests across platforms, low confidence threshold."""
+
+    bugbug_disperse_medium = {
+        "test": Any(
+            "skip-unless-schedules",
+            "bugbug-medium",
+            "platform-disperse",
+            split_args=split_bugbug_arg,
+        ),
+    }
+    """Disperse tests across platforms, medium confidence threshold."""
+
+    bugbug_disperse_reduced_medium = {
+        "test": Any(
+            "skip-unless-schedules",
+            "bugbug-reduced-manifests",
+            "platform-disperse",
+            split_args=split_bugbug_arg,
+        ),
+    }
+    """Disperse tests across platforms, medium confidence threshold with reduced tasks."""
+
+    bugbug_reduced_manifests_config_selection_low = {
+        "test": Any(
+            "skip-unless-schedules",
+            "bugbug-reduced-manifests-config-selection-low",
+            split_args=split_bugbug_arg,
+        ),
+    }
+    """Choose configs selected by bugbug, low confidence threshold with reduced tasks."""
+
+    bugbug_reduced_manifests_config_selection_medium = {
+        "test": Any(
+            "skip-unless-schedules",
+            "bugbug-reduced-manifests-config-selection",
+            split_args=split_bugbug_arg,
+        ),
+    }
+    """Choose configs selected by bugbug, medium confidence threshold with reduced tasks."""
+
+    bugbug_disperse_medium_no_unseen = {
+        "test": Any(
+            "skip-unless-schedules",
+            "bugbug-medium",
+            "platform-disperse-no-unseen",
+            split_args=split_bugbug_arg,
+        ),
+    }
+    """Disperse tests across platforms (no modifier for unseen configurations), medium confidence
+    threshold."""
+
+    bugbug_disperse_medium_only_one = {
+        "test": Any(
+            "skip-unless-schedules",
+            "bugbug-medium",
+            "platform-disperse-only-one",
+            split_args=split_bugbug_arg,
+        ),
+    }
+    """Disperse tests across platforms (one platform per group), medium confidence threshold."""
+
+    bugbug_disperse_high = {
+        "test": Any(
+            "skip-unless-schedules",
+            "bugbug-high",
+            "platform-disperse",
+            split_args=split_bugbug_arg,
+        ),
+    }
+    """Disperse tests across platforms, high confidence threshold."""
+
+    bugbug_reduced = {
+        "test": Any(
+            "skip-unless-schedules", "bugbug-reduced", split_args=split_bugbug_arg
+        ),
+    }
+    """Use the reduced set of tasks (and no groups) chosen by bugbug."""
+
+    bugbug_reduced_high = {
+        "test": Any(
+            "skip-unless-schedules", "bugbug-reduced-high", split_args=split_bugbug_arg
+        ),
+    }
+    """Use the reduced set of tasks (and no groups) chosen by bugbug, high
+    confidence threshold."""
+
+    relevant_tests = {
+        "test": Any("skip-unless-schedules", "skip-unless-has-relevant-tests"),
+    }
+    """Runs tasks containing tests in the same directories as modified files."""
+
+
+class ExperimentalOverride:
+    """Proxy a strategy container, layering fixed overrides on each lookup.
+
+    Reading an attribute returns a copy of the base container's dictionary
+    with every override applied. An override given as the string
+    ``"base:<key>"`` is replaced by the copy's current value for ``<key>``.
+
+    Args:
+        base (object): A container class supporting attribute access.
+        overrides (dict): Values to update any accessed dictionaries with.
+    """
+
+    def __init__(self, base, overrides):
+        self.base = base
+        self.overrides = overrides
+
+    def __getattr__(self, name):
+        prefix = "base:"
+        merged = getattr(self.base, name).copy()
+        for key, override in self.overrides.items():
+            if isinstance(override, str) and override.startswith(prefix):
+                override = merged[override[len(prefix) :]]
+            merged[key] = override
+        return merged
+
+
+tryselect = ExperimentalOverride(
+    experimental,
+    {
+        "build": Any(
+            "skip-unless-schedules", "bugbug-reduced", split_args=split_bugbug_arg
+        ),
+        "test-verify": "base:test",  # reuse the accessed dict's "test" strategy
+        "upload-symbols": Alias("always"),
+        "reprocess-symbols": Alias("always"),
+    },
+)
diff --git a/taskcluster/gecko_taskgraph/optimize/backstop.py b/taskcluster/gecko_taskgraph/optimize/backstop.py
new file mode 100644
index 0000000000..7b0c86222b
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/backstop.py
@@ -0,0 +1,47 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from taskgraph.optimize.base import All, OptimizationStrategy, register_strategy
+
+from gecko_taskgraph.util.backstop import BACKSTOP_PUSH_INTERVAL
+
+
+@register_strategy("skip-unless-backstop")
+class SkipUnlessBackstop(OptimizationStrategy):
+    """Removes tasks unless the ``backstop`` parameter is truthy."""
+
+    def should_remove_task(self, task, params, _):
+        return not params["backstop"]
+
+
+class SkipUnlessPushInterval(OptimizationStrategy):
+    """Removes tasks except on every Nth push; ``remove_on_projects`` is unused.
+
+    Args:
+        push_interval (int): Number of pushes (N) between pushes that keep tasks.
+    """
+
+    def __init__(self, push_interval, remove_on_projects=None):
+        self.push_interval = push_interval
+
+    @property
+    def description(self):
+        return f"skip-unless-push-interval-{self.push_interval}"
+
+    def should_remove_task(self, task, params, _):
+        # Keep tasks only when `pushlog_id` is a multiple of the interval.
+        return int(params["pushlog_id"]) % self.push_interval != 0
+
+
+# Strategy to run tasks on "expanded" pushes, currently defined as pushes that
+# are half the backstop interval (floored to an int, and never less than 1).
+# 'All' means "backstop" prevents "expanded" from applying on backstop pushes.
+register_strategy(
+    "skip-unless-expanded",
+    args=(
+        "skip-unless-backstop",
+        SkipUnlessPushInterval(max(1, BACKSTOP_PUSH_INTERVAL // 2)),
+    ),
+)(All)
diff --git a/taskcluster/gecko_taskgraph/optimize/bugbug.py b/taskcluster/gecko_taskgraph/optimize/bugbug.py
new file mode 100644
index 0000000000..d8603560ef
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/bugbug.py
@@ -0,0 +1,321 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+from collections import defaultdict
+from fnmatch import fnmatch
+
+from taskgraph.optimize.base import OptimizationStrategy, register_strategy, registry
+
+from gecko_taskgraph.util.bugbug import (
+    CT_HIGH,
+    CT_LOW,
+    CT_MEDIUM,
+    BugbugTimeoutException,
+    push_schedules,
+)
+from gecko_taskgraph.util.hg import get_push_data
+
+FALLBACK = "skip-unless-has-relevant-tests"
+
+
+def merge_bugbug_replies(data, new_data):
+    """Fold the bugbug reply ``new_data`` into the accumulated reply ``data``.
+
+    ``data`` is updated in place. List values are merged into sets, dicts of
+    lists into dicts of sets, and for any other dict the largest value seen
+    for each name is kept. The first value of a dict decides which of the two
+    dict shapes applies.
+    """
+    for section, incoming in new_data.items():
+        if isinstance(incoming, list):
+            data.setdefault(section, set()).update(incoming)
+            continue
+        if not isinstance(incoming, dict):
+            continue
+
+        merged = data.setdefault(section, {})
+        if not incoming:
+            continue
+
+        if isinstance(next(iter(incoming.values())), list):
+            # Maps each name to a list of configs: union them per name.
+            for name, configs in incoming.items():
+                merged.setdefault(name, set()).update(configs)
+        else:
+            # Maps each name to a confidence: keep the highest one.
+            for name, confidence in incoming.items():
+                if name not in merged or merged[name] < confidence:
+                    merged[name] = confidence
+
+
+@register_strategy("bugbug-low", args=(CT_LOW,))
+@register_strategy("bugbug-medium", args=(CT_MEDIUM,))
+@register_strategy("bugbug-high", args=(CT_HIGH,))
+@register_strategy("bugbug-tasks-medium", args=(CT_MEDIUM, True))
+@register_strategy("bugbug-tasks-high", args=(CT_HIGH, True))
+@register_strategy("bugbug-reduced", args=(CT_MEDIUM, True, True))
+@register_strategy("bugbug-reduced-fallback", args=(CT_MEDIUM, True, True, FALLBACK))
+@register_strategy("bugbug-reduced-high", args=(CT_HIGH, True, True))
+@register_strategy("bugbug-reduced-manifests", args=(CT_MEDIUM, False, True))
+@register_strategy(
+    "bugbug-reduced-manifests-config-selection-low",
+    args=(CT_LOW, False, True, None, 1, True),
+)
+@register_strategy(
+    "bugbug-reduced-manifests-config-selection",
+    args=(CT_MEDIUM, False, True, None, 1, True),
+)
+@register_strategy(
+    "bugbug-reduced-manifests-fallback-low", args=(CT_LOW, False, True, FALLBACK)
+)
+@register_strategy(
+    "bugbug-reduced-manifests-fallback", args=(CT_MEDIUM, False, True, FALLBACK)
+)
+@register_strategy(
+    "bugbug-reduced-manifests-fallback-last-10-pushes",
+    args=(0.3, False, True, FALLBACK, 10),
+)
+class BugBugPushSchedules(OptimizationStrategy):
+    """Query the 'bugbug' service to retrieve relevant tasks and manifests.
+
+    Args:
+        confidence_threshold (float): The minimum confidence threshold (in
+            range [0, 1]) needed for a task to be scheduled.
+        tasks_only (bool): Whether or not to only use tasks and no groups
+            (default: False)
+        use_reduced_tasks (bool): Whether or not to use the reduced set of tasks
+            provided by the bugbug service (default: False).
+        fallback (str): The fallback strategy to use if there
+            was a failure in bugbug (default: None)
+        num_pushes (int): The number of pushes to consider for the selection
+            (default: 1).
+        select_configs (bool): Whether to select configurations for manifests
+            too (default: False).
+    """
+
+    def __init__(
+        self,
+        confidence_threshold,
+        tasks_only=False,
+        use_reduced_tasks=False,
+        fallback=None,
+        num_pushes=1,
+        select_configs=False,
+    ):
+        self.confidence_threshold = confidence_threshold
+        self.use_reduced_tasks = use_reduced_tasks
+        self.fallback = fallback
+        self.tasks_only = tasks_only
+        self.num_pushes = num_pushes
+        self.select_configs = select_configs
+        self.timedout = False
+
+    def should_remove_task(self, task, params, importance):
+        """Decide whether to remove `task`; also records group importance."""
+        project = params["project"]
+        if project not in ("autoland", "try"):
+            return False
+
+        current_push_id = int(params["pushlog_id"])
+
+        # After a bugbug timeout, defer every later decision to the fallback.
+        if self.timedout:
+            return registry[self.fallback].should_remove_task(task, params, importance)
+
+        # Bugbug replies merged across all the pushes considered.
+        data = {}
+
+        start_push_id = current_push_id - self.num_pushes + 1
+        if self.num_pushes != 1:
+            push_data = get_push_data(
+                params["head_repository"], project, start_push_id, current_push_id - 1
+            )
+
+        for push_id in range(start_push_id, current_push_id + 1):
+            if push_id == current_push_id:
+                rev = params["head_rev"]
+            else:
+                rev = push_data[push_id]["changesets"][-1]
+
+            try:
+                new_data = push_schedules(params["project"], rev)
+                merge_bugbug_replies(data, new_data)
+            except BugbugTimeoutException:
+                if not self.fallback:
+                    raise
+
+                self.timedout = True
+                return self.should_remove_task(task, params, importance)
+
+        key = "reduced_tasks" if self.use_reduced_tasks else "tasks"
+        tasks = {
+            label
+            for label, confidence in data.get(key, {}).items()
+            if confidence >= self.confidence_threshold
+        }
+
+        test_manifests = task.attributes.get("test_manifests")
+        if test_manifests is None or self.tasks_only:
+            # Tasks missing from a non-empty "known_tasks" are always kept.
+            if data.get("known_tasks") and task.label not in data["known_tasks"]:
+                return False
+
+            # Otherwise the task is removed unless bugbug selected it with
+            # enough confidence.
+            return task.label not in tasks
+
+        # If a task contains more than one group, use the max confidence.
+        groups = data.get("groups", {})
+        confidences = [c for g, c in groups.items() if g in test_manifests]
+        if not confidences or max(confidences) < self.confidence_threshold:
+            return True
+
+        # If the task configuration doesn't match the ones selected by bugbug for
+        # the manifests, optimize out.
+        if self.select_configs:
+            selected_groups = [
+                g
+                for g, c in groups.items()
+                if g in test_manifests and c >= self.confidence_threshold
+            ]
+
+            config_groups = data.get("config_groups", {})
+
+            # Configurations returned by bugbug are in a format such as
+            # `test-windows10-64/opt-*-e10s`, while task labels are like
+            # test-windows10-64-qr/opt-mochitest-browser-chrome-e10s-6.
+            # In order to match the strings, we need to ignore the chunk number
+            # from the task label.
+            parts = task.label.split("-")
+            label_without_chunk_number = "-".join(
+                parts[:-1] if parts[-1].isdigit() else parts
+            )
+
+            if not any(
+                fnmatch(label_without_chunk_number, config)
+                for group in selected_groups
+                for config in config_groups.get(group, ())
+            ):
+                return True
+
+        # Store group importance so future optimizers can access it.
+        for manifest in test_manifests:
+            if manifest not in groups:
+                continue
+
+            confidence = groups[manifest]
+            if confidence >= CT_HIGH:
+                importance[manifest] = "high"
+            elif confidence >= CT_MEDIUM:
+                importance[manifest] = "medium"
+            elif confidence >= CT_LOW:
+                importance[manifest] = "low"
+            else:
+                importance[manifest] = "lowest"
+
+        return False
+
+
+@register_strategy("platform-debug")
+class SkipUnlessDebug(OptimizationStrategy):
+    """Only run debug platforms (tasks without a ``build_type`` are kept)."""
+
+    def should_remove_task(self, task, params, arg):
+        return (
+            "build_type" in task.attributes and task.attributes["build_type"] != "debug"
+        )
+
+
+@register_strategy("platform-disperse")
+@register_strategy("platform-disperse-no-unseen", args=(None, 0))
+@register_strategy(
+    "platform-disperse-only-one",
+    args=(
+        {
+            "high": 1,
+            "medium": 1,
+            "low": 1,
+            "lowest": 0,
+        },
+        0,
+    ),
+)
+class DisperseGroups(OptimizationStrategy):
+    """Disperse groups across test configs.
+
+    Each task has an associated 'importance' dict passed in via the arg. This
+    is of the form `{<group>: <importance>}`.
+
+    Where 'group' is a test group id (usually a path to a manifest), and 'importance' is
+    one of `{'lowest', 'low', 'medium', 'high'}`.
+
+    Each importance value has an associated 'count' as defined in
+    `self.target_counts`. It guarantees that 'manifest' will run in at least
+    'count' different configurations (assuming there are enough tasks
+    containing 'manifest').
+
+    On configurations that haven't been seen before, we'll increase the target
+    count by `self.unseen_modifier` to increase the likelihood of scheduling a
+    task on that configuration.
+
+    Args:
+        target_counts (dict): Override DEFAULT_TARGET_COUNTS with custom counts. This
+            is a dict mapping the importance value ('lowest', 'low', etc) to the
+            minimum number of configurations manifests with this value should run
+            on.
+
+        unseen_modifier (int): Override DEFAULT_UNSEEN_MODIFIER to a custom
+            value. This is the amount we'll increase 'target_count' by for unseen
+            configurations.
+    """
+
+    DEFAULT_TARGET_COUNTS = {
+        "high": 3,
+        "medium": 2,
+        "low": 1,
+        "lowest": 0,
+    }
+    DEFAULT_UNSEEN_MODIFIER = 1
+
+    def __init__(self, target_counts=None, unseen_modifier=DEFAULT_UNSEEN_MODIFIER):
+        self.target_counts = self.DEFAULT_TARGET_COUNTS.copy()
+        if target_counts:
+            self.target_counts.update(target_counts)
+        self.unseen_modifier = unseen_modifier
+
+        self.count = defaultdict(int)
+        self.seen_configurations = set()
+
+    def should_remove_task(self, task, params, importance):
+        test_manifests = task.attributes.get("test_manifests")
+        test_platform = task.attributes.get("test_platform")
+
+        if not importance or not test_manifests or not test_platform:
+            return False
+
+        # Build the test configuration key: the platform plus the variant, if any.
+        key = test_platform
+        if "unittest_variant" in task.attributes:
+            key += "-" + task.attributes["unittest_variant"]
+
+        important_manifests = set(test_manifests) & set(importance)
+        for manifest in important_manifests:
+            target_count = self.target_counts[importance[manifest]]
+
+            # If this configuration hasn't been seen before, increase the
+            # likelihood of scheduling the task.
+            if key not in self.seen_configurations:
+                target_count += self.unseen_modifier
+
+            if self.count[manifest] < target_count:
+                # Keep the task: record the configuration and count each manifest.
+                self.seen_configurations.add(key)
+                for manifest in important_manifests:
+                    self.count[manifest] += 1
+                return False
+
+        # Should remove task because all manifests have reached their
+        # importance count (or there were no important manifests).
+        return True
diff --git a/taskcluster/gecko_taskgraph/optimize/schema.py b/taskcluster/gecko_taskgraph/optimize/schema.py
new file mode 100644
index 0000000000..a7f878cf60
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/schema.py
@@ -0,0 +1,60 @@
+#  This Source Code Form is subject to the terms of the Mozilla Public
+#  License, v. 2.0. If a copy of the MPL was not distributed with this
+#  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+
+import voluptuous
+from mozbuild import schedules
+
+logger = logging.getLogger(__name__)
+
+
+default_optimizations = (
+    # always run this task (default)
+    None,
+    # always optimize this task
+    {"always": None},
+    # optimize strategy aliases for build kind
+    {"build": list(schedules.ALL_COMPONENTS)},
+    # search the index for the given index namespaces, and replace this task if found
+    # the search occurs in order, with the first match winning
+    {"index-search": [str]},
+    # never optimize this task
+    {"never": None},
+    # skip the task except for every Nth push
+    {"skip-unless-expanded": None},
+    {"skip-unless-backstop": None},
+    # skip this task if none of the given file patterns match
+    {"skip-unless-changed": [str]},
+    # skip this task unless the changed files' SCHEDULES contains any of these components
+    {"skip-unless-schedules": list(schedules.ALL_COMPONENTS)},
+    # optimize strategy aliases for the test kind
+    {"test": list(schedules.ALL_COMPONENTS)},
+    {"test-inclusive": list(schedules.ALL_COMPONENTS)},
+    # optimize strategy alias for test-verify tasks
+    {"test-verify": list(schedules.ALL_COMPONENTS)},
+    # optimize strategy alias for upload-symbols tasks
+    {"upload-symbols": None},
+    # optimize strategy alias for reprocess-symbols tasks
+    {"reprocess-symbols": None},
+)
+
+OptimizationSchema = voluptuous.Any(*default_optimizations)
+
+
+def set_optimization_schema(schema_tuple):
+    """Replace the default OptimizationSchema imported by the task transform.
+
+    For projects that extend Firefox's taskgraph: call it from the project's
+    taskgraph:register function, before any transport or job runner code is
+    imported. Raises an Exception if the schema was already replaced.
+
+    :param tuple schema_tuple: Tuple of possible optimization strategies
+    """
+    global OptimizationSchema
+    if OptimizationSchema.validators != default_optimizations:
+        raise Exception("Can only call set_optimization_schema once.")
+    logger.info("OptimizationSchema updated.")
+    OptimizationSchema = voluptuous.Any(*schema_tuple)
diff --git a/taskcluster/gecko_taskgraph/optimize/strategies.py b/taskcluster/gecko_taskgraph/optimize/strategies.py
new file mode 100644
index 0000000000..2e520c4750
--- /dev/null
+++ b/taskcluster/gecko_taskgraph/optimize/strategies.py
@@ -0,0 +1,136 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+import logging
+from datetime import datetime
+
+import mozpack.path as mozpath
+from mozbuild.base import MozbuildObject
+from mozbuild.util import memoize
+from taskgraph.optimize.base import OptimizationStrategy, register_strategy
+from taskgraph.util.taskcluster import find_task_id
+
+from gecko_taskgraph import files_changed
+from gecko_taskgraph.util.taskcluster import status_task
+
+logger = logging.getLogger(__name__)
+
+
+@register_strategy("index-search")
+class IndexSearch(OptimizationStrategy):
+
+    # A task with no dependencies remaining after optimization will be replaced
+    # if artifacts exist for the corresponding index_paths.
+    # Otherwise, we're in one of the following cases:
+    # - the task has un-optimized dependencies
+    # - the artifacts have expired
+    # - some changes altered the index_paths and new artifacts need to be
+    # created.
+    # In each of those cases, we need to run the task to create or refresh
+    # artifacts.
+
+    fmt = "%Y-%m-%dT%H:%M:%S.%fZ"
+
+    def should_replace_task(self, task, params, deadline, index_paths):
+        """Return the id of the first usable indexed task, or False if none."""
+        for index_path in index_paths:
+            try:
+                task_id = find_task_id(index_path)
+                status = status_task(task_id)
+                # status can be `None` if we're in `testing` mode
+                # (e.g. test-action-callback)
+                if not status or status.get("state") in ("exception", "failed"):
+                    continue
+
+                if deadline and datetime.strptime(
+                    status["expires"], self.fmt
+                ) < datetime.strptime(deadline, self.fmt):
+                    continue
+
+                return task_id
+            except KeyError:
+                # 404 will end up here and go on to the next index path
+                pass
+
+        return False
+
+
+@register_strategy("skip-unless-changed")
+class SkipUnlessChanged(OptimizationStrategy):
+    """Removes tasks when no changed file matches the given patterns."""
+
+    def should_remove_task(self, task, params, file_patterns):
+        # pushlog_id == -1 - this is the case when run from a cron.yml job
+        if params.get("pushlog_id") == -1:
+            return False
+        if files_changed.check(params, file_patterns):
+            return False
+        logger.debug(
+            "no files found matching a pattern in `skip-unless-changed` for "
+            + task.label
+        )
+        return True
+
+
+@register_strategy("skip-unless-schedules")
+class SkipUnlessSchedules(OptimizationStrategy):
+    """Removes tasks unless the push schedules one of the given components."""
+
+    @memoize
+    def scheduled_by_push(self, repository, revision):
+        """Return the set of SCHEDULES components of the changed files."""
+        paths = files_changed.get_changed_files(repository, revision)
+
+        # the decision task has a sparse checkout, so, mozbuild_reader will use
+        # a MercurialRevisionFinder with revision '.', which should be the same
+        # as `revision`; in other circumstances, it will use a default reader
+        build_obj = MozbuildObject.from_environment()
+        reader = build_obj.mozbuild_reader(config_mode="empty")
+
+        scheduled = set()
+        for _path, info in reader.files_info(paths).items():
+            scheduled.update(info["SCHEDULES"].components)
+
+        return scheduled
+
+    def should_remove_task(self, task, params, conditions):
+        # pushlog_id == -1 - this is the case when run from a cron.yml job
+        if params.get("pushlog_id") == -1:
+            return False
+
+        scheduled = self.scheduled_by_push(
+            params["head_repository"], params["head_rev"]
+        )
+        # Remove the task only when none of its components are scheduled.
+        return not (set(conditions) & scheduled)
+
+
+@register_strategy("skip-unless-has-relevant-tests")
+class SkipUnlessHasRelevantTests(OptimizationStrategy):
+    """Optimizes tasks that don't run any tests located in (or below) the
+    directory of a modified file; directories match on whole path components.
+    """
+
+    @memoize
+    def get_changed_dirs(self, repo, rev):
+        changed = map(mozpath.dirname, files_changed.get_changed_files(repo, rev))
+        # Filter out empty directories (from files modified in the root).
+        # Otherwise all tasks would be scheduled.
+        return {d for d in changed if d}
+
+    def should_remove_task(self, task, params, _):
+        if not task.attributes.get("test_manifests"):
+            return True
+
+        for d in self.get_changed_dirs(params["head_repository"], params["head_rev"]):
+            for t in task.attributes["test_manifests"]:
+                if t == d or t.startswith(d + "/"):
+                    logger.debug(
+                        "{} runs a test path ({}) contained by a modified file ({})".format(
+                            task.label, t, d
+                        )
+                    )
+                    return False
+        return True