path: root/python/mozperftest/mozperftest/tools.py
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import shutil
import tempfile
from pathlib import Path


class PerformanceChangeDetected(Exception):
    """Raised when a performance change is detected.

    This failure is raised for both regressions and improvements; there
    is no distinct failure type for each of them.

    TODO: We eventually need to be able to distinguish between these.
    To do so, we would need to incorporate the "lower_is_better" settings
    into the detection tooling.
    """

    pass


def run_side_by_side(artifacts, kwargs):
    from mozperftest_tools.side_by_side import SideBySide

    if "output" in kwargs:
        kwargs.pop("output")

    tempdir = tempfile.mkdtemp()
    s = SideBySide(str(tempdir))
    s.run(**kwargs)

    try:
        for file in os.listdir(tempdir):
            if file.startswith("cold-") or file.startswith("warm-"):
                print(f"Copying from {tempdir}/{file} to {artifacts}")
                shutil.copy(Path(tempdir, file), artifacts)
    finally:
        shutil.rmtree(tempdir)
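
# A hedged usage sketch for the runner above; the artifacts path and option
# names are hypothetical placeholders, not taken from a real configuration:
#
#     run_side_by_side(
#         "/builds/worker/artifacts",
#         {
#             "test_name": "browsertime-example",
#             "platform": "test-linux1804-64/opt",
#             "base_revision": "abc123",
#             "new_revision": "def456",
#         },
#     )
#
# Only the cold-*/warm-* files that SideBySide produces in its temporary
# working directory are copied into the artifacts directory.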


def _gather_task_names(kwargs):
    task_names = kwargs.get("task_names", [])
    if len(task_names) == 0:
        if kwargs.get("test_name", None) is None:
            raise Exception("No test, or task names given!")
        if kwargs.get("platform", None) is None:
            raise Exception("No platform, or task names given!")
        task_names.append(kwargs["platform"] + "-" + kwargs["test_name"])
    return task_names
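
# A minimal sketch of the fallback path above, using illustrative values (not
# real task or test names):
#
#     _gather_task_names(
#         {"platform": "test-linux1804-64/opt", "test_name": "browsertime-example"}
#     )
#     # -> ["test-linux1804-64/opt-browsertime-example"]
#
# When "task_names" is already provided in kwargs, it is returned unchanged.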


def _get_task_splitter(task):
    splitter = "/opt-"
    if splitter not in task:
        splitter = "/" + task.split("/")[-1].split("-")[0] + "-"
    return splitter
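
# A minimal sketch of how the splitter is chosen, with hypothetical task labels:
#
#     _get_task_splitter("test-linux1804-64/opt-browsertime-example")
#     # -> "/opt-"
#     _get_task_splitter("test-android-hw-p5/pgo-browsertime-example")
#     # -> "/pgo-" (built from the first dash-separated token after the last "/")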


def _format_changes_to_str(all_results):
    changes_detected = None
    for task, results in all_results.items():
        for pltype, metrics in results["metrics-with-changes"].items():
            for metric, changes in metrics.items():
                for revision, diffs in changes.items():
                    if changes_detected is None:
                        changes_detected = "REVISION  PL_TYPE  METRIC %-DIFFERENCE\n"
                    changes_detected += f"{revision} {pltype} {metric} {str(diffs)}\n"
    return changes_detected
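
# A minimal sketch of the string this helper returns, using made-up values:
#
#     REVISION  PL_TYPE  METRIC %-DIFFERENCE
#     0a1b2c3d4e5f cold fcp [12.3]
#
# One row is emitted per revision that shows a change for a given platform
# type and metric; None is returned when nothing changed.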


def run_change_detector(artifacts, kwargs):
    from mozperftest_tools.regression_detector import ChangeDetector

    tempdir = tempfile.mkdtemp()
    detector = ChangeDetector(tempdir)

    all_results = {}
    results_path = Path(artifacts, "results.json")
    try:
        for task in _gather_task_names(kwargs):
            splitter = _get_task_splitter(task)

            # Split the task label into its platform and test name parts, then
            # re-attach the build type (e.g. "/opt") that the split removed.
            platform, test_name = task.split(splitter)
            platform += splitter[:-1]

            new_test_name = test_name
            new_platform_name = platform
            if kwargs["new_test_name"] is not None:
                new_test_name = kwargs["new_test_name"]
            if kwargs["new_platform"] is not None:
                new_platform_name = kwargs["new_platform_name"]

            all_changed_revisions, changed_metric_revisions = detector.detect_changes(
                test_name=test_name,
                new_test_name=new_test_name,
                platform=platform,
                new_platform=new_platform_name,
                base_revision=kwargs["base_revision"],
                new_revision=kwargs["new_revision"],
                base_branch=kwargs["base_branch"],
                new_branch=kwargs["new_branch"],
                # A depth of -1 means auto-computed (everything in between the two
                # given revisions), None means a direct comparison, and anything
                # else uses new_revision as a starting point and goes backwards
                # from there.
                depth=kwargs.get("depth", None),
                skip_download=False,
                overwrite=False,
            )

            # The task names are unique, so we don't need to worry about
            # them overwriting each other
            all_results[task] = {}
            all_results[task]["revisions-with-changes"] = list(all_changed_revisions)
            all_results[task]["metrics-with-changes"] = changed_metric_revisions

        changes_detected = _format_changes_to_str(all_results)
        if changes_detected is not None:
            print(changes_detected)
            raise PerformanceChangeDetected(
                "[ERROR] A significant performance change was detected in your patch! "
                "See the logs above, or the results.json artifact that was produced for "
                "more information."
            )

    finally:
        shutil.rmtree(tempdir)

        print(f"Saving change detection results to {str(results_path)}")
        with results_path.open("w") as f:
            json.dump(all_results, f, indent=4)


TOOL_RUNNERS = {
    "side-by-side": run_side_by_side,
    "change-detector": run_change_detector,
}
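
# A hedged dispatch sketch: a caller (for instance a mach command wrapper,
# hypothetical here) picks a runner by name and hands it the artifacts
# directory plus its parsed options. The option values below are placeholders;
# the keys are the ones run_change_detector reads above:
#
#     runner = TOOL_RUNNERS["change-detector"]
#     runner(
#         "/path/to/artifacts",
#         {
#             "task_names": ["test-linux1804-64/opt-browsertime-example"],
#             "new_test_name": None,
#             "new_platform": None,
#             "base_revision": "abc123",
#             "new_revision": "def456",
#             "base_branch": "autoland",
#             "new_branch": "try",
#             "depth": None,
#         },
#     )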