path: root/python/mozperftest/mozperftest/metrics/perfherder.py
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import pathlib
import statistics
import sys

import jsonschema

from mozperftest.layers import Layer
from mozperftest.metrics.common import COMMON_ARGS, filtered_metrics
from mozperftest.metrics.exceptions import PerfherderValidDataError
from mozperftest.metrics.notebook.constant import Constant
from mozperftest.metrics.notebook.transformer import get_transformer
from mozperftest.metrics.utils import has_callable_method, is_number, write_json
from mozperftest.utils import strtobool

PERFHERDER_SCHEMA = pathlib.Path(
    "testing", "mozharness", "external_tools", "performance-artifact-schema.json"
)
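
# Note: this path is relative to a mozilla-central checkout; it is resolved
# against the mach command's topsrcdir when the data blob is validated in
# Perfherder.run() below.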


class Perfherder(Layer):
    """Output data in the perfherder format."""

    name = "perfherder"
    activated = False

    arguments = COMMON_ARGS
    arguments.update(
        {
            "app": {
                "type": str,
                "default": "firefox",
                "choices": [
                    "firefox",
                    "chrome-m",
                    "chrome",
                    "chromium",
                    "fennec",
                    "geckoview",
                    "fenix",
                    "refbrow",
                ],
                "help": (
                    "Shorthand name of application that is "
                    "being tested (used in perfherder data)."
                ),
            },
            "stats": {
                "action": "store_true",
                "default": False,
                "help": "If set, browsertime statistics will be reported.",
            },
            "timestamp": {
                "type": float,
                "default": None,
                "help": (
                    "Timestamp to use for the perfherder data. Can be the "
                    "current date or a past date if needed."
                ),
            },
        }
    )
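
    # The arguments above are exposed as prefixed command-line flags by the
    # argument parser. Illustrative invocation (assumes mozperftest's usual
    # `--<layer>-<name>` flag naming; not taken from this file):
    #
    #   ./mach perftest --perfherder --perfherder-app firefox \
    #       --perfherder-stats --perfherder-timestamp 1620000000 <test>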

    def run(self, metadata):
        """Processes the given results into a perfherder-formatted data blob.

        If the `--perfherder` flag isn't provided, then the
        results won't be processed into a perfherder-data blob. If the
        flavor is unknown to us, then we assume that it comes from
        browsertime.

        XXX If needed, make a way to do flavor-specific processing

        :param metadata Metadata: The metadata object holding the results to
            process. All other settings (prefix, output path, metric filters,
            stats, timestamp, ...) are read through `self.get_arg`.
        :return Metadata: The same metadata object, with the written
            perfherder-data file registered as its output.
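
        Illustrative log line emitted by this layer (values hypothetical)::

            PERFHERDER_DATA: {"suites": [...], "framework": {"name": "mozperftest"}, "application": {"name": "firefox"}}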
        """
        prefix = self.get_arg("prefix")
        output = self.get_arg("output")

        # XXX Make an argument for exclusions from metrics
        # (or go directly to regexes for metrics)
        exclusions = None
        if not self.get_arg("stats"):
            exclusions = ["statistics."]

        # Get filtered metrics
        metrics = self.get_arg("metrics")
        results, fullsettings = filtered_metrics(
            metadata,
            output,
            prefix,
            metrics=metrics,
            transformer=self.get_arg("transformer"),
            settings=True,
            exclude=exclusions,
            split_by=self.get_arg("split-by"),
            simplify_names=self.get_arg("simplify-names"),
            simplify_exclude=self.get_arg("simplify-exclude"),
        )

        if not any(results.values()):
            self.warning("No results left after filtering")
            return metadata

        # XXX Add version info into this data
        app_info = {"name": self.get_arg("app", default="firefox")}

        # convert the metrics list into a mapping keyed by metric name
        if metrics is not None:
            metrics = {m["name"]: m for m in metrics}
        else:
            metrics = {}

        all_perfherder_data = None
        for name, res in results.items():
            settings = dict(fullsettings[name])
            # updating the settings with values provided in metrics, if any
            if name in metrics:
                settings.update(metrics[name])

            # XXX Instead of just passing replicates here, we should build
            # up a partial perfherder data blob (with options) and subtest
            # overall values.
            subtests = {}
            for r in res:
                vals = [v["value"] for v in r["data"] if is_number(v["value"])]
                if vals:
                    subtests[r["subtest"]] = vals

            perfherder_data = self._build_blob(
                subtests,
                name=name,
                extra_options=settings.get("extraOptions"),
                should_alert=strtobool(settings.get("shouldAlert", False)),
                application=app_info,
                alert_threshold=float(settings.get("alertThreshold", 2.0)),
                lower_is_better=strtobool(settings.get("lowerIsBetter", True)),
                unit=settings.get("unit", "ms"),
                summary=settings.get("value"),
                framework=settings.get("framework"),
                metrics_info=metrics,
                transformer=res[0].get("transformer", None),
            )

            if all_perfherder_data is None:
                all_perfherder_data = perfherder_data
            else:
                all_perfherder_data["suites"].extend(perfherder_data["suites"])

        if prefix:
            # If a prefix was given, store it in the perfherder data as well
            all_perfherder_data["prefix"] = prefix

        timestamp = self.get_arg("timestamp")
        if timestamp is not None:
            all_perfherder_data["pushTimestamp"] = timestamp

        # Validate the final perfherder data blob
        with pathlib.Path(metadata._mach_cmd.topsrcdir, PERFHERDER_SCHEMA).open() as f:
            schema = json.load(f)
        jsonschema.validate(all_perfherder_data, schema)
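        # jsonschema.validate raises a ValidationError here if the blob does
        # not match the schema, so invalid data is never written out.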

        file = "perfherder-data.json"
        if prefix:
            file = "{}-{}".format(prefix, file)
        self.info("Writing perfherder results to {}".format(os.path.join(output, file)))

        # XXX "suites" key error occurs when using self.info so a print
        # is being done for now.

        # print() will produce a BlockingIOError on large outputs, so we use
        # sys.stdout
        sys.stdout.write("PERFHERDER_DATA: ")
        json.dump(all_perfherder_data, sys.stdout)
        sys.stdout.write("\n")
        sys.stdout.flush()

        metadata.set_output(write_json(all_perfherder_data, output, file))
        return metadata

    def _build_blob(
        self,
        subtests,
        name="browsertime",
        test_type="pageload",
        extra_options=None,
        should_alert=False,
        subtest_should_alert=None,
        suiteshould_alert=False,
        framework=None,
        application=None,
        alert_threshold=2.0,
        lower_is_better=True,
        unit="ms",
        summary=None,
        metrics_info=None,
        transformer=None,
    ):
        """Build a PerfHerder data blob from the given subtests.

        NOTE: This is a WIP, see the many TODOs across this file.

        Given a dictionary of subtests and their values, build up a
        perfherder data blob. Note that the naming convention for
        these arguments is different from the rest of the scripts
        to make it easier to see where they end up in the perfherder
        data.

        For the `should_alert` field, if `should_alert` is True but
        `subtest_should_alert` is empty, then all subtests along with the
        suite will generate alerts. Otherwise, if `subtest_should_alert`
        contains subtests to alert on, then only those will alert and nothing
        else (including the suite). If the suite value should also alert in
        that case, set `suiteshould_alert` to True.

        :param subtests dict: A dictionary of subtests and the values.
            XXX TODO items for subtests:
                (1) Allow it to contain replicates and individual settings
                    for each of the subtests.
                (2) The geomean of the replicates will be taken for now,
                    but it should be made more flexible in some way.
                (3) We need some way to handle making multiple suites.
        :param name str: Name to give to the suite.
        :param test_type str: The type of test that was run.
        :param extra_options list: A list of extra options to store.
        :param should_alert bool: Whether all values in the suite should
            generate alerts or not.
        :param subtest_should_alert list: A list of subtests to alert on. If this
            is not empty, then it will disable the suite-level alerts.
        :param suiteshould_alert bool: Used if `subtest_should_alert` is not
            empty, and if True, then the suite-level value will generate
            alerts.
        :param framework dict: Information about the framework that
            is being tested.
        :param application dict: Information about the application that
            is being tested. Must include name, and optionally a version.
        :param alert_threshold float: The percentage change this
            metric must undergo to generate an alert.
        :param lower_is_better bool: If True, then lower values are better
            than higher ones.
        :param unit str: The unit of the data.
        :param summary float: The summary value to use in the perfherder
            data blob. By default, the mean of all the subtests will be
            used.
        :param metrics_info dict: Contains a mapping of metric names to the
            options that are used on the metric.
        :param transformer str: The name of a predefined transformer, a module
            path to a transform, or a path to the file containing the transformer.

        :return dict: The PerfHerder data blob.
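
        Illustrative example (shape taken from the code below; the values and
        the suite name are hypothetical)::

            subtests = {"firstPaint": [101.0, 98.5, 103.2]}
            blob = self._build_blob(subtests, name="pageload-example")
            # With no transformer and default settings, `blob` looks like:
            # {
            #     "suites": [{
            #         "name": "pageload-example",
            #         "type": "pageload",
            #         "unit": "ms",
            #         "extraOptions": [],
            #         "lowerIsBetter": True,
            #         "alertThreshold": 2.0,
            #         "shouldAlert": False,
            #         "subtests": [{
            #             "name": "firstPaint",
            #             "replicates": [101.0, 98.5, 103.2],
            #             "value": 100.9,  # mean of the replicates
            #             "unit": "ms",
            #             "lowerIsBetter": True,
            #             "shouldAlert": False,
            #         }],
            #     }],
            #     "framework": {"name": "mozperftest"},
            #     "application": {"name": "firefox", "version": "9000"},
            # }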
        """
        if extra_options is None:
            extra_options = []
        if subtest_should_alert is None:
            subtest_should_alert = []
        if framework is None:
            framework = {"name": "mozperftest"}
        if application is None:
            application = {"name": "firefox", "version": "9000"}
        if metrics_info is None:
            metrics_info = {}

        # Use the transform to produce a suite value
        const = Constant()
        tfm_cls = None
        transformer_obj = None
        if transformer and transformer in const.predefined_transformers:
            # A pre-built transformer name was given
            tfm_cls = const.predefined_transformers[transformer]
            transformer_obj = tfm_cls()
        elif transformer is not None:
            tfm_cls = get_transformer(transformer)
            transformer_obj = tfm_cls()
        else:
            self.warning(
                "No transformer found for this suite. Cannot produce a summary value."
            )
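
        # The summarization below only requires the transformer object to
        # expose callable `subtest_summary(subtest)` and/or `summary(suite)`
        # methods. A minimal sketch (illustrative only, not an existing
        # transformer) could look like:
        #
        #     class MedianTransformer:
        #         def subtest_summary(self, subtest):
        #             return statistics.median(subtest["replicates"])
        #
        #         def summary(self, suite):
        #             return statistics.mean(s["value"] for s in suite["subtests"])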

        perf_subtests = []
        suite = {
            "name": name,
            "type": test_type,
            "unit": unit,
            "extraOptions": extra_options,
            "lowerIsBetter": lower_is_better,
            "alertThreshold": alert_threshold,
            "shouldAlert": (should_alert and not subtest_should_alert)
            or suiteshould_alert,
            "subtests": perf_subtests,
        }

        perfherder = {
            "suites": [suite],
            "framework": framework,
            "application": application,
        }

        allvals = []
        alert_thresholds = []
        for measurement, reps in subtests.items():
            allvals.extend(reps)

            if len(reps) == 0:
                self.warning("No replicates found for {}, skipping".format(measurement))
                continue

            # Gather extra settings specified from within a metric specification
            subtest_lower_is_better = lower_is_better
            subtest_unit = unit
            for met in metrics_info:
                if met not in measurement:
                    continue

                extra_options.extend(metrics_info[met].get("extraOptions", []))
                alert_thresholds.append(
                    metrics_info[met].get("alertThreshold", alert_threshold)
                )

                subtest_unit = metrics_info[met].get("unit", unit)
                subtest_lower_is_better = metrics_info[met].get(
                    "lowerIsBetter", lower_is_better
                )

                if metrics_info[met].get("shouldAlert", should_alert):
                    subtest_should_alert.append(measurement)

                break

            subtest = {
                "name": measurement,
                "replicates": reps,
                "lowerIsBetter": subtest_lower_is_better,
                "value": None,
                "unit": subtest_unit,
                "shouldAlert": should_alert or measurement in subtest_should_alert,
            }

            if has_callable_method(transformer_obj, "subtest_summary"):
                subtest["value"] = transformer_obj.subtest_summary(subtest)
            if subtest["value"] is None:
                subtest["value"] = statistics.mean(reps)

            perf_subtests.append(subtest)

        if len(allvals) == 0:
            raise PerfherderValidDataError(
                "Could not build perfherder data blob because no valid data was provided, "
                + "only int/float data is accepted."
            )

        alert_thresholds = list(set(alert_thresholds))
        if len(alert_thresholds) > 1:
            raise PerfherderValidDataError(
                "Too many alertThreshold's were specified, expecting 1 but found "
                + f"{len(alert_thresholds)}"
            )
        elif len(alert_thresholds) == 1:
            suite["alertThreshold"] = alert_thresholds[0]

        suite["extraOptions"] = list(set(suite["extraOptions"]))

        if has_callable_method(transformer_obj, "summary"):
            val = transformer_obj.summary(suite)
            if val is not None:
                suite["value"] = val

        return perfherder