python/mozperftest/mozperftest/metrics/notebook/perftestetl.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167

#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import pathlib
from collections import OrderedDict

from .constant import Constant
from .transformer import SimplePerfherderTransformer, Transformer, get_transformer


class PerftestETL(object):
    """Controller class for the PerftestETL.

    Wraps a ``Transformer`` around the requested (or default) custom
    transformer, resolves every file grouping into a list of files, runs the
    transformer over each group and writes the combined result as JSON.
    """

    def __init__(
        self,
        file_groups,
        config,
        prefix,
        logger,
        custom_transform=None,
        sort_files=False,
    ):
        """Initializes PerftestETL.

        :param dict file_groups: A dict of file groupings. Each key is the
            name handed to the transformer for that group; each value is
            either a list of files or a path to a folder that is searched
            recursively.
        :param dict config: Output settings. The optional ``output`` and
            ``prefix`` entries are read by ``parse_output``.
        :param prefix: Forwarded to the ``Transformer`` and passed as the
            second argument of logger calls.
        :param logger: Logger object, called as ``logger.info(msg, prefix)``.
        :param str custom_transform: The class name of a custom transformer.
        :param bool sort_files: When True, the files of every group are
            sorted before they are transformed.
        :raises Exception: If ``NOTEBOOK_PLUGIN`` is set in the environment,
            or if no transformer could be found for ``custom_transform``.
        """
        self.fmt_data = {}
        self.file_groups = file_groups
        self.config = config
        self.sort_files = sort_files
        self.const = Constant()
        self.prefix = prefix
        self.logger = logger

        # Gather the available transformers
        tfms_dict = self.const.predefined_transformers

        # XXX NOTEBOOK_PLUGIN functionality is broken at the moment.
        # This code block will raise an exception if it detects it in
        # the environment.
        if os.getenv("NOTEBOOK_PLUGIN"):
            raise Exception("NOTEBOOK_PLUGIN is currently broken.")

        # Pick the transformer implementation that will be wrapped
        if custom_transform:
            # try to load it directly, and fallback to registry
            try:
                tfm_cls = get_transformer(custom_transform)
            except ImportError:
                tfm_cls = tfms_dict.get(custom_transform)

            if not tfm_cls:
                raise Exception(f"Could not get a {custom_transform} transformer.")
            custom_transformer = tfm_cls()
        else:
            custom_transformer = SimplePerfherderTransformer()

        # The file list starts empty; process() assigns it per file group.
        self.transformer = Transformer(
            files=[],
            custom_transformer=custom_transformer,
            logger=self.logger,
            prefix=self.prefix,
        )
        if custom_transform:
            self.logger.info(f"Found {custom_transform} transformer", self.prefix)

    def parse_file_grouping(self, file_grouping):
        """Handles differences in the file_grouping definitions.

        It can either be a path to a folder containing the files or a list
        of files. Settings from an artifact_downloader instance (a dict) are
        recognised but currently rejected.

        The caller's list is never modified: a copy is returned, sorted when
        ``self.sort_files`` is set.

        :param file_grouping: A file grouping entry.
        :return: A list of files to process.
        :raises Exception: If the grouping is a dict, has an unsupported
            type, or resolves to no files at all.
        """
        if isinstance(file_grouping, dict):
            # A dictionary of settings from an artifact_downloader instance
            # was provided here
            raise Exception(
                "Artifact downloader tooling is disabled for the time being."
            )

        if isinstance(file_grouping, list):
            # A list of files was provided; copy it so that sorting below
            # cannot reorder the list owned by the caller.
            files = list(file_grouping)
        elif isinstance(file_grouping, str):
            # Assume a path to files was given. rglob("*") also yields the
            # sub-directories themselves, so keep regular files only.
            files = [
                entry.resolve().as_posix()
                for entry in pathlib.Path(file_grouping).rglob("*")
                if entry.is_file()
            ]
        else:
            # An f-string is used because "%s" % value raises a TypeError
            # when value is a tuple, which would hide this message.
            raise Exception(
                f"Unknown file grouping type provided here: {file_grouping!s}"
            )

        if not files:
            raise Exception(
                f"Could not find any files in this configuration: {file_grouping!s}"
            )

        if self.sort_files:
            files.sort()

        return files

    def parse_output(self):
        """Returns the path that the formatted data will be written to.

        Defaults to ``<prefix>std-output.json``. An ``output`` entry in the
        config replaces that path; if it names an existing directory, the
        default file name is placed inside it.

        :return str: Path of the output file.
        """
        # XXX Fix up this function, it should only return a directory for output
        # not a directory or a file. Or remove it completely, it's not very useful.
        prefix = self.config.get("prefix", "")
        default_name = f"{prefix}std-output.json"

        filepath = self.config.get("output", default_name)
        if os.path.isdir(filepath):
            filepath = os.path.join(filepath, default_name)

        return filepath

    def process(self, **kwargs):
        """Transform every file group and write the combined data to disk.

        :param kwargs: Forwarded unchanged to the transformer's ``process``.
        :return: A dict with the formatted data under ``data`` and the path
            of the JSON file that was written under ``file-output``.
        :raises Exception: If a file grouping cannot be resolved to files.
        """
        fmt_data = []

        for name, grouping in self.file_groups.items():
            files = self.parse_file_grouping(grouping)
            if isinstance(files, dict):
                raise Exception(
                    "Artifact downloader tooling is disabled for the time being."
                )

            # Transform the data
            self.transformer.files = files
            trfm_data = self.transformer.process(name, **kwargs)

            # A transformer may return one entry or a list of entries
            if isinstance(trfm_data, list):
                fmt_data.extend(trfm_data)
            else:
                fmt_data.append(trfm_data)

        self.fmt_data = fmt_data

        # Write formatted data output to filepath
        output_data_filepath = self.parse_output()

        print("Writing results to %s" % output_data_filepath)
        with open(output_data_filepath, "w") as f:
            json.dump(self.fmt_data, f, indent=4, sort_keys=True)

        return {"data": self.fmt_data, "file-output": output_data_filepath}