summaryrefslogtreecommitdiffstats
path: root/python/mozperftest/mozperftest/metrics/notebook/transformer.py
blob: 7ecbc40d8978d4f84646f49589280b258b13f710 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import importlib.util
import inspect
import json
import pathlib

from jsonschema import validate

from mozperftest.metrics.exceptions import (
    NotebookDuplicateTransformsError,
    NotebookInvalidPathError,
    NotebookInvalidTransformError,
)
from mozperftest.runner import HERE
from mozperftest.utils import load_class


class Transformer(object):
    """Abstract class for data transformers.

    Drives a custom transformer object over a list of files: every file is
    opened, passed to the custom transformer's `transform` method, and the
    collected results are combined with its `merge` method.
    """

    def __init__(self, files=None, custom_transformer=None, logger=None, prefix=None):
        """Initialize the transformer with files.

        :param list files: A list of files containing data to transform.
        :param object custom_transformer: A custom transformer instance.
            Must implement `transform` and `merge` methods.
        :param logger: Object whose `warning` method is used to report
            problems while opening or transforming files.
        :param prefix: Value passed as the second argument of every
            `logger.warning` call.
        :raises NotebookInvalidTransformError: If a custom transformer is
            given but lacks a callable `transform` or `merge`.
        """
        self._files = files
        self.logger = logger
        self.prefix = prefix

        if custom_transformer:
            valid = callable(
                getattr(custom_transformer, "transform", None)
            ) and callable(getattr(custom_transformer, "merge", None))

            if not valid:
                raise NotebookInvalidTransformError(
                    "The custom transformer must contain `transform` and `merge` methods."
                )

        self._custom_transformer = custom_transformer

        # The schema is loaded once and used to validate the output of `process`.
        with pathlib.Path(HERE, "schemas", "transformer_schema.json").open() as f:
            self.schema = json.load(f)

    @property
    def files(self):
        """The list of files that `process` iterates over."""
        return self._files

    @files.setter
    def files(self, val):
        """Replace the file list; anything that is not a list is rejected.

        A rejected value only produces a warning, the current list is kept.
        """
        if not isinstance(val, list):
            self.logger.warning(
                "`files` must be a list, got %s" % type(val), self.prefix
            )
            return
        self._files = val

    @property
    def custom_transformer(self):
        """The custom transformer instance given at construction time."""
        return self._custom_transformer

    def open_data(self, file):
        """Opens a file of data.

        If it's not a JSON file, then the data
        will be opened as a text file.

        :param file: Path to the data file, as a `str` or a path object.
        :return: Parsed JSON for `.json` files, otherwise the list of lines.
        """
        with open(file) as f:
            # `str()` allows path objects, which have no `endswith` method.
            if str(file).endswith(".json"):
                return json.load(f)
            return f.readlines()

    def process(self, name, **kwargs):
        """Process all the known data into a merged, and standardized data format.

        Files that cannot be opened or transformed are reported through the
        logger and left out of the merged result.

        :param str name: Name of the merged data.
        :param kwargs: Extra options forwarded to the custom transformer's
            `transform` method.
        :return dict: Merged data, validated against `self.schema`.
        """
        trfmdata = []

        for file in self.files:
            # Open data, preferring the custom transformer's own opener
            try:
                if hasattr(self._custom_transformer, "open_data"):
                    data = self._custom_transformer.open_data(file)
                else:
                    data = self.open_data(file)
            except Exception as e:
                self.logger.warning(
                    "Failed to open file %s, skipping" % file, self.prefix
                )
                self.logger.warning("%s %s" % (e.__class__.__name__, e), self.prefix)
                # There is nothing to transform for this file, so really skip it
                # instead of handing an empty placeholder to `transform`.
                continue

            # Transform data
            try:
                data = self._custom_transformer.transform(data, **kwargs)
                if not isinstance(data, list):
                    data = [data]
                # Tag every data point with the file it came from, unless the
                # transformer already did so.
                for entry in data:
                    for ele in entry["data"]:
                        if "file" not in ele:
                            ele.update({"file": file})
                trfmdata.extend(data)
            except Exception as e:
                self.logger.warning(
                    "Failed to transform file %s, skipping" % file, self.prefix
                )
                self.logger.warning("%s %s" % (e.__class__.__name__, e), self.prefix)

        merged = self._custom_transformer.merge(trfmdata)

        # `merge` may return a single dict or an iterable of dicts
        if isinstance(merged, dict):
            merged["name"] = name
        else:
            for e in merged:
                e["name"] = name

        validate(instance=merged, schema=self.schema)
        return merged


class SimplePerfherderTransformer:
    """Transforms perfherder data into the standardized data format."""

    # Number of entries transformed since the last merge; used as the x-axis value.
    entry_number = 0

    def transform(self, data, **kwargs):
        """Convert one perfherder blob into a single standardized data point.

        :param dict data: Perfherder data; the value of its first suite is used.
        :param kwargs: Accepted and ignored, so that options forwarded by
            `Transformer.process` do not make the call fail.
        :return dict: `{"data": [{"value": ..., "xaxis": ...}]}`.
        """
        self.entry_number += 1
        return {
            "data": [{"value": data["suites"][0]["value"], "xaxis": self.entry_number}]
        }

    def merge(self, sde):
        """Concatenate the data of all transformed entries.

        :param list sde: Entries produced by `transform`.
        :return dict: A single entry holding every data point.
        """
        merged = {"data": []}
        for entry in sde:
            if isinstance(entry["data"], list):
                merged["data"].extend(entry["data"])
            else:
                merged["data"].append(entry["data"])

        # Start the x-axis numbering over for the next batch of files
        self.entry_number = 0
        return merged


def get_transformer(path, ret_members=False):
    """Load a transformer class from the given path.

    A path that does not end in `.py` is handed to `load_class`. Otherwise
    the file is imported as a module and searched for classes that expose
    callable `transform` and `merge` attributes.

    :param str path: The path points to the custom transformer.
    :param bool ret_members: If true, return the full list of
        `(name, class)` pairs found by `inspect.getmembers` instead of a
        single class.
    :return: The first matching class, or the list of matches when
        `ret_members` is true.
    :raises NotebookInvalidPathError: If the `.py` file does not exist.
    :raises NotebookInvalidTransformError: If no matching class is found
        and `ret_members` is false.
    """
    source = pathlib.Path(path)

    if source.suffix != ".py":
        return load_class(path)

    if not source.exists():
        raise NotebookInvalidPathError(f"The path {path} does not exist.")

    def _is_transformer(candidate):
        # A transformer is any class with callable `transform` and `merge`
        return (
            inspect.isclass(candidate)
            and hasattr(candidate, "transform")
            and hasattr(candidate, "merge")
            and callable(candidate.transform)
            and callable(candidate.merge)
        )

    # Import the source file directly, without going through sys.path
    module_spec = importlib.util.spec_from_file_location(
        name=source.name, location=path
    )
    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)

    found = inspect.getmembers(loaded, _is_transformer)

    if ret_members:
        return found

    if not found:
        raise NotebookInvalidTransformError(
            f"The path {path} was found but it was not a valid transformer."
        )

    return found[0][-1]


def get_transformers(dirpath=None):
    """This function returns a dict of transformers under the given path.

    Every `*.py` file directly inside the directory is loaded with
    `get_transformer`, and all transformer classes found are collected.

    If more than one transformers have the same class name, an exception will be raised.

    :param pathlib.Path dirpath: Path to a directory containing the
        transformers. A plain string path is accepted as well.
    :return dict: {"Transformer class name": Transformer class}.
    :raises NotebookInvalidPathError: If no path is given, the path does not
        exist, or it is not a directory.
    :raises NotebookDuplicateTransformsError: If two transformer classes
        share the same name.
    """
    # Imported explicitly: `import importlib.util` alone does not guarantee
    # that the `importlib.machinery` attribute is available.
    from importlib import machinery

    if dirpath is None:
        raise NotebookInvalidPathError("No directory path was given.")

    dirpath = pathlib.Path(dirpath)

    if not dirpath.exists():
        raise NotebookInvalidPathError(f"The path {dirpath.as_posix()} does not exist.")

    if not dirpath.is_dir():
        raise NotebookInvalidPathError(
            f"Path given is not a directory: {dirpath.as_posix()}"
        )

    # Sorted so that files are always loaded in the same order
    tfm_files = sorted(dirpath.glob("*.py"))

    # The empty suffix is still registered for backward compatibility, but
    # only once: this list is shared process-wide and must not grow on
    # every call.
    if "" not in machinery.SOURCE_SUFFIXES:
        machinery.SOURCE_SUFFIXES.append("")

    ret = {}

    for file in tfm_files:
        members = get_transformer(file.resolve().as_posix(), True)

        for name, tfm_class in members:
            if name in ret:
                raise NotebookDuplicateTransformsError(
                    f"Duplicated transformer {name} "
                    f"is found in the directory {dirpath.as_posix()}. "
                    "Please define each transformer class with a unique class name.",
                )
            ret[name] = tfm_class

    return ret