summaryrefslogtreecommitdiffstats
path: root/testing/raptor/raptor/filters.py
blob: a0c7ffc1a919eabc17d1fde888e464f25f422c89 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# originally taken from /testing/talos/talos/filter.py

import math

"""
data filters:
takes a series of run data and applies statistical transforms to it

Each filter is a simple function, but it also has a special `prepare`
function attached that creates a tuple containing one instance of
:class:`Filter`; this allows writing code like::

  from raptor import filters
  filter_list = filters.ignore_first.prepare(1) + filters.median.prepare()

  for filter in filter_list:
      data = filter.apply(data)
  # data is filtered
"""

# Registry of filter functions keyed by function name; written by
# `register_filter` and read by `filters`.
_FILTERS = {}


class Filter(object):
    """
    A filter function bundled with the extra arguments it should be
    called with.

    The data to filter is always passed as the first positional argument
    of the wrapped function. An instance can be used either through
    :meth:`apply` or by calling it directly (``data = prepared(data)``).
    """

    def __init__(self, func, *args, **kwargs):
        """
        Takes a filter function, and saves the args and kwargs that
        should be used when the filter is applied.

        :param func: callable receiving the data as first positional argument
        :param args: extra positional arguments passed to ``func`` after
                     the data
        :param kwargs: extra keyword arguments passed to ``func``
        """
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def apply(self, data):
        """
        Apply the filter on the data, and return the new data
        """
        return self.func(data, *self.args, **self.kwargs)

    def __call__(self, data):
        """
        Calling the instance is equivalent to :meth:`apply`.

        This lets a prepared filter be used wherever a plain filter
        function is expected, e.g. in a loop doing ``data = filter(data)``.
        """
        return self.apply(data)


def define_filter(func):
    """
    Decorator that attaches a ``prepare`` function to ``func``.

    ``func.prepare(*args, **kwargs)`` returns a one-element tuple holding a
    :class:`Filter` that wraps ``func`` together with the given arguments.
    The decorated function itself is returned unchanged.
    """

    def prepare(*args, **kwargs):
        prepared = Filter(func, *args, **kwargs)
        # a tuple, so that prepared filters can be chained with "+"
        return (prepared,)

    setattr(func, "prepare", prepare)
    return func


def register_filter(func):
    """
    Decorator that records ``func`` in the module-level ``_FILTERS``
    registry under its ``__name__`` and returns it unchanged.

    All filters defined in this module should be registered.
    """
    # the registry is only mutated, never rebound, so no `global` is needed
    name = func.__name__
    _FILTERS[name] = func
    return func


def filters(*args):
    """
    Look up registered filter functions by name.

    Returns a list with the function registered under each name given in
    ``args``, in the same order. A name that was never registered raises
    KeyError.
    """
    return [_FILTERS[name] for name in args]


def apply(data, filters):
    """
    Run ``data`` through every callable in ``filters``, in order.

    Each callable receives the result of the previous one; the result of
    the last one is returned. With no filters, ``data`` is returned as is.
    """
    result = data
    for step in filters:
        result = step(result)
    return result


def parse(string_):
    """
    Parse a filter specification of the form ``name`` or
    ``name:arg1,arg2,...``.

    :param string_: the specification, e.g. ``"ignore_first:5"``
    :returns: a two-element list ``[name, args]`` where ``args`` is the
              list of numeric arguments (empty when none are given).
              Anything after a second ``:`` is ignored.
    :raises ValueError: if an argument cannot be converted to a number
    """

    def to_number(string_number):
        # prefer an int; fall back to float for values such as "2.5"
        try:
            return int(string_number)
        except ValueError:
            return float(string_number)

    tokens = string_.split(":")
    func = tokens[0]

    digits = []
    # A spec such as "median:" has an empty argument section; treat it as
    # "no arguments" instead of failing on float("").
    if len(tokens) > 1 and tokens[1].strip():
        digits = [to_number(digit) for digit in tokens[1].split(",")]

    return [func, digits]


# filters that return a scalar


@register_filter
@define_filter
def mean(series):
    """
    Arithmetic mean of the data points in ``series``.

    Needs at least one data point: an empty series raises
    ZeroDivisionError.
    """
    total = sum(series)
    count = float(len(series))
    return total / count


@register_filter
@define_filter
def median(series):
    """
    Median of the data points in ``series``; needs at least one data point.

    For an odd number of points this is the middle value of the sorted
    data, for an even number it is the average of the two middle values.
    """
    ordered = sorted(series)
    # `upper` is the middle index for odd lengths and the higher of the
    # two middle indices for even lengths
    upper, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[upper]
    return 0.5 * (ordered[upper - 1] + ordered[upper])


@register_filter
@define_filter
def variance(series):
    """
    Population variance of ``series``, i.e. the average squared deviation
    from the mean: http://en.wikipedia.org/wiki/Variance
    """
    center = mean(series)
    squared_deviations = [(value - center) ** 2 for value in series]
    return sum(squared_deviations) / float(len(series))


@register_filter
@define_filter
def stddev(series):
    """
    Standard deviation of ``series`` (square root of its variance):
    http://en.wikipedia.org/wiki/Standard_deviation
    """
    spread = variance(series)
    return pow(spread, 0.5)


@register_filter
@define_filter
def dromaeo(series):
    """
    dromaeo: https://wiki.mozilla.org/Dromaeo, pull the internal calculation
    out

    The series is cut into chunks, each chunk is averaged, and the
    geometric mean of those averages is returned.
      * This is for 'runs/s' based tests, not 'ms' tests.
      * chunksize: defined in dromaeo: tests/dromaeo/webrunner.js#l8
    """
    chunksize = 5
    chunk_means = [mean(chunk) for chunk in dromaeo_chunks(series, chunksize)]
    return geometric_mean(chunk_means)


@register_filter
@define_filter
def dromaeo_chunks(series, size):
    """
    Yield consecutive slices of ``series`` holding ``size`` items each.

    The last slice is shorter when the length of ``series`` is not a
    multiple of ``size``.
    """
    for start in range(0, len(series), size):
        stop = start + size
        yield series[start:stop]


@register_filter
@define_filter
def geometric_mean(series):
    """
    geometric_mean: http://en.wikipedia.org/wiki/Geometric_mean

    Every value is shifted by +1 before taking its logarithm and the
    result is shifted back by -1, so a series of zeros yields 0.
    """
    log_sum = 0
    for value in series:
        log_sum = log_sum + math.log(value + 1)
    # pylint --py3k W1619
    average_log = log_sum / len(series)
    return math.exp(average_log) - 1


# filters that return a list


@register_filter
@define_filter
def ignore_first(series, number=1):
    """
    Drop the first ``number`` data points (one by default).

    A series that has no more than ``number`` data points is considered
    too short and is returned unmodified.
    """
    too_short = len(series) <= number
    return series if too_short else series[number:]


@register_filter
@define_filter
def ignore(series, function):
    """
    Return a copy of ``series`` without the first occurrence of the value
    picked by ``function(series)`` (e.g. ``max`` or ``min``).

    Series with at most one data point are returned unmodified. The
    original series is never mutated.
    """
    if len(series) > 1:
        remaining = series[:]  # work on a copy, not on the caller's list
        remaining.remove(function(remaining))
        return remaining
    # don't modify short series
    return series


@register_filter
@define_filter
def ignore_max(series):
    """
    Drop the largest data point (its first occurrence) via ``ignore``.
    """
    without_max = ignore(series, max)
    return without_max


@register_filter
@define_filter
def ignore_min(series):
    """
    Drop the smallest data point (its first occurrence) via ``ignore``.
    """
    without_min = ignore(series, min)
    return without_min


@register_filter
@define_filter
def ignore_negative(series):
    """
    Drop every data point that has a negative value.

    Series with at most one data point are returned unmodified.
    Caution: if all data values are < 0, this will return an empty list.
    """
    if len(series) > 1:
        snapshot = series[:]  # iterate over a copy, as before
        return [value for value in snapshot if value >= 0]
    # don't modify short series
    return series


@register_filter
@define_filter
def v8_subtest(series, name):
    """
    v8 benchmark score - modified for no sub benchmarks.
    * removed Crypto and kept Encrypt/Decrypt standalone
    * removed EarlyBoyer and kept Earley/Boyer standalone

    The score is the reference value of the subtest ``name`` divided by
    the geometric mean of ``series``; an unknown ``name`` raises KeyError.

    this is not 100% in parity but within .3%
    """
    baselines = {
        "Encrypt": 266181.0,
        "Decrypt": 266181.0,
        "DeltaBlue": 66118.0,
        "Earley": 666463.0,
        "Boyer": 666463.0,
        "NavierStokes": 1484000.0,
        "RayTrace": 739989.0,
        "RegExp": 910985.0,
        "Richards": 35302.0,
        "Splay": 81491.0,
    }

    # look the reference up first so an unknown name fails before any math
    baseline = baselines[name]
    # pylint --py3k W1619
    return baseline / geometric_mean(series)


@register_filter
@define_filter
def responsiveness_Metric(val_list):
    """
    Sum of the squared values of ``val_list``, each square divided by
    1,000,000. Values are converted with ``float`` first, so numeric
    strings are accepted.
    """
    scaled_squares = []
    for raw in val_list:
        value = float(raw)
        scaled_squares.append(value * value / 1000000.0)
    return sum(scaled_squares)