testing/talos/talos/scripts/report.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import argparse
import collections
import csv
import os
import sys
from calendar import day_name
from datetime import datetime

import compare
import numpy
import six

# Add the parent of sys.path[0] (normally the directory containing this
# script) to the module search path, right after the first entry.
# NOTE(review): this runs after the imports above, so it cannot influence how
# those modules are located -- confirm the ordering is intentional.
sys.path.insert(1, os.path.join(sys.path[0], ".."))


def get_branch(platform):
    """Return the "Inbound" branch id to use for ``platform``.

    Platforms whose name starts with ``"OSX"`` use the ``"pgo"`` entry of
    ``compare.branch_map["Inbound"]``; every other platform uses the
    ``"nonpgo"`` entry.
    """
    build_type = "pgo" if platform.startswith("OSX") else "nonpgo"
    return compare.branch_map["Inbound"][build_type]["id"]


def get_all_test_tuples():
    """Return the tuples from ``get_tuple`` for every test/platform pair.

    Tests are taken from ``compare.test_map`` and platforms from
    ``compare.platform_map``; the result is ordered test-major.
    """
    return [
        entry
        for test_name in compare.test_map
        for platform_name in compare.platform_map
        for entry in get_tuple(test_name, platform_name)
    ]


def get_tuple(test, platform):
    """Return a one-element list describing the ``test``/``platform`` pair.

    The single tuple is ``(test id, branch id, platform id, test, platform)``
    where the ids are looked up in the ``compare`` maps and the last two items
    are the names that were passed in.
    """
    test_id = compare.test_map[test]["id"]
    branch_id = get_branch(platform)
    platform_id = compare.platform_map[platform]
    return [(test_id, branch_id, platform_id, test, platform)]


def generate_report(tuple_list, filepath, mode="variance"):
    """Write a per-weekday CSV report for the given test/platform tuples.

    For every entry of ``tuple_list`` the graph data is fetched with
    ``compare.getGraphData``, grouped per calendar day and then per weekday.
    One CSV row is produced for each entry that returned data: a
    ``test-platform`` label followed by one value per day of the week. Values
    whose index is returned by ``is_normal`` are wrapped in ``**``.

    Args:
        tuple_list: iterable of ``(testid, branchid, platformid, test,
            platform)`` tuples, as built by ``get_tuple``.
        filepath: path of the CSV file to write.
        mode: ``"variance"`` reports, per weekday, the trimmed mean of the
            daily variances; ``"count"`` reports the mean number of runs per
            day.

    Raises:
        ValueError: if ``mode`` is neither ``"variance"`` nor ``"count"``.
    """
    if mode not in ("variance", "count"):
        raise ValueError("unknown mode: %r" % (mode,))

    rows = []
    for test in tuple_list:
        testid, branchid, platformid = test[:3]
        data_dict = compare.getGraphData(testid, branchid, platformid)
        if not data_dict:
            continue

        # Each run is indexed positionally: item 2 is used as a timestamp and
        # item 3 as the measured value. Sort by value and keep the middle of
        # the distribution so extreme runs do not dominate.
        data = data_dict["test_runs"]
        data.sort(key=lambda x: x[3])
        data = data[int(0.1 * len(data)) : int(0.9 * len(data) + 1)]

        # Group the remaining values by calendar day (insertion ordered).
        time_dict = collections.OrderedDict()
        for point in data:
            time = datetime.fromtimestamp(point[2]).strftime("%Y-%m-%d")
            time_dict.setdefault(time, []).append(point[3])

        # Collapse each calendar day into one sample and bucket it by weekday.
        days = {}
        for time, values in time_dict.items():
            weekday = datetime.strptime(time, "%Y-%m-%d").strftime("%A")
            if mode == "variance":
                sample = numpy.var(values)
            else:
                sample = len(values)
            days.setdefault(weekday, []).append(sample)

        line = ["-".join(test[3:])]
        week_avgs = []
        for day in day_name:
            samples = days.get(day)
            if not samples:
                # No data at all for this weekday: report NaN instead of
                # failing the whole report with a KeyError.
                average = float("nan")
            elif mode == "variance":
                # Remove the top and bottom 10% to reduce outlier influence.
                # Integer arithmetic keeps the slice bounds valid on Python 3
                # and keeps every sample when there are fewer than ten.
                count = len(samples)
                trimmed = sorted(samples)[count // 10 : (9 * count) // 10 + 1]
                average = numpy.average(trimmed)
            else:
                average = numpy.average(samples)
            line.append("%.3f" % average)
            week_avgs.append(average)

        # Highlight the days flagged as outliers; line[0] is the label, so
        # day index j lives at line[j + 1].
        for j in is_normal(week_avgs):
            line[j + 1] = "**" + line[j + 1] + "**"

        rows.append(line)

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] < 3:
        report = open(filepath, "wb")
    else:
        report = open(filepath, "w", newline="")
    with report:
        writer = csv.writer(report, quoting=csv.QUOTE_ALL)
        writer.writerow(["test-platform"] + list(day_name))
        writer.writerows(rows)


def is_normal(y):
    """Return the indices of the entries of ``y`` that look like outliers.

    ``y`` is expected to hold seven per-day values in the order produced by
    ``generate_report`` (it iterates ``calendar.day_name``), so indices 5 and
    6 are treated as the weekend.

    This is a crude initial attempt at detecting normal distributions.
    TODO: Improve this

    Args:
        y: sequence of seven numbers.

    Returns:
        A list of indices into ``y``: first any flagged entries among indices
        1-4, then any weekend entries (5, 6) that are more than 20% away from
        the baseline average.
    """
    limit = 1.5
    clean_week = []
    outliers = []

    # Find a baseline for the week. Only look for spikes when the first four
    # values differ by at least a factor of ``limit``.
    # NOTE(review): the spread test covers indices 0-3 while the loop checks
    # indices 1-4, so index 0 is never flagged nor part of the baseline --
    # kept as-is, confirm whether that is intended.
    if (min(y[0:4]) * limit) <= max(y[0:4]):
        for i in range(1, 5):
            if y[i] > (y[i - 1] * limit) or y[i] > (y[i + 1] * limit):
                outliers.append(i)
            else:
                clean_week.append(y[i])
    else:
        clean_week = y

    # Every checked day was flagged: fall back to the whole week so the
    # baseline average below cannot divide by zero.
    if not clean_week:
        clean_week = y

    # Look at weekends now; float() keeps true division on Python 2 as well.
    avg = sum(clean_week) / float(len(clean_week))
    for i in range(5, 7):
        # look for something outside of the 20% window
        if (y[i] * 1.2) < avg or y[i] > (avg * 1.2):
            outliers.append(i)
    return outliers


def main():
    """Parse the command line and write the weekday report CSV.

    The report covers every tuple from ``get_all_test_tuples``, optionally
    restricted with ``--test`` and/or ``--platform``. The output file is named
    ``report[-PLATFORM][-TEST]-MODE.csv`` and is written relative to the
    current working directory.
    """
    parser = argparse.ArgumentParser(description="Generate weekdays reports")
    parser.add_argument("--test", help="show only the test named TEST")
    parser.add_argument("--platform", help="show only the platform named PLATFORM")
    # Restrict --mode to the values generate_report understands so that a typo
    # is rejected by argparse instead of failing later during report building.
    parser.add_argument(
        "--mode",
        help="select mode",
        choices=["variance", "count"],
        default="variance",
    )
    args = parser.parse_args()

    tuple_list = get_all_test_tuples()
    name_parts = ["report"]

    # Tuples are (testid, branchid, platformid, test, platform).
    if args.platform:
        tuple_list = [x for x in tuple_list if x[4] == args.platform]
        name_parts.append(args.platform)

    if args.test:
        tuple_list = [x for x in tuple_list if x[3] == args.test]
        name_parts.append(args.test)

    name_parts.append(args.mode)
    generate_report(
        tuple_list, filepath="-".join(name_parts) + ".csv", mode=args.mode
    )


# Run the report generator only when this file is executed as a script.
if __name__ == "__main__":
    main()