path: root/testing/awsy/awsy/process_perf_data.py
blob: 32ec9655b0334458f1fb9079384f0b796a2e1ecc
#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import glob
import json
import math
import os
import sys

import six

AWSY_PATH = os.path.dirname(os.path.realpath(__file__))
if AWSY_PATH not in sys.path:
    sys.path.append(AWSY_PATH)

import parse_about_memory

# A description of each checkpoint and the root path to it.
CHECKPOINTS = [
    {"name": "Fresh start", "path": "memory-report-Start-0.json.gz"},
    {"name": "Fresh start [+30s]", "path": "memory-report-StartSettled-0.json.gz"},
    {"name": "After tabs open", "path": "memory-report-TabsOpen-4.json.gz"},
    {
        "name": "After tabs open [+30s]",
        "path": "memory-report-TabsOpenSettled-4.json.gz",
    },
    {
        "name": "After tabs open [+30s, forced GC]",
        "path": "memory-report-TabsOpenForceGC-4.json.gz",
    },
    {
        "name": "Tabs closed extra processes",
        "path": "memory-report-TabsClosedExtraProcesses-4.json.gz",
    },
    {"name": "Tabs closed", "path": "memory-report-TabsClosed-4.json.gz"},
    {"name": "Tabs closed [+30s]", "path": "memory-report-TabsClosedSettled-4.json.gz"},
    {
        "name": "Tabs closed [+30s, forced GC]",
        "path": "memory-report-TabsClosedForceGC-4.json.gz",
    },
]

# A description of each perfherder suite and the path to its values.
PERF_SUITES = [
    {"name": "Resident Memory", "node": "resident"},
    {"name": "Explicit Memory", "node": "explicit/"},
    {"name": "Heap Unclassified", "node": "explicit/heap-unclassified"},
    {"name": "JS", "node": "js-main-runtime/"},
    {"name": "Images", "node": "explicit/images/"},
]


def median(values):
    sorted_ = sorted(values)
    # pylint --py3k W1619
    med = int(len(sorted_) / 2)

    if len(sorted_) % 2:
        return sorted_[med]
    # pylint --py3k W1619
    return (sorted_[med - 1] + sorted_[med]) / 2
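
# Illustrative behavior of median() (a sketch, assuming Python 3 true
# division):
#   median([3, 1, 2])    -> 2
#   median([4, 1, 3, 2]) -> 2.5  (mean of the two middle values)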


def update_checkpoint_paths(checkpoint_files, checkpoints):
    """
    Updates checkpoints with memory report file fetched in data_path
    :param checkpoint_files: list of files in data_path
    :param checkpoints: The checkpoints to update the path of.
    """
    target_path = [
        ["Start-", 0],
        ["StartSettled-", 0],
        ["TabsOpen-", -1],
        ["TabsOpenSettled-", -1],
        ["TabsOpenForceGC-", -1],
        ["TabsClosedExtraProcesses-", -1],
        ["TabsClosed-", -1],
        ["TabsClosedSettled-", -1],
        ["TabsClosedForceGC-", -1],
    ]
    for name, idx in target_path:
        paths = sorted([x for x in checkpoint_files if name in x])
        if paths:
            indices = [i for i, x in enumerate(checkpoints) if name in x["path"]]
            if indices:
                checkpoints[indices[0]]["path"] = paths[idx]
            else:
                print("found files but couldn't find {}".format(name))


def create_suite(
    name, node, data_path, checkpoints=CHECKPOINTS, alertThreshold=None, extra_opts=None
):
    """
    Creates a suite suitable for adding to a perfherder blob. Calculates the
    geometric mean of the checkpoint values and adds that to the suite as
    well.

    :param name: The name of the suite.
    :param node: The path of the data node to extract data from.
    :param data_path: The directory to retrieve data from.
    :param checkpoints: Which checkpoints to include.
    :param alertThreshold: The percentage of change that triggers an alert.
    :param extra_opts: Optional list of extra options to add to the suite's
                       extraOptions.
    """
    suite = {"name": name, "subtests": [], "lowerIsBetter": True, "unit": "bytes"}

    if alertThreshold:
        suite["alertThreshold"] = alertThreshold

    opts = []
    if extra_opts:
        opts.extend(extra_opts)

    if "DMD" in os.environ and os.environ["DMD"]:
        opts.append("dmd")

    if len(opts) > 0:
        suite["extraOptions"] = opts

    update_checkpoint_paths(
        glob.glob(os.path.join(data_path, "memory-report*")), checkpoints
    )

    total = 0
    for checkpoint in checkpoints:
        memory_report_path = os.path.join(data_path, checkpoint["path"])

        name_filter = checkpoint.get("name_filter", None)
        if checkpoint.get("median"):
            process = median
        else:
            process = sum

        if node != "resident":
            totals = parse_about_memory.calculate_memory_report_values(
                memory_report_path, node, name_filter
            )
            value = process(totals.values())
        else:
            # For "resident" we really want RSS of the chrome ("Main") process
            # and USS of the child processes. We'll still call it resident
            # for simplicity (it's nice to be able to compare RSS of non-e10s
            # with RSS + USS of e10s).
            totals_rss = parse_about_memory.calculate_memory_report_values(
                memory_report_path, node, ["Main"]
            )
            totals_uss = parse_about_memory.calculate_memory_report_values(
                memory_report_path, "resident-unique"
            )
            value = list(totals_rss.values())[0] + sum(
                [v for k, v in six.iteritems(totals_uss) if "Main" not in k]
            )

        subtest = {
            "name": checkpoint["name"],
            "value": value,
            "lowerIsBetter": True,
            "unit": "bytes",
        }
        suite["subtests"].append(subtest)
        total += math.log(subtest["value"])

    # Add the geometric mean. For more details on the calculation see:
    #   https://en.wikipedia.org/wiki/Geometric_mean#Relationship_with_arithmetic_mean_of_logarithms
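    # For example, for two checkpoint values 2 and 8 this computes
    # exp((log(2) + log(8)) / 2) == 4.0, their geometric mean.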
    # pylint --py3k W1619
    suite["value"] = math.exp(total / len(checkpoints))

    return suite
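
# A sketch of the suite structure create_suite() returns (the values shown are
# illustrative, not real measurements):
#
#   {
#       "name": "Explicit Memory",
#       "lowerIsBetter": True,
#       "unit": "bytes",
#       "subtests": [
#           {"name": "Fresh start", "value": 12345678,
#            "lowerIsBetter": True, "unit": "bytes"},
#           ...
#       ],
#       "value": 23456789.0,  # geometric mean of the subtest values
#   }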


def create_perf_data(
    data_path, perf_suites=PERF_SUITES, checkpoints=CHECKPOINTS, extra_opts=None
):
    """
    Builds up a performance data blob suitable for submitting to perfherder.
    """
    if ("GCOV_PREFIX" in os.environ) or ("JS_CODE_COVERAGE_OUTPUT_DIR" in os.environ):
        print(
            "Code coverage is being collected, performance data will not be gathered."
        )
        return {}

    perf_blob = {"framework": {"name": "awsy"}, "suites": []}

    for suite in perf_suites:
        perf_blob["suites"].append(
            create_suite(
                suite["name"],
                suite["node"],
                data_path,
                checkpoints,
                suite.get("alertThreshold"),
                extra_opts,
            )
        )

    return perf_blob
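
# The resulting blob, serialized on the PERFHERDER_DATA line below, has the
# shape (one suite per PERF_SUITES entry):
#
#   {
#       "framework": {"name": "awsy"},
#       "suites": [<suite>, <suite>, ...]
#   }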


if __name__ == "__main__":
    args = sys.argv[1:]
    if not args:
        print("Usage: process_perf_data.py data_path")
        sys.exit(1)

    # The directory containing the memory reports to process.
    data_path = args[0]
    perf_blob = create_perf_data(data_path)
    print("PERFHERDER_DATA: {}".format(json.dumps(perf_blob)))

    sys.exit(0)