path: root/dom/quota/scripts/stackanalysis.py

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


# Telemetry seems to sometimes record identical events twice, so we drop
# consecutive duplicates.
def sanitize(rows):
    newrows = []
    pcid = "unset"
    psid = "unset"
    pseq = "unset"
    for row in rows:
        cid = row["client_id"]
        sid = row["session_id"]
        seq = row["seq"]
        if cid != pcid or sid != psid or seq != pseq:
            newrows.append(row)
        pcid = cid
        psid = sid
        pseq = seq

    return newrows
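

# A minimal usage sketch for sanitize(), not part of the original script; the
# rows are hypothetical and show only the fields that sanitize() compares:
def _exampleSanitize():
    rows = [
        {"client_id": "c1", "session_id": "s1", "seq": 1},
        {"client_id": "c1", "session_id": "s1", "seq": 1},  # telemetry duplicate
        {"client_id": "c1", "session_id": "s1", "seq": 2},
    ]
    # The consecutive duplicate is dropped, leaving two rows.
    assert len(sanitize(rows)) == 2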


# Given a set of rows, find all distinct build ids
def extractBuildIDs(rows):
    buildids = {}
    for row in rows:
        id = row["build_id"]
        if id in buildids:
            buildids[id] = buildids[id] + 1
        else:
            buildids[id] = 1
    return buildids
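

# Usage sketch for extractBuildIDs() with hypothetical build ids; not part of
# the original script:
def _exampleExtractBuildIDs():
    rows = [
        {"build_id": "20210101"},
        {"build_id": "20210101"},
        {"build_id": "20210202"},
    ]
    assert extractBuildIDs(rows) == {"20210101": 2, "20210202": 1}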


# Given a set of build ids and rows, enrich each row with an hg link.
# Relies on buildids containing the result of utils.fetchBuildRevisions.
def constructHGLinks(buildids, rows):
    for row in rows:
        id = row["build_id"]
        if id in buildids:
            row["location"] = (
                buildids[id] + "/" + row["source_file"] + "#l" + row["source_line"]
            )
        else:
            row["location"] = id + "/" + row["source_file"] + "#l" + row["source_line"]
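

# Usage sketch for constructHGLinks(); the revision URL and file are made up.
# utils.fetchBuildRevisions is expected to have replaced each build id value
# with such a URL prefix beforehand. Note that source_line must be a string,
# since it is concatenated into the link:
def _exampleConstructHGLinks():
    buildids = {"20210101": "https://hg.mozilla.org/mozilla-central/file/abcdef"}
    rows = [
        {
            "build_id": "20210101",
            "source_file": "dom/quota/ActorsParent.cpp",
            "source_line": "42",
        }
    ]
    constructHGLinks(buildids, rows)
    assert rows[0]["location"].endswith("dom/quota/ActorsParent.cpp#l42")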


# Module-level state: the set of known topmost (location, result) frames and
# the accumulated time deltas between consecutive frames.
topmost_stackframes = set()
delta_frames = {}


def isTopmostFrame(frame):
    f = (frame["location"], frame["result"])
    return f in topmost_stackframes


def addTopmostFrame(frame):
    f = (frame["location"], frame["result"])
    if not isTopmostFrame(frame):
        # print("Found new topmost frame {}.".format(frame))
        topmost_stackframes.add(f)
        frame["topmost"] = True


def addFrameDelta(frame1, frame2):
    if frame1["client_id"] != frame2["client_id"]:
        return
    if frame1["session_id"] != frame2["session_id"]:
        return

    fkey = "{}:{}-{}:{}".format(
        frame2["location"], frame2["result"], frame1["location"], frame1["result"]
    )
    if fkey not in delta_frames:
        fdelta = {"delta_sum": 0, "delta_cnt": 0}
        fdelta["prev_row"] = frame1
        fdelta["candidate"] = frame2
        delta_frames[fkey] = fdelta

    fdelta = delta_frames[fkey]
    etv1 = frame1["event_timestamp"]
    etv2 = frame2["event_timestamp"]
    if isinstance(etv1, int) and isinstance(etv2, int) and etv2 > etv1:
        delta = etv2 - etv1
        fdelta["delta_sum"] = fdelta["delta_sum"] + delta
        fdelta["delta_cnt"] = fdelta["delta_cnt"] + 1
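

# Usage sketch for addFrameDelta() with two hypothetical frames from the same
# client and session; note that it mutates the module-level delta_frames dict,
# so the assertions only hold on a first run:
def _exampleAddFrameDelta():
    f1 = {
        "client_id": "c1",
        "session_id": "s1",
        "location": "a",
        "result": "NS_ERROR_FAILURE",
        "event_timestamp": 10,
    }
    f2 = dict(f1, location="b", event_timestamp=25)
    addFrameDelta(f1, f2)
    fdelta = delta_frames["b:NS_ERROR_FAILURE-a:NS_ERROR_FAILURE"]
    assert fdelta["delta_sum"] == 15 and fdelta["delta_cnt"] == 1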


# There can be outliers in the time distance between two stack frames that
# belong to the same propagation stack. To avoid the risk that a single
# outlier breaks thousands of stacks, we check the average time distance
# instead.
def checkAverageFrameTimeDeltas(rows, max_delta):
    # print("checkAverageFrameTimeDeltas")
    prev_row = None
    for row in rows:
        if "topmost" in row or not row["session_complete"]:
            prev_row = None
            continue

        if prev_row:
            addFrameDelta(prev_row, row)
        prev_row = row

    for fdelta in delta_frames.values():
        total = fdelta["delta_sum"]
        cnt = fdelta["delta_cnt"]
        if cnt > 0 and (total / cnt) > max_delta:
            # print(fdelta)
            addTopmostFrame(fdelta["candidate"])


# A topmost frame is considered to initiate a new raw stack. We collect all
# candidates before we actually apply them. This implies that we should run
# this function on a "large enough" sample of rows to be more accurate.
# As a side effect, we mark all rows that are part of a "complete" session
# (a session that started within our data scope).
def collectTopmostFrames(rows):
    prev_cid = "unset"
    prev_sid = "unset"
    prev_tid = "unset"
    prev_ctx = "unset"
    prev_sev = "ERROR"
    session_complete = False
    after_severity_downgrade = False
    for row in rows:
        cid = row["client_id"]
        sid = row["session_id"]
        tid = row["seq"] >> 32  # upper 32 bits: thread id
        ctx = row["context"]
        seq = row["seq"] & 0x00000000FFFFFFFF  # lower 32 bits: sequence number
        sev = row["severity"]

        # If we have a new session, ensure it is complete from the start;
        # otherwise we ignore it entirely.
        if cid != prev_cid or sid != prev_sid or tid != prev_tid:
            if seq == 1:
                session_complete = True
            else:
                session_complete = False
        row["session_complete"] = session_complete
        if session_complete:
            # If we change client, session, thread or context, we can be sure to have
            # a new topmost frame.
            if (
                seq == 1
                or cid != prev_cid
                or sid != prev_sid
                or tid != prev_tid
                or ctx != prev_ctx
            ):
                addTopmostFrame(row)
                after_severity_downgrade = False
            # We do not expect a non-error to ever be upgraded to an error.
            elif sev == "ERROR" and prev_sev != "ERROR":
                addTopmostFrame(row)
                after_severity_downgrade = False
            # If we just had a severity downgrade, we assume the error
            # propagation was meant to stop after that point and split, too.
            elif after_severity_downgrade:
                addTopmostFrame(row)
                after_severity_downgrade = False
            elif prev_sev == "ERROR" and sev != "ERROR":
                after_severity_downgrade = True

        prev_cid = cid
        prev_sid = sid
        prev_tid = tid
        prev_ctx = ctx
        prev_sev = sev

    # Should be ms. We have seen quite some runtime between stack frames in
    # the wild, so we might want to make this configurable. In general we
    # prefer keeping local context over letting some topmost frame slip
    # through unrecognized, assuming that fixing the issues one by one will
    # successively uncover the remaining ones. This is achieved by a rather
    # high delta value.
    max_avg_delta = 200
    checkAverageFrameTimeDeltas(rows, max_avg_delta)
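

# A sketch of the "seq" encoding decoded above: the upper 32 bits carry the
# thread id and the lower 32 bits the per-thread sequence number (the values
# here are hypothetical):
def _exampleSeqEncoding():
    seq = (7 << 32) | 1  # thread 7, first event of that thread
    assert seq >> 32 == 7
    assert seq & 0x00000000FFFFFFFF == 1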


def getFrameKey(frame):
    return "{}.{}|".format(frame["location"], frame["result"])


def getStackKey(stack):
    stack_key = ""
    for frame in stack["frames"]:
        stack_key += getFrameKey(frame)
    return hash(stack_key)
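

# Usage sketch: two stacks whose frames agree on location and result hash to
# the same stack key (hypothetical frames); not part of the original script:
def _exampleGetStackKey():
    frame = {"location": "rev/foo.cpp#l1", "result": "NS_ERROR_FAILURE"}
    assert getStackKey({"frames": [frame]}) == getStackKey({"frames": [dict(frame)]})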


# A "raw stack" is a list of frames that:
# - share the same build_id (implicitly through location)
# - share the same client_id
# - share the same session_id
# - have growing sequence numbers
# - stop at the first downgrade of severity from ERROR to something else
# - XXX: contain each location at most once (no recursion)
# - appear to lie within a reasonably short timeframe
# Also calculates a hash key to identify identical stacks.
def collectRawStacks(rows):
    collectTopmostFrames(rows)
    raw_stacks = []
    stack = {
        "stack_id": "unset",
        "client_id": "unset",
        "session_id": "unset",
        "submit_timeabs": "unset",
        "frames": [{"location": "unset"}],
    }
    stack_id = 1
    first = True
    for row in rows:
        if isTopmostFrame(row):
            if not first:
                stack["stack_key"] = getStackKey(stack)
                raw_stacks.append(stack)
            stack_id += 1
            stack = {
                "stack_id": stack_id,
                "client_id": row["client_id"],
                "session_id": row["session_id"],
                "submit_timeabs": row["submit_timeabs"],
                "context": row["context"],
                "frames": [],
            }

        stack["frames"].append(
            {
                "location": row["location"],
                "source_file": row["source_file"],
                "source_line": row["source_line"],
                "seq": row["seq"],
                "severity": row["severity"],
                "result": row["result"],
            }
        )
        first = False

    # Flush the last stack, which is not followed by another topmost frame.
    if not first:
        stack["stack_key"] = getStackKey(stack)
        raw_stacks.append(stack)

    return raw_stacks


# Merge all stacks that have the same hash key and count occurrences.
# Relies on the ordering per client_id/session_id for correct counting.
def mergeEqualStacks(raw_stacks):
    merged_stacks = {}
    last_client_id = "none"
    last_session_id = "none"
    for stack in raw_stacks:
        stack_key = stack["stack_key"]
        merged_stack = stack
        if stack_key in merged_stacks:
            merged_stack = merged_stacks[stack_key]
            if stack["client_id"] != last_client_id:
                last_client_id = stack["client_id"]
                merged_stack["client_count"] += 1
            if stack["session_id"] != last_session_id:
                last_session_id = stack["session_id"]
                merged_stack["session_count"] += 1
            merged_stack["hit_count"] += 1
        else:
            merged_stack["client_count"] = 1
            last_client_id = merged_stack["client_id"]
            merged_stack["session_count"] = 1
            last_session_id = merged_stack["session_id"]
            merged_stack["hit_count"] = 1
            merged_stacks[stack_key] = merged_stack

    merged_list = list(merged_stacks.values())
    merged_list.sort(key=lambda x: x.get("hit_count"), reverse=True)
    return merged_list
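

# Usage sketch for mergeEqualStacks(): two stacks with the same stack_key from
# one client/session merge into a single entry (hypothetical minimal stacks):
def _exampleMergeEqualStacks():
    stack = {"stack_key": 1, "client_id": "c1", "session_id": "s1", "frames": []}
    merged = mergeEqualStacks([stack, dict(stack)])
    assert len(merged) == 1
    assert merged[0]["hit_count"] == 2
    assert merged[0]["client_count"] == 1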


# Split the list of stacks into:
# - aborted (has at least one frame with NS_ERROR_ABORT)
# - info/warning (has at least one frame with that severity)
# - error (has only error frames)
def filterStacksForPropagation(
    all_stacks, error_stacks, warn_stacks, info_stacks, abort_stacks
):
    for stack in all_stacks:
        warn = list(filter(lambda x: x["severity"] == "WARNING", stack["frames"]))
        info = list(filter(lambda x: x["severity"] == "INFO", stack["frames"]))
        abort = list(filter(lambda x: x["result"] == "NS_ERROR_ABORT", stack["frames"]))
        if len(abort) > 0:
            abort_stacks.append(stack)
        elif len(info) > 0:
            info_stacks.append(stack)
        elif len(warn) > 0:
            warn_stacks.append(stack)
        else:
            error_stacks.append(stack)
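

# Usage sketch for filterStacksForPropagation(): a stack containing an
# NS_ERROR_ABORT frame is classified as aborted, regardless of severities
# (hypothetical frame data):
def _exampleFilterStacks():
    stack = {"frames": [{"severity": "WARNING", "result": "NS_ERROR_ABORT"}]}
    err, warn, info, abort = [], [], [], []
    filterStacksForPropagation([stack], err, warn, info, abort)
    assert abort == [stack] and not (err or warn or info)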


# Bugzilla comment markup
def printStacks(stacks):
    row_format = "{} | {} | {} | {} | {}\n"
    out = ""
    out += row_format.format("Clients", "Sessions", "Hits", "Anchor (Context)", "Stack")
    out += row_format.format("-------", "--------", "----", "----------------", "-----")
    for stack in stacks:
        framestr = ""
        first = True
        for frame in stack["frames"]:
            if not first:
                framestr += " <- "
            framestr += "[{}#{}:{}]({})".format(
                frame["source_file"],
                frame["source_line"],
                frame["result"],
                frame["location"],
            )
            first = False
        out += row_format.format(
            stack["client_count"],
            stack["session_count"],
            stack["hit_count"],
            "{} ({})".format(stack["frames"][0]["anchor"], stack["context"]),
            framestr,
        )

    return out
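

# Usage sketch for printStacks() with one hypothetical merged stack; "anchor"
# names the anchoring function and is expected to be set by other tooling:
def _examplePrintStacks():
    stack = {
        "client_count": 1,
        "session_count": 1,
        "hit_count": 2,
        "context": "Initialization",
        "frames": [
            {
                "anchor": "GetDirectory",
                "source_file": "dom/quota/ActorsParent.cpp",
                "source_line": "42",
                "result": "NS_ERROR_FAILURE",
                "location": "rev/dom/quota/ActorsParent.cpp#l42",
            }
        ],
    }
    print(printStacks([stack]))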


def groupStacksForAnchors(stacks):
    anchors = {}
    for stack in stacks:
        anchor_name = stack["frames"][0]["anchor"]
        if anchor_name in anchors:
            anchors[anchor_name]["stacks"].append(stack)
        else:
            anchor = {"anchor": anchor_name, "stacks": [stack]}
            anchors[anchor_name] = anchor
    return anchors
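

# Usage sketch for groupStacksForAnchors(): stacks are grouped by the anchor
# of their first frame (hypothetical stacks); not part of the original script:
def _exampleGroupStacksForAnchors():
    s1 = {"frames": [{"anchor": "GetDirectory"}]}
    s2 = {"frames": [{"anchor": "GetDirectory"}]}
    anchors = groupStacksForAnchors([s1, s2])
    assert len(anchors["GetDirectory"]["stacks"]) == 2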


"""
def getSummaryForAnchor(anchor):
    return "[QM_TRY] Errors in function {}".format(anchor)


def searchBugForAnchor(bugzilla_key, anchor):
    summary = getSummaryForAnchor(anchor)
    bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
              "summary={}&api_key={}".format(summary, bugzilla_key)
    return requests.get(url=bug_url).json()["bugs"]


def createBugForAnchor(bugzilla_key, anchor):
    summary = getSummaryForAnchor(anchor)
    bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
              "Bugzilla_api_key={}".format(bugzilla_key)
    body = {
        "product" : "Core",
        "component" : "Storage: Quota Manager",
        "version" : "unspecified",
        "summary" : summary,
        "description" : "This bug collects errors reported by QM_TRY"
                        "macros for function {}.".format(anchor),
    }
    resp = requests.post(url=bug_url, json=body)
    if resp.status_code != 200:
        print(resp)
        return 0
    id = resp.json()["id"]
    print("Added new bug {}:".format(id))
    return id


def ensureBugForAnchor(bugzilla_key, anchor):
    buglist = searchBugForAnchor(bugzilla_key, anchor)
    if (len(buglist) > 0):
        id = buglist[0]["id"]
        print("Found existing bug {}:".format(id))
        return id
    return createBugForAnchor(bugzilla_key, anchor)


def addCommentForAnchor(bugzilla_key, anchor, stacks):
    id = ensureBugForAnchor(bugzilla_key, anchor)
    if (id <= 0):
        print("Unable to create a bug for {}.".format(anchor))
        return
    comment = printStacks(stacks)
    print("")
    print("Add comment to bug {}:".format(id))
    print(comment)


def addCommentsForStacks(bugzilla_key, stacks):
    anchors = groupStacksForAnchors(stacks)
    for anchor in anchors:
        addCommentForAnchor(bugzilla_key, anchors[anchor]["anchor"], anchors[anchor]["stacks"])
"""