diff options
Diffstat (limited to 'dom/quota/scripts/stackanalysis.py')
-rw-r--r-- | dom/quota/scripts/stackanalysis.py | 396 |
1 file changed, 396 insertions, 0 deletions
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


def sanitize(rows):
    """Drop consecutive duplicate telemetry events.

    Telemetry sometimes records identical events twice.  Two adjacent rows
    with the same (client_id, session_id, seq) triple are considered
    duplicates and only the first is kept.  Relies on rows being ordered so
    that duplicates are adjacent.

    Fix over the previous version: the sentinel for "no previous row" is
    None instead of the string "unset", so a first row whose fields happen
    to equal "unset" can no longer be dropped by accident.
    """
    newrows = []
    prev_key = None  # (client_id, session_id, seq) of the last kept row
    for row in rows:
        key = (row["client_id"], row["session_id"], row["seq"])
        if key != prev_key:
            newrows.append(row)
            prev_key = key
    return newrows


def extractBuildIDs(rows):
    """Return a dict mapping each distinct build_id to its occurrence count."""
    buildids = {}
    for row in rows:
        build_id = row["build_id"]
        # dict.get folds the membership test and the increment into one
        # expression; also avoids shadowing the builtin `id`.
        buildids[build_id] = buildids.get(build_id, 0) + 1
    return buildids


# Given a set of build ids and rows, enrich each row by an hg link.
# Relies on the result of utils.fetchBuildRevisions in buildids.
def constructHGLinks(buildids, rows):
    """Enrich each row with an hg source link in row["location"].

    buildids maps build_id -> hg revision URL prefix (the result of
    utils.fetchBuildRevisions).  For unknown build ids the raw build id is
    used as prefix so the row still gets a (non-clickable) location.
    """
    for row in rows:
        build_id = row["build_id"]
        # Both branches of the old code were identical except for the
        # prefix; dict.get expresses the fallback directly.
        prefix = buildids.get(build_id, build_id)
        row["location"] = "{}/{}#l{}".format(
            prefix, row["source_file"], row["source_line"]
        )


# Module-level state shared by the topmost-frame detection below.
topmost_stackframes = set()  # set of (location, result) pairs
delta_frames = {}  # "loc2:res2-loc1:res1" -> accumulated timing info


def isTopmostFrame(frame):
    """Return True if frame's (location, result) is a known topmost frame."""
    return (frame["location"], frame["result"]) in topmost_stackframes


def addTopmostFrame(frame):
    """Register frame's (location, result) pair as a topmost frame.

    NOTE(review): only the *first* row carrying a new pair gets the
    "topmost" flag; later rows with an already-known pair are recognized
    via isTopmostFrame() but remain unflagged.  Preserved as-is because
    checkAverageFrameTimeDeltas keys off the flag, not the set.
    """
    key = (frame["location"], frame["result"])
    if key not in topmost_stackframes:
        # print("Found new topmost frame {}.".format(frame))
        topmost_stackframes.add(key)
        frame["topmost"] = True


def addFrameDelta(frame1, frame2):
    """Accumulate the timestamp delta between two consecutive frames.

    Only frames from the same client and session are related.  Deltas are
    keyed by the location/result transition so checkAverageFrameTimeDeltas
    can compute an average per transition.  Non-integer or non-increasing
    timestamps are counted as zero samples.
    """
    if frame1["client_id"] != frame2["client_id"]:
        return
    if frame1["session_id"] != frame2["session_id"]:
        return

    fkey = "{}:{}-{}:{}".format(
        frame2["location"], frame2["result"], frame1["location"], frame1["result"]
    )
    # setdefault replaces the explicit membership test of the old code.
    fdelta = delta_frames.setdefault(
        fkey,
        {"delta_sum": 0, "delta_cnt": 0, "prev_row": frame1, "candidate": frame2},
    )

    etv1 = frame1["event_timestamp"]
    etv2 = frame2["event_timestamp"]
    if isinstance(etv1, int) and isinstance(etv2, int) and etv2 > etv1:
        fdelta["delta_sum"] += etv2 - etv1
        fdelta["delta_cnt"] += 1


# There can be outliers in terms of time distance between two stack frames
# that belong to the same propagation stack. In order to not increase the
# risk that one outlier breaks thousands of stacks, we check for the average
# time distance.
def checkAverageFrameTimeDeltas(rows, max_delta):
    """Promote transition targets whose average time delta exceeds max_delta."""
    prev_row = None
    for row in rows:
        # Topmost frames start a new stack and incomplete sessions are
        # ignored entirely, so neither contributes a delta.
        if "topmost" in row or not row["session_complete"]:
            prev_row = None
            continue
        if prev_row:
            addFrameDelta(prev_row, row)
        prev_row = row

    # Iterate the values directly; the old code shadowed the builtin `sum`
    # and re-indexed the dict for every access.
    for fdelta in delta_frames.values():
        total = fdelta["delta_sum"]
        cnt = fdelta["delta_cnt"]
        if cnt > 0 and (total / cnt) > max_delta:
            addTopmostFrame(fdelta["candidate"])


# A topmost frame is considered to initiate a new raw stack. We collect all
# candidates before we actually apply them. This implies, that we should run
# this function on a "large enough" sample of rows to be more accurate.
# As a side effect, we mark all rows that are part of a "complete" session
# (a session, that started within our data scope).
def collectTopmostFrames(rows):
    prev_cid = "unset"
    prev_sid = "unset"
    prev_tid = "unset"
    prev_ctx = "unset"
    prev_sev = "ERROR"
    session_complete = False
    after_severity_downgrade = False
    for row in rows:
        cid = row["client_id"]
        sid = row["session_id"]
        tid = row["seq"] >> 32  # upper 32 bits encode the thread id
        ctx = row["context"]
        seq = row["seq"] & 0x00000000FFFFFFFF  # lower 32 bits: sequence no.
        sev = row["severity"]

        # If we have a new session, ensure it is complete from start,
        # otherwise we will ignore it entirely.
        if cid != prev_cid or sid != prev_sid or tid != prev_tid:
            session_complete = seq == 1
        row["session_complete"] = session_complete
        if session_complete:
            # If we change client, session, thread or context, we can be
            # sure to have a new topmost frame.
            if (
                seq == 1
                or cid != prev_cid
                or sid != prev_sid
                or tid != prev_tid
                or ctx != prev_ctx
            ):
                addTopmostFrame(row)
                after_severity_downgrade = False
            # We do not expect a non-error to be ever upgraded to an error
            elif sev == "ERROR" and prev_sev != "ERROR":
                addTopmostFrame(row)
                after_severity_downgrade = False
            # If we just had a severity downgrade, we assume that we wanted
            # to break the error propagation after this point and split, too
            elif after_severity_downgrade:
                addTopmostFrame(row)
                after_severity_downgrade = False
            elif prev_sev == "ERROR" and sev != "ERROR":
                after_severity_downgrade = True

        prev_cid = cid
        prev_sid = sid
        prev_tid = tid
        prev_ctx = ctx
        prev_sev = sev

    # Should be ms. We've seen quite some runtime between stackframes in the
    # wild. We might want to consider to make this configurable. In general
    # we prefer local context over letting slip through some topmost frame
    # unrecognized, assuming that fixing the issues one by one they will
    # uncover them successively. This is achieved by a rather high delta value.
    max_avg_delta = 200
    checkAverageFrameTimeDeltas(rows, max_avg_delta)


def getFrameKey(frame):
    """Return the per-frame component of a stack hash key."""
    return "{}.{}|".format(frame["location"], frame["result"])


def getStackKey(stack):
    """Return a hash identifying the frame sequence of the given stack."""
    return hash("".join(getFrameKey(frame) for frame in stack["frames"]))


# A "raw stack" is a list of frames, that:
# - share the same build_id (implicitly through location)
# - share the same client_id
# - share the same session_id
# - has a growing sequence number
# - stops at the first downgrade of severity from ERROR to else
# - XXX: contains each location at most once (no recursion)
# - appears to be in a reasonably short timeframe
# Calculates also a hash key to identify identical stacks
def collectRawStacks(rows):
    collectTopmostFrames(rows)
    raw_stacks = []
    # Placeholder stack; discarded via `first` before it can be appended.
    stack = {
        "stack_id": "unset",
        "client_id": "unset",
        "session_id": "unset",
        "submit_timeabs": "unset",
        "frames": [{"location": "unset"}],
    }
    stack_id = 1
    first = True
    for row in rows:
        if isTopmostFrame(row):
            # NOTE(review): the stack in progress is only flushed when the
            # *next* topmost frame arrives, so the trailing stack of the
            # input is never appended.  Preserved as-is — presumably
            # acceptable for large samples; confirm with callers.
            if not first:
                stack["stack_key"] = getStackKey(stack)
                raw_stacks.append(stack)
                stack_id += 1
            stack = {
                "stack_id": stack_id,
                "client_id": row["client_id"],
                "session_id": row["session_id"],
                "submit_timeabs": row["submit_timeabs"],
                "context": row["context"],
                "frames": [],
            }

        stack["frames"].append(
            {
                "location": row["location"],
                "source_file": row["source_file"],
                "source_line": row["source_line"],
                "seq": row["seq"],
                "severity": row["severity"],
                "result": row["result"],
            }
        )
        first = False

    return raw_stacks


# Merge all stacks that have the same hash key and count occurrences.
# Relies on the ordering per client_id/session_id for correct counting.
def mergeEqualStacks(raw_stacks):
    """Merge stacks with identical stack_key and count occurrences.

    Counts hits, distinct clients and distinct sessions per merged stack.
    Relies on raw_stacks being ordered per client_id/session_id so that a
    change of id reliably indicates a new client/session.  Returns the
    merged stacks sorted by descending hit count.
    """
    merged_stacks = {}
    last_client_id = "none"
    last_session_id = "none"
    for stack in raw_stacks:
        stack_key = stack["stack_key"]
        if stack_key in merged_stacks:
            merged_stack = merged_stacks[stack_key]
            if stack["client_id"] != last_client_id:
                last_client_id = stack["client_id"]
                merged_stack["client_count"] += 1
            if stack["session_id"] != last_session_id:
                last_session_id = stack["session_id"]
                merged_stack["session_count"] += 1
            merged_stack["hit_count"] += 1
        else:
            # First occurrence: the stack itself becomes the merged record.
            merged_stack = stack
            merged_stack["client_count"] = 1
            last_client_id = merged_stack["client_id"]
            merged_stack["session_count"] = 1
            last_session_id = merged_stack["session_id"]
            merged_stack["hit_count"] = 1
            merged_stacks[stack_key] = merged_stack

    merged_list = list(merged_stacks.values())
    merged_list.sort(key=lambda x: x["hit_count"], reverse=True)
    return merged_list


# Split the list of stacks into:
# - aborted (has at least one frame with NS_ERROR_ABORT)
# - info/warning (has at least one frame with that severity)
# - error (has only error frames)
def filterStacksForPropagation(
    all_stacks, error_stacks, warn_stacks, info_stacks, abort_stacks
):
    """Append each stack of all_stacks to exactly one of the given buckets."""
    for stack in all_stacks:
        frames = stack["frames"]
        # any() short-circuits; the old code materialized three filtered
        # lists only to test their length.
        if any(f["result"] == "NS_ERROR_ABORT" for f in frames):
            abort_stacks.append(stack)
        elif any(f["severity"] == "INFO" for f in frames):
            info_stacks.append(stack)
        elif any(f["severity"] == "WARNING" for f in frames):
            warn_stacks.append(stack)
        else:
            error_stacks.append(stack)


# Bugzilla comment markup
def printStacks(stacks):
    """Render stacks as a Bugzilla-markup table and return it as a string."""
    row_format = "{} | {} | {} | {} | {}\n"
    out = row_format.format("Clients", "Sessions", "Hits", "Anchor (Context)", "Stack")
    out += row_format.format("-------", "--------", "----", "----------------", "-----")
    for stack in stacks:
        # join() replaces the manual first-element bookkeeping of the old
        # code; output bytes are identical.
        framestr = " <- ".join(
            "[{}#{}:{}]({})".format(
                frame["source_file"],
                frame["source_line"],
                frame["result"],
                frame["location"],
            )
            for frame in stack["frames"]
        )
        out += row_format.format(
            stack["client_count"],
            stack["session_count"],
            stack["hit_count"],
            "{} ({})".format(stack["frames"][0]["anchor"], stack["context"]),
            framestr,
        )

    return out


def groupStacksForAnchors(stacks):
    """Group stacks by the anchor (function name) of their first frame."""
    anchors = {}
    for stack in stacks:
        anchor_name = stack["frames"][0]["anchor"]
        if anchor_name in anchors:
            anchors[anchor_name]["stacks"].append(stack)
        else:
            anchors[anchor_name] = {"anchor": anchor_name, "stacks": [stack]}
    return anchors


# Disabled Bugzilla integration, kept for reference (needs `requests` and an
# API key before it can be re-enabled).
"""
def getSummaryForAnchor(anchor):
    return "[QM_TRY] Errors in function {}".format(anchor)


def searchBugForAnchor(bugzilla_key, anchor):
    summary = getSummaryForAnchor(anchor)
    bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
        "summary={}&api_key={}".format(summary, bugzilla_key)
    return requests.get(url=bug_url).json()["bugs"]


def createBugForAnchor(bugzilla_key, anchor):
    summary = getSummaryForAnchor(anchor)
    bug_url = "https://bugzilla.mozilla.org/rest/bug?" \
        "Bugzilla_api_key={}".format(bugzilla_key)
    body = {
        "product" : "Core",
        "component" : "Storage: Quota Manager",
        "version" : "unspecified",
        "summary" : summary,
        "description" : "This bug collects errors reported by QM_TRY"
            "macros for function {}.".format(anchor),
    }
    resp = requests.post(url=bug_url, json=body)
    if resp.status_code != 200:
        print(resp)
        return 0
    id = resp.json()["id"]
    print("Added new bug {}:".format(id))
    return id


def ensureBugForAnchor(bugzilla_key, anchor):
    buglist = searchBugForAnchor(bugzilla_key, anchor)
    if (len(buglist) > 0):
        id = buglist[0]["id"]
        print("Found existing bug {}:".format(id))
        return id
    return createBugForAnchor(bugzilla_key, anchor)


def addCommentForAnchor(bugzilla_key, anchor, stacks):
    id = ensureBugForAnchor(bugzilla_key, anchor)
    if (id <= 0):
        print("Unable to create a bug for {}.".format(anchor))
        return
    comment = printStacks(stacks)
    print("")
    print("Add comment to bug {}:".format(id))
    print(comment)


def addCommentsForStacks(bugzilla_key, stacks):
    anchors = groupStacksForAnchors(stacks)
    for anchor in anchors:
        addCommentForAnchor(bugzilla_key, anchors[anchor]["anchor"], anchors[anchor]["stacks"])
"""