author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:35:49 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:35:49 +0000
commit | d8bbc7858622b6d9c278469aab701ca0b609cddf (patch)
tree | eff41dc61d9f714852212739e6b3738b82a2af87 /dom/quota/scripts/qm-try-analysis/qm_try_analysis
parent | Releasing progress-linux version 125.0.3-1~progress7.99u1. (diff)
download | firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.tar.xz firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.zip
Merging upstream version 126.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/quota/scripts/qm-try-analysis/qm_try_analysis')
11 files changed, 1230 insertions, 0 deletions
diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/__init__.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/__init__.py diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/analyze.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/analyze.py new file mode 100755 index 0000000000..1173555e08 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/analyze.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import sys +from os import path + +import click + +from qm_try_analysis import fn_anchors, stackanalysis, utils +from qm_try_analysis.logging import error, info + +""" +The analysis is based on stack frames of the following form: + +[ + { + "event_timeabs": 1617121013137, + "session_startabs": 1617120840000, + "build_id": "20210329095128", + "client_id": "0013a68f-9893-461a-93d4-2d7a2f85583f", + "session_id": "8cd37159-bd5c-481c-99ad-9eace9ea726a", + "seq": 1, + "context": "Initialization::TemporaryStorage", + "source_file": "dom/localstorage/ActorsParent.cpp", + "source_line": "1018", + "severity": "ERROR", + "result": "NS_ERROR_FILE_NOT_FOUND" + }, +... +] + +The location of the input file is expected to be found in the +last item of the list inside qmexecutions.json. +""" + + +@click.command() +@click.option( + "--output-to", + type=click.Path(dir_okay=False, writable=True), + default="qmstacks_until_<lasteventtime>.txt", + help="Specify the output file for the analyzed data.", +) +@click.option( + "-w", + "--workdir", + type=click.Path(file_okay=False, exists=True, writable=True), + default="output", + help="Working directory", +) +def analyze_qm_failures(output_to, workdir): + """ + Analyzes the results from fetch's JSON file. + Writes out several JSON results as files and a bugzilla markup table on stdout. + """ + run = utils.getLastRunFromExecutionFile(workdir) + if "numrows" not in run or run["numrows"] == 0: + error( + "No previous execution from fetch_qm_failures.py found or the last execution yielded no result." + ) + sys.exit(2) + + if output_to == "qmstacks_until_<lasteventtime>.txt": + output_to = path.join(workdir, f'qmstacks_until_{run["lasteventtime"]}.txt') + elif output_to.exists(): + error( + f'The output file "{output_to}" already exists. This script would override it.' 
+ ) + sys.exit(2) + run["stacksfile"] = output_to + + def getFname(prefix): + return "{}/{}_until_{}.json".format(workdir, prefix, run["lasteventtime"]) + + # read rows from JSON + rows = utils.readJSONFile(getFname("qmrows")) + info(f"Found {len(rows)} rows of data") + rows = stackanalysis.sanitize(rows) + + # enrich rows with hg locations + buildids = stackanalysis.extractBuildIDs(rows) + utils.fetchBuildRevisions(buildids) + stackanalysis.constructHGLinks(buildids, rows) + + # transform rows to unique stacks + raw_stacks = stackanalysis.collectRawStacks(rows) + all_stacks = stackanalysis.mergeEqualStacks(raw_stacks) + + # enrich with function anchors + for stack in all_stacks: + for frame in stack["frames"]: + frame["anchor"] = "{}:{}".format( + frame["source_file"], fn_anchors.getFunctionName(frame["location"]) + ) + + # separate stacks for relevance + error_stacks = [] + warn_stacks = [] + info_stacks = [] + abort_stacks = [] + stackanalysis.filterStacksForPropagation( + all_stacks, error_stacks, warn_stacks, info_stacks, abort_stacks + ) + run["errorfile"] = getFname("qmerrors") + utils.writeJSONFile(run["errorfile"], error_stacks) + run["warnfile"] = getFname("qmwarnings") + utils.writeJSONFile(run["warnfile"], warn_stacks) + run["infofile"] = getFname("qminfo") + utils.writeJSONFile(run["infofile"], info_stacks) + run["abortfile"] = getFname("qmabort") + utils.writeJSONFile(run["abortfile"], abort_stacks) + utils.updateLastRunToExecutionFile(workdir, run) + + info(f"Found {len(error_stacks)} error stacks") + info(f"Found {len(warn_stacks)} warning stacks") + info(f"Found {len(info_stacks)} info stacks") + info(f"Found {len(abort_stacks)} aborted stacks") + + # Write results to the specified output file + with open(output_to, "w") as output: + + def print_to_output(message): + print(message, file=output) + + print_to_output("Error stacks:") + print_to_output(stackanalysis.printStacks(error_stacks)) + print_to_output("") + print_to_output("Error stacks grouped by anchors:") + anchors = stackanalysis.groupStacksForAnchors(error_stacks) + anchornames = list(anchors.keys()) + for a in anchornames: + print_to_output(stackanalysis.printStacks(anchors[a]["stacks"])) + print_to_output("") + print_to_output("") + print_to_output("Warning stacks:") + print_to_output(stackanalysis.printStacks(warn_stacks)) + print_to_output("") + print_to_output("Info stacks:") + print_to_output(stackanalysis.printStacks(info_stacks)) + print_to_output("") + print_to_output("Aborted stacks:") + print_to_output(stackanalysis.printStacks(abort_stacks)) + + info(f"Wrote results to specified output file {output_to}") + + +if __name__ == "__main__": + analyze_qm_failures() diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/cli.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/cli.py new file mode 100644 index 0000000000..509a8e33e1 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/cli.py @@ -0,0 +1,22 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +import click + +from qm_try_analysis.analyze import analyze_qm_failures +from qm_try_analysis.fetch import fetch_qm_failures +from qm_try_analysis.report import report_qm_failures + + +@click.group(context_settings={"show_default": True}) +def cli(): + pass + + +cli.add_command(fetch_qm_failures, "fetch") +cli.add_command(analyze_qm_failures, "analyze") +cli.add_command(report_qm_failures, "report") + +if __name__ == "__main__": + cli() diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch.py new file mode 100644 index 0000000000..2512293c29 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +import pathlib + +import click + +from qm_try_analysis import telemetry, utils +from qm_try_analysis.logging import info + +""" +The analysis is based on the following query: +https://sql.telemetry.mozilla.org/queries/78691/source?p_day=28&p_month=03&p_year=2021 + +SELECT UNIX_MILLIS(timestamp) AS submit_timeabs, + session_start_time, + submission_date, + build_id, + client_id, + session_id, + event_timestamp, + CAST(mozfun.map.get_key(event_map_values, "seq") AS INT64) AS seq, + mozfun.map.get_key(event_map_values, "context") AS context, + mozfun.map.get_key(event_map_values, "source_file") AS source_file, + mozfun.map.get_key(event_map_values, "source_line") AS source_line, + mozfun.map.get_key(event_map_values, "severity") AS severity, + mozfun.map.get_key(event_map_values, "result") AS result, +FROM telemetry.events +WHERE submission_date >= CAST('{{ year }}-{{ month }}-{{ day }}' AS DATE) + AND event_category='dom.quota.try' + AND build_id >= '{{ build }}' + AND UNIX_MILLIS(timestamp) > {{ last }} +ORDER BY submit_timeabs +LIMIT 600000 + +We fetch events in chronological order, as we want to keep track of where we already +arrived with our analysis. To accomplish this we write our runs into qmexecutions.json. + +[ + { + "workdir": ".", + "daysback": 1, + "numrows": 17377, + "lasteventtime": 1617303855145, + "rawfile": "./qmrows_until_1617303855145.json" + } +] + +lasteventtime is the highest value of event_timeabs we found in our data. + +analyze_qm_failures instead needs the rows to be ordered by +client_id, session_id, thread_id, submit_timeabs, seq +Thus we sort the rows accordingly before writing them. +""" + + +@click.command() +@click.option( + "-k", + "--key", + required=True, + help="Your personal telemetry API key.", +) +@click.option( + "-b", + "--minbuild", + default="20210329000000", + help="The lowest build id we will fetch data for. This should have the following format: `yyyymmddhhmmss`.", +) +@click.option("-d", "--days", type=int, default=7, help="Number of days to go back.") +@click.option( + "-l", + "--lasteventtime", + type=int, + default=0, + help="Fetch only events after this number of Unix milliseconds.", +) +@click.option( + "-w", + "--workdir", + type=click.Path(file_okay=False, writable=True, path_type=pathlib.Path), + default="output", + help="Working directory", +) +def fetch_qm_failures(key, minbuild, days, lasteventtime, workdir): + """ + Invokes the query 78691 and stores the result in a JSON file. 
+ """ + # Creeate output dir if it does not exist + workdir.mkdir(exist_ok=True) + + start = utils.dateback(days) + year, month, day = start.year, start.month, start.day + + run = {} + lastrun = utils.getLastRunFromExecutionFile(workdir) + if "lasteventtime" in lastrun: + lasteventtime = lastrun["lasteventtime"] + + run["workdir"] = workdir.as_posix() + run["daysback"] = days + run["minbuild"] = minbuild + + p_params = f"p_year={year:04d}&p_month={month:02d}&p_day={day:02d}&p_build={minbuild}&p_last={lasteventtime}" + + # Read string at the start of the file for more information on query 78691 + result = telemetry.query(key, 78691, p_params) + rows = result["query_result"]["data"]["rows"] + run["numrows"] = len(rows) + + if run["numrows"] > 0: + lasteventtime = telemetry.getLastEventTimeAbs(rows) + run["lasteventtime"] = lasteventtime + rows.sort( + key=lambda row: "{}.{}.{}.{}.{:06d}".format( + row["client_id"], + row["session_id"], + row["seq"] >> 32, # thread_id + row["submit_timeabs"], + row["seq"] & 0x00000000FFFFFFFF, # seq, + ), + reverse=False, + ) + outfile = f"{workdir}/qmrows_until_{lasteventtime}.json" + utils.writeJSONFile(outfile, rows) + run["rawfile"] = outfile + else: + info("No results found, maybe next time.") + run["lasteventtime"] = lasteventtime + + utils.addNewRunToExecutionFile(workdir, run) + + +if __name__ == "__main__": + fetch_qm_failures() diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch_fn_names.sh b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch_fn_names.sh new file mode 100755 index 0000000000..bd619186cd --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch_fn_names.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This script assumes to have rust-code-analysis-cli in the path. +HG_URL=$1 +TEMPDIR=/tmp/fetch_fn_names_$$ +TEMPSRC=$TEMPDIR/src +mkdir $TEMPDIR +echo "" > $TEMPDIR/empty.json +HG_URL=`echo $HG_URL | sed 's/annotate/raw-file/g'` +wget -q -O "$TEMPSRC" $HG_URL +rust-code-analysis-cli -m -O json -p "$TEMPSRC" +CONTENT=`cat $TEMPDIR/*.json` +rm -rf $TEMPDIR +echo $CONTENT diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fn_anchors.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fn_anchors.py new file mode 100644 index 0000000000..13e3802399 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fn_anchors.py @@ -0,0 +1,76 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import json +import subprocess +from os import path + +from qm_try_analysis.logging import info, warning + +cached_functions = {} + + +def getMetricsJson(src_url): + if src_url.startswith("http"): + info(f"Fetching source for function extraction: {src_url}") + metrics = subprocess.check_output( + [ + path.join(path.dirname(path.realpath(__file__)), "fetch_fn_names.sh"), + src_url, + ] + ) + else: + warning(f"Skip fetching source: {src_url}") + metrics = "" + + try: + return json.loads(metrics) + except ValueError: + return {"kind": "empty", "name": "anonymous", "spaces": []} + + +def getSpaceFunctionsRecursive(metrics_space): + functions = [] + if ( + metrics_space["kind"] == "function" + and metrics_space["name"] + and metrics_space["name"] != "<anonymous>" + ): + functions.append( + { + "name": metrics_space["name"], + "start_line": int(metrics_space["start_line"]), + "end_line": int(metrics_space["end_line"]), + } + ) + for space in metrics_space["spaces"]: + functions += getSpaceFunctionsRecursive(space) + return functions + + +def getSourceFunctions(src_url): + if src_url not in cached_functions: + metrics_space = getMetricsJson(src_url) + cached_functions[src_url] = getSpaceFunctionsRecursive(metrics_space) + + return cached_functions[src_url] + + +def getFunctionName(location): + location.replace("annotate", "raw-file") + pieces = location.split("#l") + src_url = pieces[0] + line = int(pieces[1]) + closest_name = "<Unknown {}>".format(line) + closest_start = 0 + functions = getSourceFunctions(src_url) + for fn in functions: + if ( + fn["start_line"] > closest_start + and line >= fn["start_line"] + and line <= fn["end_line"] + ): + closest_start = fn["start_line"] + closest_name = fn["name"] + return closest_name diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/logging.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/logging.py new file mode 100644 index 0000000000..c96679f96c --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/logging.py @@ -0,0 +1,21 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +import click + + +def debug(message): + click.echo(click.style("Debug", fg="cyan") + f": {message}") + + +def info(message): + click.echo(click.style("Info", fg="white") + f": {message}") + + +def warning(message): + click.echo(click.style("Warning", fg="yellow") + f": {message}") + + +def error(message): + click.echo(click.style("Error", fg="red") + f": {message}") diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/report.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/report.py new file mode 100644 index 0000000000..0ec5428679 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/report.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +import hashlib +import json +import re +import sys +import webbrowser +from typing import Union + +import bugzilla +import click +from click.utils import echo + +from qm_try_analysis import stackanalysis, utils +from qm_try_analysis.logging import error, info, warning + +# Flag for toggling development mod +DEV = True + +# Constants for Bugzilla URLs +if DEV: + BUGZILLA_BASE_URL = "https://bugzilla-dev.allizom.org/" +else: + BUGZILLA_BASE_URL = "https://bugzilla.mozilla.org/" + +BUGZILLA_API_URL = BUGZILLA_BASE_URL + "rest/" +BUGZILLA_ATTACHMENT_URL = BUGZILLA_BASE_URL + "attachment.cgi?id=" +BUGZILLA_BUG_URL = BUGZILLA_BASE_URL + "show_bug.cgi?id=" + +# Constants for static bugs +QM_TRY_FAILURES_BUG = 1702411 +WARNING_STACKS_BUG = 1711703 + +# Regex pattern for parsing anchor strings +ANCHOR_REGEX_PATTERN = re.compile(r"([^:]+):([^:]+)?:*([^:]+)?") + + +@click.command() +@click.option( + "-k", + "--key", + help="Your personal Bugzilla API key", + required=True, +) +@click.option( + "--stacksfile", + type=click.File("rb"), + help="The output file of the previous analysis run. You only have to specify this, if the previous run does not include this info.", +) +@click.option( + "--open-modified/--no-open-modified", + default=True, + help="Whether to open modified bugs in your default browser after updating them.", +) +@click.option( + "-w", + "--workdir", + type=click.Path(file_okay=False, exists=True, writable=True), + default="output", + help="Working directory", +) +def report_qm_failures(key, stacksfile, open_modified, workdir): + """ + Report QM failures to Bugzilla based on stack analysis. + """ + run = utils.getLastRunFromExecutionFile(workdir) + + # Check for valid execution file from the previous run + if not {"errorfile", "warnfile"} <= run.keys(): + error("No analyzable execution from the previous run of analyze found.") + echo("Did you remember to run `poetry run qm-try-analysis analyze`?") + sys.exit(2) + + # Handle missing stacksfile + if not stacksfile: + if "stacksfile" not in run: + error( + "The previous analyze run did not contain the location of the stacksfile." + ) + echo('Please provide the file location using the "--stacksfile" option.') + sys.exit(2) + stacksfile = open(run["stacksfile"], "rb") + + # Create Bugzilla client + bugzilla_client = bugzilla.Bugzilla(url=BUGZILLA_API_URL, api_key=key) + + # Initialize report data + report = run.get("report", {}) + run["report"] = report + attachment_id = report.get("stacksfile_attachment", None) + reported = report.get("reported", []) + report["reported"] = reported + + def post_comment(bug_id, comment): + """ + Post a comment to a Bugzilla bug. + """ + data = {"id": bug_id, "comment": comment, "is_markdown": True} + res = bugzilla_client._post(f"bug/{bug_id}/comment", json.dumps(data)) + return res["id"] + + # Handle missing attachment ID + if not attachment_id: + attachment = bugzilla.DotDict() + attachment.file_name = f"qmstacks_until_{run['lasteventtime']}.txt" + attachment.summary = attachment.file_name + attachment.content_type = "text/plain" + attachment.data = stacksfile.read().decode() + res = bugzilla_client.post_attachment(QM_TRY_FAILURES_BUG, attachment) + attachment_id = next(iter(res["attachments"].values()))["id"] + report["stacksfile_attachment"] = attachment_id + utils.updateLastRunToExecutionFile(workdir, run) + + info( + f'Created attachment for "{attachment.file_name}": {BUGZILLA_ATTACHMENT_URL + str(attachment_id)}.' 
+ ) + + def generate_comment(stacks): + """ + Generate a comment for Bugzilla based on error stacks. + """ + comment = f"Taken from Attachment {attachment_id}\n\n" + comment += stackanalysis.printStacks(stacks) + return comment + + # Handle missing warnings comment + if "warnings_comment" not in report: + warning_stacks = utils.readJSONFile(run["warnfile"]) + warning_stacks = filter(lambda stack: stack["hit_count"] >= 100, warning_stacks) + + comment = generate_comment(warning_stacks) + comment_id = post_comment(WARNING_STACKS_BUG, comment) + + report["warnings_comment"] = comment_id + utils.updateLastRunToExecutionFile(workdir, run) + + info("Created comment for warning stacks.") + + error_stacks = utils.readJSONFile(run["errorfile"]) + + def reduce(search_results, by: str) -> Union[int, None]: + """ + Reduce bug search results automatically or based on user input. + """ + anchor = by + + search_results = remove_duplicates(search_results, bugzilla_client) + + if not search_results: + return + + if len(search_results) == 1: + return search_results[0]["id"] + + echo(f'Multiple bugs found for anchor "{anchor}":') + + for i, result in enumerate(search_results, start=1): + echo( + f"{i}.{' [closed]' if result['resolution'] != '' else ''} {BUGZILLA_BUG_URL + str(result['id'])} - {result['summary']}" + ) + + choice = click.prompt( + "Enter the number of the bug you want to use", + type=click.Choice( + [str(i) for i in range(1, len(search_results) + 1)] + ["skip"] + ), + default="skip", + show_default=True, + confirmation_prompt="Please confirm the selected choice", + ) + + if choice == "skip": + return + + return search_results[int(choice) - 1]["id"] + + anchors = stackanalysis.groupStacksForAnchors(error_stacks) + + for anchor in anchors: + if hash_str(anchor) in reported: + info(f'Skipping anchor "{anchor}" since it has already been reported.') + continue + + if not (match := ANCHOR_REGEX_PATTERN.match(anchor)): + warning(f'"{anchor}" did not match the regex pattern.') + + if "Unknown" in match.group(2): + warning(f'Skipping "{anchor}" since it is not a valid anchor.') + continue + + search_string = " ".join(filter(None, match.groups())) + search_results = bugzilla_client.search_bugs( + [{"product": "Core", "summary": search_string}] + )["bugs"] + + if bug_id := reduce(search_results, by=anchor): + info(f'Found bug {BUGZILLA_BUG_URL + str(bug_id)} for anchor "{anchor}".') + else: + warning(f'No bug found for anchor "{anchor}".') + + if not click.confirm("Would you like to create one?"): + continue + + bug = bugzilla.DotDict() + bug.product = "Core" + bug.component = "Storage: Quota Manager" + bug.summary = f"[QM_TRY] Failures in {anchor}" + bug.description = f"This bug keeps track of the semi-automatic monitoring of QM_TRY failures in `{anchor}`" + bug["type"] = "defect" + bug.blocks = QM_TRY_FAILURES_BUG + bug.version = "unspecified" + + bug_id = bugzilla_client.post_bug(bug)["id"] + + info(f'Created bug {BUGZILLA_BUG_URL + str(bug_id)} for anchor "{anchor}".') + + comment = generate_comment(anchors[anchor]["stacks"]) + comment_id = post_comment(bug_id, comment) + + reported.append(hash_str(anchor)) + utils.updateLastRunToExecutionFile(workdir, run) + + if open_modified: + comment_seq_number = bugzilla_client.get_comment(comment_id)["comments"][ + str(comment_id) + ]["count"] + webbrowser.open( + BUGZILLA_BUG_URL + str(bug_id) + "#c" + str(comment_seq_number) + ) + + +def hash_str(s): + """ + Hash a string using MD5. 
+ """ + encoded_str = s.encode("utf-8") + return int(hashlib.md5(encoded_str).hexdigest(), 16) + + +def remove_duplicates(search_results, bugzilla_client): + """ + Remove duplicate bugs in search results. + """ + resolved_bugs = set(bug["id"] for bug in search_results if not bug.get("dupe_of")) + + def resolve_if_dupe(bug): + if not (dupe_of := bug.get("dupe_of")): + return bug + + if dupe_of in resolved_bugs: + return None + + remote = resolve_if_dupe(bugzilla_client.get_bug(dupe_of)) + if remote: + resolved_bugs.add(remote["id"]) + + return remote + + return [non_dupe for bug in search_results if (non_dupe := resolve_if_dupe(bug))] + + +if __name__ == "__main__": + report_qm_failures() diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/stackanalysis.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/stackanalysis.py new file mode 100644 index 0000000000..f0363c5e1f --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/stackanalysis.py @@ -0,0 +1,396 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +# There seem to be sometimes identical events recorded twice by telemetry +def sanitize(rows): + newrows = [] + pcid = "unset" + psid = "unset" + pseq = "unset" + for row in rows: + cid = row["client_id"] + sid = row["session_id"] + seq = row["seq"] + if cid != pcid or sid != psid or seq != pseq: + newrows.append(row) + pcid = cid + psid = sid + pseq = seq + + return newrows + + +# Given a set of rows, find all distinct build ids +def extractBuildIDs(rows): + buildids = {} + for row in rows: + id = row["build_id"] + if id in buildids: + buildids[id] = buildids[id] + 1 + else: + buildids[id] = 1 + return buildids + + +# Given a set of build ids and rows, enrich each row by an hg link. +# Relys on the result of utils.fetchBuildRevisions in buildids. +def constructHGLinks(buildids, rows): + for row in rows: + id = row["build_id"] + if id in buildids: + row["location"] = ( + buildids[id] + "/" + row["source_file"] + "#l" + row["source_line"] + ) + else: + row["location"] = id + "/" + row["source_file"] + "#l" + row["source_line"] + + +topmost_stackframes = set() +delta_frames = {} + + +def isTopmostFrame(frame): + f = (frame["location"], frame["result"]) + return f in topmost_stackframes + + +def addTopmostFrame(frame): + f = (frame["location"], frame["result"]) + if not isTopmostFrame(frame): + # print("Found new topmost frame {}.".format(frame)) + topmost_stackframes.add(f) + frame["topmost"] = True + + +def addFrameDelta(frame1, frame2): + if frame1["client_id"] != frame2["client_id"]: + return + if frame1["session_id"] != frame2["session_id"]: + return + + fkey = "{}:{}-{}:{}".format( + frame2["location"], frame2["result"], frame1["location"], frame1["result"] + ) + if fkey not in delta_frames: + fdelta = {"delta_sum": 0, "delta_cnt": 0} + fdelta["prev_row"] = frame1 + fdelta["candidate"] = frame2 + delta_frames[fkey] = fdelta + + fdelta = delta_frames[fkey] + etv1 = frame1["event_timestamp"] + etv2 = frame2["event_timestamp"] + if isinstance(etv1, int) and isinstance(etv2, int) and etv2 > etv1: + delta = etv2 - etv1 + fdelta["delta_sum"] = fdelta["delta_sum"] + delta + fdelta["delta_cnt"] = fdelta["delta_cnt"] + 1 + + +# There can be outliers in terms of time distance between two stack frames +# that belong to the same propagation stack. 
In order to not increase the +# risk that one outlier breaks thousands of stacks, we check for the average +# time distance. +def checkAverageFrameTimeDeltas(rows, max_delta): + # print("checkAverageFrameTimeDeltas") + prev_row = None + for row in rows: + if "topmost" in row or not row["session_complete"]: + prev_row = None + continue + + if prev_row: + addFrameDelta(prev_row, row) + prev_row = row + + for fd in delta_frames: + sum = delta_frames[fd]["delta_sum"] + cnt = delta_frames[fd]["delta_cnt"] + if cnt > 0 and (sum / cnt) > max_delta: + # print(delta_frames[fd]) + addTopmostFrame(delta_frames[fd]["candidate"]) + + +# A topmost frame is considered to initiate a new raw stack. We collect all +# candidates before we actually apply them. This implies, that we should run +# this function on a "large enough" sample of rows to be more accurate. +# As a side effect, we mark all rows that are part of a "complete" session +# (a session, that started within our data scope). +def collectTopmostFrames(rows): + prev_cid = "unset" + prev_sid = "unset" + prev_tid = "unset" + prev_ctx = "unset" + prev_sev = "ERROR" + session_complete = False + after_severity_downgrade = False + for row in rows: + cid = row["client_id"] + sid = row["session_id"] + tid = row["seq"] >> 32 # thread_id + ctx = row["context"] + seq = row["seq"] & 0x00000000FFFFFFFF # seq + sev = row["severity"] + + # If we have a new session, ensure it is complete from start, + # otherwise we will ignore it entirely. + if cid != prev_cid or sid != prev_sid or tid != prev_tid: + if seq == 1: + session_complete = True + else: + session_complete = False + row["session_complete"] = session_complete + if session_complete: + # If we change client, session, thread or context, we can be sure to have + # a new topmost frame. + if ( + seq == 1 + or cid != prev_cid + or sid != prev_sid + or tid != prev_tid + or ctx != prev_ctx + ): + addTopmostFrame(row) + after_severity_downgrade = False + # We do not expect a non-error to be ever upgraded to an error + elif sev == "ERROR" and prev_sev != "ERROR": + addTopmostFrame(row) + after_severity_downgrade = False + # If we just had a severity downgrade, we assume that we wanted + # to break the error propagation after this point and split, too + elif after_severity_downgrade: + addTopmostFrame(row) + after_severity_downgrade = False + elif prev_sev == "ERROR" and sev != "ERROR": + after_severity_downgrade = True + + prev_cid = cid + prev_sid = sid + prev_tid = tid + prev_ctx = ctx + prev_sev = sev + + # Should be ms. We've seen quite some runtime between stackframes in the + # wild. We might want to consider to make this configurable. In general + # we prefer local context over letting slip through some topmost frame + # unrecognized, assuming that fixing the issues one by one they will + # uncover them succesively. This is achieved by a rather high delta value. 
+ max_avg_delta = 200 + checkAverageFrameTimeDeltas(rows, max_avg_delta) + + +def getFrameKey(frame): + return "{}.{}|".format(frame["location"], frame["result"]) + + +def getStackKey(stack): + stack_key = "" + for frame in stack["frames"]: + stack_key += getFrameKey(frame) + return hash(stack_key) + + +# A "raw stack" is a list of frames, that: +# - share the same build_id (implicitely through location) +# - share the same client_id +# - share the same session_id +# - has a growing sequence number +# - stops at the first downgrade of severity from ERROR to else +# - XXX: contains each location at most once (no recursion) +# - appears to be in a reasonable short timeframe +# Calculates also a hash key to identify identical stacks +def collectRawStacks(rows): + collectTopmostFrames(rows) + raw_stacks = [] + stack = { + "stack_id": "unset", + "client_id": "unset", + "session_id": "unset", + "submit_timeabs": "unset", + "frames": [{"location": "unset"}], + } + stack_id = 1 + first = True + for row in rows: + if isTopmostFrame(row): + if not first: + stack["stack_key"] = getStackKey(stack) + raw_stacks.append(stack) + stack_id += 1 + stack = { + "stack_id": stack_id, + "client_id": row["client_id"], + "session_id": row["session_id"], + "submit_timeabs": row["submit_timeabs"], + "context": row["context"], + "frames": [], + } + + stack["frames"].append( + { + "location": row["location"], + "source_file": row["source_file"], + "source_line": row["source_line"], + "seq": row["seq"], + "severity": row["severity"], + "result": row["result"], + } + ) + first = False + + return raw_stacks + + +# Merge all stacks that have the same hash key and count occurences. +# Relys on the ordering per client_id/session_id for correct counting. +def mergeEqualStacks(raw_stacks): + merged_stacks = {} + last_client_id = "none" + last_session_id = "none" + for stack in raw_stacks: + stack_key = stack["stack_key"] + merged_stack = stack + if stack_key in merged_stacks: + merged_stack = merged_stacks[stack_key] + if stack["client_id"] != last_client_id: + last_client_id = stack["client_id"] + merged_stack["client_count"] += 1 + if stack["session_id"] != last_session_id: + last_session_id = stack["session_id"] + merged_stack["session_count"] += 1 + merged_stack["hit_count"] += 1 + else: + merged_stack["client_count"] = 1 + last_client_id = merged_stack["client_id"] + merged_stack["session_count"] = 1 + last_session_id = merged_stack["session_id"] + merged_stack["hit_count"] = 1 + merged_stacks[stack_key] = merged_stack + + merged_list = list(merged_stacks.values()) + merged_list.sort(key=lambda x: x.get("hit_count"), reverse=True) + return merged_list + + +# Split the list of stacks into: +# - aborted (has at least one frame with NS_ERROR_ABORT) +# - info/warning (has at least one frame with that severity) +# - error (has only error frames) +def filterStacksForPropagation( + all_stacks, error_stacks, warn_stacks, info_stacks, abort_stacks +): + for stack in all_stacks: + warn = list(filter(lambda x: x["severity"] == "WARNING", stack["frames"])) + info = list(filter(lambda x: x["severity"] == "INFO", stack["frames"])) + abort = list(filter(lambda x: x["result"] == "NS_ERROR_ABORT", stack["frames"])) + if len(abort) > 0: + abort_stacks.append(stack) + elif len(info) > 0: + info_stacks.append(stack) + elif len(warn) > 0: + warn_stacks.append(stack) + else: + error_stacks.append(stack) + + +# Bugzilla comment markup +def printStacks(stacks): + row_format = "{} | {} | {} | {} | {}\n" + out = "" + out += 
row_format.format("Clients", "Sessions", "Hits", "Anchor (Context)", "Stack") + out += row_format.format("-------", "--------", "----", "----------------", "-----") + for stack in stacks: + framestr = "" + first = True + for frame in stack["frames"]: + if not first: + framestr += " <- " + framestr += "[{}#{}:{}]({})".format( + frame["source_file"], + frame["source_line"], + frame["result"], + frame["location"], + ) + first = False + out += row_format.format( + stack["client_count"], + stack["session_count"], + stack["hit_count"], + "{} ({})".format(stack["frames"][0]["anchor"], stack["context"]), + framestr, + ) + + return out + + +def groupStacksForAnchors(stacks): + anchors = {} + for stack in stacks: + anchor_name = stack["frames"][0]["anchor"] + if anchor_name in anchors: + anchors[anchor_name]["stacks"].append(stack) + else: + anchor = {"anchor": anchor_name, "stacks": [stack]} + anchors[anchor_name] = anchor + return anchors + + +""" +def getSummaryForAnchor(anchor): + return "[QM_TRY] Errors in function {}".format(anchor) + + +def searchBugForAnchor(bugzilla_key, anchor): + summary = getSummaryForAnchor(anchor) + bug_url = "https://bugzilla.mozilla.org/rest/bug?" \ + "summary={}&api_key={}".format(summary, bugzilla_key) + return requests.get(url=bug_url).json()["bugs"] + + +def createBugForAnchor(bugzilla_key, anchor): + summary = getSummaryForAnchor(anchor) + bug_url = "https://bugzilla.mozilla.org/rest/bug?" \ + "Bugzilla_api_key={}".format(bugzilla_key) + body = { + "product" : "Core", + "component" : "Storage: Quota Manager", + "version" : "unspecified", + "summary" : summary, + "description" : "This bug collects errors reported by QM_TRY" + "macros for function {}.".format(anchor), + } + resp = requests.post(url=bug_url, json=body) + if resp.status_code != 200: + print(resp) + return 0 + id = resp.json()["id"] + print("Added new bug {}:".format(id)) + return id + + +def ensureBugForAnchor(bugzilla_key, anchor): + buglist = searchBugForAnchor(bugzilla_key, anchor) + if (len(buglist) > 0): + id = buglist[0]["id"] + print("Found existing bug {}:".format(id)) + return id + return createBugForAnchor(bugzilla_key, anchor) + + +def addCommentForAnchor(bugzilla_key, anchor, stacks): + id = ensureBugForAnchor(bugzilla_key, anchor) + if (id <= 0): + print("Unable to create a bug for {}.".format(anchor)) + return + comment = printStacks(stacks) + print("") + print("Add comment to bug {}:".format(id)) + print(comment) + + +def addCommentsForStacks(bugzilla_key, stacks): + anchors = groupStacksForAnchors(stacks) + for anchor in anchors: + addCommentForAnchor(bugzilla_key, anchors[anchor]["anchor"], anchors[anchor]["stacks"]) +""" diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/telemetry.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/telemetry.py new file mode 100644 index 0000000000..26debd0546 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/telemetry.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import time + +import requests + +from qm_try_analysis.logging import info + +TELEMETRY_BASE_URL = "https://sql.telemetry.mozilla.org/api/" + + +def query(key, query, p_params): + headers = {"Authorization": "Key {}".format(key)} + start_url = TELEMETRY_BASE_URL + f"queries/{query}/refresh?{p_params}" + info(f"Starting job using url {start_url}") + resp = requests.post(url=start_url, headers=headers) + job = resp.json()["job"] + job_id = job["id"] + info(f"Started job {job_id}") + + poll_url = TELEMETRY_BASE_URL + f"jobs/{job_id}" + info(f"Polling query status from {poll_url}") + poll = True + status = 0 + qresultid = 0 + while poll: + print(".", end="", flush=True) + resp = requests.get(url=poll_url, headers=headers) + status = resp.json()["job"]["status"] + if status > 2: + # print(resp.json()) + poll = False + qresultid = resp.json()["job"]["query_result_id"] + else: + time.sleep(0.2) + print(".") + info(f"Finished with status {status}") + + if status == 3: + results_url = TELEMETRY_BASE_URL + f"queries/78691/results/{qresultid}.json" + + info(f"Querying result from {results_url}") + resp = requests.get(url=results_url, headers=headers) + return resp.json() + + return {"query_result": {"data": {"rows": {}}}} + + +def getLastEventTimeAbs(rows): + if len(rows) == 0: + return 0 + return rows[len(rows) - 1]["submit_timeabs"] diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/utils.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/utils.py new file mode 100644 index 0000000000..485dbf66f5 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/utils.py @@ -0,0 +1,91 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import datetime +import json + +import requests + +from qm_try_analysis.logging import error, info, warning + + +def readJSONFile(FileName): + f = open(FileName, "r") + p = json.load(f) + f.close() + return p + + +def writeJSONFile(FileName, Content): + with open(FileName, "w") as outfile: + json.dump(Content, outfile, indent=4) + + +def dateback(days): + today = datetime.date.today() + delta = datetime.timedelta(days) + return today - delta + + +def lastweek(): + today = datetime.date.today() + delta = datetime.timedelta(days=7) + return today - delta + + +# Given a set of build ids, fetch the repository base URL for each id. 
+def fetchBuildRevisions(buildids): + buildhub_url = "https://buildhub.moz.tools/api/search" + delids = {} + for bid in buildids: + info(f"Fetching revision for build {bid}.") + body = {"size": 1, "query": {"term": {"build.id": bid}}} + resp = requests.post(url=buildhub_url, json=body) + hits = resp.json()["hits"]["hits"] + if len(hits) > 0: + buildids[bid] = ( + hits[0]["_source"]["source"]["repository"] + + "/annotate/" + + hits[0]["_source"]["source"]["revision"] + ) + else: + warning(f"No revision for build.id {bid}") + delids[bid] = "x" + for bid in delids: + buildids.pop(bid) + + +def readExecutionFile(workdir): + exefile = "{}/qmexecutions.json".format(workdir) + try: + return readJSONFile(exefile) + except OSError: + return [] + + +def writeExecutionFile(workdir, executions): + exefile = "{}/qmexecutions.json".format(workdir) + try: + writeJSONFile(exefile, executions) + except OSError: + error("Error writing execution record.") + + +def getLastRunFromExecutionFile(workdir): + executions = readExecutionFile(workdir) + if len(executions) > 0: + return executions[len(executions) - 1] + return {} + + +def updateLastRunToExecutionFile(workdir, run): + executions = readExecutionFile(workdir) + executions[len(executions) - 1] = run + writeExecutionFile(workdir, executions) + + +def addNewRunToExecutionFile(workdir, run): + executions = readExecutionFile(workdir) + executions.append(run) + writeExecutionFile(workdir, executions) |
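
As an illustration of how the scripts added in this commit chain together, here is a minimal sketch of an end-to-end run. It is based on the `fetch`, `analyze`, and `report` commands registered in cli.py and on the option names defined in fetch.py, analyze.py, and report.py; the `poetry run qm-try-analysis` entry point is taken from the hint printed by report.py, and both API keys are placeholders.

```bash
# Illustrative sketch only; API keys and the working directory are placeholders.

# 1. Fetch QM_TRY failure events from Telemetry (query 78691) into ./output
poetry run qm-try-analysis fetch -k "$TELEMETRY_API_KEY" -d 7 -w output

# 2. Collapse the fetched rows into unique stacks and write the Bugzilla markup
poetry run qm-try-analysis analyze -w output

# 3. File or update Bugzilla bugs for the resulting error stacks
poetry run qm-try-analysis report -k "$BUGZILLA_API_KEY" -w output
```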