author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:35:49 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-15 03:35:49 +0000
commit | d8bbc7858622b6d9c278469aab701ca0b609cddf (patch)
tree | eff41dc61d9f714852212739e6b3738b82a2af87 /dom/quota/scripts/qm-try-analysis/qm_try_analysis
parent | Releasing progress-linux version 125.0.3-1~progress7.99u1. (diff)
download | firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.tar.xz firefox-d8bbc7858622b6d9c278469aab701ca0b609cddf.zip
Merging upstream version 126.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/quota/scripts/qm-try-analysis/qm_try_analysis')
11 files changed, 1230 insertions, 0 deletions
diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/__init__.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/__init__.py diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/analyze.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/analyze.py new file mode 100755 index 0000000000..1173555e08 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/analyze.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import sys +from os import path + +import click + +from qm_try_analysis import fn_anchors, stackanalysis, utils +from qm_try_analysis.logging import error, info + +""" +The analysis is based on stack frames of the following form: + +[ + { + "event_timeabs": 1617121013137, + "session_startabs": 1617120840000, + "build_id": "20210329095128", + "client_id": "0013a68f-9893-461a-93d4-2d7a2f85583f", + "session_id": "8cd37159-bd5c-481c-99ad-9eace9ea726a", + "seq": 1, + "context": "Initialization::TemporaryStorage", + "source_file": "dom/localstorage/ActorsParent.cpp", + "source_line": "1018", + "severity": "ERROR", + "result": "NS_ERROR_FILE_NOT_FOUND" + }, +... +] + +The location of the input file is expected to be found in the +last item of the list inside qmexecutions.json. +""" + + +@click.command() +@click.option( + "--output-to", + type=click.Path(dir_okay=False, writable=True), + default="qmstacks_until_<lasteventtime>.txt", + help="Specify the output file for the analyzed data.", +) +@click.option( + "-w", + "--workdir", + type=click.Path(file_okay=False, exists=True, writable=True), + default="output", + help="Working directory", +) +def analyze_qm_failures(output_to, workdir): + """ + Analyzes the results from fetch's JSON file. + Writes out several JSON results as files and a bugzilla markup table on stdout. + """ + run = utils.getLastRunFromExecutionFile(workdir) + if "numrows" not in run or run["numrows"] == 0: + error( + "No previous execution from fetch_qm_failures.py found or the last execution yielded no result." + ) + sys.exit(2) + + if output_to == "qmstacks_until_<lasteventtime>.txt": + output_to = path.join(workdir, f'qmstacks_until_{run["lasteventtime"]}.txt') + elif output_to.exists(): + error( + f'The output file "{output_to}" already exists. This script would override it.' 
+ ) + sys.exit(2) + run["stacksfile"] = output_to + + def getFname(prefix): + return "{}/{}_until_{}.json".format(workdir, prefix, run["lasteventtime"]) + + # read rows from JSON + rows = utils.readJSONFile(getFname("qmrows")) + info(f"Found {len(rows)} rows of data") + rows = stackanalysis.sanitize(rows) + + # enrich rows with hg locations + buildids = stackanalysis.extractBuildIDs(rows) + utils.fetchBuildRevisions(buildids) + stackanalysis.constructHGLinks(buildids, rows) + + # transform rows to unique stacks + raw_stacks = stackanalysis.collectRawStacks(rows) + all_stacks = stackanalysis.mergeEqualStacks(raw_stacks) + + # enrich with function anchors + for stack in all_stacks: + for frame in stack["frames"]: + frame["anchor"] = "{}:{}".format( + frame["source_file"], fn_anchors.getFunctionName(frame["location"]) + ) + + # separate stacks for relevance + error_stacks = [] + warn_stacks = [] + info_stacks = [] + abort_stacks = [] + stackanalysis.filterStacksForPropagation( + all_stacks, error_stacks, warn_stacks, info_stacks, abort_stacks + ) + run["errorfile"] = getFname("qmerrors") + utils.writeJSONFile(run["errorfile"], error_stacks) + run["warnfile"] = getFname("qmwarnings") + utils.writeJSONFile(run["warnfile"], warn_stacks) + run["infofile"] = getFname("qminfo") + utils.writeJSONFile(run["infofile"], info_stacks) + run["abortfile"] = getFname("qmabort") + utils.writeJSONFile(run["abortfile"], abort_stacks) + utils.updateLastRunToExecutionFile(workdir, run) + + info(f"Found {len(error_stacks)} error stacks") + info(f"Found {len(warn_stacks)} warning stacks") + info(f"Found {len(info_stacks)} info stacks") + info(f"Found {len(abort_stacks)} aborted stacks") + + # Write results to the specified output file + with open(output_to, "w") as output: + + def print_to_output(message): + print(message, file=output) + + print_to_output("Error stacks:") + print_to_output(stackanalysis.printStacks(error_stacks)) + print_to_output("") + print_to_output("Error stacks grouped by anchors:") + anchors = stackanalysis.groupStacksForAnchors(error_stacks) + anchornames = list(anchors.keys()) + for a in anchornames: + print_to_output(stackanalysis.printStacks(anchors[a]["stacks"])) + print_to_output("") + print_to_output("") + print_to_output("Warning stacks:") + print_to_output(stackanalysis.printStacks(warn_stacks)) + print_to_output("") + print_to_output("Info stacks:") + print_to_output(stackanalysis.printStacks(info_stacks)) + print_to_output("") + print_to_output("Aborted stacks:") + print_to_output(stackanalysis.printStacks(abort_stacks)) + + info(f"Wrote results to specified output file {output_to}") + + +if __name__ == "__main__": + analyze_qm_failures() diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/cli.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/cli.py new file mode 100644 index 0000000000..509a8e33e1 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/cli.py @@ -0,0 +1,22 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +import click + +from qm_try_analysis.analyze import analyze_qm_failures +from qm_try_analysis.fetch import fetch_qm_failures +from qm_try_analysis.report import report_qm_failures + + +@click.group(context_settings={"show_default": True}) +def cli(): + pass + + +cli.add_command(fetch_qm_failures, "fetch") +cli.add_command(analyze_qm_failures, "analyze") +cli.add_command(report_qm_failures, "report") + +if __name__ == "__main__": + cli() diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch.py new file mode 100644 index 0000000000..2512293c29 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +import pathlib + +import click + +from qm_try_analysis import telemetry, utils +from qm_try_analysis.logging import info + +""" +The analysis is based on the following query: +https://sql.telemetry.mozilla.org/queries/78691/source?p_day=28&p_month=03&p_year=2021 + +SELECT UNIX_MILLIS(timestamp) AS submit_timeabs, + session_start_time, + submission_date, + build_id, + client_id, + session_id, + event_timestamp, + CAST(mozfun.map.get_key(event_map_values, "seq") AS INT64) AS seq, + mozfun.map.get_key(event_map_values, "context") AS context, + mozfun.map.get_key(event_map_values, "source_file") AS source_file, + mozfun.map.get_key(event_map_values, "source_line") AS source_line, + mozfun.map.get_key(event_map_values, "severity") AS severity, + mozfun.map.get_key(event_map_values, "result") AS result, +FROM telemetry.events +WHERE submission_date >= CAST('{{ year }}-{{ month }}-{{ day }}' AS DATE) + AND event_category='dom.quota.try' + AND build_id >= '{{ build }}' + AND UNIX_MILLIS(timestamp) > {{ last }} +ORDER BY submit_timeabs +LIMIT 600000 + +We fetch events in chronological order, as we want to keep track of where we already +arrived with our analysis. To accomplish this we write our runs into qmexecutions.json. + +[ + { + "workdir": ".", + "daysback": 1, + "numrows": 17377, + "lasteventtime": 1617303855145, + "rawfile": "./qmrows_until_1617303855145.json" + } +] + +lasteventtime is the highest value of event_timeabs we found in our data. + +analyze_qm_failures instead needs the rows to be ordered by +client_id, session_id, thread_id, submit_timeabs, seq +Thus we sort the rows accordingly before writing them. +""" + + +@click.command() +@click.option( + "-k", + "--key", + required=True, + help="Your personal telemetry API key.", +) +@click.option( + "-b", + "--minbuild", + default="20210329000000", + help="The lowest build id we will fetch data for. This should have the following format: `yyyymmddhhmmss`.", +) +@click.option("-d", "--days", type=int, default=7, help="Number of days to go back.") +@click.option( + "-l", + "--lasteventtime", + type=int, + default=0, + help="Fetch only events after this number of Unix milliseconds.", +) +@click.option( + "-w", + "--workdir", + type=click.Path(file_okay=False, writable=True, path_type=pathlib.Path), + default="output", + help="Working directory", +) +def fetch_qm_failures(key, minbuild, days, lasteventtime, workdir): + """ + Invokes the query 78691 and stores the result in a JSON file. 
+ """ + # Creeate output dir if it does not exist + workdir.mkdir(exist_ok=True) + + start = utils.dateback(days) + year, month, day = start.year, start.month, start.day + + run = {} + lastrun = utils.getLastRunFromExecutionFile(workdir) + if "lasteventtime" in lastrun: + lasteventtime = lastrun["lasteventtime"] + + run["workdir"] = workdir.as_posix() + run["daysback"] = days + run["minbuild"] = minbuild + + p_params = f"p_year={year:04d}&p_month={month:02d}&p_day={day:02d}&p_build={minbuild}&p_last={lasteventtime}" + + # Read string at the start of the file for more information on query 78691 + result = telemetry.query(key, 78691, p_params) + rows = result["query_result"]["data"]["rows"] + run["numrows"] = len(rows) + + if run["numrows"] > 0: + lasteventtime = telemetry.getLastEventTimeAbs(rows) + run["lasteventtime"] = lasteventtime + rows.sort( + key=lambda row: "{}.{}.{}.{}.{:06d}".format( + row["client_id"], + row["session_id"], + row["seq"] >> 32, # thread_id + row["submit_timeabs"], + row["seq"] & 0x00000000FFFFFFFF, # seq, + ), + reverse=False, + ) + outfile = f"{workdir}/qmrows_until_{lasteventtime}.json" + utils.writeJSONFile(outfile, rows) + run["rawfile"] = outfile + else: + info("No results found, maybe next time.") + run["lasteventtime"] = lasteventtime + + utils.addNewRunToExecutionFile(workdir, run) + + +if __name__ == "__main__": + fetch_qm_failures() diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch_fn_names.sh b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch_fn_names.sh new file mode 100755 index 0000000000..bd619186cd --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fetch_fn_names.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This script assumes to have rust-code-analysis-cli in the path. +HG_URL=$1 +TEMPDIR=/tmp/fetch_fn_names_$$ +TEMPSRC=$TEMPDIR/src +mkdir $TEMPDIR +echo "" > $TEMPDIR/empty.json +HG_URL=`echo $HG_URL | sed 's/annotate/raw-file/g'` +wget -q -O "$TEMPSRC" $HG_URL +rust-code-analysis-cli -m -O json -p "$TEMPSRC" +CONTENT=`cat $TEMPDIR/*.json` +rm -rf $TEMPDIR +echo $CONTENT diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fn_anchors.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fn_anchors.py new file mode 100644 index 0000000000..13e3802399 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/fn_anchors.py @@ -0,0 +1,76 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import json +import subprocess +from os import path + +from qm_try_analysis.logging import info, warning + +cached_functions = {} + + +def getMetricsJson(src_url): + if src_url.startswith("http"): + info(f"Fetching source for function extraction: {src_url}") + metrics = subprocess.check_output( + [ + path.join(path.dirname(path.realpath(__file__)), "fetch_fn_names.sh"), + src_url, + ] + ) + else: + warning(f"Skip fetching source: {src_url}") + metrics = "" + + try: + return json.loads(metrics) + except ValueError: + return {"kind": "empty", "name": "anonymous", "spaces": []} + + +def getSpaceFunctionsRecursive(metrics_space): + functions = [] + if ( + metrics_space["kind"] == "function" + and metrics_space["name"] + and metrics_space["name"] != "<anonymous>" + ): + functions.append( + { + "name": metrics_space["name"], + "start_line": int(metrics_space["start_line"]), + "end_line": int(metrics_space["end_line"]), + } + ) + for space in metrics_space["spaces"]: + functions += getSpaceFunctionsRecursive(space) + return functions + + +def getSourceFunctions(src_url): + if src_url not in cached_functions: + metrics_space = getMetricsJson(src_url) + cached_functions[src_url] = getSpaceFunctionsRecursive(metrics_space) + + return cached_functions[src_url] + + +def getFunctionName(location): + location.replace("annotate", "raw-file") + pieces = location.split("#l") + src_url = pieces[0] + line = int(pieces[1]) + closest_name = "<Unknown {}>".format(line) + closest_start = 0 + functions = getSourceFunctions(src_url) + for fn in functions: + if ( + fn["start_line"] > closest_start + and line >= fn["start_line"] + and line <= fn["end_line"] + ): + closest_start = fn["start_line"] + closest_name = fn["name"] + return closest_name diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/logging.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/logging.py new file mode 100644 index 0000000000..c96679f96c --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/logging.py @@ -0,0 +1,21 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +import click + + +def debug(message): + click.echo(click.style("Debug", fg="cyan") + f": {message}") + + +def info(message): + click.echo(click.style("Info", fg="white") + f": {message}") + + +def warning(message): + click.echo(click.style("Warning", fg="yellow") + f": {message}") + + +def error(message): + click.echo(click.style("Error", fg="red") + f": {message}") diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/report.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/report.py new file mode 100644 index 0000000000..0ec5428679 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/report.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +import hashlib +import json +import re +import sys +import webbrowser +from typing import Union + +import bugzilla +import click +from click.utils import echo + +from qm_try_analysis import stackanalysis, utils +from qm_try_analysis.logging import error, info, warning + +# Flag for toggling development mod +DEV = True + +# Constants for Bugzilla URLs +if DEV: + BUGZILLA_BASE_URL = "https://bugzilla-dev.allizom.org/" +else: + BUGZILLA_BASE_URL = "https://bugzilla.mozilla.org/" + +BUGZILLA_API_URL = BUGZILLA_BASE_URL + "rest/" +BUGZILLA_ATTACHMENT_URL = BUGZILLA_BASE_URL + "attachment.cgi?id=" +BUGZILLA_BUG_URL = BUGZILLA_BASE_URL + "show_bug.cgi?id=" + +# Constants for static bugs +QM_TRY_FAILURES_BUG = 1702411 +WARNING_STACKS_BUG = 1711703 + +# Regex pattern for parsing anchor strings +ANCHOR_REGEX_PATTERN = re.compile(r"([^:]+):([^:]+)?:*([^:]+)?") + + +@click.command() +@click.option( + "-k", + "--key", + help="Your personal Bugzilla API key", + required=True, +) +@click.option( + "--stacksfile", + type=click.File("rb"), + help="The output file of the previous analysis run. You only have to specify this, if the previous run does not include this info.", +) +@click.option( + "--open-modified/--no-open-modified", + default=True, + help="Whether to open modified bugs in your default browser after updating them.", +) +@click.option( + "-w", + "--workdir", + type=click.Path(file_okay=False, exists=True, writable=True), + default="output", + help="Working directory", +) +def report_qm_failures(key, stacksfile, open_modified, workdir): + """ + Report QM failures to Bugzilla based on stack analysis. + """ + run = utils.getLastRunFromExecutionFile(workdir) + + # Check for valid execution file from the previous run + if not {"errorfile", "warnfile"} <= run.keys(): + error("No analyzable execution from the previous run of analyze found.") + echo("Did you remember to run `poetry run qm-try-analysis analyze`?") + sys.exit(2) + + # Handle missing stacksfile + if not stacksfile: + if "stacksfile" not in run: + error( + "The previous analyze run did not contain the location of the stacksfile." + ) + echo('Please provide the file location using the "--stacksfile" option.') + sys.exit(2) + stacksfile = open(run["stacksfile"], "rb") + + # Create Bugzilla client + bugzilla_client = bugzilla.Bugzilla(url=BUGZILLA_API_URL, api_key=key) + + # Initialize report data + report = run.get("report", {}) + run["report"] = report + attachment_id = report.get("stacksfile_attachment", None) + reported = report.get("reported", []) + report["reported"] = reported + + def post_comment(bug_id, comment): + """ + Post a comment to a Bugzilla bug. + """ + data = {"id": bug_id, "comment": comment, "is_markdown": True} + res = bugzilla_client._post(f"bug/{bug_id}/comment", json.dumps(data)) + return res["id"] + + # Handle missing attachment ID + if not attachment_id: + attachment = bugzilla.DotDict() + attachment.file_name = f"qmstacks_until_{run['lasteventtime']}.txt" + attachment.summary = attachment.file_name + attachment.content_type = "text/plain" + attachment.data = stacksfile.read().decode() + res = bugzilla_client.post_attachment(QM_TRY_FAILURES_BUG, attachment) + attachment_id = next(iter(res["attachments"].values()))["id"] + report["stacksfile_attachment"] = attachment_id + utils.updateLastRunToExecutionFile(workdir, run) + + info( + f'Created attachment for "{attachment.file_name}": {BUGZILLA_ATTACHMENT_URL + str(attachment_id)}.' 
+ ) + + def generate_comment(stacks): + """ + Generate a comment for Bugzilla based on error stacks. + """ + comment = f"Taken from Attachment {attachment_id}\n\n" + comment += stackanalysis.printStacks(stacks) + return comment + + # Handle missing warnings comment + if "warnings_comment" not in report: + warning_stacks = utils.readJSONFile(run["warnfile"]) + warning_stacks = filter(lambda stack: stack["hit_count"] >= 100, warning_stacks) + + comment = generate_comment(warning_stacks) + comment_id = post_comment(WARNING_STACKS_BUG, comment) + + report["warnings_comment"] = comment_id + utils.updateLastRunToExecutionFile(workdir, run) + + info("Created comment for warning stacks.") + + error_stacks = utils.readJSONFile(run["errorfile"]) + + def reduce(search_results, by: str) -> Union[int, None]: + """ + Reduce bug search results automatically or based on user input. + """ + anchor = by + + search_results = remove_duplicates(search_results, bugzilla_client) + + if not search_results: + return + + if len(search_results) == 1: + return search_results[0]["id"] + + echo(f'Multiple bugs found for anchor "{anchor}":') + + for i, result in enumerate(search_results, start=1): + echo( + f"{i}.{' [closed]' if result['resolution'] != '' else ''} {BUGZILLA_BUG_URL + str(result['id'])} - {result['summary']}" + ) + + choice = click.prompt( + "Enter the number of the bug you want to use", + type=click.Choice( + [str(i) for i in range(1, len(search_results) + 1)] + ["skip"] + ), + default="skip", + show_default=True, + confirmation_prompt="Please confirm the selected choice", + ) + + if choice == "skip": + return + + return search_results[int(choice) - 1]["id"] + + anchors = stackanalysis.groupStacksForAnchors(error_stacks) + + for anchor in anchors: + if hash_str(anchor) in reported: + info(f'Skipping anchor "{anchor}" since it has already been reported.') + continue + + if not (match := ANCHOR_REGEX_PATTERN.match(anchor)): + warning(f'"{anchor}" did not match the regex pattern.') + + if "Unknown" in match.group(2): + warning(f'Skipping "{anchor}" since it is not a valid anchor.') + continue + + search_string = " ".join(filter(None, match.groups())) + search_results = bugzilla_client.search_bugs( + [{"product": "Core", "summary": search_string}] + )["bugs"] + + if bug_id := reduce(search_results, by=anchor): + info(f'Found bug {BUGZILLA_BUG_URL + str(bug_id)} for anchor "{anchor}".') + else: + warning(f'No bug found for anchor "{anchor}".') + + if not click.confirm("Would you like to create one?"): + continue + + bug = bugzilla.DotDict() + bug.product = "Core" + bug.component = "Storage: Quota Manager" + bug.summary = f"[QM_TRY] Failures in {anchor}" + bug.description = f"This bug keeps track of the semi-automatic monitoring of QM_TRY failures in `{anchor}`" + bug["type"] = "defect" + bug.blocks = QM_TRY_FAILURES_BUG + bug.version = "unspecified" + + bug_id = bugzilla_client.post_bug(bug)["id"] + + info(f'Created bug {BUGZILLA_BUG_URL + str(bug_id)} for anchor "{anchor}".') + + comment = generate_comment(anchors[anchor]["stacks"]) + comment_id = post_comment(bug_id, comment) + + reported.append(hash_str(anchor)) + utils.updateLastRunToExecutionFile(workdir, run) + + if open_modified: + comment_seq_number = bugzilla_client.get_comment(comment_id)["comments"][ + str(comment_id) + ]["count"] + webbrowser.open( + BUGZILLA_BUG_URL + str(bug_id) + "#c" + str(comment_seq_number) + ) + + +def hash_str(s): + """ + Hash a string using MD5. 
+ """ + encoded_str = s.encode("utf-8") + return int(hashlib.md5(encoded_str).hexdigest(), 16) + + +def remove_duplicates(search_results, bugzilla_client): + """ + Remove duplicate bugs in search results. + """ + resolved_bugs = set(bug["id"] for bug in search_results if not bug.get("dupe_of")) + + def resolve_if_dupe(bug): + if not (dupe_of := bug.get("dupe_of")): + return bug + + if dupe_of in resolved_bugs: + return None + + remote = resolve_if_dupe(bugzilla_client.get_bug(dupe_of)) + if remote: + resolved_bugs.add(remote["id"]) + + return remote + + return [non_dupe for bug in search_results if (non_dupe := resolve_if_dupe(bug))] + + +if __name__ == "__main__": + report_qm_failures() diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/stackanalysis.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/stackanalysis.py new file mode 100644 index 0000000000..f0363c5e1f --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/stackanalysis.py @@ -0,0 +1,396 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +# There seem to be sometimes identical events recorded twice by telemetry +def sanitize(rows): + newrows = [] + pcid = "unset" + psid = "unset" + pseq = "unset" + for row in rows: + cid = row["client_id"] + sid = row["session_id"] + seq = row["seq"] + if cid != pcid or sid != psid or seq != pseq: + newrows.append(row) + pcid = cid + psid = sid + pseq = seq + + return newrows + + +# Given a set of rows, find all distinct build ids +def extractBuildIDs(rows): + buildids = {} + for row in rows: + id = row["build_id"] + if id in buildids: + buildids[id] = buildids[id] + 1 + else: + buildids[id] = 1 + return buildids + + +# Given a set of build ids and rows, enrich each row by an hg link. +# Relys on the result of utils.fetchBuildRevisions in buildids. +def constructHGLinks(buildids, rows): + for row in rows: + id = row["build_id"] + if id in buildids: + row["location"] = ( + buildids[id] + "/" + row["source_file"] + "#l" + row["source_line"] + ) + else: + row["location"] = id + "/" + row["source_file"] + "#l" + row["source_line"] + + +topmost_stackframes = set() +delta_frames = {} + + +def isTopmostFrame(frame): + f = (frame["location"], frame["result"]) + return f in topmost_stackframes + + +def addTopmostFrame(frame): + f = (frame["location"], frame["result"]) + if not isTopmostFrame(frame): + # print("Found new topmost frame {}.".format(frame)) + topmost_stackframes.add(f) + frame["topmost"] = True + + +def addFrameDelta(frame1, frame2): + if frame1["client_id"] != frame2["client_id"]: + return + if frame1["session_id"] != frame2["session_id"]: + return + + fkey = "{}:{}-{}:{}".format( + frame2["location"], frame2["result"], frame1["location"], frame1["result"] + ) + if fkey not in delta_frames: + fdelta = {"delta_sum": 0, "delta_cnt": 0} + fdelta["prev_row"] = frame1 + fdelta["candidate"] = frame2 + delta_frames[fkey] = fdelta + + fdelta = delta_frames[fkey] + etv1 = frame1["event_timestamp"] + etv2 = frame2["event_timestamp"] + if isinstance(etv1, int) and isinstance(etv2, int) and etv2 > etv1: + delta = etv2 - etv1 + fdelta["delta_sum"] = fdelta["delta_sum"] + delta + fdelta["delta_cnt"] = fdelta["delta_cnt"] + 1 + + +# There can be outliers in terms of time distance between two stack frames +# that belong to the same propagation stack. 
In order to not increase the +# risk that one outlier breaks thousands of stacks, we check for the average +# time distance. +def checkAverageFrameTimeDeltas(rows, max_delta): + # print("checkAverageFrameTimeDeltas") + prev_row = None + for row in rows: + if "topmost" in row or not row["session_complete"]: + prev_row = None + continue + + if prev_row: + addFrameDelta(prev_row, row) + prev_row = row + + for fd in delta_frames: + sum = delta_frames[fd]["delta_sum"] + cnt = delta_frames[fd]["delta_cnt"] + if cnt > 0 and (sum / cnt) > max_delta: + # print(delta_frames[fd]) + addTopmostFrame(delta_frames[fd]["candidate"]) + + +# A topmost frame is considered to initiate a new raw stack. We collect all +# candidates before we actually apply them. This implies, that we should run +# this function on a "large enough" sample of rows to be more accurate. +# As a side effect, we mark all rows that are part of a "complete" session +# (a session, that started within our data scope). +def collectTopmostFrames(rows): + prev_cid = "unset" + prev_sid = "unset" + prev_tid = "unset" + prev_ctx = "unset" + prev_sev = "ERROR" + session_complete = False + after_severity_downgrade = False + for row in rows: + cid = row["client_id"] + sid = row["session_id"] + tid = row["seq"] >> 32 # thread_id + ctx = row["context"] + seq = row["seq"] & 0x00000000FFFFFFFF # seq + sev = row["severity"] + + # If we have a new session, ensure it is complete from start, + # otherwise we will ignore it entirely. + if cid != prev_cid or sid != prev_sid or tid != prev_tid: + if seq == 1: + session_complete = True + else: + session_complete = False + row["session_complete"] = session_complete + if session_complete: + # If we change client, session, thread or context, we can be sure to have + # a new topmost frame. + if ( + seq == 1 + or cid != prev_cid + or sid != prev_sid + or tid != prev_tid + or ctx != prev_ctx + ): + addTopmostFrame(row) + after_severity_downgrade = False + # We do not expect a non-error to be ever upgraded to an error + elif sev == "ERROR" and prev_sev != "ERROR": + addTopmostFrame(row) + after_severity_downgrade = False + # If we just had a severity downgrade, we assume that we wanted + # to break the error propagation after this point and split, too + elif after_severity_downgrade: + addTopmostFrame(row) + after_severity_downgrade = False + elif prev_sev == "ERROR" and sev != "ERROR": + after_severity_downgrade = True + + prev_cid = cid + prev_sid = sid + prev_tid = tid + prev_ctx = ctx + prev_sev = sev + + # Should be ms. We've seen quite some runtime between stackframes in the + # wild. We might want to consider to make this configurable. In general + # we prefer local context over letting slip through some topmost frame + # unrecognized, assuming that fixing the issues one by one they will + # uncover them succesively. This is achieved by a rather high delta value. 
+ max_avg_delta = 200 + checkAverageFrameTimeDeltas(rows, max_avg_delta) + + +def getFrameKey(frame): + return "{}.{}|".format(frame["location"], frame["result"]) + + +def getStackKey(stack): + stack_key = "" + for frame in stack["frames"]: + stack_key += getFrameKey(frame) + return hash(stack_key) + + +# A "raw stack" is a list of frames, that: +# - share the same build_id (implicitely through location) +# - share the same client_id +# - share the same session_id +# - has a growing sequence number +# - stops at the first downgrade of severity from ERROR to else +# - XXX: contains each location at most once (no recursion) +# - appears to be in a reasonable short timeframe +# Calculates also a hash key to identify identical stacks +def collectRawStacks(rows): + collectTopmostFrames(rows) + raw_stacks = [] + stack = { + "stack_id": "unset", + "client_id": "unset", + "session_id": "unset", + "submit_timeabs": "unset", + "frames": [{"location": "unset"}], + } + stack_id = 1 + first = True + for row in rows: + if isTopmostFrame(row): + if not first: + stack["stack_key"] = getStackKey(stack) + raw_stacks.append(stack) + stack_id += 1 + stack = { + "stack_id": stack_id, + "client_id": row["client_id"], + "session_id": row["session_id"], + "submit_timeabs": row["submit_timeabs"], + "context": row["context"], + "frames": [], + } + + stack["frames"].append( + { + "location": row["location"], + "source_file": row["source_file"], + "source_line": row["source_line"], + "seq": row["seq"], + "severity": row["severity"], + "result": row["result"], + } + ) + first = False + + return raw_stacks + + +# Merge all stacks that have the same hash key and count occurences. +# Relys on the ordering per client_id/session_id for correct counting. +def mergeEqualStacks(raw_stacks): + merged_stacks = {} + last_client_id = "none" + last_session_id = "none" + for stack in raw_stacks: + stack_key = stack["stack_key"] + merged_stack = stack + if stack_key in merged_stacks: + merged_stack = merged_stacks[stack_key] + if stack["client_id"] != last_client_id: + last_client_id = stack["client_id"] + merged_stack["client_count"] += 1 + if stack["session_id"] != last_session_id: + last_session_id = stack["session_id"] + merged_stack["session_count"] += 1 + merged_stack["hit_count"] += 1 + else: + merged_stack["client_count"] = 1 + last_client_id = merged_stack["client_id"] + merged_stack["session_count"] = 1 + last_session_id = merged_stack["session_id"] + merged_stack["hit_count"] = 1 + merged_stacks[stack_key] = merged_stack + + merged_list = list(merged_stacks.values()) + merged_list.sort(key=lambda x: x.get("hit_count"), reverse=True) + return merged_list + + +# Split the list of stacks into: +# - aborted (has at least one frame with NS_ERROR_ABORT) +# - info/warning (has at least one frame with that severity) +# - error (has only error frames) +def filterStacksForPropagation( + all_stacks, error_stacks, warn_stacks, info_stacks, abort_stacks +): + for stack in all_stacks: + warn = list(filter(lambda x: x["severity"] == "WARNING", stack["frames"])) + info = list(filter(lambda x: x["severity"] == "INFO", stack["frames"])) + abort = list(filter(lambda x: x["result"] == "NS_ERROR_ABORT", stack["frames"])) + if len(abort) > 0: + abort_stacks.append(stack) + elif len(info) > 0: + info_stacks.append(stack) + elif len(warn) > 0: + warn_stacks.append(stack) + else: + error_stacks.append(stack) + + +# Bugzilla comment markup +def printStacks(stacks): + row_format = "{} | {} | {} | {} | {}\n" + out = "" + out += 
row_format.format("Clients", "Sessions", "Hits", "Anchor (Context)", "Stack") + out += row_format.format("-------", "--------", "----", "----------------", "-----") + for stack in stacks: + framestr = "" + first = True + for frame in stack["frames"]: + if not first: + framestr += " <- " + framestr += "[{}#{}:{}]({})".format( + frame["source_file"], + frame["source_line"], + frame["result"], + frame["location"], + ) + first = False + out += row_format.format( + stack["client_count"], + stack["session_count"], + stack["hit_count"], + "{} ({})".format(stack["frames"][0]["anchor"], stack["context"]), + framestr, + ) + + return out + + +def groupStacksForAnchors(stacks): + anchors = {} + for stack in stacks: + anchor_name = stack["frames"][0]["anchor"] + if anchor_name in anchors: + anchors[anchor_name]["stacks"].append(stack) + else: + anchor = {"anchor": anchor_name, "stacks": [stack]} + anchors[anchor_name] = anchor + return anchors + + +""" +def getSummaryForAnchor(anchor): + return "[QM_TRY] Errors in function {}".format(anchor) + + +def searchBugForAnchor(bugzilla_key, anchor): + summary = getSummaryForAnchor(anchor) + bug_url = "https://bugzilla.mozilla.org/rest/bug?" \ + "summary={}&api_key={}".format(summary, bugzilla_key) + return requests.get(url=bug_url).json()["bugs"] + + +def createBugForAnchor(bugzilla_key, anchor): + summary = getSummaryForAnchor(anchor) + bug_url = "https://bugzilla.mozilla.org/rest/bug?" \ + "Bugzilla_api_key={}".format(bugzilla_key) + body = { + "product" : "Core", + "component" : "Storage: Quota Manager", + "version" : "unspecified", + "summary" : summary, + "description" : "This bug collects errors reported by QM_TRY" + "macros for function {}.".format(anchor), + } + resp = requests.post(url=bug_url, json=body) + if resp.status_code != 200: + print(resp) + return 0 + id = resp.json()["id"] + print("Added new bug {}:".format(id)) + return id + + +def ensureBugForAnchor(bugzilla_key, anchor): + buglist = searchBugForAnchor(bugzilla_key, anchor) + if (len(buglist) > 0): + id = buglist[0]["id"] + print("Found existing bug {}:".format(id)) + return id + return createBugForAnchor(bugzilla_key, anchor) + + +def addCommentForAnchor(bugzilla_key, anchor, stacks): + id = ensureBugForAnchor(bugzilla_key, anchor) + if (id <= 0): + print("Unable to create a bug for {}.".format(anchor)) + return + comment = printStacks(stacks) + print("") + print("Add comment to bug {}:".format(id)) + print(comment) + + +def addCommentsForStacks(bugzilla_key, stacks): + anchors = groupStacksForAnchors(stacks) + for anchor in anchors: + addCommentForAnchor(bugzilla_key, anchors[anchor]["anchor"], anchors[anchor]["stacks"]) +""" diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/telemetry.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/telemetry.py new file mode 100644 index 0000000000..26debd0546 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/telemetry.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import time + +import requests + +from qm_try_analysis.logging import info + +TELEMETRY_BASE_URL = "https://sql.telemetry.mozilla.org/api/" + + +def query(key, query, p_params): + headers = {"Authorization": "Key {}".format(key)} + start_url = TELEMETRY_BASE_URL + f"queries/{query}/refresh?{p_params}" + info(f"Starting job using url {start_url}") + resp = requests.post(url=start_url, headers=headers) + job = resp.json()["job"] + job_id = job["id"] + info(f"Started job {job_id}") + + poll_url = TELEMETRY_BASE_URL + f"jobs/{job_id}" + info(f"Polling query status from {poll_url}") + poll = True + status = 0 + qresultid = 0 + while poll: + print(".", end="", flush=True) + resp = requests.get(url=poll_url, headers=headers) + status = resp.json()["job"]["status"] + if status > 2: + # print(resp.json()) + poll = False + qresultid = resp.json()["job"]["query_result_id"] + else: + time.sleep(0.2) + print(".") + info(f"Finished with status {status}") + + if status == 3: + results_url = TELEMETRY_BASE_URL + f"queries/78691/results/{qresultid}.json" + + info(f"Querying result from {results_url}") + resp = requests.get(url=results_url, headers=headers) + return resp.json() + + return {"query_result": {"data": {"rows": {}}}} + + +def getLastEventTimeAbs(rows): + if len(rows) == 0: + return 0 + return rows[len(rows) - 1]["submit_timeabs"] diff --git a/dom/quota/scripts/qm-try-analysis/qm_try_analysis/utils.py b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/utils.py new file mode 100644 index 0000000000..485dbf66f5 --- /dev/null +++ b/dom/quota/scripts/qm-try-analysis/qm_try_analysis/utils.py @@ -0,0 +1,91 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import datetime +import json + +import requests + +from qm_try_analysis.logging import error, info, warning + + +def readJSONFile(FileName): + f = open(FileName, "r") + p = json.load(f) + f.close() + return p + + +def writeJSONFile(FileName, Content): + with open(FileName, "w") as outfile: + json.dump(Content, outfile, indent=4) + + +def dateback(days): + today = datetime.date.today() + delta = datetime.timedelta(days) + return today - delta + + +def lastweek(): + today = datetime.date.today() + delta = datetime.timedelta(days=7) + return today - delta + + +# Given a set of build ids, fetch the repository base URL for each id. 
+def fetchBuildRevisions(buildids): + buildhub_url = "https://buildhub.moz.tools/api/search" + delids = {} + for bid in buildids: + info(f"Fetching revision for build {bid}.") + body = {"size": 1, "query": {"term": {"build.id": bid}}} + resp = requests.post(url=buildhub_url, json=body) + hits = resp.json()["hits"]["hits"] + if len(hits) > 0: + buildids[bid] = ( + hits[0]["_source"]["source"]["repository"] + + "/annotate/" + + hits[0]["_source"]["source"]["revision"] + ) + else: + warning(f"No revision for build.id {bid}") + delids[bid] = "x" + for bid in delids: + buildids.pop(bid) + + +def readExecutionFile(workdir): + exefile = "{}/qmexecutions.json".format(workdir) + try: + return readJSONFile(exefile) + except OSError: + return [] + + +def writeExecutionFile(workdir, executions): + exefile = "{}/qmexecutions.json".format(workdir) + try: + writeJSONFile(exefile, executions) + except OSError: + error("Error writing execution record.") + + +def getLastRunFromExecutionFile(workdir): + executions = readExecutionFile(workdir) + if len(executions) > 0: + return executions[len(executions) - 1] + return {} + + +def updateLastRunToExecutionFile(workdir, run): + executions = readExecutionFile(workdir) + executions[len(executions) - 1] = run + writeExecutionFile(workdir, executions) + + +def addNewRunToExecutionFile(workdir, run): + executions = readExecutionFile(workdir) + executions.append(run) + writeExecutionFile(workdir, executions) |
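
As an illustration of how the scripts added in this commit chain together, here is a minimal sketch of an end-to-end run. It is based on the `fetch`, `analyze`, and `report` commands registered in cli.py and on the option names defined in fetch.py, analyze.py, and report.py; the `poetry run qm-try-analysis` entry point is taken from the hint printed by report.py, and both API keys are placeholders.

```bash
# Illustrative sketch only; API keys and the working directory are placeholders.

# 1. Fetch QM_TRY failure events from Telemetry (query 78691) into ./output
poetry run qm-try-analysis fetch -k "$TELEMETRY_API_KEY" -d 7 -w output

# 2. Collapse the fetched rows into unique stacks and write the Bugzilla markup
poetry run qm-try-analysis analyze -w output

# 3. File or update Bugzilla bugs for the resulting error stacks
poetry run qm-try-analysis report -k "$BUGZILLA_API_KEY" -w output
```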