summaryrefslogtreecommitdiffstats
path: root/python/mach/mach/sentry.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/mach/mach/sentry.py')
-rw-r--r--python/mach/mach/sentry.py222
1 files changed, 222 insertions, 0 deletions
diff --git a/python/mach/mach/sentry.py b/python/mach/mach/sentry.py
new file mode 100644
index 0000000000..5008f8a40c
--- /dev/null
+++ b/python/mach/mach/sentry.py
@@ -0,0 +1,222 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import abc
+import re
+from pathlib import Path
+from threading import Thread
+
+import sentry_sdk
+from mozversioncontrol import (
+ InvalidRepoPath,
+ MissingUpstreamRepo,
+ MissingVCSTool,
+ get_repository_object,
+)
+from six import string_types
+
+from mach.telemetry import is_telemetry_enabled
+from mach.util import get_state_dir
+
+# https://sentry.io/organizations/mozilla/projects/mach/
+_SENTRY_DSN = (
+ "https://5cfe351fb3a24e8d82c751252b48722b@o1069899.ingest.sentry.io/6250014"
+)
+
+
class ErrorReporter(abc.ABC):
    """Interface for objects that report exceptions to an error tracker.

    The class derives from ``abc.ABC`` so that ``@abc.abstractmethod`` is
    actually enforced: a subclass that does not override
    ``report_exception`` cannot be instantiated.
    """

    @abc.abstractmethod
    def report_exception(self, exception):
        """Report the exception to remote error-tracking software."""
+
+
class SentryErrorReporter(ErrorReporter):
    """Error reporter backed by the Sentry SDK."""

    def report_exception(self, exception):
        """Pass ``exception`` to ``sentry_sdk.capture_exception``.

        Returns whatever ``sentry_sdk.capture_exception`` returns.
        """
        result = sentry_sdk.capture_exception(exception)
        return result
+
+
class NoopErrorReporter(ErrorReporter):
    """Reporter that discards every exception it is given.

    Used when error reporting is explicitly disabled, for example when
    telemetry has not been allowed.
    """

    def report_exception(self, exception):
        """Ignore ``exception``; always returns ``None``."""
        return
+
+
def register_sentry(argv, settings, topsrcdir: Path):
    """Set up Sentry reporting and return the reporter to use.

    When telemetry is not enabled for ``settings`` a ``NoopErrorReporter`` is
    returned and nothing else happens. Otherwise a daemon thread running
    ``_is_unmodified_mach_core`` is started, the Sentry SDK is initialised
    with ``_process_event`` as its ``before_send`` hook, the invoked command
    line is recorded as a breadcrumb, and a ``SentryErrorReporter`` is
    returned.
    """
    global _is_unmodified_mach_core_thread

    if not is_telemetry_enabled(settings):
        return NoopErrorReporter()

    def before_send(event, _hint):
        # Bind topsrcdir so the hook can inspect the repository later.
        return _process_event(event, topsrcdir)

    # Start the check now so that it runs in the background; _process_event
    # joins the thread before reading its result.
    worker = Thread(target=_is_unmodified_mach_core, args=[topsrcdir], daemon=True)
    _is_unmodified_mach_core_thread = worker
    worker.start()

    sentry_sdk.init(_SENTRY_DSN, before_send=before_send)

    command_line = " ".join(argv)
    sentry_sdk.add_breadcrumb(message="./mach {}".format(command_line))
    return SentryErrorReporter()
+
+
def _process_event(sentry_event, topsrcdir: Path):
    """Filter and scrub ``sentry_event`` before it is sent.

    Returns ``None`` (which makes Sentry drop the event, see
    https://docs.sentry.io/platforms/python/configuration/filtering/#using-beforesend)
    when the repository cannot be inspected, when no base revision is known,
    or when the background check did not report mach as unmodified.
    Otherwise returns the scrubbed event tagged with its release.
    """
    repo = _get_repository_object(topsrcdir)
    if repo is None:
        # Without the repo state there is no way to tell whether the mach
        # files are unmodified.
        return None

    base_ref = repo.base_ref_as_hg()
    if not base_ref:
        # An exception that cannot be tied to a revision is not worth sending.
        return None

    # Wait for the background check started by register_sentry().
    _is_unmodified_mach_core_thread.join()
    if not _is_unmodified_mach_core_result:
        return None

    scrubbed = _settle_mach_module_id(sentry_event, topsrcdir)
    scrubbed = _patch_absolute_paths(scrubbed, topsrcdir)
    scrubbed = _delete_server_name(scrubbed, topsrcdir)

    scrubbed["release"] = "hg-rev-{}".format(base_ref)
    return scrubbed
+
+
+def _settle_mach_module_id(sentry_event, _):
+ # Sentry groups issues according to the stack frames and their associated
+ # "module" properties. However, one of the modules is being reported
+ # like "mach.commands.26a828ef5164403eaff4305ab4cb0fab" (with a generated id).
+ # This function replaces that generated id with the static string "<generated>"
+ # so that grouping behaves as expected
+
+ stacktrace_frames = sentry_event["exception"]["values"][0]["stacktrace"]["frames"]
+ for frame in stacktrace_frames:
+ module = frame.get("module")
+ if not module:
+ continue
+
+ module = re.sub(
+ "mach\\.commands\\.[a-f0-9]{32}", "mach.commands.<generated>", module
+ )
+ frame["module"] = module
+ return sentry_event
+
+
def _compile_path_needle(target):
    """Compile a case-insensitive regex matching ``target`` with any slash style.

    Paths in the Sentry event aren't consistent:
    * On *nix, they're mostly forward slashes.
    * On *nix, not all absolute paths start with a leading forward slash.
    * On Windows, they're mostly backslashes.
    * On Windows, `.extra."sys.argv"` uses forward slashes.
    * The Python variables in-scope captured by the Sentry report may be
      inconsistent, even for a single path. For example, on
      Windows, Mach calculates the state_dir as "C:\\Users\\<user>/.mozbuild".

    The path is therefore split on both slash kinds, each component is
    escaped with ``re.escape`` (so characters such as ".", "+" or "(" in a
    directory name are matched literally rather than interpreted as regex
    syntax), and the components are re-joined with a character class that
    matches either slash.
    """
    any_slash = r"[\\/]"
    escaped = [re.escape(component) for component in re.split(any_slash, target)]
    if target.startswith("/"):
        # Not all absolute paths in the event keep their leading slash, so
        # make it optional. escaped[0] is the empty string before that slash.
        pattern = any_slash + "?" + any_slash.join(escaped[1:])
    else:
        pattern = any_slash.join(escaped)
    return re.compile(pattern, re.IGNORECASE)


def _patch_absolute_paths(sentry_event, topsrcdir: Path):
    """Replace well-known absolute paths in ``sentry_event`` with placeholders.

    As discussed here (https://bugzilla.mozilla.org/show_bug.cgi?id=1636251#c28),
    usernames are removed from file names on a best-effort basis. The most
    likely place for usernames to manifest in Sentry information is within
    absolute paths, such as: "/home/mitch/dev/firefox/mach".

    The state directory is replaced with "<statedir>", ``topsrcdir`` with
    "<topsrcdir>" and the user's home directory with "~", in that order.
    Strings are patched wherever they occur in nested lists and dicts,
    including dict keys.

    Returns the patched event; dicts are modified in place, lists are rebuilt.
    """

    def recursive_patch(value, needle, replacement):
        if isinstance(value, list):
            return [recursive_patch(v, needle, replacement) for v in value]
        if isinstance(value, dict):
            # Snapshot the keys: entries are popped and re-inserted below.
            for key in list(value.keys()):
                next_value = value.pop(key)
                if isinstance(key, str):
                    key = needle.sub(replacement, key)
                value[key] = recursive_patch(next_value, needle, replacement)
            return value
        if isinstance(value, str):
            return needle.sub(replacement, value)
        return value

    for target_path, replacement in (
        (str(get_state_dir()), "<statedir>"),
        (str(topsrcdir), "<topsrcdir>"),
        (str(Path.home()), "~"),
    ):
        # Sentry converts "vars" to their "representations". When paths are in
        # local variables on Windows, "C:\Users\MozillaUser\Desktop" becomes
        # "'C:\\Users\\MozillaUser\\Desktop'". To still catch this case, we
        # "repr" the path and scrub the beginning and end quotes, then
        # find-and-replace on that as well.
        repr_path = repr(target_path)[1:-1]

        # dict.fromkeys() drops the duplicate when repr() left the path as-is
        # (the usual case on *nix) while keeping the original order.
        for target in dict.fromkeys((target_path, repr_path)):
            if not target.strip("/\\"):
                # A target made only of slashes (or empty) would match almost
                # everywhere and mangle the whole event.
                continue
            needle_regex = _compile_path_needle(target)
            sentry_event = recursive_patch(sentry_event, needle_regex, replacement)
    return sentry_event
+
+
+def _delete_server_name(sentry_event, _):
+ sentry_event.pop("server_name")
+ return sentry_event
+
+
def _get_repository_object(topsrcdir: Path):
    """Return the repository object for ``topsrcdir``, or ``None``.

    ``None`` is returned when ``get_repository_object`` raises
    ``InvalidRepoPath`` or ``MissingVCSTool``; any other exception propagates.
    """
    try:
        repo = get_repository_object(str(topsrcdir))
    except (InvalidRepoPath, MissingVCSTool):
        return None
    else:
        return repo
+
+
def _is_unmodified_mach_core(topsrcdir: Path):
    """Record whether mach is unmodified compared to the public tree.

    To avoid submitting Sentry events for errors caused by user's
    local changes, we attempt to detect if mach (or code affecting mach)
    has been modified in the user's local state:
    * In a revision off of a "ancestor to central" revision, or:
    * In the working, uncommitted state.

    If "$topsrcdir/mach" and "*.py" haven't been touched, then we can be
    pretty confident that the Mach behaviour that caused the exception
    also exists in the public tree.

    This function returns nothing: the verdict is stored in the module-level
    ``_is_unmodified_mach_core_result`` (``True`` when no such file was
    touched, ``False`` otherwise or when the state cannot be determined).
    """
    global _is_unmodified_mach_core_result

    repo = _get_repository_object(topsrcdir)
    if repo is None:
        # No usable repository: we can't tell whether mach was modified, so
        # report "modified" instead of failing with an AttributeError below.
        _is_unmodified_mach_core_result = False
        return

    try:
        files = set(repo.get_outgoing_files()) | set(repo.get_changed_files())
    except MissingUpstreamRepo:
        # If we don't know the upstream state, we don't know if the mach files
        # have been unmodified.
        _is_unmodified_mach_core_result = False
        return

    _is_unmodified_mach_core_result = not any(
        file == "mach" or file.endswith(".py") for file in files
    )
+
+
+_is_unmodified_mach_core_result = None
+_is_unmodified_mach_core_thread = None