# This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. import abc import re from pathlib import Path from threading import Thread import sentry_sdk from mozversioncontrol import ( InvalidRepoPath, MissingUpstreamRepo, MissingVCSTool, get_repository_object, ) from six import string_types from mach.telemetry import is_telemetry_enabled from mach.util import get_state_dir # https://sentry.io/organizations/mozilla/projects/mach/ _SENTRY_DSN = ( "https://5cfe351fb3a24e8d82c751252b48722b@o1069899.ingest.sentry.io/6250014" ) class ErrorReporter(object): @abc.abstractmethod def report_exception(self, exception): """Report the exception to remote error-tracking software.""" class SentryErrorReporter(ErrorReporter): """Reports errors using Sentry.""" def report_exception(self, exception): return sentry_sdk.capture_exception(exception) class NoopErrorReporter(ErrorReporter): """Drops errors instead of reporting them. This is useful in cases where error-reporting is specifically disabled, such as when telemetry hasn't been allowed. """ def report_exception(self, exception): return None def register_sentry(argv, settings, topsrcdir: Path): if not is_telemetry_enabled(settings): return NoopErrorReporter() global _is_unmodified_mach_core_thread _is_unmodified_mach_core_thread = Thread( target=_is_unmodified_mach_core, args=[topsrcdir], daemon=True, ) _is_unmodified_mach_core_thread.start() sentry_sdk.init( _SENTRY_DSN, before_send=lambda event, _: _process_event(event, topsrcdir) ) sentry_sdk.add_breadcrumb(message="./mach {}".format(" ".join(argv))) return SentryErrorReporter() def _process_event(sentry_event, topsrcdir: Path): # Returning nothing causes the event to be dropped: # https://docs.sentry.io/platforms/python/configuration/filtering/#using-beforesend repo = _get_repository_object(topsrcdir) if repo is None: # We don't know the repo state, so we don't know if mach files are # unmodified. return base_ref = repo.base_ref_as_hg() if not base_ref: # If we don't know which revision this exception is attached to, then it's # not worth sending return _is_unmodified_mach_core_thread.join() if not _is_unmodified_mach_core_result: return for map_fn in (_settle_mach_module_id, _patch_absolute_paths, _delete_server_name): sentry_event = map_fn(sentry_event, topsrcdir) sentry_event["release"] = "hg-rev-{}".format(base_ref) return sentry_event def _settle_mach_module_id(sentry_event, _): # Sentry groups issues according to the stack frames and their associated # "module" properties. However, one of the modules is being reported # like "mach.commands.26a828ef5164403eaff4305ab4cb0fab" (with a generated id). # This function replaces that generated id with the static string "" # so that grouping behaves as expected stacktrace_frames = sentry_event["exception"]["values"][0]["stacktrace"]["frames"] for frame in stacktrace_frames: module = frame.get("module") if not module: continue module = re.sub( "mach\\.commands\\.[a-f0-9]{32}", "mach.commands.", module ) frame["module"] = module return sentry_event def _patch_absolute_paths(sentry_event, topsrcdir: Path): # As discussed here (https://bugzilla.mozilla.org/show_bug.cgi?id=1636251#c28), # we remove usernames from file names with a best-effort basis. The most likely # place for usernames to manifest in Sentry information is within absolute paths, # such as: "/home/mitch/dev/firefox/mach" # We replace the state_dir, obj_dir, src_dir with "<...>" placeholders. # Note that we also do a blanket find-and-replace of the user's name with "", # which may have ill effects if the user's name is, by happenstance, a substring # of some other value within the Sentry event. def recursive_patch(value, needle, replacement): if isinstance(value, list): return [recursive_patch(v, needle, replacement) for v in value] elif isinstance(value, dict): for key in list(value.keys()): next_value = value.pop(key) key = needle.sub(replacement, key) value[key] = recursive_patch(next_value, needle, replacement) return value elif isinstance(value, string_types): return needle.sub(replacement, value) else: return value for (target_path, replacement) in ( (get_state_dir(), ""), (str(topsrcdir), ""), (str(Path.home()), "~"), ): # Sentry converts "vars" to their "representations". When paths are in local # variables on Windows, "C:\Users\MozillaUser\Desktop" becomes # "'C:\\Users\\MozillaUser\\Desktop'". To still catch this case, we "repr" # the home directory and scrub the beginning and end quotes, then # find-and-replace on that. repr_path = repr(target_path)[1:-1] for target in (target_path, repr_path): # Paths in the Sentry event aren't consistent: # * On *nix, they're mostly forward slashes. # * On *nix, not all absolute paths start with a leading forward slash. # * On Windows, they're mostly backslashes. # * On Windows, `.extra."sys.argv"` uses forward slashes. # * The Python variables in-scope captured by the Sentry report may be # inconsistent, even for a single path. For example, on # Windows, Mach calculates the state_dir as "C:\Users\/.mozbuild". # Handle the case where not all absolute paths start with a leading # forward slash: make the initial slash optional in the search string. if target.startswith("/"): target = "/?" + target[1:] # Handle all possible slash variants: our search string should match # both forward slashes and backslashes. This is done by dynamically # replacing each "/" and "\" with the regex "[\/\\]" (match both). slash_regex = re.compile(r"[\/\\]") # The regex module parses string backslash escapes before compiling the # regex, so we need to add more backslashes: # "[\\/\\\\]" => [\/\\] => match "/" and "\" target = slash_regex.sub(r"[\\/\\\\]", target) # Compile the regex and patch the event. needle_regex = re.compile(target, re.IGNORECASE) sentry_event = recursive_patch(sentry_event, needle_regex, replacement) return sentry_event def _delete_server_name(sentry_event, _): sentry_event.pop("server_name") return sentry_event def _get_repository_object(topsrcdir: Path): try: return get_repository_object(str(topsrcdir)) except (InvalidRepoPath, MissingVCSTool): return None def _is_unmodified_mach_core(topsrcdir: Path): """True if mach is unmodified compared to the public tree. To avoid submitting Sentry events for errors caused by user's local changes, we attempt to detect if mach (or code affecting mach) has been modified in the user's local state: * In a revision off of a "ancestor to central" revision, or: * In the working, uncommitted state. If "$topsrcdir/mach" and "*.py" haven't been touched, then we can be pretty confident that the Mach behaviour that caused the exception also exists in the public tree. """ global _is_unmodified_mach_core_result repo = _get_repository_object(topsrcdir) try: files = set(repo.get_outgoing_files()) | set(repo.get_changed_files()) _is_unmodified_mach_core_result = not any( [file for file in files if file == "mach" or file.endswith(".py")] ) except MissingUpstreamRepo: # If we don't know the upstream state, we don't know if the mach files # have been unmodified. _is_unmodified_mach_core_result = False _is_unmodified_mach_core_result = None _is_unmodified_mach_core_thread = None