From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- toolkit/crashreporter/tools/python.toml | 3 + toolkit/crashreporter/tools/symbolstore.py | 1095 +++++++++++++++++++++++ toolkit/crashreporter/tools/unit-symbolstore.py | 617 +++++++++++++ toolkit/crashreporter/tools/upload_symbols.py | 306 +++++++ 4 files changed, 2021 insertions(+) create mode 100644 toolkit/crashreporter/tools/python.toml create mode 100755 toolkit/crashreporter/tools/symbolstore.py create mode 100755 toolkit/crashreporter/tools/unit-symbolstore.py create mode 100644 toolkit/crashreporter/tools/upload_symbols.py (limited to 'toolkit/crashreporter/tools') diff --git a/toolkit/crashreporter/tools/python.toml b/toolkit/crashreporter/tools/python.toml new file mode 100644 index 0000000000..778905df37 --- /dev/null +++ b/toolkit/crashreporter/tools/python.toml @@ -0,0 +1,3 @@ +[DEFAULT] + +["unit-symbolstore.py"] diff --git a/toolkit/crashreporter/tools/symbolstore.py b/toolkit/crashreporter/tools/symbolstore.py new file mode 100755 index 0000000000..bc16002503 --- /dev/null +++ b/toolkit/crashreporter/tools/symbolstore.py @@ -0,0 +1,1095 @@ +#!/bin/env python +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Usage: symbolstore.py +# +# Runs dump_syms on each debug info file specified on the command line, +# then places the resulting symbol file in the proper directory +# structure in the symbol store path. Accepts multiple files +# on the command line, so can be called as part of a pipe using +# find | xargs symbolstore.pl +# But really, you might just want to pass it . +# +# Parameters accepted: +# -c : Copy debug info files to the same directory structure +# as sym files. 
On Windows, this will also copy +# binaries into the symbol store. +# -a "" : Run dump_syms -a for each space separated +# cpu architecture in (only on OS X) +# -s : Use as the top source directory to +# generate relative filenames. + +import ctypes +import errno +import os +import platform +import re +import shutil +import subprocess +import sys +import textwrap +import time +from optparse import OptionParser +from pathlib import Path + +import buildconfig +from mozbuild.generated_sources import ( + GENERATED_SOURCE_EXTS, + get_filename_with_digest, + get_s3_region_and_bucket, +) +from mozbuild.util import memoize +from mozpack import executables +from mozpack.copier import FileRegistry +from mozpack.manifests import InstallManifest, UnreadableInstallManifest + +# Utility classes + + +class VCSFileInfo: + """A base class for version-controlled file information. Ensures that the + following attributes are generated only once (successfully): + + self.root + self.clean_root + self.revision + self.filename + + The attributes are generated by a single call to the GetRoot, + GetRevision, and GetFilename methods. Those methods are explicitly not + implemented here and must be implemented in derived classes.""" + + def __init__(self, file): + if not file: + raise ValueError + self.file = file + + def __getattr__(self, name): + """__getattr__ is only called for attributes that are not set on self, + so setting self.[attr] will prevent future calls to the GetRoot, + GetRevision, and GetFilename methods. 
We don't set the values on + failure on the off chance that a future call might succeed.""" + + if name == "root": + root = self.GetRoot() + if root: + self.root = root + return root + + elif name == "clean_root": + clean_root = self.GetCleanRoot() + if clean_root: + self.clean_root = clean_root + return clean_root + + elif name == "revision": + revision = self.GetRevision() + if revision: + self.revision = revision + return revision + + elif name == "filename": + filename = self.GetFilename() + if filename: + self.filename = filename + return filename + + raise AttributeError + + def GetRoot(self): + """This method should return the unmodified root for the file or 'None' + on failure.""" + raise NotImplementedError + + def GetCleanRoot(self): + """This method should return the repository root for the file or 'None' + on failure.""" + raise NotImplementedError + + def GetRevision(self): + """This method should return the revision number for the file or 'None' + on failure.""" + raise NotImplementedError + + def GetFilename(self): + """This method should return the repository-specific filename for the + file or 'None' on failure.""" + raise NotImplementedError + + +# This regex separates protocol and optional username/password from a url. +# For instance, all the following urls will be transformed into +# 'foo.com/bar': +# +# http://foo.com/bar +# svn+ssh://user@foo.com/bar +# svn+ssh://user:pass@foo.com/bar +# +rootRegex = re.compile(r"^\S+?:/+(?:[^\s/]*@)?(\S+)$") + + +def read_output(*args): + (stdout, _) = subprocess.Popen( + args=args, universal_newlines=True, stdout=subprocess.PIPE + ).communicate() + return stdout.rstrip() + + +class HGRepoInfo: + def __init__(self, path): + self.path = path + + rev = os.environ.get("MOZ_SOURCE_CHANGESET") + if not rev: + rev = read_output("hg", "-R", path, "parent", "--template={node}") + + # Look for the default hg path. If MOZ_SOURCE_REPO is set, we + # don't bother asking hg. 
+ hg_root = os.environ.get("MOZ_SOURCE_REPO") + if hg_root: + root = hg_root + else: + root = read_output("hg", "-R", path, "showconfig", "paths.default") + if not root: + print("Failed to get HG Repo for %s" % path, file=sys.stderr) + cleanroot = None + if root: + match = rootRegex.match(root) + if match: + cleanroot = match.group(1) + if cleanroot.endswith("/"): + cleanroot = cleanroot[:-1] + if cleanroot is None: + print( + textwrap.dedent( + """\ + Could not determine repo info for %s. This is either not a clone of the web-based + repository, or you have not specified MOZ_SOURCE_REPO, or the clone is corrupt.""" + ) + % path, + sys.stderr, + ) + sys.exit(1) + self.rev = rev + self.root = root + self.cleanroot = cleanroot + + def GetFileInfo(self, file): + return HGFileInfo(file, self) + + +class HGFileInfo(VCSFileInfo): + def __init__(self, file, repo): + VCSFileInfo.__init__(self, file) + self.repo = repo + self.file = os.path.relpath(file, repo.path) + + def GetRoot(self): + return self.repo.root + + def GetCleanRoot(self): + return self.repo.cleanroot + + def GetRevision(self): + return self.repo.rev + + def GetFilename(self): + if self.revision and self.clean_root: + return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision) + return self.file + + +class GitRepoInfo: + """ + Info about a local git repository. Does not currently + support discovering info about a git clone, the info must be + provided out-of-band. + """ + + def __init__(self, path, rev, root): + self.path = path + cleanroot = None + if root: + match = rootRegex.match(root) + if match: + cleanroot = match.group(1) + if cleanroot.endswith("/"): + cleanroot = cleanroot[:-1] + if cleanroot is None: + print( + textwrap.dedent( + """\ + Could not determine repo info for %s (%s). 
This is either not a clone of a web-based + repository, or you have not specified MOZ_SOURCE_REPO, or the clone is corrupt.""" + ) + % (path, root), + file=sys.stderr, + ) + sys.exit(1) + self.rev = rev + self.cleanroot = cleanroot + + def GetFileInfo(self, file): + return GitFileInfo(file, self) + + +class GitFileInfo(VCSFileInfo): + def __init__(self, file, repo): + VCSFileInfo.__init__(self, file) + self.repo = repo + self.file = os.path.relpath(file, repo.path) + + def GetRoot(self): + return self.repo.path + + def GetCleanRoot(self): + return self.repo.cleanroot + + def GetRevision(self): + return self.repo.rev + + def GetFilename(self): + if self.revision and self.clean_root: + return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision) + return self.file + + +# Utility functions + + +# A cache of files for which VCS info has already been determined. Used to +# prevent extra filesystem activity or process launching. +vcsFileInfoCache = {} + +if platform.system() == "Windows": + + def realpath(path): + """ + Normalize a path using `GetFinalPathNameByHandleW` to get the + path with all components in the case they exist in on-disk, so + that making links to a case-sensitive server (hg.mozilla.org) works. + + This function also resolves any symlinks in the path. + """ + # Return the original path if something fails, which can happen for paths that + # don't exist on this system (like paths from the CRT). + result = path + + ctypes.windll.kernel32.SetErrorMode(ctypes.c_uint(1)) + handle = ctypes.windll.kernel32.CreateFileW( + path, + # GENERIC_READ + 0x80000000, + # FILE_SHARE_READ + 1, + None, + # OPEN_EXISTING + 3, + # FILE_FLAG_BACKUP_SEMANTICS + # This is necessary to open + # directory handles. 
            0x02000000,
            None,
        )
        if handle != -1:
            size = ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle, None, 0, 0)
            buf = ctypes.create_unicode_buffer(size)
            if (
                ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle, buf, size, 0)
                > 0
            ):
                # The return value of GetFinalPathNameByHandleW uses the
                # '\\?\' prefix.
                result = buf.value[4:]
            ctypes.windll.kernel32.CloseHandle(handle)
        return result

else:
    # Just use the os.path version otherwise.
    realpath = os.path.realpath


def IsInDir(file, dir):
    # True when `file` is `dir` itself or lies anywhere beneath it;
    # Path.relative_to raises ValueError otherwise.
    try:
        Path(file).relative_to(dir)
        return True
    except ValueError:
        return False


def GetVCSFilenameFromSrcdir(file, srcdir):
    # Return VCS info for `file` inside `srcdir`, caching per-srcdir repo
    # info on the Dumper class; returns None when srcdir is not a known
    # (Mercurial) checkout.
    if srcdir not in Dumper.srcdirRepoInfo:
        # Not in cache, so find it and cache it
        if os.path.isdir(os.path.join(srcdir, ".hg")):
            Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
        else:
            # Unknown VCS or file is not in a repo.
            return None
    return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file)


def GetVCSFilename(file, srcdirs):
    """Given a full path to a file, and the top source directory,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
       VCS location, relative filename, and revision number, formatted like:
       vcs:vcs location:filename:revision
       For example:
       cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists"""
    (path, filename) = os.path.split(file)
    if path == "" or filename == "":
        return (file, None)

    fileInfo = None
    root = ""
    if file in vcsFileInfoCache:
        # Already cached this info, use it.
+ fileInfo = vcsFileInfoCache[file] + else: + for srcdir in srcdirs: + if not IsInDir(file, srcdir): + continue + fileInfo = GetVCSFilenameFromSrcdir(file, srcdir) + if fileInfo: + vcsFileInfoCache[file] = fileInfo + break + + if fileInfo: + file = fileInfo.filename + root = fileInfo.root + + # we want forward slashes on win32 paths + return (file.replace("\\", "/"), root) + + +def validate_install_manifests(install_manifest_args): + args = [] + for arg in install_manifest_args: + bits = arg.split(",") + if len(bits) != 2: + raise ValueError( + "Invalid format for --install-manifest: " "specify manifest,target_dir" + ) + manifest_file, destination = [os.path.abspath(b) for b in bits] + if not os.path.isfile(manifest_file): + raise IOError(errno.ENOENT, "Manifest file not found", manifest_file) + if not os.path.isdir(destination): + raise IOError(errno.ENOENT, "Install directory not found", destination) + try: + manifest = InstallManifest(manifest_file) + except UnreadableInstallManifest: + raise IOError(errno.EINVAL, "Error parsing manifest file", manifest_file) + args.append((manifest, destination)) + return args + + +def make_file_mapping(install_manifests): + file_mapping = {} + for manifest, destination in install_manifests: + destination = os.path.abspath(destination) + reg = FileRegistry() + manifest.populate_registry(reg) + for dst, src in reg: + if hasattr(src, "path"): + # Any paths that get compared to source file names need to go through realpath. 
+ abs_dest = realpath(os.path.join(destination, dst)) + file_mapping[abs_dest] = realpath(src.path) + return file_mapping + + +@memoize +def get_generated_file_s3_path(filename, rel_path, bucket): + """Given a filename, return a path formatted similarly to + GetVCSFilename but representing a file available in an s3 bucket.""" + with open(filename, "rb") as f: + path = get_filename_with_digest(rel_path, f.read()) + return "s3:{bucket}:{path}:".format(bucket=bucket, path=path) + + +def GetPlatformSpecificDumper(**kwargs): + """This function simply returns a instance of a subclass of Dumper + that is appropriate for the current platform.""" + return {"WINNT": Dumper_Win32, "Linux": Dumper_Linux, "Darwin": Dumper_Mac}[ + buildconfig.substs["OS_ARCH"] + ](**kwargs) + + +def SourceIndex(fileStream, outputPath, vcs_root, s3_bucket): + """Takes a list of files, writes info to a data block in a .stream file""" + # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing + # Create the srcsrv data block that indexes the pdb file + result = True + pdbStreamFile = open(outputPath, "w") + pdbStreamFile.write( + "SRCSRV: ini ------------------------------------------------\r\n" + + "VERSION=2\r\n" + + "INDEXVERSION=2\r\n" + + "VERCTRL=http\r\n" + + "SRCSRV: variables ------------------------------------------\r\n" + + "SRCSRVVERCTRL=http\r\n" + + "RUST_GITHUB_TARGET=https://github.com/rust-lang/rust/raw/%var4%/%var3%\r\n" + ) + pdbStreamFile.write("HGSERVER=" + vcs_root + "\r\n") + pdbStreamFile.write("HG_TARGET=%hgserver%/raw-file/%var4%/%var3%\r\n") + + if s3_bucket: + pdbStreamFile.write("S3_BUCKET=" + s3_bucket + "\r\n") + pdbStreamFile.write("S3_TARGET=https://%s3_bucket%.s3.amazonaws.com/%var3%\r\n") + + # Allow each entry to choose its template via "var2". 
+ # Possible values for var2 are: HG_TARGET / S3_TARGET / RUST_GITHUB_TARGET + pdbStreamFile.write("SRCSRVTRG=%fnvar%(%var2%)\r\n") + + pdbStreamFile.write( + "SRCSRV: source files ---------------------------------------\r\n" + ) + pdbStreamFile.write(fileStream) + pdbStreamFile.write( + "SRCSRV: end ------------------------------------------------\r\n\n" + ) + pdbStreamFile.close() + return result + + +class Dumper: + """This class can dump symbols from a file with debug info, and + store the output in a directory structure that is valid for use as + a Breakpad symbol server. Requires a path to a dump_syms binary-- + |dump_syms| and a directory to store symbols in--|symbol_path|. + Optionally takes a list of processor architectures to process from + each debug file--|archs|, the full path to the top source + directory--|srcdir|, for generating relative source file names, + and an option to copy debug info files alongside the dumped + symbol files--|copy_debug|, mostly useful for creating a + Microsoft Symbol Server from the resulting output. + + You don't want to use this directly if you intend to process files. + Instead, call GetPlatformSpecificDumper to get an instance of a + subclass.""" + + srcdirRepoInfo = {} + + def __init__( + self, + dump_syms, + symbol_path, + archs=None, + srcdirs=[], + copy_debug=False, + vcsinfo=False, + srcsrv=False, + s3_bucket=None, + file_mapping=None, + ): + # popen likes absolute paths, at least on windows + self.dump_syms = os.path.abspath(dump_syms) + self.symbol_path = symbol_path + if archs is None: + # makes the loop logic simpler + self.archs = [""] + else: + self.archs = ["-a %s" % a for a in archs.split()] + # Any paths that get compared to source file names need to go through realpath. 
+ self.srcdirs = [realpath(s) for s in srcdirs] + self.copy_debug = copy_debug + self.vcsinfo = vcsinfo + self.srcsrv = srcsrv + self.s3_bucket = s3_bucket + self.file_mapping = file_mapping or {} + # Add a static mapping for Rust sources. Since Rust 1.30 official Rust builds map + # source paths to start with "/rust//". + rust_sha = buildconfig.substs["RUSTC_COMMIT"] + rust_srcdir = "/rustc/" + rust_sha + self.srcdirs.append(rust_srcdir) + Dumper.srcdirRepoInfo[rust_srcdir] = GitRepoInfo( + rust_srcdir, rust_sha, "https://github.com/rust-lang/rust/" + ) + + # subclasses override this + def ShouldProcess(self, file): + return True + + # This is a no-op except on Win32 + def SourceServerIndexing( + self, debug_file, guid, sourceFileStream, vcs_root, s3_bucket + ): + return "" + + # subclasses override this if they want to support this + def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id): + """This function will copy a library or executable and the file holding the + debug information to |symbol_path|""" + pass + + def Process(self, file_to_process, count_ctors=False): + """Process the given file.""" + if self.ShouldProcess(os.path.abspath(file_to_process)): + self.ProcessFile(file_to_process, count_ctors=count_ctors) + + def ProcessFile(self, file, dsymbundle=None, count_ctors=False): + """Dump symbols from these files into a symbol file, stored + in the proper directory structure in |symbol_path|; processing is performed + asynchronously, and Finish must be called to wait for it complete and cleanup. 
+ All files after the first are fallbacks in case the first file does not process + successfully; if it does, no other files will be touched.""" + print("Beginning work for file: %s" % file, file=sys.stderr) + + # tries to get the vcs root from the .mozconfig first - if it's not set + # the tinderbox vcs path will be assigned further down + vcs_root = os.environ.get("MOZ_SOURCE_REPO") + for arch_num, arch in enumerate(self.archs): + self.ProcessFileWork( + file, arch_num, arch, vcs_root, dsymbundle, count_ctors=count_ctors + ) + + def dump_syms_cmdline(self, file, arch, dsymbundle=None): + """ + Get the commandline used to invoke dump_syms. + """ + # The Mac dumper overrides this. + return [self.dump_syms, "--inlines", file] + + def ProcessFileWork( + self, file, arch_num, arch, vcs_root, dsymbundle=None, count_ctors=False + ): + ctors = 0 + t_start = time.time() + print("Processing file: %s" % file, file=sys.stderr) + + sourceFileStream = "" + code_id, code_file = None, None + try: + cmd = self.dump_syms_cmdline(file, arch, dsymbundle=dsymbundle) + print(" ".join(cmd), file=sys.stderr) + proc = subprocess.Popen( + cmd, + universal_newlines=True, + stdout=subprocess.PIPE, + ) + try: + module_line = next(proc.stdout) + except StopIteration: + module_line = "" + if module_line.startswith("MODULE"): + # MODULE os cpu guid debug_file + (guid, debug_file) = (module_line.split())[3:5] + # strip off .pdb extensions, and append .sym + sym_file = re.sub("\.pdb$", "", debug_file) + ".sym" + # we do want forward slashes here + rel_path = os.path.join(debug_file, guid, sym_file).replace("\\", "/") + full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path)) + try: + os.makedirs(os.path.dirname(full_path)) + except OSError: # already exists + pass + f = open(full_path, "w") + f.write(module_line) + # now process the rest of the output + for line in proc.stdout: + if line.startswith("FILE"): + # FILE index filename + (x, index, filename) = line.rstrip().split(None, 
2) + # We want original file paths for the source server. + sourcepath = filename + filename = realpath(filename) + if filename in self.file_mapping: + filename = self.file_mapping[filename] + if self.vcsinfo: + try: + gen_path = Path(filename) + rel_gen_path = gen_path.relative_to( + buildconfig.topobjdir + ) + except ValueError: + gen_path = None + if ( + gen_path + and gen_path.exists() + and gen_path.suffix in GENERATED_SOURCE_EXTS + and self.s3_bucket + ): + filename = get_generated_file_s3_path( + filename, str(rel_gen_path), self.s3_bucket + ) + rootname = "" + else: + (filename, rootname) = GetVCSFilename( + filename, self.srcdirs + ) + # sets vcs_root in case the loop through files were to end + # on an empty rootname + if vcs_root is None: + if rootname: + vcs_root = rootname + # Emit an entry for the file mapping for the srcsrv stream + if filename.startswith("hg:"): + (vcs, repo, source_file, revision) = filename.split(":", 3) + sourceFileStream += sourcepath + "*HG_TARGET*" + source_file + sourceFileStream += "*" + revision + "\r\n" + elif filename.startswith("s3:"): + (vcs, bucket, source_file, nothing) = filename.split(":", 3) + sourceFileStream += sourcepath + "*S3_TARGET*" + sourceFileStream += source_file + "\r\n" + elif filename.startswith("git:github.com/rust-lang/rust:"): + (vcs, repo, source_file, revision) = filename.split(":", 3) + sourceFileStream += sourcepath + "*RUST_GITHUB_TARGET*" + sourceFileStream += source_file + "*" + revision + "\r\n" + f.write("FILE %s %s\n" % (index, filename)) + elif line.startswith("INFO CODE_ID "): + # INFO CODE_ID code_id code_file + # This gives some info we can use to + # store binaries in the symbol store. 
+ bits = line.rstrip().split(None, 3) + if len(bits) == 4: + code_id, code_file = bits[2:] + f.write(line) + else: + if count_ctors and line.startswith("FUNC "): + # Static initializers, as created by clang and gcc + # have symbols that start with "_GLOBAL_sub" + if "_GLOBAL__sub_" in line: + ctors += 1 + # MSVC creates `dynamic initializer for '...'` + # symbols. + elif "`dynamic initializer for '" in line: + ctors += 1 + + # pass through all other lines unchanged + f.write(line) + f.close() + retcode = proc.wait() + if retcode != 0: + raise RuntimeError( + "dump_syms failed with error code %d while processing %s\n" + % (retcode, file) + ) + # we output relative paths so callers can get a list of what + # was generated + print(rel_path) + if self.srcsrv and vcs_root: + # add source server indexing to the pdb file + self.SourceServerIndexing( + debug_file, guid, sourceFileStream, vcs_root, self.s3_bucket + ) + # only copy debug the first time if we have multiple architectures + if self.copy_debug and arch_num == 0: + self.CopyExeAndDebugInfo(file, debug_file, guid, code_file, code_id) + else: + # For some reason, we didn't see the MODULE line as the first + # line of output, this is strictly required so fail irrespective + # of the process' return code. 
+ retcode = proc.wait() + message = [ + "dump_syms failed to produce the expected output", + "file: %s" % file, + "return code: %d" % retcode, + "first line of output: %s" % module_line, + ] + raise RuntimeError("\n----------\n".join(message)) + except Exception as e: + print("Unexpected error: %s" % str(e), file=sys.stderr) + raise + + if dsymbundle: + shutil.rmtree(dsymbundle) + + if count_ctors: + import json + + perfherder_data = { + "framework": {"name": "build_metrics"}, + "suites": [ + { + "name": "compiler_metrics", + "subtests": [ + { + "name": "num_static_constructors", + "value": ctors, + "alertChangeType": "absolute", + "alertThreshold": 3, + } + ], + } + ], + } + perfherder_extra_options = os.environ.get("PERFHERDER_EXTRA_OPTIONS", "") + for opt in perfherder_extra_options.split(): + for suite in perfherder_data["suites"]: + if opt not in suite.get("extraOptions", []): + suite.setdefault("extraOptions", []).append(opt) + + if "asan" not in perfherder_extra_options.lower(): + print( + "PERFHERDER_DATA: %s" % json.dumps(perfherder_data), file=sys.stderr + ) + + elapsed = time.time() - t_start + print("Finished processing %s in %.2fs" % (file, elapsed), file=sys.stderr) + + +# Platform-specific subclasses. For the most part, these just have +# logic to determine what files to extract symbols from. + + +def locate_pdb(path): + """Given a path to a binary, attempt to locate the matching pdb file with simple heuristics: + * Look for a pdb file with the same base name next to the binary + * Look for a pdb file with the same base name in the cwd + + Returns the path to the pdb file if it exists, or None if it could not be located. + """ + path, ext = os.path.splitext(path) + pdb = path + ".pdb" + if os.path.isfile(pdb): + return pdb + # If there's no pdb next to the file, see if there's a pdb with the same root name + # in the cwd. 
We build some binaries directly into dist/bin, but put the pdb files + # in the relative objdir, which is the cwd when running this script. + base = os.path.basename(pdb) + pdb = os.path.join(os.getcwd(), base) + if os.path.isfile(pdb): + return pdb + return None + + +class Dumper_Win32(Dumper): + fixedFilenameCaseCache = {} + + def ShouldProcess(self, file): + """This function will allow processing of exe or dll files that have pdb + files with the same base name next to them.""" + if file.endswith(".exe") or file.endswith(".dll"): + if locate_pdb(file) is not None: + return True + return False + + def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id): + """This function will copy the executable or dll and pdb files to |symbol_path|""" + pdb_file = locate_pdb(file) + + rel_path = os.path.join(debug_file, guid, debug_file).replace("\\", "/") + full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path)) + shutil.copyfile(pdb_file, full_path) + print(rel_path) + + # Copy the binary file as well + if code_file and code_id: + full_code_path = os.path.join(os.path.dirname(file), code_file) + if os.path.exists(full_code_path): + rel_path = os.path.join(code_file, code_id, code_file).replace( + "\\", "/" + ) + full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path)) + try: + os.makedirs(os.path.dirname(full_path)) + except OSError as e: + if e.errno != errno.EEXIST: + raise + shutil.copyfile(full_code_path, full_path) + print(rel_path) + + def SourceServerIndexing( + self, debug_file, guid, sourceFileStream, vcs_root, s3_bucket + ): + # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing + streamFilename = debug_file + ".stream" + stream_output_path = os.path.abspath(streamFilename) + # Call SourceIndex to create the .stream file + result = SourceIndex(sourceFileStream, stream_output_path, vcs_root, s3_bucket) + if self.copy_debug: + pdbstr = buildconfig.substs["PDBSTR"] + wine = 
buildconfig.substs.get("WINE") + if wine: + cmd = [wine, pdbstr] + else: + cmd = [pdbstr] + subprocess.call( + cmd + + [ + "-w", + "-p:" + os.path.basename(debug_file), + "-i:" + os.path.basename(streamFilename), + "-s:srcsrv", + ], + cwd=os.path.dirname(stream_output_path), + ) + # clean up all the .stream files when done + os.remove(stream_output_path) + return result + + +class Dumper_Linux(Dumper): + objcopy = os.environ["OBJCOPY"] if "OBJCOPY" in os.environ else "objcopy" + + def ShouldProcess(self, file): + """This function will allow processing of files that are + executable, or end with the .so extension, and additionally + file(1) reports as being ELF files. It expects to find the file + command in PATH.""" + if file.endswith(".so") or os.access(file, os.X_OK): + return executables.get_type(file) == executables.ELF + return False + + def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id): + # We want to strip out the debug info, and add a + # .gnu_debuglink section to the object, so the debugger can + # actually load our debug info later. + # In some odd cases, the object might already have an irrelevant + # .gnu_debuglink section, and objcopy doesn't want to add one in + # such cases, so we make it remove it any existing one first. 
+ file_dbg = file + ".dbg" + if ( + subprocess.call([self.objcopy, "--only-keep-debug", file, file_dbg]) == 0 + and subprocess.call( + [ + self.objcopy, + "--remove-section", + ".gnu_debuglink", + "--add-gnu-debuglink=%s" % file_dbg, + file, + ] + ) + == 0 + ): + rel_path = os.path.join(debug_file, guid, debug_file + ".dbg") + full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path)) + shutil.move(file_dbg, full_path) + print(rel_path) + else: + if os.path.isfile(file_dbg): + os.unlink(file_dbg) + + +class Dumper_Solaris(Dumper): + def ShouldProcess(self, file): + """This function will allow processing of files that are + executable, or end with the .so extension, and additionally + file(1) reports as being ELF files. It expects to find the file + command in PATH.""" + if file.endswith(".so") or os.access(file, os.X_OK): + return executables.get_type(file) == executables.ELF + return False + + +class Dumper_Mac(Dumper): + def ShouldProcess(self, file): + """This function will allow processing of files that are + executable, or end with the .dylib extension, and additionally + file(1) reports as being Mach-O files. It expects to find the file + command in PATH.""" + if file.endswith(".dylib") or os.access(file, os.X_OK): + return executables.get_type(file) == executables.MACHO + return False + + def ProcessFile(self, file, count_ctors=False): + print("Starting Mac pre-processing on file: %s" % file, file=sys.stderr) + dsymbundle = self.GenerateDSYM(file) + if dsymbundle: + # kick off new jobs per-arch with our new list of files + Dumper.ProcessFile( + self, file, dsymbundle=dsymbundle, count_ctors=count_ctors + ) + + def dump_syms_cmdline(self, file, arch, dsymbundle=None): + """ + Get the commandline used to invoke dump_syms. + """ + # dump_syms wants the path to the original binary and the .dSYM + # in order to dump all the symbols. + if dsymbundle: + # This is the .dSYM bundle. 
+ return ( + [self.dump_syms] + + arch.split() + + ["--inlines", "-j", "2", dsymbundle, file] + ) + return Dumper.dump_syms_cmdline(self, file, arch) + + def GenerateDSYM(self, file): + """dump_syms on Mac needs to be run on a dSYM bundle produced + by dsymutil(1), so run dsymutil here and pass the bundle name + down to the superclass method instead.""" + t_start = time.time() + print("Running Mac pre-processing on file: %s" % (file,), file=sys.stderr) + + dsymbundle = file + ".dSYM" + if os.path.exists(dsymbundle): + shutil.rmtree(dsymbundle) + dsymutil = buildconfig.substs["DSYMUTIL"] + # dsymutil takes --arch=foo instead of -a foo like everything else + cmd = ( + [dsymutil] + [a.replace("-a ", "--arch=") for a in self.archs if a] + [file] + ) + print(" ".join(cmd), file=sys.stderr) + + dsymutil_proc = subprocess.Popen( + cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + dsymout, dsymerr = dsymutil_proc.communicate() + if dsymutil_proc.returncode != 0: + raise RuntimeError("Error running dsymutil: %s" % dsymerr) + + # Regular dsymutil won't produce a .dSYM for files without symbols. + if not os.path.exists(dsymbundle): + print("No symbols found in file: %s" % (file,), file=sys.stderr) + return False + + # llvm-dsymutil will produce a .dSYM for files without symbols or + # debug information, but only sometimes will it warn you about this. + # We don't want to run dump_syms on such bundles, because asserts + # will fire in debug mode and who knows what will happen in release. + # + # So we check for the error message and bail if it appears. If it + # doesn't, we carefully check the bundled DWARF to see if dump_syms + # will be OK with it. 
+ if "warning: no debug symbols in" in dsymerr: + print(dsymerr, file=sys.stderr) + return False + + contents_dir = os.path.join(dsymbundle, "Contents", "Resources", "DWARF") + if not os.path.exists(contents_dir): + print( + "No DWARF information in .dSYM bundle %s" % (dsymbundle,), + file=sys.stderr, + ) + return False + + files = os.listdir(contents_dir) + if len(files) != 1: + print("Unexpected files in .dSYM bundle %s" % (files,), file=sys.stderr) + return False + + otool_out = subprocess.check_output( + [buildconfig.substs["OTOOL"], "-l", os.path.join(contents_dir, files[0])], + universal_newlines=True, + ) + if "sectname __debug_info" not in otool_out: + print("No symbols in .dSYM bundle %s" % (dsymbundle,), file=sys.stderr) + return False + + elapsed = time.time() - t_start + print("Finished processing %s in %.2fs" % (file, elapsed), file=sys.stderr) + return dsymbundle + + def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id): + """ProcessFile has already produced a dSYM bundle, so we should just + copy that to the destination directory. However, we'll package it + into a .tar because it's a bundle, so it's a directory. 
|file| here is + the original filename.""" + dsymbundle = file + ".dSYM" + rel_path = os.path.join(debug_file, guid, os.path.basename(dsymbundle) + ".tar") + full_path = os.path.abspath(os.path.join(self.symbol_path, rel_path)) + success = subprocess.call( + ["tar", "cf", full_path, os.path.basename(dsymbundle)], + cwd=os.path.dirname(dsymbundle), + stdout=open(os.devnull, "w"), + stderr=subprocess.STDOUT, + ) + if success == 0 and os.path.exists(full_path): + print(rel_path) + + +# Entry point if called as a standalone program + + +def main(): + parser = OptionParser( + usage="usage: %prog [options] " + ) + parser.add_option( + "-c", + "--copy", + action="store_true", + dest="copy_debug", + default=False, + help="Copy debug info files into the same directory structure as symbol files", + ) + parser.add_option( + "-a", + "--archs", + action="store", + dest="archs", + help="Run dump_syms -a for each space separated" + + "cpu architecture in ARCHS (only on OS X)", + ) + parser.add_option( + "-s", + "--srcdir", + action="append", + dest="srcdir", + default=[], + help="Use SRCDIR to determine relative paths to source files", + ) + parser.add_option( + "-v", + "--vcs-info", + action="store_true", + dest="vcsinfo", + help="Try to retrieve VCS info for each FILE listed in the output", + ) + parser.add_option( + "-i", + "--source-index", + action="store_true", + dest="srcsrv", + default=False, + help="Add source index information to debug files, making them suitable" + + " for use in a source server.", + ) + parser.add_option( + "--install-manifest", + action="append", + dest="install_manifests", + default=[], + help="""Use this install manifest to map filenames back +to canonical locations in the source repository. 
Specify +, as a comma-separated pair.""", + ) + parser.add_option( + "--count-ctors", + action="store_true", + dest="count_ctors", + default=False, + help="Count static initializers", + ) + (options, args) = parser.parse_args() + + # check to see if the pdbstr.exe exists + if options.srcsrv: + if "PDBSTR" not in buildconfig.substs: + print("pdbstr was not found by configure.\n", file=sys.stderr) + sys.exit(1) + + if len(args) < 3: + parser.error("not enough arguments") + exit(1) + + try: + manifests = validate_install_manifests(options.install_manifests) + except (IOError, ValueError) as e: + parser.error(str(e)) + exit(1) + file_mapping = make_file_mapping(manifests) + _, bucket = get_s3_region_and_bucket() + dumper = GetPlatformSpecificDumper( + dump_syms=args[0], + symbol_path=args[1], + copy_debug=options.copy_debug, + archs=options.archs, + srcdirs=options.srcdir, + vcsinfo=options.vcsinfo, + srcsrv=options.srcsrv, + s3_bucket=bucket, + file_mapping=file_mapping, + ) + + dumper.Process(args[2], options.count_ctors) + + +# run main if run directly +if __name__ == "__main__": + main() diff --git a/toolkit/crashreporter/tools/unit-symbolstore.py b/toolkit/crashreporter/tools/unit-symbolstore.py new file mode 100755 index 0000000000..21d577fb63 --- /dev/null +++ b/toolkit/crashreporter/tools/unit-symbolstore.py @@ -0,0 +1,617 @@ +#!/usr/bin/env python +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import os +import shutil +import struct +import subprocess +import sys +import tempfile +import unittest +from unittest import mock +from unittest.mock import patch + +import buildconfig +import mozpack.path as mozpath +import mozunit +import symbolstore +from mozpack.manifests import InstallManifest +from symbolstore import realpath + +# Some simple functions to mock out files that the platform-specific dumpers will accept. +# dump_syms itself will not be run (we mock that call out), but we can't override +# the ShouldProcessFile method since we actually want to test that. + + +def write_elf(filename): + open(filename, "wb").write( + struct.pack("<7B45x", 0x7F, ord("E"), ord("L"), ord("F"), 1, 1, 1) + ) + + +def write_macho(filename): + open(filename, "wb").write(struct.pack("= 1, "should have a FILE line for " + match + ) + # Skip this check for local git repositories. + if not os.path.isdir(mozpath.join(self.topsrcdir, ".hg")): + return + for line in match_lines: + filename = line.split(None, 2)[2] + self.assertEqual("hg:", filename[:3]) + + # Check that nsBrowserApp.cpp is listed as a FILE line, and that + # it was properly mapped to the source repo. + check_hg_path(file_lines, "nsBrowserApp.cpp") + # Also check Sprintf.h to verify that files from dist/include + # are properly mapped. + check_hg_path(file_lines, "mfbt/Sprintf.h") + + +if __name__ == "__main__": + mozunit.main() diff --git a/toolkit/crashreporter/tools/upload_symbols.py b/toolkit/crashreporter/tools/upload_symbols.py new file mode 100644 index 0000000000..eff1f43b2b --- /dev/null +++ b/toolkit/crashreporter/tools/upload_symbols.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python3 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#
# This script uploads a symbol archive file from a path or URL passed on the commandline
# to the symbol server at https://symbols.mozilla.org/ .
#
# Using this script requires you to have generated an authentication
# token in the symbol server web interface. You must store the token in a Taskcluster
# secret as the JSON blob `{"token": "<token>"}` and set the `SYMBOL_SECRET`
# environment variable to the name of the Taskcluster secret. Alternately,
# you can put the token in a file and set `SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE`
# environment variable to the path to the file.

import argparse
import logging
import os
import sys
import tempfile

import redo
import requests

log = logging.getLogger("upload-symbols")
log.setLevel(logging.INFO)

# Production tecken upload endpoint; overridable at runtime via the
# SOCORRO_SYMBOL_UPLOAD_URL environment variable (see upload_symbols).
DEFAULT_URL = "https://symbols.mozilla.org/upload/"
# Attempts for each retried network operation (existence check, upload).
MAX_RETRIES = 7
MAX_ZIP_SIZE = 500000000  # 500 MB


def print_error(r):
    """Log a failed tecken HTTP response, including its body.

    NOTE(review): the upload POST elsewhere in this file is made with
    allow_redirects=False, so a rejected auth token presumably surfaces as
    a 3xx redirect rather than a 4xx -- hence the "bad auth token?"
    wording for sub-400 statuses. Confirm against the server's behavior.
    """
    if r.status_code < 400:
        log.error("Error: bad auth token? ({0}: {1})".format(r.status_code, r.reason))
    else:
        log.error("Error: got HTTP response {0}: {1}".format(r.status_code, r.reason))

    log.error(
        "Response body:\n{sep}\n{body}\n{sep}\n".format(sep="=" * 20, body=r.text)
    )


def get_taskcluster_secret(secret_name):
    """Fetch the symbol upload token from the Taskcluster secrets service.

    Uses the in-cluster proxy at http://taskcluster; raises
    requests.HTTPError (via raise_for_status) when the secret cannot be
    retrieved.
    """
    secrets_url = "http://taskcluster/secrets/v1/secret/{}".format(secret_name)
    log.info(
        'Using symbol upload token from the secrets service: "{}"'.format(secrets_url)
    )
    res = requests.get(secrets_url)
    res.raise_for_status()
    secret = res.json()
    auth_token = secret["secret"]["token"]

    return auth_token


def main():
    """Command-line entry point.

    Validates the archive argument (a local path or a URL), repacks
    .tar.zst archives into zip(s) as required by the server, uploads each,
    and returns a process exit code (0 on success).
    """
    logging.basicConfig()
    parser = argparse.ArgumentParser(
        description="Upload symbols in ZIP using token from Taskcluster secrets service."
    )
    parser.add_argument(
        "archive", help="Symbols archive file - URL or path to local file"
    )
    parser.add_argument(
        "--ignore-missing", help="No error on missing files", action="store_true"
    )
    args = parser.parse_args()

    def check_file_exists(url):
        # HEAD the URL, retrying only on network errors; any completed
        # response returns immediately (200 -> True, otherwise False).
        for i, _ in enumerate(redo.retrier(attempts=MAX_RETRIES), start=1):
            try:
                resp = requests.head(url, allow_redirects=True)
                return resp.status_code == requests.codes.ok
            except requests.exceptions.RequestException as e:
                log.error("Error: {0}".format(e))
                log.info("Retrying...")
        return False

    if args.archive.startswith("http"):
        is_existing = check_file_exists(args.archive)
    else:
        is_existing = os.path.isfile(args.archive)

    if not is_existing:
        if args.ignore_missing:
            log.info('Archive file "{0}" does not exist!'.format(args.archive))
            return 0
        else:
            log.error('Error: archive file "{0}" does not exist!'.format(args.archive))
            return 1

    try:
        # Assigned first so the finally block can always reference it.
        tmpdir = None
        if args.archive.endswith(".tar.zst"):
            # The server only accepts .zip uploads, so repack the .tar.zst
            # (possibly into several zips) inside a temp directory.
            tmpdir = tempfile.TemporaryDirectory()
            zip_paths = convert_zst_archive(args.archive, tmpdir)
        else:
            zip_paths = [args.archive]

        for zip_path in zip_paths:
            result = upload_symbols(zip_path)
            if result:
                # Propagate the first failing upload's exit code.
                return result
        return 0
    finally:
        if tmpdir:
            tmpdir.cleanup()
def convert_zst_archive(zst_archive, tmpdir):
    """
    Convert a .tar.zst file to one or more zip files.

    Our build tasks output .tar.zst files, but the tecken server only allows
    .zip files to be uploaded. Output zips are capped at MAX_ZIP_SIZE and a
    new zip is started whenever the cap is exceeded.

    :param zst_archive: path or URL to a .tar.zst source file
    :param tmpdir: TemporaryDirectory to store the output zip files in
    :returns: list of paths to output zip files (empty on failure)
    """
    import concurrent.futures
    import gzip
    import itertools
    import tarfile

    import zstandard
    from mozpack.files import File
    from mozpack.mozjar import Deflater, JarWriter

    def iter_files_from_tar(reader):
        # Stream-decompress the zstd layer and walk the tar sequentially,
        # yielding (member name, raw bytes) pairs.
        ctx = zstandard.ZstdDecompressor()
        uncompressed = ctx.stream_reader(reader)
        with tarfile.open(mode="r|", fileobj=uncompressed, bufsize=1024 * 1024) as tar:
            while True:
                info = tar.next()
                if info is None:
                    break
                data = tar.extractfile(info).read()
                yield (info.name, data)

    def prepare_from(archive, tmpdir):
        # Open the source (local file or HTTP stream) and yield each member
        # recompressed in the format tecken expects for its type.
        if archive.startswith("http"):
            resp = requests.get(archive, allow_redirects=True, stream=True)
            resp.raise_for_status()
            reader = resp.raw
            # Work around taskcluster generic-worker possibly gzipping the tar.zst.
            if resp.headers.get("Content-Encoding") == "gzip":
                reader = gzip.GzipFile(fileobj=reader)
        else:
            reader = open(archive, "rb")

        def handle_file(data):
            name, data = data
            log.info("Compressing %s", name)
            path = os.path.join(tmpdir, name.lstrip("/"))
            if name.endswith(".dbg"):
                # ELF debug files are stored gzip-compressed.
                os.makedirs(os.path.dirname(path), exist_ok=True)
                with open(path, "wb") as fh:
                    with gzip.GzipFile(fileobj=fh, mode="wb", compresslevel=5) as c:
                        c.write(data)
                return (name + ".gz", File(path))
            elif name.endswith(".dSYM.tar"):
                # Mac dSYM tarballs are stored bzip2-compressed.
                import bz2

                os.makedirs(os.path.dirname(path), exist_ok=True)
                with open(path, "wb") as fh:
                    fh.write(bz2.compress(data))
                return (name + ".bz2", File(path))
            elif name.endswith((".pdb", ".exe", ".dll")):
                # Windows binaries are stored as MSZIP cab files whose name
                # replaces the last character with "_" (e.g. foo.pd_).
                import subprocess

                makecab = os.environ.get("MAKECAB", "makecab")
                os.makedirs(os.path.dirname(path), exist_ok=True)
                with open(path, "wb") as fh:
                    fh.write(data)

                subprocess.check_call(
                    [makecab, "-D", "CompressionType=MSZIP", path, path + "_"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                )

                return (name[:-1] + "_", File(path + "_"))
            else:
                # Everything else goes into the zip deflate-compressed.
                deflater = Deflater(compress_level=5)
                deflater.write(data)
                return (name, deflater)

        with concurrent.futures.ThreadPoolExecutor(
            max_workers=os.cpu_count()
        ) as executor:
            yield from executor.map(handle_file, iter_files_from_tar(reader))

        # NOTE(review): only reached on a clean pass; an exception leaves
        # the reader to be reclaimed by GC (preserved from the original).
        reader.close()

    # Infinite supply of output names: symbols.zip, symbols2.zip, ...
    zip_paths_iter = iter(
        os.path.join(tmpdir.name, "symbols{}.zip".format("" if i == 1 else i))
        for i in itertools.count(start=1)
    )
    zip_path = next(zip_paths_iter)
    log.info('Preparing symbol archive "{0}" from "{1}"'.format(zip_path, zst_archive))
    # The whole conversion restarts on a network error while streaming.
    for i, _ in enumerate(redo.retrier(attempts=MAX_RETRIES), start=1):
        zip_paths = []
        jar = None
        try:
            for name, data in prepare_from(zst_archive, tmpdir.name):
                if not jar:
                    jar = JarWriter(zip_path)
                    zip_paths.append(zip_path)
                    size = 0
                log.info("Adding %s", name)
                # File entries are already compressed on disk; only deflate
                # in-memory (Deflater) entries.
                jar.add(name, data, compress=not isinstance(data, File))
                size += data.size() if isinstance(data, File) else data.compressed_size
                if size > MAX_ZIP_SIZE:
                    # Cap reached: close this zip and roll over to the next.
                    jar.finish()
                    jar = None
                    zip_path = next(zip_paths_iter)
                    log.info('Continuing with symbol archive "{}"'.format(zip_path))
            if jar:
                jar.finish()
            return zip_paths
        except requests.exceptions.RequestException as e:
            log.error("Error: {0}".format(e))
            log.info("Retrying...")

    return []


def upload_symbols(zip_path):
    """
    Upload symbols to the tecken server.

    :param zip_path: path to the zip file to upload, or an http(s) URL for
        the server to fetch the zip from
    :returns: 0 indicates the upload was successful, non-zero indicates an
        error that should be used for the script's exit code
    """
    import contextlib

    secret_name = os.environ.get("SYMBOL_SECRET")
    if secret_name is not None:
        auth_token = get_taskcluster_secret(secret_name)
    elif "SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE" in os.environ:
        token_file = os.environ["SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE"]

        if not os.path.isfile(token_file):
            log.error(
                'SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE "{0}" does not exist!'.format(
                    token_file
                )
            )
            return 1
        # Close the token file deterministically (the original leaked the
        # handle from a bare open().read()).
        with open(token_file, "r") as token_fh:
            auth_token = token_fh.read().strip()
    else:
        log.error(
            "You must set the SYMBOL_SECRET or SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE "
            "environment variables!"
        )
        return 1

    # Allow overwriting of the upload url with an environmental variable
    if "SOCORRO_SYMBOL_UPLOAD_URL" in os.environ:
        url = os.environ["SOCORRO_SYMBOL_UPLOAD_URL"]
    else:
        url = DEFAULT_URL

    log.info('Uploading symbol file "{0}" to "{1}"'.format(zip_path, url))

    for i, _ in enumerate(redo.retrier(attempts=MAX_RETRIES), start=1):
        log.info("Attempt %d of %d..." % (i, MAX_RETRIES))
        try:
            # ExitStack closes the zip file handle after each attempt; the
            # original opened it anew per retry and never closed it.
            with contextlib.ExitStack() as stack:
                if zip_path.startswith("http"):
                    zip_arg = {"data": {"url": zip_path}}
                else:
                    zip_fh = stack.enter_context(open(zip_path, "rb"))
                    zip_arg = {"files": {"symbols.zip": zip_fh}}
                r = requests.post(
                    url,
                    headers={"Auth-Token": auth_token},
                    allow_redirects=False,
                    # Allow a longer read timeout because uploading by URL means the server
                    # has to fetch the entire zip file, which can take a while. The load balancer
                    # in front of symbols.mozilla.org has a 300 second timeout, so we'll use that.
                    timeout=(300, 300),
                    **zip_arg
                )
            # 408, 429 or any 5XX is likely to be a transient failure.
            # Break out for success or other error codes.
            if r.ok or (r.status_code < 500 and (r.status_code not in (408, 429))):
                break
            print_error(r)
        except requests.exceptions.RequestException as e:
            log.error("Error: {0}".format(e))
            log.info("Retrying...")
    else:
        # for/else: we exhausted every attempt without breaking out.
        log.warning("Maximum retries hit, giving up!")
        return 1

    if r.status_code >= 200 and r.status_code < 300:
        log.info("Uploaded successfully!")
        return 0

    print_error(r)
    return 1


if __name__ == "__main__":
    sys.exit(main())