diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /toolkit/crashreporter/tools | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'toolkit/crashreporter/tools')
-rw-r--r-- | toolkit/crashreporter/tools/python.ini | 3 | ||||
-rwxr-xr-x | toolkit/crashreporter/tools/symbolstore.py | 1096 | ||||
-rwxr-xr-x | toolkit/crashreporter/tools/unit-symbolstore.py | 613 | ||||
-rw-r--r-- | toolkit/crashreporter/tools/upload_symbols.py | 306 |
4 files changed, 2018 insertions, 0 deletions
diff --git a/toolkit/crashreporter/tools/python.ini b/toolkit/crashreporter/tools/python.ini new file mode 100644 index 0000000000..d715bfe094 --- /dev/null +++ b/toolkit/crashreporter/tools/python.ini @@ -0,0 +1,3 @@ +[DEFAULT] + +[unit-symbolstore.py] diff --git a/toolkit/crashreporter/tools/symbolstore.py b/toolkit/crashreporter/tools/symbolstore.py new file mode 100755 index 0000000000..5dd5570a84 --- /dev/null +++ b/toolkit/crashreporter/tools/symbolstore.py @@ -0,0 +1,1096 @@ +#!/bin/env python +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Usage: symbolstore.py <params> <dump_syms path> <symbol store path> +# <debug info files or dirs> +# Runs dump_syms on each debug info file specified on the command line, +# then places the resulting symbol file in the proper directory +# structure in the symbol store path. Accepts multiple files +# on the command line, so can be called as part of a pipe using +# find <dir> | xargs symbolstore.pl <dump_syms> <storepath> +# But really, you might just want to pass it <dir>. +# +# Parameters accepted: +# -c : Copy debug info files to the same directory structure +# as sym files. On Windows, this will also copy +# binaries into the symbol store. +# -a "<archs>" : Run dump_syms -a <arch> for each space separated +# cpu architecture in <archs> (only on OS X) +# -s <srcdir> : Use <srcdir> as the top source directory to +# generate relative filenames. 
import ctypes
import errno
import json
import os
import platform
import re
import shutil
import subprocess
import sys
import textwrap
import time
from optparse import OptionParser
from pathlib import Path

import buildconfig
from mozbuild.generated_sources import (
    GENERATED_SOURCE_EXTS,
    get_filename_with_digest,
    get_s3_region_and_bucket,
)
from mozbuild.util import memoize
from mozpack import executables
from mozpack.copier import FileRegistry
from mozpack.manifests import InstallManifest, UnreadableInstallManifest

# Utility classes


class VCSFileInfo:
    """A base class for version-controlled file information. Ensures that the
    following attributes are generated only once (successfully):

        self.root
        self.clean_root
        self.revision
        self.filename

    The attributes are generated by a single call to the GetRoot,
    GetCleanRoot, GetRevision, and GetFilename methods. Those methods are
    explicitly not implemented here and must be implemented in derived
    classes."""

    # Maps each lazily computed attribute to the method that produces it.
    _LAZY_ATTRIBUTES = {
        "root": "GetRoot",
        "clean_root": "GetCleanRoot",
        "revision": "GetRevision",
        "filename": "GetFilename",
    }

    def __init__(self, file):
        """Remember |file|; raises ValueError when it is empty or None."""
        if not file:
            raise ValueError("file must be a non-empty path")
        self.file = file

    def __getattr__(self, name):
        """__getattr__ is only called for attributes that are not set on self,
        so setting self.[attr] will prevent future calls to the GetRoot,
        GetCleanRoot, GetRevision, and GetFilename methods. We don't set the
        values on failure on the off chance that a future call might succeed.

        Raises AttributeError for any other attribute name."""
        getter_name = self._LAZY_ATTRIBUTES.get(name)
        if getter_name is None:
            raise AttributeError(name)

        value = getattr(self, getter_name)()
        if value:
            # Cache only truthy results so a failed lookup is retried later.
            setattr(self, name, value)
        return value

    def GetRoot(self):
        """This method should return the unmodified root for the file or 'None'
        on failure."""
        raise NotImplementedError

    def GetCleanRoot(self):
        """This method should return the repository root for the file or 'None'
        on failure."""
        raise NotImplementedError

    def GetRevision(self):
        """This method should return the revision number for the file or 'None'
        on failure."""
        raise NotImplementedError

    def GetFilename(self):
        """This method should return the repository-specific filename for the
        file or 'None' on failure."""
        raise NotImplementedError


# This regex separates protocol and optional username/password from a url.
# For instance, all the following urls will be transformed into
# 'foo.com/bar':
#
#   http://foo.com/bar
#   svn+ssh://user@foo.com/bar
#   svn+ssh://user:pass@foo.com/bar
#
rootRegex = re.compile(r"^\S+?:/+(?:[^\s/]*@)?(\S+)$")


def _clean_repo_root(root):
    """Return |root| stripped of its protocol, credentials and one trailing
    slash, or None when |root| is empty or does not look like a URL."""
    if not root:
        return None
    match = rootRegex.match(root)
    if not match:
        return None
    cleanroot = match.group(1)
    if cleanroot.endswith("/"):
        cleanroot = cleanroot[:-1]
    return cleanroot


def read_output(*args):
    """Run the command given by |args| and return its stdout as text with
    trailing whitespace removed. The exit status is not checked."""
    completed = subprocess.run(
        args, universal_newlines=True, stdout=subprocess.PIPE
    )
    return completed.stdout.rstrip()


class HGRepoInfo:
    """Revision and remote root of the Mercurial checkout at |path|.

    MOZ_SOURCE_CHANGESET and MOZ_SOURCE_REPO from the environment take
    precedence; otherwise `hg` is asked. Exits the process with status 1
    when no usable repository root can be determined."""

    def __init__(self, path):
        self.path = path

        rev = os.environ.get("MOZ_SOURCE_CHANGESET")
        if not rev:
            rev = read_output("hg", "-R", path, "parent", "--template={node}")

        # Look for the default hg path. If MOZ_SOURCE_REPO is set, we
        # don't bother asking hg.
        hg_root = os.environ.get("MOZ_SOURCE_REPO")
        if hg_root:
            root = hg_root
        else:
            root = read_output("hg", "-R", path, "showconfig", "paths.default")
            if not root:
                print("Failed to get HG Repo for %s" % path, file=sys.stderr)

        cleanroot = _clean_repo_root(root)
        if cleanroot is None:
            print(
                textwrap.dedent(
                    """\
                    Could not determine repo info for %s. This is either not a clone of the web-based
                    repository, or you have not specified MOZ_SOURCE_REPO, or the clone is corrupt."""
                )
                % path,
                # Must be passed by keyword: positionally it would be printed
                # to stdout as a second value instead of selecting the stream.
                file=sys.stderr,
            )
            sys.exit(1)
        self.rev = rev
        self.root = root
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        """Return an HGFileInfo describing |file| within this repository."""
        return HGFileInfo(file, self)


class HGFileInfo(VCSFileInfo):
    """VCS information for one file that lives in a Mercurial checkout."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Store the path relative to the checkout root.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.root

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        """Return "hg:<clean root>:<relative file>:<revision>", or just the
        relative file when the revision or clean root is unavailable."""
        if self.revision and self.clean_root:
            return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file


class GitRepoInfo:
    """
    Info about a local git repository. Does not currently
    support discovering info about a git clone, the info must be
    provided out-of-band.

    Exits the process with status 1 when |root| cannot be reduced to a
    clean repository root.
    """

    def __init__(self, path, rev, root):
        self.path = path
        cleanroot = _clean_repo_root(root)
        if cleanroot is None:
            print(
                textwrap.dedent(
                    """\
                    Could not determine repo info for %s (%s). This is either not a clone of a web-based
                    repository, or you have not specified MOZ_SOURCE_REPO, or the clone is corrupt."""
                )
                % (path, root),
                file=sys.stderr,
            )
            sys.exit(1)
        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        """Return a GitFileInfo describing |file| within this repository."""
        return GitFileInfo(file, self)


class GitFileInfo(VCSFileInfo):
    """VCS information for one file that lives in a git repository."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Store the path relative to the repository path.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        # GitRepoInfo does not keep the raw remote URL, so the local
        # repository path serves as the unmodified root.
        return self.repo.path

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        """Return "git:<clean root>:<relative file>:<revision>", or just the
        relative file when the revision or clean root is unavailable."""
        if self.revision and self.clean_root:
            return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
        return self.file


# Utility functions


# A cache of files for which VCS info has already been determined. Used to
# prevent extra filesystem activity or process launching.
vcsFileInfoCache = {}

if platform.system() == "Windows":

    def realpath(path):
        """
        Normalize a path using `GetFinalPathNameByHandleW` to get the
        path with all components in the case they exist in on-disk, so
        that making links to a case-sensitive server (hg.mozilla.org) works.

        This function also resolves any symlinks in the path.
        """
        # Return the original path if something fails, which can happen for paths that
        # don't exist on this system (like paths from the CRT).
        result = path

        ctypes.windll.kernel32.SetErrorMode(ctypes.c_uint(1))
        handle = ctypes.windll.kernel32.CreateFileW(
            path,
            # GENERIC_READ
            0x80000000,
            # FILE_SHARE_READ
            1,
            None,
            # OPEN_EXISTING
            3,
            # FILE_FLAG_BACKUP_SEMANTICS
            # This is necessary to open
            # directory handles.
            0x02000000,
            None,
        )
        if handle != -1:
            # First call asks for the required buffer size, second call
            # fills the buffer.
            size = ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle, None, 0, 0)
            buf = ctypes.create_unicode_buffer(size)
            if (
                ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle, buf, size, 0)
                > 0
            ):
                # The return value of GetFinalPathNameByHandleW uses the
                # '\\?\' prefix.
                result = buf.value[4:]
            ctypes.windll.kernel32.CloseHandle(handle)
        return result


else:
    # Just use the os.path version otherwise.
    realpath = os.path.realpath


def IsInDir(file, dir):
    """Return True when |file| is located at or below |dir| (purely lexical
    check via pathlib's relative_to), False otherwise."""
    try:
        Path(file).relative_to(dir)
    except ValueError:
        return False
    return True


def GetVCSFilenameFromSrcdir(file, srcdir):
    """Return the VCS file info object for |file| inside |srcdir|, or None
    when |srcdir| is not a known repository.

    Repository info is cached per srcdir in Dumper.srcdirRepoInfo; only
    Mercurial checkouts (a ".hg" directory) are discovered here."""
    if srcdir not in Dumper.srcdirRepoInfo:
        # Not in cache, so find it and cache it
        if os.path.isdir(os.path.join(srcdir, ".hg")):
            Dumper.srcdirRepoInfo[srcdir] = HGRepoInfo(srcdir)
        else:
            # Unknown VCS or file is not in a repo.
            return None
    return Dumper.srcdirRepoInfo[srcdir].GetFileInfo(file)


def GetVCSFilename(file, srcdirs):
    """Given a full path to a file, and the top source directory,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
    VCS location, relative filename, and revision number, formatted like:
    vcs:vcs location:filename:revision
    For example:
    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists

    When |file| has no directory or no file name component it is returned
    unchanged together with None. When no VCS info is found the file name
    is returned with forward slashes and an empty-string root."""
    (path, filename) = os.path.split(file)
    if path == "" or filename == "":
        return (file, None)

    fileInfo = None
    root = ""
    if file in vcsFileInfoCache:
        # Already cached this info, use it.
        fileInfo = vcsFileInfoCache[file]
    else:
        for srcdir in srcdirs:
            if not IsInDir(file, srcdir):
                continue
            fileInfo = GetVCSFilenameFromSrcdir(file, srcdir)
            if fileInfo:
                vcsFileInfoCache[file] = fileInfo
                break

    if fileInfo:
        file = fileInfo.filename
        root = fileInfo.root

    # we want forward slashes on win32 paths
    return (file.replace("\\", "/"), root)


def validate_install_manifests(install_manifest_args):
    """Parse and validate the --install-manifest arguments.

    Each argument must be "manifest,target_dir". Returns a list of
    (InstallManifest, absolute destination) tuples.

    Raises ValueError for a malformed argument and IOError when the
    manifest file or destination directory is missing or the manifest
    cannot be parsed."""
    args = []
    for arg in install_manifest_args:
        bits = arg.split(",")
        if len(bits) != 2:
            raise ValueError(
                "Invalid format for --install-manifest: " "specify manifest,target_dir"
            )
        manifest_file, destination = [os.path.abspath(b) for b in bits]
        if not os.path.isfile(manifest_file):
            raise IOError(errno.ENOENT, "Manifest file not found", manifest_file)
        if not os.path.isdir(destination):
            raise IOError(errno.ENOENT, "Install directory not found", destination)
        try:
            manifest = InstallManifest(manifest_file)
        except UnreadableInstallManifest as e:
            raise IOError(
                errno.EINVAL, "Error parsing manifest file", manifest_file
            ) from e
        args.append((manifest, destination))
    return args


def make_file_mapping(install_manifests):
    """Build a dict mapping installed file paths back to their source paths.

    |install_manifests| is a list of (manifest, destination) tuples as
    returned by validate_install_manifests. Only registry entries that
    expose a "path" attribute are included."""
    file_mapping = {}
    for manifest, destination in install_manifests:
        destination = os.path.abspath(destination)
        reg = FileRegistry()
        manifest.populate_registry(reg)
        for dst, src in reg:
            if hasattr(src, "path"):
                # Any paths that get compared to source file names need to go through realpath.
                abs_dest = realpath(os.path.join(destination, dst))
                file_mapping[abs_dest] = realpath(src.path)
    return file_mapping


@memoize
def get_generated_file_s3_path(filename, rel_path, bucket):
    """Given a filename, return a path formatted similarly to
    GetVCSFilename but representing a file available in an s3 bucket."""
    with open(filename, "rb") as f:
        path = get_filename_with_digest(rel_path, f.read())
        return "s3:{bucket}:{path}:".format(bucket=bucket, path=path)


def GetPlatformSpecificDumper(**kwargs):
    """This function simply returns an instance of a subclass of Dumper
    that is appropriate for the current platform.

    The platform is read from buildconfig.substs["OS_ARCH"]; a KeyError is
    raised for any value other than WINNT, Linux or Darwin."""
    return {"WINNT": Dumper_Win32, "Linux": Dumper_Linux, "Darwin": Dumper_Mac}[
        buildconfig.substs["OS_ARCH"]
    ](**kwargs)


def SourceIndex(fileStream, outputPath, vcs_root, s3_bucket):
    """Takes a list of files, writes info to a data block in a .stream file.

    |fileStream| is the already formatted "source files" section,
    |outputPath| the .stream file to create, |vcs_root| the value written
    as HGSERVER and |s3_bucket| an optional bucket name. Always returns
    True."""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    result = True
    # The context manager guarantees the file is closed even if a write fails.
    with open(outputPath, "w") as pdbStreamFile:
        pdbStreamFile.write(
            "SRCSRV: ini ------------------------------------------------\r\n"
            + "VERSION=2\r\n"
            + "INDEXVERSION=2\r\n"
            + "VERCTRL=http\r\n"
            + "SRCSRV: variables ------------------------------------------\r\n"
            + "SRCSRVVERCTRL=http\r\n"
            + "RUST_GITHUB_TARGET=https://github.com/rust-lang/rust/raw/%var4%/%var3%\r\n"
        )
        pdbStreamFile.write("HGSERVER=" + vcs_root + "\r\n")
        pdbStreamFile.write("HG_TARGET=%hgserver%/raw-file/%var4%/%var3%\r\n")

        if s3_bucket:
            pdbStreamFile.write("S3_BUCKET=" + s3_bucket + "\r\n")
            pdbStreamFile.write(
                "S3_TARGET=https://%s3_bucket%.s3.amazonaws.com/%var3%\r\n"
            )

        # Allow each entry to choose its template via "var2".
        # Possible values for var2 are: HG_TARGET / S3_TARGET / RUST_GITHUB_TARGET
        pdbStreamFile.write("SRCSRVTRG=%fnvar%(%var2%)\r\n")

        pdbStreamFile.write(
            "SRCSRV: source files ---------------------------------------\r\n"
        )
        pdbStreamFile.write(fileStream)
        pdbStreamFile.write(
            "SRCSRV: end ------------------------------------------------\r\n\n"
        )
    return result


def _srcsrv_entry(sourcepath, filename):
    """Return the srcsrv stream line for one FILE record, or "" when
    |filename| is not an hg:, s3: or Rust GitHub VCS filename.

    |sourcepath| is the path exactly as dump_syms reported it; |filename|
    is the "vcs:location:file:revision" form."""
    if filename.startswith("hg:"):
        (_vcs, _repo, source_file, revision) = filename.split(":", 3)
        return sourcepath + "*HG_TARGET*" + source_file + "*" + revision + "\r\n"
    if filename.startswith("s3:"):
        (_vcs, _bucket, source_file, _nothing) = filename.split(":", 3)
        return sourcepath + "*S3_TARGET*" + source_file + "\r\n"
    if filename.startswith("git:github.com/rust-lang/rust:"):
        (_vcs, _repo, source_file, revision) = filename.split(":", 3)
        return (
            sourcepath + "*RUST_GITHUB_TARGET*" + source_file + "*" + revision + "\r\n"
        )
    return ""


def _report_static_constructors(ctors):
    """Print a PERFHERDER_DATA line with the static constructor count to
    stderr, unless PERFHERDER_EXTRA_OPTIONS mentions "asan"."""
    perfherder_data = {
        "framework": {"name": "build_metrics"},
        "suites": [
            {
                "name": "compiler_metrics",
                "subtests": [
                    {
                        "name": "num_static_constructors",
                        "value": ctors,
                        "alertChangeType": "absolute",
                        "alertThreshold": 3,
                    }
                ],
            }
        ],
    }
    perfherder_extra_options = os.environ.get("PERFHERDER_EXTRA_OPTIONS", "")
    for opt in perfherder_extra_options.split():
        for suite in perfherder_data["suites"]:
            if opt not in suite.get("extraOptions", []):
                suite.setdefault("extraOptions", []).append(opt)

    if "asan" not in perfherder_extra_options.lower():
        print("PERFHERDER_DATA: %s" % json.dumps(perfherder_data), file=sys.stderr)


class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server.  Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full paths to the top source
    directories--|srcdirs|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to process files.
    Instead, call GetPlatformSpecificDumper to get an instance of a
    subclass."""

    # Cache of repository info objects keyed by source directory; shared by
    # all Dumper instances and by GetVCSFilenameFromSrcdir.
    srcdirRepoInfo = {}

    def __init__(
        self,
        dump_syms,
        symbol_path,
        archs=None,
        srcdirs=None,
        copy_debug=False,
        vcsinfo=False,
        srcsrv=False,
        s3_bucket=None,
        file_mapping=None,
    ):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = [""]
        else:
            self.archs = ["-a %s" % a for a in archs.split()]
        # Any paths that get compared to source file names need to go through realpath.
        self.srcdirs = [realpath(s) for s in (srcdirs or [])]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.s3_bucket = s3_bucket
        self.file_mapping = file_mapping or {}
        # Add a static mapping for Rust sources. Since Rust 1.30 official Rust builds map
        # source paths to start with "/rustc/<sha>/".
        rust_sha = buildconfig.substs["RUSTC_COMMIT"]
        rust_srcdir = "/rustc/" + rust_sha
        self.srcdirs.append(rust_srcdir)
        Dumper.srcdirRepoInfo[rust_srcdir] = GitRepoInfo(
            rust_srcdir, rust_sha, "https://github.com/rust-lang/rust/"
        )

    # subclasses override this
    def ShouldProcess(self, file):
        """Return True when |file| should have its symbols dumped."""
        return True

    # This is a no-op except on Win32
    def SourceServerIndexing(
        self, debug_file, guid, sourceFileStream, vcs_root, s3_bucket
    ):
        """Add source server information to |debug_file|; does nothing here."""
        return ""

    # subclasses override this if they want to support this
    def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id):
        """This function will copy a library or executable and the file holding the
        debug information to |symbol_path|"""
        pass

    def Process(self, file_to_process, count_ctors=False):
        """Process the given file."""
        if self.ShouldProcess(os.path.abspath(file_to_process)):
            self.ProcessFile(file_to_process, count_ctors=count_ctors)

    def ProcessFile(self, file, dsymbundle=None, count_ctors=False):
        """Dump symbols from |file| into a symbol file, stored in the proper
        directory structure in |symbol_path|. ProcessFileWork is called
        once for every entry of self.archs."""
        print("Beginning work for file: %s" % file, file=sys.stderr)

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get("MOZ_SOURCE_REPO")
        for arch_num, arch in enumerate(self.archs):
            self.ProcessFileWork(
                file, arch_num, arch, vcs_root, dsymbundle, count_ctors=count_ctors
            )

    def dump_syms_cmdline(self, file, arch, dsymbundle=None):
        """
        Get the commandline used to invoke dump_syms.
        """
        # The Mac dumper overrides this.
        return [self.dump_syms, "--inlines", file]

    def ProcessFileWork(
        self, file, arch_num, arch, vcs_root, dsymbundle=None, count_ctors=False
    ):
        """Run dump_syms on |file| for one architecture and write the .sym file.

        FILE records are rewritten to VCS (or s3) filenames when
        self.vcsinfo is set, the relative path of the generated .sym file
        is printed to stdout, and source indexing / debug-info copying is
        triggered according to the dumper's options. |dsymbundle|, when
        given, is removed afterwards.

        Raises RuntimeError when dump_syms does not start its output with a
        MODULE line or exits with a non-zero status; any exception is
        reported on stderr and re-raised."""
        ctors = 0
        t_start = time.time()
        print("Processing file: %s" % file, file=sys.stderr)

        srcsrv_entries = []
        code_id, code_file = None, None
        try:
            cmd = self.dump_syms_cmdline(file, arch, dsymbundle=dsymbundle)
            print(" ".join(cmd), file=sys.stderr)
            proc = subprocess.Popen(
                cmd,
                universal_newlines=True,
                stdout=subprocess.PIPE,
            )
            try:
                module_line = next(proc.stdout)
            except StopIteration:
                module_line = ""
            if module_line.startswith("MODULE"):
                # MODULE os cpu guid debug_file
                (guid, debug_file) = (module_line.split())[3:5]
                # strip off .pdb extensions, and append .sym
                sym_file = re.sub(r"\.pdb$", "", debug_file) + ".sym"
                # we do want forward slashes here
                rel_path = os.path.join(debug_file, guid, sym_file).replace("\\", "/")
                full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path))
                os.makedirs(os.path.dirname(full_path), exist_ok=True)
                # The context manager closes the .sym file even when parsing
                # one of the lines below raises.
                with open(full_path, "w") as f:
                    f.write(module_line)
                    # now process the rest of the output
                    for line in proc.stdout:
                        if line.startswith("FILE"):
                            # FILE index filename
                            (_, index, filename) = line.rstrip().split(None, 2)
                            # We want original file paths for the source server.
                            sourcepath = filename
                            filename = realpath(filename)
                            if filename in self.file_mapping:
                                filename = self.file_mapping[filename]
                            if self.vcsinfo:
                                try:
                                    gen_path = Path(filename)
                                    rel_gen_path = gen_path.relative_to(
                                        buildconfig.topobjdir
                                    )
                                except ValueError:
                                    gen_path = None
                                if (
                                    gen_path
                                    and gen_path.exists()
                                    and gen_path.suffix in GENERATED_SOURCE_EXTS
                                    and self.s3_bucket
                                ):
                                    filename = get_generated_file_s3_path(
                                        filename, str(rel_gen_path), self.s3_bucket
                                    )
                                    rootname = ""
                                else:
                                    (filename, rootname) = GetVCSFilename(
                                        filename, self.srcdirs
                                    )
                                # sets vcs_root in case the loop through files were to end
                                # on an empty rootname
                                if vcs_root is None:
                                    if rootname:
                                        vcs_root = rootname
                            # Emit an entry for the file mapping for the srcsrv stream
                            srcsrv_entries.append(_srcsrv_entry(sourcepath, filename))
                            f.write("FILE %s %s\n" % (index, filename))
                        elif line.startswith("INFO CODE_ID "):
                            # INFO CODE_ID code_id code_file
                            # This gives some info we can use to
                            # store binaries in the symbol store.
                            bits = line.rstrip().split(None, 3)
                            if len(bits) == 4:
                                code_id, code_file = bits[2:]
                            f.write(line)
                        else:
                            if count_ctors and line.startswith("FUNC "):
                                # Static initializers, as created by clang and gcc
                                # have symbols that contain "_GLOBAL__sub_"
                                if "_GLOBAL__sub_" in line:
                                    ctors += 1
                                # MSVC creates `dynamic initializer for '...'`
                                # symbols.
                                elif "`dynamic initializer for '" in line:
                                    ctors += 1

                            # pass through all other lines unchanged
                            f.write(line)
                retcode = proc.wait()
                if retcode != 0:
                    raise RuntimeError(
                        "dump_syms failed with error code %d while processing %s\n"
                        % (retcode, file)
                    )
                # we output relative paths so callers can get a list of what
                # was generated
                print(rel_path)
                if self.srcsrv and vcs_root:
                    # add source server indexing to the pdb file
                    self.SourceServerIndexing(
                        debug_file,
                        guid,
                        "".join(srcsrv_entries),
                        vcs_root,
                        self.s3_bucket,
                    )
                # only copy debug the first time if we have multiple architectures
                if self.copy_debug and arch_num == 0:
                    self.CopyExeAndDebugInfo(file, debug_file, guid, code_file, code_id)
            else:
                # For some reason, we didn't see the MODULE line as the first
                # line of output, this is strictly required so fail irrespective
                # of the process' return code.
                retcode = proc.wait()
                message = [
                    "dump_syms failed to produce the expected output",
                    "file: %s" % file,
                    "return code: %d" % retcode,
                    "first line of output: %s" % module_line,
                ]
                raise RuntimeError("\n----------\n".join(message))
        except Exception as e:
            print("Unexpected error: %s" % str(e), file=sys.stderr)
            raise

        if dsymbundle:
            shutil.rmtree(dsymbundle)

        if count_ctors:
            _report_static_constructors(ctors)

        elapsed = time.time() - t_start
        print("Finished processing %s in %.2fs" % (file, elapsed), file=sys.stderr)


# Platform-specific subclasses.  For the most part, these just have
# logic to determine what files to extract symbols from.


def locate_pdb(path):
    """Given a path to a binary, attempt to locate the matching pdb file with simple heuristics:
    * Look for a pdb file with the same base name next to the binary
    * Look for a pdb file with the same base name in the cwd

    Returns the path to the pdb file if it exists, or None if it could not be located.
    """
    path, ext = os.path.splitext(path)
    pdb = path + ".pdb"
    if os.path.isfile(pdb):
        return pdb
    # If there's no pdb next to the file, see if there's a pdb with the same root name
    # in the cwd. We build some binaries directly into dist/bin, but put the pdb files
    # in the relative objdir, which is the cwd when running this script.
    base = os.path.basename(pdb)
    pdb = os.path.join(os.getcwd(), base)
    if os.path.isfile(pdb):
        return pdb
    return None


class Dumper_Win32(Dumper):
    """Dumper for Windows binaries that have a matching .pdb file."""

    # Not referenced anywhere else in this file; kept for compatibility.
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of exe or dll files that have pdb
        files with the same base name next to them."""
        return file.endswith((".exe", ".dll")) and locate_pdb(file) is not None

    def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id):
        """This function will copy the executable or dll and pdb files to |symbol_path|"""
        pdb_file = locate_pdb(file)

        rel_path = os.path.join(debug_file, guid, debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path))
        shutil.copyfile(pdb_file, full_path)
        print(rel_path)

        # Copy the binary file as well
        if code_file and code_id:
            full_code_path = os.path.join(os.path.dirname(file), code_file)
            if os.path.exists(full_code_path):
                rel_path = os.path.join(code_file, code_id, code_file).replace(
                    "\\", "/"
                )
                full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path))
                os.makedirs(os.path.dirname(full_path), exist_ok=True)
                shutil.copyfile(full_code_path, full_path)
                print(rel_path)

    def SourceServerIndexing(
        self, debug_file, guid, sourceFileStream, vcs_root, s3_bucket
    ):
        """Write the srcsrv stream for |debug_file| and, when debug files are
        being copied, inject it into the pdb with pdbstr (through wine when
        the build is configured with WINE). Returns SourceIndex's result."""
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root, s3_bucket)
        if self.copy_debug:
            pdbstr = buildconfig.substs["PDBSTR"]
            wine = buildconfig.substs.get("WINE")
            cmd = [wine, pdbstr] if wine else [pdbstr]
            subprocess.call(
                cmd
                + [
                    "-w",
                    "-p:" + os.path.basename(debug_file),
                    "-i:" + os.path.basename(streamFilename),
                    "-s:srcsrv",
                ],
                cwd=os.path.dirname(stream_output_path),
            )
            # clean up all the .stream files when done
            os.remove(stream_output_path)
        return result


class Dumper_Linux(Dumper):
    """Dumper for ELF binaries on Linux."""

    # objcopy binary to use; overridable through the OBJCOPY environment
    # variable.
    objcopy = os.environ.get("OBJCOPY", "objcopy")

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and that
        mozpack.executables additionally identifies as ELF files."""
        if file.endswith(".so") or os.access(file, os.X_OK):
            return executables.get_type(file) == executables.ELF
        return False

    def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id):
        """Split the debug info of |file| into a .dbg file, link the two with
        a .gnu_debuglink section and move the .dbg file into |symbol_path|.
        The .dbg file is deleted again when either objcopy call fails."""
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        # In some odd cases, the object might already have an irrelevant
        # .gnu_debuglink section, and objcopy doesn't want to add one in
        # such cases, so we make it remove it any existing one first.
        file_dbg = file + ".dbg"
        if (
            subprocess.call([self.objcopy, "--only-keep-debug", file, file_dbg]) == 0
            and subprocess.call(
                [
                    self.objcopy,
                    "--remove-section",
                    ".gnu_debuglink",
                    "--add-gnu-debuglink=%s" % file_dbg,
                    file,
                ]
            )
            == 0
        ):
            rel_path = os.path.join(debug_file, guid, debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path, rel_path))
            shutil.move(file_dbg, full_path)
            print(rel_path)
        else:
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)


class Dumper_Solaris(Dumper):
    """Dumper for ELF binaries on Solaris."""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and that
        mozpack.executables additionally identifies as ELF files."""
        if file.endswith(".so") or os.access(file, os.X_OK):
            return executables.get_type(file) == executables.ELF
        return False


class Dumper_Mac(Dumper):
    """Dumper for Mach-O binaries on macOS; symbols come from a .dSYM bundle."""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and that
        mozpack.executables additionally identifies as Mach-O files."""
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return executables.get_type(file) == executables.MACHO
        return False

    def ProcessFile(self, file, count_ctors=False):
        """Generate a .dSYM bundle for |file| and, when that succeeds, dump
        symbols from it through Dumper.ProcessFile."""
        print("Starting Mac pre-processing on file: %s" % file, file=sys.stderr)
        dsymbundle = self.GenerateDSYM(file)
        if dsymbundle:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFile(
                self, file, dsymbundle=dsymbundle, count_ctors=count_ctors
            )

    def dump_syms_cmdline(self, file, arch, dsymbundle=None):
        """
        Get the commandline used to invoke dump_syms.
        """
        # dump_syms wants the path to the original binary and the .dSYM
        # in order to dump all the symbols.
        if dsymbundle:
            # This is the .dSYM bundle.
            return (
                [self.dump_syms]
                + arch.split()
                + ["--inlines", "-j", "2", dsymbundle, file]
            )
        return Dumper.dump_syms_cmdline(self, file, arch)

    def GenerateDSYM(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead.

        Returns the bundle path, or False when the file has no usable
        debug information. Raises RuntimeError when dsymutil fails."""
        t_start = time.time()
        print("Running Mac pre-processing on file: %s" % (file,), file=sys.stderr)

        dsymbundle = file + ".dSYM"
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        dsymutil = buildconfig.substs["DSYMUTIL"]
        # dsymutil takes --arch=foo instead of -a foo like everything else
        cmd = (
            [dsymutil] + [a.replace("-a ", "--arch=") for a in self.archs if a] + [file]
        )
        print(" ".join(cmd), file=sys.stderr)

        dsymutil_proc = subprocess.Popen(
            cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        dsymout, dsymerr = dsymutil_proc.communicate()
        if dsymutil_proc.returncode != 0:
            raise RuntimeError("Error running dsymutil: %s" % dsymerr)

        # Regular dsymutil won't produce a .dSYM for files without symbols.
        if not os.path.exists(dsymbundle):
            print("No symbols found in file: %s" % (file,), file=sys.stderr)
            return False

        # llvm-dsymutil will produce a .dSYM for files without symbols or
        # debug information, but only sometimes will it warn you about this.
        # We don't want to run dump_syms on such bundles, because asserts
        # will fire in debug mode and who knows what will happen in release.
        #
        # So we check for the error message and bail if it appears.  If it
        # doesn't, we carefully check the bundled DWARF to see if dump_syms
        # will be OK with it.
        if "warning: no debug symbols in" in dsymerr:
            print(dsymerr, file=sys.stderr)
            return False

        contents_dir = os.path.join(dsymbundle, "Contents", "Resources", "DWARF")
        if not os.path.exists(contents_dir):
            print(
                "No DWARF information in .dSYM bundle %s" % (dsymbundle,),
                file=sys.stderr,
            )
            return False

        files = os.listdir(contents_dir)
        if len(files) != 1:
            print("Unexpected files in .dSYM bundle %s" % (files,), file=sys.stderr)
            return False

        otool_out = subprocess.check_output(
            [buildconfig.substs["OTOOL"], "-l", os.path.join(contents_dir, files[0])],
            universal_newlines=True,
        )
        if "sectname __debug_info" not in otool_out:
            print("No symbols in .dSYM bundle %s" % (dsymbundle,), file=sys.stderr)
            return False

        elapsed = time.time() - t_start
        print("Finished processing %s in %.2fs" % (file, elapsed), file=sys.stderr)
        return dsymbundle

    def CopyExeAndDebugInfo(self, file, debug_file, guid, code_file, code_id):
        """ProcessFile has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar because it's a bundle, so it's a directory. |file| here is
        the original filename."""
        dsymbundle = file + ".dSYM"
        rel_path = os.path.join(debug_file, guid, os.path.basename(dsymbundle) + ".tar")
        full_path = os.path.abspath(os.path.join(self.symbol_path, rel_path))
        success = subprocess.call(
            ["tar", "cf", full_path, os.path.basename(dsymbundle)],
            cwd=os.path.dirname(dsymbundle),
            # DEVNULL avoids leaking a file object opened on os.devnull.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.STDOUT,
        )
        if success == 0 and os.path.exists(full_path):
            print(rel_path)


# Entry point if called as a standalone program


def main():
    """Parse the command line, build the platform dumper and process the
    debug info file given as the third positional argument."""
    parser = OptionParser(
        usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>"
    )
    parser.add_option(
        "-c",
        "--copy",
        action="store_true",
        dest="copy_debug",
        default=False,
        help="Copy debug info files into the same directory structure as symbol files",
    )
    parser.add_option(
        "-a",
        "--archs",
        action="store",
        dest="archs",
        help="Run dump_syms -a <arch> for each space separated "
        + "cpu architecture in ARCHS (only on OS X)",
    )
    parser.add_option(
        "-s",
        "--srcdir",
        action="append",
        dest="srcdir",
        default=[],
        help="Use SRCDIR to determine relative paths to source files",
    )
    parser.add_option(
        "-v",
        "--vcs-info",
        action="store_true",
        dest="vcsinfo",
        help="Try to retrieve VCS info for each FILE listed in the output",
    )
    parser.add_option(
        "-i",
        "--source-index",
        action="store_true",
        dest="srcsrv",
        default=False,
        help="Add source index information to debug files, making them suitable"
        + " for use in a source server.",
    )
    parser.add_option(
        "--install-manifest",
        action="append",
        dest="install_manifests",
        default=[],
        help="""Use this install manifest to map filenames back
to canonical locations in the source repository. Specify
<install manifest filename>,<install destination> as a comma-separated pair.""",
    )
    parser.add_option(
        "--count-ctors",
        action="store_true",
        dest="count_ctors",
        default=False,
        help="Count static initializers",
    )
    (options, args) = parser.parse_args()

    # check to see if the pdbstr.exe exists
    if options.srcsrv:
        if "PDBSTR" not in buildconfig.substs:
            print("pdbstr was not found by configure.\n", file=sys.stderr)
            sys.exit(1)

    if len(args) < 3:
        # parser.error() prints the message and exits with status 2.
        parser.error("not enough arguments")

    try:
        manifests = validate_install_manifests(options.install_manifests)
    except (IOError, ValueError) as e:
        parser.error(str(e))
    file_mapping = make_file_mapping(manifests)
    _, bucket = get_s3_region_and_bucket()
    dumper = GetPlatformSpecificDumper(
        dump_syms=args[0],
        symbol_path=args[1],
        copy_debug=options.copy_debug,
        archs=options.archs,
        srcdirs=options.srcdir,
        vcsinfo=options.vcsinfo,
        srcsrv=options.srcsrv,
        s3_bucket=bucket,
        file_mapping=file_mapping,
    )

    # Only the first debug info file is processed; further positional
    # arguments are ignored.
    dumper.Process(args[2], options.count_ctors)


# run main if run directly
if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Header of the next file in the diff, preserved verbatim:
#
# diff --git a/toolkit/crashreporter/tools/unit-symbolstore.py b/toolkit/crashreporter/tools/unit-symbolstore.py
# new file mode 100755
# index 0000000000..ed59b012bb
# --- /dev/null
# +++ b/toolkit/crashreporter/tools/unit-symbolstore.py
# @@ -0,0 +1,613 @@
# +#!/usr/bin/env python
# +# This Source Code Form is subject to the terms of the Mozilla Public
# +# License, v. 2.0. If a copy of the MPL was not distributed with this
# +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +import os +import shutil +import struct +import subprocess +import sys +import tempfile +import unittest +from unittest import mock +from unittest.mock import patch + +import buildconfig +import mozpack.path as mozpath +import mozunit +import symbolstore +from mozpack.manifests import InstallManifest +from symbolstore import realpath + +# Some simple functions to mock out files that the platform-specific dumpers will accept. +# dump_syms itself will not be run (we mock that call out), but we can't override +# the ShouldProcessFile method since we actually want to test that. + + +def write_elf(filename): + open(filename, "wb").write( + struct.pack("<7B45x", 0x7F, ord("E"), ord("L"), ord("F"), 1, 1, 1) + ) + + +def write_macho(filename): + open(filename, "wb").write(struct.pack("<I28x", 0xFEEDFACE)) + + +def write_dll(filename): + open(filename, "w").write("aaa") + # write out a fake PDB too + open(os.path.splitext(filename)[0] + ".pdb", "w").write("aaa") + + +def target_platform(): + return buildconfig.substs["OS_TARGET"] + + +def host_platform(): + return buildconfig.substs["HOST_OS_ARCH"] + + +writer = { + "WINNT": write_dll, + "Linux": write_elf, + "Sunos5": write_elf, + "Darwin": write_macho, +}[target_platform()] +extension = {"WINNT": ".dll", "Linux": ".so", "Sunos5": ".so", "Darwin": ".dylib"}[ + target_platform() +] +file_output = [ + {"WINNT": "bogus data", "Linux": "ELF executable", "Darwin": "Mach-O executable"}[ + target_platform() + ] +] + + +def add_extension(files): + return [f + extension for f in files] + + +class HelperMixin(object): + """ + Test that passing filenames to exclude from processing works. + """ + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + if not self.test_dir.endswith(os.sep): + self.test_dir += os.sep + symbolstore.srcdirRepoInfo = {} + symbolstore.vcsFileInfoCache = {} + + # Remove environment variables that can influence tests. 
+ for e in ("MOZ_SOURCE_CHANGESET", "MOZ_SOURCE_REPO"): + try: + del os.environ[e] + except KeyError: + pass + + def tearDown(self): + shutil.rmtree(self.test_dir) + symbolstore.srcdirRepoInfo = {} + symbolstore.vcsFileInfoCache = {} + + def make_dirs(self, f): + d = os.path.dirname(f) + if d and not os.path.exists(d): + os.makedirs(d) + + def make_file(self, path): + self.make_dirs(path) + with open(path, "wb"): + pass + + def add_test_files(self, files): + for f in files: + f = os.path.join(self.test_dir, f) + self.make_dirs(f) + writer(f) + + +def mock_dump_syms(module_id, filename, extra=[]): + return ( + ["MODULE os x86 %s %s" % (module_id, filename)] + + extra + + ["FILE 0 foo.c", "PUBLIC xyz 123"] + ) + + +class TestCopyDebug(HelperMixin, unittest.TestCase): + def setUp(self): + HelperMixin.setUp(self) + self.symbol_dir = tempfile.mkdtemp() + self.mock_call = patch("subprocess.call").start() + self.stdouts = [] + self.mock_popen = patch("subprocess.Popen").start() + stdout_iter = self.next_mock_stdout() + + def next_popen(*args, **kwargs): + m = mock.MagicMock() + # Get the iterators over whatever output was provided. + stdout_ = next(stdout_iter) + # Eager evaluation for communicate(), below. + stdout_ = list(stdout_) + # stdout is really an iterator, so back to iterators we go. + m.stdout = iter(stdout_) + m.wait.return_value = 0 + # communicate returns the full text of stdout and stderr. + m.communicate.return_value = ("\n".join(stdout_), "") + return m + + self.mock_popen.side_effect = next_popen + shutil.rmtree = patch("shutil.rmtree").start() + + def tearDown(self): + HelperMixin.tearDown(self) + patch.stopall() + shutil.rmtree(self.symbol_dir) + + def next_mock_stdout(self): + if not self.stdouts: + yield iter([]) + for s in self.stdouts: + yield iter(s) + + def test_copy_debug_universal(self): + """ + Test that dumping symbols for multiple architectures only copies debug symbols once + per file. 
+ """ + copied = [] + + def mock_copy_debug(filename, debug_file, guid, code_file, code_id): + copied.append( + filename[len(self.symbol_dir) :] + if filename.startswith(self.symbol_dir) + else filename + ) + + self.add_test_files(add_extension(["foo"])) + # Windows doesn't call file(1) to figure out if the file should be processed. + if target_platform() != "WINNT": + self.stdouts.append(file_output) + self.stdouts.append(mock_dump_syms("X" * 33, add_extension(["foo"])[0])) + self.stdouts.append(mock_dump_syms("Y" * 33, add_extension(["foo"])[0])) + + def mock_dsymutil(args, **kwargs): + filename = args[-1] + os.makedirs(filename + ".dSYM") + return 0 + + self.mock_call.side_effect = mock_dsymutil + d = symbolstore.GetPlatformSpecificDumper( + dump_syms="dump_syms", + symbol_path=self.symbol_dir, + copy_debug=True, + archs="abc xyz", + ) + d.CopyDebug = mock_copy_debug + d.Process(os.path.join(self.test_dir, add_extension(["foo"])[0])) + self.assertEqual(1, len(copied)) + + def test_copy_debug_copies_binaries(self): + """ + Test that CopyDebug copies binaries as well on Windows. 
+ """ + test_file = os.path.join(self.test_dir, "foo.dll") + write_dll(test_file) + code_file = "foo.dll" + code_id = "abc123" + self.stdouts.append( + mock_dump_syms( + "X" * 33, "foo.pdb", ["INFO CODE_ID %s %s" % (code_id, code_file)] + ) + ) + + def mock_compress(args, **kwargs): + filename = args[-1] + open(filename, "w").write("stuff") + return 0 + + self.mock_call.side_effect = mock_compress + d = symbolstore.Dumper_Win32( + dump_syms="dump_syms", symbol_path=self.symbol_dir, copy_debug=True + ) + d.Process(test_file) + self.assertTrue( + os.path.isfile(os.path.join(self.symbol_dir, code_file, code_id, code_file)) + ) + + +class TestGetVCSFilename(HelperMixin, unittest.TestCase): + def setUp(self): + HelperMixin.setUp(self) + + def tearDown(self): + HelperMixin.tearDown(self) + + @patch("subprocess.Popen") + def testVCSFilenameHg(self, mock_Popen): + # mock calls to `hg parent` and `hg showconfig paths.default` + mock_communicate = mock_Popen.return_value.communicate + mock_communicate.side_effect = [ + ("abcd1234", ""), + ("http://example.com/repo", ""), + ] + os.mkdir(os.path.join(self.test_dir, ".hg")) + filename = os.path.join(self.test_dir, "foo.c") + self.assertEqual( + "hg:example.com/repo:foo.c:abcd1234", + symbolstore.GetVCSFilename(filename, [self.test_dir])[0], + ) + + @patch("subprocess.Popen") + def testVCSFilenameHgMultiple(self, mock_Popen): + # mock calls to `hg parent` and `hg showconfig paths.default` + mock_communicate = mock_Popen.return_value.communicate + mock_communicate.side_effect = [ + ("abcd1234", ""), + ("http://example.com/repo", ""), + ("0987ffff", ""), + ("http://example.com/other", ""), + ] + srcdir1 = os.path.join(self.test_dir, "one") + srcdir2 = os.path.join(self.test_dir, "two") + os.makedirs(os.path.join(srcdir1, ".hg")) + os.makedirs(os.path.join(srcdir2, ".hg")) + filename1 = os.path.join(srcdir1, "foo.c") + filename2 = os.path.join(srcdir2, "bar.c") + self.assertEqual( + "hg:example.com/repo:foo.c:abcd1234", + 
symbolstore.GetVCSFilename(filename1, [srcdir1, srcdir2])[0], + ) + self.assertEqual( + "hg:example.com/other:bar.c:0987ffff", + symbolstore.GetVCSFilename(filename2, [srcdir1, srcdir2])[0], + ) + + def testVCSFilenameEnv(self): + # repo URL and changeset read from environment variables if defined. + os.environ["MOZ_SOURCE_REPO"] = "https://somewhere.com/repo" + os.environ["MOZ_SOURCE_CHANGESET"] = "abcdef0123456" + os.mkdir(os.path.join(self.test_dir, ".hg")) + filename = os.path.join(self.test_dir, "foo.c") + self.assertEqual( + "hg:somewhere.com/repo:foo.c:abcdef0123456", + symbolstore.GetVCSFilename(filename, [self.test_dir])[0], + ) + + +# SHA-512 of a zero-byte file +EMPTY_SHA512 = ( + "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff" +) +EMPTY_SHA512 += "8318d2877eec2f63b931bd47417a81a538327af927da3e" + + +class TestGeneratedFilePath(HelperMixin, unittest.TestCase): + def setUp(self): + HelperMixin.setUp(self) + + def tearDown(self): + HelperMixin.tearDown(self) + + def test_generated_file_path(self): + # Make an empty generated file + g = os.path.join(self.test_dir, "generated") + rel_path = "a/b/generated" + with open(g, "wb"): + pass + expected = "s3:bucket:{}/{}:".format(EMPTY_SHA512, rel_path) + self.assertEqual( + expected, symbolstore.get_generated_file_s3_path(g, rel_path, "bucket") + ) + + +if host_platform() == "WINNT": + + class TestRealpath(HelperMixin, unittest.TestCase): + def test_realpath(self): + # self.test_dir is going to be 8.3 paths... 
+ junk = os.path.join(self.test_dir, "x") + with open(junk, "w") as o: + o.write("x") + fixed_dir = os.path.dirname(realpath(junk)) + files = [ + "one\\two.c", + "three\\Four.d", + "Five\\Six.e", + "seven\\Eight\\nine.F", + ] + for rel_path in files: + full_path = os.path.normpath(os.path.join(self.test_dir, rel_path)) + self.make_dirs(full_path) + with open(full_path, "w") as o: + o.write("x") + fixed_path = realpath(full_path.lower()) + fixed_path = os.path.relpath(fixed_path, fixed_dir) + self.assertEqual(rel_path, fixed_path) + + +if target_platform() == "WINNT": + + class TestSourceServer(HelperMixin, unittest.TestCase): + @patch("subprocess.call") + @patch("subprocess.Popen") + @patch.dict("buildconfig.substs._dict", {"PDBSTR": "pdbstr"}) + def test_HGSERVER(self, mock_Popen, mock_call): + """ + Test that HGSERVER gets set correctly in the source server index. + """ + symbolpath = os.path.join(self.test_dir, "symbols") + os.makedirs(symbolpath) + srcdir = os.path.join(self.test_dir, "srcdir") + os.makedirs(os.path.join(srcdir, ".hg")) + sourcefile = os.path.join(srcdir, "foo.c") + test_files = add_extension(["foo"]) + self.add_test_files(test_files) + # mock calls to `dump_syms`, `hg parent` and + # `hg showconfig paths.default` + mock_Popen.return_value.stdout = iter( + [ + "MODULE os x86 %s %s" % ("X" * 33, test_files[0]), + "FILE 0 %s" % sourcefile, + "PUBLIC xyz 123", + ] + ) + mock_Popen.return_value.wait.return_value = 0 + mock_communicate = mock_Popen.return_value.communicate + mock_communicate.side_effect = [ + ("abcd1234", ""), + ("http://example.com/repo", ""), + ] + # And mock the call to pdbstr to capture the srcsrv stream data. 
+ global srcsrv_stream + srcsrv_stream = None + + def mock_pdbstr(args, cwd="", **kwargs): + for arg in args: + if arg.startswith("-i:"): + global srcsrv_stream + srcsrv_stream = open(os.path.join(cwd, arg[3:]), "r").read() + return 0 + + mock_call.side_effect = mock_pdbstr + d = symbolstore.GetPlatformSpecificDumper( + dump_syms="dump_syms", + symbol_path=symbolpath, + srcdirs=[srcdir], + vcsinfo=True, + srcsrv=True, + copy_debug=True, + ) + # stub out CopyDebug + d.CopyDebug = lambda *args: True + d.Process(os.path.join(self.test_dir, test_files[0])) + self.assertNotEqual(srcsrv_stream, None) + hgserver = [ + x.rstrip() + for x in srcsrv_stream.splitlines() + if x.startswith("HGSERVER=") + ] + self.assertEqual(len(hgserver), 1) + self.assertEqual(hgserver[0].split("=")[1], "http://example.com/repo") + + +class TestInstallManifest(HelperMixin, unittest.TestCase): + def setUp(self): + HelperMixin.setUp(self) + self.srcdir = os.path.join(self.test_dir, "src") + os.mkdir(self.srcdir) + self.objdir = os.path.join(self.test_dir, "obj") + os.mkdir(self.objdir) + self.manifest = InstallManifest() + self.canonical_mapping = {} + for s in ["src1", "src2"]: + srcfile = realpath(os.path.join(self.srcdir, s)) + objfile = realpath(os.path.join(self.objdir, s)) + self.canonical_mapping[objfile] = srcfile + self.manifest.add_copy(srcfile, s) + self.manifest_file = os.path.join(self.test_dir, "install-manifest") + self.manifest.write(self.manifest_file) + + def testMakeFileMapping(self): + """ + Test that valid arguments are validated. 
+ """ + arg = "%s,%s" % (self.manifest_file, self.objdir) + ret = symbolstore.validate_install_manifests([arg]) + self.assertEqual(len(ret), 1) + manifest, dest = ret[0] + self.assertTrue(isinstance(manifest, InstallManifest)) + self.assertEqual(dest, self.objdir) + + file_mapping = symbolstore.make_file_mapping(ret) + for obj, src in self.canonical_mapping.items(): + self.assertTrue(obj in file_mapping) + self.assertEqual(file_mapping[obj], src) + + def testMissingFiles(self): + """ + Test that missing manifest files or install directories give errors. + """ + missing_manifest = os.path.join(self.test_dir, "missing-manifest") + arg = "%s,%s" % (missing_manifest, self.objdir) + with self.assertRaises(IOError) as e: + symbolstore.validate_install_manifests([arg]) + self.assertEqual(e.filename, missing_manifest) + + missing_install_dir = os.path.join(self.test_dir, "missing-dir") + arg = "%s,%s" % (self.manifest_file, missing_install_dir) + with self.assertRaises(IOError) as e: + symbolstore.validate_install_manifests([arg]) + self.assertEqual(e.filename, missing_install_dir) + + def testBadManifest(self): + """ + Test that a bad manifest file give errors. + """ + bad_manifest = os.path.join(self.test_dir, "bad-manifest") + with open(bad_manifest, "w") as f: + f.write("junk\n") + arg = "%s,%s" % (bad_manifest, self.objdir) + with self.assertRaises(IOError) as e: + symbolstore.validate_install_manifests([arg]) + self.assertEqual(e.filename, bad_manifest) + + def testBadArgument(self): + """ + Test that a bad manifest argument gives an error. 
+ """ + with self.assertRaises(ValueError): + symbolstore.validate_install_manifests(["foo"]) + + +class TestFileMapping(HelperMixin, unittest.TestCase): + def setUp(self): + HelperMixin.setUp(self) + self.srcdir = os.path.join(self.test_dir, "src") + os.mkdir(self.srcdir) + self.objdir = os.path.join(self.test_dir, "obj") + os.mkdir(self.objdir) + self.symboldir = os.path.join(self.test_dir, "symbols") + os.mkdir(self.symboldir) + + @patch("subprocess.Popen") + def testFileMapping(self, mock_Popen): + files = [("a/b", "mozilla/b"), ("c/d", "foo/d")] + if os.sep != "/": + files = [[f.replace("/", os.sep) for f in x] for x in files] + file_mapping = {} + dumped_files = [] + expected_files = [] + self.make_dirs(os.path.join(self.objdir, "x", "y")) + for s, o in files: + srcfile = os.path.join(self.srcdir, s) + self.make_file(srcfile) + expected_files.append(realpath(srcfile)) + objfile = os.path.join(self.objdir, o) + self.make_file(objfile) + file_mapping[realpath(objfile)] = realpath(srcfile) + dumped_files.append(os.path.join(self.objdir, "x", "y", "..", "..", o)) + # mock the dump_syms output + file_id = ("X" * 33, "somefile") + + def mk_output(files): + return iter( + ["MODULE os x86 %s %s\n" % file_id] + + ["FILE %d %s\n" % (i, s) for i, s in enumerate(files)] + + ["PUBLIC xyz 123\n"] + ) + + mock_Popen.return_value.stdout = mk_output(dumped_files) + mock_Popen.return_value.wait.return_value = 0 + + d = symbolstore.Dumper("dump_syms", self.symboldir, file_mapping=file_mapping) + f = os.path.join(self.objdir, "somefile") + open(f, "w").write("blah") + d.Process(f) + expected_output = "".join(mk_output(expected_files)) + symbol_file = os.path.join( + self.symboldir, file_id[1], file_id[0], file_id[1] + ".sym" + ) + self.assertEqual(open(symbol_file, "r").read(), expected_output) + + +class TestFunctional(HelperMixin, unittest.TestCase): + """Functional tests of symbolstore.py, calling it with a real + dump_syms binary and passing in a real binary to dump symbols 
from. + + Since the rest of the tests in this file mock almost everything and + don't use the actual process pool like buildsymbols does, this tests + that the way symbolstore.py gets called in buildsymbols works. + """ + + def setUp(self): + HelperMixin.setUp(self) + self.skip_test = False + if buildconfig.substs["MOZ_BUILD_APP"] != "browser": + self.skip_test = True + if buildconfig.substs.get("ENABLE_STRIP"): + self.skip_test = True + # Bug 1608146. + if buildconfig.substs.get("MOZ_CODE_COVERAGE"): + self.skip_test = True + self.topsrcdir = buildconfig.topsrcdir + self.script_path = os.path.join( + self.topsrcdir, "toolkit", "crashreporter", "tools", "symbolstore.py" + ) + self.dump_syms = buildconfig.substs.get("DUMP_SYMS") + if not self.dump_syms: + self.skip_test = True + + if target_platform() == "WINNT": + self.target_bin = os.path.join( + buildconfig.topobjdir, "dist", "bin", "firefox.exe" + ) + else: + self.target_bin = os.path.join( + buildconfig.topobjdir, "dist", "bin", "firefox-bin" + ) + + def tearDown(self): + HelperMixin.tearDown(self) + + def testSymbolstore(self): + if self.skip_test: + raise unittest.SkipTest("Skipping test in non-Firefox product") + dist_include_manifest = os.path.join( + buildconfig.topobjdir, "_build_manifests/install/dist_include" + ) + dist_include = os.path.join(buildconfig.topobjdir, "dist/include") + browser_app = os.path.join(buildconfig.topobjdir, "browser/app") + output = subprocess.check_output( + [ + sys.executable, + self.script_path, + "--vcs-info", + "-s", + self.topsrcdir, + "--install-manifest=%s,%s" % (dist_include_manifest, dist_include), + self.dump_syms, + self.test_dir, + self.target_bin, + ], + universal_newlines=True, + stderr=None, + cwd=browser_app, + ) + lines = [l for l in output.splitlines() if l.strip()] + self.assertEqual( + 1, + len(lines), + "should have one filename in the output; got %s" % repr(output), + ) + symbol_file = os.path.join(self.test_dir, lines[0]) + 
self.assertTrue(os.path.isfile(symbol_file)) + symlines = open(symbol_file, "r").readlines() + file_lines = [l for l in symlines if l.startswith("FILE")] + + def check_hg_path(lines, match): + match_lines = [l for l in file_lines if match in l] + self.assertTrue( + len(match_lines) >= 1, "should have a FILE line for " + match + ) + # Skip this check for local git repositories. + if not os.path.isdir(mozpath.join(self.topsrcdir, ".hg")): + return + for line in match_lines: + filename = line.split(None, 2)[2] + self.assertEqual("hg:", filename[:3]) + + # Check that nsBrowserApp.cpp is listed as a FILE line, and that + # it was properly mapped to the source repo. + check_hg_path(file_lines, "nsBrowserApp.cpp") + # Also check Sprintf.h to verify that files from dist/include + # are properly mapped. + check_hg_path(file_lines, "mfbt/Sprintf.h") + + +if __name__ == "__main__": + mozunit.main() diff --git a/toolkit/crashreporter/tools/upload_symbols.py b/toolkit/crashreporter/tools/upload_symbols.py new file mode 100644 index 0000000000..eff1f43b2b --- /dev/null +++ b/toolkit/crashreporter/tools/upload_symbols.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python3 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# This script uploads a symbol archive file from a path or URL passed on the commandline +# to the symbol server at https://symbols.mozilla.org/ . +# +# Using this script requires you to have generated an authentication +# token in the symbol server web interface. You must store the token in a Taskcluster +# secret as the JSON blob `{"token": "<token>"}` and set the `SYMBOL_SECRET` +# environment variable to the name of the Taskcluster secret. Alternately, +# you can put the token in a file and set `SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE` +# environment variable to the path to the file. 
import argparse
import logging
import os
import sys
import tempfile

import redo
import requests

log = logging.getLogger("upload-symbols")
log.setLevel(logging.INFO)

DEFAULT_URL = "https://symbols.mozilla.org/upload/"
MAX_RETRIES = 7
MAX_ZIP_SIZE = 500000000  # 500 MB


def print_error(r):
    """Log the status line and the body of an unsuccessful HTTP response.

    :param r: the ``requests`` response to report. A status below 400 is
        reported as a probable bad auth token (uploads are posted with
        redirects disabled, so a non-error status that still ends up here is
        presumably a redirect -- confirm against the server's behaviour).
    """
    if r.status_code < 400:
        log.error("Error: bad auth token? ({0}: {1})".format(r.status_code, r.reason))
    else:
        log.error("Error: got HTTP response {0}: {1}".format(r.status_code, r.reason))

    log.error(
        "Response body:\n{sep}\n{body}\n{sep}\n".format(sep="=" * 20, body=r.text)
    )


def get_taskcluster_secret(secret_name):
    """Fetch the symbol upload token from the Taskcluster secrets service.

    :param secret_name: name of the Taskcluster secret holding the token
    :returns: the value stored under ``secret -> token`` in the JSON reply
    :raises requests.exceptions.HTTPError: if the service answers with an
        error status
    """
    secrets_url = "http://taskcluster/secrets/v1/secret/{}".format(secret_name)
    log.info(
        'Using symbol upload token from the secrets service: "{}"'.format(secrets_url)
    )
    res = requests.get(secrets_url)
    res.raise_for_status()
    secret = res.json()
    auth_token = secret["secret"]["token"]

    return auth_token


def main():
    """Parse the command line and upload the given symbol archive.

    :returns: 0 on success (or when the archive is missing and
        ``--ignore-missing`` was passed), non-zero otherwise; the value is
        used as the script's exit code.
    """
    logging.basicConfig()
    parser = argparse.ArgumentParser(
        description="Upload symbols in ZIP using token from Taskcluster secrets service."
    )
    parser.add_argument(
        "archive", help="Symbols archive file - URL or path to local file"
    )
    parser.add_argument(
        "--ignore-missing", help="No error on missing files", action="store_true"
    )
    args = parser.parse_args()

    def check_file_exists(url):
        # Only network-level failures are retried; any HTTP answer, good or
        # bad, settles the question immediately.
        for _ in redo.retrier(attempts=MAX_RETRIES):
            try:
                resp = requests.head(url, allow_redirects=True)
                return resp.status_code == requests.codes.ok
            except requests.exceptions.RequestException as e:
                log.error("Error: {0}".format(e))
            log.info("Retrying...")
        return False

    if args.archive.startswith("http"):
        is_existing = check_file_exists(args.archive)
    else:
        is_existing = os.path.isfile(args.archive)

    if not is_existing:
        if args.ignore_missing:
            log.info('Archive file "{0}" does not exist!'.format(args.archive))
            return 0
        else:
            log.error('Error: archive file "{0}" does not exist!'.format(args.archive))
            return 1

    tmpdir = None
    try:
        if args.archive.endswith(".tar.zst"):
            tmpdir = tempfile.TemporaryDirectory()
            zip_paths = convert_zst_archive(args.archive, tmpdir)
        else:
            zip_paths = [args.archive]

        # NOTE(review): convert_zst_archive() returns [] both for an empty
        # archive and when every retry failed; in either case nothing is
        # uploaded and the exit code is still 0.
        for zip_path in zip_paths:
            result = upload_symbols(zip_path)
            if result:
                return result
        return 0
    finally:
        if tmpdir:
            tmpdir.cleanup()


def convert_zst_archive(zst_archive, tmpdir):
    """
    Convert a .tar.zst file to one or more zip files

    Our build tasks output .tar.zst files, but the tecken server only allows
    .zip files to be uploaded.

    A new zip file is started whenever the running size of the entries added
    to the current one exceeds MAX_ZIP_SIZE.

    :param zst_archive: path or URL to a .tar.zst source file
    :param tmpdir: TemporaryDirectory to store the output zip files in
    :returns: list of paths to the output zip files; empty if the archive
              held no entries or if every retry failed
    """
    import concurrent.futures
    import gzip
    import itertools
    import tarfile

    import zstandard
    from mozpack.files import File
    from mozpack.mozjar import Deflater, JarWriter

    def iter_files_from_tar(reader):
        # Stream (name, bytes) pairs out of the zstd-compressed tarball.
        ctx = zstandard.ZstdDecompressor()
        uncompressed = ctx.stream_reader(reader)
        with tarfile.open(mode="r|", fileobj=uncompressed, bufsize=1024 * 1024) as tar:
            while True:
                info = tar.next()
                if info is None:
                    break
                member = tar.extractfile(info)
                if member is None:
                    # extractfile() yields None for members with no data
                    # (e.g. directories); calling .read() on it would raise
                    # AttributeError, so skip those entries.
                    continue
                yield (info.name, member.read())

    def prepare_from(archive, tmpdir):
        # Yield (zip entry name, payload) for every file in the archive. The
        # payload is a mozpack File for entries that were recompressed on
        # disk, or a Deflater holding the compressed bytes otherwise.
        resp = None
        if archive.startswith("http"):
            resp = requests.get(archive, allow_redirects=True, stream=True)
            resp.raise_for_status()
            reader = resp.raw
            # Work around taskcluster generic-worker possibly gzipping the tar.zst.
            if resp.headers.get("Content-Encoding") == "gzip":
                reader = gzip.GzipFile(fileobj=reader)
        else:
            reader = open(archive, "rb")

        def handle_file(entry):
            name, data = entry
            log.info("Compressing %s", name)
            path = os.path.join(tmpdir, name.lstrip("/"))
            if name.endswith(".dbg"):
                os.makedirs(os.path.dirname(path), exist_ok=True)
                with open(path, "wb") as fh:
                    with gzip.GzipFile(fileobj=fh, mode="wb", compresslevel=5) as c:
                        c.write(data)
                return (name + ".gz", File(path))
            elif name.endswith(".dSYM.tar"):
                import bz2

                os.makedirs(os.path.dirname(path), exist_ok=True)
                with open(path, "wb") as fh:
                    fh.write(bz2.compress(data))
                return (name + ".bz2", File(path))
            elif name.endswith((".pdb", ".exe", ".dll")):
                import subprocess

                makecab = os.environ.get("MAKECAB", "makecab")
                os.makedirs(os.path.dirname(path), exist_ok=True)
                with open(path, "wb") as fh:
                    fh.write(data)

                subprocess.check_call(
                    [makecab, "-D", "CompressionType=MSZIP", path, path + "_"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT,
                )

                return (name[:-1] + "_", File(path + "_"))
            else:
                deflater = Deflater(compress_level=5)
                deflater.write(data)
                return (name, deflater)

        # try/finally so the source stream is released even when reading or
        # compressing fails, or when the consumer stops iterating early.
        try:
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=os.cpu_count()
            ) as executor:
                yield from executor.map(handle_file, iter_files_from_tar(reader))
        finally:
            reader.close()
            if resp is not None:
                resp.close()

    # Endless supply of output names: symbols.zip, symbols2.zip, symbols3.zip...
    zip_paths_iter = (
        os.path.join(tmpdir.name, "symbols{}.zip".format("" if i == 1 else i))
        for i in itertools.count(start=1)
    )
    zip_path = next(zip_paths_iter)
    log.info('Preparing symbol archive "{0}" from "{1}"'.format(zip_path, zst_archive))
    for _ in redo.retrier(attempts=MAX_RETRIES):
        zip_paths = []
        jar = None
        try:
            for name, data in prepare_from(zst_archive, tmpdir.name):
                if not jar:
                    jar = JarWriter(zip_path)
                    zip_paths.append(zip_path)
                    size = 0
                log.info("Adding %s", name)
                # File payloads were already compressed on disk by
                # handle_file, so they are added without further compression.
                jar.add(name, data, compress=not isinstance(data, File))
                size += data.size() if isinstance(data, File) else data.compressed_size
                if size > MAX_ZIP_SIZE:
                    jar.finish()
                    jar = None
                    zip_path = next(zip_paths_iter)
                    log.info('Continuing with symbol archive "{}"'.format(zip_path))
            if jar:
                jar.finish()
            return zip_paths
        except requests.exceptions.RequestException as e:
            log.error("Error: {0}".format(e))
        log.info("Retrying...")

    log.warning("Maximum retries hit, giving up!")
    return []


def upload_symbols(zip_path):
    """
    Upload symbols to the tecken server

    The auth token is taken from the Taskcluster secret named by the
    SYMBOL_SECRET environment variable or, failing that, from the file named
    by SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE. The upload URL can be overridden
    with SOCORRO_SYMBOL_UPLOAD_URL.

    :param zip_path: path to the zip file to upload, or a URL (anything
                     starting with "http") for the server to fetch itself
    :returns: 0 indicates the upload was successful, non-zero indicates an
              error that should be used for the script's exit code
    """
    secret_name = os.environ.get("SYMBOL_SECRET")
    if secret_name is not None:
        auth_token = get_taskcluster_secret(secret_name)
    elif "SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE" in os.environ:
        token_file = os.environ["SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE"]

        if not os.path.isfile(token_file):
            log.error(
                'SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE "{0}" does not exist!'.format(
                    token_file
                )
            )
            return 1
        with open(token_file, "r") as fh:
            auth_token = fh.read().strip()
    else:
        log.error(
            "You must set the SYMBOL_SECRET or SOCORRO_SYMBOL_UPLOAD_TOKEN_FILE "
            "environment variables!"
        )
        return 1

    # Allow overwriting of the upload url with an environmental variable
    if "SOCORRO_SYMBOL_UPLOAD_URL" in os.environ:
        url = os.environ["SOCORRO_SYMBOL_UPLOAD_URL"]
    else:
        url = DEFAULT_URL

    log.info('Uploading symbol file "{0}" to "{1}"'.format(zip_path, url))

    post_kwargs = {
        "headers": {"Auth-Token": auth_token},
        "allow_redirects": False,
        # Allow a longer read timeout because uploading by URL means the server
        # has to fetch the entire zip file, which can take a while. The load balancer
        # in front of symbols.mozilla.org has a 300 second timeout, so we'll use that.
        "timeout": (300, 300),
    }

    for i, _ in enumerate(redo.retrier(attempts=MAX_RETRIES), start=1):
        log.info("Attempt %d of %d..." % (i, MAX_RETRIES))
        try:
            if zip_path.startswith("http"):
                r = requests.post(url, data={"url": zip_path}, **post_kwargs)
            else:
                # Reopen the archive for every attempt so each one starts
                # from the first byte, and close it once the attempt is over
                # instead of leaking one handle per retry.
                with open(zip_path, "rb") as zip_file:
                    r = requests.post(
                        url, files={"symbols.zip": zip_file}, **post_kwargs
                    )
            # 408, 429 or any 5XX is likely to be a transient failure.
            # Break out for success or other error codes.
            if r.ok or (r.status_code < 500 and (r.status_code not in (408, 429))):
                break
            print_error(r)
        except requests.exceptions.RequestException as e:
            log.error("Error: {0}".format(e))
        log.info("Retrying...")
    else:
        log.warning("Maximum retries hit, giving up!")
        return 1

    if r.status_code >= 200 and r.status_code < 300:
        log.info("Uploaded successfully!")
        return 0

    print_error(r)
    return 1


if __name__ == "__main__":
    sys.exit(main())