diff options
Diffstat (limited to 'tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py')
-rw-r--r-- | tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py new file mode 100644 index 0000000000..9998dcac27 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# Any copyright is dedicated to the Public Domain. +# http://creativecommons.org/publicdomain/zero/1.0/ + +import argparse +import concurrent.futures +import datetime +import os +import subprocess +import sys +import traceback +import urllib +import zipfile + +import requests + +if sys.platform == "darwin": + SYSTEM_DIRS = [ + "/usr/lib", + "/System/Library/Frameworks", + "/System/Library/PrivateFrameworks", + "/System/Library/Extensions", + ] +else: + SYSTEM_DIRS = ["/lib", "/usr/lib"] +SYMBOL_SERVER_URL = "https://symbols.mozilla.org/" + + +def should_process(f, platform=sys.platform): + """Determine if a file is a platform binary""" + if platform == "darwin": + """ + The 'file' command can error out. One example is "illegal byte + sequence" on a Japanese language UTF8 text file. So we must wrap the + command in a try/except block to prevent the script from terminating + prematurely when this happens. + """ + try: + filetype = subprocess.check_output(["file", "-Lb", f], text=True) + except subprocess.CalledProcessError: + return False + """Skip kernel extensions""" + if "kext bundle" in filetype: + return False + return filetype.startswith("Mach-O") + else: + return subprocess.check_output(["file", "-Lb", f], text=True).startswith("ELF") + return False + + +def get_archs(filename, platform=sys.platform): + """ + Find the list of architectures present in a Mach-O file, or a single-element + list on non-OS X. + """ + architectures = [] + output = subprocess.check_output(["file", "-Lb", filename], text=True) + for string in output.split(" "): + if string == "arm64e": + architectures.append("arm64e") + elif string == "x86_64_haswell": + architectures.append("x86_64h") + elif string == "x86_64": + architectures.append("x86_64") + elif string == "i386": + architectures.append("i386") + + return architectures + + +def server_has_file(filename): + """ + Send the symbol server a HEAD request to see if it has this symbol file. + """ + try: + r = requests.head( + urllib.parse.urljoin(SYMBOL_SERVER_URL, urllib.parse.quote(filename)) + ) + return r.status_code == 200 + except requests.exceptions.RequestException: + return False + + +def process_file(dump_syms, path, arch, verbose, write_all): + arch_arg = ["-a", arch] + try: + stderr = None if verbose else subprocess.DEVNULL + stdout = subprocess.check_output([dump_syms] + arch_arg + [path], stderr=stderr) + except subprocess.CalledProcessError: + if verbose: + print("Processing %s%s...failed." % (path, " [%s]" % arch if arch else "")) + return None, None + module = stdout.splitlines()[0] + bits = module.split(b" ", 4) + if len(bits) != 5: + return None, None + _, platform, cpu_arch, debug_id, debug_file = bits + if verbose: + sys.stdout.write("Processing %s [%s]..." % (path, arch)) + filename = os.path.join(debug_file, debug_id, debug_file + b".sym") + # see if the server already has this symbol file + if not write_all: + if server_has_file(filename): + if verbose: + print("already on server.") + return None, None + # Collect for uploading + if verbose: + print("done.") + return filename, stdout + + +def get_files(paths, platform=sys.platform): + """ + For each entry passed in paths if the path is a file that can + be processed, yield it, otherwise if it is a directory yield files + under it that can be processed. + """ + for path in paths: + if os.path.isdir(path): + for root, subdirs, files in os.walk(path): + for f in files: + fullpath = os.path.join(root, f) + if should_process(fullpath, platform=platform): + yield fullpath + elif should_process(path, platform=platform): + yield path + + +def process_paths( + paths, executor, dump_syms, verbose, write_all=False, platform=sys.platform +): + jobs = set() + for fullpath in get_files(paths, platform=platform): + while os.path.islink(fullpath): + fullpath = os.path.join(os.path.dirname(fullpath), os.readlink(fullpath)) + if platform == "linux": + # See if there's a -dbg package installed and dump that instead. + dbgpath = "/usr/lib/debug" + fullpath + if os.path.isfile(dbgpath): + fullpath = dbgpath + for arch in get_archs(fullpath, platform=platform): + jobs.add( + executor.submit( + process_file, dump_syms, fullpath, arch, verbose, write_all + ) + ) + for job in concurrent.futures.as_completed(jobs): + try: + yield job.result() + except Exception as e: + traceback.print_exc(file=sys.stderr) + print("Error: %s" % str(e), file=sys.stderr) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-v", "--verbose", action="store_true", help="Produce verbose output" + ) + parser.add_argument( + "--all", + action="store_true", + help="Gather all system symbols, not just missing ones.", + ) + parser.add_argument("dump_syms", help="Path to dump_syms binary") + parser.add_argument( + "files", nargs="*", help="Specific files from which to gather symbols." + ) + args = parser.parse_args() + args.dump_syms = os.path.abspath(args.dump_syms) + # check for the dump_syms binary + if ( + not os.path.isabs(args.dump_syms) + or not os.path.exists(args.dump_syms) + or not os.access(args.dump_syms, os.X_OK) + ): + print( + "Error: can't find dump_syms binary at %s!" % args.dump_syms, + file=sys.stderr, + ) + return 1 + file_list = set() + executor = concurrent.futures.ProcessPoolExecutor() + zip_path = os.path.abspath("symbols.zip") + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + for filename, contents in process_paths( + args.files if args.files else SYSTEM_DIRS, + executor, + args.dump_syms, + args.verbose, + args.all, + ): + if filename and contents and filename not in file_list: + file_list.add(filename) + zf.writestr(filename, contents) + zf.writestr( + "ossyms-1.0-{platform}-{date}-symbols.txt".format( + platform=sys.platform.title(), + date=datetime.datetime.now().strftime("%Y%m%d%H%M%S"), + ), + "\n".join(file_list), + ) + if file_list: + if args.verbose: + print("Generated %s with %d symbols" % (zip_path, len(file_list))) + else: + os.unlink("symbols.zip") + + +if __name__ == "__main__": + main() |