diff options
Diffstat (limited to '')
7 files changed, 1077 insertions, 0 deletions
# diff --git a/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py
# new file mode 100755
# index 0000000000..5e63a2f39f
# --- /dev/null
# +++ b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py
# @@ -0,0 +1,392 @@
#!/usr/bin/env python

# Copyright 2015 Michael R. Miller.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""
PackageSymbolDumper.py

Dumps Breakpad symbols for the contents of an Apple update installer.  Given a
path to an Apple update installer as a .dmg or a path to a specific package
within the disk image, PackageSymbolDumper mounts, traverses, and dumps symbols
for all applicable frameworks and dylibs found within.

Required tools for Linux:
    pax
    gzip
    tar
    xpwn's dmg (https://github.com/planetbeing/xpwn)

Created on Apr 11, 2012

@author: mrmiller
"""
import argparse
import concurrent.futures
import errno
import logging
import os
import shutil
import stat
import subprocess
import tempfile
import traceback

from macpkg import Pbzx, uncpio, unxar
from scrapesymbols.gathersymbols import process_paths


def expand_pkg(pkg_path, out_path):
    """
    Expands the contents of an installer package to some directory.

    @param pkg_path: a path to an installer package (.pkg)
    @param out_path: a path to hold the package contents
    """
    # Keep the archive open only for the duration of the extraction.
    with open(pkg_path, "rb") as pkg:
        for name, content in unxar(pkg):
            with open(os.path.join(out_path, name), "wb") as fh:
                shutil.copyfileobj(content, fh)


def expand_dmg(dmg_path, out_path):
    """
    Expands the contents of a DMG file to some directory.

    @param dmg_path: a path to a disk image file (.dmg)
    @param out_path: a path to hold the image contents
    """

    with tempfile.NamedTemporaryFile() as f:
        subprocess.check_call(
            ["dmg", "extract", dmg_path, f.name], stdout=subprocess.DEVNULL
        )
        subprocess.check_call(
            ["hfsplus", f.name, "extractall"], stdout=subprocess.DEVNULL, cwd=out_path
        )


def expand_zip(zip_path, out_path):
    """
    Expands the contents of a ZIP archive to some directory.

    @param zip_path: a path to a ZIP archive (.zip)
    @param out_path: a path to hold the archive contents
    """
    # subprocess.DEVNULL is a sentinel integer, not a path: it must be passed
    # to subprocess directly rather than through open().
    subprocess.check_call(
        ["unzip", "-d", out_path, zip_path], stdout=subprocess.DEVNULL
    )


def filter_files(function, path):
    """
    Yield file paths matching a filter function by walking the
    hierarchy rooted at path.

    @param function: a function taking in a filename that returns true to
        include the path
    @param path: the root path of the hierarchy to traverse
    """
    for root, _dirs, files in os.walk(path):
        for filename in files:
            if function(filename):
                yield os.path.join(root, filename)


def find_packages(path):
    """
    Returns a generator of installer packages (as determined by the .pkg
    extension), disk images (as determined by the .dmg extension) or ZIP
    archives found within path.

    @param path: root path to search for .pkg, .dmg and .zip files
    """
    return filter_files(
        lambda filename: os.path.splitext(filename)[1] in (".pkg", ".dmg", ".zip")
        and not filename.startswith("._"),
        path,
    )


def find_all_packages(paths):
    """
    Yield installer package files, disk images and ZIP archives found in all
    of `paths`.

    @param paths: list of root paths to search for .pkg, .dmg and .zip files
    """
    for path in paths:
        logging.info("find_all_packages: {}".format(path))
        for pkg in find_packages(path):
            yield pkg


def find_payloads(path):
    """
    Returns a generator of possible installer package payload paths.

    @param path: root path for an installer package
    """
    return filter_files(
        lambda filename: "Payload" in filename or ".pax.gz" in filename, path
    )


def _extract_compressed_pax(tool, payload_path, output_path):
    """
    Run `<tool> -dc payload | pax -r -k -s ":^/::"` with pax running inside
    output_path, without going through a shell.

    @param tool: name of the decompressor executable ("gzip" or "bzip2")
    @param payload_path: path to the compressed pax archive
    @param output_path: directory the archive is unpacked into
    """
    # Arguments are passed as lists so that paths containing spaces or shell
    # metacharacters cannot alter the command.
    decompressor = subprocess.Popen(
        [tool, "-dc", os.path.abspath(payload_path)], stdout=subprocess.PIPE
    )
    try:
        subprocess.check_call(
            ["pax", "-r", "-k", "-s", ":^/::"],
            stdin=decompressor.stdout,
            cwd=output_path,
        )
    finally:
        decompressor.stdout.close()
        returncode = decompressor.wait()
    # Like the shell pipeline this replaces, only pax's status decides
    # success; a decompressor problem is reported but not fatal.
    if returncode != 0:
        logging.warning(
            "{tool} exited with status {code} for {payload}".format(
                tool=tool, code=returncode, payload=payload_path
            )
        )


def extract_payload(payload_path, output_path):
    """
    Extracts the contents of an installer package payload to a given directory.

    @param payload_path: path to an installer package's payload
    @param output_path: output path for the payload's contents
    @return True for success, False for failure.
    """
    try:
        # The first two bytes identify the container format.
        with open(payload_path, "rb") as payload:
            header = payload.read(2)

        if header == b"BZ":
            logging.info("Extracting bzip2 payload")
            _extract_compressed_pax("bzip2", payload_path, output_path)
            return True
        elif header == b"\x1f\x8b":
            logging.info("Extracting gzip payload")
            _extract_compressed_pax("gzip", payload_path, output_path)
            return True
        elif header == b"pb":
            logging.info("Extracting pbzx payload")

            with open(payload_path, "rb") as payload:
                for path, mode, content in uncpio(Pbzx(payload)):
                    # Only regular files are of interest for symbol dumping.
                    if not path or not stat.S_ISREG(mode):
                        continue
                    out = os.path.join(output_path, path.decode())
                    os.makedirs(os.path.dirname(out), exist_ok=True)
                    with open(out, "wb") as fh:
                        shutil.copyfileobj(content, fh)

            return True
        else:
            # Unsupported format (also reached for files shorter than 2 bytes)
            logging.error(
                "Unknown payload format: 0x{}".format(
                    "".join("{:x}".format(byte) for byte in header)
                )
            )
            return False

    except Exception:
        # Extraction is best-effort, but record why it failed.
        logging.exception("Failed to extract payload: " + payload_path)
        return False


def shutil_error_handler(caller, path, excinfo):
    """Log (rather than raise) failures reported by shutil.rmtree."""
    logging.error('Could not remove "{path}": {info}'.format(path=path, info=excinfo))


def write_symbol_file(dest, filename, contents):
    """
    Write a symbol file below dest, creating intermediate directories.

    @param dest: root output directory for symbols
    @param filename: path of the symbol file relative to dest
    @param contents: bytes to write
    """
    full_path = os.path.join(dest, filename)
    # An already existing directory must not prevent the file from being
    # written, so tolerate it instead of letting EEXIST abort the write.
    os.makedirs(os.path.dirname(full_path), exist_ok=True)
    with open(full_path, "wb") as sym_file:
        sym_file.write(contents)


def dump_symbols(executor, dump_syms, path, dest):
    """
    Dump symbols for the system library directories found below path.

    @param executor: concurrent.futures executor used to run dump_syms
    @param dump_syms: path to the dump_syms executable
    @param path: root of an extracted package or payload
    @param dest: output path for symbols
    """
    system_library = os.path.join("System", "Library")
    subdirectories = [
        os.path.join(system_library, "Frameworks"),
        os.path.join(system_library, "PrivateFrameworks"),
        os.path.join(system_library, "Extensions"),
        os.path.join("usr", "lib"),
    ]

    paths_to_dump = [os.path.join(path, d) for d in subdirectories]
    existing_paths = [p for p in paths_to_dump if os.path.exists(p)]

    for filename, contents in process_paths(
        paths=existing_paths,
        executor=executor,
        dump_syms=dump_syms,
        verbose=True,
        write_all=True,
        platform="darwin",
    ):
        if filename and contents:
            # process_paths yields the symbol file name as bytes.
            logging.info("Added symbol file " + str(filename, "utf-8"))
            write_symbol_file(dest, str(filename, "utf-8"), contents)


def dump_symbols_from_payload(executor, dump_syms, payload_path, dest):
    """
    Dumps all the symbols found inside the payload of an installer package.

    @param executor: concurrent.futures executor used to run dump_syms
    @param dump_syms: path to the dump_syms executable
    @param payload_path: path to an installer package's payload
    @param dest: output path for symbols
    @return True for success, False if the payload could not be extracted.
    """
    temp_dir = None
    logging.info("Dumping symbols from payload: " + payload_path)
    try:
        temp_dir = tempfile.mkdtemp()
        logging.info("Extracting payload to {path}.".format(path=temp_dir))
        if not extract_payload(payload_path, temp_dir):
            logging.error("Could not extract payload: " + payload_path)
            return False

        dump_symbols(executor, dump_syms, temp_dir, dest)

    finally:
        if temp_dir is not None:
            shutil.rmtree(temp_dir, onerror=shutil_error_handler)

    return True


def dump_symbols_from_package(executor, dump_syms, pkg, dest):
    """
    Dumps all the symbols found inside an installer package.

    @param executor: concurrent.futures executor used to run dump_syms
    @param dump_syms: path to the dump_syms executable
    @param pkg: path to an installer package
    @param dest: output path for symbols
    @return True for success, False if a payload failed or an exception
        was raised.
    """
    successful = True
    temp_dir = None
    logging.info("Dumping symbols from package: " + pkg)
    try:
        temp_dir = tempfile.mkdtemp()
        extension = os.path.splitext(pkg)[1]
        if extension == ".pkg":
            expand_pkg(pkg, temp_dir)
        elif extension == ".zip":
            expand_zip(pkg, temp_dir)
        else:
            expand_dmg(pkg, temp_dir)

        # check for any subpackages
        for subpackage in find_packages(temp_dir):
            logging.info("Found subpackage at: " + subpackage)
            res = dump_symbols_from_package(executor, dump_syms, subpackage, dest)
            if not res:
                logging.error("Error while dumping subpackage: " + subpackage)

        # dump symbols from any payloads (only expecting one) in the package
        for payload in find_payloads(temp_dir):
            res = dump_symbols_from_payload(executor, dump_syms, payload, dest)
            if not res:
                successful = False

        # dump symbols directly extracted from the package
        dump_symbols(executor, dump_syms, temp_dir, dest)

    except Exception as e:
        traceback.print_exc()
        logging.error("Exception while dumping symbols from package: {}".format(e))
        successful = False

    finally:
        if temp_dir is not None:
            shutil.rmtree(temp_dir, onerror=shutil_error_handler)

    return successful


def read_processed_packages(tracking_file):
    """
    Return the set of package paths listed (one per line) in tracking_file,
    or an empty set when no tracking file is given or it does not exist.
    """
    if tracking_file is None or not os.path.exists(tracking_file):
        return set()
    logging.info("Reading processed packages from {}".format(tracking_file))
    with open(tracking_file, "r") as fh:
        return set(fh.read().splitlines())


def write_processed_packages(tracking_file, processed_packages):
    """
    Write processed_packages to tracking_file, one path per line.  Does
    nothing when tracking_file is None.
    """
    if tracking_file is None:
        return
    logging.info(
        "Writing {} processed packages to {}".format(
            len(processed_packages), tracking_file
        )
    )
    # Sorted so the tracking file is stable from one run to the next.
    with open(tracking_file, "w") as fh:
        fh.write("\n".join(sorted(processed_packages)))


def process_packages(package_finder, to, tracking_file, dump_syms):
    """
    Dump symbols from every package produced by package_finder that is not
    already listed in tracking_file.

    @param package_finder: callable returning an iterable of package paths
    @param to: output path for symbols
    @param tracking_file: path of the processed-packages list, or None
    @param dump_syms: path to the dump_syms executable
    """
    processed_packages = read_processed_packages(tracking_file)
    try:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            for pkg in package_finder():
                if pkg in processed_packages:
                    logging.info("Skipping already-processed package: {}".format(pkg))
                else:
                    dump_symbols_from_package(executor, dump_syms, pkg, to)
                    processed_packages.add(pkg)
    finally:
        # Persist progress even if the run is interrupted part-way through.
        write_processed_packages(tracking_file, processed_packages)


def main():
    parser = argparse.ArgumentParser(
        description="Extracts Breakpad symbols from a Mac OS X support update."
    )
    parser.add_argument(
        "--dump_syms",
        default="dump_syms",
        type=str,
        help="path to the Breakpad dump_syms executable",
    )
    parser.add_argument(
        "--tracking-file",
        type=str,
        help="Path to a file in which to store information "
        + "about already-processed packages",
    )
    parser.add_argument(
        "search", nargs="+", help="Paths to search recursively for packages"
    )
    parser.add_argument("to", type=str, help="destination path for the symbols")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    for p in ("requests.packages.urllib3.connectionpool", "urllib3"):
        urllib3_logger = logging.getLogger(p)
        urllib3_logger.setLevel(logging.ERROR)

    if not args.search or not all(os.path.exists(p) for p in args.search):
        logging.error("Invalid search path")
        return
    if not os.path.exists(args.to):
        logging.error("Invalid path to destination")
        return

    def finder():
        return find_all_packages(args.search)

    process_packages(finder, args.to, args.tracking_file, args.dump_syms)


if __name__ == "__main__":
    main()

# diff --git a/tools/crashreporter/system-symbols/mac/get_update_packages.py b/tools/crashreporter/system-symbols/mac/get_update_packages.py
# new file mode 100644
# index 0000000000..3192fa3ef0
# --- /dev/null
# +++ b/tools/crashreporter/system-symbols/mac/get_update_packages.py
# @@ -0,0 +1,154 @@
# +#!/usr/bin/env python
# +
# +# Copyright (c)
2015 Ted Mielczarek <ted@mielczarek.org> +# and Michael R. Miller <michaelrmmiller@gmail.com> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
import argparse
import concurrent.futures
import logging
import os
import re
import shutil
import subprocess
import tempfile
import urllib.parse

import requests
from PackageSymbolDumper import find_packages, process_packages

OSX_RE = re.compile(r"10\.[0-9]+\.[0-9]+")


def extract_dmg(dmg_path, dest):
    """Extract the HFS+ contents of the disk image dmg_path into dest."""
    logging.info("extract_dmg({}, {})".format(dmg_path, dest))
    with tempfile.NamedTemporaryFile() as f:
        subprocess.check_call(
            ["dmg", "extract", dmg_path, f.name], stdout=subprocess.DEVNULL
        )
        subprocess.check_call(["hfsplus", f.name, "extractall"], cwd=dest)


def get_update_packages():
    """
    Yield download URLs of OS X updates (excluding "Combo" updates) listed
    on up to 16 pages of Apple's support downloads index.
    """
    for i in range(16):
        logging.info("get_update_packages: page " + str(i))
        url = (
            "https://km.support.apple.com/kb/index?page=downloads_browse&sort=recency"
            "&facet=all&category=PF6&locale=en_US&offset=%d" % i
        )
        res = requests.get(url)
        if res.status_code != 200:
            break
        data = res.json()
        downloads = data.get("downloads", [])
        if not downloads:
            break
        for d in downloads:
            title = d.get("title", "")
            if OSX_RE.search(title) and "Combo" not in title:
                logging.info("Title: " + title)
                if "fileurl" in d:
                    yield d["fileurl"]
                else:
                    logging.warning("No fileurl in download!")


def fetch_url_to_file(url, download_dir):
    """
    Download url into download_dir.

    @return the local file name, or None if the file already existed.
    @raise requests.HTTPError if the server answers with an error status.
    """
    # urlparse is the Python 2 module name; Python 3 exposes urlsplit in
    # urllib.parse.
    filename = os.path.basename(urllib.parse.urlsplit(url).path)
    local_filename = os.path.join(download_dir, filename)
    if os.path.isfile(local_filename):
        logging.info("{} already exists, skipping".format(local_filename))
        return None
    r = requests.get(url, stream=True)
    # Do not save an HTML error page as if it were the disk image.
    r.raise_for_status()
    res_len = int(r.headers.get("content-length", "0"))
    logging.info("Downloading {} -> {} ({} bytes)".format(url, local_filename, res_len))
    with open(local_filename, "wb") as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
    return local_filename


def fetch_and_extract_dmg(url, tmpdir):
    """
    Download the disk image at url into tmpdir, extract it to a fresh
    subdirectory and return the list of packages found inside.
    """
    logging.info("fetch_and_extract_dmg: " + url)
    filename = fetch_url_to_file(url, tmpdir)
    if not filename:
        return []
    # Extract dmg contents to a subdir
    subdir = tempfile.mkdtemp(dir=tmpdir)
    extract_dmg(filename, subdir)
    packages = list(find_packages(subdir))
    logging.info(
        "fetch_and_extract_dmg({}): found packages: {}".format(url, str(packages))
    )
    return packages


def find_update_packages(tmpdir):
    """Yield package paths from every update image, as downloads complete."""
    logging.info("find_update_packages")
    # Only download 2 packages at a time.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        jobs = {
            executor.submit(fetch_and_extract_dmg, url, tmpdir): url
            for url in get_update_packages()
        }
        for future in concurrent.futures.as_completed(jobs):
            url = jobs[future]
            if future.exception() is not None:
                logging.error(
                    "exception downloading {}: {}".format(url, future.exception())
                )
            else:
                for pkg in future.result():
                    yield pkg


def main():
    parser = argparse.ArgumentParser(
        description="Download OS X update packages and dump symbols from them"
    )
    parser.add_argument(
        "--dump_syms",
        default="dump_syms",
        type=str,
        help="path to the Breakpad dump_syms executable",
    )
    parser.add_argument("to", type=str, help="destination path for the symbols")
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    for p in ("requests.packages.urllib3.connectionpool", "urllib3"):
        urllib3_logger = logging.getLogger(p)
        urllib3_logger.setLevel(logging.ERROR)
    # Created outside the try block so the cleanup below never sees an
    # unbound name if mkdtemp itself fails.
    tmpdir = tempfile.mkdtemp(suffix=".osxupdates")
    try:

        def finder():
            return find_update_packages(tmpdir)

        process_packages(finder, args.to, None, args.dump_syms)
    finally:
        shutil.rmtree(tmpdir)


if __name__ == "__main__":
    main()

# diff --git a/tools/crashreporter/system-symbols/mac/list-packages.py b/tools/crashreporter/system-symbols/mac/list-packages.py
# new file mode 100755
# index 0000000000..444c27be9d
# --- /dev/null
# +++ b/tools/crashreporter/system-symbols/mac/list-packages.py
# @@ -0,0 +1,59 @@
#!/usr/bin/env python

# Copyright 2015 Ted Mielczarek.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os
import sys

from reposadolib import reposadocommon

# Point reposado at its installation directory before querying it.
reposadocommon.get_main_dir = lambda: "/usr/local/bin/"

# Title prefixes identifying OS X / macOS updates.
OS_TITLE_PREFIXES = ("OS X", "Mac OS X", "macOS")

products = reposadocommon.get_product_info()
# Product ids of the updates to sync, printed one per line at the end.
args = []
for product_id, product in products.items():
    try:
        title = product["title"]
    except KeyError:
        print("Missing title in {}, skipping".format(product), file=sys.stderr)
        continue

    try:
        version_parts = product["version"].split(".")
        major_version = int(version_parts[0])
    except Exception:
        print(
            "Cannot extract the major version number in {}, skipping".format(product),
            file=sys.stderr,
        )
        continue

    # Keep only operating system updates up to major version 10.
    wanted = title.startswith(OS_TITLE_PREFIXES) and major_version <= 10
    if not wanted:
        print("Skipping %r for repo_sync" % title, file=sys.stderr)
        continue
    args.append(product_id)

# JUST_ONE_PACKAGE limits the output to the first matching product.
if "JUST_ONE_PACKAGE" in os.environ:
    args = args[:1]

print("\n".join(args))

# diff --git a/tools/crashreporter/system-symbols/mac/macpkg.py b/tools/crashreporter/system-symbols/mac/macpkg.py
# new file mode 100644
# index 0000000000..d606196c1c
# --- /dev/null
# +++ b/tools/crashreporter/system-symbols/mac/macpkg.py
# @@ -0,0 +1,199 @@
# +# This Source Code Form is subject to the terms of the Mozilla Public
# +# License, v. 2.0. If a copy of the MPL was not distributed with this
# +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import bz2
import io
import lzma
import os
import struct
import zlib
from xml.etree.ElementTree import XML


class ZlibFile(object):
    """
    Read-only file object wrapper that decompresses a zlib stream while
    reading from the wrapped file object.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.decompressor = zlib.decompressobj()
        # Decompressed bytes that a previous read() produced but did not return.
        self.buf = b""

    def read(self, length):
        """Return up to `length` decompressed bytes (fewer at end of stream)."""
        cutoff = min(length, len(self.buf))
        result = self.buf[:cutoff]
        self.buf = self.buf[cutoff:]
        while len(result) < length:
            buf = self.fileobj.read(io.DEFAULT_BUFFER_SIZE)
            if not buf:
                break
            buf = self.decompressor.decompress(buf)
            cutoff = min(length - len(result), len(buf))
            result += buf[:cutoff]
            self.buf += buf[cutoff:]
        return result


def unxar(fileobj):
    """
    Iterate over the regular files stored in a XAR archive.

    Yields (filename, content) pairs where content is a file-like object
    exposing read().  `fileobj` must be seekable, and each content object
    must be consumed before advancing the generator because they all share
    the position of `fileobj`.

    Raises Exception when the archive is malformed or uses an unsupported
    version or encoding.
    """
    magic = fileobj.read(4)
    if magic != b"xar!":
        raise Exception("Not a XAR?")

    header_size = fileobj.read(2)
    if len(header_size) != 2:
        raise Exception("Failed to read XAR header")
    header_size = struct.unpack(">H", header_size)[0]
    # 28 bytes = magic (4) + header size (2) + the 22 bytes unpacked below.
    if header_size < 28 or header_size > 64:
        raise Exception(
            f"Don't know how to handle a {header_size} bytes XAR header size"
        )
    header_size -= 6  # what we've read so far.
    header = fileobj.read(header_size)
    if len(header) != header_size:
        raise Exception("Failed to read XAR header")
    (
        version,
        compressed_toc_len,
        uncompressed_toc_len,
        _checksum_type,
    ) = struct.unpack(">HQQL", header[:22])
    if version != 1:
        raise Exception(f"XAR version {version} not supported")
    toc = fileobj.read(compressed_toc_len)
    # File offsets in the table of contents are relative to the end of it.
    base = fileobj.tell()
    if len(toc) != compressed_toc_len:
        raise Exception("Failed to read XAR TOC")
    toc = zlib.decompress(toc)
    if len(toc) != uncompressed_toc_len:
        raise Exception("Corrupted XAR?")
    toc = XML(toc).find("toc")
    for f in toc.findall("file"):
        if f.find("type").text != "file":
            continue
        filename = f.find("name").text
        data = f.find("data")
        length = int(data.find("length").text)
        size = int(data.find("size").text)
        offset = int(data.find("offset").text)
        encoding = data.find("encoding").get("style")
        fileobj.seek(base + offset, os.SEEK_SET)
        content = Take(fileobj, length)
        if encoding == "application/octet-stream":
            if length != size:
                raise Exception(f"{length} != {size}")
        elif encoding == "application/x-bzip2":
            content = bz2.BZ2File(content)
        elif encoding == "application/x-gzip":
            # Despite the encoding saying gzip, it is in fact, a raw zlib stream.
            content = ZlibFile(content)
        else:
            raise Exception(f"XAR encoding {encoding} not supported")

        yield filename, content


class Pbzx(object):
    """
    Read-only file object wrapper that decodes a PBZX stream: a sequence of
    chunks, each stored either verbatim or LZMA-compressed.
    """

    def __init__(self, fileobj):
        magic = fileobj.read(4)
        if magic != b"pbzx":
            raise Exception("Not a PBZX payload?")
        # The first thing in the file looks like the size of each
        # decompressed chunk except the last one. It should match
        # decompressed_size in all cases except last, but we don't
        # check, so the value is read only to skip over it.
        fileobj.read(8)
        self.fileobj = fileobj
        self._init_one_chunk()

    def _init_one_chunk(self):
        """Load the next chunk; self.chunk becomes b"" at end of stream."""
        self.offset = 0
        header = self.fileobj.read(16)
        if header == b"":
            # Must be bytes, like every other chunk, so that reads past the
            # end of the stream keep returning bytes.
            self.chunk = b""
            return
        if len(header) != 16:
            raise Exception("Corrupted PBZX payload?")
        decompressed_size, compressed_size = struct.unpack(">QQ", header)
        chunk = self.fileobj.read(compressed_size)
        # A chunk whose sizes match is stored uncompressed.
        if compressed_size == decompressed_size:
            self.chunk = chunk
        else:
            self.chunk = lzma.decompress(chunk)
        if len(self.chunk) != decompressed_size:
            raise Exception("Corrupted PBZX payload?")

    def read(self, length=None):
        """
        Return up to `length` decoded bytes, or everything that is left when
        `length` is None.  Returns b"" at end of stream.
        """
        if length == 0:
            return b""
        parts = []
        remaining = length  # None means "until end of stream"
        # Iterative on purpose: one loop turn per chunk, instead of one
        # recursion level per chunk.
        while True:
            available = len(self.chunk) - self.offset
            if remaining is not None and available >= remaining:
                start = self.offset
                self.offset += remaining
                parts.append(self.chunk[start : self.offset])
                break
            parts.append(self.chunk[self.offset :])
            if remaining is not None:
                remaining -= available
            self._init_one_chunk()
            if not self.chunk:
                break
        return b"".join(parts)


class Take(object):
    """
    File object wrapper that allows to read at most a certain length.
    """

    def __init__(self, fileobj, limit):
        self.fileobj = fileobj
        # Number of bytes that may still be read through this wrapper.
        self.limit = limit

    def read(self, length=None):
        """Read up to `length` bytes, never going past the remaining limit."""
        if length is None:
            length = self.limit
        else:
            length = min(length, self.limit)
        result = self.fileobj.read(length)
        self.limit -= len(result)
        return result


def uncpio(fileobj):
    """
    Iterate over the entries of a CPIO archive in portable ASCII format.

    Yields (name, mode, content) where name is the entry path as bytes with
    any leading "./" or "/" removed, mode is the integer file mode and
    content is a file-like object limited to the entry's data.  Iteration
    stops at the "TRAILER!!!" entry.

    Raises Exception on malformed input or on names containing "..".
    """
    while True:
        magic = fileobj.read(6)
        # CPIO payloads in mac pkg files are using the portable ASCII format.
        if magic != b"070707":
            if magic.startswith(b"0707"):
                raise Exception("Unsupported CPIO format")
            raise Exception("Not a CPIO header")
        header = fileobj.read(70)
        if len(header) != 70:
            raise Exception("Corrupted CPIO header")
        (
            _dev,
            _ino,
            mode,
            _uid,
            _gid,
            _nlink,
            _rdev,
            _mtime,
            namesize,
            filesize,
        ) = struct.unpack(">6s6s6s6s6s6s6s11s6s11s", header)
        # All header fields are octal numbers written in ASCII.
        mode = int(mode, 8)
        namesize = int(namesize, 8)
        filesize = int(filesize, 8)
        name = fileobj.read(namesize)
        if not name or name[-1] != 0:
            raise Exception("File name is not NUL terminated")
        name = name[:-1]
        if name == b"TRAILER!!!":
            break

        if (
            b"/../" in name
            or name.startswith(b"../")
            or name.endswith(b"/..")
            or name == b".."
        ):
            raise Exception(".. is forbidden in file name")
        # Strip only a leading "./" (or a lone "."), not the first character
        # of a dot-file name such as ".hidden".
        if name == b"." or name.startswith(b"./"):
            name = name[1:]
        if name.startswith(b"/"):
            name = name[1:]
        content = Take(fileobj, filesize)
        yield name, mode, content
        # Ensure the content is totally consumed
        while content.read(4096):
            pass

# diff --git a/tools/crashreporter/system-symbols/mac/run.sh b/tools/crashreporter/system-symbols/mac/run.sh
# new file mode 100755
# index 0000000000..8dec95dffe
# --- /dev/null
# +++ b/tools/crashreporter/system-symbols/mac/run.sh
# @@ -0,0 +1,59 @@
# +#!/bin/sh
# +
# +set -v -e -x
# +
# +base="$(realpath "$(dirname "$0")")"
# +export PATH="$PATH:/builds/worker/bin:$base:${MOZ_FETCHES_DIR}/dmg"
# +
# +cd /builds/worker
# +
# +if test "$PROCESSED_PACKAGES_INDEX" && test "$PROCESSED_PACKAGES_PATH" && test "$TASKCLUSTER_ROOT_URL"; then
# +  PROCESSED_PACKAGES="$TASKCLUSTER_ROOT_URL/api/index/v1/task/$PROCESSED_PACKAGES_INDEX/artifacts/$PROCESSED_PACKAGES_PATH"
# +fi
# +
# +if test "$PROCESSED_PACKAGES"; then
# +  rm -f processed-packages
# +  if test `curl --output /dev/null --silent --head --location "$PROCESSED_PACKAGES" -w "%{http_code}"` = 200; then
# +    curl -L "$PROCESSED_PACKAGES" | gzip -dc > processed-packages
# +  elif test -f "$PROCESSED_PACKAGES"; then
# +    gzip -dc "$PROCESSED_PACKAGES" > processed-packages
# +  fi
# +  if test -f processed-packages; then
# +    # Prevent reposado from downloading packages that have previously been
# +    # dumped.
# +    for f in $(cat processed-packages); do
# +      mkdir -p "$(dirname "$f")"
# +      touch "$f"
# +    done
# +  fi
# +fi
# +
# +mkdir -p /opt/data-reposado/html /opt/data-reposado/metadata artifacts
# +
# +# First, just fetch all the update info.
# +python3 /usr/local/bin/repo_sync --no-download
# +
# +# Next, fetch just the update packages we're interested in.
+packages=$(python3 "${base}/list-packages.py") + +for package in ${packages}; do + # repo_sync is super-chatty, let's pipe stderr to separate files + python3 /usr/local/bin/repo_sync "--product-id=${package}" 2> "artifacts/repo_sync-product-id-${package}.stderr" + # Stop downloading packages if we have more than 10 GiB of them to process + download_size=$(du -B1073741824 -s /opt/data-reposado | cut -f1) + if [ ${download_size} -gt 10 ]; then + break + fi +done + +du -sh /opt/data-reposado + +# Now scrape symbols out of anything that was downloaded. +mkdir -p symbols tmp +env TMP=tmp python3 "${base}/PackageSymbolDumper.py" --tracking-file=/builds/worker/processed-packages --dump_syms=$MOZ_FETCHES_DIR/dump_syms/dump_syms /opt/data-reposado/html/content/downloads /builds/worker/symbols + +# Hand out artifacts +gzip -c processed-packages > artifacts/processed-packages.gz + +cd symbols +zip -r9 /builds/worker/artifacts/target.crashreporter-symbols.zip ./* || echo "No symbols dumped" diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py new file mode 100644 index 0000000000..70be2a62a7 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# Any copyright is dedicated to the Public Domain. 
# http://creativecommons.org/publicdomain/zero/1.0/

import argparse
import concurrent.futures
import datetime
import os
import subprocess
import sys
import traceback
import urllib.parse
import zipfile

import requests

if sys.platform == "darwin":
    SYSTEM_DIRS = [
        "/usr/lib",
        "/System/Library/Frameworks",
        "/System/Library/PrivateFrameworks",
        "/System/Library/Extensions",
    ]
else:
    SYSTEM_DIRS = ["/lib", "/usr/lib"]
SYMBOL_SERVER_URL = (
    "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public/v1/"
)


def should_process(f, platform=sys.platform):
    """Determine if a file is a platform binary"""
    # The 'file' command can error out. One example is "illegal byte
    # sequence" on a Japanese language UTF8 text file. So we must wrap the
    # command in a try/except block to prevent the script from terminating
    # prematurely when this happens.
    try:
        filetype = subprocess.check_output(["file", "-Lb", f], text=True)
    except subprocess.CalledProcessError:
        return False
    if platform == "darwin":
        # Skip kernel extensions
        if "kext bundle" in filetype:
            return False
        return filetype.startswith("Mach-O")
    return filetype.startswith("ELF")


def get_archs(filename, platform=sys.platform):
    """
    Find the list of architectures present in a Mach-O file, as reported by
    the 'file' command.  Architecture names are translated to the ones
    dump_syms expects.
    """
    # Maps a token of the 'file' output to the dump_syms architecture name.
    known_archs = {
        "arm64e": "arm64e",
        "x86_64_haswell": "x86_64h",
        "x86_64": "x86_64",
        "i386": "i386",
    }
    output = subprocess.check_output(["file", "-Lb", filename], text=True)
    return [known_archs[token] for token in output.split(" ") if token in known_archs]


def server_has_file(filename):
    """
    Send the symbol server a HEAD request to see if it has this symbol file.
    """
    try:
        r = requests.head(
            urllib.parse.urljoin(SYMBOL_SERVER_URL, urllib.parse.quote(filename))
        )
        return r.status_code == 200
    except requests.exceptions.RequestException:
        return False


def process_file(dump_syms, path, arch, verbose, write_all):
    """
    Run dump_syms on one architecture of one binary.

    Returns (filename, contents), where filename is the bytes path
    "<debug_file>/<debug_id>/<debug_file>.sym" and contents is the raw
    dump_syms output, or (None, None) if the file could not be dumped or
    the symbol server already has it.
    """
    arch_arg = ["-a", arch]
    try:
        stderr = None if verbose else subprocess.DEVNULL
        stdout = subprocess.check_output([dump_syms] + arch_arg + [path], stderr=stderr)
    except subprocess.CalledProcessError:
        if verbose:
            print("Processing %s%s...failed." % (path, " [%s]" % arch if arch else ""))
        return None, None
    lines = stdout.splitlines()
    if not lines:
        # dump_syms succeeded but printed nothing: there is no MODULE line.
        return None, None
    # First line: MODULE <platform> <cpu arch> <debug id> <debug file>
    bits = lines[0].split(b" ", 4)
    if len(bits) != 5:
        return None, None
    _, _platform, _cpu_arch, debug_id, debug_file = bits
    if verbose:
        sys.stdout.write("Processing %s [%s]..." % (path, arch))
    filename = os.path.join(debug_file, debug_id, debug_file + b".sym")
    # see if the server already has this symbol file
    if not write_all:
        if server_has_file(filename):
            if verbose:
                print("already on server.")
            return None, None
    # Collect for uploading
    if verbose:
        print("done.")
    return filename, stdout


def get_files(paths, platform=sys.platform):
    """
    For each entry passed in paths if the path is a file that can
    be processed, yield it, otherwise if it is a directory yield files
    under it that can be processed.
    """
    for path in paths:
        if os.path.isdir(path):
            for root, subdirs, files in os.walk(path):
                for f in files:
                    fullpath = os.path.join(root, f)
                    if should_process(fullpath, platform=platform):
                        yield fullpath
        elif should_process(path, platform=platform):
            yield path


def process_paths(
    paths, executor, dump_syms, verbose, write_all=False, platform=sys.platform
):
    """
    Dump symbols for every binary found under paths, one executor job per
    (file, architecture), and yield each job's (filename, contents) result
    as it completes.  Jobs that raise are reported on stderr and skipped.
    """
    jobs = set()
    for fullpath in get_files(paths, platform=platform):
        # Follow symlinks by hand so that relative targets are resolved
        # against the directory containing the link.
        while os.path.islink(fullpath):
            fullpath = os.path.join(os.path.dirname(fullpath), os.readlink(fullpath))
        if platform == "linux":
            # See if there's a -dbg package installed and dump that instead.
            dbgpath = "/usr/lib/debug" + fullpath
            if os.path.isfile(dbgpath):
                fullpath = dbgpath
        for arch in get_archs(fullpath, platform=platform):
            jobs.add(
                executor.submit(
                    process_file, dump_syms, fullpath, arch, verbose, write_all
                )
            )
    for job in concurrent.futures.as_completed(jobs):
        try:
            yield job.result()
        except Exception as e:
            traceback.print_exc(file=sys.stderr)
            print("Error: %s" % str(e), file=sys.stderr)


def main():
    """
    Command line entry point: gather symbols into symbols.zip in the
    current directory.  Returns 1 if dump_syms cannot be used, else 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Produce verbose output"
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Gather all system symbols, not just missing ones.",
    )
    parser.add_argument("dump_syms", help="Path to dump_syms binary")
    parser.add_argument(
        "files", nargs="*", help="Specific files from which to gather symbols."
    )
    args = parser.parse_args()
    args.dump_syms = os.path.abspath(args.dump_syms)
    # check for the dump_syms binary
    if (
        not os.path.isabs(args.dump_syms)
        or not os.path.exists(args.dump_syms)
        or not os.access(args.dump_syms, os.X_OK)
    ):
        print(
            "Error: can't find dump_syms binary at %s!" % args.dump_syms,
            file=sys.stderr,
        )
        return 1
    file_list = set()
    zip_path = os.path.abspath("symbols.zip")
    # The executor is a context manager so its workers are always shut down.
    with concurrent.futures.ProcessPoolExecutor() as executor, zipfile.ZipFile(
        zip_path, "w", zipfile.ZIP_DEFLATED
    ) as zf:
        for filename, contents in process_paths(
            args.files if args.files else SYSTEM_DIRS,
            executor,
            args.dump_syms,
            args.verbose,
            args.all,
        ):
            if not filename or not contents:
                continue
            # process_file() builds the name from dump_syms output, so it is
            # bytes; zipfile member names and str.join() need text.
            name = os.fsdecode(filename)
            if name in file_list:
                continue
            file_list.add(name)
            zf.writestr(name, contents)
        zf.writestr(
            "ossyms-1.0-{platform}-{date}-symbols.txt".format(
                platform=sys.platform.title(),
                date=datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
            ),
            "\n".join(sorted(file_list)),
        )
    if file_list:
        if args.verbose:
            print("Generated %s with %d symbols" % (zip_path, len(file_list)))
    else:
        os.unlink(zip_path)
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so a missing dump_syms fails the process.
    sys.exit(main())