summaryrefslogtreecommitdiffstats
path: root/tools/crashreporter/system-symbols/mac
diff options
context:
space:
mode:
Diffstat (limited to 'tools/crashreporter/system-symbols/mac')
-rwxr-xr-xtools/crashreporter/system-symbols/mac/PackageSymbolDumper.py392
-rw-r--r--tools/crashreporter/system-symbols/mac/get_update_packages.py154
-rwxr-xr-xtools/crashreporter/system-symbols/mac/list-packages.py59
-rw-r--r--tools/crashreporter/system-symbols/mac/macpkg.py199
-rwxr-xr-xtools/crashreporter/system-symbols/mac/run.sh59
-rw-r--r--tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py0
-rw-r--r--tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py214
7 files changed, 1077 insertions, 0 deletions
diff --git a/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py
new file mode 100755
index 0000000000..5e63a2f39f
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py
@@ -0,0 +1,392 @@
+#!/usr/bin/env python
+
+# Copyright 2015 Michael R. Miller.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""
+PackageSymbolDumper.py
+
+Dumps Breakpad symbols for the contents of an Apple update installer. Given a
+path to an Apple update installer as a .dmg or a path to a specific package
+within the disk image, PackageSymbolDumper mounts, traverses, and dumps symbols
+for all applicable frameworks and dylibs found within.
+
+Required tools for Linux:
+ pax
+ gzip
+ tar
+ xpwn's dmg (https://github.com/planetbeing/xpwn)
+
+Created on Apr 11, 2012
+
+@author: mrmiller
+"""
+import argparse
+import concurrent.futures
+import errno
+import logging
+import os
+import shutil
+import stat
+import subprocess
+import tempfile
+import traceback
+
+from macpkg import Pbzx, uncpio, unxar
+from scrapesymbols.gathersymbols import process_paths
+
+
def expand_pkg(pkg_path, out_path):
    """
    Expands the contents of an installer package to some directory.

    Each (name, content) pair yielded by unxar() is copied into a file called
    `name` directly inside out_path.

    @param pkg_path: a path to an installer package (.pkg)
    @param out_path: a path to hold the package contents
    """
    # unxar() reads lazily from the file object, so the package must stay open
    # for the whole loop; the `with` closes it deterministically afterwards
    # instead of leaking the handle.
    with open(pkg_path, "rb") as pkg:
        for name, content in unxar(pkg):
            with open(os.path.join(out_path, name), "wb") as fh:
                shutil.copyfileobj(content, fh)
+
+
def expand_dmg(dmg_path, out_path):
    """
    Unpacks a disk image into a directory with the `dmg` and `hfsplus` tools.

    @param dmg_path: a path to a disk image file (.dmg)
    @param out_path: a path to hold the image contents
    """
    quiet = subprocess.DEVNULL
    # The scratch file is passed to `dmg extract` as its second argument and
    # then to `hfsplus ... extractall`, which runs with out_path as its cwd.
    with tempfile.NamedTemporaryFile() as scratch:
        raw_image = scratch.name
        subprocess.check_call(["dmg", "extract", dmg_path, raw_image], stdout=quiet)
        subprocess.check_call(
            ["hfsplus", raw_image, "extractall"], stdout=quiet, cwd=out_path
        )
+
+
def expand_zip(zip_path, out_path):
    """
    Expands the contents of a ZIP archive to some directory.

    @param zip_path: a path to a ZIP archive (.zip)
    @param out_path: a path to hold the archive contents
    """
    # subprocess.DEVNULL is an integer sentinel understood by subprocess, not
    # a path: it must be passed as-is rather than handed to open().
    subprocess.check_call(
        ["unzip", "-d", out_path, zip_path], stdout=subprocess.DEVNULL
    )
+
+
def filter_files(function, path):
    """
    Walk the hierarchy rooted at path and lazily yield the full path of every
    file whose bare filename is accepted by the filter function.

    @param function: a function taking in a filename that returns true to
                     include the path
    @param path: the root path of the hierarchy to traverse
    """
    for dirpath, _subdirs, names in os.walk(path):
        yield from (
            os.path.join(dirpath, name) for name in names if function(name)
        )
+
+
def find_packages(path):
    """
    Finds installer packages (.pkg), disk images (.dmg) and ZIP archives
    (.zip) below path, going by file extension only. Files whose name starts
    with "._" are ignored. The result is whatever filter_files() returns for
    that predicate.

    @param path: root path to search for .pkg, .dmg and .zip files
    """
    wanted_extensions = (".pkg", ".dmg", ".zip")

    def is_package(filename):
        if os.path.splitext(filename)[1] not in wanted_extensions:
            return False
        return not filename.startswith("._")

    return filter_files(is_package, path)
+
+
def find_all_packages(paths):
    """
    Yield installer package files, disk images and ZIP archives found under
    each of `paths`, logging every root before it is searched.

    @param paths: list of root paths to search for .pkg, .dmg and .zip files
    """
    for search_root in paths:
        logging.info("find_all_packages: {}".format(search_root))
        yield from find_packages(search_root)
+
+
def find_payloads(path):
    """
    Finds possible installer package payloads below path: files whose name
    contains "Payload" or ".pax.gz".

    @param path: root path for an installer package
    """
    markers = ("Payload", ".pax.gz")

    def looks_like_payload(filename):
        return any(marker in filename for marker in markers)

    return filter_files(looks_like_payload, path)
+
+
def _extract_pax_stream(decompressor, payload_path, output_path):
    """
    Runs `<decompressor> -dc payload | pax -r -k -s ":^/::"` inside
    output_path without going through a shell.
    """
    # Argument lists (instead of a shell string) keep paths containing spaces
    # or shell metacharacters from being misinterpreted.
    producer = subprocess.Popen(
        [decompressor, "-dc", payload_path],
        stdout=subprocess.PIPE,
        cwd=output_path,
    )
    try:
        # As with the shell pipeline this replaces, only pax's exit status
        # decides success.
        subprocess.check_call(
            ["pax", "-r", "-k", "-s", ":^/::"],
            stdin=producer.stdout,
            cwd=output_path,
        )
    finally:
        producer.stdout.close()
        producer.wait()


def extract_payload(payload_path, output_path):
    """
    Extracts the contents of an installer package payload to a given directory.

    The format is picked from the first two bytes of the payload: "BZ"
    (bzip2) and 0x1f 0x8b (gzip) are decompressed and fed to pax, "pb" (pbzx)
    is unpacked in-process with Pbzx/uncpio, keeping regular files only.

    @param payload_path: path to an installer package's payload
    @param output_path: output path for the payload's contents
    @return True for success, False for failure.
    """
    with open(payload_path, "rb") as payload:
        header = payload.read(2)
    try:
        if header == b"BZ":
            logging.info("Extracting bzip2 payload")
            _extract_pax_stream("bzip2", payload_path, output_path)
            return True
        if header == b"\x1f\x8b":
            logging.info("Extracting gzip payload")
            _extract_pax_stream("gzip", payload_path, output_path)
            return True
        if header == b"pb":
            logging.info("Extracting pbzx payload")
            with open(payload_path, "rb") as payload:
                for path, mode, content in uncpio(Pbzx(payload)):
                    if not path or not stat.S_ISREG(mode):
                        continue
                    out = os.path.join(output_path, path.decode())
                    os.makedirs(os.path.dirname(out), exist_ok=True)
                    with open(out, "wb") as fh:
                        shutil.copyfileobj(content, fh)
            return True

        # Unsupported format. bytes.hex() zero-pads each byte and also copes
        # with payloads shorter than two bytes.
        logging.error("Unknown payload format: 0x{}".format(header.hex()))
        return False

    except Exception:
        # Still reported to the caller as a plain failure, but leave a trace
        # of what went wrong instead of swallowing it silently.
        logging.exception("Failed to extract payload: %s", payload_path)
        return False
+
+
def shutil_error_handler(caller, path, excinfo):
    """
    Error callback (used as shutil.rmtree's onerror in this file) that logs
    the path that could not be removed instead of raising.

    @param caller: the function that failed (unused)
    @param path: the path that could not be removed
    @param excinfo: exception information for the failure
    """
    message = 'Could not remove "{path}": {info}'.format(info=excinfo, path=path)
    logging.error(message)
+
+
def write_symbol_file(dest, filename, contents):
    """
    Writes a symbol file below dest unless it already exists.

    @param dest: root directory for symbol files
    @param filename: path of the symbol file relative to dest
    @param contents: bytes to store in the file
    """
    full_path = os.path.join(dest, filename)
    # Symbols that were already dumped are left untouched.
    if os.path.exists(full_path):
        return
    # exist_ok lets the file still be written when only its directory exists;
    # previously the EEXIST raised here was swallowed and the write skipped.
    os.makedirs(os.path.dirname(full_path), exist_ok=True)
    with open(full_path, "wb") as sym_file:
        sym_file.write(contents)
+
+
def dump_symbols(executor, dump_syms, path, dest):
    """
    Dumps symbols for the system library locations found below path.

    Only the System/Library/{Frameworks,PrivateFrameworks,Extensions} and
    usr/lib subdirectories that exist are handed to process_paths(); every
    non-empty (filename, contents) result is written below dest.

    @param executor: executor passed through to process_paths
    @param dump_syms: path to the dump_syms executable
    @param path: root of an extracted package or payload
    @param dest: output path for symbols
    """
    relative_dirs = (
        ("System", "Library", "Frameworks"),
        ("System", "Library", "PrivateFrameworks"),
        ("System", "Library", "Extensions"),
        ("usr", "lib"),
    )
    candidates = (os.path.join(path, *parts) for parts in relative_dirs)
    existing_paths = [candidate for candidate in candidates if os.path.exists(candidate)]

    results = process_paths(
        paths=existing_paths,
        executor=executor,
        dump_syms=dump_syms,
        verbose=True,
        write_all=True,
        platform="darwin",
    )
    for filename, contents in results:
        if not (filename and contents):
            continue
        symbol_name = str(filename, "utf-8")
        logging.info("Added symbol file " + symbol_name)
        write_symbol_file(dest, symbol_name, contents)
+
+
def dump_symbols_from_payload(executor, dump_syms, payload_path, dest):
    """
    Extracts an installer package payload into a temporary directory, dumps
    the symbols found in it and removes the temporary directory again.

    @param executor: executor passed through to dump_symbols
    @param dump_syms: path to the dump_syms executable
    @param payload_path: path to an installer package's payload
    @param dest: output path for symbols
    @return False when the payload could not be extracted, True otherwise
    """
    logging.info("Dumping symbols from payload: " + payload_path)
    scratch = tempfile.mkdtemp()
    try:
        logging.info("Extracting payload to {path}.".format(path=scratch))
        extracted = extract_payload(payload_path, scratch)
        if not extracted:
            logging.error("Could not extract payload: " + payload_path)
            return False
        dump_symbols(executor, dump_syms, scratch, dest)
        return True
    finally:
        # Runs on every exit path, including the early return above.
        shutil.rmtree(scratch, onerror=shutil_error_handler)
+
+
def dump_symbols_from_package(executor, dump_syms, pkg, dest):
    """
    Expands an installer package, disk image or ZIP archive into a temporary
    directory and dumps every symbol found in it: nested packages first
    (recursively), then payloads, then files extracted directly.

    @param executor: executor passed through to the dumping helpers
    @param dump_syms: path to the dump_syms executable
    @param pkg: path to an installer package
    @param dest: output path for symbols
    @return False if a payload failed or an exception was raised, else True
    """
    logging.info("Dumping symbols from package: " + pkg)
    successful = True
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        extension = os.path.splitext(pkg)[1]
        if extension == ".pkg":
            expand_pkg(pkg, temp_dir)
        elif extension == ".zip":
            expand_zip(pkg, temp_dir)
        else:
            # Anything else is treated as a disk image.
            expand_dmg(pkg, temp_dir)

        # check for any subpackages; their failures are logged but do not
        # change this package's result
        for subpackage in find_packages(temp_dir):
            logging.info("Found subpackage at: " + subpackage)
            if not dump_symbols_from_package(executor, dump_syms, subpackage, dest):
                logging.error("Error while dumping subpackage: " + subpackage)

        # dump symbols from any payloads (only expecting one) in the package
        for payload in find_payloads(temp_dir):
            if not dump_symbols_from_payload(executor, dump_syms, payload, dest):
                successful = False

        # dump symbols directly extracted from the package
        dump_symbols(executor, dump_syms, temp_dir, dest)

    except Exception as e:
        traceback.print_exc()
        logging.error("Exception while dumping symbols from package: {}".format(e))
        successful = False

    finally:
        if temp_dir is not None:
            shutil.rmtree(temp_dir, onerror=shutil_error_handler)

    return successful
+
+
def read_processed_packages(tracking_file):
    """
    Reads the set of already-processed packages, one per line.

    @param tracking_file: path to the tracking file, or None
    @return a set of lines; empty when tracking_file is None or missing
    """
    if tracking_file is None or not os.path.exists(tracking_file):
        return set()
    logging.info("Reading processed packages from {}".format(tracking_file))
    # Close the file deterministically instead of leaking the handle.
    with open(tracking_file, "r") as fh:
        return set(fh.read().splitlines())
+
+
def write_processed_packages(tracking_file, processed_packages):
    """
    Writes the processed packages to the tracking file, one per line.

    @param tracking_file: path to the tracking file; nothing happens if None
    @param processed_packages: collection of package paths
    """
    if tracking_file is None:
        return
    logging.info(
        "Writing {} processed packages to {}".format(
            len(processed_packages), tracking_file
        )
    )
    # The context manager guarantees the data is flushed and the handle
    # closed; sorting makes the file contents deterministic for a given set.
    with open(tracking_file, "w") as fh:
        fh.write("\n".join(sorted(processed_packages)))
+
+
def process_packages(package_finder, to, tracking_file, dump_syms):
    """
    Dumps symbols from every package produced by package_finder() that is
    not already listed in the tracking file, and records the ones handled.

    @param package_finder: callable returning an iterable of package paths
    @param to: output path for symbols
    @param tracking_file: path to the tracking file, or None
    @param dump_syms: path to the dump_syms executable
    """
    already_done = read_processed_packages(tracking_file)
    with concurrent.futures.ProcessPoolExecutor() as pool:
        for pkg in package_finder():
            if pkg in already_done:
                logging.info("Skipping already-processed package: {}".format(pkg))
                continue
            dump_symbols_from_package(pool, dump_syms, pkg, to)
            # The package is recorded whatever the dump's outcome was.
            already_done.add(pkg)
            write_processed_packages(tracking_file, already_done)
+
+
def main():
    """
    Command-line entry point: parses the arguments, configures logging,
    validates the search and destination paths and processes every package
    found below the search paths.
    """
    parser = argparse.ArgumentParser(
        description="Extracts Breakpad symbols from a Mac OS X support update."
    )
    parser.add_argument(
        "--dump_syms",
        type=str,
        default="dump_syms",
        help="path to the Breakpad dump_syms executable",
    )
    parser.add_argument(
        "--tracking-file",
        type=str,
        help="Path to a file in which to store information "
        "about already-processed packages",
    )
    parser.add_argument(
        "search", nargs="+", help="Paths to search recursively for packages"
    )
    parser.add_argument("to", type=str, help="destination path for the symbols")
    args = parser.parse_args()

    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.DEBUG,
    )
    # Only let errors through from the HTTP client library loggers.
    for noisy in ("requests.packages.urllib3.connectionpool", "urllib3"):
        logging.getLogger(noisy).setLevel(logging.ERROR)

    search_ok = bool(args.search) and all(os.path.exists(p) for p in args.search)
    if not search_ok:
        logging.error("Invalid search path")
        return
    if not os.path.exists(args.to):
        logging.error("Invalid path to destination")
        return

    process_packages(
        lambda: find_all_packages(args.search),
        args.to,
        args.tracking_file,
        args.dump_syms,
    )


if __name__ == "__main__":
    main()
diff --git a/tools/crashreporter/system-symbols/mac/get_update_packages.py b/tools/crashreporter/system-symbols/mac/get_update_packages.py
new file mode 100644
index 0000000000..3192fa3ef0
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/get_update_packages.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2015 Ted Mielczarek <ted@mielczarek.org>
+# and Michael R. Miller <michaelrmmiller@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import argparse
+import concurrent.futures
+import logging
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+
+import requests
+import urlparse
+from PackageSymbolDumper import find_packages, process_packages
+
# Matches a three-component "10.<digits>.<digits>" version number anywhere in
# a string; used to pick downloads by their title.
OSX_RE = re.compile(r"10\.[0-9]+\.[0-9]+")
+
+
def extract_dmg(dmg_path, dest):
    """
    Unpack the disk image at dmg_path into dest using the `dmg` and
    `hfsplus` command-line tools.
    """
    logging.info("extract_dmg({}, {})".format(dmg_path, dest))
    with tempfile.NamedTemporaryFile() as scratch:
        raw_image = scratch.name
        # Only the first tool's stdout is silenced.
        subprocess.check_call(
            ["dmg", "extract", dmg_path, raw_image], stdout=subprocess.DEVNULL
        )
        subprocess.check_call(["hfsplus", raw_image, "extractall"], cwd=dest)
+
+
def get_update_packages():
    """
    Yield the "fileurl" of every listed download whose title contains a
    10.x.y version number and does not contain "Combo".

    Up to 16 listing requests are made (offset 0..15); iteration stops early
    on a non-200 response or on a page without downloads.
    """
    for i in range(16):
        logging.info("get_update_packages: page " + str(i))
        url = (
            "https://km.support.apple.com/kb/index?page=downloads_browse&sort=recency"
            "&facet=all&category=PF6&locale=en_US&offset=%d" % i
        )
        res = requests.get(url)
        if res.status_code != 200:
            break
        downloads = res.json().get("downloads", [])
        if not downloads:
            break
        for d in downloads:
            title = d.get("title", "")
            if not OSX_RE.search(title) or "Combo" in title:
                continue
            logging.info("Title: " + title)
            if "fileurl" in d:
                yield d["fileurl"]
            else:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning("No fileurl in download!")
+
+
def fetch_url_to_file(url, download_dir):
    """
    Download url into download_dir, naming the file after the last component
    of the URL path.

    Returns the local file name, or None when that file already exists.
    Raises requests.HTTPError when the server answers with an error status.
    """
    # The stand-alone `urlparse` module only exists on Python 2; urlsplit
    # lives in urllib.parse on Python 3. Imported here so that this function
    # does not depend on the Python 2 name.
    from urllib.parse import urlsplit

    filename = os.path.basename(urlsplit(url).path)
    local_filename = os.path.join(download_dir, filename)
    if os.path.isfile(local_filename):
        logging.info("{} already exists, skipping".format(local_filename))
        return None
    r = requests.get(url, stream=True)
    try:
        # Do not save an HTTP error page as if it were the download.
        r.raise_for_status()
        res_len = int(r.headers.get("content-length", "0"))
        logging.info(
            "Downloading {} -> {} ({} bytes)".format(url, local_filename, res_len)
        )
        with open(local_filename, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # Release the streamed connection on every path.
        r.close()
    return local_filename
+
+
def fetch_and_extract_dmg(url, tmpdir):
    """
    Download the disk image at url into tmpdir, extract it into a fresh
    subdirectory and return the list of packages found there. Returns an
    empty list when fetch_url_to_file() reports nothing to process.
    """
    logging.info("fetch_and_extract_dmg: " + url)
    downloaded = fetch_url_to_file(url, tmpdir)
    if not downloaded:
        return []
    # Extract dmg contents to a subdir
    extraction_dir = tempfile.mkdtemp(dir=tmpdir)
    extract_dmg(downloaded, extraction_dir)
    packages = list(find_packages(extraction_dir))
    summary = "fetch_and_extract_dmg({}): found packages: {}".format(
        url, str(packages)
    )
    logging.info(summary)
    return packages
+
+
def find_update_packages(tmpdir):
    """
    Download and extract every update package URL using two worker threads,
    yielding the packages of each download as it completes. Failed downloads
    are logged and skipped.
    """
    logging.info("find_update_packages")
    # Only download 2 packages at a time.
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        jobs = {
            executor.submit(fetch_and_extract_dmg, url, tmpdir): url
            for url in get_update_packages()
        }
        for future in concurrent.futures.as_completed(jobs):
            url = jobs[future]
            if future.exception() is None:
                yield from future.result()
            else:
                logging.error(
                    "exception downloading {}: {}".format(url, future.exception())
                )
+
+
def main():
    """
    Command-line entry point: download OS X update packages into a temporary
    directory, dump symbols from them into the destination given on the
    command line, and remove the temporary directory afterwards.
    """
    parser = argparse.ArgumentParser(
        description="Download OS X update packages and dump symbols from them"
    )
    parser.add_argument(
        "--dump_syms",
        default="dump_syms",
        type=str,
        help="path to the Breakpad dump_syms executable",
    )
    parser.add_argument("to", type=str, help="destination path for the symbols")
    args = parser.parse_args()
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    for p in ("requests.packages.urllib3.connectionpool", "urllib3"):
        urllib3_logger = logging.getLogger(p)
        urllib3_logger.setLevel(logging.ERROR)

    # Create the directory before entering the try block: if mkdtemp itself
    # fails there is nothing to clean up, and the cleanup must not hide that
    # error behind a NameError on an unbound tmpdir.
    tmpdir = tempfile.mkdtemp(suffix=".osxupdates")
    try:

        def finder():
            return find_update_packages(tmpdir)

        process_packages(finder, args.to, None, args.dump_syms)
    finally:
        shutil.rmtree(tmpdir)


if __name__ == "__main__":
    main()
diff --git a/tools/crashreporter/system-symbols/mac/list-packages.py b/tools/crashreporter/system-symbols/mac/list-packages.py
new file mode 100755
index 0000000000..444c27be9d
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/list-packages.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+# Copyright 2015 Ted Mielczarek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import os
+import sys
+
+from reposadolib import reposadocommon
+
# Make reposado report /usr/local/bin/ as its main directory.
reposadocommon.get_main_dir = lambda: "/usr/local/bin/"

# Title prefixes of the products to sync.
wanted_prefixes = ("OS X", "Mac OS X", "macOS")

products = reposadocommon.get_product_info()
args = []
for product_id, product in products.items():
    if "title" not in product:
        print("Missing title in {}, skipping".format(product), file=sys.stderr)
        continue
    title = product["title"]

    try:
        major_version = int(product["version"].split(".")[0])
    except Exception:
        print(
            "Cannot extract the major version number in {}, skipping".format(product),
            file=sys.stderr,
        )
        continue

    # Keep products with a matching title and a major version of at most 10.
    if major_version <= 10 and title.startswith(wanted_prefixes):
        args.append(product_id)
    else:
        print("Skipping %r for repo_sync" % title, file=sys.stderr)

# Setting JUST_ONE_PACKAGE in the environment limits the output to one id.
if "JUST_ONE_PACKAGE" in os.environ:
    del args[1:]

print("\n".join(args))
diff --git a/tools/crashreporter/system-symbols/mac/macpkg.py b/tools/crashreporter/system-symbols/mac/macpkg.py
new file mode 100644
index 0000000000..d606196c1c
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/macpkg.py
@@ -0,0 +1,199 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import bz2
+import io
+import lzma
+import os
+import struct
+import zlib
+from xml.etree.ElementTree import XML
+
+
class ZlibFile(object):
    """
    Read-only file-like wrapper that decompresses a zlib stream read from
    another file object.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.decompressor = zlib.decompressobj()
        # Decompressed bytes produced but not yet handed to the caller.
        self.buf = b""

    def read(self, length):
        """
        Return up to `length` decompressed bytes; fewer are returned only
        once the underlying file object is exhausted.
        """
        pieces = [self.buf[: min(length, len(self.buf))]]
        self.buf = self.buf[len(pieces[0]) :]
        collected = len(pieces[0])
        while collected < length:
            raw = self.fileobj.read(io.DEFAULT_BUFFER_SIZE)
            if not raw:
                break
            data = self.decompressor.decompress(raw)
            wanted = min(length - collected, len(data))
            pieces.append(data[:wanted])
            # Whatever exceeds the request is kept for the next call.
            self.buf += data[wanted:]
            collected += wanted
        return b"".join(pieces)
+
+
def unxar(fileobj):
    """
    Generator over the top-level regular files of a XAR archive.

    Parses the XAR header and the zlib-compressed XML table of contents from
    the seekable file object, then yields a (filename, content) pair per
    top-level <file> entry of type "file". `content` is a file-like object
    giving the entry's decoded data; it reads from `fileobj`.

    Raises Exception for anything that is not a supported, consistent XAR.
    """
    if fileobj.read(4) != b"xar!":
        raise Exception("Not a XAR?")

    (header_size,) = struct.unpack(">H", fileobj.read(2))
    if header_size > 64:
        raise Exception(
            f"Don't know how to handle a {header_size} bytes XAR header size"
        )
    # The magic and the size field (6 bytes) have been consumed already.
    remaining = header_size - 6
    header = fileobj.read(remaining)
    if len(header) != remaining:
        raise Exception("Failed to read XAR header")
    version, compressed_toc_len, uncompressed_toc_len, _checksum_type = struct.unpack(
        ">HQQL", header[:22]
    )
    if version != 1:
        raise Exception(f"XAR version {version} not supported")

    compressed_toc = fileobj.read(compressed_toc_len)
    # Data offsets in the TOC are added to the position right after the TOC.
    base = fileobj.tell()
    if len(compressed_toc) != compressed_toc_len:
        raise Exception("Failed to read XAR TOC")
    toc_xml = zlib.decompress(compressed_toc)
    if len(toc_xml) != uncompressed_toc_len:
        raise Exception("Corrupted XAR?")

    for entry in XML(toc_xml).find("toc").findall("file"):
        if entry.find("type").text != "file":
            continue
        filename = entry.find("name").text
        data = entry.find("data")
        length = int(data.find("length").text)
        size = int(data.find("size").text)
        offset = int(data.find("offset").text)
        encoding = data.find("encoding").get("style")

        fileobj.seek(base + offset, os.SEEK_SET)
        content = Take(fileobj, length)
        if encoding == "application/x-bzip2":
            content = bz2.BZ2File(content)
        elif encoding == "application/x-gzip":
            # Despite the encoding saying gzip, it is in fact, a raw zlib stream.
            content = ZlibFile(content)
        elif encoding != "application/octet-stream":
            raise Exception(f"XAR encoding {encoding} not supported")
        elif length != size:
            # Unencoded data must have equal length and size fields.
            raise Exception(f"{length} != {size}")

        yield filename, content
+
+
class Pbzx(object):
    """
    Read-only file-like wrapper exposing the decoded contents of a PBZX
    payload as one continuous byte stream.
    """

    def __init__(self, fileobj):
        magic = fileobj.read(4)
        if magic != b"pbzx":
            raise Exception("Not a PBZX payload?")
        # The first thing in the file looks like the size of each
        # decompressed chunk except the last one. It should match
        # decompressed_size in all cases except last, but we don't
        # check; the field is only skipped.
        if len(fileobj.read(8)) != 8:
            raise Exception("Corrupted PBZX payload?")
        self.fileobj = fileobj
        self._init_one_chunk()

    def _init_one_chunk(self):
        """Load the next chunk into self.chunk (b"" at end of file)."""
        self.offset = 0
        header = self.fileobj.read(16)
        if header == b"":
            # Keep the sentinel a bytes object so read() never returns str.
            self.chunk = b""
            return
        if len(header) != 16:
            raise Exception("Corrupted PBZX payload?")
        decompressed_size, compressed_size = struct.unpack(">QQ", header)
        chunk = self.fileobj.read(compressed_size)
        # Equal sizes mean the chunk was stored without compression.
        if compressed_size == decompressed_size:
            self.chunk = chunk
        else:
            self.chunk = lzma.decompress(chunk)
        # Also catches a truncated uncompressed chunk.
        if len(self.chunk) != decompressed_size:
            raise Exception("Corrupted PBZX payload?")

    def read(self, length=None):
        """
        Return up to `length` bytes, or everything left when length is None
        or negative. Fewer bytes (possibly b"") are returned at end of data.
        """
        if length == 0:
            return b""
        if length is not None and length < 0:
            length = None
        parts = []
        remaining = length
        # Iterate over chunks instead of recursing once per chunk, so that
        # large reads cannot hit the recursion limit.
        while True:
            if remaining and len(self.chunk) >= self.offset + remaining:
                start = self.offset
                self.offset += remaining
                parts.append(self.chunk[start : self.offset])
                break
            tail = self.chunk[self.offset :]
            parts.append(tail)
            if remaining is not None:
                remaining -= len(tail)
            self._init_one_chunk()
            if not self.chunk:
                break
        return b"".join(parts)
+
+
class Take(object):
    """
    File object wrapper that allows to read at most a certain length.
    """

    def __init__(self, fileobj, limit):
        self.fileobj = fileobj
        # Number of bytes that may still be read through this wrapper.
        self.limit = limit

    def read(self, length=None):
        """
        Read up to `length` bytes without ever going past the limit. None or
        a negative length means "everything that is left within the limit".
        """
        # A negative length would otherwise win the min() below and make the
        # underlying file return everything, past the limit.
        if length is None or length < 0:
            length = self.limit
        else:
            length = min(length, self.limit)
        result = self.fileobj.read(length)
        self.limit -= len(result)
        return result
+
+
def uncpio(fileobj):
    """
    Generator over the entries of a CPIO archive in portable ASCII format.

    Yields (name, mode, content) per entry until the TRAILER!!! entry:
    `name` is the entry path as bytes with any leading "./" and "/" removed,
    `mode` is the integer file mode and `content` is a file-like object
    limited to the entry's data. Unread data is skipped when the generator
    is resumed.

    Raises Exception on malformed headers and on names with a ".." component.
    """
    while True:
        magic = fileobj.read(6)
        # CPIO payloads in mac pkg files are using the portable ASCII format.
        if magic != b"070707":
            if magic.startswith(b"0707"):
                raise Exception("Unsupported CPIO format")
            raise Exception("Not a CPIO header")
        header = fileobj.read(70)
        (
            _dev,
            _ino,
            mode,
            _uid,
            _gid,
            _nlink,
            _rdev,
            _mtime,
            namesize,
            filesize,
        ) = struct.unpack(">6s6s6s6s6s6s6s11s6s11s", header)
        # Numeric fields are octal ASCII.
        mode = int(mode, 8)
        namesize = int(namesize, 8)
        filesize = int(filesize, 8)
        name = fileobj.read(namesize)
        # endswith() also rejects an empty (truncated) name cleanly.
        if not name.endswith(b"\0"):
            raise Exception("File name is not NUL terminated")
        name = name[:-1]
        if name == b"TRAILER!!!":
            break

        # Reject ".." as a path component wherever it appears, so that no
        # entry can point outside of the extraction directory.
        if b".." in name.split(b"/"):
            raise Exception(".. is forbidden in file name")
        # Drop a leading "./" only: stripping a bare leading dot would turn
        # ".../x" into "../x" and rename dot-files.
        if name == b".":
            name = b""
        elif name.startswith(b"./"):
            name = name[2:]
        # Never hand out an absolute path, however many slashes lead it.
        name = name.lstrip(b"/")
        content = Take(fileobj, filesize)
        yield name, mode, content
        # Ensure the content is totally consumed
        while content.read(4096):
            pass
diff --git a/tools/crashreporter/system-symbols/mac/run.sh b/tools/crashreporter/system-symbols/mac/run.sh
new file mode 100755
index 0000000000..8dec95dffe
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/run.sh
@@ -0,0 +1,59 @@
#!/bin/sh
# Syncs macOS update packages with reposado, dumps Breakpad symbols from
# them and stores the results under /builds/worker/artifacts.

set -v -e -x

base="$(realpath "$(dirname "$0")")"
export PATH="$PATH:/builds/worker/bin:$base:${MOZ_FETCHES_DIR}/dmg"

cd /builds/worker

if test "$PROCESSED_PACKAGES_INDEX" && test "$PROCESSED_PACKAGES_PATH" && test "$TASKCLUSTER_ROOT_URL"; then
  PROCESSED_PACKAGES="$TASKCLUSTER_ROOT_URL/api/index/v1/task/$PROCESSED_PACKAGES_INDEX/artifacts/$PROCESSED_PACKAGES_PATH"
fi

if test "$PROCESSED_PACKAGES"; then
  rm -f processed-packages
  # PROCESSED_PACKAGES may be a URL or a local file; a failing curl must not
  # abort the script under `set -e`, hence the `|| true`.
  http_code="$(curl --output /dev/null --silent --head --location "$PROCESSED_PACKAGES" -w "%{http_code}" || true)"
  if test "$http_code" = 200; then
    curl -L "$PROCESSED_PACKAGES" | gzip -dc > processed-packages
  elif test -f "$PROCESSED_PACKAGES"; then
    gzip -dc "$PROCESSED_PACKAGES" > processed-packages
  fi
  if test -f processed-packages; then
    # Prevent reposado from downloading packages that have previously been
    # dumped.
    # Read line by line so paths containing whitespace stay intact; the
    # `|| [ -n "$f" ]` keeps a last line that has no trailing newline.
    while IFS= read -r f || [ -n "$f" ]; do
      [ -n "$f" ] || continue
      mkdir -p "$(dirname "$f")"
      touch "$f"
    done < processed-packages
  fi
fi

mkdir -p /opt/data-reposado/html /opt/data-reposado/metadata artifacts

# First, just fetch all the update info.
python3 /usr/local/bin/repo_sync --no-download

# Next, fetch just the update packages we're interested in.
packages=$(python3 "${base}/list-packages.py")

# Word splitting of ${packages} is intended: one product id per word.
for package in ${packages}; do
  # repo_sync is super-chatty, let's pipe stderr to separate files
  python3 /usr/local/bin/repo_sync "--product-id=${package}" 2> "artifacts/repo_sync-product-id-${package}.stderr"
  # Stop downloading packages if we have more than 10 GiB of them to process
  download_size=$(du -B1073741824 -s /opt/data-reposado | cut -f1)
  if [ "${download_size}" -gt 10 ]; then
    break
  fi
done

du -sh /opt/data-reposado

# Now scrape symbols out of anything that was downloaded.
mkdir -p symbols tmp
env TMP=tmp python3 "${base}/PackageSymbolDumper.py" --tracking-file=/builds/worker/processed-packages "--dump_syms=${MOZ_FETCHES_DIR}/dump_syms/dump_syms" /opt/data-reposado/html/content/downloads /builds/worker/symbols

# Hand out artifacts
gzip -c processed-packages > artifacts/processed-packages.gz

cd symbols
zip -r9 /builds/worker/artifacts/target.crashreporter-symbols.zip ./* || echo "No symbols dumped"
diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py
diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py
new file mode 100644
index 0000000000..70be2a62a7
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+# Any copyright is dedicated to the Public Domain.
+# http://creativecommons.org/publicdomain/zero/1.0/
+
+import argparse
+import concurrent.futures
+import datetime
+import os
+import subprocess
+import sys
+import traceback
+import urllib
+import zipfile
+
+import requests
+
# Directories scanned by default when no explicit files are given.
SYSTEM_DIRS = (
    [
        "/usr/lib",
        "/System/Library/Frameworks",
        "/System/Library/PrivateFrameworks",
        "/System/Library/Extensions",
    ]
    if sys.platform == "darwin"
    else ["/lib", "/usr/lib"]
)
# Base URL that symbol file names are joined onto for the HEAD check.
SYMBOL_SERVER_URL = (
    "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public/v1/"
)
+
+
def should_process(f, platform=sys.platform):
    """
    Determine if a file is a platform binary.

    On "darwin" that is a file whose `file -Lb` description starts with
    "Mach-O" and does not mention "kext bundle"; on any other platform, a
    file whose description starts with "ELF".
    """
    # The 'file' command can error out. One example is "illegal byte
    # sequence" on a Japanese language UTF8 text file. So we must wrap the
    # command in a try/except block to prevent the script from terminating
    # prematurely when this happens. The guard covers every platform.
    try:
        filetype = subprocess.check_output(["file", "-Lb", f], text=True)
    except subprocess.CalledProcessError:
        return False
    if platform == "darwin":
        # Skip kernel extensions
        if "kext bundle" in filetype:
            return False
        return filetype.startswith("Mach-O")
    return filetype.startswith("ELF")
+
+
def get_archs(filename, platform=sys.platform):
    """
    List the architectures named in the `file -Lb` description of filename.

    The description is split on single spaces and every word equal to
    arm64e, x86_64_haswell (reported as x86_64h), x86_64 or i386 contributes
    one entry, in order of appearance. The platform argument is accepted but
    not consulted.
    """
    reported_as = {
        "arm64e": "arm64e",
        "x86_64_haswell": "x86_64h",
        "x86_64": "x86_64",
        "i386": "i386",
    }
    description = subprocess.check_output(["file", "-Lb", filename], text=True)
    return [reported_as[word] for word in description.split(" ") if word in reported_as]
+
+
def server_has_file(filename):
    """
    Ask the symbol server, with a HEAD request, whether it already holds the
    given symbol file. Any request failure counts as "no".
    """
    url = urllib.parse.urljoin(SYMBOL_SERVER_URL, urllib.parse.quote(filename))
    try:
        response = requests.head(url)
    except requests.exceptions.RequestException:
        return False
    return response.status_code == 200
+
+
def process_file(dump_syms, path, arch, verbose, write_all):
    """
    Run dump_syms on one architecture of one binary.

    Returns (filename, contents), where filename is the bytes path
    <debug_file>/<debug_id>/<debug_file>.sym built from the first output line
    and contents is the raw dump_syms output. Returns (None, None) when
    dump_syms fails, when the first line does not have five fields, or when
    write_all is false and the symbol server already has the file.
    """
    command = [dump_syms, "-a", arch, path]
    try:
        stdout = subprocess.check_output(
            command, stderr=None if verbose else subprocess.DEVNULL
        )
    except subprocess.CalledProcessError:
        if verbose:
            print("Processing %s%s...failed." % (path, " [%s]" % arch if arch else ""))
        return None, None

    # First output line: five space-separated fields, the last two being the
    # debug id and the debug file name.
    fields = stdout.splitlines()[0].split(b" ", 4)
    if len(fields) != 5:
        return None, None
    debug_id, debug_file = fields[3], fields[4]
    if verbose:
        sys.stdout.write("Processing %s [%s]..." % (path, arch))
    filename = os.path.join(debug_file, debug_id, debug_file + b".sym")
    # see if the server already has this symbol file
    if not write_all and server_has_file(filename):
        if verbose:
            print("already on server.")
        return None, None
    # Collect for uploading
    if verbose:
        print("done.")
    return filename, stdout
+
+
def get_files(paths, platform=sys.platform):
    """
    Yield every processable file named by paths: a plain path is yielded if
    should_process() accepts it, a directory contributes all accepted files
    found anywhere below it.
    """
    for path in paths:
        if not os.path.isdir(path):
            if should_process(path, platform=platform):
                yield path
            continue
        for root, _subdirs, files in os.walk(path):
            candidates = (os.path.join(root, f) for f in files)
            for fullpath in candidates:
                if should_process(fullpath, platform=platform):
                    yield fullpath
+
+
def process_paths(
    paths, executor, dump_syms, verbose, write_all=False, platform=sys.platform
):
    """
    Submit one process_file() job per (file, architecture) found under paths
    and yield each job's result as it completes. A job that raised is
    reported on stderr and produces no result.
    """

    def resolve(candidate):
        # Follow symlinks by hand, relative to the link's own directory.
        while os.path.islink(candidate):
            candidate = os.path.join(
                os.path.dirname(candidate), os.readlink(candidate)
            )
        if platform == "linux":
            # See if there's a -dbg package installed and dump that instead.
            dbgpath = "/usr/lib/debug" + candidate
            if os.path.isfile(dbgpath):
                return dbgpath
        return candidate

    jobs = set()
    for found in get_files(paths, platform=platform):
        fullpath = resolve(found)
        for arch in get_archs(fullpath, platform=platform):
            future = executor.submit(
                process_file, dump_syms, fullpath, arch, verbose, write_all
            )
            jobs.add(future)

    for job in concurrent.futures.as_completed(jobs):
        try:
            yield job.result()
        except Exception as e:
            traceback.print_exc(file=sys.stderr)
            print("Error: %s" % str(e), file=sys.stderr)
+
+
def main():
    """
    Command-line entry point: dump symbols for the given files (or the
    default system directories) into symbols.zip in the current directory.

    Returns 1 when the dump_syms binary is unusable, None otherwise. The zip
    file is removed again when no symbol was gathered.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Produce verbose output"
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Gather all system symbols, not just missing ones.",
    )
    parser.add_argument("dump_syms", help="Path to dump_syms binary")
    parser.add_argument(
        "files", nargs="*", help="Specific files from which to gather symbols."
    )
    args = parser.parse_args()
    args.dump_syms = os.path.abspath(args.dump_syms)
    # check for the dump_syms binary
    if (
        not os.path.isabs(args.dump_syms)
        or not os.path.exists(args.dump_syms)
        or not os.access(args.dump_syms, os.X_OK)
    ):
        print(
            "Error: can't find dump_syms binary at %s!" % args.dump_syms,
            file=sys.stderr,
        )
        return 1
    file_list = set()
    zip_path = os.path.abspath("symbols.zip")
    # Both the worker pool and the archive are released on every exit path.
    with concurrent.futures.ProcessPoolExecutor() as executor, zipfile.ZipFile(
        zip_path, "w", zipfile.ZIP_DEFLATED
    ) as zf:
        for filename, contents in process_paths(
            args.files if args.files else SYSTEM_DIRS,
            executor,
            args.dump_syms,
            args.verbose,
            args.all,
        ):
            if not (filename and contents):
                continue
            # process_file() builds the name from dump_syms' byte output,
            # while zip member names and the listing below need text.
            if isinstance(filename, bytes):
                filename = filename.decode("utf-8")
            if filename in file_list:
                continue
            file_list.add(filename)
            zf.writestr(filename, contents)
        zf.writestr(
            "ossyms-1.0-{platform}-{date}-symbols.txt".format(
                platform=sys.platform.title(),
                date=datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
            ),
            "\n".join(sorted(file_list)),
        )
    if file_list:
        if args.verbose:
            print("Generated %s with %d symbols" % (zip_path, len(file_list)))
    else:
        # Remove the very file that was created above.
        os.unlink(zip_path)


if __name__ == "__main__":
    # Propagate main()'s error code (None means success) to the shell.
    sys.exit(main())