diff options
Diffstat (limited to 'tools/crashreporter/system-symbols/mac')
7 files changed, 920 insertions, 0 deletions
diff --git a/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py new file mode 100755 index 0000000000..00f5dd7499 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python + +# Copyright 2015 Michael R. Miller. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +PackageSymbolDumper.py + +Dumps Breakpad symbols for the contents of an Apple update installer. Given a +path to an Apple update installer as a .dmg or a path to a specific package +within the disk image, PackageSymbolDumper mounts, traverses, and dumps symbols +for all applicable frameworks and dylibs found within. 
+ +Required tools for Linux: + pax + gzip + tar + xar (http://code.google.com/p/xar/) + xpwn's dmg (https://github.com/planetbeing/xpwn) + +Created on Apr 11, 2012 + +@author: mrmiller +""" +from __future__ import absolute_import + +import argparse +import concurrent.futures +import errno +import glob +import logging +import os +import shutil +import subprocess +import tempfile + +from scrapesymbols.gathersymbols import process_paths + + +def expand_pkg(pkg_path, out_path): + """ + Expands the contents of an installer package to some directory. + + @param pkg_path: a path to an installer package (.pkg) + @param out_path: a path to hold the package contents + """ + subprocess.check_call( + 'cd "{dest}" && xar -x -f "{src}"'.format(src=pkg_path, dest=out_path), + shell=True, + ) + + +def filter_files(function, path): + """ + Yield file paths matching a filter function by walking the + hierarchy rooted at path. + + @param function: a function taking in a filename that returns true to + include the path + @param path: the root path of the hierarchy to traverse + """ + for root, _dirs, files in os.walk(path): + for filename in files: + if function(filename): + yield os.path.join(root, filename) + + +def find_packages(path): + """ + Returns a list of installer packages (as determined by the .pkg extension) + found within path. + + @param path: root path to search for .pkg files + """ + return filter_files(lambda filename: os.path.splitext(filename)[1] == ".pkg", path) + + +def find_all_packages(paths): + """ + Yield installer package files found in all of `paths`. + + @param path: list of root paths to search for .pkg files + """ + for path in paths: + logging.info("find_all_packages: {}".format(path)) + for pkg in find_packages(path): + yield pkg + + +def find_payloads(path): + """ + Returns a list of possible installer package payload paths. 
def extract_payload(payload_path, output_path):
    """
    Extract the contents of an installer package payload to a directory.

    Supported payload formats are bzip2, gzip and pbzx; anything else is
    rejected with an error log.

    @param payload_path: path to an installer package's payload
    @param output_path: output path for the payload's contents
    @return True for success, False for failure.
    """
    # Read the two-byte magic with a context manager so the handle is not
    # leaked; the file is binary, so all comparisons below must use bytes
    # literals (comparing bytes to str is always False on Python 3).
    with open(payload_path, "rb") as payload_file:
        header = payload_file.read(2)
    try:
        if header in (b"BZ", b"\x1f\x8b"):
            extract = "bzip2" if header == b"BZ" else "gzip"
            logging.info("Extracting {} payload".format(extract))
            subprocess.check_call(
                'cd {dest} && {extract} -dc {payload} | pax -r -k -s ":^/::"'.format(
                    extract=extract, payload=payload_path, dest=output_path
                ),
                shell=True,
            )
            return True
        elif header == b"pb":
            logging.info("Extracting pbzx payload")
            payload_dir = os.path.dirname(payload_path)
            # First, unpack the PBZX into cpio parts.
            subprocess.check_call(["parse_pbzx.py", payload_path], cwd=payload_dir)
            # Next, decompress any parts that are .xz, and feed them all into pax.
            pax_proc = subprocess.Popen(
                ["pax", "-r", "-k", "-s", ":^/::"],
                stdin=subprocess.PIPE,
                cwd=output_path,
            )
            for part in sorted(glob.glob(os.path.join(payload_dir, "Payload.part*"))):
                if part.endswith(".xz"):
                    logging.info("Extracting xz part {}".format(part))
                    # Shelling out to xz streams the data instead of
                    # buffering whole parts in memory.
                    xz_proc = subprocess.Popen(
                        ["xz", "-dc", part], stdout=subprocess.PIPE, cwd=payload_dir
                    )
                    shutil.copyfileobj(xz_proc.stdout, pax_proc.stdin)
                    xz_proc.wait()
                else:
                    logging.info("Copying plain cpio part {}".format(part))
                    with open(part, "rb") as f:
                        shutil.copyfileobj(f, pax_proc.stdin)
            pax_proc.stdin.close()
            pax_proc.wait()
            return True
        else:
            # Unsupported format. bytes.hex() replaces the old ord()-based
            # formatting, which raised TypeError on Python 3 (indexing bytes
            # yields ints, not one-character strings).
            logging.error("Unknown payload format: 0x{}".format(header.hex()))
            return False

    except subprocess.CalledProcessError:
        return False


def shutil_error_handler(caller, path, excinfo):
    """onerror callback for shutil.rmtree: log the failure and keep going."""
    logging.error('Could not remove "{path}": {info}'.format(path=path, info=excinfo))


def write_symbol_file(dest, filename, contents):
    """
    Write a single Breakpad symbol file under dest, creating intermediate
    directories as needed.

    @param dest: root output directory
    @param filename: path of the symbol file relative to dest
    @param contents: raw bytes to write
    """
    full_path = os.path.join(dest, filename)
    # exist_ok=True fixes the old bug where an already-existing directory
    # raised EEXIST and the except clause silently skipped writing the file.
    os.makedirs(os.path.dirname(full_path), exist_ok=True)
    with open(full_path, "wb") as f:
        f.write(contents)
def dump_symbols_from_payload(executor, dump_syms, payload_path, dest):
    """
    Dump all the symbols found inside the payload of an installer package.

    @param executor: concurrent.futures executor used to parallelize dumping
    @param dump_syms: path to the dump_syms executable
    @param payload_path: path to an installer package's payload
    @param dest: output path for symbols
    """
    temp_dir = None
    logging.info("Dumping symbols from payload: " + payload_path)
    try:
        temp_dir = tempfile.mkdtemp()
        logging.info("Extracting payload to {path}.".format(path=temp_dir))
        if not extract_payload(payload_path, temp_dir):
            logging.error("Could not extract payload: " + payload_path)
            return

        # Only these system locations are expected to contain frameworks
        # and dylibs worth dumping.
        system_library = os.path.join("System", "Library")
        subdirectories = [
            os.path.join(system_library, "Frameworks"),
            os.path.join(system_library, "PrivateFrameworks"),
            os.path.join("usr", "lib"),
        ]
        paths_to_dump = [os.path.join(temp_dir, d) for d in subdirectories]

        for filename, contents in process_paths(
            paths_to_dump, executor, dump_syms, False, platform="darwin"
        ):
            if filename and contents:
                logging.info("Added symbol file " + filename)
                write_symbol_file(dest, filename, contents)

    finally:
        if temp_dir is not None:
            shutil.rmtree(temp_dir, onerror=shutil_error_handler)


def dump_symbols_from_package(executor, dump_syms, pkg, dest):
    """
    Dump all the symbols found inside an installer package (recursing into
    any subpackages it contains).

    @param executor: concurrent.futures executor used to parallelize dumping
    @param dump_syms: path to the dump_syms executable
    @param pkg: path to an installer package
    @param dest: output path for symbols
    """
    temp_dir = None
    logging.info("Dumping symbols from package: " + pkg)
    try:
        temp_dir = tempfile.mkdtemp()
        expand_pkg(pkg, temp_dir)

        # check for any subpackages
        for subpackage in find_packages(temp_dir):
            logging.warning("UNTESTED: Found subpackage at: " + subpackage)
            dump_symbols_from_package(executor, dump_syms, subpackage, dest)

        # dump symbols from any payloads (only expecting one) in the package
        for payload in find_payloads(temp_dir):
            dump_symbols_from_payload(executor, dump_syms, payload, dest)

    except Exception as e:
        # Deliberately best-effort: a single broken package must not abort
        # the whole run.
        logging.error("Exception while dumping symbols from package: {}".format(e))

    finally:
        if temp_dir is not None:
            shutil.rmtree(temp_dir, onerror=shutil_error_handler)


def read_processed_packages(tracking_file):
    """Return the set of package names recorded in tracking_file (empty when
    the file is absent or no tracking file was given)."""
    if tracking_file is None or not os.path.exists(tracking_file):
        return set()
    logging.info("Reading processed packages from {}".format(tracking_file))
    with open(tracking_file, "r") as f:
        return set(f.read().splitlines())


def write_processed_packages(tracking_file, processed_packages):
    """Persist processed_packages (one name per line) to tracking_file; a
    no-op when no tracking file was given."""
    if tracking_file is None:
        return
    logging.info(
        "Writing {} processed packages to {}".format(
            len(processed_packages), tracking_file
        )
    )
    with open(tracking_file, "w") as f:
        f.write("\n".join(processed_packages))


def process_packages(package_finder, to, tracking_file, dump_syms):
    """
    Dump symbols from every package yielded by package_finder() into `to`,
    skipping (and persistently recording) already-processed packages.

    @param package_finder: zero-argument callable yielding package paths
    @param to: destination directory for symbol files
    @param tracking_file: optional path used to persist progress across runs
    @param dump_syms: path to the dump_syms executable
    """
    processed_packages = read_processed_packages(tracking_file)
    with concurrent.futures.ProcessPoolExecutor() as executor:
        for pkg in package_finder():
            if pkg in processed_packages:
                logging.info("Skipping already-processed package: {}".format(pkg))
            else:
                dump_symbols_from_package(executor, dump_syms, pkg, to)
                processed_packages.add(pkg)
                # Write after each package so an interrupted run loses at
                # most one package of progress.
                write_processed_packages(tracking_file, processed_packages)
def main():
    """Command-line entry point: locate installer packages under the given
    search paths and dump Breakpad symbols from them into the destination."""
    parser = argparse.ArgumentParser(
        description="Extracts Breakpad symbols from a Mac OS X support update."
    )
    parser.add_argument(
        "--dump_syms",
        default="dump_syms",
        type=str,
        help="path to the Breakpad dump_syms executable",
    )
    parser.add_argument(
        "--tracking-file",
        type=str,
        help="Path to a file in which to store information "
        + "about already-processed packages",
    )
    parser.add_argument(
        "search", nargs="+", help="Paths to search recursively for packages"
    )
    parser.add_argument("to", type=str, help="destination path for the symbols")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    # Quiet the noisy HTTP connection-pool loggers.
    for noisy in ("requests.packages.urllib3.connectionpool", "urllib3"):
        logging.getLogger(noisy).setLevel(logging.ERROR)

    # Validate inputs before doing any work.
    if not args.search or not all(os.path.exists(p) for p in args.search):
        logging.error("Invalid search path")
        return
    if not os.path.exists(args.to):
        logging.error("Invalid path to destination")
        return

    process_packages(
        lambda: find_all_packages(args.search),
        args.to,
        args.tracking_file,
        args.dump_syms,
    )


if __name__ == "__main__":
    main()
Miller <michaelrmmiller@gmail.com> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import absolute_import + +import argparse +import concurrent.futures +import logging +import os +import re +import requests +import shutil +import subprocess +import tempfile +import urlparse + +from PackageSymbolDumper import process_packages, find_packages + +OSX_RE = re.compile(r"10\.[0-9]+\.[0-9]+") + + +def extract_dmg(dmg_path, dest): + logging.info("extract_dmg({}, {})".format(dmg_path, dest)) + with tempfile.NamedTemporaryFile() as f: + subprocess.check_call( + ["dmg", "extract", dmg_path, f.name], stdout=open(os.devnull, "wb") + ) + subprocess.check_call(["hfsplus", f.name, "extractall"], cwd=dest) + + +def get_update_packages(): + for i in range(16): + logging.info("get_update_packages: page " + str(i)) + url = ( + "https://km.support.apple.com/kb/index?page=downloads_browse&sort=recency" + "&facet=all&category=PF6&locale=en_US&offset=%d" % i + ) + res = requests.get(url) + if res.status_code != 200: + break + data = res.json() + downloads = data.get("downloads", []) + if not downloads: + break + for d in downloads: + title = d.get("title", "") + if OSX_RE.search(title) and "Combo" not in title: + logging.info("Title: " + title) + if "fileurl" in d: + yield d["fileurl"] + else: + logging.warn("No fileurl in download!") + + +def fetch_url_to_file(url, download_dir): + filename = os.path.basename(urlparse.urlsplit(url).path) + local_filename = os.path.join(download_dir, filename) + if os.path.isfile(local_filename): + logging.info("{} already exists, skipping".format(local_filename)) + return None + r = requests.get(url, stream=True) + res_len = int(r.headers.get("content-length", "0")) + logging.info("Downloading {} -> {} ({} bytes)".format(url, local_filename, res_len)) + with open(local_filename, "wb") as f: + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + f.write(chunk) + return local_filename + + +def fetch_and_extract_dmg(url, tmpdir): + logging.info("fetch_and_extract_dmg: " + url) + 
filename = fetch_url_to_file(url, tmpdir) + if not filename: + return [] + # Extract dmg contents to a subdir + subdir = tempfile.mkdtemp(dir=tmpdir) + extract_dmg(filename, subdir) + packages = list(find_packages(subdir)) + logging.info( + "fetch_and_extract_dmg({}): found packages: {}".format(url, str(packages)) + ) + return packages + + +def find_update_packages(tmpdir): + logging.info("find_update_packages") + # Only download 2 packages at a time. + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + jobs = dict( + (executor.submit(fetch_and_extract_dmg, url, tmpdir), url) + for url in get_update_packages() + ) + for future in concurrent.futures.as_completed(jobs): + url = jobs[future] + if future.exception() is not None: + logging.error( + "exception downloading {}: {}".format(url, future.exception()) + ) + else: + for pkg in future.result(): + yield pkg + + +def main(): + parser = argparse.ArgumentParser( + description="Download OS X update packages and dump symbols from them" + ) + parser.add_argument( + "--dump_syms", + default="dump_syms", + type=str, + help="path to the Breakpad dump_syms executable", + ) + parser.add_argument("to", type=str, help="destination path for the symbols") + args = parser.parse_args() + logging.basicConfig( + level=logging.DEBUG, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + for p in ("requests.packages.urllib3.connectionpool", "urllib3"): + urllib3_logger = logging.getLogger(p) + urllib3_logger.setLevel(logging.ERROR) + try: + tmpdir = tempfile.mkdtemp(suffix=".osxupdates") + + def finder(): + return find_update_packages(tmpdir) + + process_packages(finder, args.to, None, args.dump_syms) + finally: + shutil.rmtree(tmpdir) + + +if __name__ == "__main__": + main() diff --git a/tools/crashreporter/system-symbols/mac/list-packages.py b/tools/crashreporter/system-symbols/mac/list-packages.py new file mode 100755 index 0000000000..21e87877ef --- /dev/null +++ 
b/tools/crashreporter/system-symbols/mac/list-packages.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +# Copyright 2015 Ted Mielczarek. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import print_function, absolute_import + +import os +import sys +from reposadolib import reposadocommon + +reposadocommon.get_main_dir = lambda: "/usr/local/bin/" + +products = reposadocommon.get_product_info() +args = [] +for product_id, p in products.items(): + try: + t = p["title"] + except KeyError: + print("Missing title in {}, skipping".format(p), file=sys.stderr) + continue + # p['CatalogEntry']['Packages'] + if t.startswith("OS X") or t.startswith("Mac OS X") or t.startswith("macOS"): + args.append("--product-id=" + product_id) + else: + print("Skipping %r for repo_sync" % t, file=sys.stderr) +if "JUST_ONE_PACKAGE" in os.environ: + args = args[:1] + +print(" ".join(args)) diff --git a/tools/crashreporter/system-symbols/mac/parse_pbzx.py b/tools/crashreporter/system-symbols/mac/parse_pbzx.py new file mode 100755 index 0000000000..242a58608e --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/parse_pbzx.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +# This code is from https://gist.github.com/pudquick/ff412bcb29c9c1fa4b8d +# +# v2 pbzx stream handler +# My personal writeup on the differences here: +# https://gist.github.com/pudquick/29fcfe09c326a9b96cf5 +# +# Pure python reimplementation of .cpio.xz content extraction from pbzx file +# payload originally here: +# http://www.tonymacx86.com/general-help/135458-pbzx-stream-parser.html +# +# Cleaned up C version (as the basis for my code) here, thanks to Pepijn Bruienne / @bruienne +# https://gist.github.com/bruienne/029494bbcfb358098b41 +# +# The python version of this code does not have an explicit license, but +# is based on GPLv3 C code linked above. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +from __future__ import absolute_import + +import struct +import sys + + +def seekread(f, offset=None, length=0, relative=True): + if offset is not None: + # offset provided, let's seek + f.seek(offset, [0, 1, 2][relative]) + if length != 0: + return f.read(length) + + +def parse_pbzx(pbzx_path): + section = 0 + xar_out_path = "%s.part%02d.cpio.xz" % (pbzx_path, section) + f = open(pbzx_path, "rb") + # pbzx = f.read() + # f.close() + magic = seekread(f, length=4) + if magic != "pbzx": + raise "Error: Not a pbzx file" + # Read 8 bytes for initial flags + flags = seekread(f, length=8) + # Interpret the flags as a 64-bit big-endian unsigned int + flags = struct.unpack(">Q", flags)[0] + xar_f = open(xar_out_path, "wb") + while flags & (1 << 24): + # Read in more flags + flags = seekread(f, length=8) + flags = struct.unpack(">Q", flags)[0] + # Read in length + f_length = seekread(f, length=8) + f_length = struct.unpack(">Q", f_length)[0] + xzmagic = seekread(f, length=6) + if xzmagic != "\xfd7zXZ\x00": + # This isn't xz content, this is actually _raw decompressed cpio_ + # chunk of 16MB in size... + # Let's back up ... + seekread(f, offset=-6, length=0) + # ... and split it out ... + f_content = seekread(f, length=f_length) + section += 1 + decomp_out = "%s.part%02d.cpio" % (pbzx_path, section) + g = open(decomp_out, "wb") + g.write(f_content) + g.close() + # Now to start the next section, which should hopefully be .xz + # (we'll just assume it is ...) 
+ xar_f.close() + section += 1 + new_out = "%s.part%02d.cpio.xz" % (pbzx_path, section) + xar_f = open(new_out, "wb") + else: + f_length -= 6 + # This part needs buffering + f_content = seekread(f, length=f_length) + tail = seekread(f, offset=-2, length=2) + xar_f.write(xzmagic) + xar_f.write(f_content) + if tail != "YZ": + xar_f.close() + raise "Error: Footer is not xar file footer" + try: + f.close() + xar_f.close() + except BaseException: + pass + + +def main(): + parse_pbzx(sys.argv[1]) + + +if __name__ == "__main__": + main() diff --git a/tools/crashreporter/system-symbols/mac/run.sh b/tools/crashreporter/system-symbols/mac/run.sh new file mode 100755 index 0000000000..8f9ff76d03 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/run.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +set -v -e -x + +base="$(realpath "$(dirname "$0")")" +export PATH="$PATH:/builds/worker/bin:$base" + +cd /builds/worker + +if test "$PROCESSED_PACKAGES_INDEX" && test "$PROCESSED_PACKAGES_PATH" && test "$TASKCLUSTER_ROOT_URL"; then + PROCESSED_PACKAGES="$TASKCLUSTER_ROOT_URL/api/index/v1/task/$PROCESSED_PACKAGES_INDEX/artifacts/$PROCESSED_PACKAGES_PATH" +fi + +if test "$PROCESSED_PACKAGES"; then + rm -f processed-packages + if test `curl --output /dev/null --silent --head --location "$PROCESSED_PACKAGES" -w "%{http_code}"` = 200; then + curl -L "$PROCESSED_PACKAGES" | gzip -dc > processed-packages + elif test -f "$PROCESSED_PACKAGES"; then + gzip -dc "$PROCESSED_PACKAGES" > processed-packages + fi + if test -f processed-packages; then + # Prevent reposado from downloading packages that have previously been + # dumped. + for f in $(cat processed-packages); do + mkdir -p "$(dirname "$f")" + touch "$f" + done + fi +fi + +mkdir -p /opt/data-reposado/html /opt/data-reposado/metadata + +# First, just fetch all the update info. +python3 /usr/local/bin/repo_sync --no-download + +# Next, fetch just the update packages we're interested in. 
+packages=$(python3 "${base}/list-packages.py") +# shellcheck disable=SC2086 +python3 /usr/local/bin/repo_sync $packages + +du -sh /opt/data-reposado + +# Now scrape symbols out of anything that was downloaded. +mkdir -p symbols artifacts +python3 "${base}/PackageSymbolDumper.py" --tracking-file=/builds/worker/processed-packages --dump_syms=/builds/worker/bin/dump_syms_mac /opt/data-reposado/html/content/downloads /builds/worker/symbols + +# Hand out artifacts +gzip -c processed-packages > artifacts/processed-packages.gz + +cd symbols +zip -r9 /builds/worker/artifacts/target.crashreporter-symbols.zip ./* || echo "No symbols dumped" diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py new file mode 100644 index 0000000000..52fecaeb9f --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# Any copyright is dedicated to the Public Domain. 
+# http://creativecommons.org/publicdomain/zero/1.0/ + +from __future__ import print_function, absolute_import + +import argparse +import concurrent.futures +import datetime +import os +import requests +import subprocess +import sys +import urllib +import zipfile + + +if sys.platform == "darwin": + SYSTEM_DIRS = [ + "/usr/lib", + "/System/Library/Frameworks", + "/System/Library/PrivateFrameworks", + "/System/Library/Extensions", + ] +else: + SYSTEM_DIRS = ["/lib", "/usr/lib"] +SYMBOL_SERVER_URL = ( + "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public/v1/" +) + + +def should_process(f, platform=sys.platform): + """Determine if a file is a platform binary""" + if platform == "darwin": + """ + The 'file' command can error out. One example is "illegal byte + sequence" on a Japanese language UTF8 text file. So we must wrap the + command in a try/except block to prevent the script from terminating + prematurely when this happens. + """ + try: + filetype = subprocess.check_output(["file", "-Lb", f]) + except subprocess.CalledProcessError: + return False + """Skip kernel extensions""" + if "kext bundle" in filetype: + return False + return filetype.startswith("Mach-O") + else: + return subprocess.check_output(["file", "-Lb", f]).startswith("ELF") + return False + + +def get_archs(filename, platform=sys.platform): + """ + Find the list of architectures present in a Mach-O file, or a single-element + list on non-OS X. + """ + if platform == "darwin": + return ( + subprocess.check_output(["lipo", "-info", filename]) + .split(":")[2] + .strip() + .split() + ) + return [None] + + +def server_has_file(filename): + """ + Send the symbol server a HEAD request to see if it has this symbol file. 
+ """ + try: + r = requests.head( + urllib.parse.urljoin(SYMBOL_SERVER_URL, urllib.parse.quote(filename)) + ) + return r.status_code == 200 + except requests.exceptions.RequestException: + return False + + +def process_file(dump_syms, path, arch, verbose, write_all): + if sys.platform == "darwin": + arch_arg = ["-a", arch] + else: + arch_arg = [] + try: + stderr = None if verbose else open(os.devnull, "wb") + stdout = subprocess.check_output([dump_syms] + arch_arg + [path], stderr=stderr) + except subprocess.CalledProcessError: + if verbose: + print("Processing %s%s...failed." % (path, " [%s]" % arch if arch else "")) + return None, None + module = stdout.splitlines()[0] + bits = module.split(" ", 4) + if len(bits) != 5: + return None, None + _, platform, cpu_arch, debug_id, debug_file = bits + if verbose: + sys.stdout.write("Processing %s [%s]..." % (path, arch)) + filename = os.path.join(debug_file, debug_id, debug_file + ".sym") + # see if the server already has this symbol file + if not write_all: + if server_has_file(filename): + if verbose: + print("already on server.") + return None, None + # Collect for uploading + if verbose: + print("done.") + return filename, stdout + + +def get_files(paths, platform=sys.platform): + """ + For each entry passed in paths if the path is a file that can + be processed, yield it, otherwise if it is a directory yield files + under it that can be processed. 
def get_files(paths, platform=sys.platform):
    """
    For each entry in paths: if it is a processable file, yield it; if it is
    a directory, yield the processable files found beneath it.
    """
    for path in paths:
        if os.path.isdir(path):
            for root, _subdirs, files in os.walk(path):
                for f in files:
                    fullpath = os.path.join(root, f)
                    if should_process(fullpath, platform=platform):
                        yield fullpath
        elif should_process(path, platform=platform):
            yield path


def process_paths(
    paths, executor, dump_syms, verbose, write_all=False, platform=sys.platform
):
    """
    Submit a dump_syms job for every architecture of every processable file
    under paths, and yield (filename, contents) results as they complete.

    @param paths: files and/or directories to scan
    @param executor: concurrent.futures executor running the jobs
    @param dump_syms: path to the dump_syms executable
    @param verbose: print per-file progress
    @param write_all: dump even files the symbol server already has
    @param platform: platform the binaries belong to
    """
    jobs = set()
    for fullpath in get_files(paths, platform=platform):
        # Resolve symlink chains so each binary is dumped under its real
        # path only once.
        while os.path.islink(fullpath):
            fullpath = os.path.join(os.path.dirname(fullpath), os.readlink(fullpath))
        if platform == "linux":
            # See if there's a -dbg package installed and dump that instead.
            dbgpath = "/usr/lib/debug" + fullpath
            if os.path.isfile(dbgpath):
                fullpath = dbgpath
        for arch in get_archs(fullpath, platform=platform):
            jobs.add(
                executor.submit(
                    process_file, dump_syms, fullpath, arch, verbose, write_all
                )
            )
    for job in concurrent.futures.as_completed(jobs):
        try:
            yield job.result()
        except Exception as e:
            print("Error: %s" % str(e), file=sys.stderr)


def main():
    """CLI: dump symbols from the given files (or the default system library
    directories) into symbols.zip in the current directory."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Produce verbose output"
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Gather all system symbols, not just missing ones.",
    )
    parser.add_argument("dump_syms", help="Path to dump_syms binary")
    parser.add_argument(
        "files", nargs="*", help="Specific files from which to gather symbols."
    )
    args = parser.parse_args()
    args.dump_syms = os.path.abspath(args.dump_syms)
    # check for the dump_syms binary
    if (
        not os.path.isabs(args.dump_syms)
        or not os.path.exists(args.dump_syms)
        or not os.access(args.dump_syms, os.X_OK)
    ):
        print(
            "Error: can't find dump_syms binary at %s!" % args.dump_syms,
            file=sys.stderr,
        )
        return 1
    file_list = set()
    executor = concurrent.futures.ProcessPoolExecutor()
    zip_path = os.path.abspath("symbols.zip")
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for filename, contents in process_paths(
            args.files if args.files else SYSTEM_DIRS,
            executor,
            args.dump_syms,
            args.verbose,
            args.all,
        ):
            if filename and contents and filename not in file_list:
                file_list.add(filename)
                zf.writestr(filename, contents)
        zf.writestr(
            "ossyms-1.0-{platform}-{date}-symbols.txt".format(
                platform=sys.platform.title(),
                date=datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
            ),
            "\n".join(file_list),
        )
    if file_list:
        if args.verbose:
            print("Generated %s with %d symbols" % (zip_path, len(file_list)))
    else:
        # Remove via the same absolute path the archive was created with,
        # instead of the old bare "symbols.zip" relative path.
        os.unlink(zip_path)


if __name__ == "__main__":
    main()