Adding upstream version 110.0.1.upstream/110.0.1 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:22:09 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 09:22:09 +0000
commit: 43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree: 620249daf56c0258faa40cbdcf9cfba06de2a846 /tools/crashreporter/system-symbols
parent: Initial commit. (diff)
download: firefox-upstream.tar.xz
firefox-upstream.zip
13 files changed, 1910 insertions, 0 deletions
diff --git a/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py
new file mode 100755
index 0000000000..5e63a2f39f
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py
@@ -0,0 +1,392 @@
+#!/usr/bin/env python
+
+# Copyright 2015 Michael R. Miller.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""
+PackageSymbolDumper.py
+
+Dumps Breakpad symbols for the contents of an Apple update installer.  Given a
+path to an Apple update installer as a .dmg or a path to a specific package
+within the disk image, PackageSymbolDumper mounts, traverses, and dumps symbols
+for all applicable frameworks and dylibs found within.
+
+Required tools for Linux:
+    pax
+    gzip
+    tar
+    xpwn's dmg (https://github.com/planetbeing/xpwn)
+
+Created on Apr 11, 2012
+
+@author: mrmiller
+"""
+import argparse
+import concurrent.futures
+import errno
+import logging
+import os
+import shutil
+import stat
+import subprocess
+import tempfile
+import traceback
+
+from macpkg import Pbzx, uncpio, unxar
+from scrapesymbols.gathersymbols import process_paths
+
+
+def expand_pkg(pkg_path, out_path):
+    """
+    Expands the contents of an installer package (a XAR archive) to a directory.
+    @param pkg_path: a path to an installer package (.pkg)
+    @param out_path: a path to an existing directory to hold the contents
+    """
+    with open(pkg_path, "rb") as pkg:  # close the archive even if unxar raises
+        for name, content in unxar(pkg):
+            with open(os.path.join(out_path, name), "wb") as fh:
+                shutil.copyfileobj(content, fh)
+
+
+def expand_dmg(dmg_path, out_path):
+    """
+    Expands the contents of a DMG file to some directory.
+
+    @param dmg_path: a path to a disk image file (.dmg)
+    @param out_path: a path to hold the image contents
+    """
+    # "dmg extract" fills a temp file; "hfsplus extractall" is then run from out_path.
+    with tempfile.NamedTemporaryFile() as f:
+        subprocess.check_call(
+            ["dmg", "extract", dmg_path, f.name], stdout=subprocess.DEVNULL
+        )
+        subprocess.check_call(
+            ["hfsplus", f.name, "extractall"], stdout=subprocess.DEVNULL, cwd=out_path
+        )
+
+
+def expand_zip(zip_path, out_path):
+    """
+    Expands the contents of a ZIP archive to some directory.
+
+    @param zip_path: a path to a ZIP archive (.zip)
+    @param out_path: a path to hold the archive contents
+    """
+    subprocess.check_call(
+        ["unzip", "-d", out_path, zip_path], stdout=subprocess.DEVNULL
+    )  # DEVNULL is a subprocess marker, not something to pass to open()
+
+
+def filter_files(function, path):
+    """
+    Walk the directory tree rooted at path and lazily yield the full path of
+    every file whose name is accepted by the given predicate.
+
+    @param function: predicate called with each bare filename; a true result
+        means the file's path is yielded
+    @param path: the root path of the hierarchy to traverse
+    """
+    for parent, _subdirs, names in os.walk(path):
+        matching = (name for name in names if function(name))
+        for name in matching:
+            yield os.path.join(parent, name)
+
+
+def find_packages(path):
+    """
+    Returns an iterator (not a list) over the installer packages (.pkg), disk
+    images (.dmg) and ZIP archives (.zip) found within path, matched by file
+    extension. File names starting with "._" are ignored.
+
+    @param path: root path to search for .pkg, .dmg and .zip files
+    """
+    return filter_files(
+        lambda filename: os.path.splitext(filename)[1] in (".pkg", ".dmg", ".zip")
+        and not filename.startswith("._"),
+        path,
+    )
+
+
+def find_all_packages(paths):
+    """
+    Yield installer package files, disk images and ZIP archives found in all
+    of `paths`.
+
+    @param paths: list of root paths to search for .pkg, .dmg & .zip files
+    """
+    for path in paths:
+        logging.info("find_all_packages: {}".format(path))
+        for pkg in find_packages(path):
+            yield pkg
+
+
+def find_payloads(path):
+    """
+    Returns an iterator over possible installer package payload paths.
+
+    @param path: root path for an installer package
+    """
+    return filter_files(
+        lambda filename: "Payload" in filename or ".pax.gz" in filename, path
+    )
+
+
+def extract_payload(payload_path, output_path):
+    """
+    Extracts the contents of an installer package payload to a given directory.
+
+    The format is detected from the first two bytes: bzip2 ("BZ") and gzip
+    (1f 8b) payloads are decompressed by the matching tool and piped into pax,
+    while pbzx ("pb") payloads are unpacked in-process (regular files only).
+
+    @param payload_path: path to an installer package's payload
+    @param output_path: output path for the payload's contents
+    @return True for success, False for failure.
+    """
+    with open(payload_path, "rb") as payload:
+        header = payload.read(2)
+    try:
+        if header in (b"BZ", b"\x1f\x8b"):
+            extract = "bzip2" if header == b"BZ" else "gzip"
+            logging.info("Extracting {} payload".format(extract))
+            # Run "<extract> -dc payload | pax ..." without a shell, so paths
+            # containing spaces or shell metacharacters cannot break the command.
+            with subprocess.Popen(
+                [extract, "-dc", payload_path], stdout=subprocess.PIPE
+            ) as decompressor:
+                subprocess.check_call(
+                    ["pax", "-r", "-k", "-s", ":^/::"],
+                    stdin=decompressor.stdout,
+                    cwd=output_path,
+                )
+            return True
+        elif header == b"pb":
+            logging.info("Extracting pbzx payload")
+
+            with open(payload_path, "rb") as payload:
+                for path, mode, content in uncpio(Pbzx(payload)):
+                    if not path or not stat.S_ISREG(mode):
+                        continue
+                    out = os.path.join(output_path, path.decode())
+                    os.makedirs(os.path.dirname(out), exist_ok=True)
+                    with open(out, "wb") as fh:
+                        shutil.copyfileobj(content, fh)
+
+            return True
+        else:
+            # Unsupported format; hex() also copes with payloads under 2 bytes.
+            logging.error("Unknown payload format: 0x{}".format(header.hex()))
+            return False
+
+    except Exception:
+        # Still report failure to the caller, but leave a traceback in the log.
+        logging.exception("Failed to extract payload: " + payload_path)
+        return False
+
+
+def shutil_error_handler(caller, path, excinfo):  # passed to shutil.rmtree(onerror=)
+    logging.error('Could not remove "{path}": {info}'.format(path=path, info=excinfo))
+
+
+def write_symbol_file(dest, filename, contents):
+    full_path = os.path.join(dest, filename)
+    try:
+        os.makedirs(os.path.dirname(full_path))  # raises EEXIST if already present
+        with open(full_path, "wb") as sym_file:
+            sym_file.write(contents)
+    except os.error as e:
+        if e.errno != errno.EEXIST:
+            raise  # NOTE(review): on EEXIST the file is not (re)written - intended?
+
+
+def dump_symbols(executor, dump_syms, path, dest):
+    system_library = os.path.join("System", "Library")
+    subdirectories = [
+        os.path.join(system_library, "Frameworks"),
+        os.path.join(system_library, "PrivateFrameworks"),
+        os.path.join(system_library, "Extensions"),
+        os.path.join("usr", "lib"),
+    ]
+    # Look only in these locations under path, and only in those that exist.
+    paths_to_dump = [os.path.join(path, d) for d in subdirectories]
+    existing_paths = [path for path in paths_to_dump if os.path.exists(path)]
+    # filename is decoded as UTF-8 before being used as a path relative to dest.
+    for filename, contents in process_paths(
+        paths=existing_paths,
+        executor=executor,
+        dump_syms=dump_syms,
+        verbose=True,
+        write_all=True,
+        platform="darwin",
+    ):
+        if filename and contents:
+            logging.info("Added symbol file " + str(filename, "utf-8"))
+            write_symbol_file(dest, str(filename, "utf-8"), contents)
+
+
+def dump_symbols_from_payload(executor, dump_syms, payload_path, dest):
+    """
+    Dumps all the symbols found inside the payload of an installer package.
+    @param executor: executor forwarded unchanged to dump_symbols
+    @param dump_syms: path to the dump_syms executable
+    @param payload_path: path to an installer package's payload
+    @param dest: output path for symbols
+    """
+    temp_dir = None
+    logging.info("Dumping symbols from payload: " + payload_path)
+    try:
+        temp_dir = tempfile.mkdtemp()
+        logging.info("Extracting payload to {path}.".format(path=temp_dir))
+        if not extract_payload(payload_path, temp_dir):
+            logging.error("Could not extract payload: " + payload_path)
+            return False
+
+        dump_symbols(executor, dump_syms, temp_dir, dest)
+
+    finally:
+        if temp_dir is not None:
+            shutil.rmtree(temp_dir, onerror=shutil_error_handler)
+    # Only reached when extraction succeeded and dump_symbols did not raise.
+    return True
+
+
+def dump_symbols_from_package(executor, dump_syms, pkg, dest):
+    """
+    Dumps all the symbols found inside an installer package.
+    @param executor: executor forwarded unchanged to the dump_symbols* helpers
+    @param dump_syms: path to the dump_syms executable
+    @param pkg: path to an installer package
+    @param dest: output path for symbols
+    """
+    successful = True
+    temp_dir = None
+    logging.info("Dumping symbols from package: " + pkg)
+    try:
+        temp_dir = tempfile.mkdtemp()
+        if os.path.splitext(pkg)[1] == ".pkg":
+            expand_pkg(pkg, temp_dir)
+        elif os.path.splitext(pkg)[1] == ".zip":
+            expand_zip(pkg, temp_dir)
+        else:
+            expand_dmg(pkg, temp_dir)
+
+        # check for any subpackages
+        for subpackage in find_packages(temp_dir):
+            logging.info("Found subpackage at: " + subpackage)
+            res = dump_symbols_from_package(executor, dump_syms, subpackage, dest)
+            if not res:
+                logging.error("Error while dumping subpackage: " + subpackage)
+
+        # dump symbols from any payloads (only expecting one) in the package
+        for payload in find_payloads(temp_dir):
+            res = dump_symbols_from_payload(executor, dump_syms, payload, dest)
+            if not res:
+                successful = False
+
+        # dump symbols directly extracted from the package
+        dump_symbols(executor, dump_syms, temp_dir, dest)
+
+    except Exception as e:
+        traceback.print_exc()
+        logging.error("Exception while dumping symbols from package: {}".format(e))
+        successful = False
+
+    finally:
+        if temp_dir is not None:
+            shutil.rmtree(temp_dir, onerror=shutil_error_handler)
+    # NOTE: subpackage failures are only logged; they do not clear `successful`.
+    return successful
+
+
+def read_processed_packages(tracking_file):  # returns a set of package paths
+    if tracking_file is None or not os.path.exists(tracking_file):
+        return set()
+    logging.info("Reading processed packages from {}".format(tracking_file))
+    with open(tracking_file, "r") as fh:  # close the file deterministically
+        return set(fh.read().splitlines())
+
+
+def write_processed_packages(tracking_file, processed_packages):
+    """Write the package paths to tracking_file, one per line (no-op if None)."""
+    if tracking_file is None:
+        return
+    count = len(processed_packages)
+    logging.info("Writing {} processed packages to {}".format(count, tracking_file))
+    with open(tracking_file, "w") as fh:  # flushed and closed on exit
+        fh.write("\n".join(sorted(processed_packages)))  # sorted: stable output
+
+
+def process_packages(package_finder, to, tracking_file, dump_syms):
+    processed_packages = read_processed_packages(tracking_file)
+    with concurrent.futures.ProcessPoolExecutor() as executor:
+        for pkg in package_finder():  # called once, with no arguments
+            if pkg in processed_packages:
+                logging.info("Skipping already-processed package: {}".format(pkg))
+            else:
+                dump_symbols_from_package(executor, dump_syms, pkg, to)  # result unused
+                processed_packages.add(pkg)  # recorded even if dumping failed
+                write_processed_packages(tracking_file, processed_packages)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Extracts Breakpad symbols from a Mac OS X support update."
+    )
+    parser.add_argument(
+        "--dump_syms",
+        default="dump_syms",
+        type=str,
+        help="path to the Breakpad dump_syms executable",
+    )
+    parser.add_argument(
+        "--tracking-file",
+        type=str,
+        help="Path to a file in which to store information "
+        + "about already-processed packages",
+    )
+    parser.add_argument(
+        "search", nargs="+", help="Paths to search recursively for packages"
+    )
+    parser.add_argument("to", type=str, help="destination path for the symbols")
+    args = parser.parse_args()
+    # Log at DEBUG level, but only ERROR and above for the urllib3 loggers.
+    logging.basicConfig(
+        level=logging.DEBUG,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+    for p in ("requests.packages.urllib3.connectionpool", "urllib3"):
+        urllib3_logger = logging.getLogger(p)
+        urllib3_logger.setLevel(logging.ERROR)
+    # Both checks only log and return; no exception is raised for a bad path.
+    if not args.search or not all(os.path.exists(p) for p in args.search):
+        logging.error("Invalid search path")
+        return
+    if not os.path.exists(args.to):
+        logging.error("Invalid path to destination")
+        return
+    # process_packages calls its finder with no arguments, hence the closure.
+    def finder():
+        return find_all_packages(args.search)
+
+    process_packages(finder, args.to, args.tracking_file, args.dump_syms)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/crashreporter/system-symbols/mac/get_update_packages.py b/tools/crashreporter/system-symbols/mac/get_update_packages.py
new file mode 100644
index 0000000000..3192fa3ef0
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/get_update_packages.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2015 Ted Mielczarek <ted@mielczarek.org>
+# and Michael R. Miller <michaelrmmiller@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import argparse
+import concurrent.futures
+import logging
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+import urllib.parse as urlparse
+
+import requests
+from PackageSymbolDumper import find_packages, process_packages
+
+OSX_RE = re.compile(r"10\.[0-9]+\.[0-9]+")
+
+
+def extract_dmg(dmg_path, dest):
+    logging.info("extract_dmg({}, {})".format(dmg_path, dest))
+    with tempfile.NamedTemporaryFile() as f:  # receives the "dmg extract" output
+        subprocess.check_call(
+            ["dmg", "extract", dmg_path, f.name], stdout=subprocess.DEVNULL
+        )
+        subprocess.check_call(["hfsplus", f.name, "extractall"], cwd=dest)  # in dest
+
+
+def get_update_packages():
+    """Yield the "fileurl" of each non-Combo OS X 10.x.y download listed by Apple."""
+    for i in range(16):
+        logging.info("get_update_packages: page " + str(i))
+        url = (
+            "https://km.support.apple.com/kb/index?page=downloads_browse&sort=recency"
+            "&facet=all&category=PF6&locale=en_US&offset=%d" % i
+        )
+        res = requests.get(url)
+        if res.status_code != 200:
+            break
+        downloads = res.json().get("downloads", [])
+        if not downloads:
+            break
+        for d in downloads:
+            title = d.get("title", "")
+            if OSX_RE.search(title) and "Combo" not in title:
+                logging.info("Title: " + title)
+                if "fileurl" in d:
+                    yield d["fileurl"]
+                else:
+                    logging.warning("No fileurl in download!")
+
+
+def fetch_url_to_file(url, download_dir):
+    filename = os.path.basename(urlparse.urlsplit(url).path)
+    local_filename = os.path.join(download_dir, filename)
+    if os.path.isfile(local_filename):
+        logging.info("{} already exists, skipping".format(local_filename))
+        return None  # tells the caller that nothing new was downloaded
+    r = requests.get(url, stream=True)
+    r.raise_for_status()  # do not save an HTTP error page as if it were the file
+    res_len = int(r.headers.get("content-length", "0"))
+    logging.info("Downloading {} -> {} ({} bytes)".format(url, local_filename, res_len))
+    with open(local_filename, "wb") as f:
+        for chunk in r.iter_content(chunk_size=1024):
+            f.write(chunk)  # an empty keep-alive chunk is a harmless no-op write
+    return local_filename
+
+
+def fetch_and_extract_dmg(url, tmpdir):
+    logging.info("fetch_and_extract_dmg: " + url)
+    filename = fetch_url_to_file(url, tmpdir)
+    if not filename:
+        return []  # fetch_url_to_file returned None: file already present
+    # Extract dmg contents to a fresh subdirectory of tmpdir
+    subdir = tempfile.mkdtemp(dir=tmpdir)
+    extract_dmg(filename, subdir)
+    packages = list(find_packages(subdir))
+    logging.info(
+        "fetch_and_extract_dmg({}): found packages: {}".format(url, str(packages))
+    )
+    return packages
+
+
+def find_update_packages(tmpdir):
+    """Download and unpack every update DMG into tmpdir, yielding found packages."""
+    logging.info("find_update_packages")
+    # Only download 2 packages at a time.
+    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        jobs = {
+            executor.submit(fetch_and_extract_dmg, url, tmpdir): url
+            for url in get_update_packages()
+        }
+        for future in concurrent.futures.as_completed(jobs):
+            url = jobs[future]
+            error = future.exception()
+            if error is not None:
+                # A failed download is reported but does not stop the others.
+                logging.error("exception downloading {}: {}".format(url, error))
+                continue
+            yield from future.result()
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Download OS X update packages and dump symbols from them"
+    )
+    parser.add_argument(
+        "--dump_syms",
+        default="dump_syms",
+        type=str,
+        help="path to the Breakpad dump_syms executable",
+    )
+    parser.add_argument("to", type=str, help="destination path for the symbols")
+    args = parser.parse_args()
+    logging.basicConfig(
+        level=logging.DEBUG,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+    for p in ("requests.packages.urllib3.connectionpool", "urllib3"):
+        urllib3_logger = logging.getLogger(p)
+        urllib3_logger.setLevel(logging.ERROR)
+    # Created outside the try so a failing mkdtemp cannot cause a NameError below.
+    tmpdir = tempfile.mkdtemp(suffix=".osxupdates")
+    try:
+        def finder():
+            return find_update_packages(tmpdir)
+
+        process_packages(finder, args.to, None, args.dump_syms)
+    finally:
+        shutil.rmtree(tmpdir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/crashreporter/system-symbols/mac/list-packages.py b/tools/crashreporter/system-symbols/mac/list-packages.py
new file mode 100755
index 0000000000..444c27be9d
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/list-packages.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+# Copyright 2015 Ted Mielczarek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import os
+import sys
+
+from reposadolib import reposadocommon
+
+reposadocommon.get_main_dir = lambda: "/usr/local/bin/"
+# Replace get_main_dir before asking reposado for its product information.
+products = reposadocommon.get_product_info()
+args = []
+for product_id, product in products.items():
+    try:
+        title = product["title"]
+    except KeyError:
+        print("Missing title in {}, skipping".format(product), file=sys.stderr)
+        continue
+    # Only the first dot-separated component of the version string is used.
+    try:
+        major_version = int(product["version"].split(".")[0])
+    except Exception:
+        print(
+            "Cannot extract the major version number in {}, skipping".format(product),
+            file=sys.stderr,
+        )
+        continue
+    # Keep OS X / Mac OS X / macOS titles whose major version is at most 10.
+    if (
+        title.startswith("OS X")
+        or title.startswith("Mac OS X")
+        or title.startswith("macOS")
+    ) and major_version <= 10:
+        args.append(product_id)
+    else:
+        print("Skipping %r for repo_sync" % title, file=sys.stderr)
+if "JUST_ONE_PACKAGE" in os.environ:
+    args = args[:1]
+# Selected product ids go to stdout, one per line; diagnostics go to stderr.
+print("\n".join(args))
diff --git a/tools/crashreporter/system-symbols/mac/macpkg.py b/tools/crashreporter/system-symbols/mac/macpkg.py
new file mode 100644
index 0000000000..d606196c1c
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/macpkg.py
@@ -0,0 +1,199 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import bz2
+import io
+import lzma
+import os
+import struct
+import zlib
+from xml.etree.ElementTree import XML
+
+
+class ZlibFile(object):
+    """Read-only file-like wrapper that inflates a zlib stream from fileobj."""
+
+    def __init__(self, fileobj):
+        self.fileobj, self.buf = fileobj, b""
+        self.decompressor = zlib.decompressobj()
+
+    def read(self, length):
+        # Start with whatever decompressed data is left over from earlier calls.
+        out, self.buf = self.buf[:length], self.buf[length:]
+        while len(out) < length:
+            raw = self.fileobj.read(io.DEFAULT_BUFFER_SIZE)
+            if not raw:
+                break
+            inflated = self.decompressor.decompress(raw)
+            missing = length - len(out)
+            out += inflated[:missing]
+            self.buf += inflated[missing:]  # keep the surplus for the next read
+        return out
+
+
+def unxar(fileobj):
+    magic = fileobj.read(4)
+    if magic != b"xar!":
+        raise Exception("Not a XAR?")
+    # Big-endian 16-bit header size, counted from the start of the file.
+    header_size = fileobj.read(2)
+    header_size = struct.unpack(">H", header_size)[0]
+    if header_size > 64:
+        raise Exception(
+            f"Don't know how to handle a {header_size} bytes XAR header size"
+        )
+    header_size -= 6  # what we've read so far.
+    header = fileobj.read(header_size)
+    if len(header) != header_size:
+        raise Exception("Failed to read XAR header")
+    (
+        version,
+        compressed_toc_len,
+        uncompressed_toc_len,
+        checksum_type,
+    ) = struct.unpack(">HQQL", header[:22])
+    if version != 1:
+        raise Exception(f"XAR version {version} not supported")
+    toc = fileobj.read(compressed_toc_len)
+    base = fileobj.tell()
+    if len(toc) != compressed_toc_len:
+        raise Exception("Failed to read XAR TOC")
+    toc = zlib.decompress(toc)
+    if len(toc) != uncompressed_toc_len:
+        raise Exception("Corrupted XAR?")
+    toc = XML(toc).find("toc")
+    for f in toc.findall("file"):
+        if f.find("type").text != "file":
+            continue
+        filename = f.find("name").text
+        data = f.find("data")
+        length = int(data.find("length").text)
+        size = int(data.find("size").text)
+        offset = int(data.find("offset").text)
+        encoding = data.find("encoding").get("style")
+        fileobj.seek(base + offset, os.SEEK_SET)
+        content = Take(fileobj, length)
+        if encoding == "application/octet-stream":
+            if length != size:
+                raise Exception(f"{length} != {size}")
+        elif encoding == "application/x-bzip2":
+            content = bz2.BZ2File(content)
+        elif encoding == "application/x-gzip":
+            # Despite the encoding saying gzip, it is in fact, a raw zlib stream.
+            content = ZlibFile(content)
+        else:
+            raise Exception(f"XAR encoding {encoding} not supported")
+        # content reads from the shared fileobj, so consume it before the next item.
+        yield filename, content
+
+
+class Pbzx(object):
+    """File-like reader over the decompressed chunks of a pbzx stream."""
+
+    def __init__(self, fileobj):
+        magic = fileobj.read(4)
+        if magic != b"pbzx":
+            raise Exception("Not a PBZX payload?")
+        # The first field looks like the size of each decompressed chunk
+        # except the last one. It should match decompressed_size there, but
+        # it is not checked: it is only parsed to validate and skip it.
+        struct.unpack(">Q", fileobj.read(8))
+        self.fileobj = fileobj
+        self._init_one_chunk()
+
+    def _init_one_chunk(self):
+        self.offset = 0
+        header = self.fileobj.read(16)
+        if header == b"":
+            self.chunk = b""  # bytes, not str, so read() stays bytes at EOF
+            return
+        if len(header) != 16:
+            raise Exception("Corrupted PBZX payload?")
+        decompressed_size, compressed_size = struct.unpack(">QQ", header)
+        chunk = self.fileobj.read(compressed_size)
+        if compressed_size == decompressed_size:
+            self.chunk = chunk
+        else:
+            self.chunk = lzma.decompress(chunk)
+            if len(self.chunk) != decompressed_size:
+                raise Exception("Corrupted PBZX payload?")
+
+    def read(self, length=None):
+        if length == 0:
+            return b""
+        if length and len(self.chunk) >= self.offset + length:
+            start = self.offset
+            self.offset += length
+            return self.chunk[start : self.offset]
+        # Chunk exhausted (or length is None): drain it and move on to the next.
+        result = self.chunk[self.offset :]
+        self._init_one_chunk()
+        if self.chunk:
+            # XXX: suboptimal if length is larger than the chunk size
+            result += self.read(None if length is None else length - len(result))
+        return result
+
+
+class Take(object):
+    """
+    Wraps a file object so that no more than `limit` bytes can be read from it.
+    """
+
+    def __init__(self, fileobj, limit):
+        self.fileobj = fileobj
+        self.limit = limit
+
+    def read(self, length=None):
+        # No explicit length means "everything still allowed"; otherwise the
+        # request is capped at the remaining allowance.
+        allowance = self.limit
+        wanted = allowance if length is None else min(length, allowance)
+        data = self.fileobj.read(wanted)
+        self.limit = allowance - len(data)
+        return data
+
+
+def uncpio(fileobj):
+    while True:
+        magic = fileobj.read(6)
+        # CPIO payloads in mac pkg files are using the portable ASCII format.
+        if magic != b"070707":
+            if magic.startswith(b"0707"):
+                raise Exception("Unsupported CPIO format")
+            raise Exception("Not a CPIO header")
+        header = fileobj.read(70)  # ten fixed-width fields, unpacked just below
+        (
+            dev,
+            ino,
+            mode,
+            uid,
+            gid,
+            nlink,
+            rdev,
+            mtime,
+            namesize,
+            filesize,
+        ) = struct.unpack(">6s6s6s6s6s6s6s11s6s11s", header)
+        mode = int(mode, 8)
+        nlink = int(nlink, 8)
+        namesize = int(namesize, 8)
+        filesize = int(filesize, 8)
+        name = fileobj.read(namesize)
+        if name[-1] != 0:
+            raise Exception("File name is not NUL terminated")
+        name = name[:-1]
+        if name == b"TRAILER!!!":
+            break
+        # Refuse ".." components, then drop one leading "." and one leading "/".
+        if b"/../" in name or name.startswith(b"../") or name == b"..":
+            raise Exception(".. is forbidden in file name")
+        if name.startswith(b"."):
+            name = name[1:]
+        if name.startswith(b"/"):
+            name = name[1:]
+        content = Take(fileobj, filesize)
+        yield name, mode, content
+        # Ensure the content is totally consumed
+        while content.read(4096):
+            pass
diff --git a/tools/crashreporter/system-symbols/mac/run.sh b/tools/crashreporter/system-symbols/mac/run.sh
new file mode 100755
index 0000000000..8dec95dffe
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/run.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+
+set -v -e -x
+
+base="$(realpath "$(dirname "$0")")"
+export PATH="$PATH:/builds/worker/bin:$base:${MOZ_FETCHES_DIR}/dmg"
+
+cd /builds/worker
+
+if test "$PROCESSED_PACKAGES_INDEX" && test "$PROCESSED_PACKAGES_PATH" && test "$TASKCLUSTER_ROOT_URL"; then
+  PROCESSED_PACKAGES="$TASKCLUSTER_ROOT_URL/api/index/v1/task/$PROCESSED_PACKAGES_INDEX/artifacts/$PROCESSED_PACKAGES_PATH"
+fi
+
+if test "$PROCESSED_PACKAGES"; then
+  rm -f processed-packages
+  if test "$(curl --output /dev/null --silent --head --location "$PROCESSED_PACKAGES" -w "%{http_code}")" = 200; then
+    curl -L "$PROCESSED_PACKAGES" | gzip -dc > processed-packages
+  elif test -f "$PROCESSED_PACKAGES"; then
+    gzip -dc "$PROCESSED_PACKAGES" > processed-packages
+  fi
+  if test -f processed-packages; then
+    # Prevent reposado from downloading packages that have previously been dumped.
+    while IFS= read -r f || test -n "$f"; do
+      test -n "$f" || continue
+      mkdir -p "$(dirname "$f")"
+      touch "$f"
+    done < processed-packages
+  fi
+fi
+
+mkdir -p /opt/data-reposado/html /opt/data-reposado/metadata artifacts
+
+# First, just fetch all the update info.
+python3 /usr/local/bin/repo_sync --no-download
+
+# Next, fetch just the update packages we're interested in.
+packages=$(python3 "${base}/list-packages.py")
+
+for package in ${packages}; do
+  # repo_sync is super-chatty, let's pipe stderr to separate files
+  python3 /usr/local/bin/repo_sync "--product-id=${package}" 2> "artifacts/repo_sync-product-id-${package}.stderr"
+  # Stop downloading packages if we have more than 10 GiB of them to process
+  download_size=$(du -B1073741824 -s /opt/data-reposado | cut -f1)
+  if [ "${download_size}" -gt 10 ]; then
+    break
+  fi
+done
+
+du -sh /opt/data-reposado
+
+# Now scrape symbols out of anything that was downloaded.
+mkdir -p symbols tmp
+env TMP=tmp python3 "${base}/PackageSymbolDumper.py" --tracking-file=/builds/worker/processed-packages "--dump_syms=${MOZ_FETCHES_DIR}/dump_syms/dump_syms" /opt/data-reposado/html/content/downloads /builds/worker/symbols
+
+# Hand out artifacts
+gzip -c processed-packages > artifacts/processed-packages.gz
+
+cd symbols
+zip -r9 /builds/worker/artifacts/target.crashreporter-symbols.zip ./* || echo "No symbols dumped"
diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py
diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py
new file mode 100644
index 0000000000..70be2a62a7
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+# Any copyright is dedicated to the Public Domain.
+# http://creativecommons.org/publicdomain/zero/1.0/
+
+import argparse
+import concurrent.futures
+import datetime
+import os
+import subprocess
+import sys
+import traceback
+import urllib
+import zipfile
+
+import requests
+
+# Directories scanned for binaries when no explicit files are passed on the
+# command line (see main()). The list depends on the host platform.
+if sys.platform == "darwin":
+    SYSTEM_DIRS = [
+        "/usr/lib",
+        "/System/Library/Frameworks",
+        "/System/Library/PrivateFrameworks",
+        "/System/Library/Extensions",
+    ]
+else:
+    SYSTEM_DIRS = ["/lib", "/usr/lib"]
+# Base URL that server_has_file() joins symbol file names onto when probing
+# for already-published symbols.
+SYMBOL_SERVER_URL = (
+    "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public/v1/"
+)
+
+
+def should_process(f, platform=sys.platform):
+    """Determine if a file is a platform binary.
+
+    Runs ``file -Lb`` on *f* and inspects the description it prints.
+
+    :param f: path of the file to inspect.
+    :param platform: platform name; "darwin" selects Mach-O detection, any
+        other value selects ELF detection.
+    :return: True for a Mach-O file that is not a kext bundle (darwin) or for
+        an ELF file (other platforms); False otherwise, including when the
+        ``file`` command itself fails.
+    """
+    # The 'file' command can error out. One example is "illegal byte
+    # sequence" on a Japanese language UTF8 text file. Treat any such failure
+    # as "not a binary" on every platform so that one odd file does not
+    # terminate the whole scan prematurely.
+    try:
+        filetype = subprocess.check_output(["file", "-Lb", f], text=True)
+    except subprocess.CalledProcessError:
+        return False
+    if platform == "darwin":
+        # Skip kernel extensions
+        if "kext bundle" in filetype:
+            return False
+        return filetype.startswith("Mach-O")
+    return filetype.startswith("ELF")
+
+
+def get_archs(filename, platform=sys.platform):
+    """
+    List the architectures named in the ``file -Lb`` description of *filename*.
+
+    The description is split on single spaces and every token that exactly
+    matches a known architecture contributes one entry, in order of
+    appearance (repeated tokens yield repeated entries). The *platform*
+    argument is accepted for signature parity with the other helpers but is
+    not consulted here.
+    """
+    # Token printed by `file` -> architecture name handed to dump_syms.
+    arch_by_token = {
+        "arm64e": "arm64e",
+        "x86_64_haswell": "x86_64h",
+        "x86_64": "x86_64",
+        "i386": "i386",
+    }
+    description = subprocess.check_output(["file", "-Lb", filename], text=True)
+    return [
+        arch_by_token[token]
+        for token in description.split(" ")
+        if token in arch_by_token
+    ]
+
+
+def server_has_file(filename):
+    """
+    Send the symbol server a HEAD request to see if it has this symbol file.
+
+    :param filename: symbol file path relative to SYMBOL_SERVER_URL.
+    :return: True only when the server answers with HTTP 200; False for any
+        other status or for any requests-level failure (including a timeout).
+    """
+    # `import urllib` alone does not guarantee the `parse` submodule is
+    # loaded; import it explicitly rather than relying on another module
+    # having done so.
+    import urllib.parse
+
+    url = urllib.parse.urljoin(SYMBOL_SERVER_URL, urllib.parse.quote(filename))
+    try:
+        # Without a timeout a stalled connection would block this worker
+        # forever. A timeout surfaces as a RequestException and is reported
+        # as "not on server".
+        r = requests.head(url, timeout=60)
+        return r.status_code == 200
+    except requests.exceptions.RequestException:
+        return False
+
+
+def process_file(dump_syms, path, arch, verbose, write_all):
+    """Run dump_syms on one architecture of one binary.
+
+    :param dump_syms: path of the dump_syms executable.
+    :param path: binary to dump.
+    :param arch: architecture name passed to dump_syms via ``-a``.
+    :param verbose: when true, print progress and let dump_syms write to
+        stderr; otherwise its stderr is discarded.
+    :param write_all: when false, skip files the symbol server already has.
+    :return: ``(filename, contents)`` where *filename* is the bytes path
+        ``<debug_file>/<debug_id>/<debug_file>.sym`` and *contents* is the
+        raw dump_syms output, or ``(None, None)`` when dump_syms fails, when
+        its output has no usable first line, or when the server already has
+        the file.
+    """
+    try:
+        stderr = None if verbose else subprocess.DEVNULL
+        stdout = subprocess.check_output([dump_syms, "-a", arch, path], stderr=stderr)
+    except subprocess.CalledProcessError:
+        if verbose:
+            print("Processing %s%s...failed." % (path, " [%s]" % arch if arch else ""))
+        return None, None
+    lines = stdout.splitlines()
+    if not lines:
+        # dump_syms succeeded but printed nothing: there is no first line to
+        # parse, so treat it like any other unusable output.
+        return None, None
+    # The first line must split into exactly five space-separated fields; the
+    # last two are used as the debug id and the debug file name.
+    bits = lines[0].split(b" ", 4)
+    if len(bits) != 5:
+        return None, None
+    debug_id, debug_file = bits[3], bits[4]
+    if verbose:
+        sys.stdout.write("Processing %s [%s]..." % (path, arch))
+    filename = os.path.join(debug_file, debug_id, debug_file + b".sym")
+    # see if the server already has this symbol file
+    if not write_all:
+        if server_has_file(filename):
+            if verbose:
+                print("already on server.")
+            return None, None
+    # Collect for uploading
+    if verbose:
+        print("done.")
+    return filename, stdout
+
+
+def get_files(paths, platform=sys.platform):
+    """
+    Yield every processable file named by, or found beneath, *paths*.
+
+    A directory entry is walked recursively and each contained file that
+    passes should_process() is yielded; any other entry is yielded itself
+    if it passes should_process().
+    """
+    for entry in paths:
+        if not os.path.isdir(entry):
+            if should_process(entry, platform=platform):
+                yield entry
+            continue
+        for dirpath, _dirnames, filenames in os.walk(entry):
+            for name in filenames:
+                candidate = os.path.join(dirpath, name)
+                if should_process(candidate, platform=platform):
+                    yield candidate
+
+
+def process_paths(
+    paths, executor, dump_syms, verbose, write_all=False, platform=sys.platform
+):
+    """
+    Submit one process_file() job per (binary, architecture) pair found under
+    *paths* to *executor*, then yield each job's result as it completes.
+
+    A job that raised is reported on stderr (traceback plus an "Error:" line)
+    and yields nothing.
+    """
+    pending = set()
+    for binary in get_files(paths, platform=platform):
+        # Follow symlinks by hand so the real file is the one that gets dumped.
+        while os.path.islink(binary):
+            binary = os.path.join(os.path.dirname(binary), os.readlink(binary))
+        if platform == "linux":
+            # See if there's a -dbg package installed and dump that instead.
+            debug_candidate = "/usr/lib/debug" + binary
+            if os.path.isfile(debug_candidate):
+                binary = debug_candidate
+        pending.update(
+            executor.submit(process_file, dump_syms, binary, arch, verbose, write_all)
+            for arch in get_archs(binary, platform=platform)
+        )
+    for finished in concurrent.futures.as_completed(pending):
+        try:
+            yield finished.result()
+        except Exception as e:
+            traceback.print_exc(file=sys.stderr)
+            print("Error: %s" % str(e), file=sys.stderr)
+
+
+def main():
+    """Gather system symbols into ``symbols.zip`` in the current directory.
+
+    Parses the command line, dumps symbols for the given files (or for
+    SYSTEM_DIRS when none are given) and writes each new symbol file plus an
+    index listing into the zip. The zip is removed again if nothing was
+    dumped.
+
+    :return: 1 when the dump_syms binary is missing or not executable,
+        otherwise None.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Produce verbose output"
+    )
+    parser.add_argument(
+        "--all",
+        action="store_true",
+        help="Gather all system symbols, not just missing ones.",
+    )
+    parser.add_argument("dump_syms", help="Path to dump_syms binary")
+    parser.add_argument(
+        "files", nargs="*", help="Specific files from which to gather symbols."
+    )
+    args = parser.parse_args()
+    args.dump_syms = os.path.abspath(args.dump_syms)
+    # check for the dump_syms binary (abspath() above already made it absolute)
+    if not os.path.exists(args.dump_syms) or not os.access(args.dump_syms, os.X_OK):
+        print(
+            "Error: can't find dump_syms binary at %s!" % args.dump_syms,
+            file=sys.stderr,
+        )
+        return 1
+    file_list = set()
+    zip_path = os.path.abspath("symbols.zip")
+    # Use the executor as a context manager so its worker processes are shut
+    # down once all results have been consumed.
+    with concurrent.futures.ProcessPoolExecutor() as executor:
+        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
+            for filename, contents in process_paths(
+                args.files if args.files else SYSTEM_DIRS,
+                executor,
+                args.dump_syms,
+                args.verbose,
+                args.all,
+            ):
+                if not filename or not contents:
+                    continue
+                # process_file() builds the name from raw dump_syms output, so
+                # it arrives as bytes; zipfile member names and the index
+                # below both need str.
+                if isinstance(filename, bytes):
+                    filename = os.fsdecode(filename)
+                if filename in file_list:
+                    continue
+                file_list.add(filename)
+                zf.writestr(filename, contents)
+            zf.writestr(
+                "ossyms-1.0-{platform}-{date}-symbols.txt".format(
+                    platform=sys.platform.title(),
+                    date=datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
+                ),
+                "\n".join(sorted(file_list)),
+            )
+    if file_list:
+        if args.verbose:
+            print("Generated %s with %d symbols" % (zip_path, len(file_list)))
+    else:
+        os.unlink(zip_path)
+
+
+if __name__ == "__main__":
+    # Propagate main()'s status (1 on error, None -> 0 on success) to the shell.
+    sys.exit(main())
diff --git a/tools/crashreporter/system-symbols/win/LICENSE b/tools/crashreporter/system-symbols/win/LICENSE
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt
new file mode 100644
index 0000000000..d63dc716e9
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt
@@ -0,0 +1,17 @@
+d2d1.pdb
+d3d10level9.pdb
+d3d10warp.pdb
+d3d11.pdb
+d3d9.pdb
+d3dcompiler_47.pdb
+d3dim700.pdb
+kernel32.pdb
+kernelbase.pdb
+ntdll.pdb
+user32.pdb
+wkernel32.pdb
+wkernelbase.pdb
+wntdll.pdb
+ws2_32.pdb
+wuser32.pdb
+zipwriter.pdb
diff --git a/tools/crashreporter/system-symbols/win/run.sh b/tools/crashreporter/system-symbols/win/run.sh
new file mode 100755
index 0000000000..f95b2b160a
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/run.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+# Echo script lines as read (-v), abort on the first failing command (-e)
+# and trace each command as it runs (-x).
+set -v -e -x
+
+# Absolute directory containing this script; symsrv-fetch.py lives next to it.
+base="$(realpath "$(dirname "$0")")"
+
+# Location of the dump_syms binary under MOZ_FETCHES_DIR.
+# NOTE(review): presumably read by symsrv-fetch.py - confirm.
+export DUMP_SYMS_PATH="${MOZ_FETCHES_DIR}/dump_syms/dump_syms"
+
+# Create the output directory, raise the open-file limit, then fetch the
+# symbols into a zip under artifacts/.
+mkdir -p artifacts && \
+ulimit -n 16384 && \
+python3 "${base}/symsrv-fetch.py" artifacts/target.crashreporter-symbols.zip
diff --git a/tools/crashreporter/system-symbols/win/scrape-report.py b/tools/crashreporter/system-symbols/win/scrape-report.py
new file mode 100644
index 0000000000..9bc21801c3
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/scrape-report.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Mozilla
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+import json
+import logging
+import os
+import sys
+
+import requests
+import urlparse
+
+log = logging.getLogger()
+
+
+def fetch_missing_symbols_from_crash(file_or_crash):
+    """Collect the modules flagged with missing symbols in one crash.
+
+    :param file_or_crash: path of a local JSON dump file, a crash report URL
+        containing "report/index/", or a bare crash id.
+    :return: set of ``(debug_file, debug_id, filename, code_id)`` tuples, one
+        per module carrying a "missing_symbols" key; an empty set when the
+        crash cannot be fetched.
+    """
+    if os.path.isfile(file_or_crash):
+        log.info("Fetching missing symbols from JSON file: %s" % file_or_crash)
+        # Close the file deterministically instead of leaking the handle.
+        with open(file_or_crash, "rb") as dump_file:
+            j = {"json_dump": json.load(dump_file)}
+    else:
+        if "report/index/" in file_or_crash:
+            # The crash id is the last path component of the report URL.
+            crash_id = urlparse.urlparse(file_or_crash).path.split("/")[-1]
+        else:
+            crash_id = file_or_crash
+        url = (
+            "https://crash-stats.mozilla.org/api/ProcessedCrash/"
+            "?crash_id={crash_id}&datatype=processed".format(crash_id=crash_id)
+        )
+        log.info("Fetching missing symbols from crash: %s" % url)
+        r = requests.get(url)
+        if r.status_code != 200:
+            log.error("Failed to fetch crash %s" % url)
+            return set()
+        j = r.json()
+    return {
+        (m["debug_file"], m["debug_id"], m["filename"], m["code_id"])
+        for m in j["json_dump"]["modules"]
+        if "missing_symbols" in m
+    }
+
+
+def main():
+    """Print the missing symbols of the crash named on the command line as CSV.
+
+    Exits with status 1 when no crash URL or id argument is given.
+    """
+    logging.basicConfig()
+    log.setLevel(logging.DEBUG)
+    # Keep urllib3 quiet unless something actually goes wrong.
+    logging.getLogger("urllib3").setLevel(logging.ERROR)
+
+    if len(sys.argv) < 2:
+        log.error("Specify a crash URL or ID")
+        sys.exit(1)
+    missing = fetch_missing_symbols_from_crash(sys.argv[1])
+    log.info("Found %d missing symbols" % len(missing))
+    writer = csv.writer(sys.stdout)
+    writer.writerow(["debug_file", "debug_id", "code_file", "code_id"])
+    writer.writerows(missing)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/crashreporter/system-symbols/win/skiplist.txt b/tools/crashreporter/system-symbols/win/skiplist.txt
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/skiplist.txt
diff --git a/tools/crashreporter/system-symbols/win/symsrv-fetch.py b/tools/crashreporter/system-symbols/win/symsrv-fetch.py
new file mode 100644
index 0000000000..b188aba6aa
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/symsrv-fetch.py
@@ -0,0 +1,528 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Mozilla
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This script will read a CSV of modules from Socorro, and try to retrieve
+# missing symbols from Microsoft's symbol server. It honors a list
+# (ignorelist.txt) of symbols that are known to be from our applications,
+# and it maintains its own list of symbols that the MS symbol server
+# doesn't have (skiplist.txt).
+#
+# The script also depends on having write access to the directory it is
+# installed in, to write the skiplist text file.
+
+import argparse
+import asyncio
+import logging
+import os
+import shutil
+import zipfile
+from collections import defaultdict
+from tempfile import mkdtemp
+from urllib.parse import quote, urljoin
+
+from aiofile import AIOFile, LineReader
+from aiohttp import ClientSession, ClientTimeout
+from aiohttp.connector import TCPConnector
+
+# Just hardcoded here
+# Base URLs that server_has_file()/fetch_file() join symbol paths onto.
+MICROSOFT_SYMBOL_SERVER = "https://msdl.microsoft.com/download/symbols/"
+USER_AGENT = "Microsoft-Symbol-Server/6.3.0.0"
+MOZILLA_SYMBOL_SERVER = (
+    "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public/v1/"
+)
+# NOTE(review): presumably the CSV handed to fetch_missing_symbols() - confirm
+# against the caller.
+MISSING_SYMBOLS_URL = "https://symbols.mozilla.org/missingsymbols.csv?microsoft=only"
+# Headers sent with every HTTP request made by this script.
+HEADERS = {"User-Agent": USER_AGENT}
+# Template for dump_syms' --symbol-server argument; {0} is replaced with the
+# local symbol cache directory in dump_module().
+SYM_SRV = "SRV*{0}*https://msdl.microsoft.com/download/symbols;SRV*{0}*https://software.intel.com/sites/downloads/symbols;SRV*{0}*https://download.amd.com/dir/bin;SRV*{0}*https://driver-symbols.nvidia.com"  # noqa
+# NOTE(review): presumably a timeout in seconds - confirm where it is used.
+TIMEOUT = 7200
+# Maximum number of attempts made by server_has_file() and fetch_file().
+RETRIES = 5
+
+
+# Root logger, shared by every function in this script.
+log = logging.getLogger()
+
+
+def get_type(data):
+    """Classify a downloaded blob by its leading magic bytes.
+
+    Returns one of "pdb-v7", "pdb-v2", "dll", "cab" or "unknown".
+    """
+    signatures = (
+        (b"Microsoft C/C++ MSF 7.00", "pdb-v7"),  # PDB v7
+        (b"Microsoft C/C++ program database 2.00", "pdb-v2"),  # PDB v2
+        (b"MZ", "dll"),  # DLL
+        (b"MSCF", "cab"),  # CAB
+    )
+    for magic, kind in signatures:
+        if data.startswith(magic):
+            return kind
+
+    return "unknown"
+
+
+async def exp_backoff(retry_num):
+    """Sleep for ``2 ** retry_num`` seconds."""
+    delay = 2 ** retry_num
+    await asyncio.sleep(delay)
+
+
+async def server_has_file(client, server, filename):
+    """
+    Ask *server* with a HEAD request whether it holds *filename*.
+
+    A 200 answer counts as a hit when the server URL contains "mozilla", or
+    when it contains "microsoft" and the Content-Type is
+    "application/octet-stream". Any exception triggers an exponential
+    back-off and another attempt, up to RETRIES attempts; after that the
+    file is reported as absent.
+    """
+    url = urljoin(server, quote(filename))
+    for attempt in range(RETRIES):
+        try:
+            async with client.head(url, headers=HEADERS, allow_redirects=True) as resp:
+                if resp.status != 200:
+                    return False
+                from_microsoft = (
+                    "microsoft" in server
+                    and resp.headers["Content-Type"] == "application/octet-stream"
+                )
+                if not (from_microsoft or "mozilla" in server):
+                    return False
+                log.debug(f"File exists: {url}")
+                return True
+        except Exception as e:
+            # Sometimes we've SSL errors or disconnections... so in such a situation just retry
+            log.warning(f"Error with {url}: retry")
+            log.exception(e)
+            await exp_backoff(attempt)
+
+    log.debug(f"Too many retries (HEAD) for {url}: give up.")
+    return False
+
+
+async def fetch_file(client, server, filename):
+    """
+    Fetch the file from the server
+
+    Makes up to RETRIES GET attempts for ``server`` + quoted ``filename``.
+
+    Returns the response body when it is recognised by get_type() as
+    anything other than "unknown" or "pdb-v2"; returns None for a PDB v2
+    file or once every attempt has been used up.
+    """
+    url = urljoin(server, quote(filename))
+    log.debug(f"Fetch url: {url}")
+    for i in range(RETRIES):
+        try:
+            async with client.get(url, headers=HEADERS, allow_redirects=True) as resp:
+                if resp.status == 200:
+                    data = await resp.read()
+                    typ = get_type(data)
+                    if typ == "unknown":
+                        # try again
+                        # (after an exponential back-off based on the attempt number)
+                        await exp_backoff(i)
+                    elif typ == "pdb-v2":
+                        # too old: skip it
+                        log.debug(f"PDB v2 (skipped because too old): {url}")
+                        return None
+                    else:
+                        return data
+                else:
+                    # A non-200 answer is logged and the loop moves straight on
+                    # to the next attempt; no delay is applied on this path.
+                    log.error(f"Cannot get data (status {resp.status}) for {url}: ")
+        except Exception as e:
+            # Any exception is logged, followed by a fixed half-second pause
+            # before the next attempt.
+            log.warning(f"Error with {url}")
+            log.exception(e)
+            await asyncio.sleep(0.5)
+
+    log.debug(f"Too many retries (GET) for {url}: give up.")
+    return None
+
+
+def write_skiplist(skiplist):
+    """Overwrite ``skiplist.txt`` in the current directory.
+
+    Each entry of the *skiplist* mapping becomes one
+    ``<debug_id> <debug_file>`` line.
+    """
+    with open("skiplist.txt", "w") as out:
+        for debug_id, debug_file in skiplist.items():
+            out.write(f"{debug_id} {debug_file}\n")
+
+
+async def fetch_missing_symbols(u):
+    """Download the text at URL *u* and return its lines minus the first one."""
+    log.info("Trying missing symbols from %s" % u)
+    async with ClientSession() as session:
+        async with session.get(u, headers=HEADERS) as resp:
+            # The server currently does not set an encoding so force it to UTF-8
+            body = await resp.text("UTF-8")
+            rows = body.splitlines()
+            # just skip the first line since it contains column headers
+            return rows[1:]
+
+
+async def get_list(filename):
+    """Read *filename* into a set of its right-stripped lines.
+
+    A missing file yields an empty set.
+    """
+    entries = set()
+    try:
+        async with AIOFile(filename, "r") as handle:
+            async for raw_line in LineReader(handle):
+                entries.add(raw_line.rstrip())
+    except FileNotFoundError:
+        pass
+
+    log.debug(f"{filename} contains {len(entries)} items")
+
+    return entries
+
+
+async def get_skiplist():
+    """Load ``skiplist.txt`` into a ``{debug_id: lowercased debug_file}`` dict.
+
+    Lines that do not split into two fields on the first space are ignored;
+    a missing file yields an empty dict.
+    """
+    path = "skiplist.txt"
+    entries = {}
+    try:
+        async with AIOFile(path, "r") as handle:
+            async for raw_line in LineReader(handle):
+                # A blank line splits into a single field and is dropped here too.
+                fields = raw_line.strip().split(" ", maxsplit=1)
+                if len(fields) == 2:
+                    debug_id, debug_file = fields
+                    entries[debug_id] = debug_file.lower()
+    except FileNotFoundError:
+        pass
+
+    log.debug(f"{path} contains {len(entries)} items")
+
+    return entries
+
+
+def get_missing_symbols(missing_symbols, skiplist, ignorelist):
+    """Group the CSV rows of missing symbols by PDB name.
+
+    Each row is ``pdb,debug_id[,code_file,code_id...]``. Rows whose
+    lowercased PDB name is in *ignorelist*, or whose debug id maps to that
+    name in *skiplist*, are counted in the returned stats and left out.
+
+    Returns ``(modules, stats)`` where *modules* maps a PDB name to a set of
+    ``(debug_id, code_file, code_id)`` tuples and *stats* counts the rows
+    dropped by each list.
+    """
+    stats = {"ignorelist": 0, "skiplist": 0}
+    modules = defaultdict(set)
+    for row in missing_symbols:
+        fields = row.rstrip().split(",")
+        if len(fields) < 2:
+            continue
+        pdb, debug_id = fields[0], fields[1]
+        if len(fields) >= 4:
+            code_file, code_id = fields[2], fields[3]
+        else:
+            code_file = code_id = None
+        if not (pdb and debug_id and pdb.endswith(".pdb")):
+            continue
+
+        pdb_lower = pdb.lower()
+        if pdb_lower in ignorelist:
+            stats["ignorelist"] += 1
+            continue
+
+        if skiplist.get(debug_id) == pdb_lower:
+            stats["skiplist"] += 1
+            # We've asked the symbol server previously about this,
+            # so skip it.
+            log.debug("%s/%s already in skiplist", pdb, debug_id)
+            continue
+
+        modules[pdb].add((debug_id, code_file, code_id))
+
+    return modules, stats
+
+
+async def collect_info(client, filename, debug_id, code_file, code_id):
+    """Probe the symbol servers for one module.
+
+    Returns ``(filename, debug_id, code_file, code_id, has_pdb, has_code,
+    is_there)``: *has_pdb* says the Microsoft server has the PDB and the
+    Mozilla server lacks the .sym, *has_code* says the Microsoft server also
+    has the binary, and *is_there* says the Mozilla server already has the
+    .sym file.
+    """
+    sym_name = filename.replace(".pdb", "") + ".sym"
+    pdb_path = os.path.join(filename, debug_id, filename)
+    sym_path = os.path.join(filename, debug_id, sym_name)
+
+    has_pdb = await server_has_file(client, MICROSOFT_SYMBOL_SERVER, pdb_path)
+    if not has_pdb:
+        return (filename, debug_id, code_file, code_id, has_pdb, False, False)
+
+    if await server_has_file(client, MOZILLA_SYMBOL_SERVER, sym_path):
+        # if the file is on moz sym server no need to do anything
+        return (filename, debug_id, code_file, code_id, False, False, True)
+
+    has_code = (
+        code_file
+        and code_id
+        and await server_has_file(
+            client,
+            MICROSOFT_SYMBOL_SERVER,
+            f"{code_file}/{code_id}/{code_file}",
+        )
+    )
+    return (filename, debug_id, code_file, code_id, has_pdb, has_code, False)
+
+
+async def check_x86_file(path):
+    """Return True when the file at *path* begins with ``MODULE windows x86 ``."""
+    expected = b"MODULE windows x86 "
+    async with AIOFile(path, "rb") as handle:
+        prefix = await handle.read(len(expected))
+    return prefix == expected
+
+
+async def run_command(cmd):
+    """Run *cmd* through the shell and return its decoded, stripped stderr.
+
+    Standard output is captured and discarded; the exit status is not
+    inspected.
+    """
+    pipe = asyncio.subprocess.PIPE
+    process = await asyncio.create_subprocess_shell(cmd, stdout=pipe, stderr=pipe)
+    _stdout, stderr_bytes = await process.communicate()
+    return stderr_bytes.decode().strip()
+
+
+async def dump_module(
+    output, symcache, filename, debug_id, code_file, code_id, has_code, dump_syms
+):
+    """Run dump_syms for one module and report the outcome.
+
+    When ``has_code`` is truthy the binary (``code_file`` / ``code_id``) is
+    dumped, otherwise the PDB (``filename`` / ``debug_id``) is.  The result
+    is expected at ``<output>/<filename>/<debug_id>/<name>.sym``.
+
+    Returns a dict with two keys:
+
+    * ``"path"``: the ``.sym`` path relative to ``output``;
+    * ``"error"``: ``"ok"``, ``"dump error"`` (dump_syms wrote to stderr or
+      produced no file) or ``"no binary"`` (dumped from a PDB only and the
+      output is not an x86 symbol file).
+    """
+    # Local import: nothing else in this function's scope needs shlex.
+    from shlex import quote
+
+    sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", ".sym"))
+    output_path = os.path.join(output, sym_path)
+    sym_srv = SYM_SRV.format(symcache)
+    res = {"path": sym_path, "error": "ok"}
+
+    # Module names and ids come from the downloaded missing-symbols report,
+    # so quote everything interpolated into the shell command line.  For
+    # ordinary names quote() returns the string unchanged.
+    if has_code:
+        target = f"{quote(code_file)} --code-id {quote(code_id)}"
+    else:
+        target = f"{quote(filename)} --debug-id {quote(debug_id)}"
+    cmd = (
+        f"{dump_syms} {target} --check-cfi --inlines "
+        f"--store {quote(output)} --symbol-server {quote(sym_srv)} --verbose error"
+    )
+
+    err = await run_command(cmd)
+
+    if err:
+        log.error(f"Error with {cmd}")
+        log.error(err)
+        res["error"] = "dump error"
+        return res
+
+    if not os.path.exists(output_path):
+        log.error(f"Could not find file {output_path} after running {cmd}")
+        res["error"] = "dump error"
+        return res
+
+    if not has_code and not await check_x86_file(output_path):
+        # PDB for 32 bits contains everything we need (symbols + stack unwind info)
+        # But PDB for 64 bits don't contain stack unwind info
+        # (they're in the binary (.dll/.exe) itself).
+        # So here we're logging because we've got a PDB (64 bits) without its DLL/EXE.
+        if code_file and code_id:
+            log.debug(f"x86_64 binary {code_file}/{code_id} required")
+        else:
+            log.debug(f"x86_64 binary for {filename}/{debug_id} required")
+        res["error"] = "no binary"
+        return res
+
+    log.info(f"Successfully dumped: {filename}/{debug_id}")
+    return res
+
+
+async def dump(output, symcache, modules, dump_syms):
+    """Dump every module concurrently and summarise the results.
+
+    ``modules`` yields ``(filename, debug_id, code_file, code_id, has_code)``
+    tuples.  Returns ``(file_index, stats)`` where ``file_index`` is the set
+    of relative ``.sym`` paths worth keeping and ``stats`` counts the
+    ``"dump_error"`` and ``"no_bin"`` outcomes.
+    """
+    results = await asyncio.gather(
+        *(
+            dump_module(
+                output,
+                symcache,
+                filename,
+                debug_id,
+                code_file,
+                code_id,
+                has_code,
+                dump_syms,
+            )
+            for filename, debug_id, code_file, code_id, has_code in modules
+        )
+    )
+
+    file_index = set()
+    stats = {"dump_error": 0, "no_bin": 0}
+    for result in results:
+        status = result["error"]
+        if status == "dump error":
+            stats["dump_error"] += 1
+            continue
+        # Even without CFI the generated file is still useful to get symbols
+        # from addresses, so "no binary" results are kept alongside "ok".
+        if status in ("ok", "no binary"):
+            file_index.add(result["path"])
+        if status == "no binary":
+            stats["no_bin"] += 1
+
+    return file_index, stats
+
+
+async def collect(modules):
+    """Query the symbol servers for every module and pick those to dump.
+
+    ``modules`` maps a PDB file name to an iterable of
+    ``(debug_id, code_file, code_id)`` tuples.  Returns ``(to_dump, stats)``:
+    ``to_dump`` lists ``(filename, debug_id, code_file, code_id, has_code)``
+    for each module whose PDB must be processed, and ``stats`` counts the
+    modules skipped because no PDB was found (``"no_pdb"``) or because the
+    symbols are already published (``"is_there"``).
+    """
+    # In case of errors (Too many open files), just change limit_per_host
+    connector = TCPConnector(limit=100, limit_per_host=4)
+
+    async with ClientSession(
+        loop=asyncio.get_running_loop(),
+        timeout=ClientTimeout(total=TIMEOUT),
+        connector=connector,
+    ) as client:
+        pending = [
+            collect_info(client, filename, debug_id, code_file, code_id)
+            for filename, ids in modules.items()
+            for debug_id, code_file, code_id in ids
+        ]
+        results = await asyncio.gather(*pending)
+
+    to_dump = []
+    stats = {"no_pdb": 0, "is_there": 0}
+    for filename, debug_id, code_file, code_id, has_pdb, has_code, is_there in results:
+        if has_pdb:
+            log.info(
+                f"To dump: {filename}/{debug_id}, {code_file}/{code_id} and has_code = {has_code}"
+            )
+            to_dump.append((filename, debug_id, code_file, code_id, has_code))
+        elif is_there:
+            stats["is_there"] += 1
+        else:
+            stats["no_pdb"] += 1
+            log.info(f"No pdb for {filename}/{debug_id}")
+
+    log.info(f"Collected {len(to_dump)} files to dump")
+
+    return to_dump, stats
+
+
+async def make_dirs(path):
+    """Create ``path`` (and any missing parents) without blocking the loop.
+
+    The blocking ``os.makedirs`` call is handed to the loop's default
+    executor; an already existing directory is not an error.
+    """
+    await asyncio.get_running_loop().run_in_executor(
+        None, lambda: os.makedirs(path, exist_ok=True)
+    )
+
+
+async def fetch_and_write(output, client, filename, file_id):
+    """Download one file from the Microsoft symbol server into ``output``.
+
+    The file is requested as ``<filename>/<file_id>/<filename>`` and stored
+    under the same relative layout below ``output``.  Returns True when data
+    was fetched and written, False when the fetch returned nothing.
+    """
+    remote_path = os.path.join(filename, file_id, filename)
+    data = await fetch_file(client, MICROSOFT_SYMBOL_SERVER, remote_path)
+
+    if data:
+        target_dir = os.path.join(output, filename, file_id)
+        await make_dirs(target_dir)
+
+        async with AIOFile(os.path.join(target_dir, filename), "wb") as out_file:
+            await out_file.write(data)
+
+        return True
+
+    return False
+
+
+async def fetch_all(output, modules):
+    """Download the PDB (and, when available, the binary) of every module.
+
+    ``modules`` is a sequence of
+    ``(filename, debug_id, code_file, code_id, has_code)`` tuples; it is
+    iterated twice.  Returns the tuples whose PDB download succeeded, with
+    ``has_code`` replaced by the outcome of the binary download when one was
+    attempted.
+    """
+    # In case of errors (Too many open files), just change limit_per_host
+    connector = TCPConnector(limit=100, limit_per_host=0)
+
+    async with ClientSession(
+        loop=asyncio.get_running_loop(),
+        timeout=ClientTimeout(total=TIMEOUT),
+        connector=connector,
+    ) as client:
+        # One download per PDB, immediately followed by one for its binary
+        # when has_code is set; the results are consumed in the same order.
+        pending = []
+        for filename, debug_id, code_file, code_id, has_code in modules:
+            pending.append(fetch_and_write(output, client, filename, debug_id))
+            if has_code:
+                pending.append(fetch_and_write(output, client, code_file, code_id))
+
+        outcomes = iter(await asyncio.gather(*pending))
+
+        fetched_modules = []
+        for filename, debug_id, code_file, code_id, has_code in modules:
+            fetched_pdb = next(outcomes)
+            fetched_code = next(outcomes) if has_code else has_code
+            if fetched_pdb:
+                fetched_modules.append(
+                    (filename, debug_id, code_file, code_id, fetched_code)
+                )
+
+    return fetched_modules
+
+
+def get_base_data(url):
+    """Load, concurrently, the four inputs the run starts from.
+
+    Returns a list of four results, in this order: the missing-symbols data
+    fetched from ``url``, the ignore list, the known Microsoft symbols list
+    and the skiplist.
+    """
+
+    async def load_inputs():
+        return await asyncio.gather(
+            fetch_missing_symbols(url),
+            # Symbols that we know belong to us, so don't ask Microsoft for them.
+            get_list("ignorelist.txt"),
+            # Symbols that we know belong to Microsoft, so don't skiplist them.
+            get_list("known-microsoft-symbols.txt"),
+            # Symbols that we've asked for in the past unsuccessfully
+            get_skiplist(),
+        )
+
+    return asyncio.run(load_inputs())
+
+
+def gen_zip(output, output_dir, file_index):
+    """Write the files listed in ``file_index`` into the zip at ``output``.
+
+    Each entry of ``file_index`` is a path relative to ``output_dir`` and is
+    stored in the archive under that same relative name.  Nothing is written
+    when ``file_index`` is empty.
+    """
+    if file_index:
+        with zipfile.ZipFile(output, "w", zipfile.ZIP_DEFLATED) as archive:
+            for rel_path in file_index:
+                archive.write(os.path.join(output_dir, rel_path), rel_path)
+        log.info(f"Wrote zip as {output}")
+
+
+def main():
+    """Command-line entry point.
+
+    Parses the arguments, loads the missing-symbols report and the local
+    lists, checks the symbol servers, downloads and dumps what is missing,
+    zips the resulting ``.sym`` files into the path given as ``zip``, writes
+    the skiplist back and logs a summary.
+    """
+    parser = argparse.ArgumentParser(
+        description="Fetch missing symbols from Microsoft symbol server"
+    )
+    parser.add_argument(
+        "--missing-symbols",
+        type=str,
+        help="missing symbols URL",
+        default=MISSING_SYMBOLS_URL,
+    )
+    parser.add_argument("zip", type=str, help="output zip file")
+    parser.add_argument(
+        "--dump-syms",
+        type=str,
+        help="dump_syms path",
+        default=os.environ.get("DUMP_SYMS_PATH"),
+    )
+
+    args = parser.parse_args()
+
+    # Validate explicitly: an assert would be stripped when running with -O.
+    if not args.dump_syms:
+        parser.error("dump_syms path is empty")
+
+    logging.basicConfig(level=logging.DEBUG)
+    aiohttp_logger = logging.getLogger("aiohttp.client")
+    aiohttp_logger.setLevel(logging.INFO)
+    log.info("Started")
+
+    # known_ms_symbols is loaded with the other inputs but not used below.
+    missing_symbols, ignorelist, known_ms_symbols, skiplist = get_base_data(
+        args.missing_symbols
+    )
+
+    modules, stats_skipped = get_missing_symbols(missing_symbols, skiplist, ignorelist)
+
+    # symbol_path receives the generated .sym files, temp_path the downloads.
+    symbol_path = mkdtemp(prefix="symsrvfetch")
+    temp_path = mkdtemp(prefix="symcache")
+
+    try:
+        modules, stats_collect = asyncio.run(collect(modules))
+        modules = asyncio.run(fetch_all(temp_path, modules))
+
+        file_index, stats_dump = asyncio.run(
+            dump(symbol_path, temp_path, modules, args.dump_syms)
+        )
+
+        gen_zip(args.zip, symbol_path, file_index)
+    finally:
+        # Always remove the scratch directories, even when a stage failed.
+        shutil.rmtree(symbol_path, True)
+        shutil.rmtree(temp_path, True)
+
+    write_skiplist(skiplist)
+
+    if not file_index:
+        log.info(f"No symbols downloaded: {len(missing_symbols)} considered")
+    else:
+        log.info(
+            f"Total files: {len(missing_symbols)}, Stored {len(file_index)} symbol files"
+        )
+
+    log.info(
+        f"{stats_collect['is_there']} already present, {stats_skipped['ignorelist']} in ignored list, "  # noqa
+        f"{stats_skipped['skiplist']} skipped, {stats_collect['no_pdb']} not found, "
+        f"{stats_dump['dump_error']} processed with errors, "
+        f"{stats_dump['no_bin']} processed but with no binaries (x86_64)"
+    )
+    log.info("Finished, exiting")
+
+
+# Run the fetcher only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:22:09 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 09:22:09 +0000
commit	43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree	620249daf56c0258faa40cbdcf9cfba06de2a846 /tools/crashreporter/system-symbols
parent	Initial commit. (diff)
download	firefox-upstream.tar.xz firefox-upstream.zip