summaryrefslogtreecommitdiffstats
path: root/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py
blob: 9998dcac27062fba9374eb39dbd38ce86e275461 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/usr/bin/env python
# Any copyright is dedicated to the Public Domain.
# http://creativecommons.org/publicdomain/zero/1.0/

import argparse
import concurrent.futures
import datetime
import os
import subprocess
import sys
import traceback
import urllib
import zipfile

import requests

if sys.platform == "darwin":
    SYSTEM_DIRS = [
        "/usr/lib",
        "/System/Library/Frameworks",
        "/System/Library/PrivateFrameworks",
        "/System/Library/Extensions",
    ]
else:
    SYSTEM_DIRS = ["/lib", "/usr/lib"]
SYMBOL_SERVER_URL = "https://symbols.mozilla.org/"


def should_process(f, platform=sys.platform):
    """Determine if a file is a platform binary"""
    if platform == "darwin":
        """
        The 'file' command can error out. One example is "illegal byte
        sequence" on a Japanese language UTF8 text file. So we must wrap the
        command in a try/except block to prevent the script from terminating
        prematurely when this happens.
        """
        try:
            filetype = subprocess.check_output(["file", "-Lb", f], text=True)
        except subprocess.CalledProcessError:
            return False
        """Skip kernel extensions"""
        if "kext bundle" in filetype:
            return False
        return filetype.startswith("Mach-O")
    else:
        return subprocess.check_output(["file", "-Lb", f], text=True).startswith("ELF")
    return False


def get_archs(filename, platform=sys.platform):
    """
    Find the list of architectures present in a Mach-O file, or a single-element
    list on non-OS X.
    """
    architectures = []
    output = subprocess.check_output(["file", "-Lb", filename], text=True)
    for string in output.split(" "):
        if string == "arm64e":
            architectures.append("arm64e")
        elif string == "x86_64_haswell":
            architectures.append("x86_64h")
        elif string == "x86_64":
            architectures.append("x86_64")
        elif string == "i386":
            architectures.append("i386")

    return architectures


def server_has_file(filename):
    """
    Send the symbol server a HEAD request to see if it has this symbol file.
    """
    try:
        r = requests.head(
            urllib.parse.urljoin(SYMBOL_SERVER_URL, urllib.parse.quote(filename))
        )
        return r.status_code == 200
    except requests.exceptions.RequestException:
        return False


def process_file(dump_syms, path, arch, verbose, write_all):
    arch_arg = ["-a", arch]
    try:
        stderr = None if verbose else subprocess.DEVNULL
        stdout = subprocess.check_output([dump_syms] + arch_arg + [path], stderr=stderr)
    except subprocess.CalledProcessError:
        if verbose:
            print("Processing %s%s...failed." % (path, " [%s]" % arch if arch else ""))
        return None, None
    module = stdout.splitlines()[0]
    bits = module.split(b" ", 4)
    if len(bits) != 5:
        return None, None
    _, platform, cpu_arch, debug_id, debug_file = bits
    if verbose:
        sys.stdout.write("Processing %s [%s]..." % (path, arch))
    filename = os.path.join(debug_file, debug_id, debug_file + b".sym")
    # see if the server already has this symbol file
    if not write_all:
        if server_has_file(filename):
            if verbose:
                print("already on server.")
            return None, None
    # Collect for uploading
    if verbose:
        print("done.")
    return filename, stdout


def get_files(paths, platform=sys.platform):
    """
    For each entry passed in paths if the path is a file that can
    be processed, yield it, otherwise if it is a directory yield files
    under it that can be processed.
    """
    for path in paths:
        if os.path.isdir(path):
            for root, subdirs, files in os.walk(path):
                for f in files:
                    fullpath = os.path.join(root, f)
                    if should_process(fullpath, platform=platform):
                        yield fullpath
        elif should_process(path, platform=platform):
            yield path


def process_paths(
    paths, executor, dump_syms, verbose, write_all=False, platform=sys.platform
):
    jobs = set()
    for fullpath in get_files(paths, platform=platform):
        while os.path.islink(fullpath):
            fullpath = os.path.join(os.path.dirname(fullpath), os.readlink(fullpath))
        if platform == "linux":
            # See if there's a -dbg package installed and dump that instead.
            dbgpath = "/usr/lib/debug" + fullpath
            if os.path.isfile(dbgpath):
                fullpath = dbgpath
        for arch in get_archs(fullpath, platform=platform):
            jobs.add(
                executor.submit(
                    process_file, dump_syms, fullpath, arch, verbose, write_all
                )
            )
    for job in concurrent.futures.as_completed(jobs):
        try:
            yield job.result()
        except Exception as e:
            traceback.print_exc(file=sys.stderr)
            print("Error: %s" % str(e), file=sys.stderr)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Produce verbose output"
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Gather all system symbols, not just missing ones.",
    )
    parser.add_argument("dump_syms", help="Path to dump_syms binary")
    parser.add_argument(
        "files", nargs="*", help="Specific files from which to gather symbols."
    )
    args = parser.parse_args()
    args.dump_syms = os.path.abspath(args.dump_syms)
    # check for the dump_syms binary
    if (
        not os.path.isabs(args.dump_syms)
        or not os.path.exists(args.dump_syms)
        or not os.access(args.dump_syms, os.X_OK)
    ):
        print(
            "Error: can't find dump_syms binary at %s!" % args.dump_syms,
            file=sys.stderr,
        )
        return 1
    file_list = set()
    executor = concurrent.futures.ProcessPoolExecutor()
    zip_path = os.path.abspath("symbols.zip")
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for filename, contents in process_paths(
            args.files if args.files else SYSTEM_DIRS,
            executor,
            args.dump_syms,
            args.verbose,
            args.all,
        ):
            if filename and contents and filename not in file_list:
                file_list.add(filename)
                zf.writestr(filename, contents)
        zf.writestr(
            "ossyms-1.0-{platform}-{date}-symbols.txt".format(
                platform=sys.platform.title(),
                date=datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
            ),
            "\n".join(file_list),
        )
    if file_list:
        if args.verbose:
            print("Generated %s with %d symbols" % (zip_path, len(file_list)))
    else:
        os.unlink("symbols.zip")


if __name__ == "__main__":
    main()