diff options
Diffstat (limited to 'tools/crashreporter/system-symbols/win')
6 files changed, 831 insertions, 0 deletions
diff --git a/tools/crashreporter/system-symbols/win/LICENSE b/tools/crashreporter/system-symbols/win/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt new file mode 100644 index 0000000000..d63dc716e9 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt @@ -0,0 +1,17 @@ +d2d1.pdb
+d3d10level9.pdb
+d3d10warp.pdb
+d3d11.pdb
+d3d9.pdb
+d3dcompiler_47.pdb
+d3dim700.pdb
+kernel32.pdb
+kernelbase.pdb
+ntdll.pdb
+user32.pdb
+wkernel32.pdb
+wkernelbase.pdb
+wntdll.pdb
+ws2_32.pdb
+wuser32.pdb
+zipwriter.pdb
diff --git a/tools/crashreporter/system-symbols/win/run.sh b/tools/crashreporter/system-symbols/win/run.sh new file mode 100755 index 0000000000..f95b2b160a --- /dev/null +++ b/tools/crashreporter/system-symbols/win/run.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +set -v -e -x + +base="$(realpath "$(dirname "$0")")" + +export DUMP_SYMS_PATH="${MOZ_FETCHES_DIR}/dump_syms/dump_syms" + +mkdir -p artifacts && \ +ulimit -n 16384 && \ +python3 "${base}/symsrv-fetch.py" artifacts/target.crashreporter-symbols.zip diff --git a/tools/crashreporter/system-symbols/win/scrape-report.py b/tools/crashreporter/system-symbols/win/scrape-report.py new file mode 100644 index 0000000000..9bc21801c3 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/scrape-report.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# +# Copyright 2016 Mozilla +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import csv +import json +import logging +import os +import sys + +import requests +import urlparse + +log = logging.getLogger() + + +def fetch_missing_symbols_from_crash(file_or_crash): + if os.path.isfile(file_or_crash): + log.info("Fetching missing symbols from JSON file: %s" % file_or_crash) + j = {"json_dump": json.load(open(file_or_crash, "rb"))} + else: + if "report/index/" in file_or_crash: + crash_id = urlparse.urlparse(file_or_crash).path.split("/")[-1] + else: + crash_id = file_or_crash + url = ( + "https://crash-stats.mozilla.org/api/ProcessedCrash/" + "?crash_id={crash_id}&datatype=processed".format(crash_id=crash_id) + ) + log.info("Fetching missing symbols from crash: %s" % url) + r = requests.get(url) + if r.status_code != 200: + log.error("Failed to fetch crash %s" % url) + return set() + j = r.json() + return set( + [ + (m["debug_file"], m["debug_id"], m["filename"], m["code_id"]) + for m in j["json_dump"]["modules"] + if "missing_symbols" in m + ] + ) + + +def main(): + logging.basicConfig() + log.setLevel(logging.DEBUG) + urllib3_logger = logging.getLogger("urllib3") + urllib3_logger.setLevel(logging.ERROR) + + if len(sys.argv) < 2: + log.error("Specify a crash URL or ID") + sys.exit(1) + symbols = fetch_missing_symbols_from_crash(sys.argv[1]) + log.info("Found %d missing symbols" % len(symbols)) + c = csv.writer(sys.stdout) + c.writerow(["debug_file", "debug_id", "code_file", "code_id"]) + for row in symbols: + c.writerow(row) + + +if __name__ == "__main__": + main() diff --git a/tools/crashreporter/system-symbols/win/skiplist.txt b/tools/crashreporter/system-symbols/win/skiplist.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/skiplist.txt diff --git a/tools/crashreporter/system-symbols/win/symsrv-fetch.py b/tools/crashreporter/system-symbols/win/symsrv-fetch.py new file mode 100644 index 0000000000..bc3b421cd0 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/symsrv-fetch.py @@ -0,0 +1,526 @@ +#!/usr/bin/env python +# +# Copyright 2016 Mozilla +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This script will read a CSV of modules from Socorro, and try to retrieve +# missing symbols from Microsoft's symbol server. It honors a list +# (ignorelist.txt) of symbols that are known to be from our applications, +# and it maintains its own list of symbols that the MS symbol server +# doesn't have (skiplist.txt). +# +# The script also depends on having write access to the directory it is +# installed in, to write the skiplist text file. + +import argparse +import asyncio +import logging +import os +import shutil +import zipfile +from collections import defaultdict +from tempfile import mkdtemp +from urllib.parse import quote, urljoin + +from aiofile import AIOFile, LineReader +from aiohttp import ClientSession, ClientTimeout +from aiohttp.connector import TCPConnector + +# Just hardcoded here +MICROSOFT_SYMBOL_SERVER = "https://msdl.microsoft.com/download/symbols/" +USER_AGENT = "Microsoft-Symbol-Server/6.3.0.0" +MOZILLA_SYMBOL_SERVER = "https://symbols.mozilla.org/" +MISSING_SYMBOLS_URL = "https://symbols.mozilla.org/missingsymbols.csv?microsoft=only" +HEADERS = {"User-Agent": USER_AGENT} +SYM_SRV = "SRV*{0}*https://msdl.microsoft.com/download/symbols;SRV*{0}*https://software.intel.com/sites/downloads/symbols;SRV*{0}*https://download.amd.com/dir/bin;SRV*{0}*https://driver-symbols.nvidia.com" # noqa +TIMEOUT = 7200 +RETRIES = 5 + + +log = logging.getLogger() + + +def get_type(data): + # PDB v7 + if data.startswith(b"Microsoft C/C++ MSF 7.00"): + return "pdb-v7" + # PDB v2 + if data.startswith(b"Microsoft C/C++ program database 2.00"): + return "pdb-v2" + # DLL + if data.startswith(b"MZ"): + return "dll" + # CAB + if data.startswith(b"MSCF"): + return "cab" + + return "unknown" + + +async def exp_backoff(retry_num): + await asyncio.sleep(2 ** retry_num) + + +async def server_has_file(client, server, filename): + """ + Send the symbol server a HEAD request to see if it has this symbol file. + """ + url = urljoin(server, quote(filename)) + for i in range(RETRIES): + try: + async with client.head(url, headers=HEADERS, allow_redirects=True) as resp: + if resp.status == 200 and ( + ( + "microsoft" in server + and resp.headers["Content-Type"] == "application/octet-stream" + ) + or "mozilla" in server + ): + log.debug(f"File exists: {url}") + return True + else: + return False + except Exception as e: + # Sometimes we've SSL errors or disconnections... so in such a situation just retry + log.warning(f"Error with {url}: retry") + log.exception(e) + await exp_backoff(i) + + log.debug(f"Too many retries (HEAD) for {url}: give up.") + return False + + +async def fetch_file(client, server, filename): + """ + Fetch the file from the server + """ + url = urljoin(server, quote(filename)) + log.debug(f"Fetch url: {url}") + for i in range(RETRIES): + try: + async with client.get(url, headers=HEADERS, allow_redirects=True) as resp: + if resp.status == 200: + data = await resp.read() + typ = get_type(data) + if typ == "unknown": + # try again + await exp_backoff(i) + elif typ == "pdb-v2": + # too old: skip it + log.debug(f"PDB v2 (skipped because too old): {url}") + return None + else: + return data + else: + log.error(f"Cannot get data (status {resp.status}) for {url}: ") + except Exception as e: + log.warning(f"Error with {url}") + log.exception(e) + await asyncio.sleep(0.5) + + log.debug(f"Too many retries (GET) for {url}: give up.") + return None + + +def write_skiplist(skiplist): + with open("skiplist.txt", "w") as sf: + sf.writelines( + f"{debug_id} {debug_file}\n" for debug_id, debug_file in skiplist.items() + ) + + +async def fetch_missing_symbols(u): + log.info("Trying missing symbols from %s" % u) + async with ClientSession() as client: + async with client.get(u, headers=HEADERS) as resp: + # The server currently does not set an encoding so force it to UTF-8 + data = await resp.text("UTF-8") + # just skip the first line since it contains column headers + return data.splitlines()[1:] + + +async def get_list(filename): + alist = set() + try: + async with AIOFile(filename, "r") as In: + async for line in LineReader(In): + line = line.rstrip() + alist.add(line) + except FileNotFoundError: + pass + + log.debug(f"{filename} contains {len(alist)} items") + + return alist + + +async def get_skiplist(): + skiplist = {} + path = "skiplist.txt" + try: + async with AIOFile(path, "r") as In: + async for line in LineReader(In): + line = line.strip() + if line == "": + continue + s = line.split(" ", maxsplit=1) + if len(s) != 2: + continue + debug_id, debug_file = s + skiplist[debug_id] = debug_file.lower() + except FileNotFoundError: + pass + + log.debug(f"{path} contains {len(skiplist)} items") + + return skiplist + + +def get_missing_symbols(missing_symbols, skiplist, ignorelist): + modules = defaultdict(set) + stats = {"ignorelist": 0, "skiplist": 0} + for line in missing_symbols: + line = line.rstrip() + bits = line.split(",") + if len(bits) < 2: + continue + pdb, debug_id = bits[:2] + code_file, code_id = None, None + if len(bits) >= 4: + code_file, code_id = bits[2:4] + if pdb and debug_id and pdb.endswith(".pdb"): + if pdb.lower() in ignorelist: + stats["ignorelist"] += 1 + continue + + if skiplist.get(debug_id) != pdb.lower(): + modules[pdb].add((debug_id, code_file, code_id)) + else: + stats["skiplist"] += 1 + # We've asked the symbol server previously about this, + # so skip it. + log.debug("%s/%s already in skiplist", pdb, debug_id) + + return modules, stats + + +async def collect_info(client, filename, debug_id, code_file, code_id): + pdb_path = os.path.join(filename, debug_id, filename) + sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", "") + ".sym") + + has_pdb = await server_has_file(client, MICROSOFT_SYMBOL_SERVER, pdb_path) + has_code = is_there = False + if has_pdb: + if not await server_has_file(client, MOZILLA_SYMBOL_SERVER, sym_path): + has_code = ( + code_file + and code_id + and await server_has_file( + client, + MICROSOFT_SYMBOL_SERVER, + f"{code_file}/{code_id}/{code_file}", + ) + ) + else: + # if the file is on moz sym server no need to do anything + is_there = True + has_pdb = False + + return (filename, debug_id, code_file, code_id, has_pdb, has_code, is_there) + + +async def check_x86_file(path): + async with AIOFile(path, "rb") as In: + head = b"MODULE windows x86 " + chunk = await In.read(len(head)) + if chunk == head: + return True + return False + + +async def run_command(cmd): + proc = await asyncio.create_subprocess_shell( + cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + _, err = await proc.communicate() + err = err.decode().strip() + + return err + + +async def dump_module( + output, symcache, filename, debug_id, code_file, code_id, has_code, dump_syms +): + sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", ".sym")) + output_path = os.path.join(output, sym_path) + sym_srv = SYM_SRV.format(symcache) + res = {"path": sym_path, "error": "ok"} + + if has_code: + cmd = ( + f"{dump_syms} {code_file} --code-id {code_id} --check-cfi --inlines " + f"--store {output} --symbol-server '{sym_srv}' --verbose error" + ) + else: + cmd = ( + f"{dump_syms} {filename} --debug-id {debug_id} --check-cfi --inlines " + f"--store {output} --symbol-server '{sym_srv}' --verbose error" + ) + + err = await run_command(cmd) + + if err: + log.error(f"Error with {cmd}") + log.error(err) + res["error"] = "dump error" + return res + + if not os.path.exists(output_path): + log.error(f"Could not find file {output_path} after running {cmd}") + res["error"] = "dump error" + return res + + if not has_code and not await check_x86_file(output_path): + # PDB for 32 bits contains everything we need (symbols + stack unwind info) + # But PDB for 64 bits don't contain stack unwind info + # (they're in the binary (.dll/.exe) itself). + # So here we're logging because we've got a PDB (64 bits) without its DLL/EXE. + if code_file and code_id: + log.debug(f"x86_64 binary {code_file}/{code_id} required") + else: + log.debug(f"x86_64 binary for {filename}/{debug_id} required") + res["error"] = "no binary" + return res + + log.info(f"Successfully dumped: {filename}/{debug_id}") + return res + + +async def dump(output, symcache, modules, dump_syms): + tasks = [] + for filename, debug_id, code_file, code_id, has_code in modules: + tasks.append( + dump_module( + output, + symcache, + filename, + debug_id, + code_file, + code_id, + has_code, + dump_syms, + ) + ) + + res = await asyncio.gather(*tasks) + + # Even if we haven't CFI the generated file is useful to get symbols + # from addresses so keep error == 2. + file_index = {x["path"] for x in res if x["error"] in ["ok", "no binary"]} + stats = { + "dump_error": sum(1 for x in res if x["error"] == "dump error"), + "no_bin": sum(1 for x in res if x["error"] == "no binary"), + } + + return file_index, stats + + +async def collect(modules): + loop = asyncio.get_event_loop() + tasks = [] + + # In case of errors (Too many open files), just change limit_per_host + connector = TCPConnector(limit=100, limit_per_host=4) + + async with ClientSession( + loop=loop, timeout=ClientTimeout(total=TIMEOUT), connector=connector + ) as client: + for filename, ids in modules.items(): + for debug_id, code_file, code_id in ids: + tasks.append( + collect_info(client, filename, debug_id, code_file, code_id) + ) + + res = await asyncio.gather(*tasks) + to_dump = [] + stats = {"no_pdb": 0, "is_there": 0} + for filename, debug_id, code_file, code_id, has_pdb, has_code, is_there in res: + if not has_pdb: + if is_there: + stats["is_there"] += 1 + else: + stats["no_pdb"] += 1 + log.info(f"No pdb for {filename}/{debug_id}") + continue + + log.info( + f"To dump: {filename}/{debug_id}, {code_file}/{code_id} and has_code = {has_code}" + ) + to_dump.append((filename, debug_id, code_file, code_id, has_code)) + + log.info(f"Collected {len(to_dump)} files to dump") + + return to_dump, stats + + +async def make_dirs(path): + loop = asyncio.get_event_loop() + + def helper(path): + os.makedirs(path, exist_ok=True) + + await loop.run_in_executor(None, helper, path) + + +async def fetch_and_write(output, client, filename, file_id): + path = os.path.join(filename, file_id, filename) + data = await fetch_file(client, MICROSOFT_SYMBOL_SERVER, path) + + if not data: + return False + + output_dir = os.path.join(output, filename, file_id) + await make_dirs(output_dir) + + output_path = os.path.join(output_dir, filename) + async with AIOFile(output_path, "wb") as Out: + await Out.write(data) + + return True + + +async def fetch_all(output, modules): + loop = asyncio.get_event_loop() + tasks = [] + fetched_modules = [] + + # In case of errors (Too many open files), just change limit_per_host + connector = TCPConnector(limit=100, limit_per_host=0) + + async with ClientSession( + loop=loop, timeout=ClientTimeout(total=TIMEOUT), connector=connector + ) as client: + for filename, debug_id, code_file, code_id, has_code in modules: + tasks.append(fetch_and_write(output, client, filename, debug_id)) + if has_code: + tasks.append(fetch_and_write(output, client, code_file, code_id)) + + res = await asyncio.gather(*tasks) + res = iter(res) + for filename, debug_id, code_file, code_id, has_code in modules: + fetched_pdb = next(res) + if has_code: + has_code = next(res) + if fetched_pdb: + fetched_modules.append( + (filename, debug_id, code_file, code_id, has_code) + ) + + return fetched_modules + + +def get_base_data(url): + async def helper(url): + return await asyncio.gather( + fetch_missing_symbols(url), + # Symbols that we know belong to us, so don't ask Microsoft for them. + get_list("ignorelist.txt"), + # Symbols that we know belong to Microsoft, so don't skiplist them. + get_list("known-microsoft-symbols.txt"), + # Symbols that we've asked for in the past unsuccessfully + get_skiplist(), + ) + + return asyncio.run(helper(url)) + + +def gen_zip(output, output_dir, file_index): + if not file_index: + return + + with zipfile.ZipFile(output, "w", zipfile.ZIP_DEFLATED) as z: + for f in file_index: + z.write(os.path.join(output_dir, f), f) + log.info(f"Wrote zip as {output}") + + +def main(): + parser = argparse.ArgumentParser( + description="Fetch missing symbols from Microsoft symbol server" + ) + parser.add_argument( + "--missing-symbols", + type=str, + help="missing symbols URL", + default=MISSING_SYMBOLS_URL, + ) + parser.add_argument("zip", type=str, help="output zip file") + parser.add_argument( + "--dump-syms", + type=str, + help="dump_syms path", + default=os.environ.get("DUMP_SYMS_PATH"), + ) + + args = parser.parse_args() + + assert args.dump_syms, "dump_syms path is empty" + + logging.basicConfig(level=logging.DEBUG) + aiohttp_logger = logging.getLogger("aiohttp.client") + aiohttp_logger.setLevel(logging.INFO) + log.info("Started") + + missing_symbols, ignorelist, known_ms_symbols, skiplist = get_base_data( + args.missing_symbols + ) + + modules, stats_skipped = get_missing_symbols(missing_symbols, skiplist, ignorelist) + + symbol_path = mkdtemp("symsrvfetch") + temp_path = mkdtemp(prefix="symcache") + + modules, stats_collect = asyncio.run(collect(modules)) + modules = asyncio.run(fetch_all(temp_path, modules)) + + file_index, stats_dump = asyncio.run( + dump(symbol_path, temp_path, modules, args.dump_syms) + ) + + gen_zip(args.zip, symbol_path, file_index) + + shutil.rmtree(symbol_path, True) + shutil.rmtree(temp_path, True) + + write_skiplist(skiplist) + + if not file_index: + log.info(f"No symbols downloaded: {len(missing_symbols)} considered") + else: + log.info( + f"Total files: {len(missing_symbols)}, Stored {len(file_index)} symbol files" + ) + + log.info( + f"{stats_collect['is_there']} already present, {stats_skipped['ignorelist']} in ignored list, " # noqa + f"{stats_skipped['skiplist']} skipped, {stats_collect['no_pdb']} not found, " + f"{stats_dump['dump_error']} processed with errors, " + f"{stats_dump['no_bin']} processed but with no binaries (x86_64)" + ) + log.info("Finished, exiting") + + +if __name__ == "__main__": + main() |