summaryrefslogtreecommitdiffstats
path: root/tools/crashreporter/system-symbols/win
diff options
context:
space:
mode:
Diffstat (limited to 'tools/crashreporter/system-symbols/win')
-rw-r--r--tools/crashreporter/system-symbols/win/LICENSE202
-rw-r--r--tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt17
-rwxr-xr-xtools/crashreporter/system-symbols/win/run.sh11
-rw-r--r--tools/crashreporter/system-symbols/win/scrape-report.py75
-rw-r--r--tools/crashreporter/system-symbols/win/skiplist.txt0
-rw-r--r--tools/crashreporter/system-symbols/win/symsrv-fetch.py526
6 files changed, 831 insertions, 0 deletions
diff --git a/tools/crashreporter/system-symbols/win/LICENSE b/tools/crashreporter/system-symbols/win/LICENSE
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt
new file mode 100644
index 0000000000..d63dc716e9
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt
@@ -0,0 +1,17 @@
+d2d1.pdb
+d3d10level9.pdb
+d3d10warp.pdb
+d3d11.pdb
+d3d9.pdb
+d3dcompiler_47.pdb
+d3dim700.pdb
+kernel32.pdb
+kernelbase.pdb
+ntdll.pdb
+user32.pdb
+wkernel32.pdb
+wkernelbase.pdb
+wntdll.pdb
+ws2_32.pdb
+wuser32.pdb
+zipwriter.pdb
diff --git a/tools/crashreporter/system-symbols/win/run.sh b/tools/crashreporter/system-symbols/win/run.sh
new file mode 100755
index 0000000000..f95b2b160a
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/run.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+set -v -e -x
+
+base="$(realpath "$(dirname "$0")")"
+
+export DUMP_SYMS_PATH="${MOZ_FETCHES_DIR}/dump_syms/dump_syms"
+
+mkdir -p artifacts && \
+ulimit -n 16384 && \
+python3 "${base}/symsrv-fetch.py" artifacts/target.crashreporter-symbols.zip
diff --git a/tools/crashreporter/system-symbols/win/scrape-report.py b/tools/crashreporter/system-symbols/win/scrape-report.py
new file mode 100644
index 0000000000..9bc21801c3
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/scrape-report.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Mozilla
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import csv
+import json
+import logging
+import os
+import sys
+
+import requests
+import urlparse
+
+log = logging.getLogger()
+
+
+def fetch_missing_symbols_from_crash(file_or_crash):
+ if os.path.isfile(file_or_crash):
+ log.info("Fetching missing symbols from JSON file: %s" % file_or_crash)
+ j = {"json_dump": json.load(open(file_or_crash, "rb"))}
+ else:
+ if "report/index/" in file_or_crash:
+ crash_id = urlparse.urlparse(file_or_crash).path.split("/")[-1]
+ else:
+ crash_id = file_or_crash
+ url = (
+ "https://crash-stats.mozilla.org/api/ProcessedCrash/"
+ "?crash_id={crash_id}&datatype=processed".format(crash_id=crash_id)
+ )
+ log.info("Fetching missing symbols from crash: %s" % url)
+ r = requests.get(url)
+ if r.status_code != 200:
+ log.error("Failed to fetch crash %s" % url)
+ return set()
+ j = r.json()
+ return set(
+ [
+ (m["debug_file"], m["debug_id"], m["filename"], m["code_id"])
+ for m in j["json_dump"]["modules"]
+ if "missing_symbols" in m
+ ]
+ )
+
+
+def main():
+ logging.basicConfig()
+ log.setLevel(logging.DEBUG)
+ urllib3_logger = logging.getLogger("urllib3")
+ urllib3_logger.setLevel(logging.ERROR)
+
+ if len(sys.argv) < 2:
+ log.error("Specify a crash URL or ID")
+ sys.exit(1)
+ symbols = fetch_missing_symbols_from_crash(sys.argv[1])
+ log.info("Found %d missing symbols" % len(symbols))
+ c = csv.writer(sys.stdout)
+ c.writerow(["debug_file", "debug_id", "code_file", "code_id"])
+ for row in symbols:
+ c.writerow(row)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/crashreporter/system-symbols/win/skiplist.txt b/tools/crashreporter/system-symbols/win/skiplist.txt
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/skiplist.txt
diff --git a/tools/crashreporter/system-symbols/win/symsrv-fetch.py b/tools/crashreporter/system-symbols/win/symsrv-fetch.py
new file mode 100644
index 0000000000..bc3b421cd0
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/symsrv-fetch.py
@@ -0,0 +1,526 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Mozilla
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This script will read a CSV of modules from Socorro, and try to retrieve
+# missing symbols from Microsoft's symbol server. It honors a list
+# (ignorelist.txt) of symbols that are known to be from our applications,
+# and it maintains its own list of symbols that the MS symbol server
+# doesn't have (skiplist.txt).
+#
+# The script also depends on having write access to the directory it is
+# installed in, to write the skiplist text file.
+
+import argparse
+import asyncio
+import logging
+import os
+import shutil
+import zipfile
+from collections import defaultdict
+from tempfile import mkdtemp
+from urllib.parse import quote, urljoin
+
+from aiofile import AIOFile, LineReader
+from aiohttp import ClientSession, ClientTimeout
+from aiohttp.connector import TCPConnector
+
+# Just hardcoded here
+MICROSOFT_SYMBOL_SERVER = "https://msdl.microsoft.com/download/symbols/"
+USER_AGENT = "Microsoft-Symbol-Server/6.3.0.0"
+MOZILLA_SYMBOL_SERVER = "https://symbols.mozilla.org/"
+MISSING_SYMBOLS_URL = "https://symbols.mozilla.org/missingsymbols.csv?microsoft=only"
+HEADERS = {"User-Agent": USER_AGENT}
+SYM_SRV = "SRV*{0}*https://msdl.microsoft.com/download/symbols;SRV*{0}*https://software.intel.com/sites/downloads/symbols;SRV*{0}*https://download.amd.com/dir/bin;SRV*{0}*https://driver-symbols.nvidia.com" # noqa
+TIMEOUT = 7200
+RETRIES = 5
+
+
+log = logging.getLogger()
+
+
+def get_type(data):
+ # PDB v7
+ if data.startswith(b"Microsoft C/C++ MSF 7.00"):
+ return "pdb-v7"
+ # PDB v2
+ if data.startswith(b"Microsoft C/C++ program database 2.00"):
+ return "pdb-v2"
+ # DLL
+ if data.startswith(b"MZ"):
+ return "dll"
+ # CAB
+ if data.startswith(b"MSCF"):
+ return "cab"
+
+ return "unknown"
+
+
+async def exp_backoff(retry_num):
+ await asyncio.sleep(2 ** retry_num)
+
+
+async def server_has_file(client, server, filename):
+ """
+ Send the symbol server a HEAD request to see if it has this symbol file.
+ """
+ url = urljoin(server, quote(filename))
+ for i in range(RETRIES):
+ try:
+ async with client.head(url, headers=HEADERS, allow_redirects=True) as resp:
+ if resp.status == 200 and (
+ (
+ "microsoft" in server
+ and resp.headers["Content-Type"] == "application/octet-stream"
+ )
+ or "mozilla" in server
+ ):
+ log.debug(f"File exists: {url}")
+ return True
+ else:
+ return False
+ except Exception as e:
+ # Sometimes we've SSL errors or disconnections... so in such a situation just retry
+ log.warning(f"Error with {url}: retry")
+ log.exception(e)
+ await exp_backoff(i)
+
+ log.debug(f"Too many retries (HEAD) for {url}: give up.")
+ return False
+
+
+async def fetch_file(client, server, filename):
+ """
+ Fetch the file from the server
+ """
+ url = urljoin(server, quote(filename))
+ log.debug(f"Fetch url: {url}")
+ for i in range(RETRIES):
+ try:
+ async with client.get(url, headers=HEADERS, allow_redirects=True) as resp:
+ if resp.status == 200:
+ data = await resp.read()
+ typ = get_type(data)
+ if typ == "unknown":
+ # try again
+ await exp_backoff(i)
+ elif typ == "pdb-v2":
+ # too old: skip it
+ log.debug(f"PDB v2 (skipped because too old): {url}")
+ return None
+ else:
+ return data
+ else:
+ log.error(f"Cannot get data (status {resp.status}) for {url}: ")
+ except Exception as e:
+ log.warning(f"Error with {url}")
+ log.exception(e)
+ await asyncio.sleep(0.5)
+
+ log.debug(f"Too many retries (GET) for {url}: give up.")
+ return None
+
+
+def write_skiplist(skiplist):
+ with open("skiplist.txt", "w") as sf:
+ sf.writelines(
+ f"{debug_id} {debug_file}\n" for debug_id, debug_file in skiplist.items()
+ )
+
+
+async def fetch_missing_symbols(u):
+ log.info("Trying missing symbols from %s" % u)
+ async with ClientSession() as client:
+ async with client.get(u, headers=HEADERS) as resp:
+ # The server currently does not set an encoding so force it to UTF-8
+ data = await resp.text("UTF-8")
+ # just skip the first line since it contains column headers
+ return data.splitlines()[1:]
+
+
+async def get_list(filename):
+ alist = set()
+ try:
+ async with AIOFile(filename, "r") as In:
+ async for line in LineReader(In):
+ line = line.rstrip()
+ alist.add(line)
+ except FileNotFoundError:
+ pass
+
+ log.debug(f"{filename} contains {len(alist)} items")
+
+ return alist
+
+
+async def get_skiplist():
+ skiplist = {}
+ path = "skiplist.txt"
+ try:
+ async with AIOFile(path, "r") as In:
+ async for line in LineReader(In):
+ line = line.strip()
+ if line == "":
+ continue
+ s = line.split(" ", maxsplit=1)
+ if len(s) != 2:
+ continue
+ debug_id, debug_file = s
+ skiplist[debug_id] = debug_file.lower()
+ except FileNotFoundError:
+ pass
+
+ log.debug(f"{path} contains {len(skiplist)} items")
+
+ return skiplist
+
+
+def get_missing_symbols(missing_symbols, skiplist, ignorelist):
+ modules = defaultdict(set)
+ stats = {"ignorelist": 0, "skiplist": 0}
+ for line in missing_symbols:
+ line = line.rstrip()
+ bits = line.split(",")
+ if len(bits) < 2:
+ continue
+ pdb, debug_id = bits[:2]
+ code_file, code_id = None, None
+ if len(bits) >= 4:
+ code_file, code_id = bits[2:4]
+ if pdb and debug_id and pdb.endswith(".pdb"):
+ if pdb.lower() in ignorelist:
+ stats["ignorelist"] += 1
+ continue
+
+ if skiplist.get(debug_id) != pdb.lower():
+ modules[pdb].add((debug_id, code_file, code_id))
+ else:
+ stats["skiplist"] += 1
+ # We've asked the symbol server previously about this,
+ # so skip it.
+ log.debug("%s/%s already in skiplist", pdb, debug_id)
+
+ return modules, stats
+
+
+async def collect_info(client, filename, debug_id, code_file, code_id):
+ pdb_path = os.path.join(filename, debug_id, filename)
+ sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", "") + ".sym")
+
+ has_pdb = await server_has_file(client, MICROSOFT_SYMBOL_SERVER, pdb_path)
+ has_code = is_there = False
+ if has_pdb:
+ if not await server_has_file(client, MOZILLA_SYMBOL_SERVER, sym_path):
+ has_code = (
+ code_file
+ and code_id
+ and await server_has_file(
+ client,
+ MICROSOFT_SYMBOL_SERVER,
+ f"{code_file}/{code_id}/{code_file}",
+ )
+ )
+ else:
+ # if the file is on moz sym server no need to do anything
+ is_there = True
+ has_pdb = False
+
+ return (filename, debug_id, code_file, code_id, has_pdb, has_code, is_there)
+
+
+async def check_x86_file(path):
+ async with AIOFile(path, "rb") as In:
+ head = b"MODULE windows x86 "
+ chunk = await In.read(len(head))
+ if chunk == head:
+ return True
+ return False
+
+
+async def run_command(cmd):
+ proc = await asyncio.create_subprocess_shell(
+ cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+ )
+ _, err = await proc.communicate()
+ err = err.decode().strip()
+
+ return err
+
+
+async def dump_module(
+ output, symcache, filename, debug_id, code_file, code_id, has_code, dump_syms
+):
+ sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", ".sym"))
+ output_path = os.path.join(output, sym_path)
+ sym_srv = SYM_SRV.format(symcache)
+ res = {"path": sym_path, "error": "ok"}
+
+ if has_code:
+ cmd = (
+ f"{dump_syms} {code_file} --code-id {code_id} --check-cfi --inlines "
+ f"--store {output} --symbol-server '{sym_srv}' --verbose error"
+ )
+ else:
+ cmd = (
+ f"{dump_syms} {filename} --debug-id {debug_id} --check-cfi --inlines "
+ f"--store {output} --symbol-server '{sym_srv}' --verbose error"
+ )
+
+ err = await run_command(cmd)
+
+ if err:
+ log.error(f"Error with {cmd}")
+ log.error(err)
+ res["error"] = "dump error"
+ return res
+
+ if not os.path.exists(output_path):
+ log.error(f"Could not find file {output_path} after running {cmd}")
+ res["error"] = "dump error"
+ return res
+
+ if not has_code and not await check_x86_file(output_path):
+ # PDB for 32 bits contains everything we need (symbols + stack unwind info)
+ # But PDB for 64 bits don't contain stack unwind info
+ # (they're in the binary (.dll/.exe) itself).
+ # So here we're logging because we've got a PDB (64 bits) without its DLL/EXE.
+ if code_file and code_id:
+ log.debug(f"x86_64 binary {code_file}/{code_id} required")
+ else:
+ log.debug(f"x86_64 binary for {filename}/{debug_id} required")
+ res["error"] = "no binary"
+ return res
+
+ log.info(f"Successfully dumped: {filename}/{debug_id}")
+ return res
+
+
+async def dump(output, symcache, modules, dump_syms):
+ tasks = []
+ for filename, debug_id, code_file, code_id, has_code in modules:
+ tasks.append(
+ dump_module(
+ output,
+ symcache,
+ filename,
+ debug_id,
+ code_file,
+ code_id,
+ has_code,
+ dump_syms,
+ )
+ )
+
+ res = await asyncio.gather(*tasks)
+
+ # Even if we haven't CFI the generated file is useful to get symbols
+ # from addresses so keep error == 2.
+ file_index = {x["path"] for x in res if x["error"] in ["ok", "no binary"]}
+ stats = {
+ "dump_error": sum(1 for x in res if x["error"] == "dump error"),
+ "no_bin": sum(1 for x in res if x["error"] == "no binary"),
+ }
+
+ return file_index, stats
+
+
+async def collect(modules):
+ loop = asyncio.get_event_loop()
+ tasks = []
+
+ # In case of errors (Too many open files), just change limit_per_host
+ connector = TCPConnector(limit=100, limit_per_host=4)
+
+ async with ClientSession(
+ loop=loop, timeout=ClientTimeout(total=TIMEOUT), connector=connector
+ ) as client:
+ for filename, ids in modules.items():
+ for debug_id, code_file, code_id in ids:
+ tasks.append(
+ collect_info(client, filename, debug_id, code_file, code_id)
+ )
+
+ res = await asyncio.gather(*tasks)
+ to_dump = []
+ stats = {"no_pdb": 0, "is_there": 0}
+ for filename, debug_id, code_file, code_id, has_pdb, has_code, is_there in res:
+ if not has_pdb:
+ if is_there:
+ stats["is_there"] += 1
+ else:
+ stats["no_pdb"] += 1
+ log.info(f"No pdb for {filename}/{debug_id}")
+ continue
+
+ log.info(
+ f"To dump: {filename}/{debug_id}, {code_file}/{code_id} and has_code = {has_code}"
+ )
+ to_dump.append((filename, debug_id, code_file, code_id, has_code))
+
+ log.info(f"Collected {len(to_dump)} files to dump")
+
+ return to_dump, stats
+
+
+async def make_dirs(path):
+ loop = asyncio.get_event_loop()
+
+ def helper(path):
+ os.makedirs(path, exist_ok=True)
+
+ await loop.run_in_executor(None, helper, path)
+
+
+async def fetch_and_write(output, client, filename, file_id):
+ path = os.path.join(filename, file_id, filename)
+ data = await fetch_file(client, MICROSOFT_SYMBOL_SERVER, path)
+
+ if not data:
+ return False
+
+ output_dir = os.path.join(output, filename, file_id)
+ await make_dirs(output_dir)
+
+ output_path = os.path.join(output_dir, filename)
+ async with AIOFile(output_path, "wb") as Out:
+ await Out.write(data)
+
+ return True
+
+
+async def fetch_all(output, modules):
+ loop = asyncio.get_event_loop()
+ tasks = []
+ fetched_modules = []
+
+ # In case of errors (Too many open files), just change limit_per_host
+ connector = TCPConnector(limit=100, limit_per_host=0)
+
+ async with ClientSession(
+ loop=loop, timeout=ClientTimeout(total=TIMEOUT), connector=connector
+ ) as client:
+ for filename, debug_id, code_file, code_id, has_code in modules:
+ tasks.append(fetch_and_write(output, client, filename, debug_id))
+ if has_code:
+ tasks.append(fetch_and_write(output, client, code_file, code_id))
+
+ res = await asyncio.gather(*tasks)
+ res = iter(res)
+ for filename, debug_id, code_file, code_id, has_code in modules:
+ fetched_pdb = next(res)
+ if has_code:
+ has_code = next(res)
+ if fetched_pdb:
+ fetched_modules.append(
+ (filename, debug_id, code_file, code_id, has_code)
+ )
+
+ return fetched_modules
+
+
+def get_base_data(url):
+ async def helper(url):
+ return await asyncio.gather(
+ fetch_missing_symbols(url),
+ # Symbols that we know belong to us, so don't ask Microsoft for them.
+ get_list("ignorelist.txt"),
+ # Symbols that we know belong to Microsoft, so don't skiplist them.
+ get_list("known-microsoft-symbols.txt"),
+ # Symbols that we've asked for in the past unsuccessfully
+ get_skiplist(),
+ )
+
+ return asyncio.run(helper(url))
+
+
+def gen_zip(output, output_dir, file_index):
+ if not file_index:
+ return
+
+ with zipfile.ZipFile(output, "w", zipfile.ZIP_DEFLATED) as z:
+ for f in file_index:
+ z.write(os.path.join(output_dir, f), f)
+ log.info(f"Wrote zip as {output}")
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Fetch missing symbols from Microsoft symbol server"
+ )
+ parser.add_argument(
+ "--missing-symbols",
+ type=str,
+ help="missing symbols URL",
+ default=MISSING_SYMBOLS_URL,
+ )
+ parser.add_argument("zip", type=str, help="output zip file")
+ parser.add_argument(
+ "--dump-syms",
+ type=str,
+ help="dump_syms path",
+ default=os.environ.get("DUMP_SYMS_PATH"),
+ )
+
+ args = parser.parse_args()
+
+ assert args.dump_syms, "dump_syms path is empty"
+
+ logging.basicConfig(level=logging.DEBUG)
+ aiohttp_logger = logging.getLogger("aiohttp.client")
+ aiohttp_logger.setLevel(logging.INFO)
+ log.info("Started")
+
+ missing_symbols, ignorelist, known_ms_symbols, skiplist = get_base_data(
+ args.missing_symbols
+ )
+
+ modules, stats_skipped = get_missing_symbols(missing_symbols, skiplist, ignorelist)
+
+ symbol_path = mkdtemp("symsrvfetch")
+ temp_path = mkdtemp(prefix="symcache")
+
+ modules, stats_collect = asyncio.run(collect(modules))
+ modules = asyncio.run(fetch_all(temp_path, modules))
+
+ file_index, stats_dump = asyncio.run(
+ dump(symbol_path, temp_path, modules, args.dump_syms)
+ )
+
+ gen_zip(args.zip, symbol_path, file_index)
+
+ shutil.rmtree(symbol_path, True)
+ shutil.rmtree(temp_path, True)
+
+ write_skiplist(skiplist)
+
+ if not file_index:
+ log.info(f"No symbols downloaded: {len(missing_symbols)} considered")
+ else:
+ log.info(
+ f"Total files: {len(missing_symbols)}, Stored {len(file_index)} symbol files"
+ )
+
+ log.info(
+ f"{stats_collect['is_there']} already present, {stats_skipped['ignorelist']} in ignored list, " # noqa
+ f"{stats_skipped['skiplist']} skipped, {stats_collect['no_pdb']} not found, "
+ f"{stats_dump['dump_error']} processed with errors, "
+ f"{stats_dump['no_bin']} processed but with no binaries (x86_64)"
+ )
+ log.info("Finished, exiting")
+
+
+if __name__ == "__main__":
+ main()