author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-28 14:29:10 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-28 14:29:10 +0000
commit     2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree       b80bf8bf13c3766139fbacc530efd0dd9d54394c /tools/crashreporter
parent     Initial commit. (diff)
Adding upstream version 86.0.1. (upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/crashreporter')
-rw-r--r--  tools/crashreporter/app.mozbuild  9
-rw-r--r--  tools/crashreporter/minidump_stackwalk/curl-compat/curl-compat.c  10
-rw-r--r--  tools/crashreporter/minidump_stackwalk/curl-compat/moz.build  20
-rw-r--r--  tools/crashreporter/minidump_stackwalk/http_symbol_supplier.cc  540
-rw-r--r--  tools/crashreporter/minidump_stackwalk/http_symbol_supplier.h  120
-rw-r--r--  tools/crashreporter/minidump_stackwalk/minidump_stackwalk.cc  212
-rw-r--r--  tools/crashreporter/minidump_stackwalk/moz.build  37
-rw-r--r--  tools/crashreporter/moz.configure  5
-rwxr-xr-x  tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py  347
-rw-r--r--  tools/crashreporter/system-symbols/mac/get_update_packages.py  156
-rwxr-xr-x  tools/crashreporter/system-symbols/mac/list-packages.py  47
-rwxr-xr-x  tools/crashreporter/system-symbols/mac/parse_pbzx.py  107
-rwxr-xr-x  tools/crashreporter/system-symbols/mac/run.sh  51
-rw-r--r--  tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py  0
-rw-r--r--  tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py  212
-rw-r--r--  tools/crashreporter/system-symbols/win/LICENSE  202
-rw-r--r--  tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt  17
-rwxr-xr-x  tools/crashreporter/system-symbols/win/run.sh  11
-rw-r--r--  tools/crashreporter/system-symbols/win/scrape-report.py  77
-rw-r--r--  tools/crashreporter/system-symbols/win/skiplist.txt  0
-rw-r--r--  tools/crashreporter/system-symbols/win/symsrv-fetch.py  521
21 files changed, 2701 insertions, 0 deletions
diff --git a/tools/crashreporter/app.mozbuild b/tools/crashreporter/app.mozbuild
new file mode 100644
index 0000000000..e44591f174
--- /dev/null
+++ b/tools/crashreporter/app.mozbuild
@@ -0,0 +1,9 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DIRS += [
+ '/config/external/zlib',
+ '/toolkit/crashreporter/google-breakpad/src/processor',
+ '/tools/crashreporter/minidump_stackwalk',
+]
diff --git a/tools/crashreporter/minidump_stackwalk/curl-compat/curl-compat.c b/tools/crashreporter/minidump_stackwalk/curl-compat/curl-compat.c
new file mode 100644
index 0000000000..43317f12fb
--- /dev/null
+++ b/tools/crashreporter/minidump_stackwalk/curl-compat/curl-compat.c
@@ -0,0 +1,10 @@
+/* Any copyright is dedicated to the public domain.
+ http://creativecommons.org/publicdomain/zero/1.0/ */
+
+void curl_easy_cleanup() {}
+void curl_easy_escape() {}
+void curl_easy_getinfo() {}
+void curl_easy_init() {}
+void curl_easy_perform() {}
+void curl_easy_setopt() {}
+void curl_free() {}
diff --git a/tools/crashreporter/minidump_stackwalk/curl-compat/moz.build b/tools/crashreporter/minidump_stackwalk/curl-compat/moz.build
new file mode 100644
index 0000000000..e1c78875f3
--- /dev/null
+++ b/tools/crashreporter/minidump_stackwalk/curl-compat/moz.build
@@ -0,0 +1,20 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This creates a fake libcurl.so that exposes enough of the curl ABI that
+# linking minidump_stackwalk against it works. Because the fake libcurl.so
+# comes with no symbol versions, the resulting minidump_stackwalk doesn't
+# require specific symbol versions, making it binary compatible with any
+# libcurl.so.4 that provides the symbols, regardless of the symbol version
+# it uses. That means it works on systems before and after the switch to
+# openssl 1.1. That includes older and newer Ubuntu and Debian systems.
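+#
+# A quick way to check the effect on the final binary (illustrative; assumes
+# GNU binutils is available):
+#
+#   objdump -T minidump_stackwalk | grep curl_
+#
+# should list the curl symbols as plain undefined references, with no
+# version suffix attached.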
+SharedLibrary("curl")
+
+NoVisibilityFlags()
+
+DIST_INSTALL = False
+
+SOURCES += ["curl-compat.c"]
+
+LDFLAGS += ["-Wl,-soname,libcurl.so.4"]
diff --git a/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.cc b/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.cc
new file mode 100644
index 0000000000..d7c75201b8
--- /dev/null
+++ b/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.cc
@@ -0,0 +1,540 @@
+// Copyright (c) 2011 The Mozilla Foundation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of The Mozilla Foundation nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "http_symbol_supplier.h"
+
+#include <algorithm>
+
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <errno.h>
+
+#include "google_breakpad/processor/code_module.h"
+#include "google_breakpad/processor/system_info.h"
+#include "processor/logging.h"
+#include "processor/pathname_stripper.h"
+
+#ifdef _WIN32
+# include <direct.h>
+# include "zlib.h"
+#else
+# include <curl/curl.h>
+#endif
+
+namespace breakpad_extra {
+
+using google_breakpad::CodeModule;
+using google_breakpad::PathnameStripper;
+using google_breakpad::SystemInfo;
+
+static bool file_exists(const string& file_name) {
+ struct stat sb;
+ return stat(file_name.c_str(), &sb) == 0;
+}
+
+static string dirname(const string& path) {
+ size_t i = path.rfind('/');
+ if (i == string::npos) {
+ return path;
+ }
+ return path.substr(0, i);
+}
+
+#ifdef _WIN32
+# define mkdir_port(d) _mkdir(d)
+#else
+# define mkdir_port(d) mkdir(d, 0755)
+#endif
+
+static bool mkdirs(const string& file) {
+ vector<string> dirs;
+ string dir = dirname(file);
+ while (!file_exists(dir)) {
+ dirs.push_back(dir);
+ string new_dir = dirname(dir);
+ if (new_dir == dir || dir.empty()) {
+ break;
+ }
+ dir = new_dir;
+ }
+ for (auto d = dirs.rbegin(); d != dirs.rend(); ++d) {
+ if (mkdir_port(d->c_str()) != 0) {
+ BPLOG(ERROR) << "Error creating " << *d << ": " << errno;
+ return false;
+ }
+ }
+ return true;
+}
+
+static vector<string> vector_from(const string& front,
+ const vector<string>& rest) {
+ vector<string> vec(1, front);
+ std::copy(rest.begin(), rest.end(), std::back_inserter(vec));
+ return vec;
+}
+
+HTTPSymbolSupplier::HTTPSymbolSupplier(const vector<string>& server_urls,
+ const string& cache_path,
+ const vector<string>& local_paths,
+ const string& tmp_path)
+ : SimpleSymbolSupplier(vector_from(cache_path, local_paths)),
+ server_urls_(server_urls),
+ cache_path_(cache_path),
+ tmp_path_(tmp_path) {
+#ifdef _WIN32
+ session_ = InternetOpenW(L"Breakpad/1.0", INTERNET_OPEN_TYPE_PRECONFIG,
+ nullptr, nullptr, 0);
+ if (!session_) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: InternetOpenW: Error: "
+ << GetLastError();
+ }
+#else
+ session_ = curl_easy_init();
+#endif
+ for (auto i = server_urls_.begin(); i < server_urls_.end(); ++i) {
+ if (*(i->end() - 1) != '/') {
+ i->push_back('/');
+ }
+ }
+ // Remove any trailing slash on tmp_path.
+ if (!tmp_path_.empty() && *(tmp_path_.end() - 1) == '/') {
+ tmp_path_.erase(tmp_path_.end() - 1);
+ }
+}
+
+HTTPSymbolSupplier::~HTTPSymbolSupplier() {
+#ifdef _WIN32
+ InternetCloseHandle(session_);
+#else
+ curl_easy_cleanup(session_);
+#endif
+}
+
+void HTTPSymbolSupplier::StoreSymbolStats(const CodeModule* module,
+ const SymbolStats& stats) {
+ const auto& key =
+ std::make_pair(module->debug_file(), module->debug_identifier());
+ if (symbol_stats_.find(key) == symbol_stats_.end()) {
+ symbol_stats_[key] = stats;
+ }
+}
+
+void HTTPSymbolSupplier::StoreCacheHit(const CodeModule* module) {
+ SymbolStats stats = {true, 0.0f};
+ StoreSymbolStats(module, stats);
+}
+
+void HTTPSymbolSupplier::StoreCacheMiss(const CodeModule* module,
+ float fetch_time) {
+ SymbolStats stats = {false, fetch_time};
+ StoreSymbolStats(module, stats);
+}
+
+SymbolSupplier::SymbolResult HTTPSymbolSupplier::GetSymbolFile(
+ const CodeModule* module, const SystemInfo* system_info,
+ string* symbol_file) {
+ SymbolSupplier::SymbolResult res =
+ SimpleSymbolSupplier::GetSymbolFile(module, system_info, symbol_file);
+ if (res != SymbolSupplier::NOT_FOUND) {
+ StoreCacheHit(module);
+ return res;
+ }
+
+ if (!FetchSymbolFile(module, system_info)) {
+ return SymbolSupplier::NOT_FOUND;
+ }
+
+ return SimpleSymbolSupplier::GetSymbolFile(module, system_info, symbol_file);
+}
+
+SymbolSupplier::SymbolResult HTTPSymbolSupplier::GetSymbolFile(
+ const CodeModule* module, const SystemInfo* system_info,
+ string* symbol_file, string* symbol_data) {
+ SymbolSupplier::SymbolResult res = SimpleSymbolSupplier::GetSymbolFile(
+ module, system_info, symbol_file, symbol_data);
+ if (res != SymbolSupplier::NOT_FOUND) {
+ StoreCacheHit(module);
+ return res;
+ }
+
+ if (!FetchSymbolFile(module, system_info)) {
+ return SymbolSupplier::NOT_FOUND;
+ }
+
+ return SimpleSymbolSupplier::GetSymbolFile(module, system_info, symbol_file,
+ symbol_data);
+}
+
+SymbolSupplier::SymbolResult HTTPSymbolSupplier::GetCStringSymbolData(
+ const CodeModule* module, const SystemInfo* system_info,
+ string* symbol_file, char** symbol_data, size_t* size) {
+ SymbolSupplier::SymbolResult res = SimpleSymbolSupplier::GetCStringSymbolData(
+ module, system_info, symbol_file, symbol_data, size);
+ if (res != SymbolSupplier::NOT_FOUND) {
+ StoreCacheHit(module);
+ return res;
+ }
+
+ if (!FetchSymbolFile(module, system_info)) {
+ return SymbolSupplier::NOT_FOUND;
+ }
+
+ return SimpleSymbolSupplier::GetCStringSymbolData(
+ module, system_info, symbol_file, symbol_data, size);
+}
+
+namespace {
+string JoinPath(const string& path, const string& sub) {
+ if (path[path.length() - 1] == '/') {
+ return path + sub;
+ }
+ return path + "/" + sub;
+}
+
+#ifdef _WIN32
+string URLEncode(HINTERNET session, const string& url) {
+ string out(url.length() * 3, '\0');
+ DWORD length = out.length();
+ if (InternetCanonicalizeUrlA(url.c_str(), &out[0], &length, 0)) {
+ out.resize(length);
+ return out;
+ }
+ return url;
+}
+
+string JoinURL(HINTERNET session, const string& url, const string& sub) {
+ return url + "/" + URLEncode(session, sub);
+}
+
+bool FetchURLToFile(HINTERNET session, const string& url, const string& file,
+ const string& tmp_path, float* fetch_time) {
+ *fetch_time = 0.0f;
+
+ URL_COMPONENTSA comps = {};
+ comps.dwStructSize = sizeof(URL_COMPONENTSA);
+ comps.dwHostNameLength = static_cast<DWORD>(-1);
+ comps.dwSchemeLength = static_cast<DWORD>(-1);
+ comps.dwUrlPathLength = static_cast<DWORD>(-1);
+ comps.dwExtraInfoLength = static_cast<DWORD>(-1);
+
+ if (!InternetCrackUrlA(url.c_str(), 0, 0, &comps)) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: InternetCrackUrlA: Error: "
+ << GetLastError();
+ return false;
+ }
+
+ DWORD start = GetTickCount();
+ string host(comps.lpszHostName, comps.dwHostNameLength);
+ string path(comps.lpszUrlPath, comps.dwUrlPathLength);
+ HINTERNET conn = InternetConnectA(session, host.c_str(), comps.nPort, nullptr,
+ nullptr, INTERNET_SERVICE_HTTP, 0, 0);
+ if (!conn) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: HttpOpenRequest: Error: "
+ << GetLastError();
+ return false;
+ }
+
+ HINTERNET req = HttpOpenRequestA(conn, "GET", path.c_str(), nullptr, nullptr,
+ nullptr, INTERNET_FLAG_NO_COOKIES, 0);
+ if (!req) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: HttpSendRequest: Error: "
+ << GetLastError();
+ InternetCloseHandle(conn);
+ return false;
+ }
+
+ DWORD status = 0;
+ DWORD size = sizeof(status);
+ if (!HttpSendRequest(req, nullptr, 0, nullptr, 0)) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: HttpSendRequest: Error: "
+ << GetLastError();
+ InternetCloseHandle(req);
+ InternetCloseHandle(conn);
+ return false;
+ }
+
+ if (!HttpQueryInfo(req, HTTP_QUERY_STATUS_CODE | HTTP_QUERY_FLAG_NUMBER,
+ &status, &size, nullptr)) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: HttpQueryInfo: Error: "
+ << GetLastError();
+ InternetCloseHandle(req);
+ InternetCloseHandle(conn);
+ return false;
+ }
+
+ bool do_ungzip = false;
+ // See if the content is gzipped and we need to decompress it.
+ char encoding[32];
+ DWORD encoding_size = sizeof(encoding);
+ if (HttpQueryInfo(req, HTTP_QUERY_CONTENT_ENCODING, encoding, &encoding_size,
+ nullptr) &&
+ strcmp(encoding, "gzip") == 0) {
+ do_ungzip = true;
+ BPLOG(INFO) << "HTTPSymbolSupplier: need to manually un-gzip";
+ }
+
+ bool success = false;
+ if (status == 200) {
+ DWORD bytes = 0;
+ string tempfile(MAX_PATH, '\0');
+ if (GetTempFileNameA(tmp_path.c_str(), "sym", 1, &tempfile[0]) != 0) {
+ tempfile.resize(strlen(tempfile.c_str()));
+ BPLOG(INFO) << "HTTPSymbolSupplier: symbol exists, saving to "
+ << tempfile;
+ FILE* f = fopen(tempfile.c_str(), "wb");
+ while (InternetQueryDataAvailable(req, &bytes, 0, 0) && bytes > 0) {
+ vector<uint8_t> data(bytes);
+ DWORD downloaded = 0;
+ if (InternetReadFile(req, &data[0], bytes, &downloaded)) {
+ fwrite(&data[0], downloaded, 1, f);
+ }
+ }
+ fclose(f);
+ if (do_ungzip) {
+ string gzfile = tempfile + ".gz";
+ MoveFileA(tempfile.c_str(), gzfile.c_str());
+ uint8_t buffer[4096];
+ gzFile g = gzopen(gzfile.c_str(), "r");
+ FILE* f = fopen(tempfile.c_str(), "w");
+ if (g && f) {
+ while (true) {
+ int bytes_read = gzread(g, buffer, sizeof(buffer));
+ if (bytes_read > 0) {
+ fwrite(buffer, bytes_read, 1, f);
+ } else {
+ if (bytes_read == 0) {
+ success = true;
+ }
+ break;
+ }
+ }
+ }
+ if (g) {
+ gzclose(g);
+ }
+ if (f) {
+ fclose(f);
+ }
+ if (!success) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: failed to decompress " << file;
+ }
+ } else {
+ success = true;
+ }
+
+ *fetch_time = GetTickCount() - start;
+
+ if (success) {
+ success = mkdirs(file);
+ if (!success) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: failed to create directories "
+ << "for " << file;
+ } else {
+ success = MoveFileA(tempfile.c_str(), file.c_str());
+ if (!success) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: failed to rename file";
+ unlink(tempfile.c_str());
+ }
+ }
+ }
+ }
+ } else {
+ BPLOG(INFO) << "HTTPSymbolSupplier: HTTP response code: " << status;
+ }
+
+ InternetCloseHandle(req);
+ InternetCloseHandle(conn);
+ return success;
+}
+
+#else // !_WIN32
+string URLEncode(CURL* curl, const string& url) {
+ char* escaped_url_raw =
+ curl_easy_escape(curl, const_cast<char*>(url.c_str()), url.length());
+ if (not escaped_url_raw) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: couldn't escape URL: " << url;
+ return "";
+ }
+ string escaped_url(escaped_url_raw);
+ curl_free(escaped_url_raw);
+ return escaped_url;
+}
+
+string JoinURL(CURL* curl, const string& url, const string& sub) {
+ return url + "/" + URLEncode(curl, sub);
+}
+
+bool FetchURLToFile(CURL* curl, const string& url, const string& file,
+ const string& tmp_path, float* fetch_time) {
+ *fetch_time = 0.0f;
+
+ string tempfile = JoinPath(tmp_path, "symbolXXXXXX");
+ int fd = mkstemp(&tempfile[0]);
+ if (fd == -1) {
+ return false;
+ }
+ FILE* f = fdopen(fd, "w");
+
+ curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+ curl_easy_setopt(curl, CURLOPT_ENCODING, "");
+ curl_easy_setopt(curl, CURLOPT_STDERR, stderr);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, f);
+ curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
+
+ struct timeval t1, t2;
+ gettimeofday(&t1, nullptr);
+ bool result = false;
+ long retcode = -1;
+ if (curl_easy_perform(curl) != 0) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: curl_easy_perform failed";
+ } else if (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &retcode) != 0) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: curl_easy_getinfo failed";
+ } else if (retcode != 200) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: HTTP response code: " << retcode;
+ } else {
+ BPLOG(INFO) << "HTTPSymbolSupplier: symbol exists, saving to " << file;
+ result = true;
+ }
+ gettimeofday(&t2, nullptr);
+ *fetch_time =
+ (t2.tv_sec - t1.tv_sec) * 1000.0 + (t2.tv_usec - t1.tv_usec) / 1000.0;
+ fclose(f);
+ close(fd);
+
+ if (result) {
+ result = mkdirs(file);
+ if (!result) {
+ BPLOG(INFO) << "HTTPSymbolSupplier: failed to create directories for "
+ << file;
+ }
+ }
+ if (result) {
+ result = 0 == rename(tempfile.c_str(), file.c_str());
+ if (!result) {
+ int e = errno;
+ BPLOG(INFO) << "HTTPSymbolSupplier: failed to rename file, errno=" << e;
+ }
+ }
+
+ if (!result) {
+ unlink(tempfile.c_str());
+ }
+
+ return result;
+}
+#endif
+} // namespace
+
+bool HTTPSymbolSupplier::FetchSymbolFile(const CodeModule* module,
+ const SystemInfo* system_info) {
+ if (!session_) {
+ return false;
+ }
+ // Copied from simple_symbol_supplier.cc
+ string debug_file_name = PathnameStripper::File(module->debug_file());
+ if (debug_file_name.empty()) {
+ return false;
+ }
+ string path = debug_file_name;
+ string url = URLEncode(session_, debug_file_name);
+
+ // Append the identifier as a directory name.
+ string identifier = module->debug_identifier();
+ if (identifier.empty()) {
+ return false;
+ }
+ path = JoinPath(path, identifier);
+ url = JoinURL(session_, url, identifier);
+
+ // See if we already attempted to fetch this symbol file.
+ if (SymbolWasError(module, system_info)) {
+ return false;
+ }
+
+ // Transform the debug file name into one ending in .sym. If the existing
+ // name ends in .pdb, strip the .pdb. Otherwise, add .sym to the non-.pdb
+ // name.
+ string debug_file_extension;
+ if (debug_file_name.size() > 4) {
+ debug_file_extension = debug_file_name.substr(debug_file_name.size() - 4);
+ }
+ std::transform(debug_file_extension.begin(), debug_file_extension.end(),
+ debug_file_extension.begin(), tolower);
+ if (debug_file_extension == ".pdb") {
+ debug_file_name = debug_file_name.substr(0, debug_file_name.size() - 4);
+ }
+
+ debug_file_name += ".sym";
+ path = JoinPath(path, debug_file_name);
+ url = JoinURL(session_, url, debug_file_name);
+
+ string full_path = JoinPath(cache_path_, path);
+
+ bool result = false;
+ for (auto server_url = server_urls_.begin(); server_url < server_urls_.end();
+ ++server_url) {
+ string full_url = *server_url + url;
+ float fetch_time;
+ BPLOG(INFO) << "HTTPSymbolSupplier: querying " << full_url;
+ if (FetchURLToFile(session_, full_url, full_path, tmp_path_, &fetch_time)) {
+ StoreCacheMiss(module, fetch_time);
+ result = true;
+ break;
+ }
+ }
+ if (!result) {
+ error_symbols_.insert(
+ std::make_pair(module->debug_file(), module->debug_identifier()));
+ }
+ return result;
+}
+
+bool HTTPSymbolSupplier::GetStats(const CodeModule* module,
+ SymbolStats* stats) const {
+ const auto& found = symbol_stats_.find(
+ std::make_pair(module->debug_file(), module->debug_identifier()));
+ if (found == symbol_stats_.end()) {
+ return false;
+ }
+
+ *stats = found->second;
+ return true;
+}
+
+bool HTTPSymbolSupplier::SymbolWasError(const CodeModule* module,
+ const SystemInfo* system_info) {
+ return error_symbols_.find(std::make_pair(module->debug_file(),
+ module->debug_identifier())) !=
+ error_symbols_.end();
+}
+
+} // namespace breakpad_extra
diff --git a/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.h b/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.h
new file mode 100644
index 0000000000..bed4ed6868
--- /dev/null
+++ b/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.h
@@ -0,0 +1,120 @@
+// Copyright (c) 2011 The Mozilla Foundation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of The Mozilla Foundation nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// A SymbolSupplier that can fetch symbols via HTTP from a symbol server
+// serving them at Microsoft symbol server-compatible paths.
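+// For example, a module whose debug file is "xul.pdb" and whose debug
+// identifier is "A3E6FA4B0ABC12345678901234567890A" (a made-up value for
+// illustration) would be requested as
+// <server URL>/xul.pdb/A3E6FA4B0ABC12345678901234567890A/xul.sym and stored
+// under the same relative path in the local cache directory.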
+
+#include <map>
+#include <set>
+#include <string>
+
+#include "processor/simple_symbol_supplier.h"
+
+#ifdef _WIN32
+# include <windows.h>
+# include <wininet.h>
+#else
+typedef void CURL;
+#endif
+
+namespace google_breakpad {
+class CodeModule;
+struct SystemInfo;
+} // namespace google_breakpad
+
+namespace breakpad_extra {
+
+using google_breakpad::CodeModule;
+using google_breakpad::SimpleSymbolSupplier;
+using google_breakpad::SymbolSupplier;
+using google_breakpad::SystemInfo;
+using std::string;
+using std::vector;
+
+class HTTPSymbolSupplier : public SimpleSymbolSupplier {
+ public:
+ // Construct an HTTPSymbolSupplier.
+ // |server_urls| contains URLs to query for symbols.
+ // |cache_path| is a directory in which to store downloaded symbols.
+ // |local_paths| are directories to query for symbols before checking URLs.
+ // |tmp_path| is a directory for temporary files; downloaded symbol files are
+ // written there before being moved into |cache_path|.
+ HTTPSymbolSupplier(const vector<string>& server_urls,
+ const string& cache_path,
+ const vector<string>& local_paths, const string& tmp_path);
+ virtual ~HTTPSymbolSupplier();
+
+ // Returns the path to the symbol file for the given module. See the
+ // description above.
+ virtual SymbolSupplier::SymbolResult GetSymbolFile(
+ const CodeModule* module, const SystemInfo* system_info,
+ string* symbol_file);
+
+ virtual SymbolSupplier::SymbolResult GetSymbolFile(
+ const CodeModule* module, const SystemInfo* system_info,
+ string* symbol_file, string* symbol_data);
+
+ // Allocates data buffer on heap and writes symbol data into buffer.
+ // Symbol supplier ALWAYS takes ownership of the data buffer.
+ virtual SymbolSupplier::SymbolResult GetCStringSymbolData(
+ const CodeModule* module, const SystemInfo* system_info,
+ string* symbol_file, char** symbol_data, size_t* size);
+
+ struct SymbolStats {
+ // true if the symbol file was already cached on disk,
+ // false if a HTTP request was made to fetch it.
+ bool was_cached_on_disk;
+ // If was_cached_on_disk is false, the time in milliseconds
+ // that the full HTTP request to fetch the symbol file took.
+ float fetch_time_ms;
+ };
+
+ // Get stats on symbols for a module.
+ // Returns true if stats were found, false if not.
+ bool GetStats(const CodeModule* module, SymbolStats* stats) const;
+
+ private:
+ bool FetchSymbolFile(const CodeModule* module, const SystemInfo* system_info);
+
+ bool SymbolWasError(const CodeModule* module, const SystemInfo* system_info);
+ void StoreCacheHit(const CodeModule* module);
+ void StoreCacheMiss(const CodeModule* module, float fetch_time);
+ void StoreSymbolStats(const CodeModule* module, const SymbolStats& stats);
+
+ vector<string> server_urls_;
+ string cache_path_;
+ string tmp_path_;
+ std::set<std::pair<string, string>> error_symbols_;
+ std::map<std::pair<string, string>, SymbolStats> symbol_stats_;
+#ifdef _WIN32
+ HINTERNET session_;
+#else
+ CURL* session_;
+#endif
+};
+
+} // namespace breakpad_extra
diff --git a/tools/crashreporter/minidump_stackwalk/minidump_stackwalk.cc b/tools/crashreporter/minidump_stackwalk/minidump_stackwalk.cc
new file mode 100644
index 0000000000..f43cf14a01
--- /dev/null
+++ b/tools/crashreporter/minidump_stackwalk/minidump_stackwalk.cc
@@ -0,0 +1,212 @@
+// Copyright (c) 2010 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// minidump_stackwalk.cc: Process a minidump with MinidumpProcessor, printing
+// the results, including stack traces.
+//
+// Author: Mark Mentovai
+//
+// Lightly modified to use HTTPSymbolSupplier instead of SimpleSymbolSupplier
+
+#include <stdio.h>
+#include <string.h>
+
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "common/scoped_ptr.h"
+#include "common/using_std_string.h"
+#include "google_breakpad/processor/basic_source_line_resolver.h"
+#include "google_breakpad/processor/minidump.h"
+#include "google_breakpad/processor/minidump_processor.h"
+#include "google_breakpad/processor/process_state.h"
+#include "processor/logging.h"
+#include "processor/simple_symbol_supplier.h"
+#include "processor/stackwalk_common.h"
+
+#include "http_symbol_supplier.h"
+
+#ifdef ERROR
+# undef ERROR
+#endif
+
+#ifdef SEVERITY_ERROR
+# undef SEVERITY_ERROR
+#endif
+
+namespace {
+
+using breakpad_extra::HTTPSymbolSupplier;
+using google_breakpad::BasicSourceLineResolver;
+using google_breakpad::Minidump;
+using google_breakpad::MinidumpMemoryList;
+using google_breakpad::MinidumpProcessor;
+using google_breakpad::MinidumpThreadList;
+using google_breakpad::ProcessState;
+using google_breakpad::scoped_ptr;
+using google_breakpad::SimpleSymbolSupplier;
+
+// Processes |minidump_file| using MinidumpProcessor. |symbol_path|, if
+// non-empty, is the base directory of a symbol storage area, laid out in
+// the format required by SimpleSymbolSupplier. If such a storage area
+// is specified, it is made available for use by the MinidumpProcessor.
+//
+// Returns the value of MinidumpProcessor::Process. If processing succeeds,
+// prints identifying OS and CPU information from the minidump, crash
+// information if the minidump was produced as a result of a crash, and
+// call stacks for each thread contained in the minidump. All information
+// is printed to stdout.
+bool PrintMinidumpProcess(const string& minidump_file,
+ const std::vector<string>& symbol_args,
+ bool machine_readable, bool output_stack_contents) {
+ std::vector<string> server_paths;
+ std::vector<string> symbol_paths;
+ for (std::vector<string>::const_iterator arg = symbol_args.begin();
+ arg < symbol_args.end(); ++arg) {
+ if (arg->substr(0, 4) == "http") {
+ server_paths.push_back(*arg);
+ } else {
+ symbol_paths.push_back(*arg);
+ }
+ }
+
+ string temp_path;
+#ifdef _WIN32
+ temp_path.resize(MAX_PATH + 1);
+ DWORD length = GetTempPath(temp_path.length(), &temp_path[0]);
+ temp_path.resize(length + 1);
+#else
+ temp_path = "/tmp";
+#endif
+
+ if (server_paths.empty()) {
+ server_paths.push_back(
+ "http://s3-us-west-2.amazonaws.com/"
+ "org.mozilla.crash-stats.symbols-public/v1/");
+ }
+
+ scoped_ptr<HTTPSymbolSupplier> symbol_supplier;
+ string symbols_cache = temp_path;
+ if (!symbol_paths.empty()) {
+ symbols_cache = symbol_paths[0];
+ symbol_paths.erase(symbol_paths.begin());
+ }
+
+ // TODO(mmentovai): check existence of symbol_path if specified?
+ symbol_supplier.reset(new HTTPSymbolSupplier(server_paths, symbols_cache,
+ symbol_paths, temp_path));
+
+ BasicSourceLineResolver resolver;
+ MinidumpProcessor minidump_processor(symbol_supplier.get(), &resolver);
+
+ // Increase the maximum number of threads and regions.
+ MinidumpThreadList::set_max_threads(std::numeric_limits<uint32_t>::max());
+ MinidumpMemoryList::set_max_regions(std::numeric_limits<uint32_t>::max());
+ // Process the minidump.
+ Minidump dump(minidump_file);
+ if (!dump.Read()) {
+ BPLOG(ERROR) << "Minidump " << dump.path() << " could not be read";
+ return false;
+ }
+ ProcessState process_state;
+ if (minidump_processor.Process(&dump, &process_state) !=
+ google_breakpad::PROCESS_OK) {
+ BPLOG(ERROR) << "MinidumpProcessor::Process failed";
+ return false;
+ }
+
+ if (machine_readable) {
+ PrintProcessStateMachineReadable(process_state);
+ } else {
+ PrintProcessState(process_state, output_stack_contents, &resolver);
+ }
+
+ return true;
+}
+
+void usage(const char* program_name) {
+ fprintf(stderr,
+ "usage: %s [-m|-s] <minidump-file> [<symbol server URL> | "
+ "<symbol-path>]+\n"
+ " -m : Output in machine-readable format\n"
+ " -s : Output stack contents\n",
+ program_name);
+}
+
+} // namespace
+
+int main(int argc, char** argv) {
+ BPLOG_INIT(&argc, &argv);
+
+ if (argc < 2) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ const char* minidump_file;
+ bool machine_readable = false;
+ bool output_stack_contents = false;
+ int symbol_path_arg;
+
+ if (strcmp(argv[1], "-m") == 0) {
+ if (argc < 3) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ machine_readable = true;
+ minidump_file = argv[2];
+ symbol_path_arg = 3;
+ } else if (strcmp(argv[1], "-s") == 0) {
+ if (argc < 3) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ output_stack_contents = true;
+ minidump_file = argv[2];
+ symbol_path_arg = 3;
+ } else {
+ minidump_file = argv[1];
+ symbol_path_arg = 2;
+ }
+
+ // extra arguments are symbol paths
+ std::vector<string> symbol_paths;
+ if (argc > symbol_path_arg) {
+ for (int argi = symbol_path_arg; argi < argc; ++argi)
+ symbol_paths.push_back(argv[argi]);
+ }
+
+ return PrintMinidumpProcess(minidump_file, symbol_paths, machine_readable,
+ output_stack_contents)
+ ? 0
+ : 1;
+}
diff --git a/tools/crashreporter/minidump_stackwalk/moz.build b/tools/crashreporter/minidump_stackwalk/moz.build
new file mode 100644
index 0000000000..da769331ce
--- /dev/null
+++ b/tools/crashreporter/minidump_stackwalk/moz.build
@@ -0,0 +1,37 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Program("minidump_stackwalk")
+
+SOURCES += [
+ "/toolkit/crashreporter/google-breakpad/src/common/path_helper.cc",
+ "/toolkit/crashreporter/google-breakpad/src/processor/simple_symbol_supplier.cc",
+ "http_symbol_supplier.cc",
+ "minidump_stackwalk.cc",
+]
+
+USE_LIBS += [
+ "breakpad_processor",
+ "zlib",
+]
+
+if CONFIG["OS_TARGET"] == "WINNT":
+ if CONFIG["CC_TYPE"] in ("clang", "gcc"):
+ DEFINES["__USE_MINGW_ANSI_STDIO"] = True
+
+ LDFLAGS += [
+ "-static-libgcc",
+ "-static-libstdc++",
+ ]
+
+ OS_LIBS += ["wininet"]
+elif CONFIG["OS_TARGET"] == "Linux":
+ USE_LIBS += ["curl"]
+ DIRS += ["curl-compat"]
+else:
+ OS_LIBS += ["curl"]
+
+DisableStlWrapping()
+
+include("/toolkit/crashreporter/crashreporter.mozbuild")
diff --git a/tools/crashreporter/moz.configure b/tools/crashreporter/moz.configure
new file mode 100644
index 0000000000..f7380a502c
--- /dev/null
+++ b/tools/crashreporter/moz.configure
@@ -0,0 +1,5 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+include("../../build/moz.configure/rust.configure")
diff --git a/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py
new file mode 100755
index 0000000000..00f5dd7499
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py
@@ -0,0 +1,347 @@
+#!/usr/bin/env python
+
+# Copyright 2015 Michael R. Miller.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+"""
+PackageSymbolDumper.py
+
+Dumps Breakpad symbols for the contents of an Apple update installer. Given a
+path to an Apple update installer as a .dmg or a path to a specific package
+within the disk image, PackageSymbolDumper mounts, traverses, and dumps symbols
+for all applicable frameworks and dylibs found within.
+
+Required tools for Linux:
+ pax
+ gzip
+ tar
+ xar (http://code.google.com/p/xar/)
+ xpwn's dmg (https://github.com/planetbeing/xpwn)
+
+Created on Apr 11, 2012
+
+@author: mrmiller
+"""
+from __future__ import absolute_import
+
+import argparse
+import concurrent.futures
+import errno
+import glob
+import logging
+import os
+import shutil
+import subprocess
+import tempfile
+
+from scrapesymbols.gathersymbols import process_paths
+
+
+def expand_pkg(pkg_path, out_path):
+ """
+ Expands the contents of an installer package to some directory.
+
+ @param pkg_path: a path to an installer package (.pkg)
+ @param out_path: a path to hold the package contents
+ """
+ subprocess.check_call(
+ 'cd "{dest}" && xar -x -f "{src}"'.format(src=pkg_path, dest=out_path),
+ shell=True,
+ )
+
+
+def filter_files(function, path):
+ """
+ Yield file paths matching a filter function by walking the
+ hierarchy rooted at path.
+
+ @param function: a function taking in a filename that returns true to
+ include the path
+ @param path: the root path of the hierarchy to traverse
+ """
+ for root, _dirs, files in os.walk(path):
+ for filename in files:
+ if function(filename):
+ yield os.path.join(root, filename)
+
+
+def find_packages(path):
+ """
+ Returns an iterator of installer package paths (as determined by the .pkg extension)
+ found within path.
+
+ @param path: root path to search for .pkg files
+ """
+ return filter_files(lambda filename: os.path.splitext(filename)[1] == ".pkg", path)
+
+
+def find_all_packages(paths):
+ """
+ Yield installer package files found in all of `paths`.
+
+ @param paths: list of root paths to search for .pkg files
+ """
+ for path in paths:
+ logging.info("find_all_packages: {}".format(path))
+ for pkg in find_packages(path):
+ yield pkg
+
+
+def find_payloads(path):
+ """
+ Returns an iterator of possible installer package payload paths.
+
+ @param path: root path for an installer package
+ """
+ return filter_files(
+ lambda filename: "Payload" in filename or ".pax.gz" in filename, path
+ )
+
+
+def extract_payload(payload_path, output_path):
+ """
+ Extracts the contents of an installer package payload to a given directory.
+
+ @param payload_path: path to an installer package's payload
+ @param output_path: output path for the payload's contents
+ @return True for success, False for failure.
+ """
+ header = open(payload_path, "rb").read(2)
+ try:
+ if header == "BZ":
+ logging.info("Extracting bzip2 payload")
+ extract = "bzip2"
+ subprocess.check_call(
+ 'cd {dest} && {extract} -dc {payload} | pax -r -k -s ":^/::"'.format(
+ extract=extract, payload=payload_path, dest=output_path
+ ),
+ shell=True,
+ )
+ return True
+ elif header == "\x1f\x8b":
+ logging.info("Extracting gzip payload")
+ extract = "gzip"
+ subprocess.check_call(
+ 'cd {dest} && {extract} -dc {payload} | pax -r -k -s ":^/::"'.format(
+ extract=extract, payload=payload_path, dest=output_path
+ ),
+ shell=True,
+ )
+ return True
+ elif header == "pb":
+ logging.info("Extracting pbzx payload")
+ extract = "parse_pbzx.py"
+
+ payload_dir = os.path.dirname(payload_path)
+ # First, unpack the PBZX into cpio parts.
+ subprocess.check_call(["parse_pbzx.py", payload_path], cwd=payload_dir)
+ # Next, decompress any parts that are .xz, and feed them all into pax.
+ pax_proc = subprocess.Popen(
+ ["pax", "-r", "-k", "-s", ":^/::"],
+ stdin=subprocess.PIPE,
+ cwd=output_path,
+ )
+ for part in sorted(glob.glob(os.path.join(payload_dir, "Payload.part*"))):
+ if part.endswith(".xz"):
+ logging.info("Extracting xz part {}".format(part))
+ # This would be easier if we pulled in the lzma module...
+ xz_proc = subprocess.Popen(
+ ["xz", "-dc", part], stdout=subprocess.PIPE, cwd=payload_dir
+ )
+ shutil.copyfileobj(xz_proc.stdout, pax_proc.stdin)
+ xz_proc.wait()
+ else:
+ logging.info("Copying plain cpio part {}".format(part))
+ with open(part, "rb") as f:
+ shutil.copyfileobj(f, pax_proc.stdin)
+ pax_proc.stdin.close()
+ pax_proc.wait()
+ return True
+ else:
+ # Unsupported format
+ logging.error(
+ "Unknown payload format: 0x{0:x}{1:x}".format(
+ ord(header[0]), ord(header[1])
+ )
+ )
+ return False
+
+ except subprocess.CalledProcessError:
+ return False
+
+
+def shutil_error_handler(caller, path, excinfo):
+ logging.error('Could not remove "{path}": {info}'.format(path=path, info=excinfo))
+
+
+def write_symbol_file(dest, filename, contents):
+ full_path = os.path.join(dest, filename)
+ try:
+ os.makedirs(os.path.dirname(full_path))
+ open(full_path, "wb").write(contents)
+ except os.error as e:
+ if e.errno != errno.EEXIST:
+ raise
+
+
+def dump_symbols_from_payload(executor, dump_syms, payload_path, dest):
+ """
+ Dumps all the symbols found inside the payload of an installer package.
+
+ @param dump_syms: path to the dump_syms executable
+ @param payload_path: path to an installer package's payload
+ @param dest: output path for symbols
+ """
+ temp_dir = None
+ logging.info("Dumping symbols from payload: " + payload_path)
+ try:
+ temp_dir = tempfile.mkdtemp()
+ logging.info("Extracting payload to {path}.".format(path=temp_dir))
+ if not extract_payload(payload_path, temp_dir):
+ logging.error("Could not extract payload: " + payload_path)
+ return
+
+ # dump the symbols for the payload contents
+ system_library = os.path.join("System", "Library")
+ subdirectories = [
+ os.path.join(system_library, "Frameworks"),
+ os.path.join(system_library, "PrivateFrameworks"),
+ os.path.join("usr", "lib"),
+ ]
+ paths_to_dump = map(lambda d: os.path.join(temp_dir, d), subdirectories)
+
+ for filename, contents in process_paths(
+ paths_to_dump, executor, dump_syms, False, platform="darwin"
+ ):
+ if filename and contents:
+ logging.info("Added symbol file " + filename)
+ write_symbol_file(dest, filename, contents)
+
+ finally:
+ if temp_dir is not None:
+ shutil.rmtree(temp_dir, onerror=shutil_error_handler)
+
+
+def dump_symbols_from_package(executor, dump_syms, pkg, dest):
+ """
+ Dumps all the symbols found inside an installer package.
+
+ @param dump_syms: path to the dump_syms executable
+ @param pkg: path to an installer package
+ @param dest: output path for symbols
+ """
+ temp_dir = None
+ logging.info("Dumping symbols from package: " + pkg)
+ try:
+ temp_dir = tempfile.mkdtemp()
+ expand_pkg(pkg, temp_dir)
+
+ # check for any subpackages
+ for subpackage in find_packages(temp_dir):
+ logging.warning("UNTESTED: Found subpackage at: " + subpackage)
+ dump_symbols_from_package(executor, dump_syms, subpackage, dest)
+
+ # dump symbols from any payloads (only expecting one) in the package
+ for payload in find_payloads(temp_dir):
+ dump_symbols_from_payload(executor, dump_syms, payload, dest)
+
+ except Exception as e:
+ logging.error("Exception while dumping symbols from package: {}".format(e))
+
+ finally:
+ if temp_dir is not None:
+ shutil.rmtree(temp_dir, onerror=shutil_error_handler)
+
+
+def read_processed_packages(tracking_file):
+ if tracking_file is None or not os.path.exists(tracking_file):
+ return set()
+ logging.info("Reading processed packages from {}".format(tracking_file))
+ return set(open(tracking_file, "r").read().splitlines())
+
+
+def write_processed_packages(tracking_file, processed_packages):
+ if tracking_file is None:
+ return
+ logging.info(
+ "Writing {} processed packages to {}".format(
+ len(processed_packages), tracking_file
+ )
+ )
+ open(tracking_file, "w").write("\n".join(processed_packages))
+
+
+def process_packages(package_finder, to, tracking_file, dump_syms):
+ processed_packages = read_processed_packages(tracking_file)
+ with concurrent.futures.ProcessPoolExecutor() as executor:
+ for pkg in package_finder():
+ if pkg in processed_packages:
+ logging.info("Skipping already-processed package: {}".format(pkg))
+ else:
+ dump_symbols_from_package(executor, dump_syms, pkg, to)
+ processed_packages.add(pkg)
+ write_processed_packages(tracking_file, processed_packages)
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Extracts Breakpad symbols from a Mac OS X support update."
+ )
+ parser.add_argument(
+ "--dump_syms",
+ default="dump_syms",
+ type=str,
+ help="path to the Breakpad dump_syms executable",
+ )
+ parser.add_argument(
+ "--tracking-file",
+ type=str,
+ help="Path to a file in which to store information "
+ + "about already-processed packages",
+ )
+ parser.add_argument(
+ "search", nargs="+", help="Paths to search recursively for packages"
+ )
+ parser.add_argument("to", type=str, help="destination path for the symbols")
+ args = parser.parse_args()
+
+ logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ )
+ for p in ("requests.packages.urllib3.connectionpool", "urllib3"):
+ urllib3_logger = logging.getLogger(p)
+ urllib3_logger.setLevel(logging.ERROR)
+
+ if not args.search or not all(os.path.exists(p) for p in args.search):
+ logging.error("Invalid search path")
+ return
+ if not os.path.exists(args.to):
+ logging.error("Invalid path to destination")
+ return
+
+ def finder():
+ return find_all_packages(args.search)
+
+ process_packages(finder, args.to, args.tracking_file, args.dump_syms)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/crashreporter/system-symbols/mac/get_update_packages.py b/tools/crashreporter/system-symbols/mac/get_update_packages.py
new file mode 100644
index 0000000000..7ddbad30b0
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/get_update_packages.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2015 Ted Mielczarek <ted@mielczarek.org>
+# and Michael R. Miller <michaelrmmiller@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+
+import argparse
+import concurrent.futures
+import logging
+import os
+import re
+import requests
+import shutil
+import subprocess
+import tempfile
+import urlparse
+
+from PackageSymbolDumper import process_packages, find_packages
+
+OSX_RE = re.compile(r"10\.[0-9]+\.[0-9]+")
+
+
+def extract_dmg(dmg_path, dest):
+ logging.info("extract_dmg({}, {})".format(dmg_path, dest))
+ with tempfile.NamedTemporaryFile() as f:
+ subprocess.check_call(
+ ["dmg", "extract", dmg_path, f.name], stdout=open(os.devnull, "wb")
+ )
+ subprocess.check_call(["hfsplus", f.name, "extractall"], cwd=dest)
+
+
+def get_update_packages():
+ for i in range(16):
+ logging.info("get_update_packages: page " + str(i))
+ url = (
+ "https://km.support.apple.com/kb/index?page=downloads_browse&sort=recency"
+ "&facet=all&category=PF6&locale=en_US&offset=%d" % i
+ )
+ res = requests.get(url)
+ if res.status_code != 200:
+ break
+ data = res.json()
+ downloads = data.get("downloads", [])
+ if not downloads:
+ break
+ for d in downloads:
+ title = d.get("title", "")
+ if OSX_RE.search(title) and "Combo" not in title:
+ logging.info("Title: " + title)
+ if "fileurl" in d:
+ yield d["fileurl"]
+ else:
+ logging.warn("No fileurl in download!")
+
+
+def fetch_url_to_file(url, download_dir):
+ filename = os.path.basename(urlparse.urlsplit(url).path)
+ local_filename = os.path.join(download_dir, filename)
+ if os.path.isfile(local_filename):
+ logging.info("{} already exists, skipping".format(local_filename))
+ return None
+ r = requests.get(url, stream=True)
+ res_len = int(r.headers.get("content-length", "0"))
+ logging.info("Downloading {} -> {} ({} bytes)".format(url, local_filename, res_len))
+ with open(local_filename, "wb") as f:
+ for chunk in r.iter_content(chunk_size=1024):
+ if chunk: # filter out keep-alive new chunks
+ f.write(chunk)
+ return local_filename
+
+
+def fetch_and_extract_dmg(url, tmpdir):
+ logging.info("fetch_and_extract_dmg: " + url)
+ filename = fetch_url_to_file(url, tmpdir)
+ if not filename:
+ return []
+ # Extract dmg contents to a subdir
+ subdir = tempfile.mkdtemp(dir=tmpdir)
+ extract_dmg(filename, subdir)
+ packages = list(find_packages(subdir))
+ logging.info(
+ "fetch_and_extract_dmg({}): found packages: {}".format(url, str(packages))
+ )
+ return packages
+
+
+def find_update_packages(tmpdir):
+ logging.info("find_update_packages")
+ # Only download 2 packages at a time.
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+ jobs = dict(
+ (executor.submit(fetch_and_extract_dmg, url, tmpdir), url)
+ for url in get_update_packages()
+ )
+ for future in concurrent.futures.as_completed(jobs):
+ url = jobs[future]
+ if future.exception() is not None:
+ logging.error(
+ "exception downloading {}: {}".format(url, future.exception())
+ )
+ else:
+ for pkg in future.result():
+ yield pkg
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Download OS X update packages and dump symbols from them"
+ )
+ parser.add_argument(
+ "--dump_syms",
+ default="dump_syms",
+ type=str,
+ help="path to the Breakpad dump_syms executable",
+ )
+ parser.add_argument("to", type=str, help="destination path for the symbols")
+ args = parser.parse_args()
+ logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ )
+ for p in ("requests.packages.urllib3.connectionpool", "urllib3"):
+ urllib3_logger = logging.getLogger(p)
+ urllib3_logger.setLevel(logging.ERROR)
+ try:
+ tmpdir = tempfile.mkdtemp(suffix=".osxupdates")
+
+ def finder():
+ return find_update_packages(tmpdir)
+
+ process_packages(finder, args.to, None, args.dump_syms)
+ finally:
+ shutil.rmtree(tmpdir)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/crashreporter/system-symbols/mac/list-packages.py b/tools/crashreporter/system-symbols/mac/list-packages.py
new file mode 100755
index 0000000000..21e87877ef
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/list-packages.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+# Copyright 2015 Ted Mielczarek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import print_function, absolute_import
+
+import os
+import sys
+from reposadolib import reposadocommon
+
+reposadocommon.get_main_dir = lambda: "/usr/local/bin/"
+
+products = reposadocommon.get_product_info()
+args = []
+for product_id, p in products.items():
+ try:
+ t = p["title"]
+ except KeyError:
+ print("Missing title in {}, skipping".format(p), file=sys.stderr)
+ continue
+ # p['CatalogEntry']['Packages']
+ if t.startswith("OS X") or t.startswith("Mac OS X") or t.startswith("macOS"):
+ args.append("--product-id=" + product_id)
+ else:
+ print("Skipping %r for repo_sync" % t, file=sys.stderr)
+if "JUST_ONE_PACKAGE" in os.environ:
+ args = args[:1]
+
+print(" ".join(args))
diff --git a/tools/crashreporter/system-symbols/mac/parse_pbzx.py b/tools/crashreporter/system-symbols/mac/parse_pbzx.py
new file mode 100755
index 0000000000..242a58608e
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/parse_pbzx.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+# This code is from https://gist.github.com/pudquick/ff412bcb29c9c1fa4b8d
+#
+# v2 pbzx stream handler
+# My personal writeup on the differences here:
+# https://gist.github.com/pudquick/29fcfe09c326a9b96cf5
+#
+# Pure python reimplementation of .cpio.xz content extraction from pbzx file
+# payload originally here:
+# http://www.tonymacx86.com/general-help/135458-pbzx-stream-parser.html
+#
+# Cleaned up C version (as the basis for my code) here, thanks to Pepijn Bruienne / @bruienne
+# https://gist.github.com/bruienne/029494bbcfb358098b41
+#
+# The python version of this code does not have an explicit license, but
+# is based on GPLv3 C code linked above.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
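+#
+# Stream layout handled by parse_pbzx() below:
+#   4 bytes   magic "pbzx"
+#   8 bytes   big-endian flags
+#   then, while bit 24 of the current flags value is set:
+#     8 bytes   flags for the next chunk
+#     8 bytes   big-endian chunk length
+#     payload   either an xz stream (magic 0xFD "7zXZ" 0x00) or a raw,
+#               already-decompressed cpio chunk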
+
+from __future__ import absolute_import
+
+import struct
+import sys
+
+
+def seekread(f, offset=None, length=0, relative=True):
+ if offset is not None:
+ # offset provided, let's seek
+ f.seek(offset, [0, 1, 2][relative])
+ if length != 0:
+ return f.read(length)
+
+
+def parse_pbzx(pbzx_path):
+ section = 0
+ xar_out_path = "%s.part%02d.cpio.xz" % (pbzx_path, section)
+ f = open(pbzx_path, "rb")
+ # pbzx = f.read()
+ # f.close()
+ magic = seekread(f, length=4)
+ if magic != "pbzx":
+ raise "Error: Not a pbzx file"
+ # Read 8 bytes for initial flags
+ flags = seekread(f, length=8)
+ # Interpret the flags as a 64-bit big-endian unsigned int
+ flags = struct.unpack(">Q", flags)[0]
+ xar_f = open(xar_out_path, "wb")
+ while flags & (1 << 24):
+ # Read in more flags
+ flags = seekread(f, length=8)
+ flags = struct.unpack(">Q", flags)[0]
+ # Read in length
+ f_length = seekread(f, length=8)
+ f_length = struct.unpack(">Q", f_length)[0]
+ xzmagic = seekread(f, length=6)
+ if xzmagic != "\xfd7zXZ\x00":
+ # This isn't xz content, this is actually _raw decompressed cpio_
+ # chunk of 16MB in size...
+ # Let's back up ...
+ seekread(f, offset=-6, length=0)
+ # ... and split it out ...
+ f_content = seekread(f, length=f_length)
+ section += 1
+ decomp_out = "%s.part%02d.cpio" % (pbzx_path, section)
+ g = open(decomp_out, "wb")
+ g.write(f_content)
+ g.close()
+ # Now to start the next section, which should hopefully be .xz
+ # (we'll just assume it is ...)
+ xar_f.close()
+ section += 1
+ new_out = "%s.part%02d.cpio.xz" % (pbzx_path, section)
+ xar_f = open(new_out, "wb")
+ else:
+ f_length -= 6
+ # This part needs buffering
+ f_content = seekread(f, length=f_length)
+ tail = seekread(f, offset=-2, length=2)
+ xar_f.write(xzmagic)
+ xar_f.write(f_content)
+ if tail != "YZ":
+ xar_f.close()
+ raise "Error: Footer is not xar file footer"
+ try:
+ f.close()
+ xar_f.close()
+ except BaseException:
+ pass
+
+
+def main():
+ parse_pbzx(sys.argv[1])
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/crashreporter/system-symbols/mac/run.sh b/tools/crashreporter/system-symbols/mac/run.sh
new file mode 100755
index 0000000000..8f9ff76d03
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/run.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+
+set -v -e -x
+
+base="$(realpath "$(dirname "$0")")"
+export PATH="$PATH:/builds/worker/bin:$base"
+
+cd /builds/worker
+
+if test "$PROCESSED_PACKAGES_INDEX" && test "$PROCESSED_PACKAGES_PATH" && test "$TASKCLUSTER_ROOT_URL"; then
+ PROCESSED_PACKAGES="$TASKCLUSTER_ROOT_URL/api/index/v1/task/$PROCESSED_PACKAGES_INDEX/artifacts/$PROCESSED_PACKAGES_PATH"
+fi
+
+if test "$PROCESSED_PACKAGES"; then
+ rm -f processed-packages
+ if test `curl --output /dev/null --silent --head --location "$PROCESSED_PACKAGES" -w "%{http_code}"` = 200; then
+ curl -L "$PROCESSED_PACKAGES" | gzip -dc > processed-packages
+ elif test -f "$PROCESSED_PACKAGES"; then
+ gzip -dc "$PROCESSED_PACKAGES" > processed-packages
+ fi
+ if test -f processed-packages; then
+ # Prevent reposado from downloading packages that have previously been
+ # dumped.
+ for f in $(cat processed-packages); do
+ mkdir -p "$(dirname "$f")"
+ touch "$f"
+ done
+ fi
+fi
+
+mkdir -p /opt/data-reposado/html /opt/data-reposado/metadata
+
+# First, just fetch all the update info.
+python3 /usr/local/bin/repo_sync --no-download
+
+# Next, fetch just the update packages we're interested in.
+packages=$(python3 "${base}/list-packages.py")
+# shellcheck disable=SC2086
+python3 /usr/local/bin/repo_sync $packages
+
+du -sh /opt/data-reposado
+
+# Now scrape symbols out of anything that was downloaded.
+mkdir -p symbols artifacts
+python3 "${base}/PackageSymbolDumper.py" --tracking-file=/builds/worker/processed-packages --dump_syms=/builds/worker/bin/dump_syms_mac /opt/data-reposado/html/content/downloads /builds/worker/symbols
+
+# Hand out artifacts
+gzip -c processed-packages > artifacts/processed-packages.gz
+
+cd symbols
+zip -r9 /builds/worker/artifacts/target.crashreporter-symbols.zip ./* || echo "No symbols dumped"
diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py
diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py
new file mode 100644
index 0000000000..52fecaeb9f
--- /dev/null
+++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+# Any copyright is dedicated to the Public Domain.
+# http://creativecommons.org/publicdomain/zero/1.0/
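+#
+# Walks the given files (or SYSTEM_DIRS by default), runs dump_syms on every
+# platform binary it finds, skips symbols the symbol server already has unless
+# --all is passed, and collects the resulting .sym files into symbols.zip.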
+
+from __future__ import print_function, absolute_import
+
+import argparse
+import concurrent.futures
+import datetime
+import os
+import requests
+import subprocess
+import sys
+import urllib.parse
+import zipfile
+
+
+if sys.platform == "darwin":
+ SYSTEM_DIRS = [
+ "/usr/lib",
+ "/System/Library/Frameworks",
+ "/System/Library/PrivateFrameworks",
+ "/System/Library/Extensions",
+ ]
+else:
+ SYSTEM_DIRS = ["/lib", "/usr/lib"]
+SYMBOL_SERVER_URL = (
+ "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public/v1/"
+)
+
+
+def should_process(f, platform=sys.platform):
+ """Determine if a file is a platform binary"""
+ if platform == "darwin":
+ """
+ The 'file' command can error out. One example is "illegal byte
+ sequence" on a Japanese language UTF8 text file. So we must wrap the
+ command in a try/except block to prevent the script from terminating
+ prematurely when this happens.
+ """
+ try:
+ filetype = subprocess.check_output(["file", "-Lb", f])
+ except subprocess.CalledProcessError:
+ return False
+ """Skip kernel extensions"""
+ if "kext bundle" in filetype:
+ return False
+ return filetype.startswith("Mach-O")
+ else:
+ return subprocess.check_output(["file", "-Lb", f]).startswith("ELF")
+ return False
+
+
+def get_archs(filename, platform=sys.platform):
+ """
+ Find the list of architectures present in a Mach-O file, or a single-element
+ list on non-OS X.
+ """
+ if platform == "darwin":
+ return (
+ subprocess.check_output(["lipo", "-info", filename])
+ .decode("utf-8")
+ .split(":")[2]
+ .strip()
+ .split()
+ )
+ return [None]
+
+
+def server_has_file(filename):
+ """
+ Send the symbol server a HEAD request to see if it has this symbol file.
+ """
+ try:
+ r = requests.head(
+ urllib.parse.urljoin(SYMBOL_SERVER_URL, urllib.parse.quote(filename))
+ )
+ return r.status_code == 200
+ except requests.exceptions.RequestException:
+ return False
+
+
+def process_file(dump_syms, path, arch, verbose, write_all):
+ if sys.platform == "darwin":
+ arch_arg = ["-a", arch]
+ else:
+ arch_arg = []
+ try:
+ stderr = None if verbose else open(os.devnull, "wb")
+ stdout = subprocess.check_output([dump_syms] + arch_arg + [path], stderr=stderr)
+ except subprocess.CalledProcessError:
+ if verbose:
+ print("Processing %s%s...failed." % (path, " [%s]" % arch if arch else ""))
+ return None, None
+ # The first line of dump_syms output is the Breakpad MODULE record:
+ # "MODULE <os> <cpu> <debug_id> <debug_file>".
+ module = stdout.decode("utf-8", "replace").splitlines()[0]
+ bits = module.split(" ", 4)
+ if len(bits) != 5:
+ return None, None
+ _, platform, cpu_arch, debug_id, debug_file = bits
+ if verbose:
+ sys.stdout.write("Processing %s [%s]..." % (path, arch))
+ filename = os.path.join(debug_file, debug_id, debug_file + ".sym")
+ # see if the server already has this symbol file
+ if not write_all:
+ if server_has_file(filename):
+ if verbose:
+ print("already on server.")
+ return None, None
+ # Collect for uploading
+ if verbose:
+ print("done.")
+ return filename, stdout
+
+
+def get_files(paths, platform=sys.platform):
+ """
+ For each entry passed in paths if the path is a file that can
+ be processed, yield it, otherwise if it is a directory yield files
+ under it that can be processed.
+ """
+ for path in paths:
+ if os.path.isdir(path):
+ for root, subdirs, files in os.walk(path):
+ for f in files:
+ fullpath = os.path.join(root, f)
+ if should_process(fullpath, platform=platform):
+ yield fullpath
+ elif should_process(path, platform=platform):
+ yield path
+
+
+def process_paths(
+ paths, executor, dump_syms, verbose, write_all=False, platform=sys.platform
+):
+ jobs = set()
+ for fullpath in get_files(paths, platform=platform):
+ while os.path.islink(fullpath):
+ fullpath = os.path.join(os.path.dirname(fullpath), os.readlink(fullpath))
+ if platform == "linux":
+ # See if there's a -dbg package installed and dump that instead.
+ dbgpath = "/usr/lib/debug" + fullpath
+ if os.path.isfile(dbgpath):
+ fullpath = dbgpath
+ for arch in get_archs(fullpath, platform=platform):
+ jobs.add(
+ executor.submit(
+ process_file, dump_syms, fullpath, arch, verbose, write_all
+ )
+ )
+ for job in concurrent.futures.as_completed(jobs):
+ try:
+ yield job.result()
+ except Exception as e:
+ print("Error: %s" % str(e), file=sys.stderr)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "-v", "--verbose", action="store_true", help="Produce verbose output"
+ )
+ parser.add_argument(
+ "--all",
+ action="store_true",
+ help="Gather all system symbols, not just missing ones.",
+ )
+ parser.add_argument("dump_syms", help="Path to dump_syms binary")
+ parser.add_argument(
+ "files", nargs="*", help="Specific files from which to gather symbols."
+ )
+ args = parser.parse_args()
+ args.dump_syms = os.path.abspath(args.dump_syms)
+ # check for the dump_syms binary
+ if (
+ not os.path.isabs(args.dump_syms)
+ or not os.path.exists(args.dump_syms)
+ or not os.access(args.dump_syms, os.X_OK)
+ ):
+ print(
+ "Error: can't find dump_syms binary at %s!" % args.dump_syms,
+ file=sys.stderr,
+ )
+ return 1
+ file_list = set()
+ executor = concurrent.futures.ProcessPoolExecutor()
+ zip_path = os.path.abspath("symbols.zip")
+ with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
+ for filename, contents in process_paths(
+ args.files if args.files else SYSTEM_DIRS,
+ executor,
+ args.dump_syms,
+ args.verbose,
+ args.all,
+ ):
+ if filename and contents and filename not in file_list:
+ file_list.add(filename)
+ zf.writestr(filename, contents)
+ zf.writestr(
+ "ossyms-1.0-{platform}-{date}-symbols.txt".format(
+ platform=sys.platform.title(),
+ date=datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
+ ),
+ "\n".join(file_list),
+ )
+ if file_list:
+ if args.verbose:
+ print("Generated %s with %d symbols" % (zip_path, len(file_list)))
+ else:
+ os.unlink("symbols.zip")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/crashreporter/system-symbols/win/LICENSE b/tools/crashreporter/system-symbols/win/LICENSE
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt
new file mode 100644
index 0000000000..d63dc716e9
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt
@@ -0,0 +1,17 @@
+d2d1.pdb
+d3d10level9.pdb
+d3d10warp.pdb
+d3d11.pdb
+d3d9.pdb
+d3dcompiler_47.pdb
+d3dim700.pdb
+kernel32.pdb
+kernelbase.pdb
+ntdll.pdb
+user32.pdb
+wkernel32.pdb
+wkernelbase.pdb
+wntdll.pdb
+ws2_32.pdb
+wuser32.pdb
+zipwriter.pdb
diff --git a/tools/crashreporter/system-symbols/win/run.sh b/tools/crashreporter/system-symbols/win/run.sh
new file mode 100755
index 0000000000..4313731609
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/run.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+set -v -e -x
+
+base="$(realpath "$(dirname "$0")")"
+
+export DUMP_SYMS_PATH="${MOZ_FETCHES_DIR}/dump_syms/dump_syms"
+
+mkdir -p artifacts && \
+ulimit -n 16384 && \
+PYTHONPATH=$PWD python3 "${base}/symsrv-fetch.py" artifacts/target.crashreporter-symbols.zip
diff --git a/tools/crashreporter/system-symbols/win/scrape-report.py b/tools/crashreporter/system-symbols/win/scrape-report.py
new file mode 100644
index 0000000000..0e1eb18760
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/scrape-report.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Mozilla
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+
+import csv
+import json
+import logging
+import os
+import requests
+import sys
+from urllib.parse import urlparse
+
+
+log = logging.getLogger()
+
+
+def fetch_missing_symbols_from_crash(file_or_crash):
+ if os.path.isfile(file_or_crash):
+ log.info("Fetching missing symbols from JSON file: %s" % file_or_crash)
+ j = {"json_dump": json.load(open(file_or_crash, "rb"))}
+ else:
+ if "report/index/" in file_or_crash:
+ crash_id = urlparse(file_or_crash).path.split("/")[-1]
+ else:
+ crash_id = file_or_crash
+ url = (
+ "https://crash-stats.mozilla.org/api/ProcessedCrash/"
+ "?crash_id={crash_id}&datatype=processed".format(crash_id=crash_id)
+ )
+ log.info("Fetching missing symbols from crash: %s" % url)
+ r = requests.get(url)
+ if r.status_code != 200:
+ log.error("Failed to fetch crash %s" % url)
+ return set()
+ j = r.json()
+ return set(
+ [
+ (m["debug_file"], m["debug_id"], m["filename"], m["code_id"])
+ for m in j["json_dump"]["modules"]
+ if "missing_symbols" in m
+ ]
+ )
+
+
+def main():
+ logging.basicConfig()
+ log.setLevel(logging.DEBUG)
+ urllib3_logger = logging.getLogger("urllib3")
+ urllib3_logger.setLevel(logging.ERROR)
+
+ if len(sys.argv) < 2:
+ log.error("Specify a crash URL or ID")
+ sys.exit(1)
+ symbols = fetch_missing_symbols_from_crash(sys.argv[1])
+ log.info("Found %d missing symbols" % len(symbols))
+ c = csv.writer(sys.stdout)
+ c.writerow(["debug_file", "debug_id", "code_file", "code_id"])
+ for row in symbols:
+ c.writerow(row)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/crashreporter/system-symbols/win/skiplist.txt b/tools/crashreporter/system-symbols/win/skiplist.txt
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/skiplist.txt
diff --git a/tools/crashreporter/system-symbols/win/symsrv-fetch.py b/tools/crashreporter/system-symbols/win/symsrv-fetch.py
new file mode 100644
index 0000000000..734290e1a5
--- /dev/null
+++ b/tools/crashreporter/system-symbols/win/symsrv-fetch.py
@@ -0,0 +1,521 @@
+#!/usr/bin/env python
+#
+# Copyright 2016 Mozilla
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This script will read a CSV of modules from Socorro, and try to retrieve
+# missing symbols from Microsoft's symbol server. It honors a blacklist
+# (blacklist.txt) of symbols that are known to be from our applications,
+# and it maintains its own list of symbols that the MS symbol server
+# doesn't have (skiplist.txt).
+#
+# The script also needs write access to its working directory, in order to
+# update the skiplist text file.
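+#
+# For reference (this matches how get_list() and get_skiplist() below parse
+# them): blacklist.txt and known-microsoft-symbols.txt hold one debug file
+# name per line, while skiplist.txt holds one "<debug_id> <debug_file>" pair
+# per line.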
+
+from aiofile import AIOFile, LineReader
+from aiohttp import ClientSession, ClientTimeout
+from aiohttp.connector import TCPConnector
+import argparse
+import asyncio
+import os
+import shutil
+import logging
+from collections import defaultdict
+from tempfile import mkdtemp
+from urllib.parse import urljoin
+from urllib.parse import quote
+import zipfile
+
+
+# Just hardcoded here
+MICROSOFT_SYMBOL_SERVER = "https://msdl.microsoft.com/download/symbols/"
+USER_AGENT = "Microsoft-Symbol-Server/6.3.0.0"
+MOZILLA_SYMBOL_SERVER = (
+ "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public/v1/"
+)
+MISSING_SYMBOLS_URL = "https://symbols.mozilla.org/missingsymbols.csv?microsoft=only"
+HEADERS = {"User-Agent": USER_AGENT}
+SYM_SRV = "SRV*{0}*https://msdl.microsoft.com/download/symbols;SRV*{0}*https://software.intel.com/sites/downloads/symbols;SRV*{0}*https://download.amd.com/dir/bin;SRV*{0}*https://driver-symbols.nvidia.com"
+TIMEOUT = 7200
+RETRIES = 5
+
+
+log = logging.getLogger()
+
+
+def get_type(data):
+ # PDB v7
+ if data.startswith(b"Microsoft C/C++ MSF 7.00"):
+ return "pdb-v7"
+ # PDB v2
+ if data.startswith(b"Microsoft C/C++ program database 2.00"):
+ return "pdb-v2"
+ # DLL
+ if data.startswith(b"MZ"):
+ return "dll"
+ # CAB
+ if data.startswith(b"MSCF"):
+ return "cab"
+
+ return "unknown"
+
+
+async def exp_backoff(retry_num):
+ await asyncio.sleep(2 ** retry_num)
+
+
+async def server_has_file(client, server, filename):
+ """
+ Send the symbol server a HEAD request to see if it has this symbol file.
+ """
+ url = urljoin(server, quote(filename))
+ for i in range(RETRIES):
+ try:
+ async with client.head(url, headers=HEADERS, allow_redirects=True) as resp:
+ if resp.status == 200 and (
+ (
+ "microsoft" in server
+ and resp.headers["Content-Type"] == "application/octet-stream"
+ )
+ or "mozilla" in server
+ ):
+ log.debug(f"File exists: {url}")
+ return True
+ else:
+ return False
+ except Exception as e:
+ # Sometimes we hit SSL errors or disconnections, so just retry in
+ # that case.
+ log.warning(f"Error with {url}: retry")
+ log.exception(e)
+ await exp_backoff(i)
+
+ log.debug(f"Too many retries (HEAD) for {url}: give up.")
+ return False
+
+
+async def fetch_file(client, server, filename):
+ """
+ Fetch the file from the server
+ """
+ url = urljoin(server, quote(filename))
+ log.debug(f"Fetch url: {url}")
+ for i in range(RETRIES):
+ try:
+ async with client.get(url, headers=HEADERS, allow_redirects=True) as resp:
+ if resp.status == 200:
+ data = await resp.read()
+ typ = get_type(data)
+ if typ == "unknown":
+ # try again
+ await exp_backoff(i)
+ elif typ == "pdb-v2":
+ # too old: skip it
+ log.debug(f"PDB v2 (skipped because too old): {url}")
+ return None
+ else:
+ return data
+ else:
+ log.error(f"Cannot get data (status {resp.status}) for {url}: ")
+ except Exception as e:
+ log.warning(f"Error with {url}")
+ log.exception(e)
+ await asyncio.sleep(0.5)
+
+ log.debug(f"Too many retries (GET) for {url}: give up.")
+ return None
+
+
+def write_skiplist(skiplist):
+ with open("skiplist.txt", "w") as sf:
+ sf.writelines(
+ f"{debug_id} {debug_file}\n" for debug_id, debug_file in skiplist.items()
+ )
+
+
+async def fetch_missing_symbols(u):
+ log.info("Trying missing symbols from %s" % u)
+ async with ClientSession() as client:
+ async with client.get(u, headers=HEADERS) as resp:
+ # just skip the first line since it contains column headers
+ data = await resp.text()
+ return data.splitlines()[1:]
+
+
+async def get_list(filename):
+ alist = set()
+ try:
+ async with AIOFile(filename, "r") as In:
+ async for line in LineReader(In):
+ line = line.rstrip()
+ alist.add(line)
+ except FileNotFoundError:
+ pass
+
+ log.debug(f"{filename} contains {len(alist)} items")
+
+ return alist
+
+
+async def get_skiplist():
+ skiplist = {}
+ path = "skiplist.txt"
+ try:
+ async with AIOFile(path, "r") as In:
+ async for line in LineReader(In):
+ line = line.strip()
+ if line == "":
+ continue
+ s = line.split(" ", maxsplit=1)
+ if len(s) != 2:
+ continue
+ debug_id, debug_file = s
+ skiplist[debug_id] = debug_file.lower()
+ except FileNotFoundError:
+ pass
+
+ log.debug(f"{path} contains {len(skiplist)} items")
+
+ return skiplist
+
+
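+# Each non-header line of the missing-symbols CSV is expected to look like
+# "debug_file,debug_id[,code_file,code_id]", which is how the columns are
+# split out below.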
+def get_missing_symbols(missing_symbols, skiplist, blacklist):
+ modules = defaultdict(set)
+ stats = {"blacklist": 0, "skiplist": 0}
+ for line in missing_symbols:
+ line = line.rstrip()
+ bits = line.split(",")
+ if len(bits) < 2:
+ continue
+ pdb, debug_id = bits[:2]
+ code_file, code_id = None, None
+ if len(bits) >= 4:
+ code_file, code_id = bits[2:4]
+ if pdb and debug_id and pdb.endswith(".pdb"):
+ if pdb.lower() in blacklist:
+ stats["blacklist"] += 1
+ continue
+
+ if skiplist.get(debug_id) != pdb.lower():
+ modules[pdb].add((debug_id, code_file, code_id))
+ else:
+ stats["skiplist"] += 1
+ # We've asked the symbol server previously about this,
+ # so skip it.
+ log.debug("%s/%s already in skiplist", pdb, debug_id)
+
+ return modules, stats
+
+
+async def collect_info(client, filename, debug_id, code_file, code_id):
+ pdb_path = os.path.join(filename, debug_id, filename)
+ sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", "") + ".sym")
+
+ has_pdb = await server_has_file(client, MICROSOFT_SYMBOL_SERVER, pdb_path)
+ has_code = is_there = False
+ if has_pdb:
+ if not await server_has_file(client, MOZILLA_SYMBOL_SERVER, sym_path):
+ has_code = (
+ code_file
+ and code_id
+ and await server_has_file(
+ client,
+ MICROSOFT_SYMBOL_SERVER,
+ f"{code_file}/{code_id}/{code_file}",
+ )
+ )
+ else:
+ # if the file is on moz sym server no need to do anything
+ is_there = True
+ has_pdb = False
+
+ return (filename, debug_id, code_file, code_id, has_pdb, has_code, is_there)
+
+
+async def check_x86_file(path):
+ async with AIOFile(path, "rb") as In:
+ head = b"MODULE windows x86 "
+ chunk = await In.read(len(head))
+ if chunk == head:
+ return True
+ return False
+
+
+async def run_command(cmd):
+ proc = await asyncio.create_subprocess_shell(
+ cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+ )
+ _, err = await proc.communicate()
+ err = err.decode().strip()
+
+ return err
+
+
+async def dump_module(
+ output, symcache, filename, debug_id, code_file, code_id, has_code, dump_syms
+):
+ sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", ".sym"))
+ output_path = os.path.join(output, sym_path)
+ sym_srv = SYM_SRV.format(symcache)
+
+ if has_code:
+ cmd = (
+ f"{dump_syms} {code_file} --code-id {code_id} "
+ f"--store {output} --symbol-server '{sym_srv}' --verbose error"
+ )
+ else:
+ cmd = (
+ f"{dump_syms} {filename} --debug-id {debug_id} "
+ f"--store {output} --symbol-server '{sym_srv}' --verbose error"
+ )
+
+ err = await run_command(cmd)
+
+ if err:
+ log.error(f"Error with {cmd}")
+ log.error(err)
+ return 1
+
+ if not os.path.exists(output_path):
+ log.error(f"Could not find file {output_path} after running {cmd}")
+ return 1
+
+ if not has_code and not await check_x86_file(output_path):
+ # A 32-bit PDB contains everything we need (symbols + stack unwind
+ # info), but a 64-bit PDB doesn't: its stack unwind info lives in the
+ # binary (.dll/.exe) itself. So log here, since we have a 64-bit PDB
+ # without its DLL/EXE.
+ if code_file and code_id:
+ log.debug(f"x86_64 binary {code_file}/{code_id} required")
+ else:
+ log.debug(f"x86_64 binary for {filename}/{debug_id} required")
+ return 2
+
+ log.info(f"Successfully dumped: {filename}/{debug_id}")
+ return sym_path
+
+
+async def dump(output, symcache, modules, dump_syms):
+ tasks = []
+ for filename, debug_id, code_file, code_id, has_code in modules:
+ tasks.append(
+ dump_module(
+ output,
+ symcache,
+ filename,
+ debug_id,
+ code_file,
+ code_id,
+ has_code,
+ dump_syms,
+ )
+ )
+
+ res = await asyncio.gather(*tasks)
+ file_index = {x for x in res if isinstance(x, str)}
+ stats = {
+ "dump_error": sum(1 for x in res if x == 1),
+ "no_bin": sum(1 for x in res if x == 2),
+ }
+
+ return file_index, stats
+
+
+async def collect(modules):
+ loop = asyncio.get_event_loop()
+ tasks = []
+
+ # In case of errors (Too many open files), just change limit_per_host
+ connector = TCPConnector(limit=100, limit_per_host=4)
+
+ async with ClientSession(
+ loop=loop, timeout=ClientTimeout(total=TIMEOUT), connector=connector
+ ) as client:
+ for filename, ids in modules.items():
+ for debug_id, code_file, code_id in ids:
+ tasks.append(
+ collect_info(client, filename, debug_id, code_file, code_id)
+ )
+
+ res = await asyncio.gather(*tasks)
+ to_dump = []
+ stats = {"no_pdb": 0, "is_there": 0}
+ for filename, debug_id, code_file, code_id, has_pdb, has_code, is_there in res:
+ if not has_pdb:
+ if is_there:
+ stats["is_there"] += 1
+ else:
+ stats["no_pdb"] += 1
+ log.info(f"No pdb for {filename}/{debug_id}")
+ continue
+
+ log.info(
+ f"To dump: {filename}/{debug_id}, {code_file}/{code_id} and has_code = {has_code}"
+ )
+ to_dump.append((filename, debug_id, code_file, code_id, has_code))
+
+ log.info(f"Collected {len(to_dump)} files to dump")
+
+ return to_dump, stats
+
+
+async def make_dirs(path):
+ loop = asyncio.get_event_loop()
+
+ def helper(path):
+ os.makedirs(path, exist_ok=True)
+
+ await loop.run_in_executor(None, helper, path)
+
+
+async def fetch_and_write(output, client, filename, file_id):
+ path = os.path.join(filename, file_id, filename)
+ data = await fetch_file(client, MICROSOFT_SYMBOL_SERVER, path)
+
+ if not data:
+ return False
+
+ output_dir = os.path.join(output, filename, file_id)
+ await make_dirs(output_dir)
+
+ output_path = os.path.join(output_dir, filename)
+ async with AIOFile(output_path, "wb") as Out:
+ await Out.write(data)
+
+ return True
+
+
+async def fetch_all(output, modules):
+ loop = asyncio.get_event_loop()
+ tasks = []
+ fetched_modules = []
+
+ # In case of errors (Too many open files), just change limit_per_host
+ connector = TCPConnector(limit=100, limit_per_host=0)
+
+ async with ClientSession(
+ loop=loop, timeout=ClientTimeout(total=TIMEOUT), connector=connector
+ ) as client:
+ for filename, debug_id, code_file, code_id, has_code in modules:
+ tasks.append(fetch_and_write(output, client, filename, debug_id))
+ if has_code:
+ tasks.append(fetch_and_write(output, client, code_file, code_id))
+
+ res = await asyncio.gather(*tasks)
+ res = iter(res)
+ for filename, debug_id, code_file, code_id, has_code in modules:
+ fetched_pdb = next(res)
+ if has_code:
+ has_code = next(res)
+ if fetched_pdb:
+ fetched_modules.append(
+ (filename, debug_id, code_file, code_id, has_code)
+ )
+
+ return fetched_modules
+
+
+def get_base_data(url):
+ async def helper(url):
+ return await asyncio.gather(
+ fetch_missing_symbols(url),
+ # Symbols that we know belong to us, so don't ask Microsoft for them.
+ get_list("blacklist.txt"),
+ # Symbols that we know belong to Microsoft, so don't skiplist them.
+ get_list("known-microsoft-symbols.txt"),
+ # Symbols that we've asked for in the past unsuccessfully
+ get_skiplist(),
+ )
+
+ return asyncio.run(helper(url))
+
+
+def gen_zip(output, output_dir, file_index):
+ if not file_index:
+ return
+
+ with zipfile.ZipFile(output, "w", zipfile.ZIP_DEFLATED) as z:
+ for f in file_index:
+ z.write(os.path.join(output_dir, f), f)
+ log.info(f"Wrote zip as {output}")
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Fetch missing symbols from Microsoft symbol server"
+ )
+ parser.add_argument(
+ "--missing-symbols",
+ type=str,
+ help="missing symbols URL",
+ default=MISSING_SYMBOLS_URL,
+ )
+ parser.add_argument("zip", type=str, help="output zip file")
+ parser.add_argument(
+ "--dump-syms",
+ type=str,
+ help="dump_syms path",
+ default=os.environ.get("DUMP_SYMS_PATH"),
+ )
+
+ args = parser.parse_args()
+
+ assert args.dump_syms, "dump_syms path is empty"
+
+ logging.basicConfig(level=logging.DEBUG)
+ aiohttp_logger = logging.getLogger("aiohttp.client")
+ aiohttp_logger.setLevel(logging.INFO)
+ log.info("Started")
+
+ missing_symbols, blacklist, known_ms_symbols, skiplist = get_base_data(
+ args.missing_symbols
+ )
+
+ modules, stats_skipped = get_missing_symbols(missing_symbols, skiplist, blacklist)
+
+ symbol_path = mkdtemp("symsrvfetch")
+ temp_path = mkdtemp(prefix="symcache")
+
+ modules, stats_collect = asyncio.run(collect(modules))
+ modules = asyncio.run(fetch_all(temp_path, modules))
+
+ file_index, stats_dump = asyncio.run(
+ dump(symbol_path, temp_path, modules, args.dump_syms)
+ )
+
+ gen_zip(args.zip, symbol_path, file_index)
+
+ shutil.rmtree(symbol_path, True)
+ shutil.rmtree(temp_path, True)
+
+ write_skiplist(skiplist)
+
+ if not file_index:
+ log.info(f"No symbols downloaded: {len(missing_symbols)} considered")
+ else:
+ log.info(
+ f"Total files: {len(missing_symbols)}, Stored {len(file_index)} symbol files"
+ )
+
+ log.info(
+ f"{stats_collect['is_there']} already present, {stats_skipped['blacklist']} in blacklist, "
+ f"{stats_skipped['skiplist']} skipped, {stats_collect['no_pdb']} not found, "
+ f"{stats_dump['dump_error']} processed with errors, "
+ f"{stats_dump['no_bin']} processed but with no binaries (x86_64)"
+ )
+ log.info("Finished, exiting")
+
+
+if __name__ == "__main__":
+ main()