diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /tools/crashreporter | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 86.0.1. (refs: upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/crashreporter')
21 files changed, 2701 insertions, 0 deletions
diff --git a/tools/crashreporter/app.mozbuild b/tools/crashreporter/app.mozbuild new file mode 100644 index 0000000000..e44591f174 --- /dev/null +++ b/tools/crashreporter/app.mozbuild @@ -0,0 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +DIRS += [ + '/config/external/zlib', + '/toolkit/crashreporter/google-breakpad/src/processor', + '/tools/crashreporter/minidump_stackwalk', +] diff --git a/tools/crashreporter/minidump_stackwalk/curl-compat/curl-compat.c b/tools/crashreporter/minidump_stackwalk/curl-compat/curl-compat.c new file mode 100644 index 0000000000..43317f12fb --- /dev/null +++ b/tools/crashreporter/minidump_stackwalk/curl-compat/curl-compat.c @@ -0,0 +1,10 @@ +/* Any copyright is dedicated to the public domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ + +void curl_easy_cleanup() {} +void curl_easy_escape() {} +void curl_easy_getinfo() {} +void curl_easy_init() {} +void curl_easy_perform() {} +void curl_easy_setopt() {} +void curl_free() {} diff --git a/tools/crashreporter/minidump_stackwalk/curl-compat/moz.build b/tools/crashreporter/minidump_stackwalk/curl-compat/moz.build new file mode 100644 index 0000000000..e1c78875f3 --- /dev/null +++ b/tools/crashreporter/minidump_stackwalk/curl-compat/moz.build @@ -0,0 +1,20 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This creates a fake libcurl.so that exposes enough of the curl ABI that +# linking minidump_stackwalk against it works. 
Because the fake libcurl.so +# comes with no symbol versions, the resulting minidump_stackwalk doesn't +# require specific symbol versions, making it binary compatible with any +# libcurl.so.4 that provides the symbols, independently of the symbol version +# they use. That means it works on systems before and after the switch to +# openssl 1.1. That includes older and newer Ubuntu and Debian systems. +SharedLibrary("curl") + +NoVisibilityFlags() + +DIST_INSTALL = False + +SOURCES += ["curl-compat.c"] + +LDFLAGS += ["-Wl,-soname,libcurl.so.4"] diff --git a/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.cc b/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.cc new file mode 100644 index 0000000000..d7c75201b8 --- /dev/null +++ b/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.cc @@ -0,0 +1,540 @@ +// Copyright (c) 2011 The Mozilla Foundation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of The Mozilla Foundation nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
// Returns true if |file_name| can be stat()ed, i.e. it names an existing
// file system entry of any type.
static bool file_exists(const std::string& file_name) {
  struct stat sb;
  return stat(file_name.c_str(), &sb) == 0;
}

// Returns the parent directory of |path|, i.e. everything before its last
// '/'.
//
// Two edge cases get an explicit answer instead of falling out of substr():
//  * no '/' at all: the entry lives in the current directory, so "." is
//    returned (previously the path itself was returned, which made mkdirs()
//    create a directory named after the file);
//  * the only '/' is the leading one: the entry lives in the root, so "/" is
//    returned (previously "" was returned, which made mkdirs() call
//    mkdir("") and fail whenever a top-level cache directory such as
//    "/symcache" did not exist yet).
//
// "." and "/" are both fixed points of this function, so callers that walk
// upwards until the result stops changing still terminate.
static std::string dirname(const std::string& path) {
  const std::string::size_type last_slash = path.rfind('/');
  if (last_slash == std::string::npos) {
    return ".";
  }
  if (last_slash == 0) {
    return "/";
  }
  return path.substr(0, last_slash);
}
front, + const vector<string>& rest) { + vector<string> vec(1, front); + std::copy(rest.begin(), rest.end(), std::back_inserter(vec)); + return vec; +} + +HTTPSymbolSupplier::HTTPSymbolSupplier(const vector<string>& server_urls, + const string& cache_path, + const vector<string>& local_paths, + const string& tmp_path) + : SimpleSymbolSupplier(vector_from(cache_path, local_paths)), + server_urls_(server_urls), + cache_path_(cache_path), + tmp_path_(tmp_path) { +#ifdef _WIN32 + session_ = InternetOpenW(L"Breakpad/1.0", INTERNET_OPEN_TYPE_PRECONFIG, + nullptr, nullptr, 0); + if (!session_) { + BPLOG(INFO) << "HTTPSymbolSupplier: InternetOpenW: Error: " + << GetLastError(); + } +#else + session_ = curl_easy_init(); +#endif + for (auto i = server_urls_.begin(); i < server_urls_.end(); ++i) { + if (*(i->end() - 1) != '/') { + i->push_back('/'); + } + } + // Remove any trailing slash on tmp_path. + if (!tmp_path_.empty() && *(tmp_path_.end() - 1) == '/') { + tmp_path_.erase(tmp_path_.end() - 1); + } +} + +HTTPSymbolSupplier::~HTTPSymbolSupplier() { +#ifdef _WIN32 + InternetCloseHandle(session_); +#else + curl_easy_cleanup(session_); +#endif +} + +void HTTPSymbolSupplier::StoreSymbolStats(const CodeModule* module, + const SymbolStats& stats) { + const auto& key = + std::make_pair(module->debug_file(), module->debug_identifier()); + if (symbol_stats_.find(key) == symbol_stats_.end()) { + symbol_stats_[key] = stats; + } +} + +void HTTPSymbolSupplier::StoreCacheHit(const CodeModule* module) { + SymbolStats stats = {true, 0.0f}; + StoreSymbolStats(module, stats); +} + +void HTTPSymbolSupplier::StoreCacheMiss(const CodeModule* module, + float fetch_time) { + SymbolStats stats = {false, fetch_time}; + StoreSymbolStats(module, stats); +} + +SymbolSupplier::SymbolResult HTTPSymbolSupplier::GetSymbolFile( + const CodeModule* module, const SystemInfo* system_info, + string* symbol_file) { + SymbolSupplier::SymbolResult res = + SimpleSymbolSupplier::GetSymbolFile(module, system_info, 
// Joins |path| and |sub| with exactly one '/' between them when |path|
// already ends in '/', and inserts a '/' otherwise.
//
// An empty |path| is treated like any other path without a trailing slash
// (the result is "/" + |sub|). The previous implementation indexed
// path[path.length() - 1], which is out of bounds for an empty string.
std::string JoinPath(const std::string& path, const std::string& sub) {
  if (!path.empty() && path.back() == '/') {
    return path + sub;
  }
  return path + "/" + sub;
}
URLEncode(session, sub); +} + +bool FetchURLToFile(HINTERNET session, const string& url, const string& file, + const string& tmp_path, float* fetch_time) { + *fetch_time = 0.0f; + + URL_COMPONENTSA comps = {}; + comps.dwStructSize = sizeof(URL_COMPONENTSA); + comps.dwHostNameLength = static_cast<DWORD>(-1); + comps.dwSchemeLength = static_cast<DWORD>(-1); + comps.dwUrlPathLength = static_cast<DWORD>(-1); + comps.dwExtraInfoLength = static_cast<DWORD>(-1); + + if (!InternetCrackUrlA(url.c_str(), 0, 0, &comps)) { + BPLOG(INFO) << "HTTPSymbolSupplier: InternetCrackUrlA: Error: " + << GetLastError(); + return false; + } + + DWORD start = GetTickCount(); + string host(comps.lpszHostName, comps.dwHostNameLength); + string path(comps.lpszUrlPath, comps.dwUrlPathLength); + HINTERNET conn = InternetConnectA(session, host.c_str(), comps.nPort, nullptr, + nullptr, INTERNET_SERVICE_HTTP, 0, 0); + if (!conn) { + BPLOG(INFO) << "HTTPSymbolSupplier: HttpOpenRequest: Error: " + << GetLastError(); + return false; + } + + HINTERNET req = HttpOpenRequestA(conn, "GET", path.c_str(), nullptr, nullptr, + nullptr, INTERNET_FLAG_NO_COOKIES, 0); + if (!req) { + BPLOG(INFO) << "HTTPSymbolSupplier: HttpSendRequest: Error: " + << GetLastError(); + InternetCloseHandle(conn); + return false; + } + + DWORD status = 0; + DWORD size = sizeof(status); + if (!HttpSendRequest(req, nullptr, 0, nullptr, 0)) { + BPLOG(INFO) << "HTTPSymbolSupplier: HttpSendRequest: Error: " + << GetLastError(); + InternetCloseHandle(req); + InternetCloseHandle(conn); + return false; + } + + if (!HttpQueryInfo(req, HTTP_QUERY_STATUS_CODE | HTTP_QUERY_FLAG_NUMBER, + &status, &size, nullptr)) { + BPLOG(INFO) << "HTTPSymbolSupplier: HttpQueryInfo: Error: " + << GetLastError(); + InternetCloseHandle(req); + InternetCloseHandle(conn); + return false; + } + + bool do_ungzip = false; + // See if the content is gzipped and we need to decompress it. 
+ char encoding[32]; + DWORD encoding_size = sizeof(encoding); + if (HttpQueryInfo(req, HTTP_QUERY_CONTENT_ENCODING, encoding, &encoding_size, + nullptr) && + strcmp(encoding, "gzip") == 0) { + do_ungzip = true; + BPLOG(INFO) << "HTTPSymbolSupplier: need to manually un-gzip"; + } + + bool success = false; + if (status == 200) { + DWORD bytes = 0; + string tempfile(MAX_PATH, '\0'); + if (GetTempFileNameA(tmp_path.c_str(), "sym", 1, &tempfile[0]) != 0) { + tempfile.resize(strlen(tempfile.c_str())); + BPLOG(INFO) << "HTTPSymbolSupplier: symbol exists, saving to " + << tempfile; + FILE* f = fopen(tempfile.c_str(), "wb"); + while (InternetQueryDataAvailable(req, &bytes, 0, 0) && bytes > 0) { + vector<uint8_t> data(bytes); + DWORD downloaded = 0; + if (InternetReadFile(req, &data[0], bytes, &downloaded)) { + fwrite(&data[0], downloaded, 1, f); + } + } + fclose(f); + if (do_ungzip) { + string gzfile = tempfile + ".gz"; + MoveFileA(tempfile.c_str(), gzfile.c_str()); + uint8_t buffer[4096]; + gzFile g = gzopen(gzfile.c_str(), "r"); + FILE* f = fopen(tempfile.c_str(), "w"); + if (g && f) { + while (true) { + int bytes_read = gzread(g, buffer, sizeof(buffer)); + if (bytes_read > 0) { + fwrite(buffer, bytes_read, 1, f); + } else { + if (bytes_read == 0) { + success = true; + } + break; + } + } + } + if (g) { + gzclose(g); + } + if (f) { + fclose(f); + } + if (!success) { + BPLOG(INFO) << "HTTPSymbolSupplier: failed to decompress " << file; + } + } else { + success = true; + } + + *fetch_time = GetTickCount() - start; + + if (success) { + success = mkdirs(file); + if (!success) { + BPLOG(INFO) << "HTTPSymbolSupplier: failed to create directories " + << "for " << file; + } else { + success = MoveFileA(tempfile.c_str(), file.c_str()); + if (!success) { + BPLOG(INFO) << "HTTPSymbolSupplier: failed to rename file"; + unlink(tempfile.c_str()); + } + } + } + } + } else { + BPLOG(INFO) << "HTTPSymbolSupplier: HTTP response code: " << status; + } + + InternetCloseHandle(req); + 
InternetCloseHandle(conn); + return success; +} + +#else // !_WIN32 +string URLEncode(CURL* curl, const string& url) { + char* escaped_url_raw = + curl_easy_escape(curl, const_cast<char*>(url.c_str()), url.length()); + if (not escaped_url_raw) { + BPLOG(INFO) << "HTTPSymbolSupplier: couldn't escape URL: " << url; + return ""; + } + string escaped_url(escaped_url_raw); + curl_free(escaped_url_raw); + return escaped_url; +} + +string JoinURL(CURL* curl, const string& url, const string& sub) { + return url + "/" + URLEncode(curl, sub); +} + +bool FetchURLToFile(CURL* curl, const string& url, const string& file, + const string& tmp_path, float* fetch_time) { + *fetch_time = 0.0f; + + string tempfile = JoinPath(tmp_path, "symbolXXXXXX"); + int fd = mkstemp(&tempfile[0]); + if (fd == -1) { + return false; + } + FILE* f = fdopen(fd, "w"); + + curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl, CURLOPT_ENCODING, ""); + curl_easy_setopt(curl, CURLOPT_STDERR, stderr); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, f); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); + + struct timeval t1, t2; + gettimeofday(&t1, nullptr); + bool result = false; + long retcode = -1; + if (curl_easy_perform(curl) != 0) { + BPLOG(INFO) << "HTTPSymbolSupplier: curl_easy_perform failed"; + } else if (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &retcode) != 0) { + BPLOG(INFO) << "HTTPSymbolSupplier: curl_easy_getinfo failed"; + } else if (retcode != 200) { + BPLOG(INFO) << "HTTPSymbolSupplier: HTTP response code: " << retcode; + } else { + BPLOG(INFO) << "HTTPSymbolSupplier: symbol exists, saving to " << file; + result = true; + } + gettimeofday(&t2, nullptr); + *fetch_time = + (t2.tv_sec - t1.tv_sec) * 1000.0 + (t2.tv_usec - t1.tv_usec) / 1000.0; + fclose(f); + close(fd); + + if (result) { + result = mkdirs(file); + if (!result) { + BPLOG(INFO) << "HTTPSymbolSupplier: failed to create directories for " + << file; + } + } + if (result) { + result = 0 == 
rename(tempfile.c_str(), file.c_str()); + if (!result) { + int e = errno; + BPLOG(INFO) << "HTTPSymbolSupplier: failed to rename file, errno=" << e; + } + } + + if (!result) { + unlink(tempfile.c_str()); + } + + return result; +} +#endif +} // namespace + +bool HTTPSymbolSupplier::FetchSymbolFile(const CodeModule* module, + const SystemInfo* system_info) { + if (!session_) { + return false; + } + // Copied from simple_symbol_supplier.cc + string debug_file_name = PathnameStripper::File(module->debug_file()); + if (debug_file_name.empty()) { + return false; + } + string path = debug_file_name; + string url = URLEncode(session_, debug_file_name); + + // Append the identifier as a directory name. + string identifier = module->debug_identifier(); + if (identifier.empty()) { + return false; + } + path = JoinPath(path, identifier); + url = JoinURL(session_, url, identifier); + + // See if we already attempted to fetch this symbol file. + if (SymbolWasError(module, system_info)) { + return false; + } + + // Transform the debug file name into one ending in .sym. If the existing + // name ends in .pdb, strip the .pdb. Otherwise, add .sym to the non-.pdb + // name. 
+ string debug_file_extension; + if (debug_file_name.size() > 4) { + debug_file_extension = debug_file_name.substr(debug_file_name.size() - 4); + } + std::transform(debug_file_extension.begin(), debug_file_extension.end(), + debug_file_extension.begin(), tolower); + if (debug_file_extension == ".pdb") { + debug_file_name = debug_file_name.substr(0, debug_file_name.size() - 4); + } + + debug_file_name += ".sym"; + path = JoinPath(path, debug_file_name); + url = JoinURL(session_, url, debug_file_name); + + string full_path = JoinPath(cache_path_, path); + + bool result = false; + for (auto server_url = server_urls_.begin(); server_url < server_urls_.end(); + ++server_url) { + string full_url = *server_url + url; + float fetch_time; + BPLOG(INFO) << "HTTPSymbolSupplier: querying " << full_url; + if (FetchURLToFile(session_, full_url, full_path, tmp_path_, &fetch_time)) { + StoreCacheMiss(module, fetch_time); + result = true; + break; + } + } + if (!result) { + error_symbols_.insert( + std::make_pair(module->debug_file(), module->debug_identifier())); + } + return result; +} + +bool HTTPSymbolSupplier::GetStats(const CodeModule* module, + SymbolStats* stats) const { + const auto& found = symbol_stats_.find( + std::make_pair(module->debug_file(), module->debug_identifier())); + if (found == symbol_stats_.end()) { + return false; + } + + *stats = found->second; + return true; +} + +bool HTTPSymbolSupplier::SymbolWasError(const CodeModule* module, + const SystemInfo* system_info) { + return error_symbols_.find(std::make_pair(module->debug_file(), + module->debug_identifier())) != + error_symbols_.end(); +} + +} // namespace breakpad_extra diff --git a/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.h b/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.h new file mode 100644 index 0000000000..bed4ed6868 --- /dev/null +++ b/tools/crashreporter/minidump_stackwalk/http_symbol_supplier.h @@ -0,0 +1,120 @@ +// Copyright (c) 2011 The Mozilla Foundation +// 
All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of The Mozilla Foundation nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A SymbolSupplier that can fetch symbols via HTTP from a symbol server +// serving them at Microsoft symbol server-compatible paths. 
+ +#include <map> +#include <set> +#include <string> + +#include "processor/simple_symbol_supplier.h" + +#ifdef _WIN32 +# include <windows.h> +# include <wininet.h> +#else +typedef void CURL; +#endif + +namespace google_breakpad { +class CodeModule; +struct SystemInfo; +} // namespace google_breakpad + +namespace breakpad_extra { + +using google_breakpad::CodeModule; +using google_breakpad::SimpleSymbolSupplier; +using google_breakpad::SymbolSupplier; +using google_breakpad::SystemInfo; +using std::string; +using std::vector; + +class HTTPSymbolSupplier : public SimpleSymbolSupplier { + public: + // Construct an HTTPSymbolSupplier. + // |server_urls| contains URLs to query for symbols. + // |cache_path| is a directory in which to store downloaded symbols. + // |local_paths| are directories to query for symbols before checking URLs. + HTTPSymbolSupplier(const vector<string>& server_urls, + const string& cache_path, + const vector<string>& local_paths, const string& tmp_path); + virtual ~HTTPSymbolSupplier(); + + // Returns the path to the symbol file for the given module. See the + // description above. + virtual SymbolSupplier::SymbolResult GetSymbolFile( + const CodeModule* module, const SystemInfo* system_info, + string* symbol_file); + + virtual SymbolSupplier::SymbolResult GetSymbolFile( + const CodeModule* module, const SystemInfo* system_info, + string* symbol_file, string* symbol_data); + + // Allocates data buffer on heap and writes symbol data into buffer. + // Symbol supplier ALWAYS takes ownership of the data buffer. + virtual SymbolSupplier::SymbolResult GetCStringSymbolData( + const CodeModule* module, const SystemInfo* system_info, + string* symbol_file, char** symbol_data, size_t* size); + + struct SymbolStats { + // true if the symbol file was already cached on disk, + // false if a HTTP request was made to fetch it. 
+ bool was_cached_on_disk; + // If was_cached_on_disk is false, the time in milliseconds + // that the full HTTP request to fetch the symbol file took. + float fetch_time_ms; + }; + + // Get stats on symbols for a module. + // Returns true if stats were found, false if not. + bool GetStats(const CodeModule* module, SymbolStats* stats) const; + + private: + bool FetchSymbolFile(const CodeModule* module, const SystemInfo* system_info); + + bool SymbolWasError(const CodeModule* module, const SystemInfo* system_info); + void StoreCacheHit(const CodeModule* Module); + void StoreCacheMiss(const CodeModule* module, float fetch_time); + void StoreSymbolStats(const CodeModule* module, const SymbolStats& stats); + + vector<string> server_urls_; + string cache_path_; + string tmp_path_; + std::set<std::pair<string, string>> error_symbols_; + std::map<std::pair<string, string>, SymbolStats> symbol_stats_; +#ifdef _WIN32 + HINTERNET session_; +#else + CURL* session_; +#endif +}; + +} // namespace breakpad_extra diff --git a/tools/crashreporter/minidump_stackwalk/minidump_stackwalk.cc b/tools/crashreporter/minidump_stackwalk/minidump_stackwalk.cc new file mode 100644 index 0000000000..f43cf14a01 --- /dev/null +++ b/tools/crashreporter/minidump_stackwalk/minidump_stackwalk.cc @@ -0,0 +1,212 @@ +// Copyright (c) 2010 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// minidump_stackwalk.cc: Process a minidump with MinidumpProcessor, printing +// the results, including stack traces. 
+// +// Author: Mark Mentovai +// +// Lightly modified to use HTTPSymbolSupplier instead of SimpleSymbolSupplier + +#include <stdio.h> +#include <string.h> + +#include <limits> +#include <string> +#include <vector> + +#include "common/scoped_ptr.h" +#include "common/using_std_string.h" +#include "google_breakpad/processor/basic_source_line_resolver.h" +#include "google_breakpad/processor/minidump.h" +#include "google_breakpad/processor/minidump_processor.h" +#include "google_breakpad/processor/process_state.h" +#include "processor/logging.h" +#include "processor/simple_symbol_supplier.h" +#include "processor/stackwalk_common.h" + +#include "http_symbol_supplier.h" + +#ifdef ERROR +# undef ERROR +#endif + +#ifdef SEVERITY_ERROR +# undef SEVERITY_ERROR +#endif + +namespace { + +using breakpad_extra::HTTPSymbolSupplier; +using google_breakpad::BasicSourceLineResolver; +using google_breakpad::Minidump; +using google_breakpad::MinidumpMemoryList; +using google_breakpad::MinidumpProcessor; +using google_breakpad::MinidumpThreadList; +using google_breakpad::ProcessState; +using google_breakpad::scoped_ptr; +using google_breakpad::SimpleSymbolSupplier; + +// Processes |minidump_file| using MinidumpProcessor. |symbol_path|, if +// non-empty, is the base directory of a symbol storage area, laid out in +// the format required by SimpleSymbolSupplier. If such a storage area +// is specified, it is made available for use by the MinidumpProcessor. +// +// Returns the value of MinidumpProcessor::Process. If processing succeeds, +// prints identifying OS and CPU information from the minidump, crash +// information if the minidump was produced as a result of a crash, and +// call stacks for each thread contained in the minidump. All information +// is printed to stdout. 
+bool PrintMinidumpProcess(const string& minidump_file, + const std::vector<string>& symbol_args, + bool machine_readable, bool output_stack_contents) { + std::vector<string> server_paths; + std::vector<string> symbol_paths; + for (std::vector<string>::const_iterator arg = symbol_args.begin(); + arg < symbol_args.end(); ++arg) { + if (arg->substr(0, 4) == "http") { + server_paths.push_back(*arg); + } else { + symbol_paths.push_back(*arg); + } + } + + string temp_path; +#ifdef _WIN32 + temp_path.resize(MAX_PATH + 1); + DWORD length = GetTempPath(temp_path.length(), &temp_path[0]); + temp_path.resize(length + 1); +#else + temp_path = "/tmp"; +#endif + + if (server_paths.empty()) { + server_paths.push_back( + "http://s3-us-west-2.amazonaws.com/" + "org.mozilla.crash-stats.symbols-public/v1/"); + } + + scoped_ptr<HTTPSymbolSupplier> symbol_supplier; + string symbols_cache = temp_path; + if (!symbol_paths.empty()) { + symbols_cache = symbol_paths[0]; + symbol_paths.erase(symbol_paths.begin()); + } + + // TODO(mmentovai): check existence of symbol_path if specified? + symbol_supplier.reset(new HTTPSymbolSupplier(server_paths, symbols_cache, + symbol_paths, temp_path)); + + BasicSourceLineResolver resolver; + MinidumpProcessor minidump_processor(symbol_supplier.get(), &resolver); + + // Increase the maximum number of threads and regions. + MinidumpThreadList::set_max_threads(std::numeric_limits<uint32_t>::max()); + MinidumpMemoryList::set_max_regions(std::numeric_limits<uint32_t>::max()); + // Process the minidump. 
+ Minidump dump(minidump_file); + if (!dump.Read()) { + BPLOG(ERROR) << "Minidump " << dump.path() << " could not be read"; + return false; + } + ProcessState process_state; + if (minidump_processor.Process(&dump, &process_state) != + google_breakpad::PROCESS_OK) { + BPLOG(ERROR) << "MinidumpProcessor::Process failed"; + return false; + } + + if (machine_readable) { + PrintProcessStateMachineReadable(process_state); + } else { + PrintProcessState(process_state, output_stack_contents, &resolver); + } + + return true; +} + +void usage(const char* program_name) { + fprintf(stderr, + "usage: %s [-m|-s] <minidump-file> [<symbol server URL> | " + "<symbol-path>]+\n" + " -m : Output in machine-readable format\n" + " -s : Output stack contents\n", + program_name); +} + +} // namespace + +int main(int argc, char** argv) { + BPLOG_INIT(&argc, &argv); + + if (argc < 2) { + usage(argv[0]); + return 1; + } + + const char* minidump_file; + bool machine_readable = false; + bool output_stack_contents = false; + int symbol_path_arg; + + if (strcmp(argv[1], "-m") == 0) { + if (argc < 3) { + usage(argv[0]); + return 1; + } + + machine_readable = true; + minidump_file = argv[2]; + symbol_path_arg = 3; + } else if (strcmp(argv[1], "-s") == 0) { + if (argc < 3) { + usage(argv[0]); + return 1; + } + + output_stack_contents = true; + minidump_file = argv[2]; + symbol_path_arg = 3; + } else { + minidump_file = argv[1]; + symbol_path_arg = 2; + } + + // extra arguments are symbol paths + std::vector<string> symbol_paths; + if (argc > symbol_path_arg) { + for (int argi = symbol_path_arg; argi < argc; ++argi) + symbol_paths.push_back(argv[argi]); + } + + return PrintMinidumpProcess(minidump_file, symbol_paths, machine_readable, + output_stack_contents) + ? 
0 + : 1; +} diff --git a/tools/crashreporter/minidump_stackwalk/moz.build b/tools/crashreporter/minidump_stackwalk/moz.build new file mode 100644 index 0000000000..da769331ce --- /dev/null +++ b/tools/crashreporter/minidump_stackwalk/moz.build @@ -0,0 +1,37 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Program("minidump_stackwalk") + +SOURCES += [ + "/toolkit/crashreporter/google-breakpad/src/common/path_helper.cc", + "/toolkit/crashreporter/google-breakpad/src/processor/simple_symbol_supplier.cc", + "http_symbol_supplier.cc", + "minidump_stackwalk.cc", +] + +USE_LIBS += [ + "breakpad_processor", + "zlib", +] + +if CONFIG["OS_TARGET"] == "WINNT": + if CONFIG["CC_TYPE"] in ("clang", "gcc"): + DEFINES["__USE_MINGW_ANSI_STDIO"] = True + + LDFLAGS += [ + "-static-libgcc", + "-static-libstdc++", + ] + + OS_LIBS += ["wininet"] +elif CONFIG["OS_TARGET"] == "Linux": + USE_LIBS += ["curl"] + DIRS += ["curl-compat"] +else: + OS_LIBS += ["curl"] + +DisableStlWrapping() + +include("/toolkit/crashreporter/crashreporter.mozbuild") diff --git a/tools/crashreporter/moz.configure b/tools/crashreporter/moz.configure new file mode 100644 index 0000000000..f7380a502c --- /dev/null +++ b/tools/crashreporter/moz.configure @@ -0,0 +1,5 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +include("../../build/moz.configure/rust.configure") diff --git a/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py new file mode 100755 index 0000000000..00f5dd7499 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/PackageSymbolDumper.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python + +# Copyright 2015 Michael R. 
Miller. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +PackageSymbolDumper.py + +Dumps Breakpad symbols for the contents of an Apple update installer. Given a +path to an Apple update installer as a .dmg or a path to a specific package +within the disk image, PackageSymbolDumper mounts, traverses, and dumps symbols +for all applicable frameworks and dylibs found within. + +Required tools for Linux: + pax + gzip + tar + xar (http://code.google.com/p/xar/) + xpwn's dmg (https://github.com/planetbeing/xpwn) + +Created on Apr 11, 2012 + +@author: mrmiller +""" +from __future__ import absolute_import + +import argparse +import concurrent.futures +import errno +import glob +import logging +import os +import shutil +import subprocess +import tempfile + +from scrapesymbols.gathersymbols import process_paths + + +def expand_pkg(pkg_path, out_path): + """ + Expands the contents of an installer package to some directory. 
+ + @param pkg_path: a path to an installer package (.pkg) + @param out_path: a path to hold the package contents + """ + subprocess.check_call( + 'cd "{dest}" && xar -x -f "{src}"'.format(src=pkg_path, dest=out_path), + shell=True, + ) + + +def filter_files(function, path): + """ + Yield file paths matching a filter function by walking the + hierarchy rooted at path. + + @param function: a function taking in a filename that returns true to + include the path + @param path: the root path of the hierarchy to traverse + """ + for root, _dirs, files in os.walk(path): + for filename in files: + if function(filename): + yield os.path.join(root, filename) + + +def find_packages(path): + """ + Returns a list of installer packages (as determined by the .pkg extension) + found within path. + + @param path: root path to search for .pkg files + """ + return filter_files(lambda filename: os.path.splitext(filename)[1] == ".pkg", path) + + +def find_all_packages(paths): + """ + Yield installer package files found in all of `paths`. + + @param path: list of root paths to search for .pkg files + """ + for path in paths: + logging.info("find_all_packages: {}".format(path)) + for pkg in find_packages(path): + yield pkg + + +def find_payloads(path): + """ + Returns a list of possible installer package payload paths. + + @param path: root path for an installer package + """ + return filter_files( + lambda filename: "Payload" in filename or ".pax.gz" in filename, path + ) + + +def extract_payload(payload_path, output_path): + """ + Extracts the contents of an installer package payload to a given directory. + + @param payload_path: path to an installer package's payload + @param output_path: output path for the payload's contents + @return True for success, False for failure. 
+ """ + header = open(payload_path, "rb").read(2) + try: + if header == "BZ": + logging.info("Extracting bzip2 payload") + extract = "bzip2" + subprocess.check_call( + 'cd {dest} && {extract} -dc {payload} | pax -r -k -s ":^/::"'.format( + extract=extract, payload=payload_path, dest=output_path + ), + shell=True, + ) + return True + elif header == "\x1f\x8b": + logging.info("Extracting gzip payload") + extract = "gzip" + subprocess.check_call( + 'cd {dest} && {extract} -dc {payload} | pax -r -k -s ":^/::"'.format( + extract=extract, payload=payload_path, dest=output_path + ), + shell=True, + ) + return True + elif header == "pb": + logging.info("Extracting pbzx payload") + extract = "parse_pbzx.py" + + payload_dir = os.path.dirname(payload_path) + # First, unpack the PBZX into cpio parts. + subprocess.check_call(["parse_pbzx.py", payload_path], cwd=payload_dir) + # Next, decompress any parts that are .xz, and feed them all into pax. + pax_proc = subprocess.Popen( + ["pax", "-r", "-k", "-s", ":^/::"], + stdin=subprocess.PIPE, + cwd=output_path, + ) + for part in sorted(glob.glob(os.path.join(payload_dir, "Payload.part*"))): + if part.endswith(".xz"): + logging.info("Extracting xz part {}".format(part)) + # This would be easier if we pulled in the lzma module... 
+ xz_proc = subprocess.Popen( + ["xz", "-dc", part], stdout=subprocess.PIPE, cwd=payload_dir + ) + shutil.copyfileobj(xz_proc.stdout, pax_proc.stdin) + xz_proc.wait() + else: + logging.info("Copying plain cpio part {}".format(part)) + with open(part, "rb") as f: + shutil.copyfileobj(f, pax_proc.stdin) + pax_proc.stdin.close() + pax_proc.wait() + return True + else: + # Unsupported format + logging.error( + "Unknown payload format: 0x{0:x}{1:x}".format( + ord(header[0]), ord(header[1]) + ) + ) + return False + + except subprocess.CalledProcessError: + return False + + +def shutil_error_handler(caller, path, excinfo): + logging.error('Could not remove "{path}": {info}'.format(path=path, info=excinfo)) + + +def write_symbol_file(dest, filename, contents): + full_path = os.path.join(dest, filename) + try: + os.makedirs(os.path.dirname(full_path)) + open(full_path, "wb").write(contents) + except os.error as e: + if e.errno != errno.EEXIST: + raise + + +def dump_symbols_from_payload(executor, dump_syms, payload_path, dest): + """ + Dumps all the symbols found inside the payload of an installer package. 
+ + @param dump_syms: path to the dump_syms executable + @param payload_path: path to an installer package's payload + @param dest: output path for symbols + """ + temp_dir = None + logging.info("Dumping symbols from payload: " + payload_path) + try: + temp_dir = tempfile.mkdtemp() + logging.info("Extracting payload to {path}.".format(path=temp_dir)) + if not extract_payload(payload_path, temp_dir): + logging.error("Could not extract payload: " + payload_path) + return + + # dump the symbols for the payload contents + system_library = os.path.join("System", "Library") + subdirectories = [ + os.path.join(system_library, "Frameworks"), + os.path.join(system_library, "PrivateFrameworks"), + os.path.join("usr", "lib"), + ] + paths_to_dump = map(lambda d: os.path.join(temp_dir, d), subdirectories) + + for filename, contents in process_paths( + paths_to_dump, executor, dump_syms, False, platform="darwin" + ): + if filename and contents: + logging.info("Added symbol file " + filename) + write_symbol_file(dest, filename, contents) + + finally: + if temp_dir is not None: + shutil.rmtree(temp_dir, onerror=shutil_error_handler) + + +def dump_symbols_from_package(executor, dump_syms, pkg, dest): + """ + Dumps all the symbols found inside an installer package. 
+ + @param dump_syms: path to the dump_syms executable + @param pkg: path to an installer package + @param dest: output path for symbols + """ + temp_dir = None + logging.info("Dumping symbols from package: " + pkg) + try: + temp_dir = tempfile.mkdtemp() + expand_pkg(pkg, temp_dir) + + # check for any subpackages + for subpackage in find_packages(temp_dir): + logging.warning("UNTESTED: Found subpackage at: " + subpackage) + dump_symbols_from_package(executor, dump_syms, subpackage, dest) + + # dump symbols from any payloads (only expecting one) in the package + for payload in find_payloads(temp_dir): + dump_symbols_from_payload(executor, dump_syms, payload, dest) + + except Exception as e: + logging.error("Exception while dumping symbols from package: {}".format(e)) + + finally: + if temp_dir is not None: + shutil.rmtree(temp_dir, onerror=shutil_error_handler) + + +def read_processed_packages(tracking_file): + if tracking_file is None or not os.path.exists(tracking_file): + return set() + logging.info("Reading processed packages from {}".format(tracking_file)) + return set(open(tracking_file, "r").read().splitlines()) + + +def write_processed_packages(tracking_file, processed_packages): + if tracking_file is None: + return + logging.info( + "Writing {} processed packages to {}".format( + len(processed_packages), tracking_file + ) + ) + open(tracking_file, "w").write("\n".join(processed_packages)) + + +def process_packages(package_finder, to, tracking_file, dump_syms): + processed_packages = read_processed_packages(tracking_file) + with concurrent.futures.ProcessPoolExecutor() as executor: + for pkg in package_finder(): + if pkg in processed_packages: + logging.info("Skipping already-processed package: {}".format(pkg)) + else: + dump_symbols_from_package(executor, dump_syms, pkg, to) + processed_packages.add(pkg) + write_processed_packages(tracking_file, processed_packages) + + +def main(): + parser = argparse.ArgumentParser( + description="Extracts Breakpad symbols 
from a Mac OS X support update." + ) + parser.add_argument( + "--dump_syms", + default="dump_syms", + type=str, + help="path to the Breakpad dump_syms executable", + ) + parser.add_argument( + "--tracking-file", + type=str, + help="Path to a file in which to store information " + + "about already-processed packages", + ) + parser.add_argument( + "search", nargs="+", help="Paths to search recursively for packages" + ) + parser.add_argument("to", type=str, help="destination path for the symbols") + args = parser.parse_args() + + logging.basicConfig( + level=logging.DEBUG, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + for p in ("requests.packages.urllib3.connectionpool", "urllib3"): + urllib3_logger = logging.getLogger(p) + urllib3_logger.setLevel(logging.ERROR) + + if not args.search or not all(os.path.exists(p) for p in args.search): + logging.error("Invalid search path") + return + if not os.path.exists(args.to): + logging.error("Invalid path to destination") + return + + def finder(): + return find_all_packages(args.search) + + process_packages(finder, args.to, args.tracking_file, args.dump_syms) + + +if __name__ == "__main__": + main() diff --git a/tools/crashreporter/system-symbols/mac/get_update_packages.py b/tools/crashreporter/system-symbols/mac/get_update_packages.py new file mode 100644 index 0000000000..7ddbad30b0 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/get_update_packages.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python + +# Copyright (c) 2015 Ted Mielczarek <ted@mielczarek.org> +# and Michael R. 
Miller <michaelrmmiller@gmail.com> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import absolute_import + +import argparse +import concurrent.futures +import logging +import os +import re +import requests +import shutil +import subprocess +import tempfile +import urlparse + +from PackageSymbolDumper import process_packages, find_packages + +OSX_RE = re.compile(r"10\.[0-9]+\.[0-9]+") + + +def extract_dmg(dmg_path, dest): + logging.info("extract_dmg({}, {})".format(dmg_path, dest)) + with tempfile.NamedTemporaryFile() as f: + subprocess.check_call( + ["dmg", "extract", dmg_path, f.name], stdout=open(os.devnull, "wb") + ) + subprocess.check_call(["hfsplus", f.name, "extractall"], cwd=dest) + + +def get_update_packages(): + for i in range(16): + logging.info("get_update_packages: page " + str(i)) + url = ( + "https://km.support.apple.com/kb/index?page=downloads_browse&sort=recency" + "&facet=all&category=PF6&locale=en_US&offset=%d" % i + ) + res = requests.get(url) + if res.status_code != 200: + break + data = res.json() + downloads = data.get("downloads", []) + if not downloads: + break + for d in downloads: + title = d.get("title", "") + if OSX_RE.search(title) and "Combo" not in title: + logging.info("Title: " + title) + if "fileurl" in d: + yield d["fileurl"] + else: + logging.warn("No fileurl in download!") + + +def fetch_url_to_file(url, download_dir): + filename = os.path.basename(urlparse.urlsplit(url).path) + local_filename = os.path.join(download_dir, filename) + if os.path.isfile(local_filename): + logging.info("{} already exists, skipping".format(local_filename)) + return None + r = requests.get(url, stream=True) + res_len = int(r.headers.get("content-length", "0")) + logging.info("Downloading {} -> {} ({} bytes)".format(url, local_filename, res_len)) + with open(local_filename, "wb") as f: + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + f.write(chunk) + return local_filename + + +def fetch_and_extract_dmg(url, tmpdir): + logging.info("fetch_and_extract_dmg: " + url) + 
filename = fetch_url_to_file(url, tmpdir) + if not filename: + return [] + # Extract dmg contents to a subdir + subdir = tempfile.mkdtemp(dir=tmpdir) + extract_dmg(filename, subdir) + packages = list(find_packages(subdir)) + logging.info( + "fetch_and_extract_dmg({}): found packages: {}".format(url, str(packages)) + ) + return packages + + +def find_update_packages(tmpdir): + logging.info("find_update_packages") + # Only download 2 packages at a time. + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + jobs = dict( + (executor.submit(fetch_and_extract_dmg, url, tmpdir), url) + for url in get_update_packages() + ) + for future in concurrent.futures.as_completed(jobs): + url = jobs[future] + if future.exception() is not None: + logging.error( + "exception downloading {}: {}".format(url, future.exception()) + ) + else: + for pkg in future.result(): + yield pkg + + +def main(): + parser = argparse.ArgumentParser( + description="Download OS X update packages and dump symbols from them" + ) + parser.add_argument( + "--dump_syms", + default="dump_syms", + type=str, + help="path to the Breakpad dump_syms executable", + ) + parser.add_argument("to", type=str, help="destination path for the symbols") + args = parser.parse_args() + logging.basicConfig( + level=logging.DEBUG, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + for p in ("requests.packages.urllib3.connectionpool", "urllib3"): + urllib3_logger = logging.getLogger(p) + urllib3_logger.setLevel(logging.ERROR) + try: + tmpdir = tempfile.mkdtemp(suffix=".osxupdates") + + def finder(): + return find_update_packages(tmpdir) + + process_packages(finder, args.to, None, args.dump_syms) + finally: + shutil.rmtree(tmpdir) + + +if __name__ == "__main__": + main() diff --git a/tools/crashreporter/system-symbols/mac/list-packages.py b/tools/crashreporter/system-symbols/mac/list-packages.py new file mode 100755 index 0000000000..21e87877ef --- /dev/null +++ 
b/tools/crashreporter/system-symbols/mac/list-packages.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +# Copyright 2015 Ted Mielczarek. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import print_function, absolute_import + +import os +import sys +from reposadolib import reposadocommon + +reposadocommon.get_main_dir = lambda: "/usr/local/bin/" + +products = reposadocommon.get_product_info() +args = [] +for product_id, p in products.items(): + try: + t = p["title"] + except KeyError: + print("Missing title in {}, skipping".format(p), file=sys.stderr) + continue + # p['CatalogEntry']['Packages'] + if t.startswith("OS X") or t.startswith("Mac OS X") or t.startswith("macOS"): + args.append("--product-id=" + product_id) + else: + print("Skipping %r for repo_sync" % t, file=sys.stderr) +if "JUST_ONE_PACKAGE" in os.environ: + args = args[:1] + +print(" ".join(args)) diff --git a/tools/crashreporter/system-symbols/mac/parse_pbzx.py b/tools/crashreporter/system-symbols/mac/parse_pbzx.py new file mode 100755 index 0000000000..242a58608e --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/parse_pbzx.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +# This code is from https://gist.github.com/pudquick/ff412bcb29c9c1fa4b8d +# +# v2 pbzx stream handler +# My personal writeup on the differences here: +# https://gist.github.com/pudquick/29fcfe09c326a9b96cf5 +# +# Pure python reimplementation of .cpio.xz content extraction from pbzx file +# payload originally here: +# http://www.tonymacx86.com/general-help/135458-pbzx-stream-parser.html +# +# Cleaned up C version (as the basis for my code) here, thanks to Pepijn Bruienne / @bruienne +# https://gist.github.com/bruienne/029494bbcfb358098b41 +# +# The python version of this code does not have an explicit license, but +# is based on GPLv3 C code linked above. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +from __future__ import absolute_import + +import struct +import sys + + +def seekread(f, offset=None, length=0, relative=True): + if offset is not None: + # offset provided, let's seek + f.seek(offset, [0, 1, 2][relative]) + if length != 0: + return f.read(length) + + +def parse_pbzx(pbzx_path): + section = 0 + xar_out_path = "%s.part%02d.cpio.xz" % (pbzx_path, section) + f = open(pbzx_path, "rb") + # pbzx = f.read() + # f.close() + magic = seekread(f, length=4) + if magic != "pbzx": + raise "Error: Not a pbzx file" + # Read 8 bytes for initial flags + flags = seekread(f, length=8) + # Interpret the flags as a 64-bit big-endian unsigned int + flags = struct.unpack(">Q", flags)[0] + xar_f = open(xar_out_path, "wb") + while flags & (1 << 24): + # Read in more flags + flags = seekread(f, length=8) + flags = struct.unpack(">Q", flags)[0] + # Read in length + f_length = seekread(f, length=8) + f_length = struct.unpack(">Q", f_length)[0] + xzmagic = seekread(f, length=6) + if xzmagic != "\xfd7zXZ\x00": + # This isn't xz content, this is actually _raw decompressed cpio_ + # chunk of 16MB in size... + # Let's back up ... + seekread(f, offset=-6, length=0) + # ... and split it out ... + f_content = seekread(f, length=f_length) + section += 1 + decomp_out = "%s.part%02d.cpio" % (pbzx_path, section) + g = open(decomp_out, "wb") + g.write(f_content) + g.close() + # Now to start the next section, which should hopefully be .xz + # (we'll just assume it is ...) 
+ xar_f.close() + section += 1 + new_out = "%s.part%02d.cpio.xz" % (pbzx_path, section) + xar_f = open(new_out, "wb") + else: + f_length -= 6 + # This part needs buffering + f_content = seekread(f, length=f_length) + tail = seekread(f, offset=-2, length=2) + xar_f.write(xzmagic) + xar_f.write(f_content) + if tail != "YZ": + xar_f.close() + raise "Error: Footer is not xar file footer" + try: + f.close() + xar_f.close() + except BaseException: + pass + + +def main(): + parse_pbzx(sys.argv[1]) + + +if __name__ == "__main__": + main() diff --git a/tools/crashreporter/system-symbols/mac/run.sh b/tools/crashreporter/system-symbols/mac/run.sh new file mode 100755 index 0000000000..8f9ff76d03 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/run.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +set -v -e -x + +base="$(realpath "$(dirname "$0")")" +export PATH="$PATH:/builds/worker/bin:$base" + +cd /builds/worker + +if test "$PROCESSED_PACKAGES_INDEX" && test "$PROCESSED_PACKAGES_PATH" && test "$TASKCLUSTER_ROOT_URL"; then + PROCESSED_PACKAGES="$TASKCLUSTER_ROOT_URL/api/index/v1/task/$PROCESSED_PACKAGES_INDEX/artifacts/$PROCESSED_PACKAGES_PATH" +fi + +if test "$PROCESSED_PACKAGES"; then + rm -f processed-packages + if test `curl --output /dev/null --silent --head --location "$PROCESSED_PACKAGES" -w "%{http_code}"` = 200; then + curl -L "$PROCESSED_PACKAGES" | gzip -dc > processed-packages + elif test -f "$PROCESSED_PACKAGES"; then + gzip -dc "$PROCESSED_PACKAGES" > processed-packages + fi + if test -f processed-packages; then + # Prevent reposado from downloading packages that have previously been + # dumped. + for f in $(cat processed-packages); do + mkdir -p "$(dirname "$f")" + touch "$f" + done + fi +fi + +mkdir -p /opt/data-reposado/html /opt/data-reposado/metadata + +# First, just fetch all the update info. +python3 /usr/local/bin/repo_sync --no-download + +# Next, fetch just the update packages we're interested in. 
+packages=$(python3 "${base}/list-packages.py") +# shellcheck disable=SC2086 +python3 /usr/local/bin/repo_sync $packages + +du -sh /opt/data-reposado + +# Now scrape symbols out of anything that was downloaded. +mkdir -p symbols artifacts +python3 "${base}/PackageSymbolDumper.py" --tracking-file=/builds/worker/processed-packages --dump_syms=/builds/worker/bin/dump_syms_mac /opt/data-reposado/html/content/downloads /builds/worker/symbols + +# Hand out artifacts +gzip -c processed-packages > artifacts/processed-packages.gz + +cd symbols +zip -r9 /builds/worker/artifacts/target.crashreporter-symbols.zip ./* || echo "No symbols dumped" diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/__init__.py diff --git a/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py new file mode 100644 index 0000000000..52fecaeb9f --- /dev/null +++ b/tools/crashreporter/system-symbols/mac/scrapesymbols/gathersymbols.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# Any copyright is dedicated to the Public Domain. 
+# http://creativecommons.org/publicdomain/zero/1.0/ + +from __future__ import print_function, absolute_import + +import argparse +import concurrent.futures +import datetime +import os +import requests +import subprocess +import sys +import urllib +import zipfile + + +if sys.platform == "darwin": + SYSTEM_DIRS = [ + "/usr/lib", + "/System/Library/Frameworks", + "/System/Library/PrivateFrameworks", + "/System/Library/Extensions", + ] +else: + SYSTEM_DIRS = ["/lib", "/usr/lib"] +SYMBOL_SERVER_URL = ( + "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public/v1/" +) + + +def should_process(f, platform=sys.platform): + """Determine if a file is a platform binary""" + if platform == "darwin": + """ + The 'file' command can error out. One example is "illegal byte + sequence" on a Japanese language UTF8 text file. So we must wrap the + command in a try/except block to prevent the script from terminating + prematurely when this happens. + """ + try: + filetype = subprocess.check_output(["file", "-Lb", f]) + except subprocess.CalledProcessError: + return False + """Skip kernel extensions""" + if "kext bundle" in filetype: + return False + return filetype.startswith("Mach-O") + else: + return subprocess.check_output(["file", "-Lb", f]).startswith("ELF") + return False + + +def get_archs(filename, platform=sys.platform): + """ + Find the list of architectures present in a Mach-O file, or a single-element + list on non-OS X. + """ + if platform == "darwin": + return ( + subprocess.check_output(["lipo", "-info", filename]) + .split(":")[2] + .strip() + .split() + ) + return [None] + + +def server_has_file(filename): + """ + Send the symbol server a HEAD request to see if it has this symbol file. 
+ """ + try: + r = requests.head( + urllib.parse.urljoin(SYMBOL_SERVER_URL, urllib.parse.quote(filename)) + ) + return r.status_code == 200 + except requests.exceptions.RequestException: + return False + + +def process_file(dump_syms, path, arch, verbose, write_all): + if sys.platform == "darwin": + arch_arg = ["-a", arch] + else: + arch_arg = [] + try: + stderr = None if verbose else open(os.devnull, "wb") + stdout = subprocess.check_output([dump_syms] + arch_arg + [path], stderr=stderr) + except subprocess.CalledProcessError: + if verbose: + print("Processing %s%s...failed." % (path, " [%s]" % arch if arch else "")) + return None, None + module = stdout.splitlines()[0] + bits = module.split(" ", 4) + if len(bits) != 5: + return None, None + _, platform, cpu_arch, debug_id, debug_file = bits + if verbose: + sys.stdout.write("Processing %s [%s]..." % (path, arch)) + filename = os.path.join(debug_file, debug_id, debug_file + ".sym") + # see if the server already has this symbol file + if not write_all: + if server_has_file(filename): + if verbose: + print("already on server.") + return None, None + # Collect for uploading + if verbose: + print("done.") + return filename, stdout + + +def get_files(paths, platform=sys.platform): + """ + For each entry passed in paths if the path is a file that can + be processed, yield it, otherwise if it is a directory yield files + under it that can be processed. 
+ """ + for path in paths: + if os.path.isdir(path): + for root, subdirs, files in os.walk(path): + for f in files: + fullpath = os.path.join(root, f) + if should_process(fullpath, platform=platform): + yield fullpath + elif should_process(path, platform=platform): + yield path + + +def process_paths( + paths, executor, dump_syms, verbose, write_all=False, platform=sys.platform +): + jobs = set() + for fullpath in get_files(paths, platform=platform): + while os.path.islink(fullpath): + fullpath = os.path.join(os.path.dirname(fullpath), os.readlink(fullpath)) + if platform == "linux": + # See if there's a -dbg package installed and dump that instead. + dbgpath = "/usr/lib/debug" + fullpath + if os.path.isfile(dbgpath): + fullpath = dbgpath + for arch in get_archs(fullpath, platform=platform): + jobs.add( + executor.submit( + process_file, dump_syms, fullpath, arch, verbose, write_all + ) + ) + for job in concurrent.futures.as_completed(jobs): + try: + yield job.result() + except Exception as e: + print("Error: %s" % str(e), file=sys.stderr) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-v", "--verbose", action="store_true", help="Produce verbose output" + ) + parser.add_argument( + "--all", + action="store_true", + help="Gather all system symbols, not just missing ones.", + ) + parser.add_argument("dump_syms", help="Path to dump_syms binary") + parser.add_argument( + "files", nargs="*", help="Specific files from which to gather symbols." + ) + args = parser.parse_args() + args.dump_syms = os.path.abspath(args.dump_syms) + # check for the dump_syms binary + if ( + not os.path.isabs(args.dump_syms) + or not os.path.exists(args.dump_syms) + or not os.access(args.dump_syms, os.X_OK) + ): + print( + "Error: can't find dump_syms binary at %s!" 
% args.dump_syms, + file=sys.stderr, + ) + return 1 + file_list = set() + executor = concurrent.futures.ProcessPoolExecutor() + zip_path = os.path.abspath("symbols.zip") + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + for filename, contents in process_paths( + args.files if args.files else SYSTEM_DIRS, + executor, + args.dump_syms, + args.verbose, + args.all, + ): + if filename and contents and filename not in file_list: + file_list.add(filename) + zf.writestr(filename, contents) + zf.writestr( + "ossyms-1.0-{platform}-{date}-symbols.txt".format( + platform=sys.platform.title(), + date=datetime.datetime.now().strftime("%Y%m%d%H%M%S"), + ), + "\n".join(file_list), + ) + if file_list: + if args.verbose: + print("Generated %s with %d symbols" % (zip_path, len(file_list))) + else: + os.unlink("symbols.zip") + + +if __name__ == "__main__": + main() diff --git a/tools/crashreporter/system-symbols/win/LICENSE b/tools/crashreporter/system-symbols/win/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt new file mode 100644 index 0000000000..d63dc716e9 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/known-microsoft-symbols.txt @@ -0,0 +1,17 @@ +d2d1.pdb
+d3d10level9.pdb
+d3d10warp.pdb
+d3d11.pdb
+d3d9.pdb
+d3dcompiler_47.pdb
+d3dim700.pdb
+kernel32.pdb
+kernelbase.pdb
+ntdll.pdb
+user32.pdb
+wkernel32.pdb
+wkernelbase.pdb
+wntdll.pdb
+ws2_32.pdb
+wuser32.pdb
+zipwriter.pdb
diff --git a/tools/crashreporter/system-symbols/win/run.sh b/tools/crashreporter/system-symbols/win/run.sh new file mode 100755 index 0000000000..4313731609 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/run.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +set -v -e -x + +base="$(realpath "$(dirname "$0")")" + +export DUMP_SYMS_PATH="${MOZ_FETCHES_DIR}/dump_syms/dump_syms" + +mkdir -p artifacts && \ +ulimit -n 16384 && \ +PYTHONPATH=$PWD python3 "${base}/symsrv-fetch.py" artifacts/target.crashreporter-symbols.zip diff --git a/tools/crashreporter/system-symbols/win/scrape-report.py b/tools/crashreporter/system-symbols/win/scrape-report.py new file mode 100644 index 0000000000..0e1eb18760 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/scrape-report.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python +# +# Copyright 2016 Mozilla +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import + +import csv +import json +import logging +import os +import requests +import sys +import urlparse + + +log = logging.getLogger() + + +def fetch_missing_symbols_from_crash(file_or_crash): + if os.path.isfile(file_or_crash): + log.info("Fetching missing symbols from JSON file: %s" % file_or_crash) + j = {"json_dump": json.load(open(file_or_crash, "rb"))} + else: + if "report/index/" in file_or_crash: + crash_id = urlparse.urlparse(file_or_crash).path.split("/")[-1] + else: + crash_id = file_or_crash + url = ( + "https://crash-stats.mozilla.org/api/ProcessedCrash/" + "?crash_id={crash_id}&datatype=processed".format(crash_id=crash_id) + ) + log.info("Fetching missing symbols from crash: %s" % url) + r = requests.get(url) + if r.status_code != 200: + log.error("Failed to fetch crash %s" % url) + return set() + j = r.json() + return set( + [ + (m["debug_file"], m["debug_id"], m["filename"], m["code_id"]) + for m in j["json_dump"]["modules"] + if "missing_symbols" in m + ] + ) + + +def main(): + logging.basicConfig() + log.setLevel(logging.DEBUG) + urllib3_logger = logging.getLogger("urllib3") + urllib3_logger.setLevel(logging.ERROR) + + if len(sys.argv) < 2: + log.error("Specify a crash URL or ID") + sys.exit(1) + symbols = fetch_missing_symbols_from_crash(sys.argv[1]) + log.info("Found %d missing symbols" % len(symbols)) + c = csv.writer(sys.stdout) + c.writerow(["debug_file", "debug_id", "code_file", "code_id"]) + for row in symbols: + c.writerow(row) + + +if __name__ == "__main__": + main() diff --git a/tools/crashreporter/system-symbols/win/skiplist.txt b/tools/crashreporter/system-symbols/win/skiplist.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tools/crashreporter/system-symbols/win/skiplist.txt diff --git a/tools/crashreporter/system-symbols/win/symsrv-fetch.py b/tools/crashreporter/system-symbols/win/symsrv-fetch.py new file mode 100644 index 0000000000..734290e1a5 --- /dev/null +++ 
b/tools/crashreporter/system-symbols/win/symsrv-fetch.py @@ -0,0 +1,521 @@ +#!/usr/bin/env python +# +# Copyright 2016 Mozilla +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This script will read a CSV of modules from Socorro, and try to retrieve +# missing symbols from Microsoft's symbol server. It honors a blacklist +# (blacklist.txt) of symbols that are known to be from our applications, +# and it maintains its own list of symbols that the MS symbol server +# doesn't have (skiplist.txt). +# +# The script also depends on having write access to the directory it is +# installed in, to write the skiplist text file. 
from aiofile import AIOFile, LineReader
from aiohttp import ClientSession, ClientTimeout
from aiohttp.connector import TCPConnector
import argparse
import asyncio
import os
import shlex
import shutil
import logging
from collections import defaultdict
from tempfile import mkdtemp
from urllib.parse import urljoin
from urllib.parse import quote
import zipfile


# Just hardcoded here
MICROSOFT_SYMBOL_SERVER = "https://msdl.microsoft.com/download/symbols/"
USER_AGENT = "Microsoft-Symbol-Server/6.3.0.0"
MOZILLA_SYMBOL_SERVER = (
    "https://s3-us-west-2.amazonaws.com/org.mozilla.crash-stats.symbols-public/v1/"
)
MISSING_SYMBOLS_URL = "https://symbols.mozilla.org/missingsymbols.csv?microsoft=only"
HEADERS = {"User-Agent": USER_AGENT}
# Symbol-server search path handed to dump_syms; {0} is the local cache dir.
SYM_SRV = "SRV*{0}*https://msdl.microsoft.com/download/symbols;SRV*{0}*https://software.intel.com/sites/downloads/symbols;SRV*{0}*https://download.amd.com/dir/bin;SRV*{0}*https://driver-symbols.nvidia.com"
# Total per-session HTTP timeout, in seconds.
TIMEOUT = 7200
# Maximum number of attempts for a HEAD or GET request.
RETRIES = 5


log = logging.getLogger()


def get_type(data):
    """Classify downloaded bytes by their magic number.

    Returns one of "pdb-v7", "pdb-v2", "dll", "cab" or "unknown".
    """
    # PDB v7
    if data.startswith(b"Microsoft C/C++ MSF 7.00"):
        return "pdb-v7"
    # PDB v2
    if data.startswith(b"Microsoft C/C++ program database 2.00"):
        return "pdb-v2"
    # DLL
    if data.startswith(b"MZ"):
        return "dll"
    # CAB
    if data.startswith(b"MSCF"):
        return "cab"

    return "unknown"


async def exp_backoff(retry_num):
    """Sleep for 2 ** retry_num seconds (exponential back-off)."""
    await asyncio.sleep(2 ** retry_num)


async def server_has_file(client, server, filename):
    """
    Send the symbol server a HEAD request to see if it has this symbol file.

    Returns True when the server answers 200 (and, for the Microsoft server,
    announces an "application/octet-stream" body); False otherwise or after
    RETRIES failed attempts.
    """
    url = urljoin(server, quote(filename))
    for i in range(RETRIES):
        try:
            async with client.head(url, headers=HEADERS, allow_redirects=True) as resp:
                # .get() instead of indexing: a response without a
                # Content-Type header is a plain "no", not an error that
                # deserves RETRIES attempts with back-off.
                if resp.status == 200 and (
                    (
                        "microsoft" in server
                        and resp.headers.get("Content-Type")
                        == "application/octet-stream"
                    )
                    or "mozilla" in server
                ):
                    log.debug(f"File exists: {url}")
                    return True
                else:
                    return False
        except Exception as e:
            # Sometimes we've SSL errors or disconnections... so in such a situation just retry
            log.warning(f"Error with {url}: retry")
            log.exception(e)
            await exp_backoff(i)

    log.debug(f"Too many retries (HEAD) for {url}: give up.")
    return False


async def fetch_file(client, server, filename):
    """
    Fetch the file from the server

    Returns the file content as bytes, or None when the file is a PDB v2
    (too old to be useful) or could not be retrieved after RETRIES attempts.
    """
    url = urljoin(server, quote(filename))
    log.debug(f"Fetch url: {url}")
    for i in range(RETRIES):
        try:
            async with client.get(url, headers=HEADERS, allow_redirects=True) as resp:
                if resp.status == 200:
                    data = await resp.read()
                    typ = get_type(data)
                    if typ == "unknown":
                        # try again
                        await exp_backoff(i)
                    elif typ == "pdb-v2":
                        # too old: skip it
                        log.debug(f"PDB v2 (skipped because too old): {url}")
                        return None
                    else:
                        return data
                else:
                    log.error(f"Cannot get data (status {resp.status}) for {url}: ")
        except Exception as e:
            log.warning(f"Error with {url}")
            log.exception(e)
            await asyncio.sleep(0.5)

    log.debug(f"Too many retries (GET) for {url}: give up.")
    return None


def write_skiplist(skiplist):
    """Write the {debug_id: debug_file} mapping to skiplist.txt in the cwd."""
    with open("skiplist.txt", "w") as sf:
        sf.writelines(
            f"{debug_id} {debug_file}\n" for debug_id, debug_file in skiplist.items()
        )


async def fetch_missing_symbols(u):
    """Download the missing-symbols CSV at `u` and return its data lines."""
    log.info("Trying missing symbols from %s" % u)
    async with ClientSession() as client:
        async with client.get(u, headers=HEADERS) as resp:
            # just skip the first line since it contains column headers
            data = await resp.text()
            return data.splitlines()[1:]


async def get_list(filename):
    """Read `filename` into a set of right-stripped lines.

    A missing file yields an empty set.
    """
    alist = set()
    try:
        async with AIOFile(filename, "r") as In:
            async for line in LineReader(In):
                line = line.rstrip()
                alist.add(line)
    except FileNotFoundError:
        pass

    log.debug(f"{filename} contains {len(alist)} items")

    return alist


async def get_skiplist():
    """Read skiplist.txt into a {debug_id: lower-cased debug_file} dict.

    Each useful line has the form "<debug_id> <debug_file>"; blank or
    malformed lines are ignored and a missing file yields an empty dict.
    """
    skiplist = {}
    path = "skiplist.txt"
    try:
        async with AIOFile(path, "r") as In:
            async for line in LineReader(In):
                line = line.strip()
                if line == "":
                    continue
                s = line.split(" ", maxsplit=1)
                if len(s) != 2:
                    continue
                debug_id, debug_file = s
                skiplist[debug_id] = debug_file.lower()
    except FileNotFoundError:
        pass

    log.debug(f"{path} contains {len(skiplist)} items")

    return skiplist


def get_missing_symbols(missing_symbols, skiplist, blacklist):
    """Group the CSV lines by pdb name, dropping blacklisted/skipped entries.

    :param missing_symbols: iterable of "pdb,debug_id[,code_file,code_id]" lines.
    :param skiplist: {debug_id: lower-cased pdb name} already known to be
        unavailable.
    :param blacklist: container of lower-cased pdb names never to request.
    :return: ``(modules, stats)`` where modules maps a pdb name to a set of
        ``(debug_id, code_file, code_id)`` tuples and stats counts the
        entries rejected by each list.
    """
    modules = defaultdict(set)
    stats = {"blacklist": 0, "skiplist": 0}
    for line in missing_symbols:
        line = line.rstrip()
        bits = line.split(",")
        if len(bits) < 2:
            continue
        pdb, debug_id = bits[:2]
        code_file, code_id = None, None
        if len(bits) >= 4:
            code_file, code_id = bits[2:4]
        if pdb and debug_id and pdb.endswith(".pdb"):
            if pdb.lower() in blacklist:
                stats["blacklist"] += 1
                continue

            if skiplist.get(debug_id) != pdb.lower():
                modules[pdb].add((debug_id, code_file, code_id))
            else:
                stats["skiplist"] += 1
                # We've asked the symbol server previously about this,
                # so skip it.
                log.debug("%s/%s already in skiplist", pdb, debug_id)

    return modules, stats


async def collect_info(client, filename, debug_id, code_file, code_id):
    """Probe the symbol servers for one module.

    :return: ``(filename, debug_id, code_file, code_id, has_pdb, has_code,
        is_there)``.  has_pdb is True when Microsoft has the PDB and Mozilla
        does not yet have the .sym; is_there is True when Mozilla already has
        it; has_code is truthy when Microsoft also serves the binary.
    """
    # These are URL paths, so join with "/" explicitly (as the code-file
    # lookup below does) rather than with the OS path separator.
    pdb_path = "/".join((filename, debug_id, filename))
    sym_path = "/".join((filename, debug_id, filename.replace(".pdb", "") + ".sym"))

    has_pdb = await server_has_file(client, MICROSOFT_SYMBOL_SERVER, pdb_path)
    has_code = is_there = False
    if has_pdb:
        if not await server_has_file(client, MOZILLA_SYMBOL_SERVER, sym_path):
            has_code = (
                code_file
                and code_id
                and await server_has_file(
                    client,
                    MICROSOFT_SYMBOL_SERVER,
                    f"{code_file}/{code_id}/{code_file}",
                )
            )
        else:
            # if the file is on moz sym server no need to do anything
            is_there = True
            has_pdb = False

    return (filename, debug_id, code_file, code_id, has_pdb, has_code, is_there)


async def check_x86_file(path):
    """Return True when the .sym file at `path` starts with an x86 MODULE line."""
    async with AIOFile(path, "rb") as In:
        head = b"MODULE windows x86 "
        chunk = await In.read(len(head))
        return chunk == head


async def run_command(cmd):
    """Run `cmd` through the shell and return its stripped stderr output."""
    proc = await asyncio.create_subprocess_shell(
        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    _, err = await proc.communicate()
    err = err.decode().strip()

    return err


async def dump_module(
    output, symcache, filename, debug_id, code_file, code_id, has_code, dump_syms
):
    """Run dump_syms for one module and report the outcome.

    :return: the relative path of the generated .sym file on success, 1 when
        dump_syms failed or produced no file, 2 when only a 64-bit PDB
        (without its binary) was available.
    """
    sym_path = os.path.join(filename, debug_id, filename.replace(".pdb", ".sym"))
    output_path = os.path.join(output, sym_path)
    sym_srv = SYM_SRV.format(symcache)

    if has_code:
        target, id_flag, id_value = code_file, "--code-id", code_id
    else:
        target, id_flag, id_value = filename, "--debug-id", debug_id

    # The file names and ids come from a CSV downloaded from the network and
    # the command is executed by a shell: quote every interpolated value so
    # that shell metacharacters in them cannot inject commands.  `dump_syms`
    # is operator-supplied and is left as-is, exactly as before.
    cmd = (
        f"{dump_syms} {shlex.quote(target)} {id_flag} {shlex.quote(id_value)} "
        f"--store {shlex.quote(output)} --symbol-server {shlex.quote(sym_srv)} "
        f"--verbose error"
    )

    err = await run_command(cmd)

    if err:
        log.error(f"Error with {cmd}")
        log.error(err)
        return 1

    if not os.path.exists(output_path):
        log.error(f"Could not find file {output_path} after running {cmd}")
        return 1

    if not has_code and not await check_x86_file(output_path):
        # PDB for 32 bits contains everything we need (symbols + stack unwind info)
        # But PDB for 64 bits don't contain stack unwind info
        # (they're in the binary (.dll/.exe) itself).
        # So here we're logging because we've a PDB (64 bits) without its DLL/EXE
        if code_file and code_id:
            log.debug(f"x86_64 binary {code_file}/{code_id} required")
        else:
            log.debug(f"x86_64 binary for {filename}/{debug_id} required")
        return 2

    log.info(f"Successfully dumped: {filename}/{debug_id}")
    return sym_path


async def dump(output, symcache, modules, dump_syms):
    """Dump every module concurrently.

    :return: ``(file_index, stats)``: the set of generated .sym relative
        paths and the counts of failed / binary-less dumps.
    """
    tasks = []
    for filename, debug_id, code_file, code_id, has_code in modules:
        tasks.append(
            dump_module(
                output,
                symcache,
                filename,
                debug_id,
                code_file,
                code_id,
                has_code,
                dump_syms,
            )
        )

    res = await asyncio.gather(*tasks)
    # dump_module returns a str on success and the int codes 1 / 2 otherwise.
    file_index = {x for x in res if isinstance(x, str)}
    stats = {
        "dump_error": sum(1 for x in res if x == 1),
        "no_bin": sum(1 for x in res if x == 2),
    }

    return file_index, stats


async def collect(modules):
    """Probe the servers for every module and select those worth dumping.

    :param modules: mapping as returned by get_missing_symbols().
    :return: ``(to_dump, stats)`` where to_dump is a list of
        ``(filename, debug_id, code_file, code_id, has_code)`` tuples.
    """
    tasks = []

    # In case of errors (Too many open files), just change limit_per_host
    connector = TCPConnector(limit=100, limit_per_host=4)

    # No explicit `loop=`: the session is created inside a coroutine and
    # binds to the running loop; the argument is deprecated in aiohttp.
    async with ClientSession(
        timeout=ClientTimeout(total=TIMEOUT), connector=connector
    ) as client:
        for filename, ids in modules.items():
            for debug_id, code_file, code_id in ids:
                tasks.append(
                    collect_info(client, filename, debug_id, code_file, code_id)
                )

        res = await asyncio.gather(*tasks)
    to_dump = []
    stats = {"no_pdb": 0, "is_there": 0}
    for filename, debug_id, code_file, code_id, has_pdb, has_code, is_there in res:
        if not has_pdb:
            if is_there:
                stats["is_there"] += 1
            else:
                stats["no_pdb"] += 1
                log.info(f"No pdb for {filename}/{debug_id}")
            continue

        log.info(
            f"To dump: {filename}/{debug_id}, {code_file}/{code_id} and has_code = {has_code}"
        )
        to_dump.append((filename, debug_id, code_file, code_id, has_code))

    log.info(f"Collected {len(to_dump)} files to dump")

    return to_dump, stats


async def make_dirs(path):
    """Create `path` (and parents) in the default executor, off the event loop."""
    loop = asyncio.get_running_loop()

    def helper(path):
        os.makedirs(path, exist_ok=True)

    await loop.run_in_executor(None, helper, path)


async def fetch_and_write(output, client, filename, file_id):
    """Download <filename>/<file_id>/<filename> from Microsoft into `output`.

    Returns True when the file was fetched and written, False otherwise.
    """
    # URL path: always "/"-separated, independent of the host OS.
    path = "/".join((filename, file_id, filename))
    data = await fetch_file(client, MICROSOFT_SYMBOL_SERVER, path)

    if not data:
        return False

    output_dir = os.path.join(output, filename, file_id)
    await make_dirs(output_dir)

    output_path = os.path.join(output_dir, filename)
    async with AIOFile(output_path, "wb") as Out:
        await Out.write(data)

    return True


async def fetch_all(output, modules):
    """Download the PDB (and binary when available) of every module.

    :return: the modules whose PDB was fetched, with has_code updated to
        reflect whether the binary was actually downloaded.
    """
    tasks = []
    fetched_modules = []

    # In case of errors (Too many open files), just change limit_per_host
    connector = TCPConnector(limit=100, limit_per_host=0)

    async with ClientSession(
        timeout=ClientTimeout(total=TIMEOUT), connector=connector
    ) as client:
        for filename, debug_id, code_file, code_id, has_code in modules:
            tasks.append(fetch_and_write(output, client, filename, debug_id))
            if has_code:
                tasks.append(fetch_and_write(output, client, code_file, code_id))

        res = await asyncio.gather(*tasks)
        # Results come back in task order: one entry per PDB, followed by one
        # for the binary whenever has_code was set.  Walk them in lock-step.
        res = iter(res)
        for filename, debug_id, code_file, code_id, has_code in modules:
            fetched_pdb = next(res)
            if has_code:
                has_code = next(res)
            if fetched_pdb:
                fetched_modules.append(
                    (filename, debug_id, code_file, code_id, has_code)
                )

    return fetched_modules


def get_base_data(url):
    """Load the four inputs of a run concurrently.

    :return: ``[missing_symbols, blacklist, known_ms_symbols, skiplist]``.
    """

    async def helper(url):
        return await asyncio.gather(
            fetch_missing_symbols(url),
            # Symbols that we know belong to us, so don't ask Microsoft for them.
            get_list("blacklist.txt"),
            # Symbols that we know belong to Microsoft, so don't skiplist them.
            get_list("known-microsoft-symbols.txt"),
            # Symbols that we've asked for in the past unsuccessfully
            get_skiplist(),
        )

    return asyncio.run(helper(url))


def gen_zip(output, output_dir, file_index):
    """Zip the files of `file_index` (relative to `output_dir`) into `output`.

    Nothing is written when `file_index` is empty.
    """
    if not file_index:
        return

    with zipfile.ZipFile(output, "w", zipfile.ZIP_DEFLATED) as z:
        for f in file_index:
            z.write(os.path.join(output_dir, f), f)
    log.info(f"Wrote zip as {output}")


def main():
    """Command-line entry point: fetch, dump and zip the missing symbols."""
    parser = argparse.ArgumentParser(
        description="Fetch missing symbols from Microsoft symbol server"
    )
    parser.add_argument(
        "--missing-symbols",
        type=str,
        help="missing symbols URL",
        default=MISSING_SYMBOLS_URL,
    )
    parser.add_argument("zip", type=str, help="output zip file")
    parser.add_argument(
        "--dump-syms",
        type=str,
        help="dump_syms path",
        default=os.environ.get("DUMP_SYMS_PATH"),
    )

    args = parser.parse_args()

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not args.dump_syms:
        parser.error("dump_syms path is empty")

    logging.basicConfig(level=logging.DEBUG)
    aiohttp_logger = logging.getLogger("aiohttp.client")
    aiohttp_logger.setLevel(logging.INFO)
    log.info("Started")

    # known_ms_symbols is loaded but not consulted anywhere below.
    missing_symbols, blacklist, known_ms_symbols, skiplist = get_base_data(
        args.missing_symbols
    )

    modules, stats_skipped = get_missing_symbols(missing_symbols, skiplist, blacklist)

    # mkdtemp's first positional parameter is `suffix`; name it explicitly so
    # both directories get a recognisable prefix.
    symbol_path = mkdtemp(prefix="symsrvfetch")
    temp_path = mkdtemp(prefix="symcache")

    try:
        modules, stats_collect = asyncio.run(collect(modules))
        modules = asyncio.run(fetch_all(temp_path, modules))

        file_index, stats_dump = asyncio.run(
            dump(symbol_path, temp_path, modules, args.dump_syms)
        )

        gen_zip(args.zip, symbol_path, file_index)
    finally:
        # Always remove the scratch directories, even when a stage raised.
        shutil.rmtree(symbol_path, True)
        shutil.rmtree(temp_path, True)

    write_skiplist(skiplist)

    if not file_index:
        log.info(f"No symbols downloaded: {len(missing_symbols)} considered")
    else:
        log.info(
            f"Total files: {len(missing_symbols)}, Stored {len(file_index)} symbol files"
        )

    log.info(
        f"{stats_collect['is_there']} already present, {stats_skipped['blacklist']} in blacklist, "
        f"{stats_skipped['skiplist']} skipped, {stats_collect['no_pdb']} not found, "
        f"{stats_dump['dump_error']} processed with errors, "
        f"{stats_dump['no_bin']} processed but with no binaries (x86_64)"
    )
    log.info("Finished, exiting")


if __name__ == "__main__":
    main()