diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /toolkit/crashreporter/google-breakpad/src/common/windows/pdb_source_line_writer.cc | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | toolkit/crashreporter/google-breakpad/src/common/windows/pdb_source_line_writer.cc | 1194 |
1 files changed, 1194 insertions, 0 deletions
diff --git a/toolkit/crashreporter/google-breakpad/src/common/windows/pdb_source_line_writer.cc b/toolkit/crashreporter/google-breakpad/src/common/windows/pdb_source_line_writer.cc new file mode 100644 index 0000000000..4030a2e953 --- /dev/null +++ b/toolkit/crashreporter/google-breakpad/src/common/windows/pdb_source_line_writer.cc @@ -0,0 +1,1194 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "common/windows/pdb_source_line_writer.h" + +#include <windows.h> +#include <winnt.h> +#include <atlbase.h> +#include <dia2.h> +#include <diacreate.h> +#include <ImageHlp.h> +#include <stdio.h> + +#include <algorithm> +#include <limits> +#include <map> +#include <set> +#include <utility> + +#include "common/windows/dia_util.h" +#include "common/windows/guid_string.h" +#include "common/windows/pe_util.h" +#include "common/windows/string_utils-inl.h" + +// This constant may be missing from DbgHelp.h. See the documentation for +// IDiaSymbol::get_undecoratedNameEx. +#ifndef UNDNAME_NO_ECSU +#define UNDNAME_NO_ECSU 0x8000 // Suppresses enum/class/struct/union. +#endif // UNDNAME_NO_ECSU + +namespace google_breakpad { + +namespace { + +using std::vector; + +// The symbol (among possibly many) selected to represent an rva. +struct SelectedSymbol { + SelectedSymbol(const CComPtr<IDiaSymbol>& symbol, bool is_public) + : symbol(symbol), is_public(is_public), is_multiple(false) {} + + // The symbol to use for an rva. + CComPtr<IDiaSymbol> symbol; + // Whether this is a public or function symbol. + bool is_public; + // Whether the rva has multiple associated symbols. An rva will correspond to + // multiple symbols in the case of linker identical symbol folding. + bool is_multiple; +}; + +// Maps rva to the symbol to use for that address. +typedef std::map<DWORD, SelectedSymbol> SymbolMap; + +// Record this in the map as the selected symbol for the rva if it satisfies the +// necessary conditions. +void MaybeRecordSymbol(DWORD rva, + const CComPtr<IDiaSymbol> symbol, + bool is_public, + SymbolMap* map) { + SymbolMap::iterator loc = map->find(rva); + if (loc == map->end()) { + map->insert(std::make_pair(rva, SelectedSymbol(symbol, is_public))); + return; + } + + // Prefer function symbols to public symbols. + if (is_public && !loc->second.is_public) { + return; + } + + loc->second.is_multiple = true; + + // Take the 'least' symbol by lexicographical order of the decorated name. We + // use the decorated rather than undecorated name because computing the latter + // is expensive. + BSTR current_name, new_name; + loc->second.symbol->get_name(¤t_name); + symbol->get_name(&new_name); + if (wcscmp(new_name, current_name) < 0) { + loc->second.symbol = symbol; + loc->second.is_public = is_public; + } +} + + + +bool SymbolsMatch(IDiaSymbol* a, IDiaSymbol* b) { + DWORD a_section, a_offset, b_section, b_offset; + if (FAILED(a->get_addressSection(&a_section)) || + FAILED(a->get_addressOffset(&a_offset)) || + FAILED(b->get_addressSection(&b_section)) || + FAILED(b->get_addressOffset(&b_offset))) + return false; + return a_section == b_section && a_offset == b_offset; +} + +bool CreateDiaDataSourceInstance(CComPtr<IDiaDataSource> &data_source) { + if (SUCCEEDED(data_source.CoCreateInstance(CLSID_DiaSource))) { + return true; + } + + class DECLSPEC_UUID("B86AE24D-BF2F-4ac9-B5A2-34B14E4CE11D") DiaSource100; + class DECLSPEC_UUID("761D3BCD-1304-41D5-94E8-EAC54E4AC172") DiaSource110; + class DECLSPEC_UUID("3BFCEA48-620F-4B6B-81F7-B9AF75454C7D") DiaSource120; + class DECLSPEC_UUID("E6756135-1E65-4D17-8576-610761398C3C") DiaSource140; + + // If the CoCreateInstance call above failed, msdia*.dll is not registered. + // We can try loading the DLL corresponding to the #included DIA SDK, but + // the DIA headers don't provide a version. Lets try to figure out which DIA + // version we're compiling against by comparing CLSIDs. + const wchar_t *msdia_dll = nullptr; + if (CLSID_DiaSource == _uuidof(DiaSource100)) { + msdia_dll = L"msdia100.dll"; + } else if (CLSID_DiaSource == _uuidof(DiaSource110)) { + msdia_dll = L"msdia110.dll"; + } else if (CLSID_DiaSource == _uuidof(DiaSource120)) { + msdia_dll = L"msdia120.dll"; + } else if (CLSID_DiaSource == _uuidof(DiaSource140)) { + msdia_dll = L"msdia140.dll"; + } + + if (msdia_dll && + SUCCEEDED(NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource, + reinterpret_cast<void **>(&data_source)))) { + return true; + } + + return false; +} + +} // namespace + +PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) { +} + +PDBSourceLineWriter::~PDBSourceLineWriter() { + Close(); +} + +bool PDBSourceLineWriter::SetCodeFile(const wstring &exe_file) { + if (code_file_.empty()) { + code_file_ = exe_file; + return true; + } + // Setting a different code file path is an error. It is success only if the + // file paths are the same. + return exe_file == code_file_; +} + +bool PDBSourceLineWriter::Open(const wstring &file, FileFormat format) { + Close(); + code_file_.clear(); + + if (FAILED(CoInitialize(NULL))) { + fprintf(stderr, "CoInitialize failed\n"); + return false; + } + + CComPtr<IDiaDataSource> data_source; + if (!CreateDiaDataSourceInstance(data_source)) { + const int kGuidSize = 64; + wchar_t classid[kGuidSize] = {0}; + StringFromGUID2(CLSID_DiaSource, classid, kGuidSize); + fprintf(stderr, "CoCreateInstance CLSID_DiaSource %S failed " + "(msdia*.dll unregistered?)\n", classid); + return false; + } + + switch (format) { + case PDB_FILE: + if (FAILED(data_source->loadDataFromPdb(file.c_str()))) { + fprintf(stderr, "loadDataFromPdb failed for %ws\n", file.c_str()); + return false; + } + break; + case EXE_FILE: + if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) { + fprintf(stderr, "loadDataForExe failed for %ws\n", file.c_str()); + return false; + } + code_file_ = file; + break; + case ANY_FILE: + if (FAILED(data_source->loadDataFromPdb(file.c_str()))) { + if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) { + fprintf(stderr, "loadDataForPdb and loadDataFromExe failed for %ws\n", + file.c_str()); + return false; + } + code_file_ = file; + } + break; + default: + fprintf(stderr, "Unknown file format\n"); + return false; + } + + if (FAILED(data_source->openSession(&session_))) { + fprintf(stderr, "openSession failed\n"); + } + + return true; +} + +bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers *lines) { + // The line number format is: + // <rva> <line number> <source file id> + CComPtr<IDiaLineNumber> line; + ULONG count; + + while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) { + DWORD rva; + if (FAILED(line->get_relativeVirtualAddress(&rva))) { + fprintf(stderr, "failed to get line rva\n"); + return false; + } + + DWORD length; + if (FAILED(line->get_length(&length))) { + fprintf(stderr, "failed to get line code length\n"); + return false; + } + + DWORD dia_source_id; + if (FAILED(line->get_sourceFileId(&dia_source_id))) { + fprintf(stderr, "failed to get line source file id\n"); + return false; + } + // duplicate file names are coalesced to share one ID + DWORD source_id = GetRealFileID(dia_source_id); + + DWORD line_num; + if (FAILED(line->get_lineNumber(&line_num))) { + fprintf(stderr, "failed to get line number\n"); + return false; + } + + AddressRangeVector ranges; + MapAddressRange(image_map_, AddressRange(rva, length), &ranges); + for (size_t i = 0; i < ranges.size(); ++i) { + fprintf(output_, "%lx %lx %lu %lu\n", ranges[i].rva, ranges[i].length, + line_num, source_id); + } + line.Release(); + } + return true; +} + +bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function, + IDiaSymbol *block, + bool has_multiple_symbols) { + // The function format is: + // FUNC <address> <length> <param_stack_size> <function> + DWORD rva; + if (FAILED(block->get_relativeVirtualAddress(&rva))) { + fprintf(stderr, "couldn't get rva\n"); + return false; + } + + ULONGLONG length; + if (FAILED(block->get_length(&length))) { + fprintf(stderr, "failed to get function length\n"); + return false; + } + + if (length == 0) { + // Silently ignore zero-length functions, which can infrequently pop up. + return true; + } + + CComBSTR name; + int stack_param_size; + if (!GetSymbolFunctionName(function, &name, &stack_param_size)) { + return false; + } + + // If the decorated name didn't give the parameter size, try to + // calculate it. + if (stack_param_size < 0) { + stack_param_size = GetFunctionStackParamSize(function); + } + + AddressRangeVector ranges; + MapAddressRange(image_map_, AddressRange(rva, static_cast<DWORD>(length)), + &ranges); + for (size_t i = 0; i < ranges.size(); ++i) { + const char* optional_multiple_field = has_multiple_symbols ? "m " : ""; + fprintf(output_, "FUNC %s%lx %lx %x %ws\n", optional_multiple_field, + ranges[i].rva, ranges[i].length, stack_param_size, name.m_str); + } + + CComPtr<IDiaEnumLineNumbers> lines; + if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) { + return false; + } + + if (!PrintLines(lines)) { + return false; + } + return true; +} + +bool PDBSourceLineWriter::PrintSourceFiles() { + CComPtr<IDiaSymbol> global; + if (FAILED(session_->get_globalScope(&global))) { + fprintf(stderr, "get_globalScope failed\n"); + return false; + } + + CComPtr<IDiaEnumSymbols> compilands; + if (FAILED(global->findChildren(SymTagCompiland, NULL, + nsNone, &compilands))) { + fprintf(stderr, "findChildren failed\n"); + return false; + } + + CComPtr<IDiaSymbol> compiland; + ULONG count; + while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) { + CComPtr<IDiaEnumSourceFiles> source_files; + if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) { + return false; + } + CComPtr<IDiaSourceFile> file; + while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) { + DWORD file_id; + if (FAILED(file->get_uniqueId(&file_id))) { + return false; + } + + CComBSTR file_name; + if (FAILED(file->get_fileName(&file_name))) { + return false; + } + + wstring file_name_string(file_name); + if (!FileIDIsCached(file_name_string)) { + // this is a new file name, cache it and output a FILE line. + CacheFileID(file_name_string, file_id); + fwprintf(output_, L"FILE %d %ws\n", file_id, file_name_string.c_str()); + } else { + // this file name has already been seen, just save this + // ID for later lookup. + StoreDuplicateFileID(file_name_string, file_id); + } + file.Release(); + } + compiland.Release(); + } + return true; +} + +bool PDBSourceLineWriter::PrintFunctions() { + ULONG count = 0; + DWORD rva = 0; + CComPtr<IDiaSymbol> global; + HRESULT hr; + + if (FAILED(session_->get_globalScope(&global))) { + fprintf(stderr, "get_globalScope failed\n"); + return false; + } + + CComPtr<IDiaEnumSymbols> symbols = NULL; + + // Find all function symbols first. + SymbolMap rva_symbol; + hr = global->findChildren(SymTagFunction, NULL, nsNone, &symbols); + + if (SUCCEEDED(hr)) { + CComPtr<IDiaSymbol> symbol = NULL; + + while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) { + if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) { + // Potentially record this as the canonical symbol for this rva. + MaybeRecordSymbol(rva, symbol, false, &rva_symbol); + } else { + fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n"); + return false; + } + + symbol.Release(); + } + + symbols.Release(); + } + + // Find all public symbols and record public symbols that are not also private + // symbols. + hr = global->findChildren(SymTagPublicSymbol, NULL, nsNone, &symbols); + + if (SUCCEEDED(hr)) { + CComPtr<IDiaSymbol> symbol = NULL; + + while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) { + if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) { + // Potentially record this as the canonical symbol for this rva. + MaybeRecordSymbol(rva, symbol, true, &rva_symbol); + } else { + fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n"); + return false; + } + + symbol.Release(); + } + + symbols.Release(); + } + + // For each rva, dump the selected symbol at the address. + SymbolMap::iterator it; + for (it = rva_symbol.begin(); it != rva_symbol.end(); ++it) { + CComPtr<IDiaSymbol> symbol = it->second.symbol; + // Only print public symbols if there is no function symbol for the address. + if (!it->second.is_public) { + if (!PrintFunction(symbol, symbol, it->second.is_multiple)) + return false; + } else { + if (!PrintCodePublicSymbol(symbol, it->second.is_multiple)) + return false; + } + } + + // When building with PGO, the compiler can split functions into + // "hot" and "cold" blocks, and move the "cold" blocks out to separate + // pages, so the function can be noncontiguous. To find these blocks, + // we have to iterate over all the compilands, and then find blocks + // that are children of them. We can then find the lexical parents + // of those blocks and print out an extra FUNC line for blocks + // that are not contained in their parent functions. + CComPtr<IDiaEnumSymbols> compilands; + if (FAILED(global->findChildren(SymTagCompiland, NULL, + nsNone, &compilands))) { + fprintf(stderr, "findChildren failed on the global\n"); + return false; + } + + CComPtr<IDiaSymbol> compiland; + while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) { + CComPtr<IDiaEnumSymbols> blocks; + if (FAILED(compiland->findChildren(SymTagBlock, NULL, + nsNone, &blocks))) { + fprintf(stderr, "findChildren failed on a compiland\n"); + return false; + } + + CComPtr<IDiaSymbol> block; + while (SUCCEEDED(blocks->Next(1, &block, &count)) && count == 1) { + // find this block's lexical parent function + CComPtr<IDiaSymbol> parent; + DWORD tag; + if (SUCCEEDED(block->get_lexicalParent(&parent)) && + SUCCEEDED(parent->get_symTag(&tag)) && + tag == SymTagFunction) { + // now get the block's offset and the function's offset and size, + // and determine if the block is outside of the function + DWORD func_rva, block_rva; + ULONGLONG func_length; + if (SUCCEEDED(block->get_relativeVirtualAddress(&block_rva)) && + SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) && + SUCCEEDED(parent->get_length(&func_length))) { + if (block_rva < func_rva || block_rva > (func_rva + func_length)) { + if (!PrintFunction(parent, block, false)) { + return false; + } + } + } + } + parent.Release(); + block.Release(); + } + blocks.Release(); + compiland.Release(); + } + + global.Release(); + return true; +} + +#undef max + +bool PDBSourceLineWriter::PrintFrameDataUsingPDB() { + // It would be nice if it were possible to output frame data alongside the + // associated function, as is done with line numbers, but the DIA API + // doesn't make it possible to get the frame data in that way. + + CComPtr<IDiaEnumFrameData> frame_data_enum; + if (!FindTable(session_, &frame_data_enum)) + return false; + + DWORD last_type = std::numeric_limits<DWORD>::max(); + DWORD last_rva = std::numeric_limits<DWORD>::max(); + DWORD last_code_size = 0; + DWORD last_prolog_size = std::numeric_limits<DWORD>::max(); + + CComPtr<IDiaFrameData> frame_data; + ULONG count = 0; + while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) && + count == 1) { + DWORD type; + if (FAILED(frame_data->get_type(&type))) + return false; + + DWORD rva; + if (FAILED(frame_data->get_relativeVirtualAddress(&rva))) + return false; + + DWORD code_size; + if (FAILED(frame_data->get_lengthBlock(&code_size))) + return false; + + DWORD prolog_size; + if (FAILED(frame_data->get_lengthProlog(&prolog_size))) + return false; + + // parameter_size is the size of parameters passed on the stack. If any + // parameters are not passed on the stack (such as in registers), their + // sizes will not be included in parameter_size. + DWORD parameter_size; + if (FAILED(frame_data->get_lengthParams(¶meter_size))) + return false; + + DWORD saved_register_size; + if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size))) + return false; + + DWORD local_size; + if (FAILED(frame_data->get_lengthLocals(&local_size))) + return false; + + // get_maxStack can return S_FALSE, just use 0 in that case. + DWORD max_stack_size = 0; + if (FAILED(frame_data->get_maxStack(&max_stack_size))) + return false; + + // get_programString can return S_FALSE, indicating that there is no + // program string. In that case, check whether %ebp is used. + HRESULT program_string_result; + CComBSTR program_string; + if (FAILED(program_string_result = frame_data->get_program( + &program_string))) { + return false; + } + + // get_allocatesBasePointer can return S_FALSE, treat that as though + // %ebp is not used. + BOOL allocates_base_pointer = FALSE; + if (program_string_result != S_OK) { + if (FAILED(frame_data->get_allocatesBasePointer( + &allocates_base_pointer))) { + return false; + } + } + + // Only print out a line if type, rva, code_size, or prolog_size have + // changed from the last line. It is surprisingly common (especially in + // system library PDBs) for DIA to return a series of identical + // IDiaFrameData objects. For kernel32.pdb from Windows XP SP2 on x86, + // this check reduces the size of the dumped symbol file by a third. + if (type != last_type || rva != last_rva || code_size != last_code_size || + prolog_size != last_prolog_size) { + // The prolog and the code portions of the frame have to be treated + // independently as they may have independently changed in size, or may + // even have been split. + // NOTE: If epilog size is ever non-zero, we have to do something + // similar with it. + + // Figure out where the prolog bytes have landed. + AddressRangeVector prolog_ranges; + if (prolog_size > 0) { + MapAddressRange(image_map_, AddressRange(rva, prolog_size), + &prolog_ranges); + } + + // And figure out where the code bytes have landed. + AddressRangeVector code_ranges; + MapAddressRange(image_map_, + AddressRange(rva + prolog_size, + code_size - prolog_size), + &code_ranges); + + struct FrameInfo { + DWORD rva; + DWORD code_size; + DWORD prolog_size; + }; + std::vector<FrameInfo> frame_infos; + + // Special case: The prolog and the code bytes remain contiguous. This is + // only done for compactness of the symbol file, and we could actually + // be outputting independent frame info for the prolog and code portions. + if (prolog_ranges.size() == 1 && code_ranges.size() == 1 && + prolog_ranges[0].end() == code_ranges[0].rva) { + FrameInfo fi = { prolog_ranges[0].rva, + prolog_ranges[0].length + code_ranges[0].length, + prolog_ranges[0].length }; + frame_infos.push_back(fi); + } else { + // Otherwise we output the prolog and code frame info independently. + for (size_t i = 0; i < prolog_ranges.size(); ++i) { + FrameInfo fi = { prolog_ranges[i].rva, + prolog_ranges[i].length, + prolog_ranges[i].length }; + frame_infos.push_back(fi); + } + for (size_t i = 0; i < code_ranges.size(); ++i) { + FrameInfo fi = { code_ranges[i].rva, code_ranges[i].length, 0 }; + frame_infos.push_back(fi); + } + } + + for (size_t i = 0; i < frame_infos.size(); ++i) { + const FrameInfo& fi(frame_infos[i]); + fprintf(output_, "STACK WIN %lx %lx %lx %lx %x %lx %lx %lx %lx %d ", + type, fi.rva, fi.code_size, fi.prolog_size, + 0 /* epilog_size */, parameter_size, saved_register_size, + local_size, max_stack_size, program_string_result == S_OK); + if (program_string_result == S_OK) { + fprintf(output_, "%ws\n", program_string.m_str); + } else { + fprintf(output_, "%d\n", allocates_base_pointer); + } + } + + last_type = type; + last_rva = rva; + last_code_size = code_size; + last_prolog_size = prolog_size; + } + + frame_data.Release(); + } + + return true; +} + +bool PDBSourceLineWriter::PrintFrameDataUsingEXE() { + if (code_file_.empty() && !FindPEFile()) { + fprintf(stderr, "Couldn't locate EXE or DLL file.\n"); + return false; + } + + return PrintPEFrameData(code_file_, output_); +} + +bool PDBSourceLineWriter::PrintFrameData() { + PDBModuleInfo info; + if (GetModuleInfo(&info) && info.cpu == L"x86_64") { + return PrintFrameDataUsingEXE(); + } else { + return PrintFrameDataUsingPDB(); + } + return false; +} + +bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol, + bool has_multiple_symbols) { + BOOL is_code; + if (FAILED(symbol->get_code(&is_code))) { + return false; + } + if (!is_code) { + return true; + } + + DWORD rva; + if (FAILED(symbol->get_relativeVirtualAddress(&rva))) { + return false; + } + + CComBSTR name; + int stack_param_size; + if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) { + return false; + } + + AddressRangeVector ranges; + MapAddressRange(image_map_, AddressRange(rva, 1), &ranges); + for (size_t i = 0; i < ranges.size(); ++i) { + const char* optional_multiple_field = has_multiple_symbols ? "m " : ""; + fprintf(output_, "PUBLIC %s%lx %x %ws\n", optional_multiple_field, + ranges[i].rva, stack_param_size > 0 ? stack_param_size : 0, + name.m_str); + } + + // Now walk the function in the original untranslated space, asking DIA + // what function is at that location, stepping through OMAP blocks. If + // we're still in the same function, emit another entry, because the + // symbol could have been split into multiple pieces. If we've gotten to + // another symbol in the original address space, then we're done for + // this symbol. See https://crbug.com/678874. + for (;;) { + // This steps to the next block in the original image. Simply doing + // rva++ would also be correct, but would emit tons of unnecessary + // entries. + rva = image_map_.subsequent_rva_block[rva]; + if (rva == 0) + break; + + CComPtr<IDiaSymbol> next_sym = NULL; + LONG displacement; + if (FAILED(session_->findSymbolByRVAEx(rva, SymTagPublicSymbol, &next_sym, + &displacement))) { + break; + } + + if (!SymbolsMatch(symbol, next_sym)) + break; + + AddressRangeVector next_ranges; + MapAddressRange(image_map_, AddressRange(rva, 1), &next_ranges); + for (size_t i = 0; i < next_ranges.size(); ++i) { + fprintf(output_, "PUBLIC %lx %x %ws\n", next_ranges[i].rva, + stack_param_size > 0 ? stack_param_size : 0, name.m_str); + } + } + + return true; +} + +bool PDBSourceLineWriter::PrintPDBInfo() { + PDBModuleInfo info; + if (!GetModuleInfo(&info)) { + return false; + } + + // Hard-code "windows" for the OS because that's the only thing that makes + // sense for PDB files. (This might not be strictly correct for Windows CE + // support, but we don't care about that at the moment.) + fprintf(output_, "MODULE windows %ws %ws %ws\n", + info.cpu.c_str(), info.debug_identifier.c_str(), + info.debug_file.c_str()); + + return true; +} + +bool PDBSourceLineWriter::PrintPEInfo() { + PEModuleInfo info; + if (!GetPEInfo(&info)) { + return false; + } + + fprintf(output_, "INFO CODE_ID %ws %ws\n", + info.code_identifier.c_str(), + info.code_file.c_str()); + return true; +} + +// wcstol_positive_strict is sort of like wcstol, but much stricter. string +// should be a buffer pointing to a null-terminated string containing only +// decimal digits. If the entire string can be converted to an integer +// without overflowing, and there are no non-digit characters before the +// result is set to the value and this function returns true. Otherwise, +// this function returns false. This is an alternative to the strtol, atoi, +// and scanf families, which are not as strict about input and in some cases +// don't provide a good way for the caller to determine if a conversion was +// successful. +static bool wcstol_positive_strict(wchar_t *string, int *result) { + int value = 0; + for (wchar_t *c = string; *c != '\0'; ++c) { + int last_value = value; + value *= 10; + // Detect overflow. + if (value / 10 != last_value || value < 0) { + return false; + } + if (*c < '0' || *c > '9') { + return false; + } + unsigned int c_value = *c - '0'; + last_value = value; + value += c_value; + // Detect overflow. + if (value < last_value) { + return false; + } + // Forbid leading zeroes unless the string is just "0". + if (value == 0 && *(c+1) != '\0') { + return false; + } + } + *result = value; + return true; +} + +bool PDBSourceLineWriter::FindPEFile() { + CComPtr<IDiaSymbol> global; + if (FAILED(session_->get_globalScope(&global))) { + fprintf(stderr, "get_globalScope failed\n"); + return false; + } + + CComBSTR symbols_file; + if (SUCCEEDED(global->get_symbolsFileName(&symbols_file))) { + wstring file(symbols_file); + + // Look for an EXE or DLL file. + const wchar_t *extensions[] = { L"exe", L"dll" }; + for (size_t i = 0; i < sizeof(extensions) / sizeof(extensions[0]); i++) { + size_t dot_pos = file.find_last_of(L"."); + if (dot_pos != wstring::npos) { + file.replace(dot_pos + 1, wstring::npos, extensions[i]); + // Check if this file exists. + if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) { + code_file_ = file; + return true; + } + } + } + } + + return false; +} + +// static +bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol *function, + BSTR *name, + int *stack_param_size) { + *stack_param_size = -1; + const DWORD undecorate_options = UNDNAME_NO_MS_KEYWORDS | + UNDNAME_NO_FUNCTION_RETURNS | + UNDNAME_NO_ALLOCATION_MODEL | + UNDNAME_NO_ALLOCATION_LANGUAGE | + UNDNAME_NO_THISTYPE | + UNDNAME_NO_ACCESS_SPECIFIERS | + UNDNAME_NO_THROW_SIGNATURES | + UNDNAME_NO_MEMBER_TYPE | + UNDNAME_NO_RETURN_UDT_MODEL | + UNDNAME_NO_ECSU; + + // Use get_undecoratedNameEx to get readable C++ names with arguments. + if (function->get_undecoratedNameEx(undecorate_options, name) != S_OK) { + if (function->get_name(name) != S_OK) { + fprintf(stderr, "failed to get function name\n"); + return false; + } + + // It's possible for get_name to return an empty string, so + // special-case that. + if (wcscmp(*name, L"") == 0) { + SysFreeString(*name); + // dwarf_cu_to_module.cc uses "<name omitted>", so match that. + *name = SysAllocString(L"<name omitted>"); + return true; + } + + // If a name comes from get_name because no undecorated form existed, + // it's already formatted properly to be used as output. Don't do any + // additional processing. + // + // MSVC7's DIA seems to not undecorate names in as many cases as MSVC8's. + // This will result in calling get_name for some C++ symbols, so + // all of the parameter and return type information may not be included in + // the name string. + } else { + // C++ uses a bogus "void" argument for functions and methods that don't + // take any parameters. Take it out of the undecorated name because it's + // ugly and unnecessary. + const wchar_t *replace_string = L"(void)"; + const size_t replace_length = wcslen(replace_string); + const wchar_t *replacement_string = L"()"; + size_t length = wcslen(*name); + if (length >= replace_length) { + wchar_t *name_end = *name + length - replace_length; + if (wcscmp(name_end, replace_string) == 0) { + WindowsStringUtils::safe_wcscpy(name_end, replace_length, + replacement_string); + length = wcslen(*name); + } + } + + // Undecorate names used for stdcall and fastcall. These names prefix + // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it + // with '@' followed by the number of bytes of parameters, in decimal. + // If such a name is found, take note of the size and undecorate it. + // Only do this for names that aren't C++, which is determined based on + // whether the undecorated name contains any ':' or '(' characters. + if (!wcschr(*name, ':') && !wcschr(*name, '(') && + (*name[0] == '_' || *name[0] == '@')) { + wchar_t *last_at = wcsrchr(*name + 1, '@'); + if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) { + // If this function adheres to the fastcall convention, it accepts up + // to the first 8 bytes of parameters in registers (%ecx and %edx). + // We're only interested in the stack space used for parameters, so + // so subtract 8 and don't let the size go below 0. + if (*name[0] == '@') { + if (*stack_param_size > 8) { + *stack_param_size -= 8; + } else { + *stack_param_size = 0; + } + } + + // Undecorate the name by moving it one character to the left in its + // buffer, and terminating it where the last '@' had been. + WindowsStringUtils::safe_wcsncpy(*name, length, + *name + 1, last_at - *name - 1); + } else if (*name[0] == '_') { + // This symbol's name is encoded according to the cdecl rules. The + // name doesn't end in a '@' character followed by a decimal positive + // integer, so it's not a stdcall name. Strip off the leading + // underscore. + WindowsStringUtils::safe_wcsncpy(*name, length, *name + 1, length); + } + } + } + + return true; +} + +// static +int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol *function) { + // This implementation is highly x86-specific. + + // Gather the symbols corresponding to data. + CComPtr<IDiaEnumSymbols> data_children; + if (FAILED(function->findChildren(SymTagData, NULL, nsNone, + &data_children))) { + return 0; + } + + // lowest_base is the lowest %ebp-relative byte offset used for a parameter. + // highest_end is one greater than the highest offset (i.e. base + length). + // Stack parameters are assumed to be contiguous, because in reality, they + // are. + int lowest_base = INT_MAX; + int highest_end = INT_MIN; + + CComPtr<IDiaSymbol> child; + DWORD count; + while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) { + // If any operation fails at this point, just proceed to the next child. + // Use the next_child label instead of continue because child needs to + // be released before it's reused. Declare constructable/destructable + // types early to avoid gotos that cross initializations. + CComPtr<IDiaSymbol> child_type; + + // DataIsObjectPtr is only used for |this|. Because |this| can be passed + // as a stack parameter, look for it in addition to traditional + // parameters. + DWORD child_kind; + if (FAILED(child->get_dataKind(&child_kind)) || + (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) { + goto next_child; + } + + // Only concentrate on register-relative parameters. Parameters may also + // be enregistered (passed directly in a register), but those don't + // consume any stack space, so they're not of interest. + DWORD child_location_type; + if (FAILED(child->get_locationType(&child_location_type)) || + child_location_type != LocIsRegRel) { + goto next_child; + } + + // Of register-relative parameters, the only ones that make any sense are + // %ebp- or %esp-relative. Note that MSVC's debugging information always + // gives parameters as %ebp-relative even when a function doesn't use a + // traditional frame pointer and stack parameters are accessed relative to + // %esp, so just look for %ebp-relative parameters. If you wanted to + // access parameters, you'd probably want to treat these %ebp-relative + // offsets as if they were relative to %esp before a function's prolog + // executed. + DWORD child_register; + if (FAILED(child->get_registerId(&child_register)) || + child_register != CV_REG_EBP) { + goto next_child; + } + + LONG child_register_offset; + if (FAILED(child->get_offset(&child_register_offset))) { + goto next_child; + } + + // IDiaSymbol::get_type can succeed but still pass back a NULL value. + if (FAILED(child->get_type(&child_type)) || !child_type) { + goto next_child; + } + + ULONGLONG child_length; + if (FAILED(child_type->get_length(&child_length))) { + goto next_child; + } + + // Extra scope to avoid goto jumping over variable initialization + { + int child_end = child_register_offset + static_cast<ULONG>(child_length); + if (child_register_offset < lowest_base) { + lowest_base = child_register_offset; + } + if (child_end > highest_end) { + highest_end = child_end; + } + } + +next_child: + child.Release(); + } + + int param_size = 0; + // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest + // possible address to find a stack parameter before executing a function's + // prolog (see above). Some optimizations cause parameter offsets to be + // lower than 4, but we're not concerned with those because we're only + // looking for parameters contained in addresses higher than where the + // return address is stored. + if (lowest_base < 4) { + lowest_base = 4; + } + if (highest_end > lowest_base) { + // All stack parameters are pushed as at least 4-byte quantities. If the + // last type was narrower than 4 bytes, promote it. This assumes that all + // parameters' offsets are 4-byte-aligned, which is always the case. Only + // worry about the last type, because we're not summing the type sizes, + // just looking at the lowest and highest offsets. + int remainder = highest_end % 4; + if (remainder) { + highest_end += 4 - remainder; + } + + param_size = highest_end - lowest_base; + } + + return param_size; +} + +bool PDBSourceLineWriter::WriteSymbols(FILE *symbol_file) { + output_ = symbol_file; + + // Load the OMAP information, and disable auto-translation of addresses in + // preference of doing it ourselves. + OmapData omap_data; + if (!GetOmapDataAndDisableTranslation(session_, &omap_data)) + return false; + BuildImageMap(omap_data, &image_map_); + + bool ret = PrintPDBInfo(); + // This is not a critical piece of the symbol file. + PrintPEInfo(); + ret = ret && + PrintSourceFiles() && + PrintFunctions() && + PrintFrameData(); + + output_ = NULL; + return ret; +} + +void PDBSourceLineWriter::Close() { + if (session_ != nullptr) { + session_.Release(); + } +} + +bool PDBSourceLineWriter::GetModuleInfo(PDBModuleInfo *info) { + if (!info) { + return false; + } + + info->debug_file.clear(); + info->debug_identifier.clear(); + info->cpu.clear(); + + CComPtr<IDiaSymbol> global; + if (FAILED(session_->get_globalScope(&global))) { + return false; + } + + DWORD machine_type; + // get_machineType can return S_FALSE. + if (global->get_machineType(&machine_type) == S_OK) { + // The documentation claims that get_machineType returns a value from + // the CV_CPU_TYPE_e enumeration, but that's not the case. + // Instead, it returns one of the IMAGE_FILE_MACHINE values as + // defined here: + // http://msdn.microsoft.com/en-us/library/ms680313%28VS.85%29.aspx + info->cpu = FileHeaderMachineToCpuString(static_cast<WORD>(machine_type)); + } else { + // Unexpected, but handle gracefully. + info->cpu = L"unknown"; + } + + // DWORD* and int* are not compatible. This is clean and avoids a cast. + DWORD age; + if (FAILED(global->get_age(&age))) { + return false; + } + + bool uses_guid; + if (!UsesGUID(&uses_guid)) { + return false; + } + + if (uses_guid) { + GUID guid; + if (FAILED(global->get_guid(&guid))) { + return false; + } + + info->debug_identifier = GenerateDebugIdentifier(age, guid); + } else { + DWORD signature; + if (FAILED(global->get_signature(&signature))) { + return false; + } + + info->debug_identifier = GenerateDebugIdentifier(age, signature); + } + + CComBSTR debug_file_string; + if (FAILED(global->get_symbolsFileName(&debug_file_string))) { + return false; + } + info->debug_file = + WindowsStringUtils::GetBaseName(wstring(debug_file_string)); + + return true; +} + +bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo *info) { + if (!info) { + return false; + } + + if (code_file_.empty() && !FindPEFile()) { + fprintf(stderr, "Couldn't locate EXE or DLL file.\n"); + return false; + } + + return ReadPEInfo(code_file_, info); +} + +bool PDBSourceLineWriter::UsesGUID(bool *uses_guid) { + if (!uses_guid) + return false; + + CComPtr<IDiaSymbol> global; + if (FAILED(session_->get_globalScope(&global))) + return false; + + GUID guid; + if (FAILED(global->get_guid(&guid))) + return false; + + DWORD signature; + if (FAILED(global->get_signature(&signature))) + return false; + + // There are two possibilities for guid: either it's a real 128-bit GUID + // as identified in a code module by a new-style CodeView record, or it's + // a 32-bit signature (timestamp) as identified by an old-style record. + // See MDCVInfoPDB70 and MDCVInfoPDB20 in minidump_format.h. + // + // Because DIA doesn't provide a way to directly determine whether a module + // uses a GUID or a 32-bit signature, this code checks whether the first 32 + // bits of guid are the same as the signature, and if the rest of guid is + // zero. If so, then with a pretty high degree of certainty, there's an + // old-style CodeView record in use. This method will only falsely find an + // an old-style CodeView record if a real 128-bit GUID has its first 32 + // bits set the same as the module's signature (timestamp) and the rest of + // the GUID is set to 0. This is highly unlikely. + + GUID signature_guid = {signature}; // 0-initializes other members + *uses_guid = !IsEqualGUID(guid, signature_guid); + return true; +} + +} // namespace google_breakpad |