author     | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:47:29 +0000
committer  | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:47:29 +0000
commit     | 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
tree       | a31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /tools/profiler/lul
parent     | Initial commit. (diff)
download   | firefox-esr-37a0381f8351b370577b65028ba1f6563ae23fdf.tar.xz
           | firefox-esr-37a0381f8351b370577b65028ba1f6563ae23fdf.zip
Adding upstream version 115.8.0esr. (upstream/115.8.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/profiler/lul')
-rw-r--r-- | tools/profiler/lul/AutoObjectMapper.cpp   |   79
-rw-r--r-- | tools/profiler/lul/AutoObjectMapper.h     |   64
-rw-r--r-- | tools/profiler/lul/LulCommon.cpp          |  100
-rw-r--r-- | tools/profiler/lul/LulCommonExt.h         |  509
-rw-r--r-- | tools/profiler/lul/LulDwarf.cpp           | 2538
-rw-r--r-- | tools/profiler/lul/LulDwarfExt.h          | 1312
-rw-r--r-- | tools/profiler/lul/LulDwarfInt.h          |  193
-rw-r--r-- | tools/profiler/lul/LulDwarfSummariser.cpp |  549
-rw-r--r-- | tools/profiler/lul/LulDwarfSummariser.h   |   64
-rw-r--r-- | tools/profiler/lul/LulElf.cpp             |  887
-rw-r--r-- | tools/profiler/lul/LulElfExt.h            |   69
-rw-r--r-- | tools/profiler/lul/LulElfInt.h            |  218
-rw-r--r-- | tools/profiler/lul/LulMain.cpp            | 2079
-rw-r--r-- | tools/profiler/lul/LulMain.h              |  378
-rw-r--r-- | tools/profiler/lul/LulMainInt.h           |  631
-rw-r--r-- | tools/profiler/lul/platform-linux-lul.cpp |   75
-rw-r--r-- | tools/profiler/lul/platform-linux-lul.h   |   19
17 files changed, 9764 insertions(+), 0 deletions(-)
diff --git a/tools/profiler/lul/AutoObjectMapper.cpp b/tools/profiler/lul/AutoObjectMapper.cpp new file mode 100644 index 0000000000..f7489fbfee --- /dev/null +++ b/tools/profiler/lul/AutoObjectMapper.cpp @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <sys/mman.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "PlatformMacros.h" +#include "AutoObjectMapper.h" + +// A helper function for creating failure error messages in +// AutoObjectMapper*::Map. +static void failedToMessage(void (*aLog)(const char*), const char* aHowFailed, + std::string aFileName) { + char buf[300]; + SprintfLiteral(buf, "AutoObjectMapper::Map: Failed to %s \'%s\'", aHowFailed, + aFileName.c_str()); + buf[sizeof(buf) - 1] = 0; + aLog(buf); +} + +AutoObjectMapperPOSIX::AutoObjectMapperPOSIX(void (*aLog)(const char*)) + : mImage(nullptr), mSize(0), mLog(aLog), mIsMapped(false) {} + +AutoObjectMapperPOSIX::~AutoObjectMapperPOSIX() { + if (!mIsMapped) { + // There's nothing to do. + MOZ_ASSERT(!mImage); + MOZ_ASSERT(mSize == 0); + return; + } + MOZ_ASSERT(mSize > 0); + // The following assertion doesn't necessarily have to be true, + // but we assume (reasonably enough) that no mmap facility would + // be crazy enough to map anything at page zero. + MOZ_ASSERT(mImage); + munmap(mImage, mSize); +} + +bool AutoObjectMapperPOSIX::Map(/*OUT*/ void** start, /*OUT*/ size_t* length, + std::string fileName) { + MOZ_ASSERT(!mIsMapped); + + int fd = open(fileName.c_str(), O_RDONLY); + if (fd == -1) { + failedToMessage(mLog, "open", fileName); + return false; + } + + struct stat st; + int err = fstat(fd, &st); + size_t sz = (err == 0) ? st.st_size : 0; + if (err != 0 || sz == 0) { + failedToMessage(mLog, "fstat", fileName); + close(fd); + return false; + } + + void* image = mmap(nullptr, sz, PROT_READ, MAP_SHARED, fd, 0); + if (image == MAP_FAILED) { + failedToMessage(mLog, "mmap", fileName); + close(fd); + return false; + } + + close(fd); + mIsMapped = true; + mImage = *start = image; + mSize = *length = sz; + return true; +} diff --git a/tools/profiler/lul/AutoObjectMapper.h b/tools/profiler/lul/AutoObjectMapper.h new file mode 100644 index 0000000000..f63aa43e0e --- /dev/null +++ b/tools/profiler/lul/AutoObjectMapper.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef AutoObjectMapper_h +#define AutoObjectMapper_h + +#include <string> + +#include "mozilla/Attributes.h" +#include "PlatformMacros.h" + +// A (nearly-) RAII class that maps an object in and then unmaps it on +// destruction. This base class version uses the "normal" POSIX +// functions: open, fstat, close, mmap, munmap. + +class MOZ_STACK_CLASS AutoObjectMapperPOSIX { + public: + // The constructor does not attempt to map the file, because that + // might fail. 
Instead, once the object has been constructed, + // call Map() to attempt the mapping. There is no corresponding + // Unmap() since the unmapping is done in the destructor. Failure + // messages are sent to |aLog|. + explicit AutoObjectMapperPOSIX(void (*aLog)(const char*)); + + // Unmap the file on destruction of this object. + ~AutoObjectMapperPOSIX(); + + // Map |fileName| into the address space and return the mapping + // extents. If the file is zero sized this will fail. The file is + // mapped read-only and private. Returns true iff the mapping + // succeeded, in which case *start and *length hold its extent. + // Once a call to Map succeeds, all subsequent calls to it will + // fail. + bool Map(/*OUT*/ void** start, /*OUT*/ size_t* length, std::string fileName); + + protected: + // If we are currently holding a mapped object, these record the + // mapped address range. + void* mImage; + size_t mSize; + + // A logging sink, for complaining about mapping failures. + void (*mLog)(const char*); + + private: + // Are we currently holding a mapped object? This is private to + // the base class. Derived classes need to have their own way to + // track whether they are holding a mapped object. + bool mIsMapped; + + // Disable copying and assignment. + AutoObjectMapperPOSIX(const AutoObjectMapperPOSIX&); + AutoObjectMapperPOSIX& operator=(const AutoObjectMapperPOSIX&); + // Disable heap allocation of this class. + void* operator new(size_t); + void* operator new[](size_t); + void operator delete(void*); + void operator delete[](void*); +}; + +#endif // AutoObjectMapper_h diff --git a/tools/profiler/lul/LulCommon.cpp b/tools/profiler/lul/LulCommon.cpp new file mode 100644 index 0000000000..428f102c42 --- /dev/null +++ b/tools/profiler/lul/LulCommon.cpp @@ -0,0 +1,100 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2011, 2013 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/module.cc +// src/common/unique_string.cc + +// There's no internal-only interface for LulCommon. Hence include +// the external interface directly. +#include "LulCommonExt.h" + +#include <stdlib.h> +#include <string.h> + +#include <string> +#include <map> + +namespace lul { + +using std::string; + +//////////////////////////////////////////////////////////////// +// Module +// +Module::Module(const string& name, const string& os, const string& architecture, + const string& id) + : name_(name), os_(os), architecture_(architecture), id_(id) {} + +Module::~Module() {} + +//////////////////////////////////////////////////////////////// +// UniqueString +// +class UniqueString { + public: + explicit UniqueString(string str) { str_ = strdup(str.c_str()); } + ~UniqueString() { free(reinterpret_cast<void*>(const_cast<char*>(str_))); } + const char* str_; +}; + +const char* FromUniqueString(const UniqueString* ustr) { return ustr->str_; } + +bool IsEmptyUniqueString(const UniqueString* ustr) { + return (ustr->str_)[0] == '\0'; +} + +//////////////////////////////////////////////////////////////// +// UniqueStringUniverse +// +UniqueStringUniverse::~UniqueStringUniverse() { + for (std::map<string, UniqueString*>::iterator it = map_.begin(); + it != map_.end(); it++) { + delete it->second; + } +} + +const UniqueString* UniqueStringUniverse::ToUniqueString(string str) { + std::map<string, UniqueString*>::iterator it = map_.find(str); + if (it == map_.end()) { + UniqueString* ustr = new UniqueString(str); + map_[str] = ustr; + return ustr; + } else { + return it->second; + } +} + +} // namespace lul diff --git a/tools/profiler/lul/LulCommonExt.h b/tools/profiler/lul/LulCommonExt.h new file mode 100644 index 0000000000..b20a7321ff --- /dev/null +++ b/tools/profiler/lul/LulCommonExt.h @@ -0,0 +1,509 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2010, 2012, 2013 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// module.h: Define google_breakpad::Module. A Module holds debugging +// information, and can write that information out as a Breakpad +// symbol file. + +// (C) Copyright Greg Colvin and Beman Dawes 1998, 1999. +// Copyright (c) 2001, 2002 Peter Dimov +// +// Permission to copy, use, modify, sell and distribute this software +// is granted provided this copyright notice appears in all copies. +// This software is provided "as is" without express or implied +// warranty, and with no claim as to its suitability for any purpose. +// +// See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation. +// + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/unique_string.h +// src/common/scoped_ptr.h +// src/common/module.h + +// External interface for the "Common" component of LUL. + +#ifndef LulCommonExt_h +#define LulCommonExt_h + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> + +#include <string> +#include <map> +#include <vector> +#include <cstddef> // for std::ptrdiff_t + +#include "mozilla/Assertions.h" + +namespace lul { + +using std::map; +using std::string; + +//////////////////////////////////////////////////////////////// +// UniqueString +// + +// Abstract type +class UniqueString; + +// Get the contained C string (debugging only) +const char* FromUniqueString(const UniqueString*); + +// Is the given string empty (that is, "") ? +bool IsEmptyUniqueString(const UniqueString*); + +//////////////////////////////////////////////////////////////// +// UniqueStringUniverse +// + +// All UniqueStrings live in some specific UniqueStringUniverse. +class UniqueStringUniverse { + public: + UniqueStringUniverse() {} + ~UniqueStringUniverse(); + // Convert a |string| to a UniqueString, that lives in this universe. + const UniqueString* ToUniqueString(string str); + + private: + map<string, UniqueString*> map_; +}; + +//////////////////////////////////////////////////////////////// +// GUID +// + +typedef struct { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +} MDGUID; // GUID + +typedef MDGUID GUID; + +//////////////////////////////////////////////////////////////// +// scoped_ptr +// + +// scoped_ptr mimics a built-in pointer except that it guarantees deletion +// of the object pointed to, either on destruction of the scoped_ptr or via +// an explicit reset(). scoped_ptr is a simple solution for simple needs; +// use shared_ptr or std::auto_ptr if your needs are more complex. + +// *** NOTE *** +// If your scoped_ptr is a class member of class FOO pointing to a +// forward declared type BAR (as shown below), then you MUST use a non-inlined +// version of the destructor. The destructor of a scoped_ptr (called from +// FOO's destructor) must have a complete definition of BAR in order to +// destroy it. 
Example: +// +// -- foo.h -- +// class BAR; +// +// class FOO { +// public: +// FOO(); +// ~FOO(); // Required for sources that instantiate class FOO to compile! +// +// private: +// scoped_ptr<BAR> bar_; +// }; +// +// -- foo.cc -- +// #include "foo.h" +// FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition. + +// scoped_ptr_malloc added by Google +// When one of these goes out of scope, instead of doing a delete or +// delete[], it calls free(). scoped_ptr_malloc<char> is likely to see +// much more use than any other specializations. + +// release() added by Google +// Use this to conditionally transfer ownership of a heap-allocated object +// to the caller, usually on method success. + +template <typename T> +class scoped_ptr { + private: + T* ptr; + + scoped_ptr(scoped_ptr const&); + scoped_ptr& operator=(scoped_ptr const&); + + public: + typedef T element_type; + + explicit scoped_ptr(T* p = 0) : ptr(p) {} + + ~scoped_ptr() { delete ptr; } + + void reset(T* p = 0) { + if (ptr != p) { + delete ptr; + ptr = p; + } + } + + T& operator*() const { + MOZ_ASSERT(ptr != 0); + return *ptr; + } + + T* operator->() const { + MOZ_ASSERT(ptr != 0); + return ptr; + } + + bool operator==(T* p) const { return ptr == p; } + + bool operator!=(T* p) const { return ptr != p; } + + T* get() const { return ptr; } + + void swap(scoped_ptr& b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + // no reason to use these: each scoped_ptr should have its own object + template <typename U> + bool operator==(scoped_ptr<U> const& p) const; + template <typename U> + bool operator!=(scoped_ptr<U> const& p) const; +}; + +template <typename T> +inline void swap(scoped_ptr<T>& a, scoped_ptr<T>& b) { + a.swap(b); +} + +template <typename T> +inline bool operator==(T* p, const scoped_ptr<T>& b) { + return p == b.get(); +} + +template <typename T> +inline bool operator!=(T* p, const scoped_ptr<T>& b) { + return p != b.get(); +} + +// scoped_array extends scoped_ptr to arrays. Deletion of the array pointed to +// is guaranteed, either on destruction of the scoped_array or via an explicit +// reset(). Use shared_array or std::vector if your needs are more complex. 
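A minimal usage sketch for the scoped_array template defined just below (an illustration under assumed names, not part of the original header; Foo and scoped_array_example are placeholders):

  struct Foo { int x = 0; };
  void scoped_array_example() {
    scoped_array<Foo> arr(new Foo[10]);  // takes ownership of the array
    arr[3].x = 7;                        // unchecked element access via operator[]
    arr.reset(new Foo[20]);              // the old array is freed with delete[]
    Foo* raw = arr.release();            // ownership handed back to the caller
    delete[] raw;
  }                                      // anything still owned is deleted[] on scope exit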
+ +template <typename T> +class scoped_array { + private: + T* ptr; + + scoped_array(scoped_array const&); + scoped_array& operator=(scoped_array const&); + + public: + typedef T element_type; + + explicit scoped_array(T* p = 0) : ptr(p) {} + + ~scoped_array() { delete[] ptr; } + + void reset(T* p = 0) { + if (ptr != p) { + delete[] ptr; + ptr = p; + } + } + + T& operator[](std::ptrdiff_t i) const { + MOZ_ASSERT(ptr != 0); + MOZ_ASSERT(i >= 0); + return ptr[i]; + } + + bool operator==(T* p) const { return ptr == p; } + + bool operator!=(T* p) const { return ptr != p; } + + T* get() const { return ptr; } + + void swap(scoped_array& b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + // no reason to use these: each scoped_array should have its own object + template <typename U> + bool operator==(scoped_array<U> const& p) const; + template <typename U> + bool operator!=(scoped_array<U> const& p) const; +}; + +template <class T> +inline void swap(scoped_array<T>& a, scoped_array<T>& b) { + a.swap(b); +} + +template <typename T> +inline bool operator==(T* p, const scoped_array<T>& b) { + return p == b.get(); +} + +template <typename T> +inline bool operator!=(T* p, const scoped_array<T>& b) { + return p != b.get(); +} + +// This class wraps the c library function free() in a class that can be +// passed as a template argument to scoped_ptr_malloc below. +class ScopedPtrMallocFree { + public: + inline void operator()(void* x) const { free(x); } +}; + +// scoped_ptr_malloc<> is similar to scoped_ptr<>, but it accepts a +// second template argument, the functor used to free the object. + +template <typename T, typename FreeProc = ScopedPtrMallocFree> +class scoped_ptr_malloc { + private: + T* ptr; + + scoped_ptr_malloc(scoped_ptr_malloc const&); + scoped_ptr_malloc& operator=(scoped_ptr_malloc const&); + + public: + typedef T element_type; + + explicit scoped_ptr_malloc(T* p = 0) : ptr(p) {} + + ~scoped_ptr_malloc() { free_((void*)ptr); } + + void reset(T* p = 0) { + if (ptr != p) { + free_((void*)ptr); + ptr = p; + } + } + + T& operator*() const { + MOZ_ASSERT(ptr != 0); + return *ptr; + } + + T* operator->() const { + MOZ_ASSERT(ptr != 0); + return ptr; + } + + bool operator==(T* p) const { return ptr == p; } + + bool operator!=(T* p) const { return ptr != p; } + + T* get() const { return ptr; } + + void swap(scoped_ptr_malloc& b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + // no reason to use these: each scoped_ptr_malloc should have its own object + template <typename U, typename GP> + bool operator==(scoped_ptr_malloc<U, GP> const& p) const; + template <typename U, typename GP> + bool operator!=(scoped_ptr_malloc<U, GP> const& p) const; + + static FreeProc const free_; +}; + +template <typename T, typename FP> +FP const scoped_ptr_malloc<T, FP>::free_ = FP(); + +template <typename T, typename FP> +inline void swap(scoped_ptr_malloc<T, FP>& a, scoped_ptr_malloc<T, FP>& b) { + a.swap(b); +} + +template <typename T, typename FP> +inline bool operator==(T* p, const scoped_ptr_malloc<T, FP>& b) { + return p == b.get(); +} + +template <typename T, typename FP> +inline bool operator!=(T* p, const scoped_ptr_malloc<T, FP>& b) { + return p != b.get(); +} + +//////////////////////////////////////////////////////////////// +// Module +// + +// A Module represents the contents of a module, and supports methods +// for adding information 
produced by parsing STABS or DWARF data +// --- possibly both from the same file --- and then writing out the +// unified contents as a Breakpad-format symbol file. +class Module { + public: + // The type of addresses and sizes in a symbol table. + typedef uint64_t Address; + + // Representation of an expression. This can either be a postfix + // expression, in which case it is stored as a string, or a simple + // expression of the form (identifier + imm) or *(identifier + imm). + // It can also be invalid (denoting "no value"). + enum ExprHow { kExprInvalid = 1, kExprPostfix, kExprSimple, kExprSimpleMem }; + + struct Expr { + // Construct a simple-form expression + Expr(const UniqueString* ident, long offset, bool deref) { + if (IsEmptyUniqueString(ident)) { + Expr(); + } else { + postfix_ = ""; + ident_ = ident; + offset_ = offset; + how_ = deref ? kExprSimpleMem : kExprSimple; + } + } + + // Construct an invalid expression + Expr() { + postfix_ = ""; + ident_ = nullptr; + offset_ = 0; + how_ = kExprInvalid; + } + + // Return the postfix expression string, either directly, + // if this is a postfix expression, or by synthesising it + // for a simple expression. + std::string getExprPostfix() const { + switch (how_) { + case kExprPostfix: + return postfix_; + case kExprSimple: + case kExprSimpleMem: { + char buf[40]; + sprintf(buf, " %ld %c%s", labs(offset_), offset_ < 0 ? '-' : '+', + how_ == kExprSimple ? "" : " ^"); + return std::string(FromUniqueString(ident_)) + std::string(buf); + } + case kExprInvalid: + default: + MOZ_ASSERT(0 && "getExprPostfix: invalid Module::Expr type"); + return "Expr::genExprPostfix: kExprInvalid"; + } + } + + // The identifier that gives the starting value for simple expressions. + const UniqueString* ident_; + // The offset to add for simple expressions. + long offset_; + // The Postfix expression string to evaluate for non-simple expressions. + std::string postfix_; + // The operation expressed by this expression. + ExprHow how_; + }; + + // A map from register names to expressions that recover + // their values. This can represent a complete set of rules to + // follow at some address, or a set of changes to be applied to an + // extant set of rules. + // NOTE! there are two completely different types called RuleMap. This + // is one of them. + typedef std::map<const UniqueString*, Expr> RuleMap; + + // A map from addresses to RuleMaps, representing changes that take + // effect at given addresses. + typedef std::map<Address, RuleMap> RuleChangeMap; + + // A range of 'STACK CFI' stack walking information. An instance of + // this structure corresponds to a 'STACK CFI INIT' record and the + // subsequent 'STACK CFI' records that fall within its range. + struct StackFrameEntry { + // The starting address and number of bytes of machine code this + // entry covers. + Address address, size; + + // The initial register recovery rules, in force at the starting + // address. + RuleMap initial_rules; + + // A map from addresses to rule changes. To find the rules in + // force at a given address, start with initial_rules, and then + // apply the changes given in this map for all addresses up to and + // including the address you're interested in. + RuleChangeMap rule_changes; + }; + + // Create a new module with the given name, operating system, + // architecture, and ID string. + Module(const std::string& name, const std::string& os, + const std::string& architecture, const std::string& id); + ~Module(); + + private: + // Module header entries. 
+ std::string name_, os_, architecture_, id_; +}; + +} // namespace lul + +#endif // LulCommonExt_h diff --git a/tools/profiler/lul/LulDwarf.cpp b/tools/profiler/lul/LulDwarf.cpp new file mode 100644 index 0000000000..ea38ce50ea --- /dev/null +++ b/tools/profiler/lul/LulDwarf.cpp @@ -0,0 +1,2538 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit, +// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details. 
+ +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/bytereader.cc +// src/common/dwarf/dwarf2reader.cc +// src/common/dwarf_cfi_to_module.cc + +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#include <stack> +#include <string> + +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#include "mozilla/Sprintf.h" +#include "mozilla/Vector.h" + +#include "LulCommonExt.h" +#include "LulDwarfInt.h" + +// Set this to 1 for verbose logging +#define DEBUG_DWARF 0 + +namespace lul { + +using std::pair; +using std::string; + +ByteReader::ByteReader(enum Endianness endian) + : offset_reader_(NULL), + address_reader_(NULL), + endian_(endian), + address_size_(0), + offset_size_(0), + have_section_base_(), + have_text_base_(), + have_data_base_(), + have_function_base_() {} + +ByteReader::~ByteReader() {} + +void ByteReader::SetOffsetSize(uint8 size) { + offset_size_ = size; + MOZ_ASSERT(size == 4 || size == 8); + if (size == 4) { + this->offset_reader_ = &ByteReader::ReadFourBytes; + } else { + this->offset_reader_ = &ByteReader::ReadEightBytes; + } +} + +void ByteReader::SetAddressSize(uint8 size) { + address_size_ = size; + MOZ_ASSERT(size == 4 || size == 8); + if (size == 4) { + this->address_reader_ = &ByteReader::ReadFourBytes; + } else { + this->address_reader_ = &ByteReader::ReadEightBytes; + } +} + +uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) { + const uint64 initial_length = ReadFourBytes(start); + start += 4; + + // In DWARF2/3, if the initial length is all 1 bits, then the offset + // size is 8 and we need to read the next 8 bytes for the real length. + if (initial_length == 0xffffffff) { + SetOffsetSize(8); + *len = 12; + return ReadOffset(start); + } else { + SetOffsetSize(4); + *len = 4; + } + return initial_length; +} + +bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const { + if (encoding == DW_EH_PE_omit) return true; + if (encoding == DW_EH_PE_aligned) return true; + if ((encoding & 0x7) > DW_EH_PE_udata8) return false; + if ((encoding & 0x70) > DW_EH_PE_funcrel) return false; + return true; +} + +bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const { + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + return true; + case DW_EH_PE_pcrel: + return have_section_base_; + case DW_EH_PE_textrel: + return have_text_base_; + case DW_EH_PE_datarel: + return have_data_base_; + case DW_EH_PE_funcrel: + return have_function_base_; + default: + return false; + } +} + +uint64 ByteReader::ReadEncodedPointer(const char* buffer, + DwarfPointerEncoding encoding, + size_t* len) const { + // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't + // see it here. + MOZ_ASSERT(encoding != DW_EH_PE_omit); + + // The Linux Standards Base 4.0 does not make this clear, but the + // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c) + // agree that aligned pointers are always absolute, machine-sized, + // machine-signed pointers. + if (encoding == DW_EH_PE_aligned) { + MOZ_ASSERT(have_section_base_); + + // We don't need to align BUFFER in *our* address space. Rather, we + // need to find the next position in our buffer that would be aligned + // when the .eh_frame section the buffer contains is loaded into the + // program's memory. So align assuming that buffer_base_ gets loaded at + // address section_base_, where section_base_ itself may or may not be + // aligned. 
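  // Worked example with assumed values (AddressSize() == 8,
  // section_base_ == 0x1003, buffer_base_ == 0x5000, buffer == 0x5001):
  // skew == 3, offset == 3 + 1 == 4, aligned rounds up to 8, and
  // aligned_buffer == 0x5000 + (8 - 3) == 0x5005, which corresponds to the
  // loaded address 0x1003 + 5 == 0x1008, a multiple of 8 as required.
  // The stored length is then (0x5005 - 0x5001) + 8 == 12 bytes.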
+ + // First, find the offset to START from the closest prior aligned + // address. + uint64 skew = section_base_ & (AddressSize() - 1); + // Now find the offset from that aligned address to buffer. + uint64 offset = skew + (buffer - buffer_base_); + // Round up to the next boundary. + uint64 aligned = (offset + AddressSize() - 1) & -AddressSize(); + // Convert back to a pointer. + const char* aligned_buffer = buffer_base_ + (aligned - skew); + // Finally, store the length and actually fetch the pointer. + *len = aligned_buffer - buffer + AddressSize(); + return ReadAddress(aligned_buffer); + } + + // Extract the value first, ignoring whether it's a pointer or an + // offset relative to some base. + uint64 offset; + switch (encoding & 0x0f) { + case DW_EH_PE_absptr: + // DW_EH_PE_absptr is weird, as it is used as a meaningful value for + // both the high and low nybble of encoding bytes. When it appears in + // the high nybble, it means that the pointer is absolute, not an + // offset from some base address. When it appears in the low nybble, + // as here, it means that the pointer is stored as a normal + // machine-sized and machine-signed address. A low nybble of + // DW_EH_PE_absptr does not imply that the pointer is absolute; it is + // correct for us to treat the value as an offset from a base address + // if the upper nybble is not DW_EH_PE_absptr. + offset = ReadAddress(buffer); + *len = AddressSize(); + break; + + case DW_EH_PE_uleb128: + offset = ReadUnsignedLEB128(buffer, len); + break; + + case DW_EH_PE_udata2: + offset = ReadTwoBytes(buffer); + *len = 2; + break; + + case DW_EH_PE_udata4: + offset = ReadFourBytes(buffer); + *len = 4; + break; + + case DW_EH_PE_udata8: + offset = ReadEightBytes(buffer); + *len = 8; + break; + + case DW_EH_PE_sleb128: + offset = ReadSignedLEB128(buffer, len); + break; + + case DW_EH_PE_sdata2: + offset = ReadTwoBytes(buffer); + // Sign-extend from 16 bits. + offset = (offset ^ 0x8000) - 0x8000; + *len = 2; + break; + + case DW_EH_PE_sdata4: + offset = ReadFourBytes(buffer); + // Sign-extend from 32 bits. + offset = (offset ^ 0x80000000ULL) - 0x80000000ULL; + *len = 4; + break; + + case DW_EH_PE_sdata8: + // No need to sign-extend; this is the full width of our type. + offset = ReadEightBytes(buffer); + *len = 8; + break; + + default: + abort(); + } + + // Find the appropriate base address. + uint64 base; + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + base = 0; + break; + + case DW_EH_PE_pcrel: + MOZ_ASSERT(have_section_base_); + base = section_base_ + (buffer - buffer_base_); + break; + + case DW_EH_PE_textrel: + MOZ_ASSERT(have_text_base_); + base = text_base_; + break; + + case DW_EH_PE_datarel: + MOZ_ASSERT(have_data_base_); + base = data_base_; + break; + + case DW_EH_PE_funcrel: + MOZ_ASSERT(have_function_base_); + base = function_base_; + break; + + default: + abort(); + } + + uint64 pointer = base + offset; + + // Remove inappropriate upper bits. + if (AddressSize() == 4) + pointer = pointer & 0xffffffff; + else + MOZ_ASSERT(AddressSize() == sizeof(uint64)); + + return pointer; +} + +// A DWARF rule for recovering the address or value of a register, or +// computing the canonical frame address. This is an 8-way sum-of-products +// type. Excluding the INVALID variant, there is one subclass of this for +// each '*Rule' member function in CallFrameInfo::Handler. +// +// This could logically be nested within State, but then the qualified names +// get horrendous. 
+ +class CallFrameInfo::Rule final { + public: + enum Tag { + INVALID, + Undefined, + SameValue, + Offset, + ValOffset, + Register, + Expression, + ValExpression + }; + + private: + // tag_ (below) indicates the form of the expression. There are 7 forms + // plus INVALID. All non-INVALID expressions denote a machine-word-sized + // value at unwind time. The description below assumes the presence of, at + // unwind time: + // + // * a function R, which takes a Dwarf register number and returns its value + // in the callee frame (the one we are unwinding out of). + // + // * a function EvalDwarfExpr, which evaluates a Dwarf expression. + // + // Register numbers are encoded using the target ABI's Dwarf + // register-numbering conventions. Except where otherwise noted, a register + // value may also be the special value CallFrameInfo::Handler::kCFARegister + // ("the CFA"). + // + // The expression forms are represented using tag_, word1_ and word2_. The + // forms and denoted values are as follows: + // + // * INVALID: not a valid expression. + // valid fields: (none) + // denotes: no value + // + // * Undefined: denotes no value. This is used for a register whose value + // cannot be recovered. + // valid fields: (none) + // denotes: no value + // + // * SameValue: the register's value is the same as in the callee. + // valid fields: (none) + // denotes: R(the register that this Rule is associated with, + // not stored here) + // + // * Offset: the register's value is in memory at word2_ bytes away from + // Dwarf register number word1_. word2_ is interpreted as a *signed* + // offset. + // valid fields: word1_=DwarfReg, word2=Offset + // denotes: *(R(word1_) + word2_) + // + // * ValOffset: same as Offset, without the dereference. + // valid fields: word1_=DwarfReg, word2=Offset + // denotes: R(word1_) + word2_ + // + // * Register: the register's value is in some other register, + // which may not be the CFA. + // valid fields: word1_=DwarfReg + // denotes: R(word1_) + // + // * Expression: the register's value is in memory at a location that can be + // computed from the Dwarf expression contained in the word2_ bytes + // starting at word1_. Note these locations are into the area of the .so + // temporarily mmaped info for debuginfo reading and have no validity once + // debuginfo reading has finished. + // valid fields: ExprStart=word1_, ExprLen=word2_ + // denotes: *(EvalDwarfExpr(word1_, word2_)) + // + // * ValExpression: same as Expression, without the dereference. + // valid fields: ExprStart=word1_, ExprLen=word2_ + // denotes: EvalDwarfExpr(word1_, word2_) + // + + // 3 words (or less) for representation. Unused word1_/word2_ fields must + // be set to zero. + Tag tag_; + uintptr_t word1_; + uintptr_t word2_; + + // To ensure that word1_ can hold a pointer to an expression string. + static_assert(sizeof(const char*) <= sizeof(word1_)); + // To ensure that word2_ can hold any string length or memory offset. + static_assert(sizeof(size_t) <= sizeof(word2_)); + + // This class denotes an 8-way sum-of-product type, and accessing invalid + // fields is meaningless. The accessors and constructors below enforce + // that. 
+ bool isCanonical() const { + switch (tag_) { + case Tag::INVALID: + case Tag::Undefined: + case Tag::SameValue: + return word1_ == 0 && word2_ == 0; + case Tag::Offset: + case Tag::ValOffset: + return true; + case Tag::Register: + return word2_ == 0; + case Tag::Expression: + case Tag::ValExpression: + return true; + default: + MOZ_CRASH(); + } + } + + public: + Tag tag() const { return tag_; } + int dwreg() const { + switch (tag_) { + case Tag::Offset: + case Tag::ValOffset: + case Tag::Register: + return (int)word1_; + default: + MOZ_CRASH(); + } + } + intptr_t offset() const { + switch (tag_) { + case Tag::Offset: + case Tag::ValOffset: + return (intptr_t)word2_; + default: + MOZ_CRASH(); + } + } + ImageSlice expr() const { + switch (tag_) { + case Tag::Expression: + case Tag::ValExpression: + return ImageSlice((const char*)word1_, (size_t)word2_); + default: + MOZ_CRASH(); + } + } + + // Constructor-y stuff + Rule() { + tag_ = Tag::INVALID; + word1_ = 0; + word2_ = 0; + } + + static Rule mkINVALID() { + Rule r; // is initialised by Rule() + return r; + } + static Rule mkUndefinedRule() { + Rule r; + r.tag_ = Tag::Undefined; + r.word1_ = 0; + r.word2_ = 0; + return r; + } + static Rule mkSameValueRule() { + Rule r; + r.tag_ = Tag::SameValue; + r.word1_ = 0; + r.word2_ = 0; + return r; + } + static Rule mkOffsetRule(int dwreg, intptr_t offset) { + Rule r; + r.tag_ = Tag::Offset; + r.word1_ = (uintptr_t)dwreg; + r.word2_ = (uintptr_t)offset; + return r; + } + static Rule mkValOffsetRule(int dwreg, intptr_t offset) { + Rule r; + r.tag_ = Tag::ValOffset; + r.word1_ = (uintptr_t)dwreg; + r.word2_ = (uintptr_t)offset; + return r; + } + static Rule mkRegisterRule(int dwreg) { + Rule r; + r.tag_ = Tag::Register; + r.word1_ = (uintptr_t)dwreg; + r.word2_ = 0; + return r; + } + static Rule mkExpressionRule(ImageSlice expr) { + Rule r; + r.tag_ = Tag::Expression; + r.word1_ = (uintptr_t)expr.start_; + r.word2_ = (uintptr_t)expr.length_; + return r; + } + static Rule mkValExpressionRule(ImageSlice expr) { + Rule r; + r.tag_ = Tag::ValExpression; + r.word1_ = (uintptr_t)expr.start_; + r.word2_ = (uintptr_t)expr.length_; + return r; + } + + // Misc + inline bool isVALID() const { return tag_ != Tag::INVALID; } + + bool operator==(const Rule& rhs) const { + MOZ_ASSERT(isVALID() && rhs.isVALID()); + MOZ_ASSERT(isCanonical()); + MOZ_ASSERT(rhs.isCanonical()); + if (tag_ != rhs.tag_) { + return false; + } + switch (tag_) { + case Tag::INVALID: + MOZ_CRASH(); + case Tag::Undefined: + case Tag::SameValue: + return true; + case Tag::Offset: + case Tag::ValOffset: + return word1_ == rhs.word1_ && word2_ == rhs.word2_; + case Tag::Register: + return word1_ == rhs.word1_; + case Tag::Expression: + case Tag::ValExpression: + return expr() == rhs.expr(); + default: + MOZ_CRASH(); + } + } + + bool operator!=(const Rule& rhs) const { return !(*this == rhs); } + + // Tell HANDLER that, at ADDRESS in the program, REG can be + // recovered using this rule. If REG is kCFARegister, then this rule + // describes how to compute the canonical frame address. Return what the + // HANDLER member function returned. 
+ bool Handle(Handler* handler, uint64 address, int reg) const { + MOZ_ASSERT(isVALID()); + MOZ_ASSERT(isCanonical()); + switch (tag_) { + case Tag::Undefined: + return handler->UndefinedRule(address, reg); + case Tag::SameValue: + return handler->SameValueRule(address, reg); + case Tag::Offset: + return handler->OffsetRule(address, reg, word1_, word2_); + case Tag::ValOffset: + return handler->ValOffsetRule(address, reg, word1_, word2_); + case Tag::Register: + return handler->RegisterRule(address, reg, word1_); + case Tag::Expression: + return handler->ExpressionRule( + address, reg, ImageSlice((const char*)word1_, (size_t)word2_)); + case Tag::ValExpression: + return handler->ValExpressionRule( + address, reg, ImageSlice((const char*)word1_, (size_t)word2_)); + default: + MOZ_CRASH(); + } + } + + void SetBaseRegister(unsigned reg) { + MOZ_ASSERT(isVALID()); + MOZ_ASSERT(isCanonical()); + switch (tag_) { + case Tag::ValOffset: + word1_ = reg; + break; + case Tag::Offset: + // We don't actually need SetBaseRegister or SetOffset here, since they + // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it + // doesn't make sense to use OffsetRule for computing the CFA: it + // computes the address at which a register is saved, not a value. + // (fallthrough) + case Tag::Undefined: + case Tag::SameValue: + case Tag::Register: + case Tag::Expression: + case Tag::ValExpression: + // Do nothing + break; + default: + MOZ_CRASH(); + } + } + + void SetOffset(long long offset) { + MOZ_ASSERT(isVALID()); + MOZ_ASSERT(isCanonical()); + switch (tag_) { + case Tag::ValOffset: + word2_ = offset; + break; + case Tag::Offset: + // Same comment as in SetBaseRegister applies + // (fallthrough) + case Tag::Undefined: + case Tag::SameValue: + case Tag::Register: + case Tag::Expression: + case Tag::ValExpression: + // Do nothing + break; + default: + MOZ_CRASH(); + } + } + + // For debugging only + string show() const { + char buf[100]; + string s = ""; + switch (tag_) { + case Tag::INVALID: + s = "INVALID"; + break; + case Tag::Undefined: + s = "Undefined"; + break; + case Tag::SameValue: + s = "SameValue"; + break; + case Tag::Offset: + s = "Offset{..}"; + break; + case Tag::ValOffset: + sprintf(buf, "ValOffset{reg=%d offs=%lld}", (int)word1_, + (long long int)word2_); + s = string(buf); + break; + case Tag::Register: + s = "Register{..}"; + break; + case Tag::Expression: + s = "Expression{..}"; + break; + case Tag::ValExpression: + s = "ValExpression{..}"; + break; + default: + MOZ_CRASH(); + } + return s; + } +}; + +// `RuleMapLowLevel` is a simple class that maps from `int` (register numbers) +// to `Rule`. This is implemented as a vector of `<int, Rule>` pairs, with a +// 12-element inline capacity. From a big-O perspective this is obviously a +// terrible way to implement an associative map. This workload is however +// quite special in that the maximum number of elements is normally 7 (on +// x86_64-linux), and so this implementation is much faster than one based on +// std::map with its attendant R-B-tree node allocation and balancing +// overheads. +// +// An iterator that enumerates the mapping in increasing order of the `int` +// keys is provided. This ordered iteration facility is required by +// CallFrameInfo::RuleMap::HandleTransitionTo, which needs to iterate through +// two such maps simultaneously and in-order so as to compare them. + +// All `Rule`s in the map must satisfy `isVALID()`. 
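// Illustration with assumed register numbers (not from the original source):
// after set(7, r7), set(3, r3) and set(12, r12), entries_ holds the pairs
// (3,r3), (7,r7), (12,r12) in that order, get(7) returns r7, get(5) returns
// Rule::mkINVALID(), and Iter visits the three entries in increasing
// register-number order.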
That conveniently means +// that `Rule::mkINVALID()` can be used to indicate "not found` in `get()`. + +class CallFrameInfo::RuleMapLowLevel { + using Entry = pair<int, Rule>; + + // The inline capacity of 12 is carefully chosen. It would be wise to make + // careful measurements of time, instruction count, allocation count and + // allocated bytes before changing it. For x86_64-linux, a value of 8 is + // marginally better; using 12 increases the total heap bytes allocated by + // around 20%. For arm64-linux, a value of 24 is better; using 12 increases + // the total blocks allocated by around 20%. But it's a not bad tradeoff + // for both targets, and in any case is vastly superior to the previous + // scheme of using `std::map`. + mozilla::Vector<Entry, 12> entries_; + + public: + void clear() { entries_.clear(); } + + RuleMapLowLevel() { clear(); } + + RuleMapLowLevel& operator=(const RuleMapLowLevel& rhs) { + entries_.clear(); + for (size_t i = 0; i < rhs.entries_.length(); i++) { + bool ok = entries_.append(rhs.entries_[i]); + MOZ_RELEASE_ASSERT(ok); + } + return *this; + } + + void set(int reg, Rule rule) { + MOZ_ASSERT(rule.isVALID()); + // Find the place where it should go, if any + size_t i = 0; + size_t nEnt = entries_.length(); + while (i < nEnt && entries_[i].first < reg) { + i++; + } + if (i == nEnt) { + // No entry exists, and all the existing ones are for lower register + // numbers. So just add it at the end. + bool ok = entries_.append(Entry(reg, rule)); + MOZ_RELEASE_ASSERT(ok); + } else { + // It needs to live at location `i`, and .. + MOZ_ASSERT(i < nEnt); + if (entries_[i].first == reg) { + // .. there's already an old entry, so just update it. + entries_[i].second = rule; + } else { + // .. there's no previous entry, so shift `i` and all those following + // it one place to the right, and put the new entry at `i`. Doing it + // manually is measurably cheaper than using `Vector::insert`. + MOZ_ASSERT(entries_[i].first > reg); + bool ok = entries_.append(Entry(999999, Rule::mkINVALID())); + MOZ_RELEASE_ASSERT(ok); + for (size_t j = nEnt; j >= i + 1; j--) { + entries_[j] = entries_[j - 1]; + } + entries_[i] = Entry(reg, rule); + } + } + // Check in-order-ness and validity. + for (size_t i = 0; i < entries_.length(); i++) { + MOZ_ASSERT(entries_[i].second.isVALID()); + MOZ_ASSERT_IF(i > 0, entries_[i - 1].first < entries_[i].first); + } + MOZ_ASSERT(get(reg).isVALID()); + } + + // Find the entry for `reg`, or return `Rule::mkINVALID()` if not found. + Rule get(int reg) const { + size_t nEnt = entries_.length(); + // "early exit" in the case where `entries_[i].first > reg` was tested on + // x86_64 and found to be slightly slower than just testing all entries, + // presumably because the reduced amount of searching was not offset by + // the cost of an extra test per iteration. + for (size_t i = 0; i < nEnt; i++) { + if (entries_[i].first == reg) { + CallFrameInfo::Rule ret = entries_[i].second; + MOZ_ASSERT(ret.isVALID()); + return ret; + } + } + return CallFrameInfo::Rule::mkINVALID(); + } + + // A very simple in-order iteration facility. + class Iter { + const RuleMapLowLevel* rmll_; + size_t nextIx_; + + public: + explicit Iter(const RuleMapLowLevel* rmll) : rmll_(rmll), nextIx_(0) {} + bool avail() const { return nextIx_ < rmll_->entries_.length(); } + bool finished() const { return !avail(); } + // Move the iterator to the next entry. 
+ void step() { + MOZ_RELEASE_ASSERT(nextIx_ < rmll_->entries_.length()); + nextIx_++; + } + // Get the value at the current iteration point, but don't advance to the + // next entry. + pair<int, Rule> peek() { + MOZ_RELEASE_ASSERT(nextIx_ < rmll_->entries_.length()); + return rmll_->entries_[nextIx_]; + } + }; +}; + +// A map from register numbers to rules. This is a wrapper around +// `RuleMapLowLevel`, with added logic for dealing with the "special" CFA +// rule, and with `HandleTransitionTo`, which effectively computes the +// difference between two `RuleMaps`. + +class CallFrameInfo::RuleMap { + public: + RuleMap() : cfa_rule_(Rule::mkINVALID()) {} + RuleMap(const RuleMap& rhs) : cfa_rule_(Rule::mkINVALID()) { *this = rhs; } + ~RuleMap() { Clear(); } + + RuleMap& operator=(const RuleMap& rhs); + + // Set the rule for computing the CFA to RULE. + void SetCFARule(Rule rule) { cfa_rule_ = rule; } + + // Return the current CFA rule. Be careful not to modify it -- it's returned + // by value. If you want to modify the CFA rule, use CFARuleRef() instead. + // We use these two for DW_CFA_def_cfa_offset and DW_CFA_def_cfa_register, + // and for detecting references to the CFA before a rule for it has been + // established. + Rule CFARule() const { return cfa_rule_; } + Rule* CFARuleRef() { return &cfa_rule_; } + + // Return the rule for REG, or the INVALID rule if there is none. + Rule RegisterRule(int reg) const; + + // Set the rule for computing REG to RULE. + void SetRegisterRule(int reg, Rule rule); + + // Make all the appropriate calls to HANDLER as if we were changing from + // this RuleMap to NEW_RULES at ADDRESS. We use this to implement + // DW_CFA_restore_state, where lots of rules can change simultaneously. + // Return true if all handlers returned true; otherwise, return false. + bool HandleTransitionTo(Handler* handler, uint64 address, + const RuleMap& new_rules) const; + + private: + // Remove all register rules and clear cfa_rule_. + void Clear(); + + // The rule for computing the canonical frame address. + Rule cfa_rule_; + + // A map from register numbers to postfix expressions to recover + // their values. + RuleMapLowLevel registers_; +}; + +CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) { + Clear(); + if (rhs.cfa_rule_.isVALID()) cfa_rule_ = rhs.cfa_rule_; + registers_ = rhs.registers_; + return *this; +} + +CallFrameInfo::Rule CallFrameInfo::RuleMap::RegisterRule(int reg) const { + MOZ_ASSERT(reg != Handler::kCFARegister); + return registers_.get(reg); +} + +void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule rule) { + MOZ_ASSERT(reg != Handler::kCFARegister); + MOZ_ASSERT(rule.isVALID()); + registers_.set(reg, rule); +} + +bool CallFrameInfo::RuleMap::HandleTransitionTo( + Handler* handler, uint64 address, const RuleMap& new_rules) const { + // Transition from cfa_rule_ to new_rules.cfa_rule_. + if (cfa_rule_.isVALID() && new_rules.cfa_rule_.isVALID()) { + if (cfa_rule_ != new_rules.cfa_rule_ && + !new_rules.cfa_rule_.Handle(handler, address, Handler::kCFARegister)) { + return false; + } + } else if (cfa_rule_.isVALID()) { + // this RuleMap has a CFA rule but new_rules doesn't. + // CallFrameInfo::Handler has no way to handle this --- and shouldn't; + // it's garbage input. The instruction interpreter should have + // detected this and warned, so take no action here. + } else if (new_rules.cfa_rule_.isVALID()) { + // This shouldn't be possible: NEW_RULES is some prior state, and + // there's no way to remove entries. 
+ MOZ_ASSERT(0); + } else { + // Both CFA rules are empty. No action needed. + } + + // Traverse the two maps in order by register number, and report + // whatever differences we find. + RuleMapLowLevel::Iter old_it(®isters_); + RuleMapLowLevel::Iter new_it(&new_rules.registers_); + while (!old_it.finished() && !new_it.finished()) { + pair<int, Rule> old_pair = old_it.peek(); + pair<int, Rule> new_pair = new_it.peek(); + if (old_pair.first < new_pair.first) { + // This RuleMap has an entry for old.first, but NEW_RULES doesn't. + // + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + if (!handler->SameValueRule(address, old_pair.first)) { + return false; + } + old_it.step(); + } else if (old_pair.first > new_pair.first) { + // NEW_RULES has an entry for new_pair.first, but this RuleMap + // doesn't. This shouldn't be possible: NEW_RULES is some prior + // state, and there's no way to remove entries. + MOZ_ASSERT(0); + } else { + // Both maps have an entry for this register. Report the new + // rule if it is different. + if (old_pair.second != new_pair.second && + !new_pair.second.Handle(handler, address, new_pair.first)) { + return false; + } + new_it.step(); + old_it.step(); + } + } + // Finish off entries from this RuleMap with no counterparts in new_rules. + while (!old_it.finished()) { + pair<int, Rule> old_pair = old_it.peek(); + if (!handler->SameValueRule(address, old_pair.first)) return false; + old_it.step(); + } + // Since we only make transitions from a rule set to some previously + // saved rule set, and we can only add rules to the map, NEW_RULES + // must have fewer rules than *this. + MOZ_ASSERT(new_it.finished()); + + return true; +} + +// Remove all register rules and clear cfa_rule_. +void CallFrameInfo::RuleMap::Clear() { + cfa_rule_ = Rule::mkINVALID(); + registers_.clear(); +} + +// The state of the call frame information interpreter as it processes +// instructions from a CIE and FDE. +class CallFrameInfo::State { + public: + // Create a call frame information interpreter state with the given + // reporter, reader, handler, and initial call frame info address. + State(ByteReader* reader, Handler* handler, Reporter* reporter, + uint64 address) + : reader_(reader), + handler_(handler), + reporter_(reporter), + address_(address), + entry_(NULL), + cursor_(NULL), + saved_rules_(NULL) {} + + ~State() { + if (saved_rules_) delete saved_rules_; + } + + // Interpret instructions from CIE, save the resulting rule set for + // DW_CFA_restore instructions, and return true. On error, report + // the problem to reporter_ and return false. + bool InterpretCIE(const CIE& cie); + + // Interpret instructions from FDE, and return true. On error, + // report the problem to reporter_ and return false. + bool InterpretFDE(const FDE& fde); + + private: + // The operands of a CFI instruction, for ParseOperands. + struct Operands { + unsigned register_number; // A register number. + uint64 offset; // An offset or address. + long signed_offset; // A signed offset. + ImageSlice expression; // A DWARF expression. + }; + + // Parse CFI instruction operands from STATE's instruction stream as + // described by FORMAT. On success, populate OPERANDS with the + // results, and return true. On failure, report the problem and + // return false. 
+ // + // Each character of FORMAT should be one of the following: + // + // 'r' unsigned LEB128 register number (OPERANDS->register_number) + // 'o' unsigned LEB128 offset (OPERANDS->offset) + // 's' signed LEB128 offset (OPERANDS->signed_offset) + // 'a' machine-size address (OPERANDS->offset) + // (If the CIE has a 'z' augmentation string, 'a' uses the + // encoding specified by the 'R' argument.) + // '1' a one-byte offset (OPERANDS->offset) + // '2' a two-byte offset (OPERANDS->offset) + // '4' a four-byte offset (OPERANDS->offset) + // '8' an eight-byte offset (OPERANDS->offset) + // 'e' a DW_FORM_block holding a (OPERANDS->expression) + // DWARF expression + bool ParseOperands(const char* format, Operands* operands); + + // Interpret one CFI instruction from STATE's instruction stream, update + // STATE, report any rule changes to handler_, and return true. On + // failure, report the problem and return false. + MOZ_ALWAYS_INLINE bool DoInstruction(); + + // Repeatedly call `DoInstruction`, until either: + // * it returns `false`, which indicates some kind of failure, + // in which case return `false` from here too, or + // * we've run out of instructions (that is, `cursor_ >= entry_->end`), + // in which case return `true`. + // This is marked as never-inline because it is the only place that + // `DoInstruction` is called from, and we want to maximise the chances that + // `DoInstruction` is inlined into this routine. + MOZ_NEVER_INLINE bool DoInstructions(); + + // The following Do* member functions are subroutines of DoInstruction, + // factoring out the actual work of operations that have several + // different encodings. + + // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and + // return true. On failure, report and return false. (Used for + // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.) + bool DoDefCFA(unsigned base_register, long offset); + + // Change the offset of the CFA rule to OFFSET, and return true. On + // failure, report and return false. (Subroutine for + // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.) + bool DoDefCFAOffset(long offset); + + // Specify that REG can be recovered using RULE, and return true. On + // failure, report and return false. + bool DoRule(unsigned reg, Rule rule); + + // Specify that REG can be found at OFFSET from the CFA, and return true. + // On failure, report and return false. (Subroutine for DW_CFA_offset, + // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.) + bool DoOffset(unsigned reg, long offset); + + // Specify that the caller's value for REG is the CFA plus OFFSET, + // and return true. On failure, report and return false. (Subroutine + // for DW_CFA_val_offset and DW_CFA_val_offset_sf.) + bool DoValOffset(unsigned reg, long offset); + + // Restore REG to the rule established in the CIE, and return true. On + // failure, report and return false. (Subroutine for DW_CFA_restore and + // DW_CFA_restore_extended.) + bool DoRestore(unsigned reg); + + // Return the section offset of the instruction at cursor. For use + // in error messages. + uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); } + + // Report that entry_ is incomplete, and return false. For brevity. + bool ReportIncomplete() { + reporter_->Incomplete(entry_->offset, entry_->kind); + return false; + } + + // For reading multi-byte values with the appropriate endianness. + ByteReader* reader_; + + // The handler to which we should report the data we find. 
+ Handler* handler_; + + // For reporting problems in the info we're parsing. + Reporter* reporter_; + + // The code address to which the next instruction in the stream applies. + uint64 address_; + + // The entry whose instructions we are currently processing. This is + // first a CIE, and then an FDE. + const Entry* entry_; + + // The next instruction to process. + const char* cursor_; + + // The current set of rules. + RuleMap rules_; + + // The set of rules established by the CIE, used by DW_CFA_restore + // and DW_CFA_restore_extended. We set this after interpreting the + // CIE's instructions. + RuleMap cie_rules_; + + // A stack of saved states, for DW_CFA_remember_state and + // DW_CFA_restore_state. + std::stack<RuleMap>* saved_rules_; +}; + +bool CallFrameInfo::State::InterpretCIE(const CIE& cie) { + entry_ = &cie; + cursor_ = entry_->instructions; + if (!DoInstructions()) { + return false; + } + // Note the rules established by the CIE, for use by DW_CFA_restore + // and DW_CFA_restore_extended. + cie_rules_ = rules_; + return true; +} + +bool CallFrameInfo::State::InterpretFDE(const FDE& fde) { + entry_ = &fde; + cursor_ = entry_->instructions; + return DoInstructions(); +} + +bool CallFrameInfo::State::ParseOperands(const char* format, + Operands* operands) { + size_t len; + const char* operand; + + for (operand = format; *operand; operand++) { + size_t bytes_left = entry_->end - cursor_; + switch (*operand) { + case 'r': + operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'o': + operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 's': + operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'a': + operands->offset = reader_->ReadEncodedPointer( + cursor_, entry_->cie->pointer_encoding, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case '1': + if (1 > bytes_left) return ReportIncomplete(); + operands->offset = static_cast<unsigned char>(*cursor_++); + break; + + case '2': + if (2 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadTwoBytes(cursor_); + cursor_ += 2; + break; + + case '4': + if (4 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadFourBytes(cursor_); + cursor_ += 4; + break; + + case '8': + if (8 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadEightBytes(cursor_); + cursor_ += 8; + break; + + case 'e': { + size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left || expression_length > bytes_left - len) + return ReportIncomplete(); + cursor_ += len; + operands->expression = ImageSlice(cursor_, expression_length); + cursor_ += expression_length; + break; + } + + default: + MOZ_ASSERT(0); + } + } + + return true; +} + +MOZ_ALWAYS_INLINE +bool CallFrameInfo::State::DoInstruction() { + CIE* cie = entry_->cie; + Operands ops; + + // Our entry's kind should have been set by now. + MOZ_ASSERT(entry_->kind != kUnknown); + + // We shouldn't have been invoked unless there were more + // instructions to parse. + MOZ_ASSERT(cursor_ < entry_->end); + + unsigned opcode = *cursor_++; + if ((opcode & 0xc0) != 0) { + switch (opcode & 0xc0) { + // Advance the address. 
+ case DW_CFA_advance_loc: { + size_t code_offset = opcode & 0x3f; + address_ += code_offset * cie->code_alignment_factor; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset: + if (!ParseOperands("o", &ops) || + !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor)) + return false; + break; + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore: + if (!DoRestore(opcode & 0x3f)) return false; + break; + + // The 'if' above should have excluded this possibility. + default: + MOZ_ASSERT(0); + } + + // Return here, so the big switch below won't be indented. + return true; + } + + switch (opcode) { + // Set the address. + case DW_CFA_set_loc: + if (!ParseOperands("a", &ops)) return false; + address_ = ops.offset; + break; + + // Advance the address. + case DW_CFA_advance_loc1: + if (!ParseOperands("1", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc2: + if (!ParseOperands("2", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc4: + if (!ParseOperands("4", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_MIPS_advance_loc8: + if (!ParseOperands("8", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa: + if (!ParseOperands("ro", &ops) || + !DoDefCFA(ops.register_number, ops.offset)) + return false; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa_sf: + if (!ParseOperands("rs", &ops) || + !DoDefCFA(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Change the base register used to compute the CFA. + case DW_CFA_def_cfa_register: { + Rule* cfa_rule = rules_.CFARuleRef(); + if (!cfa_rule->isVALID()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + if (!ParseOperands("r", &ops)) return false; + cfa_rule->SetBaseRegister(ops.register_number); + if (!cfa_rule->Handle(handler_, address_, Handler::kCFARegister)) + return false; + break; + } + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset: + if (!ParseOperands("o", &ops) || !DoDefCFAOffset(ops.offset)) + return false; + break; + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset_sf: + if (!ParseOperands("s", &ops) || + !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Specify an expression whose value is the CFA. + case DW_CFA_def_cfa_expression: { + if (!ParseOperands("e", &ops)) return false; + Rule rule = Rule::mkValExpressionRule(ops.expression); + rules_.SetCFARule(rule); + if (!rule.Handle(handler_, address_, Handler::kCFARegister)) return false; + break; + } + + // The register's value cannot be recovered. + case DW_CFA_undefined: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, Rule::mkUndefinedRule())) + return false; + break; + } + + // The register's value is unchanged from its value in the caller. + case DW_CFA_same_value: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, Rule::mkSameValueRule())) + return false; + break; + } + + // Find a register at an offset from the CFA. 
+ case DW_CFA_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_offset_extended_sf: + if (!ParseOperands("rs", &ops) || + !DoOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_GNU_negative_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + -ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset: + if (!ParseOperands("ro", &ops) || + !DoValOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset_sf: + if (!ParseOperands("rs", &ops) || + !DoValOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register has been saved in another register. + case DW_CFA_register: { + if (!ParseOperands("ro", &ops) || + !DoRule(ops.register_number, Rule::mkRegisterRule(ops.offset))) + return false; + break; + } + + // An expression yields the address at which the register is saved. + case DW_CFA_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, Rule::mkExpressionRule(ops.expression))) + return false; + break; + } + + // An expression yields the caller's value for the register. + case DW_CFA_val_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, + Rule::mkValExpressionRule(ops.expression))) + return false; + break; + } + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore_extended: + if (!ParseOperands("r", &ops) || !DoRestore(ops.register_number)) + return false; + break; + + // Save the current set of rules on a stack. + case DW_CFA_remember_state: + if (!saved_rules_) { + saved_rules_ = new std::stack<RuleMap>(); + } + saved_rules_->push(rules_); + break; + + // Pop the current set of rules off the stack. + case DW_CFA_restore_state: { + if (!saved_rules_ || saved_rules_->empty()) { + reporter_->EmptyStateStack(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + const RuleMap& new_rules = saved_rules_->top(); + if (rules_.CFARule().isVALID() && !new_rules.CFARule().isVALID()) { + reporter_->ClearingCFARule(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + rules_.HandleTransitionTo(handler_, address_, new_rules); + rules_ = new_rules; + saved_rules_->pop(); + break; + } + + // No operation. (Padding instruction.) + case DW_CFA_nop: + break; + + // A SPARC register window save: Registers 8 through 15 (%o0-%o7) + // are saved in registers 24 through 31 (%i0-%i7), and registers + // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets + // (0-15 * the register size). The register numbers must be + // hard-coded. A GNU extension, and not a pretty one. + case DW_CFA_GNU_window_save: { + // Save %o0-%o7 in %i0-%i7. + for (int i = 8; i < 16; i++) + if (!DoRule(i, Rule::mkRegisterRule(i + 16))) return false; + // Save %l0-%l7 and %i0-%i7 at the CFA. + for (int i = 16; i < 32; i++) + // Assume that the byte reader's address size is the same as + // the architecture's register size. !@#%*^ hilarious. 
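+        // (With 8-byte addresses this places %l0..%l7 and %i0..%i7 at
+        // CFA+0, CFA+8, ..., CFA+120 respectively.)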
+ if (!DoRule(i, Rule::mkOffsetRule(Handler::kCFARegister, + (i - 16) * reader_->AddressSize()))) + return false; + break; + } + + // I'm not sure what this is. GDB doesn't use it for unwinding. + case DW_CFA_GNU_args_size: + if (!ParseOperands("o", &ops)) return false; + break; + + // An opcode we don't recognize. + default: { + reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + } + + return true; +} + +// See declaration above for rationale re the no-inline directive. +MOZ_NEVER_INLINE +bool CallFrameInfo::State::DoInstructions() { + while (cursor_ < entry_->end) { + if (!DoInstruction()) { + return false; + } + } + return true; +} + +bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) { + Rule rule = Rule::mkValOffsetRule(base_register, offset); + rules_.SetCFARule(rule); + return rule.Handle(handler_, address_, Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoDefCFAOffset(long offset) { + Rule* cfa_rule = rules_.CFARuleRef(); + if (!cfa_rule->isVALID()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + cfa_rule->SetOffset(offset); + return cfa_rule->Handle(handler_, address_, Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoRule(unsigned reg, Rule rule) { + rules_.SetRegisterRule(reg, rule); + return rule.Handle(handler_, address_, reg); +} + +bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) { + if (!rules_.CFARule().isVALID()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + Rule rule = Rule::mkOffsetRule(Handler::kCFARegister, offset); + return DoRule(reg, rule); +} + +bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) { + if (!rules_.CFARule().isVALID()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, Rule::mkValOffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoRestore(unsigned reg) { + // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE. + if (entry_->kind == kCIE) { + reporter_->RestoreInCIE(entry_->offset, CursorOffset()); + return false; + } + Rule rule = cie_rules_.RegisterRule(reg); + if (!rule.isVALID()) { + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + rule = Rule::mkSameValueRule(); + } + return DoRule(reg, rule); +} + +bool CallFrameInfo::ReadEntryPrologue(const char* cursor, Entry* entry) { + const char* buffer_end = buffer_ + buffer_length_; + + // Initialize enough of ENTRY for use in error reporting. + entry->offset = cursor - buffer_; + entry->start = cursor; + entry->kind = kUnknown; + entry->end = NULL; + + // Read the initial length. This sets reader_'s offset size. + size_t length_size; + uint64 length = reader_->ReadInitialLength(cursor, &length_size); + if (length_size > size_t(buffer_end - cursor)) return ReportIncomplete(entry); + cursor += length_size; + + // In a .eh_frame section, a length of zero marks the end of the series + // of entries. + if (length == 0 && eh_frame_) { + entry->kind = kTerminator; + entry->end = cursor; + return true; + } + + // Validate the length. 
+ if (length > size_t(buffer_end - cursor)) return ReportIncomplete(entry); + + // The length is the number of bytes after the initial length field; + // we have that position handy at this point, so compute the end + // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine, + // and the length didn't fit in a size_t, we would have rejected it + // above.) + entry->end = cursor + length; + + // Parse the next field: either the offset of a CIE or a CIE id. + size_t offset_size = reader_->OffsetSize(); + if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry); + entry->id = reader_->ReadOffset(cursor); + + // Don't advance cursor past id field yet; in .eh_frame data we need + // the id's position to compute the section offset of an FDE's CIE. + + // Now we can decide what kind of entry this is. + if (eh_frame_) { + // In .eh_frame data, an ID of zero marks the entry as a CIE, and + // anything else is an offset from the id field of the FDE to the start + // of the CIE. + if (entry->id == 0) { + entry->kind = kCIE; + } else { + entry->kind = kFDE; + // Turn the offset from the id into an offset from the buffer's start. + entry->id = (cursor - buffer_) - entry->id; + } + } else { + // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the + // offset size for the entry) marks the entry as a CIE, and anything + // else is the offset of the CIE from the beginning of the section. + if (offset_size == 4) + entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; + else { + MOZ_ASSERT(offset_size == 8); + entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; + } + } + + // Now advance cursor past the id. + cursor += offset_size; + + // The fields specific to this kind of entry start here. + entry->fields = cursor; + + entry->cie = NULL; + + return true; +} + +bool CallFrameInfo::ReadCIEFields(CIE* cie) { + const char* cursor = cie->fields; + size_t len; + + MOZ_ASSERT(cie->kind == kCIE); + + // Prepare for early exit. + cie->version = 0; + cie->augmentation.clear(); + cie->code_alignment_factor = 0; + cie->data_alignment_factor = 0; + cie->return_address_register = 0; + cie->has_z_augmentation = false; + cie->pointer_encoding = DW_EH_PE_absptr; + cie->instructions = 0; + + // Parse the version number. + if (cie->end - cursor < 1) return ReportIncomplete(cie); + cie->version = reader_->ReadOneByte(cursor); + cursor++; + + // If we don't recognize the version, we can't parse any more fields of the + // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a + // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well; + // the difference between those versions seems to be the same as for + // .debug_frame. + if (cie->version < 1 || cie->version > 4) { + reporter_->UnrecognizedVersion(cie->offset, cie->version); + return false; + } + + const char* augmentation_start = cursor; + const void* augmentation_end = + memchr(augmentation_start, '\0', cie->end - augmentation_start); + if (!augmentation_end) return ReportIncomplete(cie); + cursor = static_cast<const char*>(augmentation_end); + cie->augmentation = string(augmentation_start, cursor - augmentation_start); + // Skip the terminating '\0'. + cursor++; + + // Is this CFI augmented? + if (!cie->augmentation.empty()) { + // Is it an augmentation we recognize? + if (cie->augmentation[0] == DW_Z_augmentation_start) { + // Linux C++ ABI 'z' augmentation, used for exception handling data. + cie->has_z_augmentation = true; + } else { + // Not an augmentation we recognize. 
Augmentations can have arbitrary + // effects on the form of rest of the content, so we have to give up. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + + if (cie->version >= 4) { + // Check that the address_size and segment_size fields are plausible. + if (cie->end - cursor < 2) { + return ReportIncomplete(cie); + } + uint8_t address_size = reader_->ReadOneByte(cursor); + cursor++; + if (address_size != sizeof(void*)) { + // This is not per-se invalid CFI. But we can reasonably expect to + // be running on a target of the same word size as the CFI is for, + // so we reject this case. + reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid address_size"); + return false; + } + uint8_t segment_size = reader_->ReadOneByte(cursor); + cursor++; + if (segment_size != 0) { + // This is also not per-se invalid CFI, but we don't currently handle + // the case of non-zero |segment_size|. + reporter_->InvalidDwarf4Artefact(cie->offset, "Invalid segment_size"); + return false; + } + // We only continue parsing if |segment_size| is zero. If this routine + // is ever changed to allow non-zero |segment_size|, then + // ReadFDEFields() below will have to be changed to match, per comments + // there. + } + + // Parse the code alignment factor. + cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the data alignment factor. + cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the return address register. This is a ubyte in version 1, and + // a ULEB128 in version 3. + if (cie->version == 1) { + if (cursor >= cie->end) return ReportIncomplete(cie); + cie->return_address_register = uint8(*cursor++); + } else { + cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + } + + // If we have a 'z' augmentation string, find the augmentation data and + // use the augmentation string to parse it. + if (cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len + data_size) + return ReportIncomplete(cie); + cursor += len; + const char* data = cursor; + cursor += data_size; + const char* data_end = cursor; + + cie->has_z_lsda = false; + cie->has_z_personality = false; + cie->has_z_signal_frame = false; + + // Walk the augmentation string, and extract values from the + // augmentation data as the string directs. + for (size_t i = 1; i < cie->augmentation.size(); i++) { + switch (cie->augmentation[i]) { + case DW_Z_has_LSDA: + // The CIE's augmentation data holds the language-specific data + // area pointer's encoding, and the FDE's augmentation data holds + // the pointer itself. + cie->has_z_lsda = true; + // Fetch the LSDA encoding from the augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->lsda_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->lsda_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding); + return false; + } + // Don't check if the encoding is usable here --- we haven't + // read the FDE's fields yet, so we're not prepared for + // DW_EH_PE_funcrel, although that's a fine encoding for the + // LSDA to use, since it appears in the FDE. 
+ break; + + case DW_Z_has_personality_routine: + // The CIE's augmentation data holds the personality routine + // pointer's encoding, followed by the pointer itself. + cie->has_z_personality = true; + // Fetch the personality routine pointer's encoding from the + // augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->personality_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->personality_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->personality_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + // Fetch the personality routine's pointer itself from the data. + cie->personality_address = reader_->ReadEncodedPointer( + data, cie->personality_encoding, &len); + if (len > size_t(data_end - data)) return ReportIncomplete(cie); + data += len; + break; + + case DW_Z_has_FDE_address_encoding: + // The CIE's augmentation data holds the pointer encoding to use + // for addresses in the FDE. + if (data >= data_end) return ReportIncomplete(cie); + cie->pointer_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->pointer_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->pointer_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + break; + + case DW_Z_is_signal_trampoline: + // Frames using this CIE are signal delivery frames. + cie->has_z_signal_frame = true; + break; + + default: + // An augmentation we don't recognize. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + } + + // The CIE's instructions start here. + cie->instructions = cursor; + + return true; +} + +bool CallFrameInfo::ReadFDEFields(FDE* fde) { + const char* cursor = fde->fields; + size_t size; + + // At this point, for Dwarf 4 and above, we are assuming that the + // associated CIE has its |segment_size| field equal to zero. This is + // checked for in ReadCIEFields() above. If ReadCIEFields() is ever + // changed to allow non-zero |segment_size| CIEs then we will have to read + // the segment_selector value at this point. + + fde->address = + reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, &size); + if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde); + cursor += size; + reader_->SetFunctionBase(fde->address); + + // For the length, we strip off the upper nybble of the encoding used for + // the starting address. + DwarfPointerEncoding length_encoding = + DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f); + fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size); + if (size > size_t(fde->end - cursor)) return ReportIncomplete(fde); + cursor += size; + + // If the CIE has a 'z' augmentation string, then augmentation data + // appears here. 
+ if (fde->cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size); + if (size_t(fde->end - cursor) < size + data_size) + return ReportIncomplete(fde); + cursor += size; + + // In the abstract, we should walk the augmentation string, and extract + // items from the FDE's augmentation data as we encounter augmentation + // string characters that specify their presence: the ordering of items + // in the augmentation string determines the arrangement of values in + // the augmentation data. + // + // In practice, there's only ever one value in FDE augmentation data + // that we support --- the LSDA pointer --- and we have to bail if we + // see any unrecognized augmentation string characters. So if there is + // anything here at all, we know what it is, and where it starts. + if (fde->cie->has_z_lsda) { + // Check whether the LSDA's pointer encoding is usable now: only once + // we've parsed the FDE's starting address do we call reader_-> + // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes + // usable. + if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) { + reporter_->UnusablePointerEncoding(fde->cie->offset, + fde->cie->lsda_encoding); + return false; + } + + fde->lsda_address = + reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size); + if (size > data_size) return ReportIncomplete(fde); + // Ideally, we would also complain here if there were unconsumed + // augmentation data. + } + + cursor += data_size; + } + + // The FDE's instructions start after those. + fde->instructions = cursor; + + return true; +} + +bool CallFrameInfo::Start() { + const char* buffer_end = buffer_ + buffer_length_; + const char* cursor; + bool all_ok = true; + const char* entry_end; + bool ok; + + // Traverse all the entries in buffer_, skipping CIEs and offering + // FDEs to the handler. + for (cursor = buffer_; cursor < buffer_end; + cursor = entry_end, all_ok = all_ok && ok) { + FDE fde; + + // Make it easy to skip this entry with 'continue': assume that + // things are not okay until we've checked all the data, and + // prepare the address of the next entry. + ok = false; + + // Read the entry's prologue. + if (!ReadEntryPrologue(cursor, &fde)) { + if (!fde.end) { + // If we couldn't even figure out this entry's extent, then we + // must stop processing entries altogether. + all_ok = false; + break; + } + entry_end = fde.end; + continue; + } + + // The next iteration picks up after this entry. + entry_end = fde.end; + + // Did we see an .eh_frame terminating mark? + if (fde.kind == kTerminator) { + // If there appears to be more data left in the section after the + // terminating mark, warn the user. But this is just a warning; + // we leave all_ok true. + if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset); + break; + } + + // In this loop, we skip CIEs. We only parse them fully when we + // parse an FDE that refers to them. This limits our memory + // consumption (beyond the buffer itself) to that needed to + // process the largest single entry. + if (fde.kind != kFDE) { + ok = true; + continue; + } + + // Validate the CIE pointer. + if (fde.id > buffer_length_) { + reporter_->CIEPointerOutOfRange(fde.offset, fde.id); + continue; + } + + CIE cie; + + // Parse this FDE's CIE header. + if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) continue; + // This had better be an actual CIE. 
+ if (cie.kind != kCIE) { + reporter_->BadCIEId(fde.offset, fde.id); + continue; + } + if (!ReadCIEFields(&cie)) continue; + + // We now have the values that govern both the CIE and the FDE. + cie.cie = &cie; + fde.cie = &cie; + + // Parse the FDE's header. + if (!ReadFDEFields(&fde)) continue; + + // Call Entry to ask the consumer if they're interested. + if (!handler_->Entry(fde.offset, fde.address, fde.size, cie.version, + cie.augmentation, cie.return_address_register)) { + // The handler isn't interested in this entry. That's not an error. + ok = true; + continue; + } + + if (cie.has_z_augmentation) { + // Report the personality routine address, if we have one. + if (cie.has_z_personality) { + if (!handler_->PersonalityRoutine( + cie.personality_address, + IsIndirectEncoding(cie.personality_encoding))) + continue; + } + + // Report the language-specific data area address, if we have one. + if (cie.has_z_lsda) { + if (!handler_->LanguageSpecificDataArea( + fde.lsda_address, IsIndirectEncoding(cie.lsda_encoding))) + continue; + } + + // If this is a signal-handling frame, report that. + if (cie.has_z_signal_frame) { + if (!handler_->SignalHandler()) continue; + } + } + + // Interpret the CIE's instructions, and then the FDE's instructions. + State state(reader_, handler_, reporter_, fde.address); + ok = state.InterpretCIE(cie) && state.InterpretFDE(fde); + + // Tell the ByteReader that the function start address from the + // FDE header is no longer valid. + reader_->ClearFunctionBase(); + + // Report the end of the entry. + handler_->End(); + } + + return all_ok; +} + +const char* CallFrameInfo::KindName(EntryKind kind) { + if (kind == CallFrameInfo::kUnknown) + return "entry"; + else if (kind == CallFrameInfo::kCIE) + return "common information entry"; + else if (kind == CallFrameInfo::kFDE) + return "frame description entry"; + else { + MOZ_ASSERT(kind == CallFrameInfo::kTerminator); + return ".eh_frame sequence terminator"; + } +} + +bool CallFrameInfo::ReportIncomplete(Entry* entry) { + reporter_->Incomplete(entry->offset, entry->kind); + return false; +} + +void CallFrameInfo::Reporter::Incomplete(uint64 offset, + CallFrameInfo::EntryKind kind) { + char buf[300]; + SprintfLiteral(buf, "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker" + " before end of section contents\n", + filename_.c_str(), offset, section_.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset, + uint64 cie_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer is out of range: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer does not point to a CIE: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized version: %d\n", + filename_.c_str(), offset, 
section_.c_str(), version); + log_(buf); +} + +void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset, + const string& aug) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized augmentation: '%s'\n", + filename_.c_str(), offset, section_.c_str(), aug.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::InvalidDwarf4Artefact(uint64 offset, + const char* what) { + char* what_safe = strndup(what, 100); + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies invalid Dwarf4 artefact: %s\n", + filename_.c_str(), offset, section_.c_str(), what_safe); + log_(buf); + free(what_safe); +} + +void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset, + uint8 encoding) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies invalid pointer encoding: " + "0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); + log_(buf); +} + +void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset, + uint8 encoding) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies a pointer encoding for which" + " we have no base address: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); + log_(buf); +} + +void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " the DW_CFA_restore instruction at offset 0x%llx" + " cannot be used in a common information entry\n", + filename_.c_str(), offset, section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::BadInstruction(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx is unrecognized\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::NoCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx assumes that a CFA rule " + "has been set, but none has been set\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " should pop a saved state from the stack, but the stack " + "is empty\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " would clear the CFA rule in effect\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +unsigned int DwarfCFIToModule::RegisterNames::I386() { 
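+  // (The group sizes listed in the comment below sum to
+  //  8+3+8+2+8+8+3+8+2 = 50, which is the value returned.)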
+  /*
+   8 "$eax", "$ecx", "$edx", "$ebx", "$esp", "$ebp", "$esi", "$edi",
+   3 "$eip", "$eflags", "$unused1",
+   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+   2 "$unused2", "$unused3",
+   8 "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+   3 "$fcw", "$fsw", "$mxcsr",
+   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused4", "$unused5",
+   2 "$tr", "$ldtr"
+  */
+  return 8 + 3 + 8 + 2 + 8 + 8 + 3 + 8 + 2;
+}
+
+unsigned int DwarfCFIToModule::RegisterNames::X86_64() {
+  /*
+   8 "$rax", "$rdx", "$rcx", "$rbx", "$rsi", "$rdi", "$rbp", "$rsp",
+   8 "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15",
+   1 "$rip",
+   8 "$xmm0","$xmm1","$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+   8 "$xmm8","$xmm9","$xmm10","$xmm11","$xmm12","$xmm13","$xmm14","$xmm15",
+   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+   1 "$rflags",
+   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused1", "$unused2",
+   4 "$fs.base", "$gs.base", "$unused3", "$unused4",
+   2 "$tr", "$ldtr",
+   3 "$mxcsr", "$fcw", "$fsw"
+  */
+  return 8 + 8 + 1 + 8 + 8 + 8 + 8 + 1 + 8 + 4 + 2 + 3;
+}
+
+// Per ARM IHI 0040A, section 3.1
+unsigned int DwarfCFIToModule::RegisterNames::ARM() {
+  /*
+   8 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+   8 "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc",
+   8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
+   8 "fps", "cpsr", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+   8 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+   8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+   8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
+   8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7"
+  */
+  return 13 * 8;
+}
+
+// Per ARM IHI 0057A, section 3.1
+unsigned int DwarfCFIToModule::RegisterNames::ARM64() {
+  /*
+   8 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+   8 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+   8 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+   8 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "", "", "", "", "", "", "", "",
+   8 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+   8 "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+   8 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+   8 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+  */
+  return 12 * 8;
+}
+
+unsigned int DwarfCFIToModule::RegisterNames::MIPS() {
+  /*
+   8 "$zero", "$at", "$v0", "$v1", "$a0", "$a1", "$a2", "$a3",
+   8 "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
+   8 "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7",
+   8 "$t8", "$t9", "$k0", "$k1", "$gp", "$sp", "$fp", "$ra",
+   9 "$lo", "$hi", "$pc", "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
+   8 "$f6", "$f7", "$f8", "$f9", "$f10", "$f11", "$f12", "$f13",
+   7 "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20",
+   7 "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27",
+   6 "$f28", "$f29", "$f30", "$f31", "$fcsr", "$fir"
+  */
+  return 8 + 8 + 8 + 8 + 9 + 8 + 7 + 7 + 6;
+}
+
+// See prototype for comments.
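+// For illustration: assuming |pushCfaAtStart| is true and |derefAtEnd| is
+// false, the single-operation expression "DW_OP_breg7 <sleb 16>" would be
+// converted into the PfxInstr sequence
+//   PX_Start(1), PX_DwReg(7), PX_SImm32(16), PX_Add, PX_End
+// and the index of the PX_Start instruction would be returned.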
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, + ImageSlice expr, bool debug, bool pushCfaAtStart, + bool derefAtEnd) { + const char* cursor = expr.start_; + const char* end1 = cursor + expr.length_; + + char buf[100]; + if (debug) { + SprintfLiteral(buf, "LUL.DW << DwarfExpr, len is %d\n", + (int)(end1 - cursor)); + summ->Log(buf); + } + + // Add a marker for the start of this expression. In it, indicate + // whether or not the CFA should be pushed onto the stack prior to + // evaluation. + int32_t start_ix = + summ->AddPfxInstr(PfxInstr(PX_Start, pushCfaAtStart ? 1 : 0)); + MOZ_ASSERT(start_ix >= 0); + + while (cursor < end1) { + uint8 opc = reader->ReadOneByte(cursor); + cursor++; + + const char* nm = nullptr; + PfxExprOp pxop = PX_End; + + switch (opc) { + case DW_OP_lit0 ... DW_OP_lit31: { + int32_t simm32 = (int32_t)(opc - DW_OP_lit0); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_lit%d\n", (int)simm32); + summ->Log(buf); + } + (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, simm32)); + break; + } + + case DW_OP_breg0 ... DW_OP_breg31: { + size_t len; + int64_t n = reader->ReadSignedLEB128(cursor, &len); + cursor += len; + DW_REG_NUMBER reg = (DW_REG_NUMBER)(opc - DW_OP_breg0); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_breg%d %lld\n", (int)reg, + (long long int)n); + summ->Log(buf); + } + // PfxInstr only allows a 32 bit signed offset. So we + // must fail if the immediate is out of range. + if (n < INT32_MIN || INT32_MAX < n) goto fail; + (void)summ->AddPfxInstr(PfxInstr(PX_DwReg, reg)); + (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, (int32_t)n)); + (void)summ->AddPfxInstr(PfxInstr(PX_Add)); + break; + } + + case DW_OP_const4s: { + uint64_t u64 = reader->ReadFourBytes(cursor); + cursor += 4; + // u64 is guaranteed by |ReadFourBytes| to be in the + // range 0 .. FFFFFFFF inclusive. But to be safe: + uint32_t u32 = (uint32_t)(u64 & 0xFFFFFFFF); + int32_t s32 = (int32_t)u32; + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_const4s %d\n", (int)s32); + summ->Log(buf); + } + (void)summ->AddPfxInstr(PfxInstr(PX_SImm32, s32)); + break; + } + + case DW_OP_deref: + nm = "deref"; + pxop = PX_Deref; + goto no_operands; + case DW_OP_and: + nm = "and"; + pxop = PX_And; + goto no_operands; + case DW_OP_plus: + nm = "plus"; + pxop = PX_Add; + goto no_operands; + case DW_OP_minus: + nm = "minus"; + pxop = PX_Sub; + goto no_operands; + case DW_OP_shl: + nm = "shl"; + pxop = PX_Shl; + goto no_operands; + case DW_OP_ge: + nm = "ge"; + pxop = PX_CmpGES; + goto no_operands; + no_operands: + MOZ_ASSERT(nm && pxop != PX_End); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_%s\n", nm); + summ->Log(buf); + } + (void)summ->AddPfxInstr(PfxInstr(pxop)); + break; + + default: + if (debug) { + SprintfLiteral(buf, "LUL.DW unknown opc %d\n", (int)opc); + summ->Log(buf); + } + goto fail; + + } // switch (opc) + + } // while (cursor < end1) + + MOZ_ASSERT(cursor >= end1); + + if (cursor > end1) { + // We overran the Dwarf expression. Give up. + goto fail; + } + + // For DW_CFA_expression, what the expression denotes is the address + // of where the previous value is located. The caller of this routine + // may therefore request one last dereference before the end marker is + // inserted. + if (derefAtEnd) { + (void)summ->AddPfxInstr(PfxInstr(PX_Deref)); + } + + // Insert an end marker, and declare success. 
+ (void)summ->AddPfxInstr(PfxInstr(PX_End)); + if (debug) { + SprintfLiteral(buf, + "LUL.DW conversion of dwarf expression succeeded, " + "ix = %d\n", + (int)start_ix); + summ->Log(buf); + summ->Log("LUL.DW >>\n"); + } + return start_ix; + +fail: + if (debug) { + summ->Log("LUL.DW conversion of dwarf expression failed\n"); + summ->Log("LUL.DW >>\n"); + } + return -1; +} + +bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const string& augmentation, + unsigned return_address) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW DwarfCFIToModule::Entry 0x%llx,+%lld\n", + address, length); + summ_->Log(buf); + } + + summ_->Entry(address, length); + + // If dwarf2reader::CallFrameInfo can handle this version and + // augmentation, then we should be okay with that, so there's no + // need to check them here. + + // Get ready to collect entries. + return_address_ = return_address; + + // Breakpad STACK CFI records must provide a .ra rule, but DWARF CFI + // may not establish any rule for .ra if the return address column + // is an ordinary register, and that register holds the return + // address on entry to the function. So establish an initial .ra + // rule citing the return address register. + if (return_address_ < num_dw_regs_) { + summ_->Rule(address, return_address_, NODEREF, return_address, 0); + } + + return true; +} + +const UniqueString* DwarfCFIToModule::RegisterName(int i) { + if (i < 0) { + MOZ_ASSERT(i == kCFARegister); + return usu_->ToUniqueString(".cfa"); + } + unsigned reg = i; + if (reg == return_address_) return usu_->ToUniqueString(".ra"); + + char buf[30]; + SprintfLiteral(buf, "dwarf_reg_%u", reg); + return usu_->ToUniqueString(buf); +} + +bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) { + reporter_->UndefinedNotSupported(entry_offset_, RegisterName(reg)); + // Treat this as a non-fatal error. 
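+  // (No recovery rule is recorded for this register; summarisation simply
+  // continues with the rules for the other registers.)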
+ return true; +} + +bool DwarfCFIToModule::SameValueRule(uint64 address, int reg) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = Same\n", address, reg); + summ_->Log(buf); + } + // reg + 0 + summ_->Rule(address, reg, NODEREF, reg, 0); + return true; +} + +bool DwarfCFIToModule::OffsetRule(uint64 address, int reg, int base_register, + long offset) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = *(r%d + %ld)\n", address, + reg, base_register, offset); + summ_->Log(buf); + } + // *(base_register + offset) + summ_->Rule(address, reg, DEREF, base_register, offset); + return true; +} + +bool DwarfCFIToModule::ValOffsetRule(uint64 address, int reg, int base_register, + long offset) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = r%d + %ld\n", address, reg, + base_register, offset); + summ_->Log(buf); + } + // base_register + offset + summ_->Rule(address, reg, NODEREF, base_register, offset); + return true; +} + +bool DwarfCFIToModule::RegisterRule(uint64 address, int reg, + int base_register) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = r%d\n", address, reg, + base_register); + summ_->Log(buf); + } + // base_register + 0 + summ_->Rule(address, reg, NODEREF, base_register, 0); + return true; +} + +bool DwarfCFIToModule::ExpressionRule(uint64 address, int reg, + const ImageSlice& expression) { + bool debug = !!DEBUG_DWARF; + int32_t start_ix = + parseDwarfExpr(summ_, reader_, expression, debug, true /*pushCfaAtStart*/, + true /*derefAtEnd*/); + if (start_ix >= 0) { + summ_->Rule(address, reg, PFXEXPR, 0, start_ix); + } else { + // Parsing of the Dwarf expression failed. Treat this as a + // non-fatal error, hence return |true| even on this path. + reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg)); + } + return true; +} + +bool DwarfCFIToModule::ValExpressionRule(uint64 address, int reg, + const ImageSlice& expression) { + bool debug = !!DEBUG_DWARF; + int32_t start_ix = + parseDwarfExpr(summ_, reader_, expression, debug, true /*pushCfaAtStart*/, + false /*!derefAtEnd*/); + if (start_ix >= 0) { + summ_->Rule(address, reg, PFXEXPR, 0, start_ix); + } else { + // Parsing of the Dwarf expression failed. Treat this as a + // non-fatal error, hence return |true| even on this path. 
+ reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg)); + } + return true; +} + +bool DwarfCFIToModule::End() { + // module_->AddStackFrameEntry(entry_); + if (DEBUG_DWARF) { + summ_->Log("LUL.DW DwarfCFIToModule::End()\n"); + } + summ_->End(); + return true; +} + +void DwarfCFIToModule::Reporter::UndefinedNotSupported( + size_t offset, const UniqueString* reg) { + char buf[300]; + SprintfLiteral(buf, "DwarfCFIToModule::Reporter::UndefinedNotSupported()\n"); + log_(buf); + // BPLOG(INFO) << file_ << ", section '" << section_ + // << "': the call frame entry at offset 0x" + // << std::setbase(16) << offset << std::setbase(10) + // << " sets the rule for register '" << FromUniqueString(reg) + // << "' to 'undefined', but the Breakpad symbol file format cannot " + // << " express this"; +} + +// FIXME: move this somewhere sensible +static bool is_power_of_2(uint64_t n) { + int i, nSetBits = 0; + for (i = 0; i < 8 * (int)sizeof(n); i++) { + if ((n & ((uint64_t)1) << i) != 0) nSetBits++; + } + return nSetBits <= 1; +} + +void DwarfCFIToModule::Reporter::ExpressionCouldNotBeSummarised( + size_t offset, const UniqueString* reg) { + static uint64_t n_complaints = 0; // This isn't threadsafe + n_complaints++; + if (!is_power_of_2(n_complaints)) return; + char buf[300]; + SprintfLiteral(buf, + "DwarfCFIToModule::Reporter::" + "ExpressionCouldNotBeSummarised(shown %llu times)\n", + (unsigned long long int)n_complaints); + log_(buf); +} + +} // namespace lul diff --git a/tools/profiler/lul/LulDwarfExt.h b/tools/profiler/lul/LulDwarfExt.h new file mode 100644 index 0000000000..4ee6fe17a8 --- /dev/null +++ b/tools/profiler/lul/LulDwarfExt.h @@ -0,0 +1,1312 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright 2006, 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/types.h +// src/common/dwarf/dwarf2enums.h +// src/common/dwarf/bytereader.h +// src/common/dwarf_cfi_to_module.h +// src/common/dwarf/dwarf2reader.h + +#ifndef LulDwarfExt_h +#define LulDwarfExt_h + +#include "LulDwarfSummariser.h" + +#include "mozilla/Assertions.h" + +#include <stdint.h> +#include <string> + +typedef signed char int8; +typedef short int16; +typedef int int32; +typedef long long int64; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +typedef unsigned long long uint64; + +#ifdef __PTRDIFF_TYPE__ +typedef __PTRDIFF_TYPE__ intptr; +typedef unsigned __PTRDIFF_TYPE__ uintptr; +#else +# error "Can't find pointer-sized integral types." +#endif + +namespace lul { + +class UniqueString; + +// This represents a read-only slice of the "image" (the temporarily mmaped-in +// .so). It is used for representing byte ranges containing Dwarf expressions. +// Note that equality (operator==) is on slice contents, not slice locations. +struct ImageSlice { + const char* start_; + size_t length_; + ImageSlice() : start_(0), length_(0) {} + ImageSlice(const char* start, size_t length) + : start_(start), length_(length) {} + // Make one from a C string (for testing only). Note, the terminating zero + // is not included in the length. + explicit ImageSlice(const char* cstring) + : start_(cstring), length_(strlen(cstring)) {} + explicit ImageSlice(const std::string& str) + : start_(str.c_str()), length_(str.length()) {} + ImageSlice(const ImageSlice& other) + : start_(other.start_), length_(other.length_) {} + ImageSlice(ImageSlice& other) + : start_(other.start_), length_(other.length_) {} + bool operator==(const ImageSlice& other) const { + if (length_ != other.length_) { + return false; + } + // This relies on the fact that that memcmp returns zero whenever length_ + // is zero. + return memcmp(start_, other.start_, length_) == 0; + } +}; + +// Exception handling frame description pointer formats, as described +// by the Linux Standard Base Core Specification 4.0, section 11.5, +// DWARF Extensions. +enum DwarfPointerEncoding { + DW_EH_PE_absptr = 0x00, + DW_EH_PE_omit = 0xff, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + + // The GNU toolchain sources define this enum value as well, + // simply to help classify the lower nybble values into signed and + // unsigned groups. + DW_EH_PE_signed = 0x08, + + // This is not documented in LSB 4.0, but it is used in both the + // Linux and OS X toolchains. It can be added to any other + // encoding (except DW_EH_PE_aligned), and indicates that the + // encoded value represents the address at which the true address + // is stored, not the true address itself. 
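+  // For example, an encoding byte of 0x9b is DW_EH_PE_indirect |
+  // DW_EH_PE_pcrel | DW_EH_PE_sdata4: a four-byte signed pc-relative
+  // offset to the location that holds the true address.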
+ DW_EH_PE_indirect = 0x80 +}; + +// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN +// because it conflicts with a macro +enum Endianness { ENDIANNESS_BIG, ENDIANNESS_LITTLE }; + +// A ByteReader knows how to read single- and multi-byte values of +// various endiannesses, sizes, and encodings, as used in DWARF +// debugging information and Linux C++ exception handling data. +class ByteReader { + public: + // Construct a ByteReader capable of reading one-, two-, four-, and + // eight-byte values according to ENDIANNESS, absolute machine-sized + // addresses, DWARF-style "initial length" values, signed and + // unsigned LEB128 numbers, and Linux C++ exception handling data's + // encoded pointers. + explicit ByteReader(enum Endianness endianness); + virtual ~ByteReader(); + + // Read a single byte from BUFFER and return it as an unsigned 8 bit + // number. + uint8 ReadOneByte(const char* buffer) const; + + // Read two bytes from BUFFER and return them as an unsigned 16 bit + // number, using this ByteReader's endianness. + uint16 ReadTwoBytes(const char* buffer) const; + + // Read four bytes from BUFFER and return them as an unsigned 32 bit + // number, using this ByteReader's endianness. This function returns + // a uint64 so that it is compatible with ReadAddress and + // ReadOffset. The number it returns will never be outside the range + // of an unsigned 32 bit integer. + uint64 ReadFourBytes(const char* buffer) const; + + // Read eight bytes from BUFFER and return them as an unsigned 64 + // bit number, using this ByteReader's endianness. + uint64 ReadEightBytes(const char* buffer) const; + + // Read an unsigned LEB128 (Little Endian Base 128) number from + // BUFFER and return it as an unsigned 64 bit integer. Set LEN to + // the number of bytes read. + // + // The unsigned LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between 0 and 0x7f, then its unsigned LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | + // 0x80, followed by the unsigned LEB128 representation of N / + // 128, rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; + + // Read a signed LEB128 number from BUFFER and return it as an + // signed 64 bit integer. Set LEN to the number of bytes read. + // + // The signed LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between -0x40 and 0x3f, then its signed LEB128 + // representation is a single byte whose value is N in two's + // complement. + // + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | + // 0x80, followed by the signed LEB128 representation of N / 128, + // rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + int64 ReadSignedLEB128(const char* buffer, size_t* len) const; + + // Indicate that addresses on this architecture are SIZE bytes long. SIZE + // must be either 4 or 8. (DWARF allows addresses to be any number of + // bytes in length from 1 to 255, but we only support 32- and 64-bit + // addresses at the moment.) 
You must call this before using the + // ReadAddress member function. + // + // For data in a .debug_info section, or something that .debug_info + // refers to like line number or macro data, the compilation unit + // header's address_size field indicates the address size to use. Call + // frame information doesn't indicate its address size (a shortcoming of + // the spec); you must supply the appropriate size based on the + // architecture of the target machine. + void SetAddressSize(uint8 size); + + // Return the current address size, in bytes. This is either 4, + // indicating 32-bit addresses, or 8, indicating 64-bit addresses. + uint8 AddressSize() const { return address_size_; } + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer, respecting this ByteReader's endianness and address size. You + // must call SetAddressSize before calling this function. + uint64 ReadAddress(const char* buffer) const; + + // DWARF actually defines two slightly different formats: 32-bit DWARF + // and 64-bit DWARF. This is *not* related to the size of registers or + // addresses on the target machine; it refers only to the size of section + // offsets and data lengths appearing in the DWARF data. One only needs + // 64-bit DWARF when the debugging data itself is larger than 4GiB. + // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the + // debugging data itself is very large. + // + // DWARF information identifies itself as 32-bit or 64-bit DWARF: each + // compilation unit and call frame information entry begins with an + // "initial length" field, which, in addition to giving the length of the + // data, also indicates the size of section offsets and lengths appearing + // in that data. The ReadInitialLength member function, below, reads an + // initial length and sets the ByteReader's offset size as a side effect. + // Thus, in the normal process of reading DWARF data, the appropriate + // offset size is set automatically. So, you should only need to call + // SetOffsetSize if you are using the same ByteReader to jump from the + // midst of one block of DWARF data into another. + + // Read a DWARF "initial length" field from START, and return it as + // an unsigned 64 bit integer, respecting this ByteReader's + // endianness. Set *LEN to the length of the initial length in + // bytes, either four or twelve. As a side effect, set this + // ByteReader's offset size to either 4 (if we see a 32-bit DWARF + // initial length) or 8 (if we see a 64-bit DWARF initial length). + // + // A DWARF initial length is either: + // + // - a byte count stored as an unsigned 32-bit value less than + // 0xffffff00, indicating that the data whose length is being + // measured uses the 32-bit DWARF format, or + // + // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, + // indicating that the data whose length is being measured uses + // the 64-bit DWARF format. + uint64 ReadInitialLength(const char* start, size_t* len); + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the + // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes + // long. You must call ReadInitialLength or SetOffsetSize before calling + // this function; see the comments above for details. + uint64 ReadOffset(const char* buffer) const; + + // Return the current offset size, in bytes. + // A return value of 4 indicates that we are reading 32-bit DWARF. 
+ // A return value of 8 indicates that we are reading 64-bit DWARF.
+ uint8 OffsetSize() const { return offset_size_; }
+
+ // Indicate that section offsets and lengths are SIZE bytes long. SIZE
+ // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
+ // Usually, you should not call this function yourself; instead, let a
+ // call to ReadInitialLength establish the data's offset size
+ // automatically.
+ void SetOffsetSize(uint8 size);
+
+ // The Linux C++ ABI uses a variant of DWARF call frame information
+ // for exception handling. This data is included in the program's
+ // address space as the ".eh_frame" section, and interpreted at
+ // runtime to walk the stack, find exception handlers, and run
+ // cleanup code. The format is mostly the same as DWARF CFI, with
+ // some adjustments made to provide the additional
+ // exception-handling data, and to make the data easier to work with
+ // in memory --- for example, to allow it to be placed in read-only
+ // memory even when describing position-independent code.
+ //
+ // In particular, exception handling data can select a number of
+ // different encodings for pointers that appear in the data, as
+ // described by the DwarfPointerEncoding enum. There are actually
+ // four axes(!) to the encoding:
+ //
+ // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
+ // the DWARF LEB128 encoding.
+ //
+ // - The pointer's signedness: pointers can be signed or unsigned.
+ //
+ // - The pointer's base address: the data stored in the exception
+ // handling data can be the actual address (that is, an absolute
+ // pointer), or relative to one of a number of different base
+ // addresses --- including that of the encoded pointer itself, for
+ // a form of "pc-relative" addressing.
+ //
+ // - The pointer may be indirect: it may be the address where the
+ // true pointer is stored. (This is used to refer to things via
+ // global offset table entries, program linkage table entries, or
+ // other tricks used in position-independent code.)
+ //
+ // There are also two options that fall outside that matrix
+ // altogether: the pointer may be omitted, or it may have padding to
+ // align it on an appropriate address boundary. (That last option
+ // may seem like it should be just another axis, but it is not.)
+
+ // Indicate that the exception handling data is loaded starting at
+ // SECTION_BASE, and that the start of its buffer in our own memory
+ // is BUFFER_BASE. This allows us to find the address that a given
+ // byte in our buffer would have when loaded into the program the
+ // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
+ void SetCFIDataBase(uint64 section_base, const char* buffer_base);
+
+ // Indicate that the base address of the program's ".text" section
+ // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
+ void SetTextBase(uint64 text_base);
+
+ // Indicate that the base address for DW_EH_PE_datarel pointers is
+ // DATA_BASE. The proper value depends on the ABI; it is usually the
+ // address of the global offset table, held in a designated register in
+ // position-independent code. You will need to look at the startup code
+ // for the target system to be sure. I tried; my eyes bled.
+ void SetDataBase(uint64 data_base);
+
+ // Indicate that the base address for the FDE we are processing is
+ // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
+ // pointers. (This encoding does not seem to be used by the GNU
+ // toolchain.)
+ void SetFunctionBase(uint64 function_base); + + // Indicate that we are no longer processing any FDE, so any use of + // a DW_EH_PE_funcrel encoding is an error. + void ClearFunctionBase(); + + // Return true if ENCODING is a valid pointer encoding. + bool ValidEncoding(DwarfPointerEncoding encoding) const; + + // Return true if we have all the information we need to read a + // pointer that uses ENCODING. This checks that the appropriate + // SetFooBase function for ENCODING has been called. + bool UsableEncoding(DwarfPointerEncoding encoding) const; + + // Read an encoded pointer from BUFFER using ENCODING; return the + // absolute address it represents, and set *LEN to the pointer's + // length in bytes, including any padding for aligned pointers. + // + // This function calls 'abort' if ENCODING is invalid or refers to a + // base address this reader hasn't been given, so you should check + // with ValidEncoding and UsableEncoding first if you would rather + // die in a more helpful way. + uint64 ReadEncodedPointer(const char* buffer, DwarfPointerEncoding encoding, + size_t* len) const; + + private: + // Function pointer type for our address and offset readers. + typedef uint64 (ByteReader::*AddressReader)(const char*) const; + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 define offsets as either 4 or 8 bytes, + // generally depending on the amount of DWARF2/3 info present. + // This function pointer gets set by SetOffsetSize. + AddressReader offset_reader_; + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 allow addresses to be any size from 0-255 + // bytes currently. Internally we support 4 and 8 byte addresses, + // and will CHECK on anything else. + // This function pointer gets set by SetAddressSize. + AddressReader address_reader_; + + Endianness endian_; + uint8 address_size_; + uint8 offset_size_; + + // Base addresses for Linux C++ exception handling data's encoded pointers. 
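+ //
+ // As an aside, a hypothetical sketch (the variable names |img|, |avma|
+ // and |cursor| are made up, not taken from this code) of how a caller
+ // prepares these bases and then reads a PC-relative pointer, assuming
+ // the usual DW_EH_PE_sdata4 constant is defined alongside the other
+ // encodings above:
+ //
+ //   ByteReader reader(ENDIANNESS_LITTLE);
+ //   reader.SetAddressSize(sizeof(void*));
+ //   reader.SetCFIDataBase(avma, img);  // avma = section's load address,
+ //                                      // img  = its bytes in our memory
+ //   DwarfPointerEncoding enc =
+ //       DwarfPointerEncoding(DW_EH_PE_pcrel | DW_EH_PE_sdata4);
+ //   size_t len;
+ //   uint64 target = reader.UsableEncoding(enc)
+ //                       ? reader.ReadEncodedPointer(cursor, enc, &len)
+ //                       : 0;
+ //
+ // so the signed 32-bit value at |cursor| is interpreted relative to the
+ // address that byte has in the running program. The flags and values
+ // below record which of the bases have been supplied so far.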
+ bool have_section_base_, have_text_base_, have_data_base_; + bool have_function_base_; + uint64 section_base_; + uint64 text_base_, data_base_, function_base_; + const char* buffer_base_; +}; + +inline uint8 ByteReader::ReadOneByte(const char* buffer) const { + return buffer[0]; +} + +inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const { + const unsigned char* buffer = + reinterpret_cast<const unsigned char*>(signed_buffer); + const uint16 buffer0 = buffer[0]; + const uint16 buffer1 = buffer[1]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8; + } else { + return buffer1 | buffer0 << 8; + } +} + +inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const { + const unsigned char* buffer = + reinterpret_cast<const unsigned char*>(signed_buffer); + const uint32 buffer0 = buffer[0]; + const uint32 buffer1 = buffer[1]; + const uint32 buffer2 = buffer[2]; + const uint32 buffer3 = buffer[3]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24; + } else { + return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24; + } +} + +inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const { + const unsigned char* buffer = + reinterpret_cast<const unsigned char*>(signed_buffer); + const uint64 buffer0 = buffer[0]; + const uint64 buffer1 = buffer[1]; + const uint64 buffer2 = buffer[2]; + const uint64 buffer3 = buffer[3]; + const uint64 buffer4 = buffer[4]; + const uint64 buffer5 = buffer[5]; + const uint64 buffer6 = buffer[6]; + const uint64 buffer7 = buffer[7]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 | + buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56; + } else { + return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 | + buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56; + } +} + +// Read an unsigned LEB128 number. Each byte contains 7 bits of +// information, plus one bit saying whether the number continues or +// not. + +inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer, + size_t* len) const { + uint64 result = 0; + size_t num_read = 0; + unsigned int shift = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + + result |= (static_cast<uint64>(byte & 0x7f)) << shift; + + shift += 7; + + } while (byte & 0x80); + + *len = num_read; + + return result; +} + +// Read a signed LEB128 number. These are like regular LEB128 +// numbers, except the last byte may have a sign bit set. 
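+//
+// A worked example, not taken from the original comments: the byte
+// sequence { 0x9c, 0x7f } decodes as follows. The first byte contributes
+// its low seven bits, 0x1c, at shift 0 and has the continuation bit
+// (0x80) set; the second byte contributes 0x7f at shift 7 and stops.
+// That accumulates to 0x3f9c (16284); because the final byte has bit
+// 0x40 set, the result is sign-extended by or-ing in -(1 << 14), giving
+// -100. The same two bytes read as an *unsigned* LEB128 are just 16284.
+// In code (illustrative):
+//
+//   ByteReader reader(ENDIANNESS_LITTLE);
+//   const char bytes[] = "\x9c\x7f";
+//   size_t len = 0;
+//   int64 value = reader.ReadSignedLEB128(bytes, &len);
+//   // now value == -100 and len == 2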
+ +inline int64 ByteReader::ReadSignedLEB128(const char* buffer, + size_t* len) const { + int64 result = 0; + unsigned int shift = 0; + size_t num_read = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + result |= (static_cast<uint64>(byte & 0x7f) << shift); + shift += 7; + } while (byte & 0x80); + + if ((shift < 8 * sizeof(result)) && (byte & 0x40)) + result |= -((static_cast<int64>(1)) << shift); + *len = num_read; + return result; +} + +inline uint64 ByteReader::ReadOffset(const char* buffer) const { + MOZ_ASSERT(this->offset_reader_); + return (this->*offset_reader_)(buffer); +} + +inline uint64 ByteReader::ReadAddress(const char* buffer) const { + MOZ_ASSERT(this->address_reader_); + return (this->*address_reader_)(buffer); +} + +inline void ByteReader::SetCFIDataBase(uint64 section_base, + const char* buffer_base) { + section_base_ = section_base; + buffer_base_ = buffer_base; + have_section_base_ = true; +} + +inline void ByteReader::SetTextBase(uint64 text_base) { + text_base_ = text_base; + have_text_base_ = true; +} + +inline void ByteReader::SetDataBase(uint64 data_base) { + data_base_ = data_base; + have_data_base_ = true; +} + +inline void ByteReader::SetFunctionBase(uint64 function_base) { + function_base_ = function_base; + have_function_base_ = true; +} + +inline void ByteReader::ClearFunctionBase() { have_function_base_ = false; } + +// (derived from) +// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which +// accepts parsed DWARF call frame info and adds it to a Summariser object. + +// This class is a reader for DWARF's Call Frame Information. CFI +// describes how to unwind stack frames --- even for functions that do +// not follow fixed conventions for saving registers, whose frame size +// varies as they execute, etc. +// +// CFI describes, at each machine instruction, how to compute the +// stack frame's base address, how to find the return address, and +// where to find the saved values of the caller's registers (if the +// callee has stashed them somewhere to free up the registers for its +// own use). +// +// For example, suppose we have a function whose machine code looks +// like this (imagine an assembly language that looks like C, for a +// machine with 32-bit registers, and a stack that grows towards lower +// addresses): +// +// func: ; entry point; return address at sp +// func+0: sp = sp - 16 ; allocate space for stack frame +// func+1: sp[12] = r0 ; save r0 at sp+12 +// ... ; other code, not frame-related +// func+10: sp -= 4; *sp = x ; push some x on the stack +// ... ; other code, not frame-related +// func+20: r0 = sp[16] ; restore saved r0 +// func+21: sp += 20 ; pop whole stack frame +// func+22: pc = *sp; sp += 4 ; pop return address and jump to it +// +// DWARF CFI is (a very compressed representation of) a table with a +// row for each machine instruction address and a column for each +// register showing how to restore it, if possible. +// +// A special column named "CFA", for "Canonical Frame Address", tells how +// to compute the base address of the frame; registers' entries may +// refer to the CFA in describing where the registers are saved. +// +// Another special column, named "RA", represents the return address. +// +// For example, here is a complete (uncompressed) table describing the +// function above: +// +// insn cfa r0 r1 ... 
ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 cfa[0] +// func+2: sp+16 cfa[-4] cfa[0] +// func+11: sp+20 cfa[-4] cfa[0] +// func+21: sp+20 cfa[0] +// func+22: sp cfa[0] +// +// Some things to note here: +// +// - Each row describes the state of affairs *before* executing the +// instruction at the given address. Thus, the row for func+0 +// describes the state before we allocate the stack frame. In the +// next row, the formula for computing the CFA has changed, +// reflecting that allocation. +// +// - The other entries are written in terms of the CFA; this allows +// them to remain unchanged as the stack pointer gets bumped around. +// For example, the rule for recovering the return address (the "ra" +// column) remains unchanged throughout the function, even as the +// stack pointer takes on three different offsets from the return +// address. +// +// - Although we haven't shown it, most calling conventions designate +// "callee-saves" and "caller-saves" registers. The callee must +// preserve the values of callee-saves registers; if it uses them, +// it must save their original values somewhere, and restore them +// before it returns. In contrast, the callee is free to trash +// caller-saves registers; if the callee uses these, it will +// probably not bother to save them anywhere, and the CFI will +// probably mark their values as "unrecoverable". +// +// (However, since the caller cannot assume the callee was going to +// save them, caller-saves registers are probably dead in the caller +// anyway, so compilers usually don't generate CFA for caller-saves +// registers.) +// +// - Exactly where the CFA points is a matter of convention that +// depends on the architecture and ABI in use. In the example, the +// CFA is the value the stack pointer had upon entry to the +// function, pointing at the saved return address. But on the x86, +// the call frame information generated by GCC follows the +// convention that the CFA is the address *after* the saved return +// address. +// +// But by definition, the CFA remains constant throughout the +// lifetime of the frame. This makes it a useful value for other +// columns to refer to. It is also gives debuggers a useful handle +// for identifying a frame. +// +// If you look at the table above, you'll notice that a given entry is +// often the same as the one immediately above it: most instructions +// change only one or two aspects of the stack frame, if they affect +// it at all. The DWARF format takes advantage of this fact, and +// reduces the size of the data by mentioning only the addresses and +// columns at which changes take place. So for the above, DWARF CFI +// data would only actually mention the following: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 +// func+2: cfa[-4] +// func+11: sp+20 +// func+21: r0 +// func+22: sp +// +// In fact, this is the way the parser reports CFI to the consumer: as +// a series of statements of the form, "At address X, column Y changed +// to Z," and related conventions for describing the initial state. +// +// Naturally, it would be impractical to have to scan the entire +// program's CFI, noting changes as we go, just to recover the +// unwinding rules in effect at one particular instruction. 
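+//
+// To make the table above concrete, here is one frame being unwound (a
+// worked example, not part of the original text). Suppose execution has
+// stopped somewhere in the range func+11 .. func+20 and the stack
+// pointer currently holds S. The row starting at func+11 applies, so:
+//
+//   CFA       = S + 20       (from the "sp+20" entry)
+//   caller r0 = *(CFA - 4)   (from the "cfa[-4]" entry)
+//   return pc = *(CFA + 0)   (from the "cfa[0]" entry in the ra column)
+//
+// which matches what the machine code actually did: r0 was stored four
+// bytes below the value the stack pointer had on entry, and the return
+// address still sits where the stack pointer pointed on entry. Even so,
+// scanning all of a program's CFI from the beginning just to build one
+// such row would be far too slow.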
+// To avoid this, CFI data is grouped into "entries", each of which
+// covers a specified range of addresses and begins with a complete
+// statement of the rules for all recoverable registers at that starting
+// address. Each entry typically covers a single function.
+//
+// Thus, to compute the contents of a given row of the table --- that
+// is, rules for recovering the CFA, RA, and registers at a given
+// instruction --- the consumer should find the entry that covers that
+// instruction's address, start with the initial state supplied at the
+// beginning of the entry, and work forward until it has processed all
+// the changes up to and including those for the present instruction.
+//
+// There are seven kinds of rules that can appear in an entry of the
+// table:
+//
+// - "undefined": The given register is not preserved by the callee;
+// its value cannot be recovered.
+//
+// - "same value": This register has the same value it did in the callee.
+//
+// - offset(N): The register is saved at offset N from the CFA.
+//
+// - val_offset(N): The value the register had in the caller is the
+// CFA plus offset N. (This is usually only useful for describing
+// the stack pointer.)
+//
+// - register(R): The register's value was saved in another register R.
+//
+// - expression(E): Evaluating the DWARF expression E using the
+// current frame's registers' values yields the address at which the
+// register was saved.
+//
+// - val_expression(E): Evaluating the DWARF expression E using the
+// current frame's registers' values yields the value the register
+// had in the caller.
+
+class CallFrameInfo {
+ public:
+ // The different kinds of entries one finds in CFI. Used internally,
+ // and for error reporting.
+ enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
+
+ // The handler class to which the parser hands the parsed call frame
+ // information. Defined below.
+ class Handler;
+
+ // A reporter class, which CallFrameInfo uses to report errors
+ // encountered while parsing call frame information. Defined below.
+ class Reporter;
+
+ // Create a DWARF CFI parser. BUFFER points to the contents of the
+ // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
+ // REPORTER is an error reporter the parser should use to report
+ // problems. READER is a ByteReader instance that has the endianness and
+ // address size set properly. Report the data we find to HANDLER.
+ //
+ // This class can also parse Linux C++ exception handling data, as found
+ // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
+ // placed in loadable segments so that it is present in the program's
+ // address space, and is interpreted by the C++ runtime to search the
+ // call stack for a handler interested in the exception being thrown,
+ // actually pop the frames, and find cleanup code to run.
+ //
+ // There are two differences between the call frame information described
+ // in the DWARF standard and the exception handling data Linux places in
+ // the .eh_frame section:
+ //
+ // - Exception handling data uses a different format for call frame
+ // information entry headers. The distinguished CIE id, the way FDEs
+ // refer to their CIEs, and the way the end of the series of entries is
+ // determined are all slightly different.
+ //
+ // If the constructor's EH_FRAME argument is true, then the
+ // CallFrameInfo parses the entry headers as Linux C++ exception
+ // handling data.
If EH_FRAME is false or omitted, the CallFrameInfo + // parses standard DWARF call frame information. + // + // - Linux C++ exception handling data uses CIE augmentation strings + // beginning with 'z' to specify the presence of additional data after + // the CIE and FDE headers and special encodings used for addresses in + // frame description entries. + // + // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or + // exception handling data if you have supplied READER with the base + // addresses needed to interpret the pointer encodings that 'z' + // augmentations can specify. See the ByteReader interface for details + // about the base addresses. See the CallFrameInfo::Handler interface + // for details about the additional information one might find in + // 'z'-augmented data. + // + // Thus: + // + // - If you are parsing standard DWARF CFI, as found in a .debug_frame + // section, you should pass false for the EH_FRAME argument, or omit + // it, and you need not worry about providing READER with the + // additional base addresses. + // + // - If you want to parse Linux C++ exception handling data from a + // .eh_frame section, you should pass EH_FRAME as true, and call + // READER's Set*Base member functions before calling our Start method. + // + // - If you want to parse DWARF CFI that uses the 'z' augmentations + // (although I don't think any toolchain ever emits such data), you + // could pass false for EH_FRAME, but call READER's Set*Base members. + // + // The extensions the Linux C++ ABI makes to DWARF for exception + // handling are described here, rather poorly: + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // The mechanics of C++ exception handling, personality routines, + // and language-specific data areas are described here, rather nicely: + // http://www.codesourcery.com/public/cxx-abi/abi-eh.html + + CallFrameInfo(const char* buffer, size_t buffer_length, ByteReader* reader, + Handler* handler, Reporter* reporter, bool eh_frame = false) + : buffer_(buffer), + buffer_length_(buffer_length), + reader_(reader), + handler_(handler), + reporter_(reporter), + eh_frame_(eh_frame) {} + + ~CallFrameInfo() {} + + // Parse the entries in BUFFER, reporting what we find to HANDLER. + // Return true if we reach the end of the section successfully, or + // false if we encounter an error. + bool Start(); + + // Return the textual name of KIND. For error reporting. + static const char* KindName(EntryKind kind); + + private: + struct CIE; + + // A CFI entry, either an FDE or a CIE. + struct Entry { + // The starting offset of the entry in the section, for error + // reporting. + size_t offset; + + // The start of this entry in the buffer. + const char* start; + + // Which kind of entry this is. + // + // We want to be able to use this for error reporting even while we're + // in the midst of parsing. Error reporting code may assume that kind, + // offset, and start fields are valid, although kind may be kUnknown. + EntryKind kind; + + // The end of this entry's common prologue (initial length and id), and + // the start of this entry's kind-specific fields. + const char* fields; + + // The start of this entry's instructions. + const char* instructions; + + // The address past the entry's last byte in the buffer. 
(Note that + // since offset points to the entry's initial length field, and the + // length field is the number of bytes after that field, this is not + // simply buffer_ + offset + length.) + const char* end; + + // For both DWARF CFI and .eh_frame sections, this is the CIE id in a + // CIE, and the offset of the associated CIE in an FDE. + uint64 id; + + // The CIE that applies to this entry, if we've parsed it. If this is a + // CIE, then this field points to this structure. + CIE* cie; + }; + + // A common information entry (CIE). + struct CIE : public Entry { + uint8 version; // CFI data version number + std::string augmentation; // vendor format extension markers + uint64 code_alignment_factor; // scale for code address adjustments + int data_alignment_factor; // scale for stack pointer adjustments + unsigned return_address_register; // which register holds the return addr + + // True if this CIE includes Linux C++ ABI 'z' augmentation data. + bool has_z_augmentation; + + // Parsed 'z' augmentation data. These are meaningful only if + // has_z_augmentation is true. + bool has_z_lsda; // The 'z' augmentation included 'L'. + bool has_z_personality; // The 'z' augmentation included 'P'. + bool has_z_signal_frame; // The 'z' augmentation included 'S'. + + // If has_z_lsda is true, this is the encoding to be used for language- + // specific data area pointers in FDEs. + DwarfPointerEncoding lsda_encoding; + + // If has_z_personality is true, this is the encoding used for the + // personality routine pointer in the augmentation data. + DwarfPointerEncoding personality_encoding; + + // If has_z_personality is true, this is the address of the personality + // routine --- or, if personality_encoding & DW_EH_PE_indirect, the + // address where the personality routine's address is stored. + uint64 personality_address; + + // This is the encoding used for addresses in the FDE header and + // in DW_CFA_set_loc instructions. This is always valid, whether + // or not we saw a 'z' augmentation string; its default value is + // DW_EH_PE_absptr, which is what normal DWARF CFI uses. + DwarfPointerEncoding pointer_encoding; + }; + + // A frame description entry (FDE). + struct FDE : public Entry { + uint64 address; // start address of described code + uint64 size; // size of described code, in bytes + + // If cie->has_z_lsda is true, then this is the language-specific data + // area's address --- or its address's address, if cie->lsda_encoding + // has the DW_EH_PE_indirect bit set. + uint64 lsda_address; + }; + + // Internal use. + class Rule; + class RuleMapLowLevel; + class RuleMap; + class State; + + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, + // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the + // data to parse. On success, populate ENTRY as appropriate, and return + // true. On failure, report the problem, and return false. Even if we + // return false, set ENTRY->end to the first byte after the entry if we + // were able to figure that out, or NULL if we weren't. + bool ReadEntryPrologue(const char* cursor, Entry* entry); + + // Parse the fields of a CIE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of CIE are + // populated; use CIE->fields and CIE->end as the start and limit for + // parsing. On success, populate the rest of *CIE, and return true; on + // failure, report the problem and return false. 
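+ //
+ // For orientation, the values a typical GCC-generated x86_64 .eh_frame
+ // CIE ends up carrying after parsing (typical compiler output, not
+ // values taken from this file) are: version == 1, augmentation == "zR",
+ // code_alignment_factor == 1, data_alignment_factor == -8,
+ // return_address_register == 16, has_z_augmentation == true, and
+ // pointer_encoding == 0x1b (DW_EH_PE_pcrel with a signed 4-byte form),
+ // meaning FDE start addresses are stored as self-relative offsets.
+ // ReadCIEFields, declared next, is what fills in those fields.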
+ bool ReadCIEFields(CIE* cie);
+
+ // Parse the fields of an FDE after the entry prologue, including any 'z'
+ // augmentation data. Assume that the 'Entry' fields of *FDE are
+ // initialized; use FDE->fields and FDE->end as the start and limit for
+ // parsing. Assume that FDE->cie is fully initialized. On success,
+ // populate the rest of *FDE, and return true; on failure, report the
+ // problem and return false.
+ bool ReadFDEFields(FDE* fde);
+
+ // Report that ENTRY is incomplete, and return false. This is just a
+ // trivial wrapper for invoking reporter_->Incomplete; it provides a
+ // little brevity.
+ bool ReportIncomplete(Entry* entry);
+
+ // Return true if ENCODING has the DW_EH_PE_indirect bit set.
+ static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
+ return encoding & DW_EH_PE_indirect;
+ }
+
+ // The contents of the call frame data (.debug_frame or .eh_frame
+ // section) we're parsing.
+ const char* buffer_;
+ size_t buffer_length_;
+
+ // For reading multi-byte values with the appropriate endianness.
+ ByteReader* reader_;
+
+ // The handler to which we should report the data we find.
+ Handler* handler_;
+
+ // For reporting problems in the info we're parsing.
+ Reporter* reporter_;
+
+ // True if we are processing .eh_frame-format data.
+ bool eh_frame_;
+};
+
+// The handler class for CallFrameInfo. The CFI parser calls the
+// member functions of a handler object to report the data it finds.
+class CallFrameInfo::Handler {
+ public:
+ // The pseudo-register number for the canonical frame address.
+ enum { kCFARegister = DW_REG_CFA };
+
+ Handler() {}
+ virtual ~Handler() {}
+
+ // The parser has found CFI for the machine code at ADDRESS,
+ // extending for LENGTH bytes. OFFSET is the offset of the frame
+ // description entry in the section, for use in error messages.
+ // VERSION is the version number of the CFI format. AUGMENTATION is
+ // a string describing any producer-specific extensions present in
+ // the data. RETURN_ADDRESS is the number of the register that holds
+ // the address to which the function should return.
+ //
+ // Entry should return true to process this CFI, or false to skip to
+ // the next entry.
+ //
+ // The parser invokes Entry for each Frame Description Entry (FDE)
+ // it finds. The parser doesn't report Common Information Entries
+ // to the handler explicitly; instead, if the handler elects to
+ // process a given FDE, the parser reiterates the appropriate CIE's
+ // contents at the beginning of the FDE's rules.
+ virtual bool Entry(size_t offset, uint64 address, uint64 length,
+ uint8 version, const std::string& augmentation,
+ unsigned return_address) = 0;
+
+ // When the Entry function returns true, the parser calls these
+ // handler functions repeatedly to describe the rules for recovering
+ // registers at each instruction in the given range of machine code.
+ // Immediately after a call to Entry, the handler should assume that
+ // the rule for each callee-saves register is "unchanged" --- that
+ // is, that the register still has the value it had in the caller.
+ //
+ // If a *Rule function returns true, we continue processing this entry's
+ // instructions. If a *Rule function returns false, we stop evaluating
+ // instructions, and skip to the next entry. Either way, we call End
+ // before going on to the next entry.
+ //
+ // In all of these functions, if the REG parameter is kCFARegister, then
+ // the rule describes how to find the canonical frame address.
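+ //
+ // As an illustration (not from the original comments), for the example
+ // function whose table appears near the top of this header, and writing
+ // SP and RA for the architecture's stack-pointer and return-address
+ // register numbers, a parser would make roughly this sequence of calls:
+ //
+ //   Entry(offset, func, 23, version, augmentation, RA);
+ //   ValOffsetRule(func,      kCFARegister, SP, 0);    // cfa = sp
+ //   OffsetRule   (func,      RA, kCFARegister, 0);    // ra  = cfa[0]
+ //   ValOffsetRule(func + 1,  kCFARegister, SP, 16);   // cfa = sp+16
+ //   OffsetRule   (func + 2,  0 /* r0 */, kCFARegister, -4);
+ //   ValOffsetRule(func + 11, kCFARegister, SP, 20);   // cfa = sp+20
+ //   ...
+ //   End();
+ //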
+ // kCFARegister may be passed as a BASE_REGISTER argument, meaning that + // the canonical frame address should be used as the base address for the + // computation. All other REG values will be positive. + + // At ADDRESS, register REG's value is not recoverable. + virtual bool UndefinedRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG's value is the same as that it had in + // the caller. + virtual bool SameValueRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG has been saved at offset OFFSET from + // BASE_REGISTER. + virtual bool OffsetRule(uint64 address, int reg, int base_register, + long offset) = 0; + + // At ADDRESS, the caller's value of register REG is the current + // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an + // address at which the register's value is saved.) + virtual bool ValOffsetRule(uint64 address, int reg, int base_register, + long offset) = 0; + + // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs + // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that + // BASE_REGISTER is the "home" for REG's saved value: if you want to + // assign to a variable whose home is REG in the calling frame, you + // should put the value in BASE_REGISTER. + virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the address at + // which REG was saved. + virtual bool ExpressionRule(uint64 address, int reg, + const ImageSlice& expression) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the caller's + // value for REG. (This rule doesn't provide an address at which the + // register's value is saved.) + virtual bool ValExpressionRule(uint64 address, int reg, + const ImageSlice& expression) = 0; + + // Indicate that the rules for the address range reported by the + // last call to Entry are complete. End should return true if + // everything is okay, or false if an error has occurred and parsing + // should stop. + virtual bool End() = 0; + + // Handler functions for Linux C++ exception handling data. These are + // only called if the data includes 'z' augmentation strings. + + // The Linux C++ ABI uses an extension of the DWARF CFI format to + // walk the stack to propagate exceptions from the throw to the + // appropriate catch, and do the appropriate cleanups along the way. + // CFI entries used for exception handling have two additional data + // associated with them: + // + // - The "language-specific data area" describes which exception + // types the function has 'catch' clauses for, and indicates how + // to go about re-entering the function at the appropriate catch + // clause. If the exception is not caught, it describes the + // destructors that must run before the frame is popped. + // + // - The "personality routine" is responsible for interpreting the + // language-specific data area's contents, and deciding whether + // the exception should continue to propagate down the stack, + // perhaps after doing some cleanup for this frame, or whether the + // exception will be caught here. + // + // In principle, the language-specific data area is opaque to + // everybody but the personality routine. In practice, these values + // may be useful or interesting to readers with extra context, and + // we have to at least skip them anyway, so we might as well report + // them to the handler. + + // This entry's exception handling personality routine's address is + // ADDRESS. 
If INDIRECT is true, then ADDRESS is the address at + // which the routine's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool PersonalityRoutine(uint64 address, bool indirect) { + return true; + } + + // This entry's language-specific data area (LSDA) is located at + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the area's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { + return true; + } + + // This entry describes a signal trampoline --- this frame is the + // caller of a signal handler. The default definition for this + // handler function simply returns true, allowing parsing of the + // entry to continue. + // + // The best description of the rationale for and meaning of signal + // trampoline CFI entries seems to be in the GCC bug database: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 + virtual bool SignalHandler() { return true; } +}; + +// The CallFrameInfo class makes calls on an instance of this class to +// report errors or warn about problems in the data it is parsing. +// These messages are sent to the message sink |aLog| provided to the +// constructor. +class CallFrameInfo::Reporter { + public: + // Create an error reporter which attributes troubles to the section + // named SECTION in FILENAME. + // + // Normally SECTION would be .debug_frame, but the Mac puts CFI data + // in a Mach-O section named __debug_frame. If we support + // Linux-style exception handling data, we could be reading an + // .eh_frame section. + Reporter(void (*aLog)(const char*), const std::string& filename, + const std::string& section = ".debug_frame") + : log_(aLog), filename_(filename), section_(section) {} + virtual ~Reporter() {} + + // The CFI entry at OFFSET ends too early to be well-formed. KIND + // indicates what kind of entry it is; KIND can be kUnknown if we + // haven't parsed enough of the entry to tell yet. + virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); + + // The .eh_frame data has a four-byte zero at OFFSET where the next + // entry's length would be; this is a terminator. However, the buffer + // length as given to the CallFrameInfo constructor says there should be + // more data. + virtual void EarlyEHTerminator(uint64 offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the + // section is not that large. + virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry + // there is not a CIE. + virtual void BadCIEId(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to a CIE with version number VERSION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // a version number we don't recognize. + virtual void UnrecognizedVersion(uint64 offset, int version); + + // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // augmentations we don't recognize. + virtual void UnrecognizedAugmentation(uint64 offset, + const std::string& augmentation); + + // The FDE at OFFSET contains an invalid or otherwise unusable Dwarf4 + // specific field (currently, only "address_size" or "segment_size"). 
+ // Parsing DWARF CFI with unexpected values here seems dubious at best, + // so we stop. WHAT gives a little more information about what is wrong. + virtual void InvalidDwarf4Artefact(uint64 offset, const char* what); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not + // a valid encoding. + virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends + // on a base address which has not been supplied. + virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); + + // The CIE at OFFSET contains a DW_CFA_restore instruction at + // INSN_OFFSET, which may not appear in a CIE. + virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); + + // The entry at OFFSET, of kind KIND, has an unrecognized + // instruction at INSN_OFFSET. + virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, establishes a rule that cites the CFA, but we have not + // established a CFA rule yet. + virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, is a DW_CFA_restore_state instruction, but the stack of + // saved states is empty. + virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry + // at OFFSET, of kind KIND, would restore a state that has no CFA + // rule, whereas the current state does have a CFA rule. This is + // bogus input, which the CallFrameInfo::Handler interface doesn't + // (and shouldn't) have any way to report. + virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + private: + // A logging sink function, as supplied by LUL's user. + void (*log_)(const char*); + + protected: + // The name of the file whose CFI we're reading. + std::string filename_; + + // The name of the CFI section in that file. + std::string section_; +}; + +using lul::CallFrameInfo; +using lul::Summariser; + +// A class that accepts parsed call frame information from the DWARF +// CFI parser and populates a google_breakpad::Module object with the +// contents. +class DwarfCFIToModule : public CallFrameInfo::Handler { + public: + // DwarfCFIToModule uses an instance of this class to report errors + // detected while converting DWARF CFI to Breakpad STACK CFI records. + class Reporter { + public: + // Create a reporter that writes messages to the message sink + // |aLog|. FILE is the name of the file we're processing, and + // SECTION is the name of the section within that file that we're + // looking at (.debug_frame, .eh_frame, etc.). + Reporter(void (*aLog)(const char*), const std::string& file, + const std::string& section) + : log_(aLog), file_(file), section_(section) {} + virtual ~Reporter() {} + + // The DWARF CFI entry at OFFSET says that REG is undefined, but the + // Breakpad symbol file format cannot express this. + virtual void UndefinedNotSupported(size_t offset, const UniqueString* reg); + + // The DWARF CFI entry at OFFSET says that REG uses a DWARF + // expression to find its value, but parseDwarfExpr could not + // convert it to a sequence of PfxInstrs. 
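+ // (For instance --- an illustrative case, not an exhaustive list ---
+ // an expression built from DWARF opcodes or registers that have no
+ // PfxInstr counterpart would be reported through this hook.)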
+ virtual void ExpressionCouldNotBeSummarised(size_t offset, + const UniqueString* reg); + + private: + // A logging sink function, as supplied by LUL's user. + void (*log_)(const char*); + + protected: + std::string file_, section_; + }; + + // Register name tables. If TABLE is a vector returned by one of these + // functions, then TABLE[R] is the name of the register numbered R in + // DWARF call frame information. + class RegisterNames { + public: + // Intel's "x86" or IA-32. + static unsigned int I386(); + + // AMD x86_64, AMD64, Intel EM64T, or Intel 64 + static unsigned int X86_64(); + + // ARM. + static unsigned int ARM(); + + // AARCH64. + static unsigned int ARM64(); + + // MIPS. + static unsigned int MIPS(); + }; + + // Create a handler for the dwarf2reader::CallFrameInfo parser that + // records the stack unwinding information it receives in SUMM. + // + // Use REGISTER_NAMES[I] as the name of register number I; *this + // keeps a reference to the vector, so the vector should remain + // alive for as long as the DwarfCFIToModule does. + // + // Use REPORTER for reporting problems encountered in the conversion + // process. + DwarfCFIToModule(const unsigned int num_dw_regs, Reporter* reporter, + ByteReader* reader, + /*MOD*/ UniqueStringUniverse* usu, + /*OUT*/ Summariser* summ) + : summ_(summ), + usu_(usu), + num_dw_regs_(num_dw_regs), + reporter_(reporter), + reader_(reader), + return_address_(-1) {} + virtual ~DwarfCFIToModule() {} + + virtual bool Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const std::string& augmentation, + unsigned return_address) override; + virtual bool UndefinedRule(uint64 address, int reg) override; + virtual bool SameValueRule(uint64 address, int reg) override; + virtual bool OffsetRule(uint64 address, int reg, int base_register, + long offset) override; + virtual bool ValOffsetRule(uint64 address, int reg, int base_register, + long offset) override; + virtual bool RegisterRule(uint64 address, int reg, + int base_register) override; + virtual bool ExpressionRule(uint64 address, int reg, + const ImageSlice& expression) override; + virtual bool ValExpressionRule(uint64 address, int reg, + const ImageSlice& expression) override; + virtual bool End() override; + + private: + // Return the name to use for register I. + const UniqueString* RegisterName(int i); + + // The Summariser to which we should give entries + Summariser* summ_; + + // Universe for creating UniqueStrings in, should that be necessary. + UniqueStringUniverse* usu_; + + // The number of Dwarf-defined register names for this architecture. + const unsigned int num_dw_regs_; + + // The reporter to use to report problems. + Reporter* reporter_; + + // The ByteReader to use for parsing Dwarf expressions. + ByteReader* reader_; + + // The section offset of the current frame description entry, for + // use in error messages. + size_t entry_offset_; + + // The return address column for that entry. + unsigned return_address_; +}; + +// Convert the Dwarf expression in |expr| into PfxInstrs stored in the +// SecMap referred to by |summ|, and return the index of the starting +// PfxInstr added, which must be >= 0. In case of failure return -1. 
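+//
+// To show how the classes declared in this header fit together, here is
+// a hypothetical sketch of driving them over a mapped .eh_frame image
+// (the variables |ehFrameImage|, |ehFrameSize|, |ehFrameAVMA|, |secMap|,
+// |usu|, |textBias| and |log| are stand-ins, not names from this code):
+//
+//   ByteReader reader(ENDIANNESS_LITTLE);
+//   reader.SetAddressSize(sizeof(void*));
+//   reader.SetCFIDataBase(ehFrameAVMA, ehFrameImage);
+//
+//   Summariser summ(secMap, textBias, log);
+//   DwarfCFIToModule::Reporter modReporter(log, "some.so", ".eh_frame");
+//   DwarfCFIToModule handler(DwarfCFIToModule::RegisterNames::X86_64(),
+//                            &modReporter, &reader, usu, &summ);
+//
+//   CallFrameInfo::Reporter cfiReporter(log, "some.so", ".eh_frame");
+//   CallFrameInfo parser(ehFrameImage, ehFrameSize, &reader, &handler,
+//                        &cfiReporter, /*eh_frame=*/true);
+//   parser.Start();
+//
+// Each FDE the parser accepts is then reported through the Handler
+// methods above and summarised into RuleSets in the SecMap. The helper
+// declared next is used along the way to turn DWARF expressions into
+// PfxInstrs: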
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, + ImageSlice expr, bool debug, bool pushCfaAtStart, + bool derefAtEnd); + +} // namespace lul + +#endif // LulDwarfExt_h diff --git a/tools/profiler/lul/LulDwarfInt.h b/tools/profiler/lul/LulDwarfInt.h new file mode 100644 index 0000000000..b72c6e08e3 --- /dev/null +++ b/tools/profiler/lul/LulDwarfInt.h @@ -0,0 +1,193 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2008, 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file is derived from the following file in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/dwarf2enums.h + +#ifndef LulDwarfInt_h +#define LulDwarfInt_h + +#include "LulCommonExt.h" +#include "LulDwarfExt.h" + +namespace lul { + +// These enums do not follow the google3 style only because they are +// known universally (specs, other implementations) by the names in +// exactly this capitalization. +// Tag names and codes. + +// Call Frame Info instructions. +enum DwarfCFI { + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80, + DW_CFA_restore = 0xc0, + DW_CFA_nop = 0x00, + DW_CFA_set_loc = 0x01, + DW_CFA_advance_loc1 = 0x02, + DW_CFA_advance_loc2 = 0x03, + DW_CFA_advance_loc4 = 0x04, + DW_CFA_offset_extended = 0x05, + DW_CFA_restore_extended = 0x06, + DW_CFA_undefined = 0x07, + DW_CFA_same_value = 0x08, + DW_CFA_register = 0x09, + DW_CFA_remember_state = 0x0a, + DW_CFA_restore_state = 0x0b, + DW_CFA_def_cfa = 0x0c, + DW_CFA_def_cfa_register = 0x0d, + DW_CFA_def_cfa_offset = 0x0e, + DW_CFA_def_cfa_expression = 0x0f, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + + // Opcodes in this range are reserved for user extensions. 
+ DW_CFA_lo_user = 0x1c, + DW_CFA_hi_user = 0x3f, + + // SGI/MIPS specific. + DW_CFA_MIPS_advance_loc8 = 0x1d, + + // GNU extensions. + DW_CFA_GNU_window_save = 0x2d, + DW_CFA_GNU_args_size = 0x2e, + DW_CFA_GNU_negative_offset_extended = 0x2f +}; + +// Exception handling 'z' augmentation letters. +enum DwarfZAugmentationCodes { + // If the CFI augmentation string begins with 'z', then the CIE and FDE + // have an augmentation data area just before the instructions, whose + // contents are determined by the subsequent augmentation letters. + DW_Z_augmentation_start = 'z', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, and the FDE + // augmentation data includes a language-specific data area pointer, + // represented using that encoding. + DW_Z_has_LSDA = 'L', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, followed by a pointer + // to a personality routine, represented using that encoding. + DW_Z_has_personality_routine = 'P', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding describing how the FDE's + // initial location, address range, and DW_CFA_set_loc operands are + // encoded. + DW_Z_has_FDE_address_encoding = 'R', + + // If this letter is present in a 'z' augmentation string, then code + // addresses covered by FDEs that cite this CIE are signal delivery + // trampolines. Return addresses of frames in trampolines should not be + // adjusted as described in section 6.4.4 of the DWARF 3 spec. + DW_Z_is_signal_trampoline = 'S' +}; + +// Expression opcodes +enum DwarfExpressionOpcodes { + DW_OP_addr = 0x03, + DW_OP_deref = 0x06, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_const8u = 0x0e, + DW_OP_const8s = 0x0f, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1a, + DW_OP_div = 0x1b, + DW_OP_minus = 0x1c, + DW_OP_mod = 0x1d, + DW_OP_mul = 0x1e, + DW_OP_neg = 0x1f, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_skip = 0x2f, + DW_OP_bra = 0x28, + DW_OP_eq = 0x29, + DW_OP_ge = 0x2a, + DW_OP_gt = 0x2b, + DW_OP_le = 0x2c, + DW_OP_lt = 0x2d, + DW_OP_ne = 0x2e, + DW_OP_lit0 = 0x30, + DW_OP_lit31 = 0x4f, + DW_OP_reg0 = 0x50, + DW_OP_reg31 = 0x6f, + DW_OP_breg0 = 0x70, + DW_OP_breg31 = 0x8f, + DW_OP_regx = 0x90, + DW_OP_fbreg = 0x91, + DW_OP_bregx = 0x92, + DW_OP_piece = 0x93, + DW_OP_deref_size = 0x94, + DW_OP_xderef_size = 0x95, + DW_OP_nop = 0x96, + DW_OP_push_object_address = 0x97, + DW_OP_call2 = 0x98, + DW_OP_call4 = 0x99, + DW_OP_call_ref = 0x9a, + DW_OP_form_tls_address = 0x9b, + DW_OP_call_frame_cfa = 0x9c, + DW_OP_bit_piece = 0x9d, + DW_OP_lo_user = 0xe0, + DW_OP_hi_user = 0xff +}; + +} // namespace lul + +#endif // LulDwarfInt_h diff --git a/tools/profiler/lul/LulDwarfSummariser.cpp b/tools/profiler/lul/LulDwarfSummariser.cpp new file mode 100644 index 0000000000..e9172c3e18 --- /dev/null +++ b/tools/profiler/lul/LulDwarfSummariser.cpp @@ -0,0 +1,549 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* 
This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "LulDwarfSummariser.h" + +#include "LulDwarfExt.h" + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +// Set this to 1 for verbose logging +#define DEBUG_SUMMARISER 0 + +namespace lul { + +// Do |s64|'s lowest 32 bits sign extend back to |s64| itself? +static inline bool fitsIn32Bits(int64 s64) { + return s64 == ((s64 & 0xffffffff) ^ 0x80000000) - 0x80000000; +} + +// Check a LExpr prefix expression, starting at pfxInstrs[start] up to +// the next PX_End instruction, to ensure that: +// * It only mentions registers that are tracked on this target +// * The start point is sane +// If the expression is ok, return NULL. Else return a pointer +// a const char* holding a bit of text describing the problem. +static const char* checkPfxExpr(const vector<PfxInstr>* pfxInstrs, + int64_t start) { + size_t nInstrs = pfxInstrs->size(); + if (start < 0 || start >= (ssize_t)nInstrs) { + return "bogus start point"; + } + size_t i; + for (i = start; i < nInstrs; i++) { + PfxInstr pxi = (*pfxInstrs)[i]; + if (pxi.mOpcode == PX_End) break; + if (pxi.mOpcode == PX_DwReg && + !registerIsTracked((DW_REG_NUMBER)pxi.mOperand)) { + return "uses untracked reg"; + } + } + return nullptr; // success +} + +Summariser::Summariser(SecMap* aSecMap, uintptr_t aTextBias, + void (*aLog)(const char*)) + : mSecMap(aSecMap), mTextBias(aTextBias), mLog(aLog) { + mCurrAddr = 0; + mMax1Addr = 0; // Gives an empty range. + + // Initialise the running RuleSet to "haven't got a clue" status. + new (&mCurrRules) RuleSet(); +} + +void Summariser::Entry(uintptr_t aAddress, uintptr_t aLength) { + aAddress += mTextBias; + if (DEBUG_SUMMARISER) { + char buf[100]; + SprintfLiteral(buf, "LUL Entry(%llx, %llu)\n", + (unsigned long long int)aAddress, + (unsigned long long int)aLength); + mLog(buf); + } + // This throws away any previous summary, that is, assumes + // that the previous summary, if any, has been properly finished + // by a call to End(). + mCurrAddr = aAddress; + mMax1Addr = aAddress + aLength; + new (&mCurrRules) RuleSet(); +} + +void Summariser::Rule(uintptr_t aAddress, int aNewReg, LExprHow how, + int16_t oldReg, int64_t offset) { + aAddress += mTextBias; + if (DEBUG_SUMMARISER) { + char buf[100]; + if (how == NODEREF || how == DEREF) { + bool deref = how == DEREF; + SprintfLiteral(buf, "LUL 0x%llx old-r%d = %sr%d + %lld%s\n", + (unsigned long long int)aAddress, aNewReg, + deref ? "*(" : "", (int)oldReg, (long long int)offset, + deref ? ")" : ""); + } else if (how == PFXEXPR) { + SprintfLiteral(buf, "LUL 0x%llx old-r%d = pfx-expr-at %lld\n", + (unsigned long long int)aAddress, aNewReg, + (long long int)offset); + } else { + SprintfLiteral(buf, "LUL 0x%llx old-r%d = (invalid LExpr!)\n", + (unsigned long long int)aAddress, aNewReg); + } + mLog(buf); + } + + if (mCurrAddr < aAddress) { + // Flush the existing summary first. + mSecMap->AddRuleSet(&mCurrRules, mCurrAddr, aAddress - mCurrAddr); + if (DEBUG_SUMMARISER) { + mLog("LUL "); + mCurrRules.Print(mCurrAddr, aAddress - mCurrAddr, mLog); + mLog("\n"); + } + mCurrAddr = aAddress; + } + + // If for some reason summarisation fails, either or both of these + // become non-null and point at constant text describing the + // problem. 
Using two rather than just one avoids complications of + // having to concatenate two strings to produce a complete error message. + const char* reason1 = nullptr; + const char* reason2 = nullptr; + + // |offset| needs to be a 32 bit value that sign extends to 64 bits + // on a 64 bit target. We will need to incorporate |offset| into + // any LExpr made here. So we may as well check it right now. + if (!fitsIn32Bits(offset)) { + reason1 = "offset not in signed 32-bit range"; + goto cant_summarise; + } + + // FIXME: factor out common parts of the arch-dependent summarisers. + +#if defined(GP_ARCH_arm) + + // ----------------- arm ----------------- // + + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + case DW_REG_CFA: + // This is a rule that defines the CFA. The only forms we + // choose to represent are: r7/11/12/13 + offset. The offset + // must fit into 32 bits since 'uintptr_t' is 32 bit on ARM, + // hence there is no need to check it for overflow. + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + switch (oldReg) { + case DW_REG_ARM_R7: + case DW_REG_ARM_R11: + case DW_REG_ARM_R12: + case DW_REG_ARM_R13: + break; + default: + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_ARM_R7: + case DW_REG_ARM_R11: + case DW_REG_ARM_R12: + case DW_REG_ARM_R13: + case DW_REG_ARM_R14: + case DW_REG_ARM_R15: { + // This is a new rule for R7, R11, R12, R13 (SP), R14 (LR) or + // R15 (the return address). + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for R7/11/12/13/14/15: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for R7/11/12/13/14/15: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_ARM_R7: + mCurrRules.mR7expr = expr; + break; + case DW_REG_ARM_R11: + mCurrRules.mR11expr = expr; + break; + case DW_REG_ARM_R12: + mCurrRules.mR12expr = expr; + break; + case DW_REG_ARM_R13: + mCurrRules.mR13expr = expr; + break; + case DW_REG_ARM_R14: + mCurrRules.mR14expr = expr; + break; + case DW_REG_ARM_R15: + mCurrRules.mR15expr = expr; + break; + default: + MOZ_ASSERT(0); + } + break; + } + + default: + // Leave |reason1| and |reason2| unset here. This program point + // is reached so often that it causes a flood of "Can't + // summarise" messages. In any case, we don't really care about + // the fact that this summary would produce a new value for a + // register that we're not tracking. We do on the other hand + // care if the summary's expression *uses* a register that we're + // not tracking. But in that case one of the above failures + // should tell us which. + goto cant_summarise; + } + + // Mark callee-saved registers (r4 .. r11) as unchanged, if there is + // no other information about them. FIXME: do this just once, at + // the point where the ruleset is committed. 
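+ // Here "unchanged" means the identity expression old-rN = rN + 0, i.e.
+ // LExpr(NODEREF, DW_REG_ARM_R<N>, 0), exactly as constructed below. So,
+ // to give a concrete (illustrative) case: for a function whose CFI only
+ // ever states "CFA = r13 + 8" and "r14 = cfa[-4]", the committed
+ // RuleSet still ends up with usable entries for r7, r11 and r12 (each
+ // recovered as a copy of its current value), for r13 (recovered as the
+ // CFA itself) and for r15 (recovered from r14).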
+ if (mCurrRules.mR7expr.mHow == UNKNOWN) { + mCurrRules.mR7expr = LExpr(NODEREF, DW_REG_ARM_R7, 0); + } + if (mCurrRules.mR11expr.mHow == UNKNOWN) { + mCurrRules.mR11expr = LExpr(NODEREF, DW_REG_ARM_R11, 0); + } + if (mCurrRules.mR12expr.mHow == UNKNOWN) { + mCurrRules.mR12expr = LExpr(NODEREF, DW_REG_ARM_R12, 0); + } + + // The old r13 (SP) value before the call is always the same as the + // CFA. + mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0); + + // If there's no information about R15 (the return address), say + // it's a copy of R14 (the link register). + if (mCurrRules.mR15expr.mHow == UNKNOWN) { + mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0); + } + +#elif defined(GP_ARCH_arm64) + + // ----------------- arm64 ----------------- // + + switch (aNewReg) { + case DW_REG_CFA: + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + switch (oldReg) { + case DW_REG_AARCH64_X29: + case DW_REG_AARCH64_SP: + break; + default: + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_AARCH64_X29: + case DW_REG_AARCH64_X30: + case DW_REG_AARCH64_SP: { + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for X29/X30/SP: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for X29/X30/SP: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_AARCH64_X29: + mCurrRules.mX29expr = expr; + break; + case DW_REG_AARCH64_X30: + mCurrRules.mX30expr = expr; + break; + case DW_REG_AARCH64_SP: + mCurrRules.mSPexpr = expr; + break; + default: + MOZ_ASSERT(0); + } + break; + } + default: + // Leave |reason1| and |reason2| unset here, for the reasons explained + // in the analogous point + goto cant_summarise; + } + + if (mCurrRules.mX29expr.mHow == UNKNOWN) { + mCurrRules.mX29expr = LExpr(NODEREF, DW_REG_AARCH64_X29, 0); + } + if (mCurrRules.mX30expr.mHow == UNKNOWN) { + mCurrRules.mX30expr = LExpr(NODEREF, DW_REG_AARCH64_X30, 0); + } + // On aarch64, it seems the old SP value before the call is always the + // same as the CFA. Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mSPexpr.mHow == UNKNOWN) { + mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0); + } +#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + + // ---------------- x64/x86 ---------------- // + + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + case DW_REG_CFA: { + // This is a rule that defines the CFA. The only forms we choose to + // represent are: = SP+offset, = FP+offset, or =prefix-expr. 
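+ // For instance (illustrative, not from the original comments): the
+ // common prologue rule "CFA = rsp + 8" arrives here as how == NODEREF,
+ // oldReg == DW_REG_INTEL_XSP and offset == 8, and is accepted below,
+ // whereas a hypothetical deref form "CFA = *(rsp + 8)" (how == DEREF)
+ // is rejected.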
+ switch (how) { + case NODEREF: + if (oldReg != DW_REG_INTEL_XSP && oldReg != DW_REG_INTEL_XBP) { + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + break; + case DEREF: + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for CFA: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + } + + case DW_REG_INTEL_XSP: + case DW_REG_INTEL_XBP: + case DW_REG_INTEL_XIP: { + // This is a new rule for XSP, XBP or XIP (the return address). + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for XSP/XBP/XIP: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for XSP/XBP/XIP: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_INTEL_XBP: + mCurrRules.mXbpExpr = expr; + break; + case DW_REG_INTEL_XSP: + mCurrRules.mXspExpr = expr; + break; + case DW_REG_INTEL_XIP: + mCurrRules.mXipExpr = expr; + break; + default: + MOZ_CRASH("impossible value for aNewReg"); + } + break; + } + + default: + // Leave |reason1| and |reason2| unset here, for the reasons + // explained in the analogous point in the ARM case just above. + goto cant_summarise; + } + + // On Intel, it seems the old SP value before the call is always the + // same as the CFA. Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mXspExpr.mHow == UNKNOWN) { + mCurrRules.mXspExpr = LExpr(NODEREF, DW_REG_CFA, 0); + } + + // Also, gcc says "Undef" for BP when it is unchanged. + if (mCurrRules.mXbpExpr.mHow == UNKNOWN) { + mCurrRules.mXbpExpr = LExpr(NODEREF, DW_REG_INTEL_XBP, 0); + } + +#elif defined(GP_ARCH_mips64) + // ---------------- mips ---------------- // + // + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + case DW_REG_CFA: + // This is a rule that defines the CFA. The only forms we can + // represent are: = SP+offset or = FP+offset. + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + if (oldReg != DW_REG_MIPS_SP && oldReg != DW_REG_MIPS_FP) { + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_MIPS_SP: + case DW_REG_MIPS_FP: + case DW_REG_MIPS_PC: { + // This is a new rule for SP, FP or PC (the return address). + switch (how) { + case NODEREF: + case DEREF: + // Check the old register is one we're tracking. 
+ if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for SP/FP/PC: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for SP/FP/PC: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_MIPS_FP: + mCurrRules.mFPexpr = expr; + break; + case DW_REG_MIPS_SP: + mCurrRules.mSPexpr = expr; + break; + case DW_REG_MIPS_PC: + mCurrRules.mPCexpr = expr; + break; + default: + MOZ_CRASH("impossible value for aNewReg"); + } + break; + } + default: + // Leave |reason1| and |reason2| unset here, for the reasons + // explained in the analogous point in the ARM case just above. + goto cant_summarise; + } + + // On MIPS, it seems the old SP value before the call is always the + // same as the CFA. Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mSPexpr.mHow == UNKNOWN) { + mCurrRules.mSPexpr = LExpr(NODEREF, DW_REG_CFA, 0); + } + + // Also, gcc says "Undef" for FP when it is unchanged. + if (mCurrRules.mFPexpr.mHow == UNKNOWN) { + mCurrRules.mFPexpr = LExpr(NODEREF, DW_REG_MIPS_FP, 0); + } + +#else + +# error "Unsupported arch" +#endif + + return; + +cant_summarise: + if (reason1 || reason2) { + char buf[200]; + SprintfLiteral(buf, + "LUL can't summarise: " + "SVMA=0x%llx: %s%s, expr=LExpr(%s,%u,%lld)\n", + (unsigned long long int)(aAddress - mTextBias), + reason1 ? reason1 : "", reason2 ? reason2 : "", + NameOf_LExprHow(how), (unsigned int)oldReg, + (long long int)offset); + mLog(buf); + } +} + +uint32_t Summariser::AddPfxInstr(PfxInstr pfxi) { + return mSecMap->AddPfxInstr(pfxi); +} + +void Summariser::End() { + if (DEBUG_SUMMARISER) { + mLog("LUL End\n"); + } + if (mCurrAddr < mMax1Addr) { + mSecMap->AddRuleSet(&mCurrRules, mCurrAddr, mMax1Addr - mCurrAddr); + if (DEBUG_SUMMARISER) { + mLog("LUL "); + mCurrRules.Print(mCurrAddr, mMax1Addr - mCurrAddr, mLog); + mLog("\n"); + } + } +} + +} // namespace lul diff --git a/tools/profiler/lul/LulDwarfSummariser.h b/tools/profiler/lul/LulDwarfSummariser.h new file mode 100644 index 0000000000..30f1ba23c1 --- /dev/null +++ b/tools/profiler/lul/LulDwarfSummariser.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulDwarfSummariser_h +#define LulDwarfSummariser_h + +#include "LulMainInt.h" + +namespace lul { + +class Summariser { + public: + Summariser(SecMap* aSecMap, uintptr_t aTextBias, void (*aLog)(const char*)); + + virtual void Entry(uintptr_t aAddress, uintptr_t aLength); + virtual void End(); + + // Tell the summariser that the value for |aNewReg| at |aAddress| is + // recovered using the LExpr that can be constructed using the + // components |how|, |oldReg| and |offset|. The summariser will + // inspect the components and may reject them for various reasons, + // but the hope is that it will find them acceptable and record this + // rule permanently. 
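+  // For instance, the DWARF CFI facts "the CFA is r13+16" and "r11 is
+  // saved at CFA-8", holding at address A on ARM, would arrive as
+  //   Rule(A, DW_REG_CFA, NODEREF, DW_REG_ARM_R13, 16);
+  //   Rule(A, DW_REG_ARM_R11, DEREF, DW_REG_CFA, -8);
+  // (the set of acceptable registers differs per architecture).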
+ virtual void Rule(uintptr_t aAddress, int aNewReg, LExprHow how, + int16_t oldReg, int64_t offset); + + virtual uint32_t AddPfxInstr(PfxInstr pfxi); + + // Send output to the logging sink, for debugging. + virtual void Log(const char* str) { mLog(str); } + + private: + // The SecMap in which we park the finished summaries (RuleSets) and + // also any PfxInstrs derived from Dwarf expressions. + SecMap* mSecMap; + + // Running state for the current summary (RuleSet) under construction. + RuleSet mCurrRules; + + // The start of the address range to which the RuleSet under + // construction applies. + uintptr_t mCurrAddr; + + // The highest address, plus one, for which the RuleSet under + // construction could possibly apply. If there are no further + // incoming events then mCurrRules will eventually be emitted + // as-is, for the range mCurrAddr.. mMax1Addr - 1, if that is + // nonempty. + uintptr_t mMax1Addr; + + // The bias value (to add to the SVMAs, to get AVMAs) to be used + // when adding entries into mSecMap. + uintptr_t mTextBias; + + // A logging sink, for debugging. + void (*mLog)(const char* aFmt); +}; + +} // namespace lul + +#endif // LulDwarfSummariser_h diff --git a/tools/profiler/lul/LulElf.cpp b/tools/profiler/lul/LulElf.cpp new file mode 100644 index 0000000000..28980a1349 --- /dev/null +++ b/tools/profiler/lul/LulElf.cpp @@ -0,0 +1,887 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2011, 2012 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// (derived from) +// dump_symbols.cc: implement google_breakpad::WriteSymbolFile: +// Find all the debugging info in a file and dump it as a Breakpad symbol file. +// +// dump_symbols.h: Read debugging information from an ELF file, and write +// it out as a Breakpad symbol file. 
+ +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/linux/dump_symbols.cc +// src/common/linux/elfutils.cc +// src/common/linux/file_id.cc + +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include <arpa/inet.h> + +#include <set> +#include <string> +#include <vector> + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "PlatformMacros.h" +#include "LulCommonExt.h" +#include "LulDwarfExt.h" +#include "LulElfInt.h" +#include "LulMainInt.h" + +#if defined(GP_PLAT_arm_android) && !defined(SHT_ARM_EXIDX) +// bionic and older glibsc don't define it +# define SHT_ARM_EXIDX (SHT_LOPROC + 1) +#endif + +#if (defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)) && \ + !defined(SHT_X86_64_UNWIND) +// This is sometimes necessary on x86_64-android and x86_64-linux. +# define SHT_X86_64_UNWIND 0x70000001 +#endif + +// Old Linux header doesn't define EM_AARCH64 +#ifndef EM_AARCH64 +# define EM_AARCH64 183 +#endif + +// This namespace contains helper functions. +namespace { + +using lul::DwarfCFIToModule; +using lul::FindElfSectionByName; +using lul::GetOffset; +using lul::IsValidElf; +using lul::Module; +using lul::scoped_ptr; +using lul::Summariser; +using lul::UniqueStringUniverse; +using std::set; +using std::string; +using std::vector; + +// +// FDWrapper +// +// Wrapper class to make sure opened file is closed. +// +class FDWrapper { + public: + explicit FDWrapper(int fd) : fd_(fd) {} + ~FDWrapper() { + if (fd_ != -1) close(fd_); + } + int get() { return fd_; } + int release() { + int fd = fd_; + fd_ = -1; + return fd; + } + + private: + int fd_; +}; + +// +// MmapWrapper +// +// Wrapper class to make sure mapped regions are unmapped. +// +class MmapWrapper { + public: + MmapWrapper() : is_set_(false), base_(NULL), size_(0) {} + ~MmapWrapper() { + if (is_set_ && base_ != NULL) { + MOZ_ASSERT(size_ > 0); + munmap(base_, size_); + } + } + void set(void* mapped_address, size_t mapped_size) { + is_set_ = true; + base_ = mapped_address; + size_ = mapped_size; + } + void release() { + MOZ_ASSERT(is_set_); + is_set_ = false; + base_ = NULL; + size_ = 0; + } + + private: + bool is_set_; + void* base_; + size_t size_; +}; + +// Set NUM_DW_REGNAMES to be the number of Dwarf register names +// appropriate to the machine architecture given in HEADER. Return +// true on success, or false if HEADER's machine architecture is not +// supported. 
+template <typename ElfClass> +bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, + unsigned int* num_dw_regnames) { + switch (elf_header->e_machine) { + case EM_386: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386(); + return true; + case EM_ARM: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM(); + return true; + case EM_X86_64: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64(); + return true; + case EM_MIPS: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::MIPS(); + return true; + case EM_AARCH64: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM64(); + return true; + default: + MOZ_ASSERT(0); + return false; + } +} + +template <typename ElfClass> +bool LoadDwarfCFI(const string& dwarf_filename, + const typename ElfClass::Ehdr* elf_header, + const char* section_name, + const typename ElfClass::Shdr* section, const bool eh_frame, + const typename ElfClass::Shdr* got_section, + const typename ElfClass::Shdr* text_section, + const bool big_endian, SecMap* smap, uintptr_t text_bias, + UniqueStringUniverse* usu, void (*log)(const char*)) { + // Find the appropriate set of register names for this file's + // architecture. + unsigned int num_dw_regs = 0; + if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) { + fprintf(stderr, + "%s: unrecognized ELF machine architecture '%d';" + " cannot convert DWARF call frame information\n", + dwarf_filename.c_str(), elf_header->e_machine); + return false; + } + + const lul::Endianness endianness = + big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE; + + // Find the call frame information and its size. + const char* cfi = GetOffset<ElfClass, char>(elf_header, section->sh_offset); + size_t cfi_size = section->sh_size; + + // Plug together the parser, handler, and their entourages. + + // Here's a summariser, which will receive the output of the + // parser, create summaries, and add them to |smap|. + Summariser summ(smap, text_bias, log); + + lul::ByteReader reader(endianness); + reader.SetAddressSize(ElfClass::kAddrSize); + + DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name); + DwarfCFIToModule handler(num_dw_regs, &module_reporter, &reader, usu, &summ); + + // Provide the base addresses for .eh_frame encoded pointers, if + // possible. + reader.SetCFIDataBase(section->sh_addr, cfi); + if (got_section) reader.SetDataBase(got_section->sh_addr); + if (text_section) reader.SetTextBase(text_section->sh_addr); + + lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename, + section_name); + lul::CallFrameInfo parser(cfi, cfi_size, &reader, &handler, &dwarf_reporter, + eh_frame); + parser.Start(); + + return true; +} + +bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, + void** elf_header) { + int obj_fd = open(obj_file.c_str(), O_RDONLY); + if (obj_fd < 0) { + fprintf(stderr, "Failed to open ELF file '%s': %s\n", obj_file.c_str(), + strerror(errno)); + return false; + } + FDWrapper obj_fd_wrapper(obj_fd); + struct stat st; + if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { + fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", obj_file.c_str(), + strerror(errno)); + return false; + } + // Mapping it read-only is good enough. In any case, mapping it + // read-write confuses Valgrind's debuginfo acquire/discard + // heuristics, making it hard to profile the profiler. 
+ void* obj_base = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, obj_fd, 0); + if (obj_base == MAP_FAILED) { + fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", obj_file.c_str(), + strerror(errno)); + return false; + } + map_wrapper->set(obj_base, st.st_size); + *elf_header = obj_base; + if (!IsValidElf(*elf_header)) { + fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); + return false; + } + return true; +} + +// Get the endianness of ELF_HEADER. If it's invalid, return false. +template <typename ElfClass> +bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, + bool* big_endian) { + if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { + *big_endian = false; + return true; + } + if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) { + *big_endian = true; + return true; + } + + fprintf(stderr, "bad data encoding in ELF header: %d\n", + elf_header->e_ident[EI_DATA]); + return false; +} + +// +// LoadSymbolsInfo +// +// Holds the state between the two calls to LoadSymbols() in case it's necessary +// to follow the .gnu_debuglink section and load debug information from a +// different file. +// +template <typename ElfClass> +class LoadSymbolsInfo { + public: + typedef typename ElfClass::Addr Addr; + + explicit LoadSymbolsInfo(const vector<string>& dbg_dirs) + : debug_dirs_(dbg_dirs), has_loading_addr_(false) {} + + // Keeps track of which sections have been loaded so sections don't + // accidentally get loaded twice from two different files. + void LoadedSection(const string& section) { + if (loaded_sections_.count(section) == 0) { + loaded_sections_.insert(section); + } else { + fprintf(stderr, "Section %s has already been loaded.\n", section.c_str()); + } + } + + string debuglink_file() const { return debuglink_file_; } + + private: + const vector<string>& debug_dirs_; // Directories in which to + // search for the debug ELF file. + + string debuglink_file_; // Full path to the debug ELF file. + + bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. + + set<string> loaded_sections_; // Tracks the Loaded ELF sections + // between calls to LoadSymbols(). +}; + +// Find the preferred loading address of the binary. +template <typename ElfClass> +typename ElfClass::Addr GetLoadingAddress( + const typename ElfClass::Phdr* program_headers, int nheader) { + typedef typename ElfClass::Phdr Phdr; + + // For non-PIC executables (e_type == ET_EXEC), the load address is + // the start address of the first PT_LOAD segment. (ELF requires + // the segments to be sorted by load address.) For PIC executables + // and dynamic libraries (e_type == ET_DYN), this address will + // normally be zero. + for (int i = 0; i < nheader; ++i) { + const Phdr& header = program_headers[i]; + if (header.p_type == PT_LOAD) return header.p_vaddr; + } + return 0; +} + +template <typename ElfClass> +bool LoadSymbols(const string& obj_file, const bool big_endian, + const typename ElfClass::Ehdr* elf_header, + const bool read_gnu_debug_link, + LoadSymbolsInfo<ElfClass>* info, SecMap* smap, void* rx_avma, + size_t rx_size, UniqueStringUniverse* usu, + void (*log)(const char*)) { + typedef typename ElfClass::Phdr Phdr; + typedef typename ElfClass::Shdr Shdr; + + char buf[500]; + SprintfLiteral(buf, "LoadSymbols: BEGIN %s\n", obj_file.c_str()); + buf[sizeof(buf) - 1] = 0; + log(buf); + + // This is how the text bias is calculated. 
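+  // The text bias is the amount to add to SVMAs (stated virtual memory
+  // addresses, as found in the file) to get AVMAs (actual virtual memory
+  // addresses in this process).  For example, if an ET_DYN object prefers
+  // to load at address 0 but its executable segment is mapped at
+  // 0x7f55e3a00000, then text_bias is 0x7f55e3a00000 and an SVMA of
+  // 0x1234 corresponds to AVMA 0x7f55e3a01234.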
+ // BEGIN CALCULATE BIAS + uintptr_t loading_addr = GetLoadingAddress<ElfClass>( + GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff), + elf_header->e_phnum); + uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr; + SprintfLiteral(buf, "LoadSymbols: rx_avma=%llx, text_bias=%llx", + (unsigned long long int)(uintptr_t)rx_avma, + (unsigned long long int)text_bias); + buf[sizeof(buf) - 1] = 0; + log(buf); + // END CALCULATE BIAS + + const Shdr* sections = + GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); + const char* names_end = names + section_names->sh_size; + bool found_usable_info = false; + + // Dwarf Call Frame Information (CFI) is actually independent from + // the other DWARF debugging information, and can be used alone. + const Shdr* dwarf_cfi_section = + FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, sections, + names, names_end, elf_header->e_shnum); + if (dwarf_cfi_section) { + // Ignore the return value of this function; even without call frame + // information, the other debugging information could be perfectly + // useful. + info->LoadedSection(".debug_frame"); + bool result = LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame", + dwarf_cfi_section, false, 0, 0, + big_endian, smap, text_bias, usu, log); + found_usable_info = found_usable_info || result; + if (result) log("LoadSymbols: read CFI from .debug_frame"); + } + + // Linux C++ exception handling information can also provide + // unwinding data. + const Shdr* eh_frame_section = + FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, sections, names, + names_end, elf_header->e_shnum); +#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) + if (!eh_frame_section) { + // Possibly depending on which linker created libxul.so, on x86_64-linux + // and -android, .eh_frame may instead have the SHT_X86_64_UNWIND type. + eh_frame_section = + FindElfSectionByName<ElfClass>(".eh_frame", SHT_X86_64_UNWIND, sections, + names, names_end, elf_header->e_shnum); + } +#endif + if (eh_frame_section) { + // Pointers in .eh_frame data may be relative to the base addresses of + // certain sections. Provide those sections if present. + const Shdr* got_section = FindElfSectionByName<ElfClass>( + ".got", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); + const Shdr* text_section = FindElfSectionByName<ElfClass>( + ".text", SHT_PROGBITS, sections, names, names_end, elf_header->e_shnum); + info->LoadedSection(".eh_frame"); + // As above, ignore the return value of this function. + bool result = LoadDwarfCFI<ElfClass>( + obj_file, elf_header, ".eh_frame", eh_frame_section, true, got_section, + text_section, big_endian, smap, text_bias, usu, log); + found_usable_info = found_usable_info || result; + if (result) log("LoadSymbols: read CFI from .eh_frame"); + } + + SprintfLiteral(buf, "LoadSymbols: END %s\n", obj_file.c_str()); + buf[sizeof(buf) - 1] = 0; + log(buf); + + return found_usable_info; +} + +// Return the breakpad symbol file identifier for the architecture of +// ELF_HEADER. 
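+// For example, "x86_64" for EM_X86_64 and "arm64" for EM_AARCH64; NULL is
+// returned for machine types this reader does not recognise.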
+template <typename ElfClass> +const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { + typedef typename ElfClass::Half Half; + Half arch = elf_header->e_machine; + switch (arch) { + case EM_386: + return "x86"; + case EM_ARM: + return "arm"; + case EM_AARCH64: + return "arm64"; + case EM_MIPS: + return "mips"; + case EM_PPC64: + return "ppc64"; + case EM_PPC: + return "ppc"; + case EM_S390: + return "s390"; + case EM_SPARC: + return "sparc"; + case EM_SPARCV9: + return "sparcv9"; + case EM_X86_64: + return "x86_64"; + default: + return NULL; + } +} + +// Format the Elf file identifier in IDENTIFIER as a UUID with the +// dashes removed. +string FormatIdentifier(unsigned char identifier[16]) { + char identifier_str[40]; + lul::FileID::ConvertIdentifierToString(identifier, identifier_str, + sizeof(identifier_str)); + string id_no_dash; + for (int i = 0; identifier_str[i] != '\0'; ++i) + if (identifier_str[i] != '-') id_no_dash += identifier_str[i]; + // Add an extra "0" by the end. PDB files on Windows have an 'age' + // number appended to the end of the file identifier; this isn't + // really used or necessary on other platforms, but be consistent. + id_no_dash += '0'; + return id_no_dash; +} + +// Return the non-directory portion of FILENAME: the portion after the +// last slash, or the whole filename if there are no slashes. +string BaseFileName(const string& filename) { + // Lots of copies! basename's behavior is less than ideal. + char* c_filename = strdup(filename.c_str()); + string base = basename(c_filename); + free(c_filename); + return base; +} + +template <typename ElfClass> +bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, + const string& obj_filename, + const vector<string>& debug_dirs, SecMap* smap, + void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + typedef typename ElfClass::Ehdr Ehdr; + + unsigned char identifier[16]; + if (!lul ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) { + fprintf(stderr, "%s: unable to generate file identifier\n", + obj_filename.c_str()); + return false; + } + + const char* architecture = ElfArchitecture<ElfClass>(elf_header); + if (!architecture) { + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", + obj_filename.c_str(), elf_header->e_machine); + return false; + } + + // Figure out what endianness this file is. + bool big_endian; + if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) return false; + + string name = BaseFileName(obj_filename); + string os = "Linux"; + string id = FormatIdentifier(identifier); + + LoadSymbolsInfo<ElfClass> info(debug_dirs); + if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header, + !debug_dirs.empty(), &info, smap, rx_avma, rx_size, + usu, log)) { + const string debuglink_file = info.debuglink_file(); + if (debuglink_file.empty()) return false; + + // Load debuglink ELF file. + fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); + MmapWrapper debug_map_wrapper; + Ehdr* debug_elf_header = NULL; + if (!LoadELF(debuglink_file, &debug_map_wrapper, + reinterpret_cast<void**>(&debug_elf_header))) + return false; + // Sanity checks to make sure everything matches up. 
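+    // (The debuglink file's architecture and endianness must agree with
+    // those of the original object, otherwise its debug info is rejected.)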
+ const char* debug_architecture = + ElfArchitecture<ElfClass>(debug_elf_header); + if (!debug_architecture) { + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", + debuglink_file.c_str(), debug_elf_header->e_machine); + return false; + } + if (strcmp(architecture, debug_architecture)) { + fprintf(stderr, + "%s with ELF machine architecture %s does not match " + "%s with ELF architecture %s\n", + debuglink_file.c_str(), debug_architecture, obj_filename.c_str(), + architecture); + return false; + } + + bool debug_big_endian; + if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) + return false; + if (debug_big_endian != big_endian) { + fprintf(stderr, "%s and %s does not match in endianness\n", + obj_filename.c_str(), debuglink_file.c_str()); + return false; + } + + if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian, + debug_elf_header, false, &info, smap, rx_avma, + rx_size, usu, log)) { + return false; + } + } + + return true; +} + +} // namespace + +namespace lul { + +bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename, + const vector<string>& debug_dirs, SecMap* smap, + void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + if (!IsValidElf(obj_file)) { + fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); + return false; + } + + int elfclass = ElfClass(obj_file); + if (elfclass == ELFCLASS32) { + return ReadSymbolDataElfClass<ElfClass32>( + reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, debug_dirs, + smap, rx_avma, rx_size, usu, log); + } + if (elfclass == ELFCLASS64) { + return ReadSymbolDataElfClass<ElfClass64>( + reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, debug_dirs, + smap, rx_avma, rx_size, usu, log); + } + + return false; +} + +bool ReadSymbolData(const string& obj_file, const vector<string>& debug_dirs, + SecMap* smap, void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, void (*log)(const char*)) { + MmapWrapper map_wrapper; + void* elf_header = NULL; + if (!LoadELF(obj_file, &map_wrapper, &elf_header)) return false; + + return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header), + obj_file, debug_dirs, smap, rx_avma, rx_size, + usu, log); +} + +namespace { + +template <typename ElfClass> +void FindElfClassSection(const char* elf_base, const char* section_name, + typename ElfClass::Word section_type, + const void** section_start, int* section_size) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Shdr Shdr; + + MOZ_ASSERT(elf_base); + MOZ_ASSERT(section_start); + MOZ_ASSERT(section_size); + + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); + + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); + + const Shdr* sections = + GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); + const char* names_end = names + section_names->sh_size; + + const Shdr* section = + FindElfSectionByName<ElfClass>(section_name, section_type, sections, + names, names_end, elf_header->e_shnum); + + if (section != NULL && section->sh_size > 0) { + *section_start = elf_base + section->sh_offset; + *section_size = section->sh_size; + } +} + +template <typename ElfClass> +void FindElfClassSegment(const char* elf_base, + typename ElfClass::Word segment_type, + const void** 
segment_start, int* segment_size) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Phdr Phdr; + + MOZ_ASSERT(elf_base); + MOZ_ASSERT(segment_start); + MOZ_ASSERT(segment_size); + + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); + + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); + + const Phdr* phdrs = + GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff); + + for (int i = 0; i < elf_header->e_phnum; ++i) { + if (phdrs[i].p_type == segment_type) { + *segment_start = elf_base + phdrs[i].p_offset; + *segment_size = phdrs[i].p_filesz; + return; + } + } +} + +} // namespace + +bool IsValidElf(const void* elf_base) { + return strncmp(reinterpret_cast<const char*>(elf_base), ELFMAG, SELFMAG) == 0; +} + +int ElfClass(const void* elf_base) { + const ElfW(Ehdr)* elf_header = reinterpret_cast<const ElfW(Ehdr)*>(elf_base); + + return elf_header->e_ident[EI_CLASS]; +} + +bool FindElfSection(const void* elf_mapped_base, const char* section_name, + uint32_t section_type, const void** section_start, + int* section_size, int* elfclass) { + MOZ_ASSERT(elf_mapped_base); + MOZ_ASSERT(section_start); + MOZ_ASSERT(section_size); + + *section_start = NULL; + *section_size = 0; + + if (!IsValidElf(elf_mapped_base)) return false; + + int cls = ElfClass(elf_mapped_base); + if (elfclass) { + *elfclass = cls; + } + + const char* elf_base = static_cast<const char*>(elf_mapped_base); + + if (cls == ELFCLASS32) { + FindElfClassSection<ElfClass32>(elf_base, section_name, section_type, + section_start, section_size); + return *section_start != NULL; + } else if (cls == ELFCLASS64) { + FindElfClassSection<ElfClass64>(elf_base, section_name, section_type, + section_start, section_size); + return *section_start != NULL; + } + + return false; +} + +bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type, + const void** segment_start, int* segment_size, + int* elfclass) { + MOZ_ASSERT(elf_mapped_base); + MOZ_ASSERT(segment_start); + MOZ_ASSERT(segment_size); + + *segment_start = NULL; + *segment_size = 0; + + if (!IsValidElf(elf_mapped_base)) return false; + + int cls = ElfClass(elf_mapped_base); + if (elfclass) { + *elfclass = cls; + } + + const char* elf_base = static_cast<const char*>(elf_mapped_base); + + if (cls == ELFCLASS32) { + FindElfClassSegment<ElfClass32>(elf_base, segment_type, segment_start, + segment_size); + return *segment_start != NULL; + } else if (cls == ELFCLASS64) { + FindElfClassSegment<ElfClass64>(elf_base, segment_type, segment_start, + segment_size); + return *segment_start != NULL; + } + + return false; +} + +// (derived from) +// file_id.cc: Return a unique identifier for a file +// +// See file_id.h for documentation +// + +// ELF note name and desc are 32-bits word padded. +#define NOTE_PADDING(a) ((a + 3) & ~3) + +// These functions are also used inside the crashed process, so be safe +// and use the syscall/libc wrappers instead of direct syscalls or libc. 
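+
+// Each note record consists of an Nhdr, followed by n_namesz bytes of name
+// and n_descsz bytes of descriptor, with the name and descriptor each
+// padded to a 4-byte boundary: that is what NOTE_PADDING above computes,
+// e.g. NOTE_PADDING(5) == 8 and NOTE_PADDING(8) == 8.  For an
+// NT_GNU_BUILD_ID note, the descriptor holds the build id bytes.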
+ +template <typename ElfClass> +static bool ElfClassBuildIDNoteIdentifier(const void* section, int length, + uint8_t identifier[kMDGUIDSize]) { + typedef typename ElfClass::Nhdr Nhdr; + + const void* section_end = reinterpret_cast<const char*>(section) + length; + const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section); + while (reinterpret_cast<const void*>(note_header) < section_end) { + if (note_header->n_type == NT_GNU_BUILD_ID) break; + note_header = reinterpret_cast<const Nhdr*>( + reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) + + NOTE_PADDING(note_header->n_namesz) + + NOTE_PADDING(note_header->n_descsz)); + } + if (reinterpret_cast<const void*>(note_header) >= section_end || + note_header->n_descsz == 0) { + return false; + } + + const char* build_id = reinterpret_cast<const char*>(note_header) + + sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz); + // Copy as many bits of the build ID as will fit + // into the GUID space. + memset(identifier, 0, kMDGUIDSize); + memcpy(identifier, build_id, + std::min(kMDGUIDSize, (size_t)note_header->n_descsz)); + + return true; +} + +// Attempt to locate a .note.gnu.build-id section in an ELF binary +// and copy as many bytes of it as will fit into |identifier|. +static bool FindElfBuildIDNote(const void* elf_mapped_base, + uint8_t identifier[kMDGUIDSize]) { + void* note_section; + int note_size, elfclass; + if ((!FindElfSegment(elf_mapped_base, PT_NOTE, (const void**)¬e_section, + ¬e_size, &elfclass) || + note_size == 0) && + (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, + (const void**)¬e_section, ¬e_size, &elfclass) || + note_size == 0)) { + return false; + } + + if (elfclass == ELFCLASS32) { + return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size, + identifier); + } else if (elfclass == ELFCLASS64) { + return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size, + identifier); + } + + return false; +} + +// Attempt to locate the .text section of an ELF binary and generate +// a simple hash by XORing the first page worth of bytes into |identifier|. +static bool HashElfTextSection(const void* elf_mapped_base, + uint8_t identifier[kMDGUIDSize]) { + void* text_section; + int text_size; + if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, + (const void**)&text_section, &text_size, NULL) || + text_size == 0) { + return false; + } + + memset(identifier, 0, kMDGUIDSize); + const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section); + const uint8_t* ptr_end = ptr + std::min(text_size, 4096); + while (ptr < ptr_end) { + for (unsigned i = 0; i < kMDGUIDSize; i++) identifier[i] ^= ptr[i]; + ptr += kMDGUIDSize; + } + return true; +} + +// static +bool FileID::ElfFileIdentifierFromMappedFile(const void* base, + uint8_t identifier[kMDGUIDSize]) { + // Look for a build id note first. + if (FindElfBuildIDNote(base, identifier)) return true; + + // Fall back on hashing the first page of the text section. + return HashElfTextSection(base, identifier); +} + +// static +void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], + char* buffer, int buffer_length) { + uint8_t identifier_swapped[kMDGUIDSize]; + + // Endian-ness swap to match dump processor expectation. 
+ memcpy(identifier_swapped, identifier, kMDGUIDSize); + uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped); + *data1 = htonl(*data1); + uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4); + *data2 = htons(*data2); + uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6); + *data3 = htons(*data3); + + int buffer_idx = 0; + for (unsigned int idx = 0; + (buffer_idx < buffer_length) && (idx < kMDGUIDSize); ++idx) { + int hi = (identifier_swapped[idx] >> 4) & 0x0F; + int lo = (identifier_swapped[idx]) & 0x0F; + + if (idx == 4 || idx == 6 || idx == 8 || idx == 10) + buffer[buffer_idx++] = '-'; + + buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; + buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; + } + + // NULL terminate + buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0; +} + +} // namespace lul diff --git a/tools/profiler/lul/LulElfExt.h b/tools/profiler/lul/LulElfExt.h new file mode 100644 index 0000000000..73d9ff7f15 --- /dev/null +++ b/tools/profiler/lul/LulElfExt.h @@ -0,0 +1,69 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2011, 2012 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/linux/dump_symbols.h + +#ifndef LulElfExt_h +#define LulElfExt_h + +// These two functions are the external interface to the +// ELF/Dwarf/EXIDX reader. + +#include "LulMainInt.h" + +using lul::SecMap; + +namespace lul { + +class UniqueStringUniverse; + +// Find all the unwind information in OBJ_FILE, an ELF executable +// or shared library, and add it to SMAP. 
+bool ReadSymbolData(const std::string& obj_file, + const std::vector<std::string>& debug_dirs, SecMap* smap, + void* rx_avma, size_t rx_size, UniqueStringUniverse* usu, + void (*log)(const char*)); + +// The same as ReadSymbolData, except that OBJ_FILE is assumed to +// point to a mapped-in image of OBJ_FILENAME. +bool ReadSymbolDataInternal(const uint8_t* obj_file, + const std::string& obj_filename, + const std::vector<std::string>& debug_dirs, + SecMap* smap, void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)); + +} // namespace lul + +#endif // LulElfExt_h diff --git a/tools/profiler/lul/LulElfInt.h b/tools/profiler/lul/LulElfInt.h new file mode 100644 index 0000000000..31ffba8ff0 --- /dev/null +++ b/tools/profiler/lul/LulElfInt.h @@ -0,0 +1,218 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2012, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/android/include/elf.h +// src/common/linux/elfutils.h +// src/common/linux/file_id.h +// src/common/linux/elfutils-inl.h + +#ifndef LulElfInt_h +#define LulElfInt_h + +// This header defines functions etc internal to the ELF reader. It +// should not be included outside of LulElf.cpp. + +#include <elf.h> +#include <stdlib.h> + +#include "mozilla/Assertions.h" + +#include "PlatformMacros.h" + +// (derived from) +// elfutils.h: Utilities for dealing with ELF files. +// +#include <link.h> + +#if defined(GP_OS_android) + +// From toolkit/crashreporter/google-breakpad/src/common/android/include/elf.h +// The Android headers don't always define this constant. 
+# ifndef EM_X86_64 +# define EM_X86_64 62 +# endif + +# ifndef EM_PPC64 +# define EM_PPC64 21 +# endif + +# ifndef EM_S390 +# define EM_S390 22 +# endif + +# ifndef NT_GNU_BUILD_ID +# define NT_GNU_BUILD_ID 3 +# endif + +# ifndef ElfW +# define ElfW(type) _ElfW(Elf, ELFSIZE, type) +# define _ElfW(e, w, t) _ElfW_1(e, w, _##t) +# define _ElfW_1(e, w, t) e##w##t +# endif + +#endif + +#if defined(GP_OS_freebsd) + +# ifndef ElfW +# define ElfW(type) Elf_##type +# endif + +#endif + +namespace lul { + +// Traits classes so consumers can write templatized code to deal +// with specific ELF bits. +struct ElfClass32 { + typedef Elf32_Addr Addr; + typedef Elf32_Ehdr Ehdr; + typedef Elf32_Nhdr Nhdr; + typedef Elf32_Phdr Phdr; + typedef Elf32_Shdr Shdr; + typedef Elf32_Half Half; + typedef Elf32_Off Off; + typedef Elf32_Word Word; + static const int kClass = ELFCLASS32; + static const size_t kAddrSize = sizeof(Elf32_Addr); +}; + +struct ElfClass64 { + typedef Elf64_Addr Addr; + typedef Elf64_Ehdr Ehdr; + typedef Elf64_Nhdr Nhdr; + typedef Elf64_Phdr Phdr; + typedef Elf64_Shdr Shdr; + typedef Elf64_Half Half; + typedef Elf64_Off Off; + typedef Elf64_Word Word; + static const int kClass = ELFCLASS64; + static const size_t kAddrSize = sizeof(Elf64_Addr); +}; + +bool IsValidElf(const void* elf_header); +int ElfClass(const void* elf_base); + +// Attempt to find a section named |section_name| of type |section_type| +// in the ELF binary data at |elf_mapped_base|. On success, returns true +// and sets |*section_start| to point to the start of the section data, +// and |*section_size| to the size of the section's data. If |elfclass| +// is not NULL, set |*elfclass| to the ELF file class. +bool FindElfSection(const void* elf_mapped_base, const char* section_name, + uint32_t section_type, const void** section_start, + int* section_size, int* elfclass); + +// Internal helper method, exposed for convenience for callers +// that already have more info. +template <typename ElfClass> +const typename ElfClass::Shdr* FindElfSectionByName( + const char* name, typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, const char* section_names, + const char* names_end, int nsection); + +// Attempt to find the first segment of type |segment_type| in the ELF +// binary data at |elf_mapped_base|. On success, returns true and sets +// |*segment_start| to point to the start of the segment data, and +// and |*segment_size| to the size of the segment's data. If |elfclass| +// is not NULL, set |*elfclass| to the ELF file class. +bool FindElfSegment(const void* elf_mapped_base, uint32_t segment_type, + const void** segment_start, int* segment_size, + int* elfclass); + +// Convert an offset from an Elf header into a pointer to the mapped +// address in the current process. Takes an extra template parameter +// to specify the return type to avoid having to dynamic_cast the +// result. +template <typename ElfClass, typename T> +const T* GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset); + +// (derived from) +// file_id.h: Return a unique identifier for a file +// + +static const size_t kMDGUIDSize = sizeof(MDGUID); + +class FileID { + public: + // Load the identifier for the elf file mapped into memory at |base| into + // |identifier|. Return false if the identifier could not be created for the + // file. + static bool ElfFileIdentifierFromMappedFile(const void* base, + uint8_t identifier[kMDGUIDSize]); + + // Convert the |identifier| data to a NULL terminated string. 
The string will + // be formatted as a UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE). + // The |buffer| should be at least 37 bytes long to receive all of the data + // and termination. Shorter buffers will contain truncated data. + static void ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], + char* buffer, int buffer_length); +}; + +template <typename ElfClass, typename T> +const T* GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset) { + return reinterpret_cast<const T*>(reinterpret_cast<uintptr_t>(elf_header) + + offset); +} + +template <typename ElfClass> +const typename ElfClass::Shdr* FindElfSectionByName( + const char* name, typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, const char* section_names, + const char* names_end, int nsection) { + MOZ_ASSERT(name != NULL); + MOZ_ASSERT(sections != NULL); + MOZ_ASSERT(nsection > 0); + + int name_len = strlen(name); + if (name_len == 0) return NULL; + + for (int i = 0; i < nsection; ++i) { + const char* section_name = section_names + sections[i].sh_name; + if (sections[i].sh_type == section_type && + names_end - section_name >= name_len + 1 && + strcmp(name, section_name) == 0) { + return sections + i; + } + } + return NULL; +} + +} // namespace lul + +// And finally, the external interface, offered to LulMain.cpp +#include "LulElfExt.h" + +#endif // LulElfInt_h diff --git a/tools/profiler/lul/LulMain.cpp b/tools/profiler/lul/LulMain.cpp new file mode 100644 index 0000000000..7cf5508234 --- /dev/null +++ b/tools/profiler/lul/LulMain.cpp @@ -0,0 +1,2079 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "LulMain.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> // write(), only for testing LUL + +#include <algorithm> // std::sort +#include <string> +#include <utility> + +#include "GeckoProfiler.h" // for profiler_current_thread_id() +#include "LulCommonExt.h" +#include "LulElfExt.h" +#include "LulMainInt.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Assertions.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/MemoryChecking.h" +#include "mozilla/Sprintf.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Unused.h" + +// Set this to 1 for verbose logging +#define DEBUG_MAIN 0 + +namespace lul { + +using mozilla::CheckedInt; +using mozilla::DebugOnly; +using mozilla::MallocSizeOf; +using mozilla::Unused; +using std::pair; +using std::string; +using std::vector; + +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +// +// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT. +// Any such function -- and, hence, the transitive closure of those +// reachable from it -- must not do any dynamic memory allocation. +// Doing so risks deadlock. There is exactly one root function for +// the transitive closure: Lul::Unwind. 
+// +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + +//////////////////////////////////////////////////////////////// +// RuleSet // +//////////////////////////////////////////////////////////////// + +static const char* NameOf_DW_REG(int16_t aReg) { + switch (aReg) { + case DW_REG_CFA: + return "cfa"; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + return "xbp"; + case DW_REG_INTEL_XSP: + return "xsp"; + case DW_REG_INTEL_XIP: + return "xip"; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + return "r7"; + case DW_REG_ARM_R11: + return "r11"; + case DW_REG_ARM_R12: + return "r12"; + case DW_REG_ARM_R13: + return "r13"; + case DW_REG_ARM_R14: + return "r14"; + case DW_REG_ARM_R15: + return "r15"; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return "x29"; + case DW_REG_AARCH64_X30: + return "x30"; + case DW_REG_AARCH64_SP: + return "sp"; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return "sp"; + case DW_REG_MIPS_FP: + return "fp"; + case DW_REG_MIPS_PC: + return "pc"; +#else +# error "Unsupported arch" +#endif + default: + return "???"; + } +} + +string LExpr::ShowRule(const char* aNewReg) const { + char buf[64]; + string res = string(aNewReg) + "="; + switch (mHow) { + case UNKNOWN: + res += "Unknown"; + break; + case NODEREF: + SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case DEREF: + SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case PFXEXPR: + SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset); + res += buf; + break; + default: + res += "???"; + break; + } + return res; +} + +void RuleSet::Print(uintptr_t avma, uintptr_t len, + void (*aLog)(const char*)) const { + char buf[96]; + SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)avma, + (unsigned long long int)(avma + len - 1)); + string res = string(buf); + res += mCfaExpr.ShowRule("cfa"); + res += " in"; + // For each reg we care about, print the recovery expression. 
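+  // On x86_64, for example, a complete line might read
+  //   [40b600 .. 40b60f]: let cfa=xsp+8 in RA=*(cfa+-8) SP=cfa+0 BP=xbp+0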
+#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + res += mXipExpr.ShowRule(" RA"); + res += mXspExpr.ShowRule(" SP"); + res += mXbpExpr.ShowRule(" BP"); +#elif defined(GP_ARCH_arm) + res += mR15expr.ShowRule(" R15"); + res += mR7expr.ShowRule(" R7"); + res += mR11expr.ShowRule(" R11"); + res += mR12expr.ShowRule(" R12"); + res += mR13expr.ShowRule(" R13"); + res += mR14expr.ShowRule(" R14"); +#elif defined(GP_ARCH_arm64) + res += mX29expr.ShowRule(" X29"); + res += mX30expr.ShowRule(" X30"); + res += mSPexpr.ShowRule(" SP"); +#elif defined(GP_ARCH_mips64) + res += mPCexpr.ShowRule(" PC"); + res += mSPexpr.ShowRule(" SP"); + res += mFPexpr.ShowRule(" FP"); +#else +# error "Unsupported arch" +#endif + aLog(res.c_str()); +} + +LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) { + switch (aRegno) { + case DW_REG_CFA: + return &mCfaExpr; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XIP: + return &mXipExpr; + case DW_REG_INTEL_XSP: + return &mXspExpr; + case DW_REG_INTEL_XBP: + return &mXbpExpr; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R15: + return &mR15expr; + case DW_REG_ARM_R14: + return &mR14expr; + case DW_REG_ARM_R13: + return &mR13expr; + case DW_REG_ARM_R12: + return &mR12expr; + case DW_REG_ARM_R11: + return &mR11expr; + case DW_REG_ARM_R7: + return &mR7expr; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return &mX29expr; + case DW_REG_AARCH64_X30: + return &mX30expr; + case DW_REG_AARCH64_SP: + return &mSPexpr; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return &mSPexpr; + case DW_REG_MIPS_FP: + return &mFPexpr; + case DW_REG_MIPS_PC: + return &mPCexpr; +#else +# error "Unknown arch" +#endif + default: + return nullptr; + } +} + +RuleSet::RuleSet() { + // All fields are of type LExpr and so are initialised by LExpr::LExpr(). +} + +//////////////////////////////////////////////////////////////// +// SecMap // +//////////////////////////////////////////////////////////////// + +// See header file LulMainInt.h for comments about invariants. + +SecMap::SecMap(uintptr_t mapStartAVMA, uint32_t mapLen, + void (*aLog)(const char*)) + : mUsable(false), + mUniqifier(new mozilla::HashMap<RuleSet, uint32_t, RuleSet, + InfallibleAllocPolicy>), + mLog(aLog) { + if (mapLen == 0) { + // Degenerate case. + mMapMinAVMA = 1; + mMapMaxAVMA = 0; + } else { + mMapMinAVMA = mapStartAVMA; + mMapMaxAVMA = mapStartAVMA + (uintptr_t)mapLen - 1; + } +} + +SecMap::~SecMap() { + mExtents.clear(); + mDictionary.clear(); + if (mUniqifier) { + mUniqifier->clear(); + mUniqifier = nullptr; + } +} + +// RUNS IN NO-MALLOC CONTEXT +RuleSet* SecMap::FindRuleSet(uintptr_t ia) { + // Binary search mExtents to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. Note that this works correctly even when + // mExtents.size() == 0. + + // Can't do this until the array has been sorted and preened. + MOZ_ASSERT(mUsable); + + long int lo = 0; + long int hi = (long int)mExtents.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. 
+ if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + Extent* mid_extent = &mExtents[mid]; + uintptr_t mid_offset = mid_extent->offset(); + uintptr_t mid_len = mid_extent->len(); + uintptr_t mid_minAddr = mMapMinAVMA + mid_offset; + uintptr_t mid_maxAddr = mid_minAddr + mid_len - 1; + if (ia < mid_minAddr) { + hi = mid - 1; + continue; + } + if (ia > mid_maxAddr) { + lo = mid + 1; + continue; + } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + uint32_t mid_extent_dictIx = mid_extent->dictIx(); + MOZ_RELEASE_ASSERT(mid_extent_dictIx < mExtents.size()); + return &mDictionary[mid_extent_dictIx]; + } + // NOTREACHED +} + +// Add a RuleSet to the collection. The rule is copied in. Calling +// this makes the map non-searchable. +void SecMap::AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len) { + mUsable = false; + + // Zero length RuleSet? Meaningless, but ignore it anyway. + if (len == 0) { + return; + } + + // Ignore attempts to add RuleSets whose address range doesn't fall within + // the declared address range for the SecMap. Maybe we should print some + // kind of error message rather than silently ignoring them. + if (!(avma >= mMapMinAVMA && avma + len - 1 <= mMapMaxAVMA)) { + return; + } + + // Because `mMapStartAVMA` .. `mMapEndAVMA` can specify at most a 2^32-1 byte + // chunk of address space, the following must now hold. + MOZ_RELEASE_ASSERT(len <= (uintptr_t)0xFFFFFFFF); + + // See if `mUniqifier` already has `rs`. If so set `dictIx` to the assigned + // dictionary index; if not, add `rs` to `mUniqifier` and assign a new + // dictionary index. This is the core of the RuleSet-de-duplication process. + uint32_t dictIx = 0; + mozilla::HashMap<RuleSet, uint32_t, RuleSet, InfallibleAllocPolicy>::AddPtr + p = mUniqifier->lookupForAdd(*rs); + if (!p) { + dictIx = mUniqifier->count(); + // If this ever fails, Extents::dictIx will need to be changed to be a + // type wider than the current uint16_t. + MOZ_RELEASE_ASSERT(dictIx < (1 << 16)); + // This returns `false` on OOM. We ignore the return value since we asked + // for it to use the InfallibleAllocPolicy. + DebugOnly<bool> addedOK = mUniqifier->add(p, *rs, dictIx); + MOZ_ASSERT(addedOK); + } else { + dictIx = p->value(); + } + + uint32_t offset = (uint32_t)(avma - mMapMinAVMA); + while (len > 0) { + // Because Extents::len is a uint16_t, we have to add multiple `mExtents` + // entries to cover the case where `len` is equal to or greater than 2^16. + // This happens only exceedingly rarely. In order to get more test + // coverage on what would otherwise be a very low probability (less than + // 0.0002%) corner case, we do this in steps of 4095. On libxul.so as of + // Sept 2020, this increases the number of `mExtents` entries by about + // 0.05%, hence has no meaningful effect on space use, but increases the + // use of this corner case, and hence its test coverage, by a factor of 250. + uint32_t this_step_len = (len > 4095) ? 4095 : len; + mExtents.emplace_back(offset, this_step_len, dictIx); + offset += this_step_len; + len -= this_step_len; + } +} + +// Add a PfxInstr to the vector of such instrs, and return the index +// in the vector. Calling this makes the map non-searchable. +uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) { + mUsable = false; + mPfxInstrs.push_back(pfxi); + return mPfxInstrs.size() - 1; +} + +// Prepare the map for searching, by sorting it, de-overlapping entries and +// removing any resulting zero-length entries. 
At the start of this routine, +// all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and not have zero +// length, as a result of the checks in AddRuleSet(). +void SecMap::PrepareRuleSets() { + // At this point, the de-duped RuleSets are in `mUniqifier`, and + // `mDictionary` is empty. This method will, amongst other things, copy + // them into `mDictionary` in order of their assigned dictionary-index + // values, as established by `SecMap::AddRuleSet`, and free `mUniqifier`; + // after this method, it has no further use. + MOZ_RELEASE_ASSERT(mUniqifier); + MOZ_RELEASE_ASSERT(mDictionary.empty()); + + if (mExtents.empty()) { + mUniqifier->clear(); + mUniqifier = nullptr; + return; + } + + if (mMapMinAVMA == 1 && mMapMaxAVMA == 0) { + // The map is empty. This should never happen. + mExtents.clear(); + mUniqifier->clear(); + mUniqifier = nullptr; + return; + } + MOZ_RELEASE_ASSERT(mMapMinAVMA <= mMapMaxAVMA); + + // We must have at least one Extent, and as a consequence there must be at + // least one entry in the uniqifier. + MOZ_RELEASE_ASSERT(!mExtents.empty() && !mUniqifier->empty()); + +#ifdef DEBUG + // Check invariants on incoming Extents. + for (size_t i = 0; i < mExtents.size(); ++i) { + Extent* ext = &mExtents[i]; + uint32_t len = ext->len(); + MOZ_ASSERT(len > 0); + MOZ_ASSERT(len <= 4095 /* per '4095' in AddRuleSet() */); + uint32_t offset = ext->offset(); + uintptr_t avma = mMapMinAVMA + (uintptr_t)offset; + // Upper bounds test. There's no lower bounds test because `offset` is a + // positive displacement from `mMapMinAVMA`, so a small underrun will + // manifest as `len` being close to 2^32. + MOZ_ASSERT(avma + (uintptr_t)len - 1 <= mMapMaxAVMA); + } +#endif + + // Sort by start addresses. + std::sort(mExtents.begin(), mExtents.end(), + [](const Extent& ext1, const Extent& ext2) { + return ext1.offset() < ext2.offset(); + }); + + // Iteratively truncate any overlaps and remove any zero length + // entries that might result, or that may have been present + // initially. Unless the input is seriously screwy, this is + // expected to iterate only once. + while (true) { + size_t i; + size_t n = mExtents.size(); + size_t nZeroLen = 0; + + if (n == 0) { + break; + } + + for (i = 1; i < n; ++i) { + Extent* prev = &mExtents[i - 1]; + Extent* here = &mExtents[i]; + MOZ_ASSERT(prev->offset() <= here->offset()); + if (prev->offset() + prev->len() > here->offset()) { + prev->setLen(here->offset() - prev->offset()); + } + if (prev->len() == 0) { + nZeroLen++; + } + } + + if (mExtents[n - 1].len() == 0) { + nZeroLen++; + } + + // At this point, the entries are in-order and non-overlapping. + // If none of them are zero-length, we are done. + if (nZeroLen == 0) { + break; + } + + // Slide back the entries to remove the zero length ones. + size_t j = 0; // The write-point. + for (i = 0; i < n; ++i) { + if (mExtents[i].len() == 0) { + continue; + } + if (j != i) { + mExtents[j] = mExtents[i]; + } + ++j; + } + MOZ_ASSERT(i == n); + MOZ_ASSERT(nZeroLen <= n); + MOZ_ASSERT(j == n - nZeroLen); + while (nZeroLen > 0) { + mExtents.pop_back(); + nZeroLen--; + } + + MOZ_ASSERT(mExtents.size() == j); + } + + size_t nExtents = mExtents.size(); + +#ifdef DEBUG + // Do a final check on the extents: their address ranges must be + // ascending, non overlapping, non zero sized. 
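+  // (Illustrative example of the truncation pass above, using made-up
+  // extents: a sorted input of (offset=0, len=100) followed by
+  // (offset=50, len=60) becomes (0,50) and (50,60).  If two extents started
+  // at the same offset, whichever one the sort placed first would be
+  // truncated to length zero and then dropped by the sliding pass.)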
+ if (nExtents > 0) { + MOZ_ASSERT(mExtents[0].len() > 0); + for (size_t i = 1; i < nExtents; ++i) { + const Extent* prev = &mExtents[i - 1]; + const Extent* here = &mExtents[i]; + MOZ_ASSERT(prev->offset() < here->offset()); + MOZ_ASSERT(here->len() > 0); + MOZ_ASSERT(prev->offset() + prev->len() <= here->offset()); + } + } +#endif + + // Create the final dictionary by enumerating the uniqifier. + size_t nUniques = mUniqifier->count(); + + RuleSet dummy; + mozilla::PodZero(&dummy); + + mDictionary.reserve(nUniques); + for (size_t i = 0; i < nUniques; i++) { + mDictionary.push_back(dummy); + } + + for (auto iter = mUniqifier->iter(); !iter.done(); iter.next()) { + MOZ_RELEASE_ASSERT(iter.get().value() < nUniques); + mDictionary[iter.get().value()] = iter.get().key(); + } + + mUniqifier = nullptr; + + char buf[150]; + SprintfLiteral( + buf, + "PrepareRuleSets: %lu extents, %lu rulesets, " + "avma min/max 0x%llx, 0x%llx\n", + (unsigned long int)nExtents, (unsigned long int)mDictionary.size(), + (unsigned long long int)mMapMinAVMA, (unsigned long long int)mMapMaxAVMA); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Is now usable for binary search. + mUsable = true; + +#if 0 + mLog("\nRulesets after preening\n"); + for (size_t i = 0; i < nExtents; ++i) { + const Extent* extent = &mExtents[i]; + uintptr_t avma = mMapMinAVMA + (uintptr_t)extent->offset(); + mDictionary[extent->dictIx()].Print(avma, extent->len(), mLog); + mLog("\n"); + } + mLog("\n"); +#endif +} + +bool SecMap::IsEmpty() { return mExtents.empty(); } + +size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + + // It's conceivable that these calls would be unsafe with some + // implementations of std::vector, but it seems to be working for now... + n += aMallocSizeOf(mPfxInstrs.data()); + + if (mUniqifier) { + n += mUniqifier->shallowSizeOfIncludingThis(aMallocSizeOf); + } + n += aMallocSizeOf(mDictionary.data()); + n += aMallocSizeOf(mExtents.data()); + + return n; +} + +//////////////////////////////////////////////////////////////// +// SegArray // +//////////////////////////////////////////////////////////////// + +// A SegArray holds a set of address ranges that together exactly +// cover an address range, with no overlaps or holes. Each range has +// an associated value, which in this case has been specialised to be +// a simple boolean. The representation is kept to minimal canonical +// form in which adjacent ranges with the same associated value are +// merged together. Each range is represented by a |struct Seg|. +// +// SegArrays are used to keep track of which parts of the address +// space are known to contain instructions. 
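+//
+// As an illustration (with made-up addresses): a SegArray starts life as a
+// single Seg covering [0, UINTPTR_MAX] with value |false|.  After
+// add(0x1000, 0x1fff, true) it holds three Segs:
+//   [0, 0xfff]=false  [0x1000, 0x1fff]=true  [0x2000, UINTPTR_MAX]=false
+// and a subsequent add(0x2000, 0x2fff, true) is merged by preen() into
+//   [0, 0xfff]=false  [0x1000, 0x2fff]=true  [0x3000, UINTPTR_MAX]=false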
+class SegArray { + public: + void add(uintptr_t lo, uintptr_t hi, bool val) { + if (lo > hi) { + return; + } + split_at(lo); + if (hi < UINTPTR_MAX) { + split_at(hi + 1); + } + std::vector<Seg>::size_type iLo, iHi, i; + iLo = find(lo); + iHi = find(hi); + for (i = iLo; i <= iHi; ++i) { + mSegs[i].val = val; + } + preen(); + } + + // RUNS IN NO-MALLOC CONTEXT + bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min, + /*OUT*/ uintptr_t* rx_max, uintptr_t addr) { + std::vector<Seg>::size_type i = find(addr); + if (!mSegs[i].val) { + return false; + } + *rx_min = mSegs[i].lo; + *rx_max = mSegs[i].hi; + return true; + } + + SegArray() { + Seg s(0, UINTPTR_MAX, false); + mSegs.push_back(s); + } + + private: + struct Seg { + Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {} + uintptr_t lo; + uintptr_t hi; + bool val; + }; + + void preen() { + for (std::vector<Seg>::iterator iter = mSegs.begin(); + iter < mSegs.end() - 1; ++iter) { + if (iter[0].val != iter[1].val) { + continue; + } + iter[0].hi = iter[1].hi; + mSegs.erase(iter + 1); + // Back up one, so as not to miss an opportunity to merge + // with the entry after this one. + --iter; + } + } + + // RUNS IN NO-MALLOC CONTEXT + std::vector<Seg>::size_type find(uintptr_t a) { + long int lo = 0; + long int hi = (long int)mSegs.size(); + while (true) { + // The unsearched space is lo .. hi inclusive. + if (lo > hi) { + // Not found. This can't happen. + return (std::vector<Seg>::size_type)(-1); + } + long int mid = lo + ((hi - lo) / 2); + uintptr_t mid_lo = mSegs[mid].lo; + uintptr_t mid_hi = mSegs[mid].hi; + if (a < mid_lo) { + hi = mid - 1; + continue; + } + if (a > mid_hi) { + lo = mid + 1; + continue; + } + return (std::vector<Seg>::size_type)mid; + } + } + + void split_at(uintptr_t a) { + std::vector<Seg>::size_type i = find(a); + if (mSegs[i].lo == a) { + return; + } + mSegs.insert(mSegs.begin() + i + 1, mSegs[i]); + mSegs[i].hi = a - 1; + mSegs[i + 1].lo = a; + } + + void show() { + printf("<< %d entries:\n", (int)mSegs.size()); + for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end(); + ++iter) { + printf(" %016llx %016llx %s\n", (unsigned long long int)(*iter).lo, + (unsigned long long int)(*iter).hi, + (*iter).val ? "true" : "false"); + } + printf(">>\n"); + } + + std::vector<Seg> mSegs; +}; + +//////////////////////////////////////////////////////////////// +// PriMap // +//////////////////////////////////////////////////////////////// + +class PriMap { + public: + explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {} + + // RUNS IN NO-MALLOC CONTEXT + pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) { + SecMap* sm = FindSecMap(ia); + return pair<const RuleSet*, const vector<PfxInstr>*>( + sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr); + } + + // Add a secondary map. No overlaps allowed w.r.t. existing + // secondary maps. + void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) { + // We can't add an empty SecMap to the PriMap. But that's OK + // since we'd never be able to find anything in it anyway. + if (aSecMap->IsEmpty()) { + return; + } + + // Iterate through the SecMaps and find the right place for this + // one. At the same time, ensure that the in-order + // non-overlapping invariant is preserved (and, generally, holds). + // FIXME: this gives a cost that is O(N^2) in the total number of + // shared objects in the system. ToDo: better. 
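+    // (Illustrative example, with made-up ranges: if the existing SecMaps
+    // cover [0x1000, 0x1fff] and [0x5000, 0x5fff], a new SecMap covering
+    // [0x3000, 0x3fff] stops the search at the second entry, since
+    // 0x3000 < 0x5fff, and is inserted in front of it, keeping the vector
+    // sorted.)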
+ MOZ_ASSERT(aSecMap->mMapMinAVMA <= aSecMap->mMapMaxAVMA); + + size_t num_secMaps = mSecMaps.size(); + uintptr_t i; + for (i = 0; i < num_secMaps; ++i) { + mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i]; + MOZ_ASSERT(sm_i->mMapMinAVMA <= sm_i->mMapMaxAVMA); + if (aSecMap->mMapMinAVMA < sm_i->mMapMaxAVMA) { + // |aSecMap| needs to be inserted immediately before mSecMaps[i]. + break; + } + } + MOZ_ASSERT(i <= num_secMaps); + if (i == num_secMaps) { + // It goes at the end. + mSecMaps.push_back(std::move(aSecMap)); + } else { + std::vector<mozilla::UniquePtr<SecMap>>::iterator iter = + mSecMaps.begin() + i; + mSecMaps.insert(iter, std::move(aSecMap)); + } + char buf[100]; + SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n", + (int)mSecMaps.size()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } + + // Remove and delete any SecMaps in the mapping, that intersect + // with the specified address range. + void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) { + MOZ_ASSERT(avma_min <= avma_max); + size_t num_secMaps = mSecMaps.size(); + if (num_secMaps > 0) { + intptr_t i; + // Iterate from end to start over the vector, so as to ensure + // that the special case where |avma_min| and |avma_max| denote + // the entire address space, can be completed in time proportional + // to the number of elements in the map. + for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) { + mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i]; + if (sm_i->mMapMaxAVMA < avma_min || avma_max < sm_i->mMapMinAVMA) { + // There's no overlap. Move on. + continue; + } + // We need to remove mSecMaps[i] and slide all those above it + // downwards to cover the hole. + mSecMaps.erase(mSecMaps.begin() + i); + } + } + } + + // Return the number of currently contained SecMaps. + size_t CountSecMaps() { return mSecMaps.size(); } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + + // It's conceivable that this call would be unsafe with some + // implementations of std::vector, but it seems to be working for now... + n += aMallocSizeOf(mSecMaps.data()); + + for (size_t i = 0; i < mSecMaps.size(); i++) { + n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf); + } + + return n; + } + + private: + // RUNS IN NO-MALLOC CONTEXT + SecMap* FindSecMap(uintptr_t ia) { + // Binary search mSecMaps to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. + long int lo = 0; + long int hi = (long int)mSecMaps.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. + if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid]; + uintptr_t mid_minAddr = mid_secMap->mMapMinAVMA; + uintptr_t mid_maxAddr = mid_secMap->mMapMaxAVMA; + if (ia < mid_minAddr) { + hi = mid - 1; + continue; + } + if (ia > mid_maxAddr) { + lo = mid + 1; + continue; + } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + return mid_secMap.get(); + } + // NOTREACHED + } + + private: + // sorted array of per-object ranges, non overlapping, non empty + std::vector<mozilla::UniquePtr<SecMap>> mSecMaps; + + // a logging sink, for debugging. 
+ void (*mLog)(const char*); +}; + +//////////////////////////////////////////////////////////////// +// LUL // +//////////////////////////////////////////////////////////////// + +#define LUL_LOG(_str) \ + do { \ + char buf[200]; \ + SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \ + uint64_t(profiler_current_process_id().ToNumber()), \ + uint64_t(profiler_current_thread_id().ToNumber()), this, \ + (_str)); \ + buf[sizeof(buf) - 1] = 0; \ + mLog(buf); \ + } while (0) + +LUL::LUL(void (*aLog)(const char*)) + : mLog(aLog), + mAdminMode(true), + mAdminThreadId(profiler_current_thread_id()), + mPriMap(new PriMap(aLog)), + mSegArray(new SegArray()), + mUSU(new UniqueStringUniverse()) { + LUL_LOG("LUL::LUL: Created object"); +} + +LUL::~LUL() { + LUL_LOG("LUL::~LUL: Destroyed object"); + delete mPriMap; + delete mSegArray; + mLog = nullptr; + delete mUSU; +} + +void LUL::MaybeShowStats() { + // This is racey in the sense that it can't guarantee that + // n_new == n_new_Context + n_new_CFI + n_new_Scanned + // if it should happen that mStats is updated by some other thread + // in between computation of n_new and n_new_{Context,CFI,FP}. + // But it's just stats printing, so we don't really care. + uint32_t n_new = mStats - mStatsPrevious; + if (n_new >= 5000) { + uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext; + uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI; + uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP; + mStatsPrevious = mStats; + char buf[200]; + SprintfLiteral(buf, + "LUL frame stats: TOTAL %5u" + " CTX %4u CFI %4u FP %4u", + n_new, n_new_Context, n_new_CFI, n_new_FP); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } +} + +size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + n += mPriMap->SizeOfIncludingThis(aMallocSizeOf); + + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - mSegArray + // - mUSU + + return n; +} + +void LUL::EnableUnwinding() { + LUL_LOG("LUL::EnableUnwinding"); + // Don't assert for Admin mode here. That is, tolerate a call here + // if we are already in Unwinding mode. + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mAdminMode = false; +} + +void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName, + const void* aMappedImage) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyMap %llx %llu %s\n", + (unsigned long long int)aRXavma, (unsigned long long int)aSize, + aFileName); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // We can't have a SecMap covering more than 2^32-1 bytes of address space. + // See the definition of SecMap for why. Rather than crash the system, just + // limit the SecMap's size accordingly. This case is never actually + // expected to happen. + if (((unsigned long long int)aSize) > 0xFFFFFFFFULL) { + aSize = (uintptr_t)0xFFFFFFFF; + } + MOZ_RELEASE_ASSERT(aSize <= 0xFFFFFFFF); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + // Here's a new mapping, for this object. + mozilla::UniquePtr<SecMap> smap = + mozilla::MakeUnique<SecMap>(aRXavma, (uint32_t)aSize, mLog); + + // Read CFI or EXIDX unwind data into |smap|. 
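+    // When no caller-supplied mapping is available, the ReadSymbolData path
+    // maps (and later unmaps) the file itself in order to read the unwind
+    // info; otherwise ReadSymbolDataInternal reads directly from the
+    // caller-managed image.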
+ if (!aMappedImage) { + (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(), + smap.get(), (void*)aRXavma, aSize, mUSU, mLog); + } else { + (void)lul::ReadSymbolDataInternal( + (const uint8_t*)aMappedImage, string(aFileName), + std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog); + } + + mLog("NotifyMap .. preparing entries\n"); + + smap->PrepareRuleSets(); + + SprintfLiteral(buf, "NotifyMap got %lld entries\n", + (long long int)smap->Size()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Add it to the primary map (the top level set of mapped objects). + mPriMap->AddSecMap(std::move(smap)); + + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. + mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + +void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n", + (unsigned long long int)aRXavma, + (unsigned long long int)aSize); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. + mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + +void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[100]; + SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n", + (unsigned long long int)aRXavmaMin, + (unsigned long long int)aRXavmaMax); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + MOZ_ASSERT(aRXavmaMin <= aRXavmaMax); + + // Remove from the primary map, any secondary maps that intersect + // with the address range. Also delete the secondary maps. + mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax); + + // Tell the segment array that the address range no longer + // contains valid code. + mSegArray->add(aRXavmaMin, aRXavmaMax, false); + + SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n", + (int)mPriMap->CountSecMaps()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +} + +size_t LUL::CountMappings() { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + return mPriMap->CountSecMaps(); +} + +// RUNS IN NO-MALLOC CONTEXT +static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) { + if (!aAddr.Valid()) { + return TaggedUWord(); + } + + // Lower limit check. |aAddr.Value()| is the lowest requested address + // and |aStackImg->mStartAvma| is the lowest address we actually have, + // so the comparison is straightforward. + if (aAddr.Value() < aStackImg->mStartAvma) { + return TaggedUWord(); + } + + // Upper limit check. We must compute the highest requested address + // and the highest address we actually have, but being careful to + // avoid overflow. In particular if |aAddr| is 0xFFF...FFF or the + // 3/7 values below that, then we will get overflow. See bug #1245477. 
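+  // (Concrete illustration, on a 64-bit target: if aAddr.Value() were
+  // 0xFFFFFFFFFFFFFFF8, adding sizeof(uintptr_t) == 8 with plain uintptr_t
+  // arithmetic would wrap to 0 and sail past the range check below; the
+  // CheckedInt computations report such wraparound as invalid instead.)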
+ typedef CheckedInt<uintptr_t> CheckedUWord; + CheckedUWord highest_requested_plus_one = + CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t)); + CheckedUWord highest_available_plus_one = + CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen); + if (!highest_requested_plus_one.isValid() // overflow? + || !highest_available_plus_one.isValid() // overflow? + || (highest_requested_plus_one.value() > + highest_available_plus_one.value())) { // in range? + return TaggedUWord(); + } + + return TaggedUWord( + *(uintptr_t*)(&aStackImg + ->mContents[aAddr.Value() - aStackImg->mStartAvma])); +} + +// RUNS IN NO-MALLOC CONTEXT +static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs, + TaggedUWord aCFA) { + switch (aReg) { + case DW_REG_CFA: + return aCFA; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + return aOldRegs->xbp; + case DW_REG_INTEL_XSP: + return aOldRegs->xsp; + case DW_REG_INTEL_XIP: + return aOldRegs->xip; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + return aOldRegs->r7; + case DW_REG_ARM_R11: + return aOldRegs->r11; + case DW_REG_ARM_R12: + return aOldRegs->r12; + case DW_REG_ARM_R13: + return aOldRegs->r13; + case DW_REG_ARM_R14: + return aOldRegs->r14; + case DW_REG_ARM_R15: + return aOldRegs->r15; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return aOldRegs->x29; + case DW_REG_AARCH64_X30: + return aOldRegs->x30; + case DW_REG_AARCH64_SP: + return aOldRegs->sp; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return aOldRegs->sp; + case DW_REG_MIPS_FP: + return aOldRegs->fp; + case DW_REG_MIPS_PC: + return aOldRegs->pc; +#else +# error "Unsupported arch" +#endif + default: + MOZ_ASSERT(0); + return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +// See prototype for comment. +TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector<PfxInstr>& aPfxInstrs) { + // A small evaluation stack, and a stack pointer, which points to + // the highest numbered in-use element. + const int N_STACK = 10; + TaggedUWord stack[N_STACK]; + int stackPointer = -1; + for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord(); + +#define PUSH(_tuw) \ + do { \ + if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \ + stack[++stackPointer] = (_tuw); \ + } while (0) + +#define POP(_lval) \ + do { \ + if (stackPointer < 0) goto fail; /* underflow */ \ + _lval = stack[stackPointer--]; \ + } while (0) + + // Cursor in the instruction sequence. + size_t curr = start + 1; + + // Check the start point is sane. + size_t nInstrs = aPfxInstrs.size(); + if (start < 0 || (size_t)start >= nInstrs) goto fail; + + { + // The instruction sequence must start with PX_Start. If not, + // something is seriously wrong. + PfxInstr first = aPfxInstrs[start]; + if (first.mOpcode != PX_Start) goto fail; + + // Push the CFA on the stack to start with (or not), as required by + // the original DW_OP_*expression* CFI. + if (first.mOperand != 0) PUSH(aCFA); + } + + while (true) { + if (curr >= nInstrs) goto fail; // ran off the end of the sequence + + PfxInstr pfxi = aPfxInstrs[curr++]; + if (pfxi.mOpcode == PX_End) break; // we're done + + switch (pfxi.mOpcode) { + case PX_Start: + // This should appear only at the start of the sequence. + goto fail; + case PX_End: + // We just took care of that, so we shouldn't see it again. 
+ MOZ_ASSERT(0); + goto fail; + case PX_SImm32: + PUSH(TaggedUWord((intptr_t)pfxi.mOperand)); + break; + case PX_DwReg: { + DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand; + MOZ_ASSERT(reg != DW_REG_CFA); + PUSH(EvaluateReg(reg, aOldRegs, aCFA)); + break; + } + case PX_Deref: { + TaggedUWord addr; + POP(addr); + PUSH(DerefTUW(addr, aStackImg)); + break; + } + case PX_Add: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y + x); + break; + } + case PX_Sub: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y - x); + break; + } + case PX_And: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y & x); + break; + } + case PX_Or: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y | x); + break; + } + case PX_CmpGES: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y.CmpGEs(x)); + break; + } + case PX_Shl: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y << x); + break; + } + default: + MOZ_ASSERT(0); + goto fail; + } + } // while (true) + + // Evaluation finished. The top value on the stack is the result. + if (stackPointer >= 0) { + return stack[stackPointer]; + } + // Else fall through + +fail: + return TaggedUWord(); + +#undef PUSH +#undef POP +} + +// RUNS IN NO-MALLOC CONTEXT +TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA, + const StackImage* aStackImg, + const vector<PfxInstr>* aPfxInstrs) const { + switch (mHow) { + case UNKNOWN: + return TaggedUWord(); + case NODEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return tuw; + } + case DEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return DerefTUW(tuw, aStackImg); + } + case PFXEXPR: { + MOZ_ASSERT(aPfxInstrs); + if (!aPfxInstrs) { + return TaggedUWord(); + } + return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs); + } + default: + MOZ_ASSERT(0); + return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg, + const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) { + // Take a copy of regs, since we'll need to refer to the old values + // whilst computing the new ones. + UnwindRegs old_regs = *aRegs; + + // Mark all the current register values as invalid, so that the + // caller can see, on our return, which ones have been computed + // anew. If we don't even manage to compute a new PC value, then + // the caller will have to abandon the unwind. + // FIXME: Create and use instead: aRegs->SetAllInvalid(); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + aRegs->xbp = TaggedUWord(); + aRegs->xsp = TaggedUWord(); + aRegs->xip = TaggedUWord(); +#elif defined(GP_ARCH_arm) + aRegs->r7 = TaggedUWord(); + aRegs->r11 = TaggedUWord(); + aRegs->r12 = TaggedUWord(); + aRegs->r13 = TaggedUWord(); + aRegs->r14 = TaggedUWord(); + aRegs->r15 = TaggedUWord(); +#elif defined(GP_ARCH_arm64) + aRegs->x29 = TaggedUWord(); + aRegs->x30 = TaggedUWord(); + aRegs->sp = TaggedUWord(); + aRegs->pc = TaggedUWord(); +#elif defined(GP_ARCH_mips64) + aRegs->sp = TaggedUWord(); + aRegs->fp = TaggedUWord(); + aRegs->pc = TaggedUWord(); +#else +# error "Unsupported arch" +#endif + + // This is generally useful. + const TaggedUWord inval = TaggedUWord(); + + // First, compute the CFA. + TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/, + aStackImg, aPfxInstrs); + + // If we didn't manage to compute the CFA, well .. that's ungood, + // but keep going anyway. 
It'll be OK provided none of the register + // value rules mention the CFA. In any case, compute the new values + // for each register that we're tracking. + +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + aRegs->xbp = + aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xsp = + aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xip = + aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_arm) + aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r11 = + aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r12 = + aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r13 = + aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r14 = + aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r15 = + aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_arm64) + aRegs->x29 = + aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->x30 = + aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_mips64) + aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#else +# error "Unsupported arch" +#endif + + // We're done. Any regs for which we didn't manage to compute a + // new value will now be marked as invalid. +} + +// RUNS IN NO-MALLOC CONTEXT +void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs, + /*OUT*/ uintptr_t* aFrameSPs, + /*OUT*/ size_t* aFramesUsed, + /*OUT*/ size_t* aFramePointerFramesAcquired, + size_t aFramesAvail, UnwindRegs* aStartRegs, + StackImage* aStackImg) { + MOZ_RELEASE_ASSERT(!mAdminMode); + + ///////////////////////////////////////////////////////// + // BEGIN UNWIND + + *aFramesUsed = 0; + + UnwindRegs regs = *aStartRegs; + TaggedUWord last_valid_sp = TaggedUWord(); + + while (true) { + if (DEBUG_MAIN) { + char buf[300]; + mLog("\n"); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + SprintfLiteral( + buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n", + (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(), + (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(), + (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_arm) + SprintfLiteral( + buf, + "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx" + " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n", + (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(), + (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(), + (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(), + (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(), + (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(), + (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_arm64) + SprintfLiteral( + buf, + "LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx" + " sp %d/%llx\n", + (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(), + (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(), + (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(), + (int)regs.sp.Valid(), 
(unsigned long long int)regs.sp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_mips64) + SprintfLiteral( + buf, "LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n", + (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(), + (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(), + (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#else +# error "Unsupported arch" +#endif + } + +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + TaggedUWord ia = regs.xip; + TaggedUWord sp = regs.xsp; +#elif defined(GP_ARCH_arm) + TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14); + TaggedUWord sp = regs.r13; +#elif defined(GP_ARCH_arm64) + TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30); + TaggedUWord sp = regs.sp; +#elif defined(GP_ARCH_mips64) + TaggedUWord ia = regs.pc; + TaggedUWord sp = regs.sp; +#else +# error "Unsupported arch" +#endif + + if (*aFramesUsed >= aFramesAvail) { + break; + } + + // If we don't have a valid value for the PC, give up. + if (!ia.Valid()) { + break; + } + + // If this is the innermost frame, record the SP value, which + // presumably is valid. If this isn't the innermost frame, and we + // have a valid SP value, check that its SP value isn't less that + // the one we've seen so far, so as to catch potential SP value + // cycles. + if (*aFramesUsed == 0) { + last_valid_sp = sp; + } else { + MOZ_ASSERT(last_valid_sp.Valid()); + if (sp.Valid()) { + if (sp.Value() < last_valid_sp.Value()) { + // Hmm, SP going in the wrong direction. Let's stop. + break; + } + // Remember where we got to. + last_valid_sp = sp; + } + } + + aFramePCs[*aFramesUsed] = ia.Value(); + aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0; + (*aFramesUsed)++; + + // Find the RuleSet for the current IA, if any. This will also + // query the backing (secondary) maps if it isn't found in the + // thread-local cache. + + // If this isn't the innermost frame, back up into the calling insn. + if (*aFramesUsed > 1) { + ia = ia + TaggedUWord((uintptr_t)(-1)); + } + + pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs = + mPriMap->Lookup(ia.Value()); + const RuleSet* ruleset = ruleset_and_pfxinstrs.first; + const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second; + + if (DEBUG_MAIN) { + char buf[100]; + SprintfLiteral(buf, "ruleset for 0x%llx = %p\n", + (unsigned long long int)ia.Value(), ruleset); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } + +#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux) + ///////////////////////////////////////////// + //// + // On 32 bit x86-linux, syscalls are often done via the VDSO + // function __kernel_vsyscall, which doesn't have a corresponding + // object that we can read debuginfo from. That effectively kills + // off all stack traces for threads blocked in syscalls. Hence + // special-case by looking at the code surrounding the program + // counter. 
+ // + // 0xf7757420 <__kernel_vsyscall+0>: push %ecx + // 0xf7757421 <__kernel_vsyscall+1>: push %edx + // 0xf7757422 <__kernel_vsyscall+2>: push %ebp + // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp + // 0xf7757425 <__kernel_vsyscall+5>: sysenter + // 0xf7757427 <__kernel_vsyscall+7>: nop + // 0xf7757428 <__kernel_vsyscall+8>: nop + // 0xf7757429 <__kernel_vsyscall+9>: nop + // 0xf775742a <__kernel_vsyscall+10>: nop + // 0xf775742b <__kernel_vsyscall+11>: nop + // 0xf775742c <__kernel_vsyscall+12>: nop + // 0xf775742d <__kernel_vsyscall+13>: nop + // 0xf775742e <__kernel_vsyscall+14>: int $0x80 + // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp + // 0xf7757431 <__kernel_vsyscall+17>: pop %edx + // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx + // 0xf7757433 <__kernel_vsyscall+19>: ret + // + // In cases where the sampled thread is blocked in a syscall, its + // program counter will point at "pop %ebp". Hence we look for + // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and + // the corresponding register-recovery actions are: + // new_ebp = *(old_esp + 0) + // new eip = *(old_esp + 12) + // new_esp = old_esp + 16 + // + // It may also be the case that the program counter points two + // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in + // the case where the syscall has been restarted but the thread + // hasn't been rescheduled. The code below doesn't handle that; + // it could easily be made to. + // + if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) { + uintptr_t insns_min, insns_max; + uintptr_t eip = ia.Value(); + bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip); + if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) { + uint8_t* eipC = (uint8_t*)eip; + if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D && + eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) { + TaggedUWord sp_plus_0 = sp; + TaggedUWord sp_plus_12 = sp; + TaggedUWord sp_plus_16 = sp; + sp_plus_12 = sp_plus_12 + TaggedUWord(12); + sp_plus_16 = sp_plus_16 + TaggedUWord(16); + TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg); + TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg); + TaggedUWord new_esp = sp_plus_16; + if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) { + regs.xbp = new_ebp; + regs.xip = new_eip; + regs.xsp = new_esp; + continue; + } + } + } + } + //// + ///////////////////////////////////////////// +#endif // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux) + + // So, do we have a ruleset for this address? If so, use it now. + if (ruleset) { + if (DEBUG_MAIN) { + ruleset->Print(ia.Value(), 1 /*bogus, but doesn't matter*/, mLog); + mLog("\n"); + } + // Use the RuleSet to compute the registers for the previous + // frame. |regs| is modified in-place. + UseRuleSet(®s, aStackImg, ruleset, pfxinstrs); + continue; + } + +#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \ + defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \ + defined(GP_PLAT_amd64_freebsd) + // There's no RuleSet for the specified address. On amd64/x86_linux, see if + // it's possible to recover the caller's frame by using the frame pointer. + + // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image), + // and assume the following layout: + // + // <--- new_SP + // +----------+ + // | new_IP | (return address) + // +----------+ + // | new_BP | <--- old_BP + // +----------+ + // | .... | + // | .... | + // | .... 
|
+    //   +----------+ <---- old_SP (arbitrary, but must be <= old_BP)
+
+    const size_t wordSzB = sizeof(uintptr_t);
+    TaggedUWord old_xsp = regs.xsp;
+
+    // points at new_BP ?
+    TaggedUWord old_xbp = regs.xbp;
+    // points at new_IP ?
+    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
+    // is the new_SP ?
+    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);
+
+    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
+        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
+      // We don't need to do any range, alignment or validity checks for
+      // addresses passed to DerefTUW, since that performs them itself, and
+      // returns an invalid value on failure. Any such value will poison
+      // subsequent uses, and we do a final check for validity before putting
+      // the computed values into |regs|.
+      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
+      if (new_xbp.Valid() && new_xbp.IsAligned() &&
+          old_xbp.Value() < new_xbp.Value()) {
+        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
+        TaggedUWord new_xsp = old_xbp_plus2;
+        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
+          regs.xbp = new_xbp;
+          regs.xip = new_xip;
+          regs.xsp = new_xsp;
+          (*aFramePointerFramesAcquired)++;
+          continue;
+        }
+      }
+    }
+#elif defined(GP_ARCH_arm64)
+    // Here is an example of generated code for a prologue and epilogue:
+    //
+    //   stp  x29, x30, [sp, #-16]!
+    //   mov  x29, sp
+    //   ...
+    //   ldp  x29, x30, [sp], #16
+    //   ret
+    //
+    // Next is another example of generated code.
+    //
+    //   stp  x20, x19, [sp, #-32]!
+    //   stp  x29, x30, [sp, #16]
+    //   add  x29, sp, #0x10
+    //   ...
+    //   ldp  x29, x30, [sp, #16]
+    //   ldp  x20, x19, [sp], #32
+    //   ret
+    //
+    // The previous x29 and x30 registers are saved at the address held in
+    // the current x29 register.  Because the sp value depends on the size of
+    // the frame's locals, we cannot compute the previous sp from the current
+    // sp/fp/lr registers, and there is no regular prologue rule for sp.
+    // However, the return address is in lr, so if x29 is valid we can
+    // recover the return address without needing sp.
+    //
+    // So, when there is no RuleSet, we assume the following layout.  x29 is
+    // the frame pointer, so we can compute the previous x29 and x30.
+    //
+    //   +----------+ <--- new_sp (cannot compute)
+    //   |   ....   |
+    //   +----------+
+    //   |  new_lr  |  (return address)
+    //   +----------+
+    //   |  new_fp  | <--- old_fp
+    //   +----------+
+    //   |   ....   |
+    //   |   ....   |
+    //   +----------+ <---- old_sp (arbitrary, but unused)
+
+    TaggedUWord old_fp = regs.x29;
+    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
+        last_valid_sp.Value() <= old_fp.Value()) {
+      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
+      if (new_fp.Valid() && new_fp.IsAligned() &&
+          old_fp.Value() < new_fp.Value()) {
+        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
+        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
+        if (new_lr.Valid()) {
+          regs.x29 = new_fp;
+          regs.x30 = new_lr;
+          // When walking the stack via the frame pointer we cannot compute
+          // the previous sp, since there is no regular rule for recovering
+          // it from the fp/lr/sp values we have.  So mark it as invalid.
+ regs.sp = TaggedUWord(); + (*aFramePointerFramesAcquired)++; + continue; + } + } + } +#endif // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || + // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || + // defined(GP_PLAT_amd64_freebsd) + + // We failed to recover a frame either using CFI or FP chasing, and we + // have no other ways to recover the frame. So we have to give up. + break; + + } // top level unwind loop + + // END UNWIND + ///////////////////////////////////////////////////////// +} + +//////////////////////////////////////////////////////////////// +// LUL Unit Testing // +//////////////////////////////////////////////////////////////// + +static const int LUL_UNIT_TEST_STACK_SIZE = 32768; + +#if defined(GP_ARCH_mips64) +static __attribute__((noinline)) unsigned long __getpc(void) { + unsigned long rtaddr; + __asm__ volatile("move %0, $31" : "=r"(rtaddr)); + return rtaddr; +} +#endif + +// This function is innermost in the test call sequence. It uses LUL +// to unwind, and compares the result with the sequence specified in +// the director string. These need to agree in order for the test to +// pass. In order not to screw up the results, this function needs +// to have a not-very big stack frame, since we're only presenting +// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and +// that chunk unavoidably includes the frame for this function. +// +// This function must not be inlined into its callers. Doing so will +// cause the expected-vs-actual backtrace consistency checking to +// fail. Prints summary results to |aLUL|'s logging sink and also +// returns a boolean indicating whether or not the test failed. +static __attribute__((noinline)) bool GetAndCheckStackTrace( + LUL* aLUL, const char* dstring) { + // Get hold of the current unwind-start registers. 
+ UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); +#if defined(GP_ARCH_amd64) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "leaq 0(%%rip), %%r15" + "\n\t" + "movq %%r15, 0(%0)" + "\n\t" + "movq %%rsp, 8(%0)" + "\n\t" + "movq %%rbp, 16(%0)" + "\n" + : + : "r"(&block[0]) + : "memory", "r15"); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 128; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 12); + __asm__ __volatile__( + ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/ + "\n\t" + "popl %%edi" + "\n\t" + "movl %%edi, 0(%0)" + "\n\t" + "movl %%esp, 4(%0)" + "\n\t" + "movl %%ebp, 8(%0)" + "\n" + : + : "r"(&block[0]) + : "memory", "edi"); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + volatile uintptr_t block[6]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "mov r0, r15" + "\n\t" + "str r0, [%0, #0]" + "\n\t" + "str r14, [%0, #4]" + "\n\t" + "str r13, [%0, #8]" + "\n\t" + "str r12, [%0, #12]" + "\n\t" + "str r11, [%0, #16]" + "\n\t" + "str r7, [%0, #20]" + "\n" + : + : "r"(&block[0]) + : "memory", "r0"); + startRegs.r15 = TaggedUWord(block[0]); + startRegs.r14 = TaggedUWord(block[1]); + startRegs.r13 = TaggedUWord(block[2]); + startRegs.r12 = TaggedUWord(block[3]); + startRegs.r11 = TaggedUWord(block[4]); + startRegs.r7 = TaggedUWord(block[5]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_ARCH_arm64) + volatile uintptr_t block[4]; + MOZ_ASSERT(sizeof(block) == 32); + __asm__ __volatile__( + "adr x0, . \n\t" + "str x0, [%0, #0] \n\t" + "str x29, [%0, #8] \n\t" + "str x30, [%0, #16] \n\t" + "mov x0, sp \n\t" + "str x0, [%0, #24] \n\t" + : + : "r"(&block[0]) + : "memory", "x0"); + startRegs.pc = TaggedUWord(block[0]); + startRegs.x29 = TaggedUWord(block[1]); + startRegs.x30 = TaggedUWord(block[2]); + startRegs.sp = TaggedUWord(block[3]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_ARCH_mips64) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "sd $29, 8(%0) \n" + "sd $30, 16(%0) \n" + : + : "r"(block) + : "memory"); + block[0] = __getpc(); + startRegs.pc = TaggedUWord(block[0]); + startRegs.sp = TaggedUWord(block[1]); + startRegs.fp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#else +# error "Unsupported platform" +#endif + + // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the + // stack. + uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE; + uintptr_t ws = sizeof(void*); + start &= ~(ws - 1); + end &= ~(ws - 1); + uintptr_t nToCopy = end - start; + if (nToCopy > lul::N_STACK_BYTES) { + nToCopy = lul::N_STACK_BYTES; + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + StackImage* stackImg = new StackImage(); + stackImg->mLen = nToCopy; + stackImg->mStartAvma = start; + if (nToCopy > 0) { + MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy); + memcpy(&stackImg->mContents[0], (void*)start, nToCopy); + } + + // Unwind it. 
+ const int MAX_TEST_FRAMES = 64; + uintptr_t framePCs[MAX_TEST_FRAMES]; + uintptr_t frameSPs[MAX_TEST_FRAMES]; + size_t framesAvail = mozilla::ArrayLength(framePCs); + size_t framesUsed = 0; + size_t framePointerFramesAcquired = 0; + aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed, + &framePointerFramesAcquired, framesAvail, &startRegs, stackImg); + + delete stackImg; + + // if (0) { + // // Show what we have. + // fprintf(stderr, "Got %d frames:\n", (int)framesUsed); + // for (size_t i = 0; i < framesUsed; i++) { + // fprintf(stderr, " [%2d] SP %p PC %p\n", + // (int)i, (void*)frameSPs[i], (void*)framePCs[i]); + // } + // fprintf(stderr, "\n"); + //} + + // Check to see if there's a consistent binding between digits in + // the director string ('1' .. '8') and the PC values acquired by + // the unwind. If there isn't, the unwinding has failed somehow. + uintptr_t binding[8]; // binding for '1' .. binding for '8' + memset((void*)binding, 0, sizeof(binding)); + + // The general plan is to work backwards along the director string + // and forwards along the framePCs array. Doing so corresponds to + // working outwards from the innermost frame of the recursive test set. + const char* cursor = dstring; + + // Find the end. This leaves |cursor| two bytes past the first + // character we want to look at -- see comment below. + while (*cursor) cursor++; + + // Counts the number of consistent frames. + size_t nConsistent = 0; + + // Iterate back to the start of the director string. The starting + // points are a bit complex. We can't use framePCs[0] because that + // contains the PC in this frame (above). We can't use framePCs[1] + // because that will contain the PC at return point in the recursive + // test group (TestFn[1-8]) for their call "out" to this function, + // GetAndCheckStackTrace. Although LUL will compute a correct + // return address, that will not be the same return address as for a + // recursive call out of the the function to another function in the + // group. Hence we can only start consistency checking at + // framePCs[2]. + // + // To be consistent, then, we must ignore the last element in the + // director string as that corresponds to framePCs[1]. Hence the + // start points are: framePCs[2] and the director string 2 bytes + // before the terminating zero. + // + // Also as a result of this, the number of consistent frames counted + // will always be one less than the length of the director string + // (not including its terminating zero). + size_t frameIx; + for (cursor = cursor - 2, frameIx = 2; + cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) { + char c = *cursor; + uintptr_t pc = framePCs[frameIx]; + // If this doesn't hold, the director string is ill-formed. + MOZ_ASSERT(c >= '1' && c <= '8'); + int n = ((int)c) - ((int)'1'); + if (binding[n] == 0) { + // There's no binding for |c| yet, so install |pc| and carry on. + binding[n] = pc; + nConsistent++; + continue; + } + // There's a pre-existing binding for |c|. Check it's consistent. + if (binding[n] != pc) { + // Not consistent. Give up now. + break; + } + // Consistent. Keep going. + nConsistent++; + } + + // So, did we succeed? + bool passed = nConsistent + 1 == strlen(dstring); + + // Show the results. + char buf[200]; + SprintfLiteral(buf, "LULUnitTest: dstring = %s\n", dstring); + buf[sizeof(buf) - 1] = 0; + aLUL->mLog(buf); + SprintfLiteral(buf, "LULUnitTest: %d consistent, %d in dstring: %s\n", + (int)nConsistent, (int)strlen(dstring), + passed ? 
"PASS" : "FAIL"); + buf[sizeof(buf) - 1] = 0; + aLUL->mLog(buf); + + return !passed; +} + +// Macro magic to create a set of 8 mutually recursive functions with +// varying frame sizes. These will recurse amongst themselves as +// specified by |strP|, the directory string, and call +// GetAndCheckStackTrace when the string becomes empty, passing it the +// original value of the string. This checks the result, printing +// results on |aLUL|'s logging sink, and also returns a boolean +// indicating whether or not the results are acceptable (correct). + +#define DECL_TEST_FN(NAME) \ + bool NAME(LUL* aLUL, const char* strPorig, const char* strP); + +#define GEN_TEST_FN(NAME, FRAMESIZE) \ + bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \ + /* Create a frame of size (at least) FRAMESIZE, so that the */ \ + /* 8 functions created by this macro offer some variation in frame */ \ + /* sizes. This isn't as simple as it might seem, since a clever */ \ + /* optimizing compiler (eg, clang-5) detects that the array is unused */ \ + /* and removes it. We try to defeat this by passing it to a function */ \ + /* in a different compilation unit, and hoping that clang does not */ \ + /* notice that the call is a no-op. */ \ + char space[FRAMESIZE]; \ + Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \ + \ + if (*strP == '\0') { \ + /* We've come to the end of the director string. */ \ + /* Take a stack snapshot. */ \ + /* We purposefully use a negation to avoid tail-call optimization */ \ + return !GetAndCheckStackTrace(aLUL, strPorig); \ + } else { \ + /* Recurse onwards. This is a bit subtle. The obvious */ \ + /* thing to do here is call onwards directly, from within the */ \ + /* arms of the case statement. That gives a problem in that */ \ + /* there will be multiple return points inside each function when */ \ + /* unwinding, so it will be difficult to check for consistency */ \ + /* against the director string. Instead, we make an indirect */ \ + /* call, so as to guarantee that there is only one call site */ \ + /* within each function. This does assume that the compiler */ \ + /* won't transform it back to the simple direct-call form. */ \ + /* To discourage it from doing so, the call is bracketed with */ \ + /* __asm__ __volatile__ sections so as to make it not-movable. */ \ + bool (*nextFn)(LUL*, const char*, const char*) = NULL; \ + switch (*strP) { \ + case '1': \ + nextFn = TestFn1; \ + break; \ + case '2': \ + nextFn = TestFn2; \ + break; \ + case '3': \ + nextFn = TestFn3; \ + break; \ + case '4': \ + nextFn = TestFn4; \ + break; \ + case '5': \ + nextFn = TestFn5; \ + break; \ + case '6': \ + nextFn = TestFn6; \ + break; \ + case '7': \ + nextFn = TestFn7; \ + break; \ + case '8': \ + nextFn = TestFn8; \ + break; \ + default: \ + nextFn = TestFn8; \ + break; \ + } \ + /* "use" |space| immediately after the recursive call, */ \ + /* so as to dissuade clang from deallocating the space while */ \ + /* the call is active, or otherwise messing with the stack frame. */ \ + __asm__ __volatile__("" ::: "cc", "memory"); \ + bool passed = nextFn(aLUL, strPorig, strP + 1); \ + Unused << write(1, space, 0); \ + __asm__ __volatile__("" ::: "cc", "memory"); \ + return passed; \ + } \ + } + +// The test functions are mutually recursive, so it is necessary to +// declare them before defining them. 
+DECL_TEST_FN(TestFn1) +DECL_TEST_FN(TestFn2) +DECL_TEST_FN(TestFn3) +DECL_TEST_FN(TestFn4) +DECL_TEST_FN(TestFn5) +DECL_TEST_FN(TestFn6) +DECL_TEST_FN(TestFn7) +DECL_TEST_FN(TestFn8) + +GEN_TEST_FN(TestFn1, 123) +GEN_TEST_FN(TestFn2, 456) +GEN_TEST_FN(TestFn3, 789) +GEN_TEST_FN(TestFn4, 23) +GEN_TEST_FN(TestFn5, 47) +GEN_TEST_FN(TestFn6, 117) +GEN_TEST_FN(TestFn7, 1) +GEN_TEST_FN(TestFn8, 99) + +// This starts the test sequence going. Call here to generate a +// sequence of calls as directed by the string |dstring|. The call +// sequence will, from its innermost frame, finish by calling +// GetAndCheckStackTrace() and passing it |dstring|. +// GetAndCheckStackTrace() will unwind the stack, check consistency +// of those results against |dstring|, and print a pass/fail message +// to aLUL's logging sink. It also updates the counters in *aNTests +// and aNTestsPassed. +__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests, + /*OUT*/ int* aNTestsPassed, LUL* aLUL, + const char* dstring) { + // Ensure that the stack has at least this much space on it. This + // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes + // and hand it to LUL. Safe in the sense that no segfault can + // happen because the stack is at least this big. This is all + // somewhat dubious in the sense that a sufficiently clever compiler + // (clang, for one) can figure out that space[] is unused and delete + // it from the frame. Hence the somewhat elaborate hoop jumping to + // fill it up before the call and to at least appear to use the + // value afterwards. + int i; + volatile char space[LUL_UNIT_TEST_STACK_SIZE]; + for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { + space[i] = (char)(i & 0x7F); + } + + // Really run the test. + bool passed = TestFn1(aLUL, dstring, dstring); + + // Appear to use space[], by visiting the value to compute some kind + // of checksum, and then (apparently) using the checksum. + int sum = 0; + for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { + // If this doesn't fool LLVM, I don't know what will. + sum += space[i] - 3 * i; + } + __asm__ __volatile__("" : : "r"(sum)); + + // Update the counters. + (*aNTests)++; + if (passed) { + (*aNTestsPassed)++; + } +} + +void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed, + LUL* aLUL) { + aLUL->mLog(":\n"); + aLUL->mLog("LULUnitTest: BEGIN\n"); + *aNTests = *aNTestsPassed = 0; + TestUnw(aNTests, aNTestsPassed, aLUL, "11111111"); + TestUnw(aNTests, aNTestsPassed, aLUL, "11222211"); + TestUnw(aNTests, aNTestsPassed, aLUL, "111222333"); + TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212"); + TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258"); + TestUnw(aNTests, aNTestsPassed, aLUL, + "123456781122334455667788777777777777777777777"); + aLUL->mLog("LULUnitTest: END\n"); + aLUL->mLog(":\n"); +} + +} // namespace lul diff --git a/tools/profiler/lul/LulMain.h b/tools/profiler/lul/LulMain.h new file mode 100644 index 0000000000..d386bd5c4f --- /dev/null +++ b/tools/profiler/lul/LulMain.h @@ -0,0 +1,378 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef LulMain_h +#define LulMain_h + +#include "PlatformMacros.h" +#include "mozilla/Atomics.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/ProfilerUtils.h" + +// LUL: A Lightweight Unwind Library. +// This file provides the end-user (external) interface for LUL. + +// Some comments about naming in the implementation. These are safe +// to ignore if you are merely using LUL, but are important if you +// hack on its internals. +// +// Debuginfo readers in general have tended to use the word "address" +// to mean several different things. This sometimes makes them +// difficult to understand and maintain. LUL tries hard to avoid +// using the word "address" and instead uses the following more +// precise terms: +// +// * SVMA ("Stated Virtual Memory Address"): this is an address of a +// symbol (etc) as it is stated in the symbol table, or other +// metadata, of an object. Such values are typically small and +// start from zero or thereabouts, unless the object has been +// prelinked. +// +// * AVMA ("Actual Virtual Memory Address"): this is the address of a +// symbol (etc) in a running process, that is, once the associated +// object has been mapped into a process. Such values are typically +// much larger than SVMAs, since objects can get mapped arbitrarily +// far along the address space. +// +// * "Bias": the difference between AVMA and SVMA for a given symbol +// (specifically, AVMA - SVMA). The bias is always an integral +// number of pages. Once we know the bias for a given object's +// text section (for example), we can compute the AVMAs of all of +// its text symbols by adding the bias to their SVMAs. +// +// * "Image address": typically, to read debuginfo from an object we +// will temporarily mmap in the file so as to read symbol tables +// etc. Addresses in this temporary mapping are called "Image +// addresses". Note that the temporary mapping is entirely +// unrelated to the mappings of the file that the dynamic linker +// must perform merely in order to get the program to run. Hence +// image addresses are unrelated to either SVMAs or AVMAs. + +namespace lul { + +// A machine word plus validity tag. +class TaggedUWord { + public: + // RUNS IN NO-MALLOC CONTEXT + // Construct a valid one. + explicit TaggedUWord(uintptr_t w) : mValue(w), mValid(true) {} + + // RUNS IN NO-MALLOC CONTEXT + // Construct an invalid one. + TaggedUWord() : mValue(0), mValid(false) {} + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator+(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() + rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator-(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() - rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator&(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() & rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator|(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() | rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord CmpGEs(TaggedUWord rhs) const { + if (Valid() && rhs.Valid()) { + intptr_t s1 = (intptr_t)Value(); + intptr_t s2 = (intptr_t)rhs.Value(); + return TaggedUWord(s1 >= s2 ? 
1 : 0); + } + return TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator<<(TaggedUWord rhs) const { + if (Valid() && rhs.Valid()) { + uintptr_t shift = rhs.Value(); + if (shift < 8 * sizeof(uintptr_t)) return TaggedUWord(Value() << shift); + } + return TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + // Is equal? Note: non-validity on either side gives non-equality. + bool operator==(TaggedUWord other) const { + return (mValid && other.Valid()) ? (mValue == other.Value()) : false; + } + + // RUNS IN NO-MALLOC CONTEXT + // Is it word-aligned? + bool IsAligned() const { + return mValid && (mValue & (sizeof(uintptr_t) - 1)) == 0; + } + + // RUNS IN NO-MALLOC CONTEXT + uintptr_t Value() const { return mValue; } + + // RUNS IN NO-MALLOC CONTEXT + bool Valid() const { return mValid; } + + private: + uintptr_t mValue; + bool mValid; +}; + +// The registers, with validity tags, that will be unwound. + +struct UnwindRegs { +#if defined(GP_ARCH_arm) + TaggedUWord r7; + TaggedUWord r11; + TaggedUWord r12; + TaggedUWord r13; + TaggedUWord r14; + TaggedUWord r15; +#elif defined(GP_ARCH_arm64) + TaggedUWord x29; + TaggedUWord x30; + TaggedUWord sp; + TaggedUWord pc; +#elif defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + TaggedUWord xbp; + TaggedUWord xsp; + TaggedUWord xip; +#elif defined(GP_ARCH_mips64) + TaggedUWord sp; + TaggedUWord fp; + TaggedUWord pc; +#else +# error "Unknown plat" +#endif +}; + +// The maximum number of bytes in a stack snapshot. This value can be increased +// if necessary, but testing showed that 160k is enough to obtain good +// backtraces on x86_64 Linux. Most backtraces fit comfortably into 4-8k of +// stack space, but we do have some very deep stacks occasionally. Please see +// the comments in DoNativeBacktrace as to why it's OK to have this value be so +// large. +static const size_t N_STACK_BYTES = 160 * 1024; + +// The stack chunk image that will be unwound. +struct StackImage { + // [start_avma, +len) specify the address range in the buffer. + // Obviously we require 0 <= len <= N_STACK_BYTES. + uintptr_t mStartAvma; + size_t mLen; + uint8_t mContents[N_STACK_BYTES]; +}; + +// Statistics collection for the unwinder. +template <typename T> +class LULStats { + public: + LULStats() : mContext(0), mCFI(0), mFP(0) {} + + template <typename S> + explicit LULStats(const LULStats<S>& aOther) + : mContext(aOther.mContext), mCFI(aOther.mCFI), mFP(aOther.mFP) {} + + template <typename S> + LULStats<T>& operator=(const LULStats<S>& aOther) { + mContext = aOther.mContext; + mCFI = aOther.mCFI; + mFP = aOther.mFP; + return *this; + } + + template <typename S> + uint32_t operator-(const LULStats<S>& aOther) { + return (mContext - aOther.mContext) + (mCFI - aOther.mCFI) + + (mFP - aOther.mFP); + } + + T mContext; // Number of context frames + T mCFI; // Number of CFI/EXIDX frames + T mFP; // Number of frame-pointer recovered frames +}; + +// The core unwinder library class. Just one of these is needed, and +// it can be shared by multiple unwinder threads. +// +// The library operates in one of two modes. +// +// * Admin mode. The library is this state after creation. In Admin +// mode, no unwinding may be performed. It is however allowable to +// perform administrative tasks -- primarily, loading of unwind info +// -- in this mode. In particular, it is safe for the library to +// perform dynamic memory allocation in this mode. 
Safe in the
+// sense that there is no risk of deadlock against unwinding threads
+// that might -- because of where they have been sampled -- hold the
+// system's malloc lock.
+//
+// * Unwind mode. In this mode, calls to ::Unwind may be made, but
+// nothing else. ::Unwind guarantees not to make any dynamic memory
+// requests, so as to guarantee that the calling thread won't
+// deadlock in the case where it already holds the system's malloc lock.
+//
+// The library is created in Admin mode. After debuginfo is loaded,
+// the caller must switch it into Unwind mode by calling
+// ::EnableUnwinding. There is no way to switch it back to Admin mode
+// after that. To safely switch back to Admin mode would require the
+// caller (or other external agent) to guarantee that there are no
+// pending ::Unwind calls.
+
+class PriMap;
+class SegArray;
+class UniqueStringUniverse;
+
+class LUL {
+ public:
+ // Create; supply a logging sink. Sets the object in Admin mode.
+ explicit LUL(void (*aLog)(const char*));
+
+ // Destroy. Caller is responsible for ensuring that no other
+ // threads are in Unwind calls. All resources are freed and all
+ // registered unwinder threads are deregistered. Can be called
+ // either in Admin or Unwind mode.
+ ~LUL();
+
+ // Notify the library that unwinding is now allowed and so
+ // admin-mode calls are no longer allowed. The object is initially
+ // created in admin mode. The only possible transition is
+ // admin->unwinding, therefore.
+ void EnableUnwinding();
+
+ // Notify of a new r-x mapping, and load the associated unwind info.
+ // The filename is strdup'd and used for debug printing. If
+ // aMappedImage is NULL, this function will mmap/munmap the file
+ // itself, so as to be able to read the unwind info. If
+ // aMappedImage is non-NULL then it is assumed to point to a
+ // caller-supplied and caller-managed mapped image of the file.
+ // May only be called in Admin mode.
+ void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
+ const void* aMappedImage);
+
+ // In rare cases we know an executable area exists but don't know
+ // what the associated file is. This call notifies LUL of such
+ // areas. This is important for correct functioning of stack
+ // scanning and of the x86-{linux,android} special-case
+ // __kernel_syscall function handling.
+ // This must be called only after the code area in
+ // question really has been mapped.
+ // May only be called in Admin mode.
+ void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize);
+
+ // Notify that a mapped area has been unmapped; discard any
+ // associated unwind info. Acquires mRWlock for writing. Note that
+ // to avoid segfaulting the stack-scan unwinder, which inspects code
+ // areas, this must be called before the code area in question is
+ // really unmapped. Note that, unlike NotifyAfterMap(), this
+ // function takes the start and end addresses of the range to be
+ // unmapped, rather than a start and a length parameter. This is so
+ // as to make it possible to notify an unmap for the entire address
+ // space using a single call.
+ // May only be called in Admin mode.
+ void NotifyBeforeUnmapAll() { NotifyBeforeUnmap(0, UINTPTR_MAX); }
+
+ // Returns the number of mappings currently registered.
+ // May only be called in Admin mode.
+ size_t CountMappings();
+
+ // Unwind |aStackImg| starting with the context in |aStartRegs|.
+ // Write the number of frames recovered in *aFramesUsed. Put
+ // the PC values in aFramePCs[0 .. *aFramesUsed-1] and
+ // the SP values in aFrameSPs[0 .. *aFramesUsed-1].
+ // |aFramesAvail| is the size of the two output arrays and hence the
+ // largest possible value of *aFramesUsed. PC values are always
+ // valid, and the unwind will stop when the PC becomes invalid, but
+ // the SP values might be invalid, in which case the value zero will
+ // be written in the relevant frameSPs[] slot.
+ //
+ // This function assumes that the SP values increase as it unwinds
+ // away from the innermost frame -- that is, that the stack grows
+ // down. It monitors SP values as it unwinds to check they
+ // increase, so as to avoid looping on corrupted stacks.
+ //
+ // May only be called in Unwind mode. Multiple threads may unwind
+ // at once. LUL user is responsible for ensuring that no thread makes
+ // any Admin calls whilst in Unwind mode.
+ // MOZ_CRASHes if the calling thread is not registered for unwinding.
+ //
+ // The calling thread must previously have been registered via a call to
+ // RegisterSampledThread.
+ void Unwind(/*OUT*/ uintptr_t* aFramePCs,
+ /*OUT*/ uintptr_t* aFrameSPs,
+ /*OUT*/ size_t* aFramesUsed,
+ /*OUT*/ size_t* aFramePointerFramesAcquired, size_t aFramesAvail,
+ UnwindRegs* aStartRegs, StackImage* aStackImg);
+
+ // The logging sink. Call to send debug strings to the caller-
+ // specified destination. Can only be called by the Admin thread.
+ void (*mLog)(const char*);
+
+ // Statistics relating to unwinding. These have to be atomic since
+ // unwinding can occur on different threads simultaneously.
+ LULStats<mozilla::Atomic<uint32_t>> mStats;
+
+ // Possibly show the statistics. This may not be called from any
+ // registered sampling thread, since it involves I/O.
+ void MaybeShowStats();
+
+ size_t SizeOfIncludingThis(mozilla::MallocSizeOf) const;
+
+ private:
+ // The statistics counters at the point where they were last printed.
+ LULStats<uint32_t> mStatsPrevious;
+
+ // Are we in admin mode? Initially |true| but changes to |false|
+ // once unwinding begins.
+ bool mAdminMode;
+
+ // The thread ID associated with admin mode. This is the only thread
+ // that is allowed to perform non-Unwind calls on this object. Conversely,
+ // no registered Unwinding thread may be the admin thread. This is so
+ // as to clearly partition the one thread that may do dynamic memory
+ // allocation from the threads that are being sampled, since the latter
+ // absolutely may not do dynamic memory allocation.
+ ProfilerThreadId mAdminThreadId;
+
+ // The top level mapping from code address ranges to postprocessed
+ // unwind info. Basically a sorted array of (addr, len, info)
+ // records. This field is updated by NotifyAfterMap and NotifyBeforeUnmap.
+ PriMap* mPriMap;
+
+ // An auxiliary structure that records which address ranges are
+ // mapped r-x, for the benefit of the stack scanner.
+ SegArray* mSegArray;
+
+ // A UniqueStringUniverse that holds all the strdup'd strings created
+ // whilst reading unwind information. This is included so as to make
+ // it possible to free them in ~LUL.
+ UniqueStringUniverse* mUSU; +}; + +// Run unit tests on an initialised, loaded-up LUL instance, and print +// summary results on |aLUL|'s logging sink. Also return the number +// of tests run in *aNTests and the number that passed in +// *aNTestsPassed. +void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed, + LUL* aLUL); + +} // namespace lul + +#endif // LulMain_h diff --git a/tools/profiler/lul/LulMainInt.h b/tools/profiler/lul/LulMainInt.h new file mode 100644 index 0000000000..001a4aecfb --- /dev/null +++ b/tools/profiler/lul/LulMainInt.h @@ -0,0 +1,631 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulMainInt_h +#define LulMainInt_h + +#include "PlatformMacros.h" +#include "LulMain.h" // for TaggedUWord + +#include <string> +#include <vector> + +#include "mozilla/Assertions.h" +#include "mozilla/HashFunctions.h" +#include "mozilla/HashTable.h" +#include "mozilla/Sprintf.h" + +// This file provides an internal interface inside LUL. If you are an +// end-user of LUL, do not include it in your code. The end-user +// interface is in LulMain.h. + +namespace lul { + +using std::vector; + +//////////////////////////////////////////////////////////////// +// DW_REG_ constants // +//////////////////////////////////////////////////////////////// + +// These are the Dwarf CFI register numbers, as (presumably) defined +// in the ELF ABI supplements for each architecture. + +enum DW_REG_NUMBER { + // No real register has this number. It's convenient to be able to + // treat the CFA (Canonical Frame Address) as "just another + // register", though. + DW_REG_CFA = -1, +#if defined(GP_ARCH_arm) + // ARM registers + DW_REG_ARM_R7 = 7, + DW_REG_ARM_R11 = 11, + DW_REG_ARM_R12 = 12, + DW_REG_ARM_R13 = 13, + DW_REG_ARM_R14 = 14, + DW_REG_ARM_R15 = 15, +#elif defined(GP_ARCH_arm64) + // aarch64 registers + DW_REG_AARCH64_X29 = 29, + DW_REG_AARCH64_X30 = 30, + DW_REG_AARCH64_SP = 31, +#elif defined(GP_ARCH_amd64) + // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are + // combined, a merged set of register constants is needed. + DW_REG_INTEL_XBP = 6, + DW_REG_INTEL_XSP = 7, + DW_REG_INTEL_XIP = 16, +#elif defined(GP_ARCH_x86) + DW_REG_INTEL_XBP = 5, + DW_REG_INTEL_XSP = 4, + DW_REG_INTEL_XIP = 8, +#elif defined(GP_ARCH_mips64) + DW_REG_MIPS_SP = 29, + DW_REG_MIPS_FP = 30, + DW_REG_MIPS_PC = 34, +#else +# error "Unknown arch" +#endif +}; + +//////////////////////////////////////////////////////////////// +// PfxExpr // +//////////////////////////////////////////////////////////////// + +enum PfxExprOp { + // meaning of mOperand effect on stack + PX_Start, // bool start-with-CFA? start, with CFA on stack, or not + PX_End, // none stop; result is at top of stack + PX_SImm32, // int32 push signed int32 + PX_DwReg, // DW_REG_NUMBER push value of the specified reg + PX_Deref, // none pop X ; push *X + PX_Add, // none pop X ; pop Y ; push Y + X + PX_Sub, // none pop X ; pop Y ; push Y - X + PX_And, // none pop X ; pop Y ; push Y & X + PX_Or, // none pop X ; pop Y ; push Y | X + PX_CmpGES, // none pop X ; pop Y ; push (Y >=s X) ? 
1 : 0 + PX_Shl // none pop X ; pop Y ; push Y << X +}; + +struct PfxInstr { + PfxInstr(PfxExprOp opcode, int32_t operand) + : mOpcode(opcode), mOperand(operand) {} + explicit PfxInstr(PfxExprOp opcode) : mOpcode(opcode), mOperand(0) {} + bool operator==(const PfxInstr& other) const { + return mOpcode == other.mOpcode && mOperand == other.mOperand; + } + PfxExprOp mOpcode; + int32_t mOperand; +}; + +static_assert(sizeof(PfxInstr) <= 8, "PfxInstr size changed unexpectedly"); + +// Evaluate the prefix expression whose PfxInstrs start at aPfxInstrs[start]. +// In the case of any mishap (stack over/underflow, running off the end of +// the instruction vector, obviously malformed sequences), +// return an invalid TaggedUWord. +// RUNS IN NO-MALLOC CONTEXT +TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector<PfxInstr>& aPfxInstrs); + +//////////////////////////////////////////////////////////////// +// LExpr // +//////////////////////////////////////////////////////////////// + +// An expression -- very primitive. Denotes either "register + +// offset", a dereferenced version of the same, or a reference to a +// prefix expression stored elsewhere. So as to allow convenient +// handling of Dwarf-derived unwind info, the register may also denote +// the CFA. A large number of these need to be stored, so we ensure +// it fits into 8 bytes. See comment below on RuleSet to see how +// expressions fit into the bigger picture. + +enum LExprHow { + UNKNOWN = 0, // This LExpr denotes no value. + NODEREF, // Value is (mReg + mOffset). + DEREF, // Value is *(mReg + mOffset). + PFXEXPR // Value is EvaluatePfxExpr(secMap->mPfxInstrs[mOffset]) +}; + +inline static const char* NameOf_LExprHow(LExprHow how) { + switch (how) { + case UNKNOWN: + return "UNKNOWN"; + case NODEREF: + return "NODEREF"; + case DEREF: + return "DEREF"; + case PFXEXPR: + return "PFXEXPR"; + default: + return "LExpr-??"; + } +} + +struct LExpr { + // Denotes an expression with no value. + LExpr() : mHow(UNKNOWN), mReg(0), mOffset(0) {} + + // Denotes any expressible expression. + LExpr(LExprHow how, int16_t reg, int32_t offset) + : mHow(how), mReg(reg), mOffset(offset) { + switch (how) { + case UNKNOWN: + MOZ_ASSERT(reg == 0 && offset == 0); + break; + case NODEREF: + break; + case DEREF: + break; + case PFXEXPR: + MOZ_ASSERT(reg == 0 && offset >= 0); + break; + default: + MOZ_RELEASE_ASSERT(0, "LExpr::LExpr: invalid how"); + } + } + + // Hash it, carefully looking only at defined parts. + mozilla::HashNumber hash() const { + mozilla::HashNumber h = mHow; + switch (mHow) { + case UNKNOWN: + break; + case NODEREF: + case DEREF: + h = mozilla::AddToHash(h, mReg); + h = mozilla::AddToHash(h, mOffset); + break; + case PFXEXPR: + h = mozilla::AddToHash(h, mOffset); + break; + default: + MOZ_RELEASE_ASSERT(0, "LExpr::hash: invalid how"); + } + return h; + } + + // And structural equality. + bool equals(const LExpr& other) const { + if (mHow != other.mHow) { + return false; + } + switch (mHow) { + case UNKNOWN: + return true; + case NODEREF: + case DEREF: + return mReg == other.mReg && mOffset == other.mOffset; + case PFXEXPR: + return mOffset == other.mOffset; + default: + MOZ_RELEASE_ASSERT(0, "LExpr::equals: invalid how"); + } + } + + // Change the offset for an expression that references memory. 
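+ // For example (purely illustrative): if an LExpr denotes "CFA + 8",
+ // i.e. LExpr(NODEREF, DW_REG_CFA, 8), then add_delta(4) produces the
+ // expression "CFA + 12", i.e. LExpr(NODEREF, DW_REG_CFA, 12).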
+ LExpr add_delta(long delta) {
+ MOZ_ASSERT(mHow == NODEREF);
+ // If this is a non-debug build and the above assertion would have
+ // failed, at least return LExpr() so that the machinery that uses
+ // the resulting expression fails in a repeatable way.
+ return (mHow == NODEREF) ? LExpr(mHow, mReg, mOffset + delta)
+ : LExpr(); // Gone bad
+ }
+
+ // Dereference an expression that denotes a memory address.
+ LExpr deref() {
+ MOZ_ASSERT(mHow == NODEREF);
+ // Same rationale as for add_delta().
+ return (mHow == NODEREF) ? LExpr(DEREF, mReg, mOffset)
+ : LExpr(); // Gone bad
+ }
+
+ // Print a rule for recovery of |aNewReg| whose recovered value
+ // is this LExpr.
+ std::string ShowRule(const char* aNewReg) const;
+
+ // Evaluate this expression, producing a TaggedUWord. |aOldRegs|
+ // holds register values that may be referred to by the expression.
+ // |aCFA| holds the CFA value, if any, that applies. |aStackImg|
+ // contains a chunk of stack that will be consulted if the expression
+ // references memory. |aPfxInstrs| holds the vector of PfxInstrs
+ // that will be consulted if this is a PFXEXPR.
+ // RUNS IN NO-MALLOC CONTEXT
+ TaggedUWord EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
+ const StackImage* aStackImg,
+ const vector<PfxInstr>* aPfxInstrs) const;
+
+ // Representation of expressions. If |mReg| is DW_REG_CFA (-1) then
+ // it denotes the CFA. All other allowed values for |mReg| are
+ // nonnegative and are DW_REG_ values.
+ LExprHow mHow : 8;
+ int16_t mReg; // A DW_REG_ value
+ int32_t mOffset; // 32-bit signed offset should be more than enough.
+};
+
+static_assert(sizeof(LExpr) <= 8, "LExpr size changed unexpectedly");
+
+////////////////////////////////////////////////////////////////
+// RuleSet //
+////////////////////////////////////////////////////////////////
+
+// This is platform-dependent. It describes how to recover the CFA and then
+// how to recover the registers for the previous frame. Such "recipes" are
+// specific to particular ranges of machine code, but the associated range
+// is not stored in RuleSet, because in general each RuleSet may be used
+// for many such range fragments ("extents"). See the comments below for
+// Extent and SecMap.
+//
+// The set of LExprs contained in a given RuleSet describes a DAG which
+// says how to compute the caller's registers ("new registers") from
+// the callee's registers ("old registers"). The DAG can contain a
+// single internal node, which is the value of the CFA for the callee.
+// It would be possible to construct a DAG that omits the CFA, but
+// including it makes the summarisers simpler, and the Dwarf CFI spec
+// has the CFA as a central concept.
+//
+// For this to make sense, |mCfaExpr| can't have
+// |mReg| == DW_REG_CFA since we have no previous value for the CFA.
+// All of the other |Expr| fields can -- and usually do -- specify
+// |mReg| == DW_REG_CFA.
+//
+// With that in place, the unwind algorithm proceeds as follows.
+//
+// (0) Initially: we have values for the old registers, and a memory
+// image.
+//
+// (1) Compute the CFA by evaluating |mCfaExpr|. Add the computed
+// value to the set of "old registers".
+//
+// (2) Compute values for the registers by evaluating all of the other
+// |Expr| fields in the RuleSet. These can depend on both the old
+// register values and the just-computed CFA.
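+//
+// As a purely illustrative sketch (not tied to any particular object),
+// consider x86_64 code compiled with a conventional frame pointer, i.e.
+// a "push %rbp; mov %rsp,%rbp" prologue. A RuleSet covering the body of
+// such a function would say, in effect:
+//
+//   CFA     = old xbp + 16   (mCfaExpr: NODEREF(DW_REG_INTEL_XBP, 16))
+//   new xip = *(CFA - 8)     (mXipExpr: DEREF(DW_REG_CFA, -8))
+//   new xbp = *(CFA - 16)    (mXbpExpr: DEREF(DW_REG_CFA, -16))
+//   new xsp = CFA            (mXspExpr: NODEREF(DW_REG_CFA, 0))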
+// +// If we are unwinding without computing a CFA, perhaps because the +// RuleSets are derived from EXIDX instead of Dwarf, then +// |mCfaExpr.mHow| will be LExpr::UNKNOWN, so the computed value will +// be invalid -- that is, TaggedUWord() -- and so any attempt to use +// that will result in the same value. But that's OK because the +// RuleSet would make no sense if depended on the CFA but specified no +// way to compute it. +// +// A RuleSet is not allowed to cover zero address range. Having zero +// length would break binary searching in SecMaps and PriMaps. + +class RuleSet { + public: + RuleSet(); + void Print(uintptr_t avma, uintptr_t len, void (*aLog)(const char*)) const; + + // Find the LExpr* for a given DW_REG_ value in this class. + LExpr* ExprForRegno(DW_REG_NUMBER aRegno); + + // How to compute the CFA. + LExpr mCfaExpr; + // How to compute caller register values. These may reference the + // value defined by |mCfaExpr|. +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + LExpr mXipExpr; // return address + LExpr mXspExpr; + LExpr mXbpExpr; +#elif defined(GP_ARCH_arm) + LExpr mR15expr; // return address + LExpr mR14expr; + LExpr mR13expr; + LExpr mR12expr; + LExpr mR11expr; + LExpr mR7expr; +#elif defined(GP_ARCH_arm64) + LExpr mX29expr; // frame pointer register + LExpr mX30expr; // link register + LExpr mSPexpr; +#elif defined(GP_ARCH_mips64) + LExpr mPCexpr; + LExpr mFPexpr; + LExpr mSPexpr; +#else +# error "Unknown arch" +#endif + + // Machinery in support of hashing. + typedef RuleSet Lookup; + + static mozilla::HashNumber hash(RuleSet rs) { + mozilla::HashNumber h = rs.mCfaExpr.hash(); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + h = mozilla::AddToHash(h, rs.mXipExpr.hash()); + h = mozilla::AddToHash(h, rs.mXspExpr.hash()); + h = mozilla::AddToHash(h, rs.mXbpExpr.hash()); +#elif defined(GP_ARCH_arm) + h = mozilla::AddToHash(h, rs.mR15expr.hash()); + h = mozilla::AddToHash(h, rs.mR14expr.hash()); + h = mozilla::AddToHash(h, rs.mR13expr.hash()); + h = mozilla::AddToHash(h, rs.mR12expr.hash()); + h = mozilla::AddToHash(h, rs.mR11expr.hash()); + h = mozilla::AddToHash(h, rs.mR7expr.hash()); +#elif defined(GP_ARCH_arm64) + h = mozilla::AddToHash(h, rs.mX29expr.hash()); + h = mozilla::AddToHash(h, rs.mX30expr.hash()); + h = mozilla::AddToHash(h, rs.mSPexpr.hash()); +#elif defined(GP_ARCH_mips64) + h = mozilla::AddToHash(h, rs.mPCexpr.hash()); + h = mozilla::AddToHash(h, rs.mFPexpr.hash()); + h = mozilla::AddToHash(h, rs.mSPexpr.hash()); +#else +# error "Unknown arch" +#endif + return h; + } + + static bool match(const RuleSet& rs1, const RuleSet& rs2) { + return rs1.mCfaExpr.equals(rs2.mCfaExpr) && +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + rs1.mXipExpr.equals(rs2.mXipExpr) && + rs1.mXspExpr.equals(rs2.mXspExpr) && + rs1.mXbpExpr.equals(rs2.mXbpExpr); +#elif defined(GP_ARCH_arm) + rs1.mR15expr.equals(rs2.mR15expr) && + rs1.mR14expr.equals(rs2.mR14expr) && + rs1.mR13expr.equals(rs2.mR13expr) && + rs1.mR12expr.equals(rs2.mR12expr) && + rs1.mR11expr.equals(rs2.mR11expr) && rs1.mR7expr.equals(rs2.mR7expr); +#elif defined(GP_ARCH_arm64) + rs1.mX29expr.equals(rs2.mX29expr) && + rs1.mX30expr.equals(rs2.mX30expr) && rs1.mSPexpr.equals(rs2.mSPexpr); +#elif defined(GP_ARCH_mips64) + rs1.mPCexpr.equals(rs2.mPCexpr) && rs1.mFPexpr.equals(rs2.mFPexpr) && + rs1.mSPexpr.equals(rs2.mSPexpr); +#else +# error "Unknown arch" +#endif + } +}; + +// Returns |true| for Dwarf register numbers which are members +// of the set of registers that LUL unwinds on this target. 
+static inline bool registerIsTracked(DW_REG_NUMBER reg) { + switch (reg) { +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + case DW_REG_INTEL_XSP: + case DW_REG_INTEL_XIP: + return true; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + case DW_REG_ARM_R11: + case DW_REG_ARM_R12: + case DW_REG_ARM_R13: + case DW_REG_ARM_R14: + case DW_REG_ARM_R15: + return true; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + case DW_REG_AARCH64_X30: + case DW_REG_AARCH64_SP: + return true; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_FP: + case DW_REG_MIPS_SP: + case DW_REG_MIPS_PC: + return true; +#else +# error "Unknown arch" +#endif + default: + return false; + } +} + +//////////////////////////////////////////////////////////////// +// Extent // +//////////////////////////////////////////////////////////////// + +struct Extent { + // Three fields, which together take 8 bytes. + uint32_t mOffset; + uint16_t mLen; + uint16_t mDictIx; + + // What this means is: suppose we are looking for the unwind rules for some + // code address (AVMA) `avma`. If we can find some SecMap `secmap` such + // that `avma` falls in the range + // + // `[secmap.mMapMinAVMA, secmap.mMapMaxAVMA]` + // + // then the RuleSet to use is `secmap.mDictionary[dictIx]` iff we can find + // an `extent` in `secmap.mExtents` such that `avma` falls into the range + // + // `[secmap.mMapMinAVMA + extent.offset(), + // secmap.mMapMinAVMA + extent.offset() + extent.len())`. + // + // Packing Extent into the minimum space is important, since there will be + // huge numbers of Extents -- around 3 million for libxul.so as of Sept + // 2020. Here, we aim for an 8-byte size, with the field sizes chosen + // carefully, as follows: + // + // `offset` denotes a byte offset inside the text section for some shared + // object. libxul.so is by far the largest. As of Sept 2020 it has a text + // size of up to around 120MB, that is, close to 2^27 bytes. Hence a 32-bit + // `offset` field gives a safety margin of around a factor of 32 + // (== 2 ^(32 - 27)). + // + // `dictIx` indicates a unique `RuleSet` for some code address range. + // Experimentation on x86_64-linux indicates that only around 300 different + // `RuleSet`s exist, for libxul.so. A 16-bit bit field allows up to 65536 + // to be recorded, hence leaving us a generous safety margin. + // + // `len` indicates the length of the associated address range. + // + // Note the representation becomes unusable if either `offset` overflows 32 + // bits or `dictIx` overflows 16 bits. On the other hand, it does not + // matter (although is undesirable) if `len` overflows 16 bits, because in + // that case we can add multiple size-65535 entries to `secmap.mExtents` to + // cover the entire range. Hence the field sizes are biased so as to give a + // good safety margin for `offset` and `dictIx` at the cost of stealing bits + // from `len`. Almost all `len` values we will ever see in practice are + // 65535 or less, so stealing those bits does not matter much. + // + // If further compression is required, it would be feasible to implement + // Extent using 29 bits for the offset, 8 bits for the length and 11 bits + // for the dictionary index, giving a total of 6 bytes, provided that the + // data is packed into 3 uint16_t's. That would be a bit slower, though, + // due to the bit packing, and it would be more fragile, in the sense that + // it would fail for any object with more than 512MB of text segment, or + // with more than 2048 different `RuleSet`s. 
For the current (Sept 2020) + // libxul.so situation, though, it would work fine. + + Extent(uint32_t offset, uint32_t len, uint32_t dictIx) { + MOZ_RELEASE_ASSERT(len < (1 << 16)); + MOZ_RELEASE_ASSERT(dictIx < (1 << 16)); + mOffset = offset; + mLen = len; + mDictIx = dictIx; + } + inline uint32_t offset() const { return mOffset; } + inline uint32_t len() const { return mLen; } + inline uint32_t dictIx() const { return mDictIx; } + void setLen(uint32_t len) { + MOZ_RELEASE_ASSERT(len < (1 << 16)); + mLen = len; + } + void Print(void (*aLog)(const char*)) const { + char buf[64]; + SprintfLiteral(buf, "Extent(offs=0x%x, len=%u, dictIx=%u)", this->offset(), + this->len(), this->dictIx()); + aLog(buf); + } +}; + +static_assert(sizeof(Extent) == 8); + +//////////////////////////////////////////////////////////////// +// SecMap // +//////////////////////////////////////////////////////////////// + +// A SecMap may have zero address range, temporarily, whilst RuleSets +// are being added to it. But adding a zero-range SecMap to a PriMap +// will make it impossible to maintain the total order of the PriMap +// entries, and so that can't be allowed to happen. + +class SecMap { + public: + // In the constructor, `mapStartAVMA` and `mapLen` define the actual + // (in-process) virtual addresses covered by the SecMap. All RuleSets + // subsequently added to it by calling `AddRuleSet` must fall into this + // address range, and attempts to add ones outside the range will be + // ignored. This restriction exists because the type Extent (see below) + // indicates an address range for a RuleSet, but for reasons of compactness, + // it does not contain the start address of the range. Instead, it contains + // a 32-bit offset from the base address of the SecMap. This is also the + // reason why the map's size is a `uint32_t` and not a `uintptr_t`. + // + // The effect is to limit this mechanism to shared objects / executables + // whose text section size does not exceed 4GB (2^32 bytes). Given that, as + // of Sept 2020, libxul.so's text section size is around 120MB, this does + // not seem like much of a limitation. + // + // From the supplied `mapStartAVMA` and `mapLen`, fields `mMapMinAVMA` and + // `mMapMaxAVMA` are calculated. It is intended that no two SecMaps owned + // by the same PriMap contain overlapping address ranges, and the PriMap + // logic enforces that. + // + // Some invariants: + // + // mExtents is nonempty + // <=> mMapMinAVMA <= mMapMaxAVMA + // && mMapMinAVMA <= apply_delta(mExtents[0].offset()) + // && apply_delta(mExtents[#rulesets-1].offset() + // + mExtents[#rulesets-1].len() - 1) <= mMapMaxAVMA + // where + // apply_delta(off) = off + mMapMinAVMA + // + // This requires that no RuleSet has zero length. + // + // mExtents is empty + // <=> mMapMinAVMA > mMapMaxAVMA + // + // This doesn't constrain mMapMinAVMA and mMapMaxAVMA uniquely, so let's use + // mMapMinAVMA == 1 and mMapMaxAVMA == 0 to denote this case. + + SecMap(uintptr_t mapStartAVMA, uint32_t mapLen, void (*aLog)(const char*)); + ~SecMap(); + + // Binary search mRuleSets to find one that brackets |ia|, or nullptr + // if none is found. It's not allowable to do this until PrepareRuleSets + // has been called first. + RuleSet* FindRuleSet(uintptr_t ia); + + // Add a RuleSet to the collection. The rule is copied in. Calling + // this makes the map non-searchable. + void AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len); + + // Add a PfxInstr to the vector of such instrs, and return the index + // in the vector. 
Calling this makes the map non-searchable.
+ uint32_t AddPfxInstr(PfxInstr pfxi);
+
+ // Returns the entire vector of PfxInstrs.
+ const vector<PfxInstr>* GetPfxInstrs() { return &mPfxInstrs; }
+
+ // Prepare the map for searching, by sorting it, de-overlapping entries and
+ // removing any resulting zero-length entries. At the start of this
+ // routine, all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and
+ // not have zero length, as a result of the checks in AddRuleSet().
+ void PrepareRuleSets();
+
+ bool IsEmpty();
+
+ size_t Size() { return mExtents.size() + mDictionary.size(); }
+
+ size_t SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+ // The extent of this SecMap as a whole. The extents of all contained
+ // RuleSets must fall inside this. See comment above for details.
+ uintptr_t mMapMinAVMA;
+ uintptr_t mMapMaxAVMA;
+
+ private:
+ // False whilst adding entries; true once it is safe to call FindRuleSet.
+ // Transition (false->true) is caused by calling PrepareRuleSets().
+ bool mUsable;
+
+ // This is used to find and remove duplicate RuleSets while we are adding
+ // them to the SecMap. Almost all RuleSets are duplicates, so de-duping
+ // them is a huge space win. This is non-null while `mUsable` is false, and
+ // becomes null (is discarded) after the call to PrepareRuleSets, which
+ // copies all the entries into `mDictionary`.
+ mozilla::UniquePtr<
+ mozilla::HashMap<RuleSet, uint32_t, RuleSet, InfallibleAllocPolicy>>
+ mUniqifier;
+
+ // This will contain the final contents of `mUniqifier`, but ordered
+ // (implicitly) by the `uint32_t` value fields, for fast access.
+ vector<RuleSet> mDictionary;
+
+ // A vector of Extents, sorted by offset value, nonoverlapping (post
+ // PrepareRuleSets()).
+ vector<Extent> mExtents;
+
+ // A vector of PfxInstrs, which are referred to by the RuleSets.
+ // These are provided as a representation of Dwarf expressions
+ // (DW_CFA_val_expression, DW_CFA_expression, DW_CFA_def_cfa_expression),
+ // are relatively expensive to evaluate, and are therefore
+ // expected to be used only occasionally.
+ //
+ // The vector holds a bunch of separate PfxInstr programs, each one
+ // starting with a PX_Start and terminated by a PX_End, all
+ // concatenated together. When a RuleSet can't recover a value
+ // using a self-contained LExpr, it uses a PFXEXPR whose mOffset is
+ // the index in this vector of the start of the necessary PfxInstr program.
+ vector<PfxInstr> mPfxInstrs;
+
+ // A logging sink, for debugging.
+ void (*mLog)(const char*);
+};
+
+} // namespace lul
+
+#endif // ndef LulMainInt_h
diff --git a/tools/profiler/lul/platform-linux-lul.cpp b/tools/profiler/lul/platform-linux-lul.cpp
new file mode 100644
index 0000000000..4027905c60
--- /dev/null
+++ b/tools/profiler/lul/platform-linux-lul.cpp
@@ -0,0 +1,75 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <signal.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "mozilla/ProfilerState.h"
+#include "platform.h"
+#include "PlatformMacros.h"
+#include "LulMain.h"
+#include "shared-libraries.h"
+#include "AutoObjectMapper.h"
+
+// Contains miscellaneous helpers that are used to connect the Gecko Profiler
+// and LUL.
+ +// Find out, in a platform-dependent way, where the code modules got +// mapped in the process' virtual address space, and get |aLUL| to +// load unwind info for them. +void read_procmaps(lul::LUL* aLUL) { + MOZ_ASSERT(aLUL->CountMappings() == 0); + +#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd) + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + + for (size_t i = 0; i < info.GetSize(); i++) { + const SharedLibrary& lib = info.GetEntry(i); + + std::string nativePath = lib.GetNativeDebugPath(); + + // We can use the standard POSIX-based mapper. + AutoObjectMapperPOSIX mapper(aLUL->mLog); + + // Ask |mapper| to map the object. Then hand its mapped address + // to NotifyAfterMap(). + void* image = nullptr; + size_t size = 0; + bool ok = mapper.Map(&image, &size, nativePath); + if (ok && image && size > 0) { + aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd() - lib.GetStart(), + nativePath.c_str(), image); + } else if (!ok && lib.GetDebugName().IsEmpty()) { + // The object has no name and (as a consequence) the mapper failed to map + // it. This happens on Linux, where GetInfoForSelf() produces such a + // mapping for the VDSO. This is a problem on x86-{linux,android} because + // lack of knowledge about the mapped area inhibits LUL's special + // __kernel_syscall handling. Hence notify |aLUL| at least of the + // mapping, even though it can't read any unwind information for the area. + aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd() - lib.GetStart()); + } + + // |mapper| goes out of scope at this point and so its destructor + // unmaps the object. + } + +#else +# error "Unknown platform" +#endif +} + +// LUL needs a callback for its logging sink. +void logging_sink_for_LUL(const char* str) { + // These are only printed when Verbose logging is enabled (e.g. with + // MOZ_LOG="prof:5"). This is because LUL's logging is much more verbose than + // the rest of the profiler's logging, which occurs at the Info (3) and Debug + // (4) levels. + MOZ_LOG(gProfilerLog, mozilla::LogLevel::Verbose, + ("[%" PRIu64 "] %s", + uint64_t(profiler_current_process_id().ToNumber()), str)); +} diff --git a/tools/profiler/lul/platform-linux-lul.h b/tools/profiler/lul/platform-linux-lul.h new file mode 100644 index 0000000000..7c94299961 --- /dev/null +++ b/tools/profiler/lul/platform-linux-lul.h @@ -0,0 +1,19 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PLATFORM_LINUX_LUL_H +#define MOZ_PLATFORM_LINUX_LUL_H + +#include "platform.h" + +// Find out, in a platform-dependent way, where the code modules got +// mapped in the process' virtual address space, and get |aLUL| to +// load unwind info for them. +void read_procmaps(lul::LUL* aLUL); + +// LUL needs a callback for its logging sink. +void logging_sink_for_LUL(const char* str); + +#endif /* ndef MOZ_PLATFORM_LINUX_LUL_H */ |
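Taken together, LulMain.h and platform-linux-lul.h imply a fairly rigid calling sequence: construct a lul::LUL in Admin mode with a logging sink, load unwind information (for example via read_procmaps), switch to Unwind mode with EnableUnwinding, and only then call Unwind from sampling context. The sketch below is illustrative only; it is not code from this tree, it omits thread registration and the sampler's real capture of registers and stack bytes, and the function name UnwindCurrentStack is invented for the example.

    // Illustrative sketch only: how the declarations above fit together.
    #include "LulMain.h"
    #include "platform-linux-lul.h"

    void UnwindCurrentStack() {
      // Admin mode: create the unwinder, load unwind info, then seal it.
      lul::LUL* unw = new lul::LUL(logging_sink_for_LUL);
      read_procmaps(unw);      // NotifyAfterMap/NotifyExecutableArea per object
      unw->EnableUnwinding();  // from here on, only Unwind() may be called

      // The sampler would fill these from the sampled thread's context.
      lul::UnwindRegs startRegs;  // all TaggedUWords start out invalid here
      lul::StackImage* stackImg = new lul::StackImage();
      stackImg->mStartAvma = 0;   // placeholder values only; real code copies
      stackImg->mLen = 0;         // up to lul::N_STACK_BYTES into mContents

      // Unwind mode: Unwind() performs no dynamic memory allocation.
      const size_t kMaxFrames = 128;
      uintptr_t framePCs[kMaxFrames];
      uintptr_t frameSPs[kMaxFrames];
      size_t framesUsed = 0, fpFramesUsed = 0;
      unw->Unwind(framePCs, frameSPs, &framesUsed, &fpFramesUsed, kMaxFrames,
                  &startRegs, stackImg);

      // framePCs[0 .. framesUsed-1] now hold the recovered return addresses.
      delete stackImg;
      delete unw;
    }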