diff options
Diffstat (limited to '')
-rw-r--r-- | mozglue/baseprofiler/lul/LulMain.cpp | 1958 |
1 files changed, 1958 insertions, 0 deletions
diff --git a/mozglue/baseprofiler/lul/LulMain.cpp b/mozglue/baseprofiler/lul/LulMain.cpp new file mode 100644 index 0000000000..75f205ac3a --- /dev/null +++ b/mozglue/baseprofiler/lul/LulMain.cpp @@ -0,0 +1,1958 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "LulMain.h" + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> // write(), only for testing LUL + +#include <algorithm> // std::sort +#include <string> +#include <utility> + +#include "mozilla/Assertions.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/MemoryChecking.h" +#include "mozilla/Sprintf.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Unused.h" + +#include "BaseProfiler.h" +#include "LulCommonExt.h" +#include "LulElfExt.h" +#include "LulMainInt.h" + +using mozilla::baseprofiler::profiler_current_process_id; +using mozilla::baseprofiler::profiler_current_thread_id; + +// Set this to 1 for verbose logging +#define DEBUG_MAIN 0 + +namespace lul { + +using mozilla::CheckedInt; +using mozilla::DebugOnly; +using mozilla::MallocSizeOf; +using mozilla::Unused; +using std::pair; +using std::string; +using std::vector; + +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +// +// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT. +// Any such function -- and, hence, the transitive closure of those +// reachable from it -- must not do any dynamic memory allocation. +// Doing so risks deadlock. There is exactly one root function for +// the transitive closure: Lul::Unwind. +// +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + +//////////////////////////////////////////////////////////////// +// RuleSet // +//////////////////////////////////////////////////////////////// + +static const char* NameOf_DW_REG(int16_t aReg) { + switch (aReg) { + case DW_REG_CFA: + return "cfa"; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + return "xbp"; + case DW_REG_INTEL_XSP: + return "xsp"; + case DW_REG_INTEL_XIP: + return "xip"; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + return "r7"; + case DW_REG_ARM_R11: + return "r11"; + case DW_REG_ARM_R12: + return "r12"; + case DW_REG_ARM_R13: + return "r13"; + case DW_REG_ARM_R14: + return "r14"; + case DW_REG_ARM_R15: + return "r15"; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return "x29"; + case DW_REG_AARCH64_X30: + return "x30"; + case DW_REG_AARCH64_SP: + return "sp"; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return "sp"; + case DW_REG_MIPS_FP: + return "fp"; + case DW_REG_MIPS_PC: + return "pc"; +#else +# error "Unsupported arch" +#endif + default: + return "???"; + } +} + +string LExpr::ShowRule(const char* aNewReg) const { + char buf[64]; + string res = string(aNewReg) + "="; + switch (mHow) { + case UNKNOWN: + res += "Unknown"; + break; + case NODEREF: + SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case DEREF: + SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case PFXEXPR: + SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset); + res += buf; + break; + default: + res += "???"; + break; + } + return res; +} + +void RuleSet::Print(void (*aLog)(const char*)) const { + char buf[96]; + SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)mAddr, + (unsigned long long int)(mAddr + mLen - 1)); + string res = string(buf); + res += mCfaExpr.ShowRule("cfa"); + res += " in"; + // For each reg we care about, print the recovery expression. +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + res += mXipExpr.ShowRule(" RA"); + res += mXspExpr.ShowRule(" SP"); + res += mXbpExpr.ShowRule(" BP"); +#elif defined(GP_ARCH_arm) + res += mR15expr.ShowRule(" R15"); + res += mR7expr.ShowRule(" R7"); + res += mR11expr.ShowRule(" R11"); + res += mR12expr.ShowRule(" R12"); + res += mR13expr.ShowRule(" R13"); + res += mR14expr.ShowRule(" R14"); +#elif defined(GP_ARCH_arm64) + res += mX29expr.ShowRule(" X29"); + res += mX30expr.ShowRule(" X30"); + res += mSPexpr.ShowRule(" SP"); +#elif defined(GP_ARCH_mips64) + res += mPCexpr.ShowRule(" PC"); + res += mSPexpr.ShowRule(" SP"); + res += mFPexpr.ShowRule(" FP"); +#else +# error "Unsupported arch" +#endif + aLog(res.c_str()); +} + +LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) { + switch (aRegno) { + case DW_REG_CFA: + return &mCfaExpr; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XIP: + return &mXipExpr; + case DW_REG_INTEL_XSP: + return &mXspExpr; + case DW_REG_INTEL_XBP: + return &mXbpExpr; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R15: + return &mR15expr; + case DW_REG_ARM_R14: + return &mR14expr; + case DW_REG_ARM_R13: + return &mR13expr; + case DW_REG_ARM_R12: + return &mR12expr; + case DW_REG_ARM_R11: + return &mR11expr; + case DW_REG_ARM_R7: + return &mR7expr; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return &mX29expr; + case DW_REG_AARCH64_X30: + return &mX30expr; + case DW_REG_AARCH64_SP: + return &mSPexpr; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return &mSPexpr; + case DW_REG_MIPS_FP: + return &mFPexpr; + case DW_REG_MIPS_PC: + return &mPCexpr; +#else +# error "Unknown arch" +#endif + default: + return nullptr; + } +} + +RuleSet::RuleSet() { + mAddr = 0; + mLen = 0; + // The only other fields are of type LExpr and those are initialised + // by LExpr::LExpr(). +} + +//////////////////////////////////////////////////////////////// +// SecMap // +//////////////////////////////////////////////////////////////// + +// See header file LulMainInt.h for comments about invariants. + +SecMap::SecMap(void (*aLog)(const char*)) + : mSummaryMinAddr(1), mSummaryMaxAddr(0), mUsable(true), mLog(aLog) {} + +SecMap::~SecMap() { mRuleSets.clear(); } + +// RUNS IN NO-MALLOC CONTEXT +RuleSet* SecMap::FindRuleSet(uintptr_t ia) { + // Binary search mRuleSets to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. Note that this works correctly even when + // mRuleSets.size() == 0. + + // Can't do this until the array has been sorted and preened. + MOZ_ASSERT(mUsable); + + long int lo = 0; + long int hi = (long int)mRuleSets.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. + if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + RuleSet* mid_ruleSet = &mRuleSets[mid]; + uintptr_t mid_minAddr = mid_ruleSet->mAddr; + uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1; + if (ia < mid_minAddr) { + hi = mid - 1; + continue; + } + if (ia > mid_maxAddr) { + lo = mid + 1; + continue; + } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + return mid_ruleSet; + } + // NOTREACHED +} + +// Add a RuleSet to the collection. The rule is copied in. Calling +// this makes the map non-searchable. +void SecMap::AddRuleSet(const RuleSet* rs) { + mUsable = false; + mRuleSets.push_back(*rs); +} + +// Add a PfxInstr to the vector of such instrs, and return the index +// in the vector. Calling this makes the map non-searchable. +uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) { + mUsable = false; + mPfxInstrs.push_back(pfxi); + return mPfxInstrs.size() - 1; +} + +static bool CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) { + return rs1.mAddr < rs2.mAddr; +} + +// Prepare the map for searching. Completely remove any which don't +// fall inside the specified range [start, +len). +void SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) { + if (mRuleSets.empty()) { + return; + } + + MOZ_ASSERT(aLen > 0); + if (aLen == 0) { + // This should never happen. + mRuleSets.clear(); + return; + } + + // Sort by start addresses. + std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE); + + // Detect any entry not completely contained within [start, +len). + // Set its length to zero, so that the next pass will remove it. + for (size_t i = 0; i < mRuleSets.size(); ++i) { + RuleSet* rs = &mRuleSets[i]; + if (rs->mLen > 0 && + (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) { + rs->mLen = 0; + } + } + + // Iteratively truncate any overlaps and remove any zero length + // entries that might result, or that may have been present + // initially. Unless the input is seriously screwy, this is + // expected to iterate only once. + while (true) { + size_t i; + size_t n = mRuleSets.size(); + size_t nZeroLen = 0; + + if (n == 0) { + break; + } + + for (i = 1; i < n; ++i) { + RuleSet* prev = &mRuleSets[i - 1]; + RuleSet* here = &mRuleSets[i]; + MOZ_ASSERT(prev->mAddr <= here->mAddr); + if (prev->mAddr + prev->mLen > here->mAddr) { + prev->mLen = here->mAddr - prev->mAddr; + } + if (prev->mLen == 0) nZeroLen++; + } + + if (mRuleSets[n - 1].mLen == 0) { + nZeroLen++; + } + + // At this point, the entries are in-order and non-overlapping. + // If none of them are zero-length, we are done. + if (nZeroLen == 0) { + break; + } + + // Slide back the entries to remove the zero length ones. + size_t j = 0; // The write-point. + for (i = 0; i < n; ++i) { + if (mRuleSets[i].mLen == 0) { + continue; + } + if (j != i) mRuleSets[j] = mRuleSets[i]; + ++j; + } + MOZ_ASSERT(i == n); + MOZ_ASSERT(nZeroLen <= n); + MOZ_ASSERT(j == n - nZeroLen); + while (nZeroLen > 0) { + mRuleSets.pop_back(); + nZeroLen--; + } + + MOZ_ASSERT(mRuleSets.size() == j); + } + + size_t n = mRuleSets.size(); + +#ifdef DEBUG + // Do a final check on the rules: their address ranges must be + // ascending, non overlapping, non zero sized. + if (n > 0) { + MOZ_ASSERT(mRuleSets[0].mLen > 0); + for (size_t i = 1; i < n; ++i) { + RuleSet* prev = &mRuleSets[i - 1]; + RuleSet* here = &mRuleSets[i]; + MOZ_ASSERT(prev->mAddr < here->mAddr); + MOZ_ASSERT(here->mLen > 0); + MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr); + } + } +#endif + + // Set the summary min and max address values. + if (n == 0) { + // Use the values defined in comments in the class declaration. + mSummaryMinAddr = 1; + mSummaryMaxAddr = 0; + } else { + mSummaryMinAddr = mRuleSets[0].mAddr; + mSummaryMaxAddr = mRuleSets[n - 1].mAddr + mRuleSets[n - 1].mLen - 1; + } + char buf[150]; + SprintfLiteral(buf, "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n", + (int)n, (unsigned long long int)mSummaryMinAddr, + (unsigned long long int)mSummaryMaxAddr); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Is now usable for binary search. + mUsable = true; + +#if 0 + mLog("\nRulesets after preening\n"); + for (size_t i = 0; i < mRuleSets.size(); ++i) { + mRuleSets[i].Print(mLog); + mLog("\n"); + } + mLog("\n"); +#endif +} + +bool SecMap::IsEmpty() { return mRuleSets.empty(); } + +size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + + // It's conceivable that these calls would be unsafe with some + // implementations of std::vector, but it seems to be working for now... + n += aMallocSizeOf(mRuleSets.data()); + n += aMallocSizeOf(mPfxInstrs.data()); + + return n; +} + +//////////////////////////////////////////////////////////////// +// SegArray // +//////////////////////////////////////////////////////////////// + +// A SegArray holds a set of address ranges that together exactly +// cover an address range, with no overlaps or holes. Each range has +// an associated value, which in this case has been specialised to be +// a simple boolean. The representation is kept to minimal canonical +// form in which adjacent ranges with the same associated value are +// merged together. Each range is represented by a |struct Seg|. +// +// SegArrays are used to keep track of which parts of the address +// space are known to contain instructions. +class SegArray { + public: + void add(uintptr_t lo, uintptr_t hi, bool val) { + if (lo > hi) { + return; + } + split_at(lo); + if (hi < UINTPTR_MAX) { + split_at(hi + 1); + } + std::vector<Seg>::size_type iLo, iHi, i; + iLo = find(lo); + iHi = find(hi); + for (i = iLo; i <= iHi; ++i) { + mSegs[i].val = val; + } + preen(); + } + + // RUNS IN NO-MALLOC CONTEXT + bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min, + /*OUT*/ uintptr_t* rx_max, uintptr_t addr) { + std::vector<Seg>::size_type i = find(addr); + if (!mSegs[i].val) { + return false; + } + *rx_min = mSegs[i].lo; + *rx_max = mSegs[i].hi; + return true; + } + + SegArray() { + Seg s(0, UINTPTR_MAX, false); + mSegs.push_back(s); + } + + private: + struct Seg { + Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {} + uintptr_t lo; + uintptr_t hi; + bool val; + }; + + void preen() { + for (std::vector<Seg>::iterator iter = mSegs.begin(); + iter < mSegs.end() - 1; ++iter) { + if (iter[0].val != iter[1].val) { + continue; + } + iter[0].hi = iter[1].hi; + mSegs.erase(iter + 1); + // Back up one, so as not to miss an opportunity to merge + // with the entry after this one. + --iter; + } + } + + // RUNS IN NO-MALLOC CONTEXT + std::vector<Seg>::size_type find(uintptr_t a) { + long int lo = 0; + long int hi = (long int)mSegs.size(); + while (true) { + // The unsearched space is lo .. hi inclusive. + if (lo > hi) { + // Not found. This can't happen. + return (std::vector<Seg>::size_type)(-1); + } + long int mid = lo + ((hi - lo) / 2); + uintptr_t mid_lo = mSegs[mid].lo; + uintptr_t mid_hi = mSegs[mid].hi; + if (a < mid_lo) { + hi = mid - 1; + continue; + } + if (a > mid_hi) { + lo = mid + 1; + continue; + } + return (std::vector<Seg>::size_type)mid; + } + } + + void split_at(uintptr_t a) { + std::vector<Seg>::size_type i = find(a); + if (mSegs[i].lo == a) { + return; + } + mSegs.insert(mSegs.begin() + i + 1, mSegs[i]); + mSegs[i].hi = a - 1; + mSegs[i + 1].lo = a; + } + + void show() { + printf("<< %d entries:\n", (int)mSegs.size()); + for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end(); + ++iter) { + printf(" %016llx %016llx %s\n", (unsigned long long int)(*iter).lo, + (unsigned long long int)(*iter).hi, + (*iter).val ? "true" : "false"); + } + printf(">>\n"); + } + + std::vector<Seg> mSegs; +}; + +//////////////////////////////////////////////////////////////// +// PriMap // +//////////////////////////////////////////////////////////////// + +class PriMap { + public: + explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {} + + // RUNS IN NO-MALLOC CONTEXT + pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) { + SecMap* sm = FindSecMap(ia); + return pair<const RuleSet*, const vector<PfxInstr>*>( + sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr); + } + + // Add a secondary map. No overlaps allowed w.r.t. existing + // secondary maps. + void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) { + // We can't add an empty SecMap to the PriMap. But that's OK + // since we'd never be able to find anything in it anyway. + if (aSecMap->IsEmpty()) { + return; + } + + // Iterate through the SecMaps and find the right place for this + // one. At the same time, ensure that the in-order + // non-overlapping invariant is preserved (and, generally, holds). + // FIXME: this gives a cost that is O(N^2) in the total number of + // shared objects in the system. ToDo: better. + MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr); + + size_t num_secMaps = mSecMaps.size(); + uintptr_t i; + for (i = 0; i < num_secMaps; ++i) { + mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i]; + MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr); + if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) { + // |aSecMap| needs to be inserted immediately before mSecMaps[i]. + break; + } + } + MOZ_ASSERT(i <= num_secMaps); + if (i == num_secMaps) { + // It goes at the end. + mSecMaps.push_back(std::move(aSecMap)); + } else { + std::vector<mozilla::UniquePtr<SecMap>>::iterator iter = + mSecMaps.begin() + i; + mSecMaps.insert(iter, std::move(aSecMap)); + } + char buf[100]; + SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n", + (int)mSecMaps.size()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } + + // Remove and delete any SecMaps in the mapping, that intersect + // with the specified address range. + void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) { + MOZ_ASSERT(avma_min <= avma_max); + size_t num_secMaps = mSecMaps.size(); + if (num_secMaps > 0) { + intptr_t i; + // Iterate from end to start over the vector, so as to ensure + // that the special case where |avma_min| and |avma_max| denote + // the entire address space, can be completed in time proportional + // to the number of elements in the map. + for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) { + mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i]; + if (sm_i->mSummaryMaxAddr < avma_min || + avma_max < sm_i->mSummaryMinAddr) { + // There's no overlap. Move on. + continue; + } + // We need to remove mSecMaps[i] and slide all those above it + // downwards to cover the hole. + mSecMaps.erase(mSecMaps.begin() + i); + } + } + } + + // Return the number of currently contained SecMaps. + size_t CountSecMaps() { return mSecMaps.size(); } + + size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + + // It's conceivable that this call would be unsafe with some + // implementations of std::vector, but it seems to be working for now... + n += aMallocSizeOf(mSecMaps.data()); + + for (size_t i = 0; i < mSecMaps.size(); i++) { + n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf); + } + + return n; + } + + private: + // RUNS IN NO-MALLOC CONTEXT + SecMap* FindSecMap(uintptr_t ia) { + // Binary search mSecMaps to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. + long int lo = 0; + long int hi = (long int)mSecMaps.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. + if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid]; + uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr; + uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr; + if (ia < mid_minAddr) { + hi = mid - 1; + continue; + } + if (ia > mid_maxAddr) { + lo = mid + 1; + continue; + } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + return mid_secMap.get(); + } + // NOTREACHED + } + + private: + // sorted array of per-object ranges, non overlapping, non empty + std::vector<mozilla::UniquePtr<SecMap>> mSecMaps; + + // a logging sink, for debugging. + void (*mLog)(const char*); +}; + +//////////////////////////////////////////////////////////////// +// LUL // +//////////////////////////////////////////////////////////////// + +#define LUL_LOG(_str) \ + do { \ + char buf[200]; \ + SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \ + uint64_t(profiler_current_process_id().ToNumber()), \ + uint64_t(profiler_current_thread_id().ToNumber()), this, \ + (_str)); \ + buf[sizeof(buf) - 1] = 0; \ + mLog(buf); \ + } while (0) + +LUL::LUL(void (*aLog)(const char*)) + : mLog(aLog), + mAdminMode(true), + mAdminThreadId(profiler_current_thread_id()), + mPriMap(new PriMap(aLog)), + mSegArray(new SegArray()), + mUSU(new UniqueStringUniverse()) { + LUL_LOG("LUL::LUL: Created object"); +} + +LUL::~LUL() { + LUL_LOG("LUL::~LUL: Destroyed object"); + delete mPriMap; + delete mSegArray; + mLog = nullptr; + delete mUSU; +} + +void LUL::MaybeShowStats() { + // This is racey in the sense that it can't guarantee that + // n_new == n_new_Context + n_new_CFI + n_new_Scanned + // if it should happen that mStats is updated by some other thread + // in between computation of n_new and n_new_{Context,CFI,FP}. + // But it's just stats printing, so we don't really care. + uint32_t n_new = mStats - mStatsPrevious; + if (n_new >= 5000) { + uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext; + uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI; + uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP; + mStatsPrevious = mStats; + char buf[200]; + SprintfLiteral(buf, + "LUL frame stats: TOTAL %5u" + " CTX %4u CFI %4u FP %4u", + n_new, n_new_Context, n_new_CFI, n_new_FP); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } +} + +size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const { + size_t n = aMallocSizeOf(this); + n += mPriMap->SizeOfIncludingThis(aMallocSizeOf); + + // Measurement of the following members may be added later if DMD finds it + // is worthwhile: + // - mSegArray + // - mUSU + + return n; +} + +void LUL::EnableUnwinding() { + LUL_LOG("LUL::EnableUnwinding"); + // Don't assert for Admin mode here. That is, tolerate a call here + // if we are already in Unwinding mode. + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mAdminMode = false; +} + +void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName, + const void* aMappedImage) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyMap %llx %llu %s\n", + (unsigned long long int)aRXavma, (unsigned long long int)aSize, + aFileName); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + // Here's a new mapping, for this object. + mozilla::UniquePtr<SecMap> smap = mozilla::MakeUnique<SecMap>(mLog); + + // Read CFI or EXIDX unwind data into |smap|. + if (!aMappedImage) { + (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(), + smap.get(), (void*)aRXavma, aSize, mUSU, mLog); + } else { + (void)lul::ReadSymbolDataInternal( + (const uint8_t*)aMappedImage, string(aFileName), + std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog); + } + + mLog("NotifyMap .. preparing entries\n"); + + smap->PrepareRuleSets(aRXavma, aSize); + + SprintfLiteral(buf, "NotifyMap got %lld entries\n", + (long long int)smap->Size()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Add it to the primary map (the top level set of mapped objects). + mPriMap->AddSecMap(std::move(smap)); + + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. + mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + +void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n", + (unsigned long long int)aRXavma, + (unsigned long long int)aSize); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. + mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + +void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + mLog(":\n"); + char buf[100]; + SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n", + (unsigned long long int)aRXavmaMin, + (unsigned long long int)aRXavmaMax); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + + MOZ_ASSERT(aRXavmaMin <= aRXavmaMax); + + // Remove from the primary map, any secondary maps that intersect + // with the address range. Also delete the secondary maps. + mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax); + + // Tell the segment array that the address range no longer + // contains valid code. + mSegArray->add(aRXavmaMin, aRXavmaMax, false); + + SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n", + (int)mPriMap->CountSecMaps()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +} + +size_t LUL::CountMappings() { + MOZ_RELEASE_ASSERT(mAdminMode); + MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId); + + return mPriMap->CountSecMaps(); +} + +// RUNS IN NO-MALLOC CONTEXT +static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) { + if (!aAddr.Valid()) { + return TaggedUWord(); + } + + // Lower limit check. |aAddr.Value()| is the lowest requested address + // and |aStackImg->mStartAvma| is the lowest address we actually have, + // so the comparison is straightforward. + if (aAddr.Value() < aStackImg->mStartAvma) { + return TaggedUWord(); + } + + // Upper limit check. We must compute the highest requested address + // and the highest address we actually have, but being careful to + // avoid overflow. In particular if |aAddr| is 0xFFF...FFF or the + // 3/7 values below that, then we will get overflow. See bug #1245477. + typedef CheckedInt<uintptr_t> CheckedUWord; + CheckedUWord highest_requested_plus_one = + CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t)); + CheckedUWord highest_available_plus_one = + CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen); + if (!highest_requested_plus_one.isValid() // overflow? + || !highest_available_plus_one.isValid() // overflow? + || (highest_requested_plus_one.value() > + highest_available_plus_one.value())) { // in range? + return TaggedUWord(); + } + + return TaggedUWord( + *(uintptr_t*)(&aStackImg + ->mContents[aAddr.Value() - aStackImg->mStartAvma])); +} + +// RUNS IN NO-MALLOC CONTEXT +static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs, + TaggedUWord aCFA) { + switch (aReg) { + case DW_REG_CFA: + return aCFA; +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + case DW_REG_INTEL_XBP: + return aOldRegs->xbp; + case DW_REG_INTEL_XSP: + return aOldRegs->xsp; + case DW_REG_INTEL_XIP: + return aOldRegs->xip; +#elif defined(GP_ARCH_arm) + case DW_REG_ARM_R7: + return aOldRegs->r7; + case DW_REG_ARM_R11: + return aOldRegs->r11; + case DW_REG_ARM_R12: + return aOldRegs->r12; + case DW_REG_ARM_R13: + return aOldRegs->r13; + case DW_REG_ARM_R14: + return aOldRegs->r14; + case DW_REG_ARM_R15: + return aOldRegs->r15; +#elif defined(GP_ARCH_arm64) + case DW_REG_AARCH64_X29: + return aOldRegs->x29; + case DW_REG_AARCH64_X30: + return aOldRegs->x30; + case DW_REG_AARCH64_SP: + return aOldRegs->sp; +#elif defined(GP_ARCH_mips64) + case DW_REG_MIPS_SP: + return aOldRegs->sp; + case DW_REG_MIPS_FP: + return aOldRegs->fp; + case DW_REG_MIPS_PC: + return aOldRegs->pc; +#else +# error "Unsupported arch" +#endif + default: + MOZ_ASSERT(0); + return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +// See prototype for comment. +TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector<PfxInstr>& aPfxInstrs) { + // A small evaluation stack, and a stack pointer, which points to + // the highest numbered in-use element. + const int N_STACK = 10; + TaggedUWord stack[N_STACK]; + int stackPointer = -1; + for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord(); + +#define PUSH(_tuw) \ + do { \ + if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \ + stack[++stackPointer] = (_tuw); \ + } while (0) + +#define POP(_lval) \ + do { \ + if (stackPointer < 0) goto fail; /* underflow */ \ + _lval = stack[stackPointer--]; \ + } while (0) + + // Cursor in the instruction sequence. + size_t curr = start + 1; + + // Check the start point is sane. + size_t nInstrs = aPfxInstrs.size(); + if (start < 0 || (size_t)start >= nInstrs) goto fail; + + { + // The instruction sequence must start with PX_Start. If not, + // something is seriously wrong. + PfxInstr first = aPfxInstrs[start]; + if (first.mOpcode != PX_Start) goto fail; + + // Push the CFA on the stack to start with (or not), as required by + // the original DW_OP_*expression* CFI. + if (first.mOperand != 0) PUSH(aCFA); + } + + while (true) { + if (curr >= nInstrs) goto fail; // ran off the end of the sequence + + PfxInstr pfxi = aPfxInstrs[curr++]; + if (pfxi.mOpcode == PX_End) break; // we're done + + switch (pfxi.mOpcode) { + case PX_Start: + // This should appear only at the start of the sequence. + goto fail; + case PX_End: + // We just took care of that, so we shouldn't see it again. + MOZ_ASSERT(0); + goto fail; + case PX_SImm32: + PUSH(TaggedUWord((intptr_t)pfxi.mOperand)); + break; + case PX_DwReg: { + DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand; + MOZ_ASSERT(reg != DW_REG_CFA); + PUSH(EvaluateReg(reg, aOldRegs, aCFA)); + break; + } + case PX_Deref: { + TaggedUWord addr; + POP(addr); + PUSH(DerefTUW(addr, aStackImg)); + break; + } + case PX_Add: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y + x); + break; + } + case PX_Sub: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y - x); + break; + } + case PX_And: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y & x); + break; + } + case PX_Or: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y | x); + break; + } + case PX_CmpGES: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y.CmpGEs(x)); + break; + } + case PX_Shl: { + TaggedUWord x, y; + POP(x); + POP(y); + PUSH(y << x); + break; + } + default: + MOZ_ASSERT(0); + goto fail; + } + } // while (true) + + // Evaluation finished. The top value on the stack is the result. + if (stackPointer >= 0) { + return stack[stackPointer]; + } + // Else fall through + +fail: + return TaggedUWord(); + +#undef PUSH +#undef POP +} + +// RUNS IN NO-MALLOC CONTEXT +TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA, + const StackImage* aStackImg, + const vector<PfxInstr>* aPfxInstrs) const { + switch (mHow) { + case UNKNOWN: + return TaggedUWord(); + case NODEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return tuw; + } + case DEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return DerefTUW(tuw, aStackImg); + } + case PFXEXPR: { + MOZ_ASSERT(aPfxInstrs); + if (!aPfxInstrs) { + return TaggedUWord(); + } + return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs); + } + default: + MOZ_ASSERT(0); + return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg, + const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) { + // Take a copy of regs, since we'll need to refer to the old values + // whilst computing the new ones. + UnwindRegs old_regs = *aRegs; + + // Mark all the current register values as invalid, so that the + // caller can see, on our return, which ones have been computed + // anew. If we don't even manage to compute a new PC value, then + // the caller will have to abandon the unwind. + // FIXME: Create and use instead: aRegs->SetAllInvalid(); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + aRegs->xbp = TaggedUWord(); + aRegs->xsp = TaggedUWord(); + aRegs->xip = TaggedUWord(); +#elif defined(GP_ARCH_arm) + aRegs->r7 = TaggedUWord(); + aRegs->r11 = TaggedUWord(); + aRegs->r12 = TaggedUWord(); + aRegs->r13 = TaggedUWord(); + aRegs->r14 = TaggedUWord(); + aRegs->r15 = TaggedUWord(); +#elif defined(GP_ARCH_arm64) + aRegs->x29 = TaggedUWord(); + aRegs->x30 = TaggedUWord(); + aRegs->sp = TaggedUWord(); + aRegs->pc = TaggedUWord(); +#elif defined(GP_ARCH_mips64) + aRegs->sp = TaggedUWord(); + aRegs->fp = TaggedUWord(); + aRegs->pc = TaggedUWord(); +#else +# error "Unsupported arch" +#endif + + // This is generally useful. + const TaggedUWord inval = TaggedUWord(); + + // First, compute the CFA. + TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/, + aStackImg, aPfxInstrs); + + // If we didn't manage to compute the CFA, well .. that's ungood, + // but keep going anyway. It'll be OK provided none of the register + // value rules mention the CFA. In any case, compute the new values + // for each register that we're tracking. + +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + aRegs->xbp = + aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xsp = + aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xip = + aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_arm) + aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r11 = + aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r12 = + aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r13 = + aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r14 = + aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r15 = + aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_arm64) + aRegs->x29 = + aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->x30 = + aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(GP_ARCH_mips64) + aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#else +# error "Unsupported arch" +#endif + + // We're done. Any regs for which we didn't manage to compute a + // new value will now be marked as invalid. +} + +// RUNS IN NO-MALLOC CONTEXT +void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs, + /*OUT*/ uintptr_t* aFrameSPs, + /*OUT*/ size_t* aFramesUsed, + /*OUT*/ size_t* aFramePointerFramesAcquired, + size_t aFramesAvail, UnwindRegs* aStartRegs, + StackImage* aStackImg) { + MOZ_RELEASE_ASSERT(!mAdminMode); + + ///////////////////////////////////////////////////////// + // BEGIN UNWIND + + *aFramesUsed = 0; + + UnwindRegs regs = *aStartRegs; + TaggedUWord last_valid_sp = TaggedUWord(); + + while (true) { + if (DEBUG_MAIN) { + char buf[300]; + mLog("\n"); +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + SprintfLiteral( + buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n", + (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(), + (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(), + (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_arm) + SprintfLiteral( + buf, + "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx" + " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n", + (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(), + (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(), + (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(), + (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(), + (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(), + (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_arm64) + SprintfLiteral( + buf, + "LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx" + " sp %d/%llx\n", + (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(), + (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(), + (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(), + (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#elif defined(GP_ARCH_mips64) + SprintfLiteral( + buf, "LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n", + (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(), + (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(), + (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value()); + buf[sizeof(buf) - 1] = 0; + mLog(buf); +#else +# error "Unsupported arch" +#endif + } + +#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86) + TaggedUWord ia = regs.xip; + TaggedUWord sp = regs.xsp; +#elif defined(GP_ARCH_arm) + TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14); + TaggedUWord sp = regs.r13; +#elif defined(GP_ARCH_arm64) + TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30); + TaggedUWord sp = regs.sp; +#elif defined(GP_ARCH_mips64) + TaggedUWord ia = regs.pc; + TaggedUWord sp = regs.sp; +#else +# error "Unsupported arch" +#endif + + if (*aFramesUsed >= aFramesAvail) { + break; + } + + // If we don't have a valid value for the PC, give up. + if (!ia.Valid()) { + break; + } + + // If this is the innermost frame, record the SP value, which + // presumably is valid. If this isn't the innermost frame, and we + // have a valid SP value, check that its SP value isn't less that + // the one we've seen so far, so as to catch potential SP value + // cycles. + if (*aFramesUsed == 0) { + last_valid_sp = sp; + } else { + MOZ_ASSERT(last_valid_sp.Valid()); + if (sp.Valid()) { + if (sp.Value() < last_valid_sp.Value()) { + // Hmm, SP going in the wrong direction. Let's stop. + break; + } + // Remember where we got to. + last_valid_sp = sp; + } + } + + // For the innermost frame, the IA value is what we need. For all + // other frames, it's actually the return address, so back up one + // byte so as to get it into the calling instruction. + aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1); + aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0; + (*aFramesUsed)++; + + // Find the RuleSet for the current IA, if any. This will also + // query the backing (secondary) maps if it isn't found in the + // thread-local cache. + + // If this isn't the innermost frame, back up into the calling insn. + if (*aFramesUsed > 1) { + ia = ia + TaggedUWord((uintptr_t)(-1)); + } + + pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs = + mPriMap->Lookup(ia.Value()); + const RuleSet* ruleset = ruleset_and_pfxinstrs.first; + const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second; + + if (DEBUG_MAIN) { + char buf[100]; + SprintfLiteral(buf, "ruleset for 0x%llx = %p\n", + (unsigned long long int)ia.Value(), ruleset); + buf[sizeof(buf) - 1] = 0; + mLog(buf); + } + +#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux) + ///////////////////////////////////////////// + //// + // On 32 bit x86-linux, syscalls are often done via the VDSO + // function __kernel_vsyscall, which doesn't have a corresponding + // object that we can read debuginfo from. That effectively kills + // off all stack traces for threads blocked in syscalls. Hence + // special-case by looking at the code surrounding the program + // counter. + // + // 0xf7757420 <__kernel_vsyscall+0>: push %ecx + // 0xf7757421 <__kernel_vsyscall+1>: push %edx + // 0xf7757422 <__kernel_vsyscall+2>: push %ebp + // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp + // 0xf7757425 <__kernel_vsyscall+5>: sysenter + // 0xf7757427 <__kernel_vsyscall+7>: nop + // 0xf7757428 <__kernel_vsyscall+8>: nop + // 0xf7757429 <__kernel_vsyscall+9>: nop + // 0xf775742a <__kernel_vsyscall+10>: nop + // 0xf775742b <__kernel_vsyscall+11>: nop + // 0xf775742c <__kernel_vsyscall+12>: nop + // 0xf775742d <__kernel_vsyscall+13>: nop + // 0xf775742e <__kernel_vsyscall+14>: int $0x80 + // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp + // 0xf7757431 <__kernel_vsyscall+17>: pop %edx + // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx + // 0xf7757433 <__kernel_vsyscall+19>: ret + // + // In cases where the sampled thread is blocked in a syscall, its + // program counter will point at "pop %ebp". Hence we look for + // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and + // the corresponding register-recovery actions are: + // new_ebp = *(old_esp + 0) + // new eip = *(old_esp + 12) + // new_esp = old_esp + 16 + // + // It may also be the case that the program counter points two + // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in + // the case where the syscall has been restarted but the thread + // hasn't been rescheduled. The code below doesn't handle that; + // it could easily be made to. + // + if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) { + uintptr_t insns_min, insns_max; + uintptr_t eip = ia.Value(); + bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip); + if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) { + uint8_t* eipC = (uint8_t*)eip; + if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D && + eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) { + TaggedUWord sp_plus_0 = sp; + TaggedUWord sp_plus_12 = sp; + TaggedUWord sp_plus_16 = sp; + sp_plus_12 = sp_plus_12 + TaggedUWord(12); + sp_plus_16 = sp_plus_16 + TaggedUWord(16); + TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg); + TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg); + TaggedUWord new_esp = sp_plus_16; + if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) { + regs.xbp = new_ebp; + regs.xip = new_eip; + regs.xsp = new_esp; + continue; + } + } + } + } + //// + ///////////////////////////////////////////// +#endif // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux) + + // So, do we have a ruleset for this address? If so, use it now. + if (ruleset) { + if (DEBUG_MAIN) { + ruleset->Print(mLog); + mLog("\n"); + } + // Use the RuleSet to compute the registers for the previous + // frame. |regs| is modified in-place. + UseRuleSet(®s, aStackImg, ruleset, pfxinstrs); + continue; + } + +#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \ + defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \ + defined(GP_PLAT_amd64_freebsd) + // There's no RuleSet for the specified address. On amd64/x86_linux, see if + // it's possible to recover the caller's frame by using the frame pointer. + + // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image), + // and assume the following layout: + // + // <--- new_SP + // +----------+ + // | new_IP | (return address) + // +----------+ + // | new_BP | <--- old_BP + // +----------+ + // | .... | + // | .... | + // | .... | + // +----------+ <---- old_SP (arbitrary, but must be <= old_BP) + + const size_t wordSzB = sizeof(uintptr_t); + TaggedUWord old_xsp = regs.xsp; + + // points at new_BP ? + TaggedUWord old_xbp = regs.xbp; + // points at new_IP ? + TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB); + // is the new_SP ? + TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB); + + if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() && + old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) { + // We don't need to do any range, alignment or validity checks for + // addresses passed to DerefTUW, since that performs them itself, and + // returns an invalid value on failure. Any such value will poison + // subsequent uses, and we do a final check for validity before putting + // the computed values into |regs|. + TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg); + if (new_xbp.Valid() && new_xbp.IsAligned() && + old_xbp.Value() < new_xbp.Value()) { + TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg); + TaggedUWord new_xsp = old_xbp_plus2; + if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) { + regs.xbp = new_xbp; + regs.xip = new_xip; + regs.xsp = new_xsp; + (*aFramePointerFramesAcquired)++; + continue; + } + } + } +#elif defined(GP_ARCH_arm64) + // Here is an example of generated code for prologue and epilogue.. + // + // stp x29, x30, [sp, #-16]! + // mov x29, sp + // ... + // ldp x29, x30, [sp], #16 + // ret + // + // Next is another example of generated code. + // + // stp x20, x19, [sp, #-32]! + // stp x29, x30, [sp, #16] + // add x29, sp, #0x10 + // ... + // ldp x29, x30, [sp, #16] + // ldp x20, x19, [sp], #32 + // ret + // + // Previous x29 and x30 register are stored in the address of x29 register. + // But since sp register value depends on local variables, we cannot compute + // previous sp register from current sp/fp/lr register and there is no + // regular rule for sp register in prologue. But since return address is lr + // register, if x29 is valid, we will get return address without sp + // register. + // + // So we assume the following layout that if no rule set. x29 is frame + // pointer, so we will be able to compute x29 and x30 . + // + // +----------+ <--- new_sp (cannot compute) + // | .... | + // +----------+ + // | new_lr | (return address) + // +----------+ + // | new_fp | <--- old_fp + // +----------+ + // | .... | + // | .... | + // +----------+ <---- old_sp (arbitrary, but unused) + + TaggedUWord old_fp = regs.x29; + if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() && + last_valid_sp.Value() <= old_fp.Value()) { + TaggedUWord new_fp = DerefTUW(old_fp, aStackImg); + if (new_fp.Valid() && new_fp.IsAligned() && + old_fp.Value() < new_fp.Value()) { + TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8); + TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg); + if (new_lr.Valid()) { + regs.x29 = new_fp; + regs.x30 = new_lr; + // When using frame pointer to walk stack, we cannot compute sp + // register since we cannot compute sp register from fp/lr/sp + // register, and there is no regular rule to compute previous sp + // register. So mark as invalid. + regs.sp = TaggedUWord(); + (*aFramePointerFramesAcquired)++; + continue; + } + } + } +#endif // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || + // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) + + // We failed to recover a frame either using CFI or FP chasing, and we + // have no other ways to recover the frame. So we have to give up. + break; + + } // top level unwind loop + + // END UNWIND + ///////////////////////////////////////////////////////// +} + +//////////////////////////////////////////////////////////////// +// LUL Unit Testing // +//////////////////////////////////////////////////////////////// + +static const int LUL_UNIT_TEST_STACK_SIZE = 32768; + +#if defined(GP_ARCH_mips64) +static __attribute__((noinline)) unsigned long __getpc(void) { + unsigned long rtaddr; + __asm__ volatile("move %0, $31" : "=r"(rtaddr)); + return rtaddr; +} +#endif + +// This function is innermost in the test call sequence. It uses LUL +// to unwind, and compares the result with the sequence specified in +// the director string. These need to agree in order for the test to +// pass. In order not to screw up the results, this function needs +// to have a not-very big stack frame, since we're only presenting +// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and +// that chunk unavoidably includes the frame for this function. +// +// This function must not be inlined into its callers. Doing so will +// cause the expected-vs-actual backtrace consistency checking to +// fail. Prints summary results to |aLUL|'s logging sink and also +// returns a boolean indicating whether or not the test failed. +static __attribute__((noinline)) bool GetAndCheckStackTrace( + LUL* aLUL, const char* dstring) { + // Get hold of the current unwind-start registers. + UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); +#if defined(GP_ARCH_amd64) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "leaq 0(%%rip), %%r15" + "\n\t" + "movq %%r15, 0(%0)" + "\n\t" + "movq %%rsp, 8(%0)" + "\n\t" + "movq %%rbp, 16(%0)" + "\n" + : + : "r"(&block[0]) + : "memory", "r15"); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 128; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 12); + __asm__ __volatile__( + ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/ + "\n\t" + "popl %%edi" + "\n\t" + "movl %%edi, 0(%0)" + "\n\t" + "movl %%esp, 4(%0)" + "\n\t" + "movl %%ebp, 8(%0)" + "\n" + : + : "r"(&block[0]) + : "memory", "edi"); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android) + volatile uintptr_t block[6]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "mov r0, r15" + "\n\t" + "str r0, [%0, #0]" + "\n\t" + "str r14, [%0, #4]" + "\n\t" + "str r13, [%0, #8]" + "\n\t" + "str r12, [%0, #12]" + "\n\t" + "str r11, [%0, #16]" + "\n\t" + "str r7, [%0, #20]" + "\n" + : + : "r"(&block[0]) + : "memory", "r0"); + startRegs.r15 = TaggedUWord(block[0]); + startRegs.r14 = TaggedUWord(block[1]); + startRegs.r13 = TaggedUWord(block[2]); + startRegs.r12 = TaggedUWord(block[3]); + startRegs.r11 = TaggedUWord(block[4]); + startRegs.r7 = TaggedUWord(block[5]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_ARCH_arm64) + volatile uintptr_t block[4]; + MOZ_ASSERT(sizeof(block) == 32); + __asm__ __volatile__( + "adr x0, . \n\t" + "str x0, [%0, #0] \n\t" + "str x29, [%0, #8] \n\t" + "str x30, [%0, #16] \n\t" + "mov x0, sp \n\t" + "str x0, [%0, #24] \n\t" + : + : "r"(&block[0]) + : "memory", "x0"); + startRegs.pc = TaggedUWord(block[0]); + startRegs.x29 = TaggedUWord(block[1]); + startRegs.x30 = TaggedUWord(block[2]); + startRegs.sp = TaggedUWord(block[3]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(GP_ARCH_mips64) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "sd $29, 8(%0) \n" + "sd $30, 16(%0) \n" + : + : "r"(block) + : "memory"); + block[0] = __getpc(); + startRegs.pc = TaggedUWord(block[0]); + startRegs.sp = TaggedUWord(block[1]); + startRegs.fp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#else +# error "Unsupported platform" +#endif + + // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the + // stack. + uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE; + uintptr_t ws = sizeof(void*); + start &= ~(ws - 1); + end &= ~(ws - 1); + uintptr_t nToCopy = end - start; + if (nToCopy > lul::N_STACK_BYTES) { + nToCopy = lul::N_STACK_BYTES; + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + StackImage* stackImg = new StackImage(); + stackImg->mLen = nToCopy; + stackImg->mStartAvma = start; + if (nToCopy > 0) { + MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy); + memcpy(&stackImg->mContents[0], (void*)start, nToCopy); + } + + // Unwind it. + const int MAX_TEST_FRAMES = 64; + uintptr_t framePCs[MAX_TEST_FRAMES]; + uintptr_t frameSPs[MAX_TEST_FRAMES]; + size_t framesAvail = mozilla::ArrayLength(framePCs); + size_t framesUsed = 0; + size_t framePointerFramesAcquired = 0; + aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed, + &framePointerFramesAcquired, framesAvail, &startRegs, stackImg); + + delete stackImg; + + // if (0) { + // // Show what we have. + // fprintf(stderr, "Got %d frames:\n", (int)framesUsed); + // for (size_t i = 0; i < framesUsed; i++) { + // fprintf(stderr, " [%2d] SP %p PC %p\n", + // (int)i, (void*)frameSPs[i], (void*)framePCs[i]); + // } + // fprintf(stderr, "\n"); + //} + + // Check to see if there's a consistent binding between digits in + // the director string ('1' .. '8') and the PC values acquired by + // the unwind. If there isn't, the unwinding has failed somehow. + uintptr_t binding[8]; // binding for '1' .. binding for '8' + memset((void*)binding, 0, sizeof(binding)); + + // The general plan is to work backwards along the director string + // and forwards along the framePCs array. Doing so corresponds to + // working outwards from the innermost frame of the recursive test set. + const char* cursor = dstring; + + // Find the end. This leaves |cursor| two bytes past the first + // character we want to look at -- see comment below. + while (*cursor) cursor++; + + // Counts the number of consistent frames. + size_t nConsistent = 0; + + // Iterate back to the start of the director string. The starting + // points are a bit complex. We can't use framePCs[0] because that + // contains the PC in this frame (above). We can't use framePCs[1] + // because that will contain the PC at return point in the recursive + // test group (TestFn[1-8]) for their call "out" to this function, + // GetAndCheckStackTrace. Although LUL will compute a correct + // return address, that will not be the same return address as for a + // recursive call out of the the function to another function in the + // group. Hence we can only start consistency checking at + // framePCs[2]. + // + // To be consistent, then, we must ignore the last element in the + // director string as that corresponds to framePCs[1]. Hence the + // start points are: framePCs[2] and the director string 2 bytes + // before the terminating zero. + // + // Also as a result of this, the number of consistent frames counted + // will always be one less than the length of the director string + // (not including its terminating zero). + size_t frameIx; + for (cursor = cursor - 2, frameIx = 2; + cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) { + char c = *cursor; + uintptr_t pc = framePCs[frameIx]; + // If this doesn't hold, the director string is ill-formed. + MOZ_ASSERT(c >= '1' && c <= '8'); + int n = ((int)c) - ((int)'1'); + if (binding[n] == 0) { + // There's no binding for |c| yet, so install |pc| and carry on. + binding[n] = pc; + nConsistent++; + continue; + } + // There's a pre-existing binding for |c|. Check it's consistent. + if (binding[n] != pc) { + // Not consistent. Give up now. + break; + } + // Consistent. Keep going. + nConsistent++; + } + + // So, did we succeed? + bool passed = nConsistent + 1 == strlen(dstring); + + // Show the results. + char buf[200]; + SprintfLiteral(buf, "LULUnitTest: dstring = %s\n", dstring); + buf[sizeof(buf) - 1] = 0; + aLUL->mLog(buf); + SprintfLiteral(buf, "LULUnitTest: %d consistent, %d in dstring: %s\n", + (int)nConsistent, (int)strlen(dstring), + passed ? "PASS" : "FAIL"); + buf[sizeof(buf) - 1] = 0; + aLUL->mLog(buf); + + return !passed; +} + +// Macro magic to create a set of 8 mutually recursive functions with +// varying frame sizes. These will recurse amongst themselves as +// specified by |strP|, the directory string, and call +// GetAndCheckStackTrace when the string becomes empty, passing it the +// original value of the string. This checks the result, printing +// results on |aLUL|'s logging sink, and also returns a boolean +// indicating whether or not the results are acceptable (correct). + +#define DECL_TEST_FN(NAME) \ + bool NAME(LUL* aLUL, const char* strPorig, const char* strP); + +#define GEN_TEST_FN(NAME, FRAMESIZE) \ + bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \ + /* Create a frame of size (at least) FRAMESIZE, so that the */ \ + /* 8 functions created by this macro offer some variation in frame */ \ + /* sizes. This isn't as simple as it might seem, since a clever */ \ + /* optimizing compiler (eg, clang-5) detects that the array is unused */ \ + /* and removes it. We try to defeat this by passing it to a function */ \ + /* in a different compilation unit, and hoping that clang does not */ \ + /* notice that the call is a no-op. */ \ + char space[FRAMESIZE]; \ + Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \ + \ + if (*strP == '\0') { \ + /* We've come to the end of the director string. */ \ + /* Take a stack snapshot. */ \ + /* We purposefully use a negation to avoid tail-call optimization */ \ + return !GetAndCheckStackTrace(aLUL, strPorig); \ + } else { \ + /* Recurse onwards. This is a bit subtle. The obvious */ \ + /* thing to do here is call onwards directly, from within the */ \ + /* arms of the case statement. That gives a problem in that */ \ + /* there will be multiple return points inside each function when */ \ + /* unwinding, so it will be difficult to check for consistency */ \ + /* against the director string. Instead, we make an indirect */ \ + /* call, so as to guarantee that there is only one call site */ \ + /* within each function. This does assume that the compiler */ \ + /* won't transform it back to the simple direct-call form. */ \ + /* To discourage it from doing so, the call is bracketed with */ \ + /* __asm__ __volatile__ sections so as to make it not-movable. */ \ + bool (*nextFn)(LUL*, const char*, const char*) = NULL; \ + switch (*strP) { \ + case '1': \ + nextFn = TestFn1; \ + break; \ + case '2': \ + nextFn = TestFn2; \ + break; \ + case '3': \ + nextFn = TestFn3; \ + break; \ + case '4': \ + nextFn = TestFn4; \ + break; \ + case '5': \ + nextFn = TestFn5; \ + break; \ + case '6': \ + nextFn = TestFn6; \ + break; \ + case '7': \ + nextFn = TestFn7; \ + break; \ + case '8': \ + nextFn = TestFn8; \ + break; \ + default: \ + nextFn = TestFn8; \ + break; \ + } \ + /* "use" |space| immediately after the recursive call, */ \ + /* so as to dissuade clang from deallocating the space while */ \ + /* the call is active, or otherwise messing with the stack frame. */ \ + __asm__ __volatile__("" ::: "cc", "memory"); \ + bool passed = nextFn(aLUL, strPorig, strP + 1); \ + Unused << write(1, space, 0); \ + __asm__ __volatile__("" ::: "cc", "memory"); \ + return passed; \ + } \ + } + +// The test functions are mutually recursive, so it is necessary to +// declare them before defining them. +DECL_TEST_FN(TestFn1) +DECL_TEST_FN(TestFn2) +DECL_TEST_FN(TestFn3) +DECL_TEST_FN(TestFn4) +DECL_TEST_FN(TestFn5) +DECL_TEST_FN(TestFn6) +DECL_TEST_FN(TestFn7) +DECL_TEST_FN(TestFn8) + +GEN_TEST_FN(TestFn1, 123) +GEN_TEST_FN(TestFn2, 456) +GEN_TEST_FN(TestFn3, 789) +GEN_TEST_FN(TestFn4, 23) +GEN_TEST_FN(TestFn5, 47) +GEN_TEST_FN(TestFn6, 117) +GEN_TEST_FN(TestFn7, 1) +GEN_TEST_FN(TestFn8, 99) + +// This starts the test sequence going. Call here to generate a +// sequence of calls as directed by the string |dstring|. The call +// sequence will, from its innermost frame, finish by calling +// GetAndCheckStackTrace() and passing it |dstring|. +// GetAndCheckStackTrace() will unwind the stack, check consistency +// of those results against |dstring|, and print a pass/fail message +// to aLUL's logging sink. It also updates the counters in *aNTests +// and aNTestsPassed. +__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests, + /*OUT*/ int* aNTestsPassed, LUL* aLUL, + const char* dstring) { + // Ensure that the stack has at least this much space on it. This + // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes + // and hand it to LUL. Safe in the sense that no segfault can + // happen because the stack is at least this big. This is all + // somewhat dubious in the sense that a sufficiently clever compiler + // (clang, for one) can figure out that space[] is unused and delete + // it from the frame. Hence the somewhat elaborate hoop jumping to + // fill it up before the call and to at least appear to use the + // value afterwards. + int i; + volatile char space[LUL_UNIT_TEST_STACK_SIZE]; + for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { + space[i] = (char)(i & 0x7F); + } + + // Really run the test. + bool passed = TestFn1(aLUL, dstring, dstring); + + // Appear to use space[], by visiting the value to compute some kind + // of checksum, and then (apparently) using the checksum. + int sum = 0; + for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { + // If this doesn't fool LLVM, I don't know what will. + sum += space[i] - 3 * i; + } + __asm__ __volatile__("" : : "r"(sum)); + + // Update the counters. + (*aNTests)++; + if (passed) { + (*aNTestsPassed)++; + } +} + +void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed, + LUL* aLUL) { + aLUL->mLog(":\n"); + aLUL->mLog("LULUnitTest: BEGIN\n"); + *aNTests = *aNTestsPassed = 0; + TestUnw(aNTests, aNTestsPassed, aLUL, "11111111"); + TestUnw(aNTests, aNTestsPassed, aLUL, "11222211"); + TestUnw(aNTests, aNTestsPassed, aLUL, "111222333"); + TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212"); + TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258"); + TestUnw(aNTests, aNTestsPassed, aLUL, + "123456781122334455667788777777777777777777777"); + aLUL->mLog("LULUnitTest: END\n"); + aLUL->mLog(":\n"); +} + +} // namespace lul |