diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /memory/replace | |
parent | Initial commit. (diff) | |
download | firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.tar.xz firefox-2aa4a82499d4becd2284cdb482213d541b8804dd.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
57 files changed, 9767 insertions, 0 deletions
diff --git a/memory/replace/dmd/DMD.cpp b/memory/replace/dmd/DMD.cpp new file mode 100644 index 0000000000..f93c8eebca --- /dev/null +++ b/memory/replace/dmd/DMD.cpp @@ -0,0 +1,1882 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if !defined(MOZ_PROFILING) +# error "DMD requires MOZ_PROFILING" +#endif + +#ifdef XP_WIN +# include <windows.h> +# include <process.h> +#else +# include <pthread.h> +# include <sys/types.h> +# include <unistd.h> +#endif + +#ifdef ANDROID +# include <android/log.h> +#endif + +#include "nscore.h" + +#include "mozilla/Assertions.h" +#include "mozilla/FastBernoulliTrial.h" +#include "mozilla/HashFunctions.h" +#include "mozilla/HashTable.h" +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/JSONWriter.h" +#include "mozilla/Likely.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/PodOperations.h" +#include "mozilla/StackWalk.h" +#include "mozilla/ThreadLocal.h" + +// CodeAddressService is defined entirely in the header, so this does not make +// DMD depend on XPCOM's object file. +#include "CodeAddressService.h" + +// replace_malloc.h needs to be included before replace_malloc_bridge.h, +// which DMD.h includes, so DMD.h needs to be included after replace_malloc.h. +#include "replace_malloc.h" +#include "DMD.h" + +namespace mozilla { +namespace dmd { + +class DMDBridge : public ReplaceMallocBridge { + virtual DMDFuncs* GetDMDFuncs() override; +}; + +static DMDBridge* gDMDBridge; +static DMDFuncs gDMDFuncs; + +DMDFuncs* DMDBridge::GetDMDFuncs() { return &gDMDFuncs; } + +MOZ_FORMAT_PRINTF(1, 2) +inline void StatusMsg(const char* aFmt, ...) { + va_list ap; + va_start(ap, aFmt); + gDMDFuncs.StatusMsg(aFmt, ap); + va_end(ap); +} + +//--------------------------------------------------------------------------- +// Utilities +//--------------------------------------------------------------------------- + +#ifndef DISALLOW_COPY_AND_ASSIGN +# define DISALLOW_COPY_AND_ASSIGN(T) \ + T(const T&); \ + void operator=(const T&) +#endif + +static malloc_table_t gMallocTable; + +// This provides infallible allocations (they abort on OOM). We use it for all +// of DMD's own allocations, which fall into the following three cases. +// +// - Direct allocations (the easy case). +// +// - Indirect allocations in mozilla::{Vector,HashSet,HashMap} -- this class +// serves as their AllocPolicy. +// +// - Other indirect allocations (e.g. MozStackWalk) -- see the comments on +// Thread::mBlockIntercepts and in replace_malloc for how these work. +// +// It would be nice if we could use the InfallibleAllocPolicy from mozalloc, +// but DMD cannot use mozalloc. +// +class InfallibleAllocPolicy { + static void ExitOnFailure(const void* aP); + + public: + template <typename T> + static T* maybe_pod_malloc(size_t aNumElems) { + if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) + return nullptr; + return (T*)gMallocTable.malloc(aNumElems * sizeof(T)); + } + + template <typename T> + static T* maybe_pod_calloc(size_t aNumElems) { + return (T*)gMallocTable.calloc(aNumElems, sizeof(T)); + } + + template <typename T> + static T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) + return nullptr; + return (T*)gMallocTable.realloc(aPtr, aNewSize * sizeof(T)); + } + + static void* malloc_(size_t aSize) { + void* p = gMallocTable.malloc(aSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_malloc(size_t aNumElems) { + T* p = maybe_pod_malloc<T>(aNumElems); + ExitOnFailure(p); + return p; + } + + static void* calloc_(size_t aCount, size_t aSize) { + void* p = gMallocTable.calloc(aCount, aSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_calloc(size_t aNumElems) { + T* p = maybe_pod_calloc<T>(aNumElems); + ExitOnFailure(p); + return p; + } + + static void* realloc_(void* aPtr, size_t aNewSize) { + void* p = gMallocTable.realloc(aPtr, aNewSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + T* p = maybe_pod_realloc(aPtr, aOldSize, aNewSize); + ExitOnFailure(p); + return p; + } + + static void* memalign_(size_t aAlignment, size_t aSize) { + void* p = gMallocTable.memalign(aAlignment, aSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static void free_(T* aPtr, size_t aSize = 0) { + gMallocTable.free(aPtr); + } + + static char* strdup_(const char* aStr) { + char* s = (char*)InfallibleAllocPolicy::malloc_(strlen(aStr) + 1); + strcpy(s, aStr); + return s; + } + + template <class T> + static T* new_() { + void* mem = malloc_(sizeof(T)); + return new (mem) T; + } + + template <class T, typename P1> + static T* new_(const P1& aP1) { + void* mem = malloc_(sizeof(T)); + return new (mem) T(aP1); + } + + template <class T> + static void delete_(T* aPtr) { + if (aPtr) { + aPtr->~T(); + InfallibleAllocPolicy::free_(aPtr); + } + } + + static void reportAllocOverflow() { ExitOnFailure(nullptr); } + bool checkSimulatedOOM() const { return true; } +}; + +// This is only needed because of the |const void*| vs |void*| arg mismatch. +static size_t MallocSizeOf(const void* aPtr) { + return gMallocTable.malloc_usable_size(const_cast<void*>(aPtr)); +} + +void DMDFuncs::StatusMsg(const char* aFmt, va_list aAp) { +#ifdef ANDROID + __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, aAp); +#else + // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL. + char* fmt = (char*)InfallibleAllocPolicy::malloc_(strlen(aFmt) + 64); + sprintf(fmt, "DMD[%d] %s", getpid(), aFmt); + vfprintf(stderr, fmt, aAp); + InfallibleAllocPolicy::free_(fmt); +#endif +} + +/* static */ +void InfallibleAllocPolicy::ExitOnFailure(const void* aP) { + if (!aP) { + MOZ_CRASH("DMD out of memory; aborting"); + } +} + +static double Percent(size_t part, size_t whole) { + return (whole == 0) ? 0 : 100 * (double)part / whole; +} + +// Commifies the number. +static char* Show(size_t n, char* buf, size_t buflen) { + int nc = 0, i = 0, lasti = buflen - 2; + buf[lasti + 1] = '\0'; + if (n == 0) { + buf[lasti - i] = '0'; + i++; + } else { + while (n > 0) { + if (((i - nc) % 3) == 0 && i != 0) { + buf[lasti - i] = ','; + i++; + nc++; + } + buf[lasti - i] = static_cast<char>((n % 10) + '0'); + i++; + n /= 10; + } + } + int firstCharIndex = lasti - i + 1; + + MOZ_ASSERT(firstCharIndex >= 0); + return &buf[firstCharIndex]; +} + +//--------------------------------------------------------------------------- +// Options (Part 1) +//--------------------------------------------------------------------------- + +class Options { + template <typename T> + struct NumOption { + const T mDefault; + const T mMax; + T mActual; + NumOption(T aDefault, T aMax) + : mDefault(aDefault), mMax(aMax), mActual(aDefault) {} + }; + + // DMD has several modes. These modes affect what data is recorded and + // written to the output file, and the written data affects the + // post-processing that dmd.py can do. + // + // Users specify the mode as soon as DMD starts. This leads to minimal memory + // usage and log file size. It has the disadvantage that is inflexible -- if + // you want to change modes you have to re-run DMD. But in practice changing + // modes seems to be rare, so it's not much of a problem. + // + // An alternative possibility would be to always record and output *all* the + // information needed for all modes. This would let you choose the mode when + // running dmd.py, and so you could do multiple kinds of profiling on a + // single DMD run. But if you are only interested in one of the simpler + // modes, you'd pay the price of (a) increased memory usage and (b) *very* + // large log files. + // + // Finally, another alternative possibility would be to do mode selection + // partly at DMD startup or recording, and then partly in dmd.py. This would + // give some extra flexibility at moderate memory and file size cost. But + // certain mode pairs wouldn't work, which would be confusing. + // + enum class Mode { + // For each live block, this mode outputs: size (usable and slop) and + // (possibly) and allocation stack. This mode is good for live heap + // profiling. + Live, + + // Like "Live", but for each live block it also outputs: zero or more + // report stacks. This mode is good for identifying where memory reporters + // should be added. This is the default mode. + DarkMatter, + + // Like "Live", but also outputs the same data for dead blocks. This mode + // does cumulative heap profiling, which is good for identifying where large + // amounts of short-lived allocations ("heap churn") occur. + Cumulative, + + // Like "Live", but this mode also outputs for each live block the address + // of the block and the values contained in the blocks. This mode is useful + // for investigating leaks, by helping to figure out which blocks refer to + // other blocks. This mode force-enables full stacks coverage. + Scan + }; + + // With full stacks, every heap block gets a stack trace recorded for it. + // This is complete but slow. + // + // With partial stacks, not all heap blocks will get a stack trace recorded. + // A Bernoulli trial (see mfbt/FastBernoulliTrial.h for details) is performed + // for each heap block to decide if it gets one. Because bigger heap blocks + // are more likely to get a stack trace, even though most heap *blocks* won't + // get a stack trace, most heap *bytes* will. + enum class Stacks { Full, Partial }; + + char* mDMDEnvVar; // a saved copy, for later printing + + Mode mMode; + Stacks mStacks; + bool mShowDumpStats; + + void BadArg(const char* aArg); + static const char* ValueIfMatch(const char* aArg, const char* aOptionName); + static bool GetLong(const char* aArg, const char* aOptionName, long aMin, + long aMax, long* aValue); + static bool GetBool(const char* aArg, const char* aOptionName, bool* aValue); + + public: + explicit Options(const char* aDMDEnvVar); + + bool IsLiveMode() const { return mMode == Mode::Live; } + bool IsDarkMatterMode() const { return mMode == Mode::DarkMatter; } + bool IsCumulativeMode() const { return mMode == Mode::Cumulative; } + bool IsScanMode() const { return mMode == Mode::Scan; } + + const char* ModeString() const; + + const char* DMDEnvVar() const { return mDMDEnvVar; } + + bool DoFullStacks() const { return mStacks == Stacks::Full; } + size_t ShowDumpStats() const { return mShowDumpStats; } +}; + +static Options* gOptions; + +//--------------------------------------------------------------------------- +// The global lock +//--------------------------------------------------------------------------- + +// MutexBase implements the platform-specific parts of a mutex. + +#ifdef XP_WIN + +class MutexBase { + CRITICAL_SECTION mCS; + + DISALLOW_COPY_AND_ASSIGN(MutexBase); + + public: + MutexBase() { InitializeCriticalSection(&mCS); } + ~MutexBase() { DeleteCriticalSection(&mCS); } + + void Lock() { EnterCriticalSection(&mCS); } + void Unlock() { LeaveCriticalSection(&mCS); } +}; + +#else + +class MutexBase { + pthread_mutex_t mMutex; + + MutexBase(const MutexBase&) = delete; + + const MutexBase& operator=(const MutexBase&) = delete; + + public: + MutexBase() { pthread_mutex_init(&mMutex, nullptr); } + + void Lock() { pthread_mutex_lock(&mMutex); } + void Unlock() { pthread_mutex_unlock(&mMutex); } +}; + +#endif + +class Mutex : private MutexBase { + bool mIsLocked; + + Mutex(const Mutex&) = delete; + + const Mutex& operator=(const Mutex&) = delete; + + public: + Mutex() : mIsLocked(false) {} + + void Lock() { + MutexBase::Lock(); + MOZ_ASSERT(!mIsLocked); + mIsLocked = true; + } + + void Unlock() { + MOZ_ASSERT(mIsLocked); + mIsLocked = false; + MutexBase::Unlock(); + } + + bool IsLocked() { return mIsLocked; } +}; + +// This lock must be held while manipulating global state such as +// gStackTraceTable, gLiveBlockTable, gDeadBlockTable. Note that gOptions is +// *not* protected by this lock because it is only written to by Options(), +// which is only invoked at start-up and in ResetEverything(), which is only +// used by SmokeDMD.cpp. +static Mutex* gStateLock = nullptr; + +class AutoLockState { + AutoLockState(const AutoLockState&) = delete; + + const AutoLockState& operator=(const AutoLockState&) = delete; + + public: + AutoLockState() { gStateLock->Lock(); } + ~AutoLockState() { gStateLock->Unlock(); } +}; + +class AutoUnlockState { + AutoUnlockState(const AutoUnlockState&) = delete; + + const AutoUnlockState& operator=(const AutoUnlockState&) = delete; + + public: + AutoUnlockState() { gStateLock->Unlock(); } + ~AutoUnlockState() { gStateLock->Lock(); } +}; + +//--------------------------------------------------------------------------- +// Per-thread blocking of intercepts +//--------------------------------------------------------------------------- + +// On MacOS, the first __thread/thread_local access calls malloc, which leads +// to an infinite loop. So we use pthread-based TLS instead, which somehow +// doesn't have this problem. +#if !defined(XP_DARWIN) +# define DMD_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T) +#else +# define DMD_THREAD_LOCAL(T) \ + detail::ThreadLocal<T, detail::ThreadLocalKeyStorage> +#endif + +class Thread { + // Required for allocation via InfallibleAllocPolicy::new_. + friend class InfallibleAllocPolicy; + + // When true, this blocks intercepts, which allows malloc interception + // functions to themselves call malloc. (Nb: for direct calls to malloc we + // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes + // indirectly call vanilla malloc via functions like MozStackWalk.) + bool mBlockIntercepts; + + Thread() : mBlockIntercepts(false) {} + + Thread(const Thread&) = delete; + + const Thread& operator=(const Thread&) = delete; + + static DMD_THREAD_LOCAL(Thread*) tlsThread; + + public: + static void Init() { + if (!tlsThread.init()) { + MOZ_CRASH(); + } + } + + static Thread* Fetch() { + Thread* t = tlsThread.get(); + if (MOZ_UNLIKELY(!t)) { + // This memory is never freed, even if the thread dies. It's a leak, but + // only a tiny one. + t = InfallibleAllocPolicy::new_<Thread>(); + tlsThread.set(t); + } + + return t; + } + + bool BlockIntercepts() { + MOZ_ASSERT(!mBlockIntercepts); + return mBlockIntercepts = true; + } + + bool UnblockIntercepts() { + MOZ_ASSERT(mBlockIntercepts); + return mBlockIntercepts = false; + } + + bool InterceptsAreBlocked() const { return mBlockIntercepts; } +}; + +DMD_THREAD_LOCAL(Thread*) Thread::tlsThread; + +// An object of this class must be created (on the stack) before running any +// code that might allocate. +class AutoBlockIntercepts { + Thread* const mT; + + AutoBlockIntercepts(const AutoBlockIntercepts&) = delete; + + const AutoBlockIntercepts& operator=(const AutoBlockIntercepts&) = delete; + + public: + explicit AutoBlockIntercepts(Thread* aT) : mT(aT) { mT->BlockIntercepts(); } + ~AutoBlockIntercepts() { + MOZ_ASSERT(mT->InterceptsAreBlocked()); + mT->UnblockIntercepts(); + } +}; + +//--------------------------------------------------------------------------- +// Location service +//--------------------------------------------------------------------------- + +struct DescribeCodeAddressLock { + static void Unlock() { gStateLock->Unlock(); } + static void Lock() { gStateLock->Lock(); } + static bool IsLocked() { return gStateLock->IsLocked(); } +}; + +typedef CodeAddressService<InfallibleAllocPolicy, DescribeCodeAddressLock> + CodeAddressService; + +//--------------------------------------------------------------------------- +// Stack traces +//--------------------------------------------------------------------------- + +class StackTrace { + public: + static const uint32_t MaxFrames = 24; + + private: + uint32_t mLength; // The number of PCs. + const void* mPcs[MaxFrames]; // The PCs themselves. + + public: + StackTrace() : mLength(0) {} + StackTrace(const StackTrace& aOther) : mLength(aOther.mLength) { + PodCopy(mPcs, aOther.mPcs, mLength); + } + + uint32_t Length() const { return mLength; } + const void* Pc(uint32_t i) const { + MOZ_ASSERT(i < mLength); + return mPcs[i]; + } + + uint32_t Size() const { return mLength * sizeof(mPcs[0]); } + + // The stack trace returned by this function is interned in gStackTraceTable, + // and so is immortal and unmovable. + static const StackTrace* Get(Thread* aT); + + // Hash policy. + + typedef StackTrace* Lookup; + + static mozilla::HashNumber hash(const StackTrace* const& aSt) { + return mozilla::HashBytes(aSt->mPcs, aSt->Size()); + } + + static bool match(const StackTrace* const& aA, const StackTrace* const& aB) { + return aA->mLength == aB->mLength && + memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0; + } + + private: + static void StackWalkCallback(uint32_t aFrameNumber, void* aPc, void* aSp, + void* aClosure) { + StackTrace* st = (StackTrace*)aClosure; + MOZ_ASSERT(st->mLength < MaxFrames); + st->mPcs[st->mLength] = aPc; + st->mLength++; + MOZ_ASSERT(st->mLength == aFrameNumber); + } +}; + +typedef mozilla::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy> + StackTraceTable; +static StackTraceTable* gStackTraceTable = nullptr; + +typedef mozilla::HashSet<const StackTrace*, + mozilla::DefaultHasher<const StackTrace*>, + InfallibleAllocPolicy> + StackTraceSet; + +typedef mozilla::HashSet<const void*, mozilla::DefaultHasher<const void*>, + InfallibleAllocPolicy> + PointerSet; +typedef mozilla::HashMap<const void*, uint32_t, + mozilla::DefaultHasher<const void*>, + InfallibleAllocPolicy> + PointerIdMap; + +// We won't GC the stack trace table until it this many elements. +static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024; + +/* static */ const StackTrace* StackTrace::Get(Thread* aT) { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(aT->InterceptsAreBlocked()); + + // On Windows, MozStackWalk can acquire a lock from the shared library + // loader. Another thread might call malloc while holding that lock (when + // loading a shared library). So we can't be in gStateLock during the call + // to MozStackWalk. For details, see + // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8 + // On Linux, something similar can happen; see bug 824340. + // So let's just release it on all platforms. + StackTrace tmp; + { + AutoUnlockState unlock; + // In each of the following cases, skipFrames is chosen so that the + // first frame in each stack trace is a replace_* function (or as close as + // possible, given the vagaries of inlining on different platforms). +#if defined(XP_WIN) && defined(_M_IX86) + // This avoids MozStackWalk(), which causes unusably slow startup on Win32 + // when it is called during static initialization (see bug 1241684). + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Win32: Registers::SyncPopulate() for the + // frame pointer, and GetStackTop() for the stack end. + CONTEXT context; + RtlCaptureContext(&context); + void** fp = reinterpret_cast<void**>(context.Ebp); + + PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb()); + void* stackEnd = static_cast<void*>(pTib->StackBase); + FramePointerStackWalk(StackWalkCallback, /* skipFrames = */ 0, MaxFrames, + &tmp, fp, stackEnd); +#elif defined(XP_MACOSX) + // This avoids MozStackWalk(), which has become unusably slow on Mac due to + // changes in libunwind. + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Mac: Registers::SyncPopulate() for the frame + // pointer, and GetStackTop() for the stack end. + void** fp; +# if defined(__x86_64__) + asm( + // Dereference %rbp to get previous %rbp + "movq (%%rbp), %0\n\t" + : "=r"(fp)); +# else + asm("ldr %0, [x29]\n\t" : "=r"(fp)); +# endif + void* stackEnd = pthread_get_stackaddr_np(pthread_self()); + FramePointerStackWalk(StackWalkCallback, /* skipFrames = */ 0, MaxFrames, + &tmp, fp, stackEnd); +#else +# if defined(XP_WIN) && defined(_M_X64) + int skipFrames = 1; +# else + int skipFrames = 2; +# endif + MozStackWalk(StackWalkCallback, skipFrames, MaxFrames, &tmp); +#endif + } + + StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp); + if (!p) { + StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp); + MOZ_ALWAYS_TRUE(gStackTraceTable->add(p, stnew)); + } + return *p; +} + +//--------------------------------------------------------------------------- +// Heap blocks +//--------------------------------------------------------------------------- + +// This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit +// is zero) with a 1-bit tag. +// +// |T| is the pointer type, e.g. |int*|, not the pointed-to type. This makes +// is easier to have const pointers, e.g. |TaggedPtr<const int*>|. +template <typename T> +class TaggedPtr { + union { + T mPtr; + uintptr_t mUint; + }; + + static const uintptr_t kTagMask = uintptr_t(0x1); + static const uintptr_t kPtrMask = ~kTagMask; + + static bool IsTwoByteAligned(T aPtr) { + return (uintptr_t(aPtr) & kTagMask) == 0; + } + + public: + TaggedPtr() : mPtr(nullptr) {} + + TaggedPtr(T aPtr, bool aBool) : mPtr(aPtr) { + MOZ_ASSERT(IsTwoByteAligned(aPtr)); + uintptr_t tag = uintptr_t(aBool); + MOZ_ASSERT(tag <= kTagMask); + mUint |= (tag & kTagMask); + } + + void Set(T aPtr, bool aBool) { + MOZ_ASSERT(IsTwoByteAligned(aPtr)); + mPtr = aPtr; + uintptr_t tag = uintptr_t(aBool); + MOZ_ASSERT(tag <= kTagMask); + mUint |= (tag & kTagMask); + } + + T Ptr() const { return reinterpret_cast<T>(mUint & kPtrMask); } + + bool Tag() const { return bool(mUint & kTagMask); } +}; + +// A live heap block. Stores both basic data and data about reports, if we're +// in DarkMatter mode. +class LiveBlock { + const void* mPtr; + const size_t mReqSize; // size requested + + // The stack trace where this block was allocated, or nullptr if we didn't + // record one. + const StackTrace* const mAllocStackTrace; + + // This array has two elements because we record at most two reports of a + // block. + // - Ptr: |mReportStackTrace| - stack trace where this block was reported. + // nullptr if not reported. + // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on + // allocation? If so, DMD must not clear the report at the end of + // Analyze(). Only relevant if |mReportStackTrace| is non-nullptr. + // + // |mPtr| is used as the key in LiveBlockTable, so it's ok for this member + // to be |mutable|. + // + // Only used in DarkMatter mode. + mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2]; + + public: + LiveBlock(const void* aPtr, size_t aReqSize, + const StackTrace* aAllocStackTrace) + : mPtr(aPtr), + mReqSize(aReqSize), + mAllocStackTrace(aAllocStackTrace), + mReportStackTrace_mReportedOnAlloc() // all fields get zeroed + {} + + const void* Address() const { return mPtr; } + + size_t ReqSize() const { return mReqSize; } + + size_t SlopSize() const { return MallocSizeOf(mPtr) - mReqSize; } + + const StackTrace* AllocStackTrace() const { return mAllocStackTrace; } + + const StackTrace* ReportStackTrace1() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[0].Ptr(); + } + + const StackTrace* ReportStackTrace2() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[1].Ptr(); + } + + bool ReportedOnAlloc1() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[0].Tag(); + } + + bool ReportedOnAlloc2() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[1].Tag(); + } + + void AddStackTracesToTable(StackTraceSet& aStackTraces) const { + if (AllocStackTrace()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(AllocStackTrace())); + } + if (gOptions->IsDarkMatterMode()) { + if (ReportStackTrace1()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(ReportStackTrace1())); + } + if (ReportStackTrace2()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(ReportStackTrace2())); + } + } + } + + uint32_t NumReports() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + if (ReportStackTrace2()) { + MOZ_ASSERT(ReportStackTrace1()); + return 2; + } + if (ReportStackTrace1()) { + return 1; + } + return 0; + } + + // This is |const| thanks to the |mutable| fields above. + void Report(Thread* aT, bool aReportedOnAlloc) const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + // We don't bother recording reports after the 2nd one. + uint32_t numReports = NumReports(); + if (numReports < 2) { + mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT), + aReportedOnAlloc); + } + } + + void UnreportIfNotReportedOnAlloc() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) { + mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0); + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); + + } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) { + // Shift the 2nd report down to the 1st one. + mReportStackTrace_mReportedOnAlloc[0] = + mReportStackTrace_mReportedOnAlloc[1]; + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); + + } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) { + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); + } + } + + // Hash policy. + + typedef const void* Lookup; + + static mozilla::HashNumber hash(const void* const& aPtr) { + return mozilla::HashGeneric(aPtr); + } + + static bool match(const LiveBlock& aB, const void* const& aPtr) { + return aB.mPtr == aPtr; + } +}; + +// A table of live blocks where the lookup key is the block address. +typedef mozilla::HashSet<LiveBlock, LiveBlock, InfallibleAllocPolicy> + LiveBlockTable; +static LiveBlockTable* gLiveBlockTable = nullptr; + +class AggregatedLiveBlockHashPolicy { + public: + typedef const LiveBlock* const Lookup; + + static mozilla::HashNumber hash(const LiveBlock* const& aB) { + return gOptions->IsDarkMatterMode() + ? mozilla::HashGeneric( + aB->ReqSize(), aB->SlopSize(), aB->AllocStackTrace(), + aB->ReportedOnAlloc1(), aB->ReportedOnAlloc2()) + : mozilla::HashGeneric(aB->ReqSize(), aB->SlopSize(), + aB->AllocStackTrace()); + } + + static bool match(const LiveBlock* const& aA, const LiveBlock* const& aB) { + return gOptions->IsDarkMatterMode() + ? aA->ReqSize() == aB->ReqSize() && + aA->SlopSize() == aB->SlopSize() && + aA->AllocStackTrace() == aB->AllocStackTrace() && + aA->ReportStackTrace1() == aB->ReportStackTrace1() && + aA->ReportStackTrace2() == aB->ReportStackTrace2() + : aA->ReqSize() == aB->ReqSize() && + aA->SlopSize() == aB->SlopSize() && + aA->AllocStackTrace() == aB->AllocStackTrace(); + } +}; + +// A table of live blocks where the lookup key is everything but the block +// address. For aggregating similar live blocks at output time. +typedef mozilla::HashMap<const LiveBlock*, size_t, + AggregatedLiveBlockHashPolicy, InfallibleAllocPolicy> + AggregatedLiveBlockTable; + +// A freed heap block. +class DeadBlock { + const size_t mReqSize; // size requested + const size_t mSlopSize; // slop above size requested + + // The stack trace where this block was allocated. + const StackTrace* const mAllocStackTrace; + + public: + DeadBlock() : mReqSize(0), mSlopSize(0), mAllocStackTrace(nullptr) {} + + explicit DeadBlock(const LiveBlock& aLb) + : mReqSize(aLb.ReqSize()), + mSlopSize(aLb.SlopSize()), + mAllocStackTrace(aLb.AllocStackTrace()) {} + + ~DeadBlock() {} + + size_t ReqSize() const { return mReqSize; } + size_t SlopSize() const { return mSlopSize; } + + const StackTrace* AllocStackTrace() const { return mAllocStackTrace; } + + void AddStackTracesToTable(StackTraceSet& aStackTraces) const { + if (AllocStackTrace()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(AllocStackTrace())); + } + } + + // Hash policy. + + typedef DeadBlock Lookup; + + static mozilla::HashNumber hash(const DeadBlock& aB) { + return mozilla::HashGeneric(aB.ReqSize(), aB.SlopSize(), + aB.AllocStackTrace()); + } + + static bool match(const DeadBlock& aA, const DeadBlock& aB) { + return aA.ReqSize() == aB.ReqSize() && aA.SlopSize() == aB.SlopSize() && + aA.AllocStackTrace() == aB.AllocStackTrace(); + } +}; + +// For each unique DeadBlock value we store a count of how many actual dead +// blocks have that value. +typedef mozilla::HashMap<DeadBlock, size_t, DeadBlock, InfallibleAllocPolicy> + DeadBlockTable; +static DeadBlockTable* gDeadBlockTable = nullptr; + +// Add the dead block to the dead block table, if that's appropriate. +void MaybeAddToDeadBlockTable(const DeadBlock& aDb) { + if (gOptions->IsCumulativeMode() && aDb.AllocStackTrace()) { + AutoLockState lock; + if (DeadBlockTable::AddPtr p = gDeadBlockTable->lookupForAdd(aDb)) { + p->value() += 1; + } else { + MOZ_ALWAYS_TRUE(gDeadBlockTable->add(p, aDb, 1)); + } + } +} + +// Add a pointer to each live stack trace into the given StackTraceSet. (A +// stack trace is live if it's used by one of the live blocks.) +static void GatherUsedStackTraces(StackTraceSet& aStackTraces) { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); + + aStackTraces.clear(); + MOZ_ALWAYS_TRUE(aStackTraces.reserve(512)); + + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + iter.get().AddStackTracesToTable(aStackTraces); + } + + for (auto iter = gDeadBlockTable->iter(); !iter.done(); iter.next()) { + iter.get().key().AddStackTracesToTable(aStackTraces); + } +} + +// Delete stack traces that we aren't using, and compact our hashtable. +static void GCStackTraces() { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); + + StackTraceSet usedStackTraces; + GatherUsedStackTraces(usedStackTraces); + + // Delete all unused stack traces from gStackTraceTable. The ModIterator + // destructor will automatically rehash and compact the table. + for (auto iter = gStackTraceTable->modIter(); !iter.done(); iter.next()) { + StackTrace* const& st = iter.get(); + if (!usedStackTraces.has(st)) { + iter.remove(); + InfallibleAllocPolicy::delete_(st); + } + } + + // Schedule a GC when we have twice as many stack traces as we had right after + // this GC finished. + gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count(); +} + +//--------------------------------------------------------------------------- +// malloc/free callbacks +//--------------------------------------------------------------------------- + +static FastBernoulliTrial* gBernoulli; + +// In testing, a probability of 0.003 resulted in ~25% of heap blocks getting +// a stack trace and ~80% of heap bytes getting a stack trace. (This is +// possible because big heap blocks are more likely to get a stack trace.) +// +// We deliberately choose not to give the user control over this probability +// (other than effectively setting it to 1 via --stacks=full) because it's +// quite inscrutable and generally the user just wants "faster and imprecise" +// or "slower and precise". +// +// The random number seeds are arbitrary and were obtained from random.org. If +// you change them you'll need to change the tests as well, because their +// expected output is based on the particular sequence of trial results that we +// get with these seeds. +static void ResetBernoulli() { + new (gBernoulli) + FastBernoulliTrial(0.003, 0x8e26eeee166bc8ca, 0x56820f304a9c9ae0); +} + +static void AllocCallback(void* aPtr, size_t aReqSize, Thread* aT) { + if (!aPtr) { + return; + } + + AutoLockState lock; + AutoBlockIntercepts block(aT); + + size_t actualSize = gMallocTable.malloc_usable_size(aPtr); + + // We may or may not record the allocation stack trace, depending on the + // options and the outcome of a Bernoulli trial. + bool getTrace = gOptions->DoFullStacks() || gBernoulli->trial(actualSize); + LiveBlock b(aPtr, aReqSize, getTrace ? StackTrace::Get(aT) : nullptr); + MOZ_ALWAYS_TRUE(gLiveBlockTable->putNew(aPtr, b)); +} + +static void FreeCallback(void* aPtr, Thread* aT, DeadBlock* aDeadBlock) { + if (!aPtr) { + return; + } + + AutoLockState lock; + AutoBlockIntercepts block(aT); + + if (LiveBlockTable::Ptr lb = gLiveBlockTable->lookup(aPtr)) { + if (gOptions->IsCumulativeMode()) { + // Copy it out so it can be added to the dead block list later. + new (aDeadBlock) DeadBlock(*lb); + } + gLiveBlockTable->remove(lb); + } else { + // We have no record of the block. It must be a bogus pointer, or one that + // DMD wasn't able to see allocated. This should be extremely rare. + } + + if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) { + GCStackTraces(); + } +} + +//--------------------------------------------------------------------------- +// malloc/free interception +//--------------------------------------------------------------------------- + +static bool Init(malloc_table_t* aMallocTable); + +} // namespace dmd +} // namespace mozilla + +static void* replace_malloc(size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + // Intercepts are blocked, which means this must be a call to malloc + // triggered indirectly by DMD (e.g. via MozStackWalk). Be infallible. + return InfallibleAllocPolicy::malloc_(aSize); + } + + // This must be a call to malloc from outside DMD. Intercept it. + void* ptr = gMallocTable.malloc(aSize); + AllocCallback(ptr, aSize, t); + return ptr; +} + +static void* replace_calloc(size_t aCount, size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::calloc_(aCount, aSize); + } + + // |aCount * aSize| could overflow, but if that happens then + // |gMallocTable.calloc()| will return nullptr and |AllocCallback()| will + // return immediately without using the overflowed value. + void* ptr = gMallocTable.calloc(aCount, aSize); + AllocCallback(ptr, aCount * aSize, t); + return ptr; +} + +static void* replace_realloc(void* aOldPtr, size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::realloc_(aOldPtr, aSize); + } + + // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|. + if (!aOldPtr) { + return replace_malloc(aSize); + } + + // Be very careful here! Must remove the block from the table before doing + // the realloc to avoid races, just like in replace_free(). + // Nb: This does an unnecessary hashtable remove+add if the block doesn't + // move, but doing better isn't worth the effort. + DeadBlock db; + FreeCallback(aOldPtr, t, &db); + void* ptr = gMallocTable.realloc(aOldPtr, aSize); + if (ptr) { + AllocCallback(ptr, aSize, t); + MaybeAddToDeadBlockTable(db); + } else { + // If realloc fails, we undo the prior operations by re-inserting the old + // pointer into the live block table. We don't have to do anything with the + // dead block list because the dead block hasn't yet been inserted. The + // block will end up looking like it was allocated for the first time here, + // which is untrue, and the slop bytes will be zero, which may be untrue. + // But this case is rare and doing better isn't worth the effort. + AllocCallback(aOldPtr, gMallocTable.malloc_usable_size(aOldPtr), t); + } + return ptr; +} + +static void* replace_memalign(size_t aAlignment, size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::memalign_(aAlignment, aSize); + } + + void* ptr = gMallocTable.memalign(aAlignment, aSize); + AllocCallback(ptr, aSize, t); + return ptr; +} + +static void replace_free(void* aPtr) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::free_(aPtr); + } + + // Do the actual free after updating the table. Otherwise, another thread + // could call malloc and get the freed block and update the table, and then + // our update here would remove the newly-malloc'd block. + DeadBlock db; + FreeCallback(aPtr, t, &db); + MaybeAddToDeadBlockTable(db); + gMallocTable.free(aPtr); +} + +void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) { + if (mozilla::dmd::Init(aMallocTable)) { +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#define MALLOC_DECL(name, ...) aMallocTable->name = replace_##name; +#include "malloc_decls.h" + *aBridge = mozilla::dmd::gDMDBridge; + } +} + +namespace mozilla { +namespace dmd { + +//--------------------------------------------------------------------------- +// Options (Part 2) +//--------------------------------------------------------------------------- + +// Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah" +// (where "blah" is non-empty) and return the pointer to "blah". |aArg| can +// have leading space chars (but not other whitespace). +const char* Options::ValueIfMatch(const char* aArg, const char* aOptionName) { + MOZ_ASSERT(!isspace(*aArg)); // any leading whitespace should not remain + size_t optionLen = strlen(aOptionName); + if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' && + aArg[optionLen + 1]) { + return aArg + optionLen + 1; + } + return nullptr; +} + +// Extracts a |long| value for an option from an argument. It must be within +// the range |aMin..aMax| (inclusive). +bool Options::GetLong(const char* aArg, const char* aOptionName, long aMin, + long aMax, long* aValue) { + if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) { + char* endPtr; + *aValue = strtol(optionValue, &endPtr, /* base */ 10); + if (!*endPtr && aMin <= *aValue && *aValue <= aMax && *aValue != LONG_MIN && + *aValue != LONG_MAX) { + return true; + } + } + return false; +} + +// Extracts a |bool| value for an option -- encoded as "yes" or "no" -- from an +// argument. +bool Options::GetBool(const char* aArg, const char* aOptionName, bool* aValue) { + if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) { + if (strcmp(optionValue, "yes") == 0) { + *aValue = true; + return true; + } + if (strcmp(optionValue, "no") == 0) { + *aValue = false; + return true; + } + } + return false; +} + +Options::Options(const char* aDMDEnvVar) + : mDMDEnvVar(aDMDEnvVar ? InfallibleAllocPolicy::strdup_(aDMDEnvVar) + : nullptr), + mMode(Mode::DarkMatter), + mStacks(Stacks::Partial), + mShowDumpStats(false) { + char* e = mDMDEnvVar; + if (e && strcmp(e, "1") != 0) { + bool isEnd = false; + while (!isEnd) { + // Consume leading whitespace. + while (isspace(*e)) { + e++; + } + + // Save the start of the arg. + const char* arg = e; + + // Find the first char after the arg, and temporarily change it to '\0' + // to isolate the arg. + while (!isspace(*e) && *e != '\0') { + e++; + } + char replacedChar = *e; + isEnd = replacedChar == '\0'; + *e = '\0'; + + // Handle arg + bool myBool; + if (strcmp(arg, "--mode=live") == 0) { + mMode = Mode::Live; + } else if (strcmp(arg, "--mode=dark-matter") == 0) { + mMode = Mode::DarkMatter; + } else if (strcmp(arg, "--mode=cumulative") == 0) { + mMode = Mode::Cumulative; + } else if (strcmp(arg, "--mode=scan") == 0) { + mMode = Mode::Scan; + + } else if (strcmp(arg, "--stacks=full") == 0) { + mStacks = Stacks::Full; + } else if (strcmp(arg, "--stacks=partial") == 0) { + mStacks = Stacks::Partial; + + } else if (GetBool(arg, "--show-dump-stats", &myBool)) { + mShowDumpStats = myBool; + + } else if (strcmp(arg, "") == 0) { + // This can only happen if there is trailing whitespace. Ignore. + MOZ_ASSERT(isEnd); + + } else { + BadArg(arg); + } + + // Undo the temporary isolation. + *e = replacedChar; + } + } + + if (mMode == Mode::Scan) { + mStacks = Stacks::Full; + } +} + +void Options::BadArg(const char* aArg) { + StatusMsg("\n"); + StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg); + StatusMsg("See the output of |mach help run| for the allowed options.\n"); + exit(1); +} + +const char* Options::ModeString() const { + switch (mMode) { + case Mode::Live: + return "live"; + case Mode::DarkMatter: + return "dark-matter"; + case Mode::Cumulative: + return "cumulative"; + case Mode::Scan: + return "scan"; + default: + MOZ_ASSERT(false); + return "(unknown DMD mode)"; + } +} + +//--------------------------------------------------------------------------- +// DMD start-up +//--------------------------------------------------------------------------- + +#ifndef XP_WIN +static void prefork() { + if (gStateLock) { + gStateLock->Lock(); + } +} + +static void postfork() { + if (gStateLock) { + gStateLock->Unlock(); + } +} +#endif + +// WARNING: this function runs *very* early -- before all static initializers +// have run. For this reason, non-scalar globals such as gStateLock and +// gStackTraceTable are allocated dynamically (so we can guarantee their +// construction in this function) rather than statically. +static bool Init(malloc_table_t* aMallocTable) { + // DMD is controlled by the |DMD| environment variable. + const char* e = getenv("DMD"); + + if (!e) { + return false; + } + // Initialize the function table first, because StatusMsg uses + // InfallibleAllocPolicy::malloc_, which uses it. + gMallocTable = *aMallocTable; + + StatusMsg("$DMD = '%s'\n", e); + + gDMDBridge = InfallibleAllocPolicy::new_<DMDBridge>(); + +#ifndef XP_WIN + // Avoid deadlocks when forking by acquiring our state lock prior to forking + // and releasing it after forking. See |LogAlloc|'s |replace_init| for + // in-depth details. + // + // Note: This must run after attempting an allocation so as to give the + // system malloc a chance to insert its own atfork handler. + pthread_atfork(prefork, postfork, postfork); +#endif + // Parse $DMD env var. + gOptions = InfallibleAllocPolicy::new_<Options>(e); + + gStateLock = InfallibleAllocPolicy::new_<Mutex>(); + + gBernoulli = (FastBernoulliTrial*)InfallibleAllocPolicy::malloc_( + sizeof(FastBernoulliTrial)); + ResetBernoulli(); + + Thread::Init(); + + { + AutoLockState lock; + + gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>(8192); + gLiveBlockTable = InfallibleAllocPolicy::new_<LiveBlockTable>(8192); + + // Create this even if the mode isn't Cumulative (albeit with a small + // size), in case the mode is changed later on (as is done by SmokeDMD.cpp, + // for example). + size_t tableSize = gOptions->IsCumulativeMode() ? 8192 : 4; + gDeadBlockTable = InfallibleAllocPolicy::new_<DeadBlockTable>(tableSize); + } + + return true; +} + +//--------------------------------------------------------------------------- +// Block reporting and unreporting +//--------------------------------------------------------------------------- + +static void ReportHelper(const void* aPtr, bool aReportedOnAlloc) { + if (!gOptions->IsDarkMatterMode() || !aPtr) { + return; + } + + Thread* t = Thread::Fetch(); + + AutoBlockIntercepts block(t); + AutoLockState lock; + + if (LiveBlockTable::Ptr p = gLiveBlockTable->lookup(aPtr)) { + p->Report(t, aReportedOnAlloc); + } else { + // We have no record of the block. It must be a bogus pointer. This should + // be extremely rare because Report() is almost always called in + // conjunction with a malloc_size_of-style function. Print a message so + // that we get some feedback. + StatusMsg("Unknown pointer %p\n", aPtr); + } +} + +void DMDFuncs::Report(const void* aPtr) { + ReportHelper(aPtr, /* onAlloc */ false); +} + +void DMDFuncs::ReportOnAlloc(const void* aPtr) { + ReportHelper(aPtr, /* onAlloc */ true); +} + +//--------------------------------------------------------------------------- +// DMD output +//--------------------------------------------------------------------------- + +// The version number of the output format. Increment this if you make +// backwards-incompatible changes to the format. See DMD.h for the version +// history. +static const int kOutputVersionNumber = 5; + +// Note that, unlike most SizeOf* functions, this function does not take a +// |mozilla::MallocSizeOf| argument. That's because those arguments are +// primarily to aid DMD track heap blocks... but DMD deliberately doesn't track +// heap blocks it allocated for itself! +// +// SizeOfInternal should be called while you're holding the state lock and +// while intercepts are blocked; SizeOf acquires the lock and blocks +// intercepts. + +static void SizeOfInternal(Sizes* aSizes) { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); + + aSizes->Clear(); + + StackTraceSet usedStackTraces; + GatherUsedStackTraces(usedStackTraces); + + for (auto iter = gStackTraceTable->iter(); !iter.done(); iter.next()) { + StackTrace* const& st = iter.get(); + + if (usedStackTraces.has(st)) { + aSizes->mStackTracesUsed += MallocSizeOf(st); + } else { + aSizes->mStackTracesUnused += MallocSizeOf(st); + } + } + + aSizes->mStackTraceTable = + gStackTraceTable->shallowSizeOfIncludingThis(MallocSizeOf); + + aSizes->mLiveBlockTable = + gLiveBlockTable->shallowSizeOfIncludingThis(MallocSizeOf); + + aSizes->mDeadBlockTable = + gDeadBlockTable->shallowSizeOfIncludingThis(MallocSizeOf); +} + +void DMDFuncs::SizeOf(Sizes* aSizes) { + aSizes->Clear(); + + AutoBlockIntercepts block(Thread::Fetch()); + AutoLockState lock; + SizeOfInternal(aSizes); +} + +void DMDFuncs::ClearReports() { + if (!gOptions->IsDarkMatterMode()) { + return; + } + + AutoLockState lock; + + // Unreport all blocks that were marked reported by a memory reporter. This + // excludes those that were reported on allocation, because they need to keep + // their reported marking. + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + iter.get().UnreportIfNotReportedOnAlloc(); + } +} + +class ToIdStringConverter final { + public: + ToIdStringConverter() : mIdMap(512), mNextId(0) {} + + // Converts a pointer to a unique ID. Reuses the existing ID for the pointer + // if it's been seen before. + const char* ToIdString(const void* aPtr) { + uint32_t id; + PointerIdMap::AddPtr p = mIdMap.lookupForAdd(aPtr); + if (!p) { + id = mNextId++; + MOZ_ALWAYS_TRUE(mIdMap.add(p, aPtr, id)); + } else { + id = p->value(); + } + return Base32(id); + } + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const { + return mIdMap.shallowSizeOfExcludingThis(aMallocSizeOf); + } + + private: + // This function converts an integer to base-32. We use base-32 values for + // indexing into the traceTable and the frameTable, for the following reasons. + // + // - Base-32 gives more compact indices than base-16. + // + // - 32 is a power-of-two, which makes the necessary div/mod calculations + // fast. + // + // - We can (and do) choose non-numeric digits for base-32. When + // inspecting/debugging the JSON output, non-numeric indices are easier to + // search for than numeric indices. + // + char* Base32(uint32_t aN) { + static const char digits[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef"; + + char* b = mIdBuf + kIdBufLen - 1; + *b = '\0'; + do { + b--; + if (b == mIdBuf) { + MOZ_CRASH("Base32 buffer too small"); + } + *b = digits[aN % 32]; + aN /= 32; + } while (aN); + + return b; + } + + PointerIdMap mIdMap; + uint32_t mNextId; + + // |mIdBuf| must have space for at least eight chars, which is the space + // needed to hold 'Dffffff' (including the terminating null char), which is + // the base-32 representation of 0xffffffff. + static const size_t kIdBufLen = 16; + char mIdBuf[kIdBufLen]; +}; + +// Helper class for converting a pointer value to a string. +class ToStringConverter { + public: + const char* ToPtrString(const void* aPtr) { + snprintf(kPtrBuf, sizeof(kPtrBuf) - 1, "%" PRIxPTR, (uintptr_t)aPtr); + return kPtrBuf; + } + + private: + char kPtrBuf[32]; +}; + +static void WriteBlockContents(JSONWriter& aWriter, const LiveBlock& aBlock) { + size_t numWords = aBlock.ReqSize() / sizeof(uintptr_t*); + if (numWords == 0) { + return; + } + + aWriter.StartArrayProperty("contents", aWriter.SingleLineStyle); + { + const uintptr_t** block = (const uintptr_t**)aBlock.Address(); + ToStringConverter sc; + for (size_t i = 0; i < numWords; ++i) { + aWriter.StringElement(MakeStringSpan(sc.ToPtrString(block[i]))); + } + } + aWriter.EndArray(); +} + +static void AnalyzeImpl(UniquePtr<JSONWriteFunc> aWriter) { + // Some blocks may have been allocated while creating |aWriter|. Those blocks + // will be freed at the end of this function when |write| is destroyed. The + // allocations will have occurred while intercepts were not blocked, so the + // frees better be as well, otherwise we'll get assertion failures. + // Therefore, this declaration must precede the AutoBlockIntercepts + // declaration, to ensure that |write| is destroyed *after* intercepts are + // unblocked. + JSONWriter writer(std::move(aWriter)); + + AutoBlockIntercepts block(Thread::Fetch()); + AutoLockState lock; + + // Allocate this on the heap instead of the stack because it's fairly large. + auto locService = InfallibleAllocPolicy::new_<CodeAddressService>(); + + StackTraceSet usedStackTraces(512); + PointerSet usedPcs(512); + + size_t iscSize; + + static int analysisCount = 1; + StatusMsg("Dump %d {\n", analysisCount++); + + writer.Start(); + { + writer.IntProperty("version", kOutputVersionNumber); + + writer.StartObjectProperty("invocation"); + { + const char* var = gOptions->DMDEnvVar(); + if (var) { + writer.StringProperty("dmdEnvVar", MakeStringSpan(var)); + } else { + writer.NullProperty("dmdEnvVar"); + } + + writer.StringProperty("mode", MakeStringSpan(gOptions->ModeString())); + } + writer.EndObject(); + + StatusMsg(" Constructing the heap block list...\n"); + + ToIdStringConverter isc; + ToStringConverter sc; + + writer.StartArrayProperty("blockList"); + { + // Lambda that writes out a live block. + auto writeLiveBlock = [&](const LiveBlock& aB, size_t aNum) { + aB.AddStackTracesToTable(usedStackTraces); + + MOZ_ASSERT_IF(gOptions->IsScanMode(), aNum == 1); + + writer.StartObjectElement(writer.SingleLineStyle); + { + if (gOptions->IsScanMode()) { + writer.StringProperty("addr", + MakeStringSpan(sc.ToPtrString(aB.Address()))); + WriteBlockContents(writer, aB); + } + writer.IntProperty("req", aB.ReqSize()); + if (aB.SlopSize() > 0) { + writer.IntProperty("slop", aB.SlopSize()); + } + + if (aB.AllocStackTrace()) { + writer.StringProperty( + "alloc", MakeStringSpan(isc.ToIdString(aB.AllocStackTrace()))); + } + + if (gOptions->IsDarkMatterMode() && aB.NumReports() > 0) { + writer.StartArrayProperty("reps"); + { + if (aB.ReportStackTrace1()) { + writer.StringElement( + MakeStringSpan(isc.ToIdString(aB.ReportStackTrace1()))); + } + if (aB.ReportStackTrace2()) { + writer.StringElement( + MakeStringSpan(isc.ToIdString(aB.ReportStackTrace2()))); + } + } + writer.EndArray(); + } + + if (aNum > 1) { + writer.IntProperty("num", aNum); + } + } + writer.EndObject(); + }; + + // Live blocks. + if (!gOptions->IsScanMode()) { + // At this point we typically have many LiveBlocks that differ only in + // their address. Aggregate them to reduce the size of the output file. + AggregatedLiveBlockTable agg(8192); + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + const LiveBlock& b = iter.get(); + b.AddStackTracesToTable(usedStackTraces); + + if (AggregatedLiveBlockTable::AddPtr p = agg.lookupForAdd(&b)) { + p->value() += 1; + } else { + MOZ_ALWAYS_TRUE(agg.add(p, &b, 1)); + } + } + + // Now iterate over the aggregated table. + for (auto iter = agg.iter(); !iter.done(); iter.next()) { + const LiveBlock& b = *iter.get().key(); + size_t num = iter.get().value(); + writeLiveBlock(b, num); + } + + } else { + // In scan mode we cannot aggregate because we print each live block's + // address and contents. + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + const LiveBlock& b = iter.get(); + b.AddStackTracesToTable(usedStackTraces); + + writeLiveBlock(b, 1); + } + } + + // Dead blocks. + for (auto iter = gDeadBlockTable->iter(); !iter.done(); iter.next()) { + const DeadBlock& b = iter.get().key(); + b.AddStackTracesToTable(usedStackTraces); + + size_t num = iter.get().value(); + MOZ_ASSERT(num > 0); + + writer.StartObjectElement(writer.SingleLineStyle); + { + writer.IntProperty("req", b.ReqSize()); + if (b.SlopSize() > 0) { + writer.IntProperty("slop", b.SlopSize()); + } + if (b.AllocStackTrace()) { + writer.StringProperty( + "alloc", MakeStringSpan(isc.ToIdString(b.AllocStackTrace()))); + } + + if (num > 1) { + writer.IntProperty("num", num); + } + } + writer.EndObject(); + } + } + writer.EndArray(); + + StatusMsg(" Constructing the stack trace table...\n"); + + writer.StartObjectProperty("traceTable"); + { + for (auto iter = usedStackTraces.iter(); !iter.done(); iter.next()) { + const StackTrace* const st = iter.get(); + writer.StartArrayProperty(MakeStringSpan(isc.ToIdString(st)), + writer.SingleLineStyle); + { + for (uint32_t i = 0; i < st->Length(); i++) { + const void* pc = st->Pc(i); + writer.StringElement(MakeStringSpan(isc.ToIdString(pc))); + MOZ_ALWAYS_TRUE(usedPcs.put(pc)); + } + } + writer.EndArray(); + } + } + writer.EndObject(); + + StatusMsg(" Constructing the stack frame table...\n"); + + writer.StartObjectProperty("frameTable"); + { + static const size_t locBufLen = 1024; + char locBuf[locBufLen]; + + for (auto iter = usedPcs.iter(); !iter.done(); iter.next()) { + const void* const pc = iter.get(); + + // Use 0 for the frame number. See the JSON format description comment + // in DMD.h to understand why. + locService->GetLocation(0, pc, locBuf, locBufLen); + writer.StringProperty(MakeStringSpan(isc.ToIdString(pc)), + MakeStringSpan(locBuf)); + } + } + writer.EndObject(); + + iscSize = isc.sizeOfExcludingThis(MallocSizeOf); + } + writer.End(); + + if (gOptions->ShowDumpStats()) { + Sizes sizes; + SizeOfInternal(&sizes); + + static const size_t kBufLen = 64; + char buf1[kBufLen]; + char buf2[kBufLen]; + char buf3[kBufLen]; + + StatusMsg(" Execution measurements {\n"); + + StatusMsg(" Data structures that persist after Dump() ends {\n"); + + StatusMsg(" Used stack traces: %10s bytes\n", + Show(sizes.mStackTracesUsed, buf1, kBufLen)); + + StatusMsg(" Unused stack traces: %10s bytes\n", + Show(sizes.mStackTracesUnused, buf1, kBufLen)); + + StatusMsg(" Stack trace table: %10s bytes (%s entries, %s used)\n", + Show(sizes.mStackTraceTable, buf1, kBufLen), + Show(gStackTraceTable->capacity(), buf2, kBufLen), + Show(gStackTraceTable->count(), buf3, kBufLen)); + + StatusMsg(" Live block table: %10s bytes (%s entries, %s used)\n", + Show(sizes.mLiveBlockTable, buf1, kBufLen), + Show(gLiveBlockTable->capacity(), buf2, kBufLen), + Show(gLiveBlockTable->count(), buf3, kBufLen)); + + StatusMsg(" Dead block table: %10s bytes (%s entries, %s used)\n", + Show(sizes.mDeadBlockTable, buf1, kBufLen), + Show(gDeadBlockTable->capacity(), buf2, kBufLen), + Show(gDeadBlockTable->count(), buf3, kBufLen)); + + StatusMsg(" }\n"); + StatusMsg(" Data structures that are destroyed after Dump() ends {\n"); + + StatusMsg( + " Location service: %10s bytes\n", + Show(locService->SizeOfIncludingThis(MallocSizeOf), buf1, kBufLen)); + StatusMsg(" Used stack traces set: %10s bytes\n", + Show(usedStackTraces.shallowSizeOfExcludingThis(MallocSizeOf), + buf1, kBufLen)); + StatusMsg( + " Used PCs set: %10s bytes\n", + Show(usedPcs.shallowSizeOfExcludingThis(MallocSizeOf), buf1, kBufLen)); + StatusMsg(" Pointer ID map: %10s bytes\n", + Show(iscSize, buf1, kBufLen)); + + StatusMsg(" }\n"); + StatusMsg(" Counts {\n"); + + size_t hits = locService->NumCacheHits(); + size_t misses = locService->NumCacheMisses(); + size_t requests = hits + misses; + StatusMsg(" Location service: %10s requests\n", + Show(requests, buf1, kBufLen)); + + size_t count = locService->CacheCount(); + size_t capacity = locService->CacheCapacity(); + StatusMsg( + " Location service cache: " + "%4.1f%% hit rate, %.1f%% occupancy at end\n", + Percent(hits, requests), Percent(count, capacity)); + + StatusMsg(" }\n"); + StatusMsg(" }\n"); + } + + InfallibleAllocPolicy::delete_(locService); + + StatusMsg("}\n"); +} + +void DMDFuncs::Analyze(UniquePtr<JSONWriteFunc> aWriter) { + AnalyzeImpl(std::move(aWriter)); + ClearReports(); +} + +//--------------------------------------------------------------------------- +// Testing +//--------------------------------------------------------------------------- + +void DMDFuncs::ResetEverything(const char* aOptions) { + AutoLockState lock; + + // Reset options. + InfallibleAllocPolicy::delete_(gOptions); + gOptions = InfallibleAllocPolicy::new_<Options>(aOptions); + + // Clear all existing blocks. + gLiveBlockTable->clear(); + gDeadBlockTable->clear(); + + // Reset gBernoulli to a deterministic state. (Its current state depends on + // all previous trials.) + ResetBernoulli(); +} + +} // namespace dmd +} // namespace mozilla diff --git a/memory/replace/dmd/DMD.h b/memory/replace/dmd/DMD.h new file mode 100644 index 0000000000..c057047800 --- /dev/null +++ b/memory/replace/dmd/DMD.h @@ -0,0 +1,291 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DMD_h___ +#define DMD_h___ + +#include <stdarg.h> +#include <string.h> + +#include <utility> + +#include "mozilla/DebugOnly.h" +#include "mozilla/Types.h" +#include "mozilla/UniquePtr.h" +#include "replace_malloc_bridge.h" + +namespace mozilla { + +class JSONWriteFunc; + +namespace dmd { + +struct Sizes { + size_t mStackTracesUsed; + size_t mStackTracesUnused; + size_t mStackTraceTable; + size_t mLiveBlockTable; + size_t mDeadBlockTable; + + Sizes() { Clear(); } + void Clear() { memset(this, 0, sizeof(Sizes)); } +}; + +// See further below for a description of each method. The DMDFuncs class +// should contain a virtual method for each of them (except IsRunning, +// which can be inferred from the DMDFuncs singleton existing). +struct DMDFuncs { + virtual void Report(const void*); + + virtual void ReportOnAlloc(const void*); + + virtual void ClearReports(); + + virtual void Analyze(UniquePtr<JSONWriteFunc>); + + virtual void SizeOf(Sizes*); + + virtual void StatusMsg(const char*, va_list) MOZ_FORMAT_PRINTF(2, 0); + + virtual void ResetEverything(const char*); + +#ifndef REPLACE_MALLOC_IMPL + // We deliberately don't use ReplaceMalloc::GetDMDFuncs here, because if we + // did, the following would happen. + // - The code footprint of each call to Get() larger as GetDMDFuncs ends + // up inlined. + // - When no replace-malloc library is loaded, the number of instructions + // executed is equivalent, but don't necessarily fit in the same cache + // line. + // - When a non-DMD replace-malloc library is loaded, the overhead is + // higher because there is first a check for the replace malloc bridge + // and then for the DMDFuncs singleton. + // Initializing the DMDFuncs singleton on the first access makes the + // overhead even worse. Either Get() is inlined and massive, or it isn't + // and a simple value check becomes a function call. + static DMDFuncs* Get() { return sSingleton.Get(); } + + private: + // Wrapper class keeping a pointer to the DMD functions. It is statically + // initialized because it needs to be set early enough. + // Debug builds also check that it's never accessed before the static + // initialization actually occured, which could be the case if some other + // static initializer ended up calling into DMD. + class Singleton { + public: + Singleton() + : mValue(ReplaceMalloc::GetDMDFuncs()) +# ifdef DEBUG + , + mInitialized(true) +# endif + { + } + + DMDFuncs* Get() { + MOZ_ASSERT(mInitialized); + return mValue; + } + + private: + DMDFuncs* mValue; +# ifdef DEBUG + bool mInitialized; +# endif + }; + + // This singleton pointer must be defined on the program side. In Gecko, + // this is done in xpcom/base/nsMemoryInfoDumper.cpp. + static /* DMDFuncs:: */ Singleton sSingleton; +#endif +}; + +#ifndef REPLACE_MALLOC_IMPL +// Mark a heap block as reported by a memory reporter. +inline void Report(const void* aPtr) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->Report(aPtr); + } +} + +// Mark a heap block as reported immediately on allocation. +inline void ReportOnAlloc(const void* aPtr) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->ReportOnAlloc(aPtr); + } +} + +// Clears existing reportedness data from any prior runs of the memory +// reporters. The following sequence should be used. +// - ClearReports() +// - run the memory reporters +// - Analyze() +// This sequence avoids spurious twice-reported warnings. +inline void ClearReports() { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->ClearReports(); + } +} + +// Determines which heap blocks have been reported, and dumps JSON output +// (via |aWriter|) describing the heap. +// +// The following sample output contains comments that explain the format and +// design choices. The output files can be quite large, so a number of +// decisions were made to minimize size, such as using short property names and +// omitting properties whenever possible. +// +// { +// // The version number of the format, which will be incremented each time +// // backwards-incompatible changes are made. A mandatory integer. +// // +// // Version history: +// // - 1: Bug 1044709 +// // - 2: Bug 1094552 +// // - 3: Bug 1100851 +// // - 4: Bug 1121830 +// // - 5: Bug 1253512 +// "version": 5, +// +// // Information about how DMD was invoked. A mandatory object. +// "invocation": { +// // The contents of the $DMD environment variable. A string, or |null| if +// // $DMD is undefined. +// "dmdEnvVar": "--mode=dark-matter", +// +// // The profiling mode. A mandatory string taking one of the following +// // values: "live", "dark-matter", "cumulative", "scan". +// "mode": "dark-matter", +// }, +// +// // Details of all analyzed heap blocks. A mandatory array. +// "blockList": [ +// // An example of a heap block. +// { +// // Requested size, in bytes. This is a mandatory integer. +// "req": 3584, +// +// // Requested slop size, in bytes. This is mandatory if it is non-zero, +// // but omitted otherwise. +// "slop": 512, +// +// // The stack trace at which the block was allocated. An optional +// // string that indexes into the "traceTable" object. If omitted, no +// // allocation stack trace was recorded for the block. +// "alloc": "A", +// +// // One or more stack traces at which this heap block was reported by a +// // memory reporter. An optional array that will only be present in +// // "dark-matter" mode. The elements are strings that index into +// // the "traceTable" object. +// "reps": ["B"] +// +// // The number of heap blocks with exactly the above properties. This +// // is mandatory if it is greater than one, but omitted otherwise. +// // (Blocks with identical properties don't have to be aggregated via +// // this property, but it can greatly reduce output file size.) +// "num": 5, +// +// // The address of the block. This is mandatory in "scan" mode, but +// // omitted otherwise. +// "addr": "4e4e4e4e", +// +// // The contents of the block, read one word at a time. This is +// // mandatory in "scan" mode for blocks at least one word long, but +// // omitted otherwise. +// "contents": ["0", "6", "7f7f7f7f", "0"] +// } +// ], +// +// // The stack traces referenced by elements of the "blockList" array. This +// // could be an array, but making it an object makes it easier to see +// // which stacks correspond to which references in the "blockList" array. +// "traceTable": { +// // Each property corresponds to a stack trace mentioned in the "blocks" +// // object. Each element is an index into the "frameTable" object. +// "A": ["D", "E"], +// "B": ["F", "G"] +// }, +// +// // The stack frames referenced by the "traceTable" object. The +// // descriptions can be quite long, so they are stored separately from the +// // "traceTable" object so that each one only has to be written once. +// // This could also be an array, but again, making it an object makes it +// // easier to see which frames correspond to which references in the +// // "traceTable" object. +// "frameTable": { +// // Each property key is a frame key mentioned in the "traceTable" object. +// // Each property value is a string containing a frame description. Each +// // frame description must be in a format recognized by `fix_stacks.py`, +// // which requires a frame number at the start. Because each stack frame +// // description in this table can be shared between multiple stack +// // traces, we use a dummy value of #00. The proper frame number can be +// // reconstructed later by scripts that output stack traces in a +// // conventional non-shared format. +// "D": "#00: foo (Foo.cpp:123)", +// "E": "#00: bar (Bar.cpp:234)", +// "F": "#00: baz (Baz.cpp:345)", +// "G": "#00: quux (Quux.cpp:456)" +// } +// } +// +// Implementation note: normally, this function wouldn't be templated, but in +// that case, the function is compiled, which makes the destructor for the +// UniquePtr fire up, and that needs JSONWriteFunc to be fully defined. That, +// in turn, requires to include JSONWriter.h, which includes +// double-conversion.h, which ends up breaking various things built with +// -Werror for various reasons. +// +template <typename JSONWriteFunc> +inline void Analyze(UniquePtr<JSONWriteFunc> aWriteFunc) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->Analyze(std::move(aWriteFunc)); + } +} + +// Gets the size of various data structures. Used to implement a memory +// reporter for DMD. +inline void SizeOf(Sizes* aSizes) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->SizeOf(aSizes); + } +} + +// Prints a status message prefixed with "DMD[<pid>]". Use sparingly. +MOZ_FORMAT_PRINTF(1, 2) +inline void StatusMsg(const char* aFmt, ...) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + va_list ap; + va_start(ap, aFmt); + funcs->StatusMsg(aFmt, ap); + va_end(ap); + } +} + +// Indicates whether or not DMD is running. +inline bool IsRunning() { return !!DMDFuncs::Get(); } + +// Resets all DMD options and then sets new ones according to those specified +// in |aOptions|. Also clears all recorded data about allocations. Only used +// for testing purposes. +inline void ResetEverything(const char* aOptions) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->ResetEverything(aOptions); + } +} +#endif + +} // namespace dmd +} // namespace mozilla + +#endif /* DMD_h___ */ diff --git a/memory/replace/dmd/README b/memory/replace/dmd/README new file mode 100644 index 0000000000..0b2b9cc1e0 --- /dev/null +++ b/memory/replace/dmd/README @@ -0,0 +1,3 @@ +This is DMD. See +https://developer.mozilla.org/en-US/docs/Mozilla/Performance/DMD for +details on how to use it. diff --git a/memory/replace/dmd/block_analyzer.py b/memory/replace/dmd/block_analyzer.py new file mode 100644 index 0000000000..dcdfeff7ff --- /dev/null +++ b/memory/replace/dmd/block_analyzer.py @@ -0,0 +1,293 @@ +#!/usr/bin/python + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# From a scan mode DMD log, extract some information about a +# particular block, such as its allocation stack or which other blocks +# contain pointers to it. This can be useful when investigating leaks +# caused by unknown references to refcounted objects. + +from __future__ import absolute_import, print_function + +import json +import gzip +import sys +import argparse +import re + + +# The DMD output version this script handles. +outputVersion = 5 + +# If --ignore-alloc-fns is specified, stack frames containing functions that +# match these strings will be removed from the *start* of stack traces. (Once +# we hit a non-matching frame, any subsequent frames won't be removed even if +# they do match.) +allocatorFns = [ + "malloc (", + "replace_malloc", + "replace_calloc", + "replace_realloc", + "replace_memalign", + "replace_posix_memalign", + "malloc_zone_malloc", + "moz_xmalloc", + "moz_xcalloc", + "moz_xrealloc", + "operator new(", + "operator new[](", + "g_malloc", + "g_slice_alloc", + "callocCanGC", + "reallocCanGC", + "vpx_malloc", + "vpx_calloc", + "vpx_realloc", + "vpx_memalign", + "js_malloc", + "js_calloc", + "js_realloc", + "pod_malloc", + "pod_calloc", + "pod_realloc", + "nsTArrayInfallibleAllocator::Malloc", + "Allocator<ReplaceMallocBase>::malloc(", + # This one necessary to fully filter some sequences of allocation functions + # that happen in practice. Note that ??? entries that follow non-allocation + # functions won't be stripped, as explained above. + "???", +] + +#### + +# Command line arguments + + +def range_1_24(string): + value = int(string) + if value < 1 or value > 24: + msg = "{:s} is not in the range 1..24".format(string) + raise argparse.ArgumentTypeError(msg) + return value + + +parser = argparse.ArgumentParser( + description="Analyze the heap graph to find out things about an object. \ +By default this prints out information about blocks that point to the given block." +) + +parser.add_argument("dmd_log_file_name", help="clamped DMD log file name") + +parser.add_argument("block", help="address of the block of interest") + +parser.add_argument( + "--info", + dest="info", + action="store_true", + default=False, + help="Print out information about the block.", +) + +parser.add_argument( + "-sfl", + "--max-stack-frame-length", + type=int, + default=150, + help="Maximum number of characters to print from each stack frame", +) + +parser.add_argument( + "-a", + "--ignore-alloc-fns", + action="store_true", + help="ignore allocation functions at the start of traces", +) + +parser.add_argument( + "-f", + "--max-frames", + type=range_1_24, + default=8, + help="maximum number of frames to consider in each trace", +) + +parser.add_argument( + "-c", + "--chain-reports", + action="store_true", + help="if only one block is found to hold onto the object, report " + "the next one, too", +) + + +#### + + +class BlockData: + def __init__(self, json_block): + self.addr = json_block["addr"] + + if "contents" in json_block: + contents = json_block["contents"] + else: + contents = [] + self.contents = [] + for c in contents: + self.contents.append(int(c, 16)) + + self.req_size = json_block["req"] + + self.alloc_stack = json_block["alloc"] + + +def print_trace_segment(args, stacks, block): + (traceTable, frameTable) = stacks + + for l in traceTable[block.alloc_stack]: + # The 5: is to remove the bogus leading "#00: " from the stack frame. + print(" " + frameTable[l][5 : args.max_stack_frame_length]) + + +def show_referrers(args, blocks, stacks, block): + visited = set([]) + + anyFound = False + + while True: + referrers = {} + + for b, data in blocks.iteritems(): + which_edge = 0 + for e in data.contents: + if e == block: + # 8 is the number of bytes per word on a 64-bit system. + # XXX This means that this output will be wrong for logs from 32-bit systems! + referrers.setdefault(b, []).append(8 * which_edge) + anyFound = True + which_edge += 1 + + for r in referrers: + sys.stdout.write( + "0x{} size = {} bytes".format(blocks[r].addr, blocks[r].req_size) + ) + plural = "s" if len(referrers[r]) > 1 else "" + sys.stdout.write( + " at byte offset" + + plural + + " " + + (", ".join(str(x) for x in referrers[r])) + ) + print + print_trace_segment(args, stacks, blocks[r]) + print + + if args.chain_reports: + if len(referrers) == 0: + sys.stdout.write("Found no more referrers.\n") + break + if len(referrers) > 1: + sys.stdout.write("Found too many referrers.\n") + break + + sys.stdout.write("Chaining to next referrer.\n\n") + for r in referrers: + block = r + if block in visited: + sys.stdout.write("Found a loop.\n") + break + visited.add(block) + else: + break + + if not anyFound: + print("No referrers found.") + + +def show_block_info(args, blocks, stacks, block): + b = blocks[block] + sys.stdout.write("block: 0x{}\n".format(b.addr)) + sys.stdout.write("requested size: {} bytes\n".format(b.req_size)) + sys.stdout.write("\n") + sys.stdout.write("block contents: ") + for c in b.contents: + v = "0" if c == 0 else blocks[c].addr + sys.stdout.write("0x{} ".format(v)) + sys.stdout.write("\n\n") + sys.stdout.write("allocation stack:\n") + print_trace_segment(args, stacks, b) + return + + +def cleanupTraceTable(args, frameTable, traceTable): + # Remove allocation functions at the start of traces. + if args.ignore_alloc_fns: + # Build a regexp that matches every function in allocatorFns. + escapedAllocatorFns = map(re.escape, allocatorFns) + fn_re = re.compile("|".join(escapedAllocatorFns)) + + # Remove allocator fns from each stack trace. + for traceKey, frameKeys in traceTable.items(): + numSkippedFrames = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if re.search(fn_re, frameDesc): + numSkippedFrames += 1 + else: + break + if numSkippedFrames > 0: + traceTable[traceKey] = frameKeys[numSkippedFrames:] + + # Trim the number of frames. + for traceKey, frameKeys in traceTable.items(): + if len(frameKeys) > args.max_frames: + traceTable[traceKey] = frameKeys[: args.max_frames] + + +def loadGraph(options): + # Handle gzipped input if necessary. + isZipped = options.dmd_log_file_name.endswith(".gz") + opener = gzip.open if isZipped else open + + with opener(options.dmd_log_file_name, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + block_list = j["blockList"] + blocks = {} + + for json_block in block_list: + blocks[int(json_block["addr"], 16)] = BlockData(json_block) + + traceTable = j["traceTable"] + frameTable = j["frameTable"] + + cleanupTraceTable(options, frameTable, traceTable) + + return (blocks, (traceTable, frameTable)) + + +def analyzeLogs(): + options = parser.parse_args() + + (blocks, stacks) = loadGraph(options) + + block = int(options.block, 16) + + if block not in blocks: + print("Object " + options.block + " not found in traces.") + print("It could still be the target of some nodes.") + return + + if options.info: + show_block_info(options, blocks, stacks, block) + return + + show_referrers(options, blocks, stacks, block) + + +if __name__ == "__main__": + analyzeLogs() diff --git a/memory/replace/dmd/dmd.py b/memory/replace/dmd/dmd.py new file mode 100755 index 0000000000..46e59eefc1 --- /dev/null +++ b/memory/replace/dmd/dmd.py @@ -0,0 +1,1002 @@ +#! /usr/bin/env python +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""This script analyzes a JSON file emitted by DMD.""" + +from __future__ import absolute_import, print_function, division + +import argparse +import collections +import gzip +import json +import os +import platform +import re +import shutil +import sys +import tempfile +from bisect import bisect_right + +# The DMD output version this script handles. +outputVersion = 5 + +# If --ignore-alloc-fns is specified, stack frames containing functions that +# match these strings will be removed from the *start* of stack traces. (Once +# we hit a non-matching frame, any subsequent frames won't be removed even if +# they do match.) +allocatorFns = [ + # Matches malloc, replace_malloc, moz_xmalloc, vpx_malloc, js_malloc, + # pod_malloc, malloc_zone_*, g_malloc. + "malloc", + # Matches calloc, replace_calloc, moz_xcalloc, vpx_calloc, js_calloc, + # pod_calloc, malloc_zone_calloc, pod_callocCanGC. + "calloc", + # Matches realloc, replace_realloc, moz_xrealloc, vpx_realloc, js_realloc, + # pod_realloc, pod_reallocCanGC. + "realloc", + # Matches memalign, posix_memalign, replace_memalign, replace_posix_memalign, + # moz_xmemalign, vpx_memalign, malloc_zone_memalign. + "memalign", + "operator new(", + "operator new[](", + "g_slice_alloc", + # This one necessary to fully filter some sequences of allocation functions + # that happen in practice. Note that ??? entries that follow non-allocation + # functions won't be stripped, as explained above. + "???", +] + + +class Record(object): + """A record is an aggregation of heap blocks that have identical stack + traces. It can also be used to represent the difference between two + records.""" + + def __init__(self): + self.numBlocks = 0 + self.reqSize = 0 + self.slopSize = 0 + self.usableSize = 0 + self.allocatedAtDesc = None + self.reportedAtDescs = [] + self.usableSizes = collections.defaultdict(int) + + def isZero(self, args): + return ( + self.numBlocks == 0 + and self.reqSize == 0 + and self.slopSize == 0 + and self.usableSize == 0 + and len(self.usableSizes) == 0 + ) + + def negate(self): + self.numBlocks = -self.numBlocks + self.reqSize = -self.reqSize + self.slopSize = -self.slopSize + self.usableSize = -self.usableSize + + negatedUsableSizes = collections.defaultdict(int) + for usableSize, count in self.usableSizes.items(): + negatedUsableSizes[-usableSize] = count + self.usableSizes = negatedUsableSizes + + def subtract(self, r): + # We should only be calling this on records with matching stack traces. + # Check this. + assert self.allocatedAtDesc == r.allocatedAtDesc + assert self.reportedAtDescs == r.reportedAtDescs + + self.numBlocks -= r.numBlocks + self.reqSize -= r.reqSize + self.slopSize -= r.slopSize + self.usableSize -= r.usableSize + + usableSizes1 = self.usableSizes + usableSizes2 = r.usableSizes + usableSizes3 = collections.defaultdict(int) + for usableSize in usableSizes1: + counts1 = usableSizes1[usableSize] + if usableSize in usableSizes2: + counts2 = usableSizes2[usableSize] + del usableSizes2[usableSize] + counts3 = counts1 - counts2 + if counts3 != 0: + if counts3 < 0: + usableSize = -usableSize + counts3 = -counts3 + usableSizes3[usableSize] = counts3 + else: + usableSizes3[usableSize] = counts1 + + for usableSize in usableSizes2: + usableSizes3[-usableSize] = usableSizes2[usableSize] + + self.usableSizes = usableSizes3 + + @staticmethod + def cmpByUsableSize(r1, r2): + # Sort by usable size, then by req size. + return cmp(abs(r1.usableSize), abs(r2.usableSize)) or Record.cmpByReqSize( + r1, r2 + ) + + @staticmethod + def cmpByReqSize(r1, r2): + # Sort by req size. + return cmp(abs(r1.reqSize), abs(r2.reqSize)) + + @staticmethod + def cmpBySlopSize(r1, r2): + # Sort by slop size. + return cmp(abs(r1.slopSize), abs(r2.slopSize)) + + @staticmethod + def cmpByNumBlocks(r1, r2): + # Sort by block counts, then by usable size. + return cmp(abs(r1.numBlocks), abs(r2.numBlocks)) or Record.cmpByUsableSize( + r1, r2 + ) + + +sortByChoices = { + "usable": Record.cmpByUsableSize, # the default + "req": Record.cmpByReqSize, + "slop": Record.cmpBySlopSize, + "num-blocks": Record.cmpByNumBlocks, +} + + +def parseCommandLine(): + # 24 is the maximum number of frames that DMD will produce. + def range_1_24(string): + value = int(string) + if value < 1 or value > 24: + msg = "{:s} is not in the range 1..24".format(string) + raise argparse.ArgumentTypeError(msg) + return value + + description = """ +Analyze heap data produced by DMD. +If one file is specified, analyze it; if two files are specified, analyze the +difference. +Input files can be gzipped. +Write to stdout unless -o/--output is specified. +Stack traces are fixed to show function names, filenames and line numbers +unless --no-fix-stacks is specified; stack fixing modifies the original file +and may take some time. If specified, the BREAKPAD_SYMBOLS_PATH environment +variable is used to find breakpad symbols for stack fixing. +""" + p = argparse.ArgumentParser(description=description) + + p.add_argument( + "-o", + "--output", + type=argparse.FileType("w"), + help="output file; stdout if unspecified", + ) + + p.add_argument( + "-f", + "--max-frames", + type=range_1_24, + default=8, + help="maximum number of frames to consider in each trace", + ) + + p.add_argument( + "-s", + "--sort-by", + choices=sortByChoices.keys(), + default="usable", + help="sort the records by a particular metric", + ) + + p.add_argument( + "-a", + "--ignore-alloc-fns", + action="store_true", + help="ignore allocation functions at the start of traces", + ) + + p.add_argument("--no-fix-stacks", action="store_true", help="do not fix stacks") + + p.add_argument( + "--clamp-contents", + action="store_true", + help="for a scan mode log, clamp addresses to the start of live blocks, " + "or zero if not in one", + ) + + p.add_argument( + "--print-clamp-stats", + action="store_true", + help="print information about the results of pointer clamping; mostly " + "useful for debugging clamping", + ) + + p.add_argument( + "--filter-stacks-for-testing", + action="store_true", + help="filter stack traces; only useful for testing purposes", + ) + + p.add_argument( + "--allocation-filter", + help="Only print entries that have a stack that matches the filter", + ) + + p.add_argument("input_file", help="a file produced by DMD") + + p.add_argument( + "input_file2", + nargs="?", + help="a file produced by DMD; if present, it is diff'd with input_file", + ) + + return p.parse_args(sys.argv[1:]) + + +# Fix stacks if necessary: first write the output to a tempfile, then replace +# the original file with it. +def fixStackTraces(inputFilename, isZipped, opener): + # This append() call is needed to make the import statements work when this + # script is installed as a symlink. + sys.path.append(os.path.dirname(__file__)) + + bpsyms = os.environ.get("BREAKPAD_SYMBOLS_PATH", None) + sysname = platform.system() + if bpsyms and os.path.exists(bpsyms): + import fix_stacks as fixModule + + def fix(line): + return fixModule.fixSymbols(line, jsonMode=True, breakpadSymsDir=bpsyms) + + elif sysname in ("Linux", "Darwin", "Windows"): + import fix_stacks as fixModule + + def fix(line): + return fixModule.fixSymbols(line, jsonMode=True) + + else: + return + + # Fix stacks, writing output to a temporary file, and then overwrite the + # original file. + tmpFile = tempfile.NamedTemporaryFile(delete=False) + + # If the input is gzipped, then the output (written initially to |tmpFile|) + # should be gzipped as well. + # + # And we want to set its pre-gzipped filename to '' rather than the name of + # the temporary file, so that programs like the Unix 'file' utility don't + # say that it was called 'tmp6ozTxE' (or something like that) before it was + # zipped. So that explains the |filename=''| parameter. + # + # But setting the filename like that clobbers |tmpFile.name|, so we must + # get that now in order to move |tmpFile| at the end. + tmpFilename = tmpFile.name + if isZipped: + tmpFile = gzip.GzipFile(filename="", fileobj=tmpFile) + + with opener(inputFilename, "rb") as inputFile: + for line in inputFile: + tmpFile.write(fix(line)) + + tmpFile.close() + + shutil.move(tmpFilename, inputFilename) + + +def getDigestFromFile(args, inputFile): + # Handle gzipped input if necessary. + isZipped = inputFile.endswith(".gz") + opener = gzip.open if isZipped else open + + # Fix stack traces unless otherwise instructed. + if not args.no_fix_stacks: + fixStackTraces(inputFile, isZipped, opener) + + if args.clamp_contents: + clampBlockList(args, inputFile, isZipped, opener) + + with opener(inputFile, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + # Extract the main parts of the JSON object. + invocation = j["invocation"] + dmdEnvVar = invocation["dmdEnvVar"] + mode = invocation["mode"] + blockList = j["blockList"] + traceTable = j["traceTable"] + frameTable = j["frameTable"] + + # Insert the necessary entries for unrecorded stack traces. Note that 'ut' + # and 'uf' will not overlap with any keys produced by DMD's + # ToIdStringConverter::Base32() function. + unrecordedTraceID = "ut" + unrecordedFrameID = "uf" + traceTable[unrecordedTraceID] = [unrecordedFrameID] + frameTable[ + unrecordedFrameID + ] = "#00: (no stack trace recorded due to --stacks=partial)" + + # For the purposes of this script, 'scan' behaves like 'live'. + if mode == "scan": + mode = "live" + + if mode not in ["live", "dark-matter", "cumulative"]: + raise Exception("bad 'mode' property: '{:s}'".format(mode)) + + # Remove allocation functions at the start of traces. + if args.ignore_alloc_fns: + # Build a regexp that matches every function in allocatorFns. + escapedAllocatorFns = map(re.escape, allocatorFns) + fn_re = re.compile("|".join(escapedAllocatorFns)) + + # Remove allocator fns from each stack trace. + for traceKey, frameKeys in traceTable.items(): + numSkippedFrames = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if re.search(fn_re, frameDesc): + numSkippedFrames += 1 + else: + break + if numSkippedFrames > 0: + traceTable[traceKey] = frameKeys[numSkippedFrames:] + + # Trim the number of frames. + for traceKey, frameKeys in traceTable.items(): + if len(frameKeys) > args.max_frames: + traceTable[traceKey] = frameKeys[: args.max_frames] + + def buildTraceDescription(traceTable, frameTable, traceKey): + frameKeys = traceTable[traceKey] + fmt = " #{:02d}{:}" + + if args.filter_stacks_for_testing: + # This option is used by `test_dmd.js`, which runs the code in + # `SmokeDMD.cpp`. When running that test, there is too much + # variation in the stack traces across different machines and + # platforms to do exact output matching. However, every stack trace + # should have at least three frames that contain `DMD` (in one of + # `DMD.cpp`, `SmokeDMD.cpp`, `SmokeDMD`, or `SmokeDMD.exe`). Some + # example frames from automation (where `..` indicates excised path + # segments): + # + # Linux debug, with stack fixing using breakpad syms: + # `#01: replace_realloc(void*, unsigned long) [../dmd/DMD.cpp:1110]` + # + # Linux opt, with native stack fixing: + # `#02: TestFull(char const*, int, char const*, int) (../dmd/test/SmokeDMD.cpp:165)` + # + # Mac opt, with native stack fixing: + # `#03: RunTests() (../build/tests/bin/SmokeDMD + 0x21f9)` + # + # Windows opt, with native stack fixing failing due to a missing PDB: + # `#04: ??? (..\\build\\tests\\bin\\SmokeDMD.exe + 0x1c58)` + # + # If we see three such frames, we replace the entire stack trace + # with a single, predictable frame. This imprecise matching will at + # least detect if stack fixing fails completely. + dmd_frame_matches = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if "DMD" in frameDesc: + dmd_frame_matches += 1 + if dmd_frame_matches >= 3: + return [fmt.format(1, ": ... DMD.cpp ...")] + + # The frame number is always '#00' (see DMD.h for why), so we have to + # replace that with the correct frame number. + desc = [] + for n, frameKey in enumerate(traceTable[traceKey], start=1): + desc.append(fmt.format(n, frameTable[frameKey][3:])) + return desc + + # Aggregate blocks into records. All sufficiently similar blocks go into a + # single record. + + if mode in ["live", "cumulative"]: + liveOrCumulativeRecords = collections.defaultdict(Record) + elif mode == "dark-matter": + unreportedRecords = collections.defaultdict(Record) + onceReportedRecords = collections.defaultdict(Record) + twiceReportedRecords = collections.defaultdict(Record) + + heapUsableSize = 0 + heapBlocks = 0 + + recordKeyPartCache = {} + + for block in blockList: + # For each block we compute a |recordKey|, and all blocks with the same + # |recordKey| are aggregated into a single record. The |recordKey| is + # derived from the block's 'alloc' and 'reps' (if present) stack + # traces. + # + # We use frame descriptions (e.g. "#00: foo (X.cpp:99)") when comparing + # traces for equality. We can't use trace keys or frame keys because + # they're not comparable across different DMD runs (which is relevant + # when doing diffs). + # + # Using frame descriptions also fits in with the stack trimming done + # for --max-frames, which requires that stack traces with common + # beginnings but different endings to be considered equivalent. E.g. if + # we have distinct traces T1:[A:D1,B:D2,C:D3] and T2:[X:D1,Y:D2,Z:D4] + # and we trim the final frame of each they should be considered + # equivalent because the untrimmed frame descriptions (D1 and D2) + # match. + # + # Having said all that, during a single invocation of dmd.py on a + # single DMD file, for a single frameKey value the record key will + # always be the same, and we might encounter it 1000s of times. So we + # cache prior results for speed. + def makeRecordKeyPart(traceKey): + if traceKey in recordKeyPartCache: + return recordKeyPartCache[traceKey] + + recordKeyPart = str( + map(lambda frameKey: frameTable[frameKey], traceTable[traceKey]) + ) + recordKeyPartCache[traceKey] = recordKeyPart + return recordKeyPart + + allocatedAtTraceKey = block.get("alloc", unrecordedTraceID) + if mode in ["live", "cumulative"]: + recordKey = makeRecordKeyPart(allocatedAtTraceKey) + records = liveOrCumulativeRecords + elif mode == "dark-matter": + recordKey = makeRecordKeyPart(allocatedAtTraceKey) + if "reps" in block: + reportedAtTraceKeys = block["reps"] + for reportedAtTraceKey in reportedAtTraceKeys: + recordKey += makeRecordKeyPart(reportedAtTraceKey) + if len(reportedAtTraceKeys) == 1: + records = onceReportedRecords + else: + records = twiceReportedRecords + else: + records = unreportedRecords + + record = records[recordKey] + + if "req" not in block: + raise Exception("'req' property missing in block'") + + reqSize = block["req"] + slopSize = block.get("slop", 0) + + if "num" in block: + num = block["num"] + else: + num = 1 + + usableSize = reqSize + slopSize + heapUsableSize += num * usableSize + heapBlocks += num + + record.numBlocks += num + record.reqSize += num * reqSize + record.slopSize += num * slopSize + record.usableSize += num * usableSize + if record.allocatedAtDesc is None: + record.allocatedAtDesc = buildTraceDescription( + traceTable, frameTable, allocatedAtTraceKey + ) + + if mode in ["live", "cumulative"]: + pass + elif mode == "dark-matter": + if "reps" in block and record.reportedAtDescs == []: + + def f(k): + return buildTraceDescription(traceTable, frameTable, k) + + record.reportedAtDescs = map(f, reportedAtTraceKeys) + record.usableSizes[usableSize] += num + + # All the processed data for a single DMD file is called a "digest". + digest = {} + digest["dmdEnvVar"] = dmdEnvVar + digest["mode"] = mode + digest["heapUsableSize"] = heapUsableSize + digest["heapBlocks"] = heapBlocks + if mode in ["live", "cumulative"]: + digest["liveOrCumulativeRecords"] = liveOrCumulativeRecords + elif mode == "dark-matter": + digest["unreportedRecords"] = unreportedRecords + digest["onceReportedRecords"] = onceReportedRecords + digest["twiceReportedRecords"] = twiceReportedRecords + return digest + + +def diffRecords(args, records1, records2): + records3 = {} + + # Process records1. + for k in records1: + r1 = records1[k] + if k in records2: + # This record is present in both records1 and records2. + r2 = records2[k] + del records2[k] + r2.subtract(r1) + if not r2.isZero(args): + records3[k] = r2 + else: + # This record is present only in records1. + r1.negate() + records3[k] = r1 + + for k in records2: + # This record is present only in records2. + records3[k] = records2[k] + + return records3 + + +def diffDigests(args, d1, d2): + if d1["mode"] != d2["mode"]: + raise Exception("the input files have different 'mode' properties") + + d3 = {} + d3["dmdEnvVar"] = (d1["dmdEnvVar"], d2["dmdEnvVar"]) + d3["mode"] = d1["mode"] + d3["heapUsableSize"] = d2["heapUsableSize"] - d1["heapUsableSize"] + d3["heapBlocks"] = d2["heapBlocks"] - d1["heapBlocks"] + if d1["mode"] in ["live", "cumulative"]: + d3["liveOrCumulativeRecords"] = diffRecords( + args, d1["liveOrCumulativeRecords"], d2["liveOrCumulativeRecords"] + ) + elif d1["mode"] == "dark-matter": + d3["unreportedRecords"] = diffRecords( + args, d1["unreportedRecords"], d2["unreportedRecords"] + ) + d3["onceReportedRecords"] = diffRecords( + args, d1["onceReportedRecords"], d2["onceReportedRecords"] + ) + d3["twiceReportedRecords"] = diffRecords( + args, d1["twiceReportedRecords"], d2["twiceReportedRecords"] + ) + return d3 + + +def printDigest(args, digest): + dmdEnvVar = digest["dmdEnvVar"] + mode = digest["mode"] + heapUsableSize = digest["heapUsableSize"] + heapBlocks = digest["heapBlocks"] + if mode in ["live", "cumulative"]: + liveOrCumulativeRecords = digest["liveOrCumulativeRecords"] + elif mode == "dark-matter": + unreportedRecords = digest["unreportedRecords"] + onceReportedRecords = digest["onceReportedRecords"] + twiceReportedRecords = digest["twiceReportedRecords"] + + separator = "#" + "-" * 65 + "\n" + + def number(n): + """Format a number with comma as a separator.""" + return "{:,d}".format(n) + + def perc(m, n): + return 0 if n == 0 else (100 * m / n) + + def plural(n): + return "" if n == 1 else "s" + + # Prints to stdout, or to file if -o/--output was specified. + def out(*arguments, **kwargs): + print(*arguments, file=args.output, **kwargs) + + def printStack(traceDesc): + for frameDesc in traceDesc: + out(frameDesc) + + def printRecords(recordKind, records, heapUsableSize): + RecordKind = recordKind.capitalize() + out(separator) + numRecords = len(records) + cmpRecords = sortByChoices[args.sort_by] + sortedRecords = sorted(records.values(), cmp=cmpRecords, reverse=True) + kindBlocks = 0 + kindUsableSize = 0 + maxRecord = 1000 + + if args.allocation_filter: + sortedRecords = list( + filter( + lambda x: any( + map(lambda y: args.allocation_filter in y, x.allocatedAtDesc) + ), + sortedRecords, + ) + ) + + # First iteration: get totals, etc. + for record in sortedRecords: + kindBlocks += record.numBlocks + kindUsableSize += record.usableSize + + # Second iteration: print. + if numRecords == 0: + out("# no {:} heap blocks\n".format(recordKind)) + + kindCumulativeUsableSize = 0 + for i, record in enumerate(sortedRecords, start=1): + # Stop printing at the |maxRecord|th record. + if i == maxRecord: + out( + "# {:}: stopping after {:,d} heap block records\n".format( + RecordKind, i + ) + ) + break + + kindCumulativeUsableSize += record.usableSize + + out(RecordKind + " {") + out( + " {:} block{:} in heap block record {:,d} of {:,d}".format( + number(record.numBlocks), plural(record.numBlocks), i, numRecords + ) + ) + out( + " {:} bytes ({:} requested / {:} slop)".format( + number(record.usableSize), + number(record.reqSize), + number(record.slopSize), + ) + ) + + usableSizes = sorted( + record.usableSizes.items(), key=lambda x: abs(x[0]), reverse=True + ) + hasSingleBlock = len(usableSizes) == 1 and usableSizes[0][1] == 1 + + if not hasSingleBlock: + out(" Individual block sizes: ", end="") + if len(usableSizes) == 0: + out("(no change)", end="") + else: + isFirst = True + for usableSize, count in usableSizes: + if not isFirst: + out("; ", end="") + out("{:}".format(number(usableSize)), end="") + if count > 1: + out(" x {:,d}".format(count), end="") + isFirst = False + out() + + out( + " {:4.2f}% of the heap ({:4.2f}% cumulative)".format( + perc(record.usableSize, heapUsableSize), + perc(kindCumulativeUsableSize, heapUsableSize), + ) + ) + if mode in ["live", "cumulative"]: + pass + elif mode == "dark-matter": + out( + " {:4.2f}% of {:} ({:4.2f}% cumulative)".format( + perc(record.usableSize, kindUsableSize), + recordKind, + perc(kindCumulativeUsableSize, kindUsableSize), + ) + ) + out(" Allocated at {") + printStack(record.allocatedAtDesc) + out(" }") + if mode in ["live", "cumulative"]: + pass + elif mode == "dark-matter": + for n, reportedAtDesc in enumerate(record.reportedAtDescs): + again = "again " if n > 0 else "" + out(" Reported {:}at {{".format(again)) + printStack(reportedAtDesc) + out(" }") + out("}\n") + + return (kindUsableSize, kindBlocks) + + def printInvocation(n, dmdEnvVar, mode): + out("Invocation{:} {{".format(n)) + if dmdEnvVar is None: + out(" $DMD is undefined") + else: + out(" $DMD = '" + dmdEnvVar + "'") + out(" Mode = '" + mode + "'") + out("}\n") + + # Print command line. Strip dirs so the output is deterministic, which is + # needed for testing. + out(separator, end="") + out("# " + " ".join(map(os.path.basename, sys.argv)) + "\n") + + # Print invocation(s). + if type(dmdEnvVar) is not tuple: + printInvocation("", dmdEnvVar, mode) + else: + printInvocation(" 1", dmdEnvVar[0], mode) + printInvocation(" 2", dmdEnvVar[1], mode) + + # Print records. + if mode in ["live", "cumulative"]: + liveOrCumulativeUsableSize, liveOrCumulativeBlocks = printRecords( + mode, liveOrCumulativeRecords, heapUsableSize + ) + elif mode == "dark-matter": + twiceReportedUsableSize, twiceReportedBlocks = printRecords( + "twice-reported", twiceReportedRecords, heapUsableSize + ) + + unreportedUsableSize, unreportedBlocks = printRecords( + "unreported", unreportedRecords, heapUsableSize + ) + + onceReportedUsableSize, onceReportedBlocks = printRecords( + "once-reported", onceReportedRecords, heapUsableSize + ) + + # Print summary. + out(separator) + out("Summary {") + if mode in ["live", "cumulative"]: + out( + " Total: {:} bytes in {:} blocks".format( + number(liveOrCumulativeUsableSize), number(liveOrCumulativeBlocks) + ) + ) + elif mode == "dark-matter": + fmt = " {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)" + out(fmt.format("Total:", number(heapUsableSize), 100, number(heapBlocks), 100)) + out( + fmt.format( + "Unreported:", + number(unreportedUsableSize), + perc(unreportedUsableSize, heapUsableSize), + number(unreportedBlocks), + perc(unreportedBlocks, heapBlocks), + ) + ) + out( + fmt.format( + "Once-reported:", + number(onceReportedUsableSize), + perc(onceReportedUsableSize, heapUsableSize), + number(onceReportedBlocks), + perc(onceReportedBlocks, heapBlocks), + ) + ) + out( + fmt.format( + "Twice-reported:", + number(twiceReportedUsableSize), + perc(twiceReportedUsableSize, heapUsableSize), + number(twiceReportedBlocks), + perc(twiceReportedBlocks, heapBlocks), + ) + ) + out("}\n") + + +############################# +# Pretty printer for DMD JSON +############################# + + +def prettyPrintDmdJson(out, j): + out.write("{\n") + + out.write(' "version": {0},\n'.format(j["version"])) + out.write(' "invocation": ') + json.dump(j["invocation"], out, sort_keys=True) + out.write(",\n") + + out.write(' "blockList": [') + first = True + for b in j["blockList"]: + out.write("" if first else ",") + out.write("\n ") + json.dump(b, out, sort_keys=True) + first = False + out.write("\n ],\n") + + out.write(' "traceTable": {') + first = True + for k, l in j["traceTable"].iteritems(): + out.write("" if first else ",") + out.write('\n "{0}": {1}'.format(k, json.dumps(l))) + first = False + out.write("\n },\n") + + out.write(' "frameTable": {') + first = True + for k, v in j["frameTable"].iteritems(): + out.write("" if first else ",") + out.write('\n "{0}": {1}'.format(k, json.dumps(v))) + first = False + out.write("\n }\n") + + out.write("}\n") + + +################################################################## +# Code for clamping addresses using conservative pointer analysis. +################################################################## + +# Start is the address of the first byte of the block, while end is +# the address of the first byte after the final byte in the block. +class AddrRange: + def __init__(self, block, length): + self.block = block + self.start = int(block, 16) + self.length = length + self.end = self.start + self.length + + assert self.start > 0 + assert length >= 0 + + +class ClampStats: + def __init__(self): + # Number of pointers already pointing to the start of a block. + self.startBlockPtr = 0 + + # Number of pointers pointing to the middle of a block. These + # are clamped to the start of the block they point into. + self.midBlockPtr = 0 + + # Number of null pointers. + self.nullPtr = 0 + + # Number of non-null pointers that didn't point into the middle + # of any blocks. These are clamped to null. + self.nonNullNonBlockPtr = 0 + + def clampedBlockAddr(self, sameAddress): + if sameAddress: + self.startBlockPtr += 1 + else: + self.midBlockPtr += 1 + + def nullAddr(self): + self.nullPtr += 1 + + def clampedNonBlockAddr(self): + self.nonNullNonBlockPtr += 1 + + def log(self): + sys.stderr.write("Results:\n") + sys.stderr.write( + " Number of pointers already pointing to start of blocks: " + + str(self.startBlockPtr) + + "\n" + ) + sys.stderr.write( + " Number of pointers clamped to start of blocks: " + + str(self.midBlockPtr) + + "\n" + ) + sys.stderr.write( + " Number of non-null pointers not pointing into blocks " + "clamped to null: " + str(self.nonNullNonBlockPtr) + "\n" + ) + sys.stderr.write(" Number of null pointers: " + str(self.nullPtr) + "\n") + + +# Search the block ranges array for a block that address points into. +# The search is carried out in an array of starting addresses for each blocks +# because it is faster. +def clampAddress(blockRanges, blockStarts, clampStats, address): + i = bisect_right(blockStarts, address) + + # Any addresses completely out of the range should have been eliminated already. + assert i > 0 + r = blockRanges[i - 1] + assert r.start <= address + + if address >= r.end: + assert address < blockRanges[i].start + clampStats.clampedNonBlockAddr() + return "0" + + clampStats.clampedBlockAddr(r.start == address) + return r.block + + +def clampBlockList(args, inputFileName, isZipped, opener): + # XXX This isn't very efficient because we end up reading and writing + # the file multiple times. + with opener(inputFileName, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + # Check that the invocation is reasonable for contents clamping. + invocation = j["invocation"] + if invocation["mode"] != "scan": + raise Exception("Log was taken in mode " + invocation["mode"] + " not scan") + + sys.stderr.write("Creating block range list.\n") + blockList = j["blockList"] + blockRanges = [] + for block in blockList: + blockRanges.append(AddrRange(block["addr"], block["req"])) + blockRanges.sort(key=lambda r: r.start) + + # Make sure there are no overlapping blocks. + prevRange = blockRanges[0] + for currRange in blockRanges[1:]: + assert prevRange.end <= currRange.start + prevRange = currRange + + sys.stderr.write("Clamping block contents.\n") + clampStats = ClampStats() + firstAddr = blockRanges[0].start + lastAddr = blockRanges[-1].end + + blockStarts = [] + for r in blockRanges: + blockStarts.append(r.start) + + for block in blockList: + # Small blocks don't have any contents. + if "contents" not in block: + continue + + cont = block["contents"] + for i in range(len(cont)): + address = int(cont[i], 16) + + if address == 0: + clampStats.nullAddr() + continue + + # If the address is before the first block or after the last + # block then it can't be within a block. + if address < firstAddr or address >= lastAddr: + clampStats.clampedNonBlockAddr() + cont[i] = "0" + continue + + cont[i] = clampAddress(blockRanges, blockStarts, clampStats, address) + + # Remove any trailing nulls. + while len(cont) and cont[-1] == "0": + cont.pop() + + if args.print_clamp_stats: + clampStats.log() + + sys.stderr.write("Saving file.\n") + tmpFile = tempfile.NamedTemporaryFile(delete=False) + tmpFilename = tmpFile.name + if isZipped: + tmpFile = gzip.GzipFile(filename="", fileobj=tmpFile) + prettyPrintDmdJson(tmpFile, j) + tmpFile.close() + shutil.move(tmpFilename, inputFileName) + + +def main(): + args = parseCommandLine() + digest = getDigestFromFile(args, args.input_file) + if args.input_file2: + digest2 = getDigestFromFile(args, args.input_file2) + digest = diffDigests(args, digest, digest2) + printDigest(args, digest) + + +if __name__ == "__main__": + main() diff --git a/memory/replace/dmd/moz.build b/memory/replace/dmd/moz.build new file mode 100644 index 0000000000..6f3121df48 --- /dev/null +++ b/memory/replace/dmd/moz.build @@ -0,0 +1,37 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + "DMD.h", +] + +UNIFIED_SOURCES += [ + "DMD.cpp", +] + +if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + UNIFIED_SOURCES += [ + "/mfbt/HashFunctions.cpp", + "/mfbt/JSONWriter.cpp", + "/mfbt/Poison.cpp", + "/mozglue/misc/StackWalk.cpp", + ] + if CONFIG["OS_ARCH"] == "WINNT": + OS_LIBS += [ + "dbghelp", + ] + +ReplaceMalloc("dmd") + +DEFINES["MOZ_NO_MOZALLOC"] = True +DEFINES["IMPL_MFBT"] = True + +if CONFIG["MOZ_OPTIMIZE"]: + DEFINES["MOZ_OPTIMIZE"] = True + +DisableStlWrapping() + +TEST_DIRS += ["test"] diff --git a/memory/replace/dmd/test/.eslintrc.js b/memory/replace/dmd/test/.eslintrc.js new file mode 100644 index 0000000000..69e89d0054 --- /dev/null +++ b/memory/replace/dmd/test/.eslintrc.js @@ -0,0 +1,5 @@ +"use strict"; + +module.exports = { + extends: ["plugin:mozilla/xpcshell-test"], +}; diff --git a/memory/replace/dmd/test/SmokeDMD.cpp b/memory/replace/dmd/test/SmokeDMD.cpp new file mode 100644 index 0000000000..e36ae57d50 --- /dev/null +++ b/memory/replace/dmd/test/SmokeDMD.cpp @@ -0,0 +1,376 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This program is used by the DMD xpcshell test. It is run under DMD and +// produces some output. The xpcshell test then post-processes and checks this +// output. +// +// Note that this file does not have "Test" or "test" in its name, because that +// will cause the build system to not record breakpad symbols for it, which +// will stop the post-processing (which includes stack fixing) from working +// correctly. + +// This is required on some systems such as Fedora to allow +// building with -O0 together with --warnings-as-errors due to +// a check in /usr/include/features.h +#undef _FORTIFY_SOURCE + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mozilla/Assertions.h" +#include "mozilla/JSONWriter.h" +#include "mozilla/UniquePtr.h" +#include "DMD.h" + +using mozilla::MakeUnique; +using namespace mozilla::dmd; + +DMDFuncs::Singleton DMDFuncs::sSingleton; + +class FpWriteFunc : public mozilla::JSONWriteFunc { + public: + explicit FpWriteFunc(const char* aFilename) { + mFp = fopen(aFilename, "w"); + if (!mFp) { + fprintf(stderr, "SmokeDMD: can't create %s file: %s\n", aFilename, + strerror(errno)); + exit(1); + } + } + + ~FpWriteFunc() { fclose(mFp); } + + void Write(const mozilla::Span<const char>& aStr) override { + for (const char c : aStr) { + fputc(c, mFp); + } + } + + private: + FILE* mFp; +}; + +// This stops otherwise-unused variables from being optimized away. +static void UseItOrLoseIt(void* aPtr, int aSeven) { + char buf[64]; + int n = sprintf(buf, "%p\n", aPtr); + if (n == 20 + aSeven) { + fprintf(stderr, "well, that is surprising"); + } +} + +// This function checks that heap blocks that have the same stack trace but +// different (or no) reporters get aggregated separately. +void Foo(int aSeven) { + char* a[6]; + for (int i = 0; i < aSeven - 1; i++) { + a[i] = (char*)malloc(128 - 16 * i); + UseItOrLoseIt(a[i], aSeven); + } + + // Oddly, some versions of clang will cause identical stack traces to be + // generated for adjacent calls to Report(), which breaks the test. Inserting + // the UseItOrLoseIt() calls in between is enough to prevent this. + + Report(a[2]); // reported + + UseItOrLoseIt(a[2], aSeven); + + for (int i = 0; i < aSeven - 5; i++) { + Report(a[i]); // reported + UseItOrLoseIt(a[i], aSeven); + } + + UseItOrLoseIt(a[2], aSeven); + + Report(a[3]); // reported + + // a[4], a[5] unreported +} + +void TestEmpty(const char* aTestName, const char* aMode) { + char filename[128]; + sprintf(filename, "complete-%s-%s.json", aTestName, aMode); + auto f = MakeUnique<FpWriteFunc>(filename); + + char options[128]; + sprintf(options, "--mode=%s --stacks=full", aMode); + ResetEverything(options); + + // Zero for everything. + Analyze(std::move(f)); +} + +void TestFull(const char* aTestName, int aNum, const char* aMode, int aSeven) { + char filename[128]; + sprintf(filename, "complete-%s%d-%s.json", aTestName, aNum, aMode); + auto f = MakeUnique<FpWriteFunc>(filename); + + // The --show-dump-stats=yes is there just to give that option some basic + // testing, e.g. ensure it doesn't crash. It's hard to test much beyond that. + char options[128]; + sprintf(options, "--mode=%s --stacks=full --show-dump-stats=yes", aMode); + ResetEverything(options); + + // Analyze 1: 1 freed, 9 out of 10 unreported. + // Analyze 2: still present and unreported. + int i; + char* a = nullptr; + for (i = 0; i < aSeven + 3; i++) { + a = (char*)malloc(100); + UseItOrLoseIt(a, aSeven); + } + free(a); + + // A no-op. + free(nullptr); + + // Note: 16 bytes is the smallest requested size that gives consistent + // behaviour across all platforms with jemalloc. + // Analyze 1: reported. + // Analyze 2: thrice-reported. + char* a2 = (char*)malloc(16); + Report(a2); + + // Analyze 1: reported. + // Analyze 2: reportedness carries over, due to ReportOnAlloc. + char* b = (char*)malloc(10); + ReportOnAlloc(b); + + // ReportOnAlloc, then freed. + // Analyze 1: freed, irrelevant. + // Analyze 2: freed, irrelevant. + char* b2 = (char*)malloc(16); + ReportOnAlloc(b2); + free(b2); + + // Analyze 1: reported 4 times. + // Analyze 2: freed, irrelevant. + char* c = (char*)calloc(10, 3); + Report(c); + for (int i = 0; i < aSeven - 4; i++) { + Report(c); + } + + // Analyze 1: ignored. + // Analyze 2: irrelevant. + Report((void*)(intptr_t)i); + + // jemalloc rounds this up to 8192. + // Analyze 1: reported. + // Analyze 2: freed. + char* e = (char*)malloc(4096); + e = (char*)realloc(e, 7169); + Report(e); + + // First realloc is like malloc; second realloc is shrinking. + // Analyze 1: reported. + // Analyze 2: re-reported. + char* e2 = (char*)realloc(nullptr, 1024); + e2 = (char*)realloc(e2, 512); + Report(e2); + + // First realloc is like malloc; second realloc creates a min-sized block. + // XXX: on Windows, second realloc frees the block. + // Analyze 1: reported. + // Analyze 2: freed, irrelevant. + char* e3 = (char*)realloc(nullptr, 1023); + // e3 = (char*) realloc(e3, 0); + MOZ_ASSERT(e3); + Report(e3); + + // Analyze 1: freed, irrelevant. + // Analyze 2: freed, irrelevant. + char* f1 = (char*)malloc(64); + UseItOrLoseIt(f1, aSeven); + free(f1); + + // Analyze 1: ignored. + // Analyze 2: irrelevant. + Report((void*)(intptr_t)0x0); + + // Analyze 1: mixture of reported and unreported. + // Analyze 2: all unreported. + Foo(aSeven); + + // Analyze 1: twice-reported. + // Analyze 2: twice-reported. + char* g1 = (char*)malloc(77); + ReportOnAlloc(g1); + ReportOnAlloc(g1); + + // Analyze 1: mixture of reported and unreported. + // Analyze 2: all unreported. + // Nb: this Foo() call is deliberately not adjacent to the previous one. See + // the comment about adjacent calls in Foo() for more details. + Foo(aSeven); + + // Analyze 1: twice-reported. + // Analyze 2: once-reported. + char* g2 = (char*)malloc(78); + Report(g2); + ReportOnAlloc(g2); + + // Analyze 1: twice-reported. + // Analyze 2: once-reported. + char* g3 = (char*)malloc(79); + ReportOnAlloc(g3); + Report(g3); + + // All the odd-ball ones. + // Analyze 1: all unreported. + // Analyze 2: all freed, irrelevant. + // XXX: no memalign on Mac + // void* w = memalign(64, 65); // rounds up to 128 + // UseItOrLoseIt(w, aSeven); + + // XXX: posix_memalign doesn't work on B2G + // void* x; + // posix_memalign(&y, 128, 129); // rounds up to 256 + // UseItOrLoseIt(x, aSeven); + + // XXX: valloc doesn't work on Windows. + // void* y = valloc(1); // rounds up to 4096 + // UseItOrLoseIt(y, aSeven); + + // XXX: C11 only + // void* z = aligned_alloc(64, 256); + // UseItOrLoseIt(z, aSeven); + + if (aNum == 1) { + // Analyze 1. + Analyze(std::move(f)); + } + + ClearReports(); + + //--------- + + Report(a2); + Report(a2); + free(c); + free(e); + Report(e2); + free(e3); + // free(w); + // free(x); + // free(y); + // free(z); + + // Do some allocations that will only show up in cumulative mode. + for (int i = 0; i < 100; i++) { + void* v = malloc(128); + UseItOrLoseIt(v, aSeven); + free(v); + } + + if (aNum == 2) { + // Analyze 2. + Analyze(std::move(f)); + } +} + +void TestPartial(const char* aTestName, const char* aMode, int aSeven) { + char filename[128]; + sprintf(filename, "complete-%s-%s.json", aTestName, aMode); + auto f = MakeUnique<FpWriteFunc>(filename); + + char options[128]; + sprintf(options, "--mode=%s", aMode); + ResetEverything(options); + + int kTenThousand = aSeven + 9993; + char* s; + + // The output of this function is deterministic but it relies on the + // probability and seeds given to the FastBernoulliTrial instance in + // ResetBernoulli(). If they change, the output will change too. + + // Expected fraction with stacks: (1 - (1 - 0.003) ** 16) = 0.0469. + // So we expect about 0.0469 * 10000 == 469. + // We actually get 511. + for (int i = 0; i < kTenThousand; i++) { + s = (char*)malloc(16); + UseItOrLoseIt(s, aSeven); + } + + // Expected fraction with stacks: (1 - (1 - 0.003) ** 128) = 0.3193. + // So we expect about 0.3193 * 10000 == 3193. + // We actually get 3136. + for (int i = 0; i < kTenThousand; i++) { + s = (char*)malloc(128); + UseItOrLoseIt(s, aSeven); + } + + // Expected fraction with stacks: (1 - (1 - 0.003) ** 1024) = 0.9539. + // So we expect about 0.9539 * 10000 == 9539. + // We actually get 9531. + for (int i = 0; i < kTenThousand; i++) { + s = (char*)malloc(1024); + UseItOrLoseIt(s, aSeven); + } + + Analyze(std::move(f)); +} + +void TestScan(int aSeven) { + auto f = MakeUnique<FpWriteFunc>("basic-scan.json"); + + ResetEverything("--mode=scan"); + + uintptr_t* p = (uintptr_t*)malloc(6 * sizeof(uintptr_t)); + UseItOrLoseIt(p, aSeven); + + // Hard-coded values checked by scan-test.py + p[0] = 0x123; // outside a block, small value + p[1] = 0x0; // null + p[2] = (uintptr_t)((uint8_t*)p - 1); // pointer outside a block, but nearby + p[3] = (uintptr_t)p; // pointer to start of a block + p[4] = (uintptr_t)((uint8_t*)p + 1); // pointer into a block + p[5] = 0x0; // trailing null + + Analyze(std::move(f)); +} + +void RunTests() { + // This test relies on the compiler not doing various optimizations, such as + // eliding unused malloc() calls or unrolling loops with fixed iteration + // counts. So we compile it with -O0 (or equivalent), which probably prevents + // that. We also use the following variable for various loop iteration + // counts, just in case compilers might unroll very small loops even with + // -O0. + int seven = 7; + + // Make sure that DMD is actually running; it is initialized on the first + // allocation. + int* x = (int*)malloc(100); + UseItOrLoseIt(x, seven); + MOZ_RELEASE_ASSERT(IsRunning()); + + // Please keep this in sync with run_test in test_dmd.js. + + TestEmpty("empty", "live"); + TestEmpty("empty", "dark-matter"); + TestEmpty("empty", "cumulative"); + + TestFull("full", 1, "live", seven); + TestFull("full", 1, "dark-matter", seven); + + TestFull("full", 2, "dark-matter", seven); + TestFull("full", 2, "cumulative", seven); + + TestPartial("partial", "live", seven); + + TestScan(seven); +} + +int main() { + RunTests(); + + return 0; +} diff --git a/memory/replace/dmd/test/basic-scan-32-expected.txt b/memory/replace/dmd/test/basic-scan-32-expected.txt new file mode 100644 index 0000000000..9f6f4db325 --- /dev/null +++ b/memory/replace/dmd/test/basic-scan-32-expected.txt @@ -0,0 +1,25 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o basic-scan-32-actual.txt --clamp-contents basic-scan.json + +Invocation { + $DMD = '--mode=scan' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 1 + 32 bytes (24 requested / 8 slop) + 100.00% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 32 bytes in 1 blocks +} + diff --git a/memory/replace/dmd/test/basic-scan-64-expected.txt b/memory/replace/dmd/test/basic-scan-64-expected.txt new file mode 100644 index 0000000000..59effc07b7 --- /dev/null +++ b/memory/replace/dmd/test/basic-scan-64-expected.txt @@ -0,0 +1,25 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o basic-scan-64-actual.txt --clamp-contents basic-scan.json + +Invocation { + $DMD = '--mode=scan' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 1 + 48 bytes (48 requested / 0 slop) + 100.00% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 48 bytes in 1 blocks +} + diff --git a/memory/replace/dmd/test/complete-empty-cumulative-expected.txt b/memory/replace/dmd/test/complete-empty-cumulative-expected.txt new file mode 100644 index 0000000000..2486015d0b --- /dev/null +++ b/memory/replace/dmd/test/complete-empty-cumulative-expected.txt @@ -0,0 +1,18 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-empty-cumulative-actual.txt complete-empty-cumulative.json + +Invocation { + $DMD = '--mode=cumulative --stacks=full' + Mode = 'cumulative' +} + +#----------------------------------------------------------------- + +# no cumulative heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 0 bytes in 0 blocks +} + diff --git a/memory/replace/dmd/test/complete-empty-dark-matter-expected.txt b/memory/replace/dmd/test/complete-empty-dark-matter-expected.txt new file mode 100644 index 0000000000..0020cddde3 --- /dev/null +++ b/memory/replace/dmd/test/complete-empty-dark-matter-expected.txt @@ -0,0 +1,29 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-empty-dark-matter-actual.txt complete-empty-dark-matter.json + +Invocation { + $DMD = '--mode=dark-matter --stacks=full' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +# no twice-reported heap blocks + +#----------------------------------------------------------------- + +# no unreported heap blocks + +#----------------------------------------------------------------- + +# no once-reported heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 0 bytes (100.00%) in 0 blocks (100.00%) + Unreported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) + Once-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) + Twice-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) +} + diff --git a/memory/replace/dmd/test/complete-empty-live-expected.txt b/memory/replace/dmd/test/complete-empty-live-expected.txt new file mode 100644 index 0000000000..d0d1721965 --- /dev/null +++ b/memory/replace/dmd/test/complete-empty-live-expected.txt @@ -0,0 +1,18 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-empty-live-actual.txt complete-empty-live.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +# no live heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 0 bytes in 0 blocks +} + diff --git a/memory/replace/dmd/test/complete-full1-dark-matter-expected.txt b/memory/replace/dmd/test/complete-full1-dark-matter-expected.txt new file mode 100644 index 0000000000..2c7d6b6343 --- /dev/null +++ b/memory/replace/dmd/test/complete-full1-dark-matter-expected.txt @@ -0,0 +1,265 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full1-dark-matter-actual.txt complete-full1-dark-matter.json + +Invocation { + $DMD = '--mode=dark-matter --stacks=full --show-dump-stats=yes' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +Twice-reported { + 1 block in heap block record 1 of 4 + 80 bytes (79 requested / 1 slop) + 0.66% of the heap (0.66% cumulative) + 29.41% of twice-reported (29.41% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 2 of 4 + 80 bytes (78 requested / 2 slop) + 0.66% of the heap (1.32% cumulative) + 29.41% of twice-reported (58.82% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 3 of 4 + 80 bytes (77 requested / 3 slop) + 0.66% of the heap (1.98% cumulative) + 29.41% of twice-reported (88.24% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 4 of 4 + 32 bytes (30 requested / 2 slop) + 0.26% of the heap (2.25% cumulative) + 11.76% of twice-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Unreported { + 9 blocks in heap block record 1 of 3 + 1,008 bytes (900 requested / 108 slop) + Individual block sizes: 112 x 9 + 8.33% of the heap (8.33% cumulative) + 81.82% of unreported (81.82% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 2 blocks in heap block record 2 of 3 + 112 bytes (112 requested / 0 slop) + Individual block sizes: 64; 48 + 0.93% of the heap (9.26% cumulative) + 9.09% of unreported (90.91% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 2 blocks in heap block record 3 of 3 + 112 bytes (112 requested / 0 slop) + Individual block sizes: 64; 48 + 0.93% of the heap (10.19% cumulative) + 9.09% of unreported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Once-reported { + 1 block in heap block record 1 of 11 + 8,192 bytes (7,169 requested / 1,023 slop) + 67.72% of the heap (67.72% cumulative) + 77.34% of once-reported (77.34% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 2 of 11 + 1,024 bytes (1,023 requested / 1 slop) + 8.47% of the heap (76.19% cumulative) + 9.67% of once-reported (87.01% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 3 of 11 + 512 bytes (512 requested / 0 slop) + 4.23% of the heap (80.42% cumulative) + 4.83% of once-reported (91.84% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 2 blocks in heap block record 4 of 11 + 240 bytes (240 requested / 0 slop) + Individual block sizes: 128; 112 + 1.98% of the heap (82.41% cumulative) + 2.27% of once-reported (94.11% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 2 blocks in heap block record 5 of 11 + 240 bytes (240 requested / 0 slop) + Individual block sizes: 128; 112 + 1.98% of the heap (84.39% cumulative) + 2.27% of once-reported (96.37% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 6 of 11 + 96 bytes (96 requested / 0 slop) + 0.79% of the heap (85.19% cumulative) + 0.91% of once-reported (97.28% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 7 of 11 + 96 bytes (96 requested / 0 slop) + 0.79% of the heap (85.98% cumulative) + 0.91% of once-reported (98.19% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 8 of 11 + 80 bytes (80 requested / 0 slop) + 0.66% of the heap (86.64% cumulative) + 0.76% of once-reported (98.94% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 9 of 11 + 80 bytes (80 requested / 0 slop) + 0.66% of the heap (87.30% cumulative) + 0.76% of once-reported (99.70% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 10 of 11 + 16 bytes (16 requested / 0 slop) + 0.13% of the heap (87.43% cumulative) + 0.15% of once-reported (99.85% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 11 of 11 + 16 bytes (10 requested / 6 slop) + 0.13% of the heap (87.57% cumulative) + 0.15% of once-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 12,096 bytes (100.00%) in 30 blocks (100.00%) + Unreported: 1,232 bytes ( 10.19%) in 13 blocks ( 43.33%) + Once-reported: 10,592 bytes ( 87.57%) in 13 blocks ( 43.33%) + Twice-reported: 272 bytes ( 2.25%) in 4 blocks ( 13.33%) +} + diff --git a/memory/replace/dmd/test/complete-full1-live-expected.txt b/memory/replace/dmd/test/complete-full1-live-expected.txt new file mode 100644 index 0000000000..eaa1883e1f --- /dev/null +++ b/memory/replace/dmd/test/complete-full1-live-expected.txt @@ -0,0 +1,127 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full1-live-actual.txt complete-full1-live.json + +Invocation { + $DMD = '--mode=live --stacks=full --show-dump-stats=yes' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 12 + 8,192 bytes (7,169 requested / 1,023 slop) + 67.72% of the heap (67.72% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 2 of 12 + 1,024 bytes (1,023 requested / 1 slop) + 8.47% of the heap (76.19% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 9 blocks in heap block record 3 of 12 + 1,008 bytes (900 requested / 108 slop) + Individual block sizes: 112 x 9 + 8.33% of the heap (84.52% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 6 blocks in heap block record 4 of 12 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 4.37% of the heap (88.89% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 6 blocks in heap block record 5 of 12 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 4.37% of the heap (93.25% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 6 of 12 + 512 bytes (512 requested / 0 slop) + 4.23% of the heap (97.49% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 7 of 12 + 80 bytes (79 requested / 1 slop) + 0.66% of the heap (98.15% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 8 of 12 + 80 bytes (78 requested / 2 slop) + 0.66% of the heap (98.81% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 9 of 12 + 80 bytes (77 requested / 3 slop) + 0.66% of the heap (99.47% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 10 of 12 + 32 bytes (30 requested / 2 slop) + 0.26% of the heap (99.74% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 11 of 12 + 16 bytes (16 requested / 0 slop) + 0.13% of the heap (99.87% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 12 of 12 + 16 bytes (10 requested / 6 slop) + 0.13% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 12,096 bytes in 30 blocks +} + diff --git a/memory/replace/dmd/test/complete-full2-cumulative-expected.txt b/memory/replace/dmd/test/complete-full2-cumulative-expected.txt new file mode 100644 index 0000000000..5a225b9b8e --- /dev/null +++ b/memory/replace/dmd/test/complete-full2-cumulative-expected.txt @@ -0,0 +1,173 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full2-cumulative-actual.txt complete-full2-cumulative.json + +Invocation { + $DMD = '--mode=cumulative --stacks=full --show-dump-stats=yes' + Mode = 'cumulative' +} + +#----------------------------------------------------------------- + +Cumulative { + 100 blocks in heap block record 1 of 17 + 12,800 bytes (12,800 requested / 0 slop) + Individual block sizes: 128 x 100 + 42.37% of the heap (42.37% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 2 of 17 + 8,192 bytes (7,169 requested / 1,023 slop) + 27.12% of the heap (69.49% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 3 of 17 + 4,096 bytes (4,096 requested / 0 slop) + 13.56% of the heap (83.05% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 10 blocks in heap block record 4 of 17 + 1,120 bytes (1,000 requested / 120 slop) + Individual block sizes: 112 x 10 + 3.71% of the heap (86.76% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 5 of 17 + 1,024 bytes (1,024 requested / 0 slop) + 3.39% of the heap (90.15% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 6 of 17 + 1,024 bytes (1,023 requested / 1 slop) + 3.39% of the heap (93.54% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 6 blocks in heap block record 7 of 17 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 1.75% of the heap (95.29% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 6 blocks in heap block record 8 of 17 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 1.75% of the heap (97.03% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 9 of 17 + 512 bytes (512 requested / 0 slop) + 1.69% of the heap (98.73% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 10 of 17 + 80 bytes (79 requested / 1 slop) + 0.26% of the heap (98.99% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 11 of 17 + 80 bytes (78 requested / 2 slop) + 0.26% of the heap (99.26% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 12 of 17 + 80 bytes (77 requested / 3 slop) + 0.26% of the heap (99.52% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 13 of 17 + 64 bytes (64 requested / 0 slop) + 0.21% of the heap (99.74% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 14 of 17 + 32 bytes (30 requested / 2 slop) + 0.11% of the heap (99.84% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 15 of 17 + 16 bytes (16 requested / 0 slop) + 0.05% of the heap (99.89% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 16 of 17 + 16 bytes (16 requested / 0 slop) + 0.05% of the heap (99.95% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 17 of 17 + 16 bytes (10 requested / 6 slop) + 0.05% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 30,208 bytes in 135 blocks +} + diff --git a/memory/replace/dmd/test/complete-full2-dark-matter-expected.txt b/memory/replace/dmd/test/complete-full2-dark-matter-expected.txt new file mode 100644 index 0000000000..5f9585a8c6 --- /dev/null +++ b/memory/replace/dmd/test/complete-full2-dark-matter-expected.txt @@ -0,0 +1,140 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full2-dark-matter-actual.txt complete-full2-dark-matter.json + +Invocation { + $DMD = '--mode=dark-matter --stacks=full --show-dump-stats=yes' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +Twice-reported { + 1 block in heap block record 1 of 2 + 80 bytes (77 requested / 3 slop) + 2.81% of the heap (2.81% cumulative) + 83.33% of twice-reported (83.33% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 2 of 2 + 16 bytes (16 requested / 0 slop) + 0.56% of the heap (3.37% cumulative) + 16.67% of twice-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Unreported { + 9 blocks in heap block record 1 of 3 + 1,008 bytes (900 requested / 108 slop) + Individual block sizes: 112 x 9 + 35.39% of the heap (35.39% cumulative) + 48.84% of unreported (48.84% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 6 blocks in heap block record 2 of 3 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 18.54% of the heap (53.93% cumulative) + 25.58% of unreported (74.42% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 6 blocks in heap block record 3 of 3 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 18.54% of the heap (72.47% cumulative) + 25.58% of unreported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Once-reported { + 1 block in heap block record 1 of 4 + 512 bytes (512 requested / 0 slop) + 17.98% of the heap (17.98% cumulative) + 74.42% of once-reported (74.42% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 2 of 4 + 80 bytes (79 requested / 1 slop) + 2.81% of the heap (20.79% cumulative) + 11.63% of once-reported (86.05% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 3 of 4 + 80 bytes (78 requested / 2 slop) + 2.81% of the heap (23.60% cumulative) + 11.63% of once-reported (97.67% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 4 of 4 + 16 bytes (10 requested / 6 slop) + 0.56% of the heap (24.16% cumulative) + 2.33% of once-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 2,848 bytes (100.00%) in 27 blocks (100.00%) + Unreported: 2,064 bytes ( 72.47%) in 21 blocks ( 77.78%) + Once-reported: 688 bytes ( 24.16%) in 4 blocks ( 14.81%) + Twice-reported: 96 bytes ( 3.37%) in 2 blocks ( 7.41%) +} + diff --git a/memory/replace/dmd/test/complete-partial-live-expected.txt b/memory/replace/dmd/test/complete-partial-live-expected.txt new file mode 100644 index 0000000000..e7f27b0ee6 --- /dev/null +++ b/memory/replace/dmd/test/complete-partial-live-expected.txt @@ -0,0 +1,56 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-partial-live-actual.txt complete-partial-live.json + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 9,531 blocks in heap block record 1 of 4 + 9,759,744 bytes (9,759,744 requested / 0 slop) + Individual block sizes: 1,024 x 9,531 + 83.56% of the heap (83.56% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 16,822 blocks in heap block record 2 of 4 + 1,510,672 bytes (1,510,672 requested / 0 slop) + Individual block sizes: 1,024 x 469; 128 x 6,864; 16 x 9,489 + 12.93% of the heap (96.49% cumulative) + Allocated at { + #01: (no stack trace recorded due to --stacks=partial) + } +} + +Live { + 3,136 blocks in heap block record 3 of 4 + 401,408 bytes (401,408 requested / 0 slop) + Individual block sizes: 128 x 3,136 + 3.44% of the heap (99.93% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 511 blocks in heap block record 4 of 4 + 8,176 bytes (8,176 requested / 0 slop) + Individual block sizes: 16 x 511 + 0.07% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 11,680,000 bytes in 30,000 blocks +} + diff --git a/memory/replace/dmd/test/moz.build b/memory/replace/dmd/test/moz.build new file mode 100644 index 0000000000..b869a86cc7 --- /dev/null +++ b/memory/replace/dmd/test/moz.build @@ -0,0 +1,29 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +GeckoSimplePrograms( + [ + "SmokeDMD", + ], + linkage=None, +) + +# See the comment at the top of SmokeDMD.cpp:RunTests(). +if CONFIG["CC_TYPE"] == "clang-cl": + CXXFLAGS += ["-Od", "-clang:-fno-lto"] +else: + CXXFLAGS += ["-O0", "-fno-lto"] + +DEFINES["MOZ_NO_MOZALLOC"] = True + +DisableStlWrapping() + +XPCSHELL_TESTS_MANIFESTS += [ + "xpcshell.ini", +] + +if CONFIG["CC_TYPE"] in ("clang", "gcc"): + CXXFLAGS += ["-Wno-error=shadow"] diff --git a/memory/replace/dmd/test/scan-test.py b/memory/replace/dmd/test/scan-test.py new file mode 100644 index 0000000000..11cbfaa32c --- /dev/null +++ b/memory/replace/dmd/test/scan-test.py @@ -0,0 +1,104 @@ +#! /usr/bin/env python +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Testing for the JSON file emitted by DMD heap scan mode when running SmokeDMD.""" + +from __future__ import absolute_import, print_function, division + +import argparse +import gzip +import json +import sys + +# The DMD output version this script handles. +outputVersion = 5 + + +def parseCommandLine(): + description = """ +Ensure that DMD heap scan mode creates the correct output when run with SmokeDMD. +This is only for testing. Input files can be gzipped. +""" + p = argparse.ArgumentParser(description=description) + + p.add_argument( + "--clamp-contents", + action="store_true", + help="expect that the contents of the JSON input file have had " + "their addresses clamped", + ) + + p.add_argument("input_file", help="a file produced by DMD") + + return p.parse_args(sys.argv[1:]) + + +def checkScanContents(contents, expected): + if len(contents) != len(expected): + raise Exception( + "Expected " + + str(len(expected)) + + " things in contents but found " + + str(len(contents)) + ) + + for i in range(len(expected)): + if contents[i] != expected[i]: + raise Exception( + "Expected to find " + + expected[i] + + " at offset " + + str(i) + + " but found " + + contents[i] + ) + + +def main(): + args = parseCommandLine() + + # Handle gzipped input if necessary. + isZipped = args.input_file.endswith(".gz") + opener = gzip.open if isZipped else open + + with opener(args.input_file, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + invocation = j["invocation"] + + mode = invocation["mode"] + if mode != "scan": + raise Exception("bad 'mode' property: '{:s}'".format(mode)) + + blockList = j["blockList"] + + if len(blockList) != 1: + raise Exception("Expected only one block") + + b = blockList[0] + + # The expected values are based on hard-coded values in SmokeDMD.cpp. + if args.clamp_contents: + expected = ["0", "0", "0", b["addr"], b["addr"]] + else: + addr = int(b["addr"], 16) + expected = [ + "123", + "0", + str(format(addr - 1, "x")), + b["addr"], + str(format(addr + 1, "x")), + "0", + ] + + checkScanContents(b["contents"], expected) + + +if __name__ == "__main__": + main() diff --git a/memory/replace/dmd/test/script-diff-dark-matter-expected.txt b/memory/replace/dmd/test/script-diff-dark-matter-expected.txt new file mode 100644 index 0000000000..b1fc28bac5 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-dark-matter-expected.txt @@ -0,0 +1,127 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-diff-dark-matter-actual.txt script-diff-dark-matter1.json script-diff-dark-matter2.json + +Invocation 1 { + $DMD = '--mode=dark-matter' + Mode = 'dark-matter' +} + +Invocation 2 { + $DMD = '1' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +Twice-reported { + -1 blocks in heap block record 1 of 1 + -1,088 bytes (-1,064 requested / -24 slop) + Individual block sizes: -1,024; -127; 63 + 15.46% of the heap (15.46% cumulative) + 100.00% of twice-reported (100.00% cumulative) + Allocated at { + #01: F (F.cpp:99) + } + Reported at { + #01: R1 (R1.cpp:99) + } + Reported again at { + #01: R2 (R2.cpp:99) + } +} + +#----------------------------------------------------------------- + +Unreported { + 4 blocks in heap block record 1 of 5 + 16,384 bytes (16,384 requested / 0 slop) + Individual block sizes: 4,096 x 4 + -232.76% of the heap (-232.76% cumulative) + 371.01% of unreported (371.01% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +Unreported { + 7 blocks in heap block record 2 of 5 + -11,968 bytes (-12,016 requested / 48 slop) + Individual block sizes: -15,360; 2,048; 512 x 2; 128; -127; 64 x 4; 63 + 170.02% of the heap (-62.74% cumulative) + -271.01% of unreported (100.00% cumulative) + Allocated at { + #01: F (F.cpp:99) + } +} + +Unreported { + 0 blocks in heap block record 3 of 5 + 0 bytes (-384 requested / 384 slop) + Individual block sizes: (no change) + -0.00% of the heap (-62.74% cumulative) + 0.00% of unreported (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Unreported { + -2 blocks in heap block record 4 of 5 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 8,192 x 2; -4,096 x 4 + -0.00% of the heap (-62.74% cumulative) + 0.00% of unreported (100.00% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Unreported { + 0 blocks in heap block record 5 of 5 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 20,480; -16,384; -8,192; 4,096 + -0.00% of the heap (-62.74% cumulative) + 0.00% of unreported (100.00% cumulative) + Allocated at { + #01: (no stack trace recorded due to --stacks=partial) + } +} + +#----------------------------------------------------------------- + +Once-reported { + -3 blocks in heap block record 1 of 2 + -10,240 bytes (-10,192 requested / -48 slop) + Individual block sizes: -4,096 x 2; -2,048 + 145.48% of the heap (145.48% cumulative) + 98.77% of once-reported (98.77% cumulative) + Allocated at { + #01: D (D.cpp:99) + } + Reported at { + #01: R1 (R1.cpp:99) + } +} + +Once-reported { + -1 blocks in heap block record 2 of 2 + -127 bytes (-151 requested / 24 slop) + 1.80% of the heap (147.28% cumulative) + 1.23% of once-reported (100.00% cumulative) + Allocated at { + #01: F (F.cpp:99) + } + Reported at { + #01: R1 (R1.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: -7,039 bytes (100.00%) in 4 blocks (100.00%) + Unreported: 4,416 bytes (-62.74%) in 9 blocks (225.00%) + Once-reported: -10,367 bytes (147.28%) in -4 blocks (-100.00%) + Twice-reported: -1,088 bytes ( 15.46%) in -1 blocks (-25.00%) +} + diff --git a/memory/replace/dmd/test/script-diff-dark-matter1.json b/memory/replace/dmd/test/script-diff-dark-matter1.json new file mode 100644 index 0000000000..c8edafebe9 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-dark-matter1.json @@ -0,0 +1,51 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=dark-matter", + "mode": "dark-matter" + }, + "blockList": [ + {"req": 4096, "alloc": "A", "num": 4}, + + {"req": 4096, "alloc": "B", "num": 3}, + {"req": 4096, "alloc": "B"}, + + {"req": 4096, "alloc": "C", "num": 2}, + {"req": 4096, "alloc": "C", "num": 2}, + + {"req": 4096, "alloc": "D", "reps": ["R1"], "num": 2}, + {"req": 2000, "slop": 48, "alloc": "D", "reps": ["R1"]}, + + {"req": 15360, "alloc": "F"}, + {"req": 512, "alloc": "F", "num": 2}, + {"req": 127, "alloc": "F"}, + {"req": 1024, "alloc": "F", "reps": ["R1"]}, + {"req": 127, "alloc": "F", "reps": ["R1"]}, + {"req": 1000, "slop": 24, "alloc": "F", "reps": ["R1", "R2"]}, + {"req": 127, "alloc": "F", "reps": ["R1", "R2"]}, + + {"req": 4096 }, + {"req": 8192 }, + {"req": 16384 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-diff-dark-matter2.json b/memory/replace/dmd/test/script-diff-dark-matter2.json new file mode 100644 index 0000000000..94b8888b89 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-dark-matter2.json @@ -0,0 +1,51 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "1", + "mode": "dark-matter" + }, + "blockList": [ + {"req": 4096, "alloc": "A", "num": 4}, + + {"req": 8192, "alloc": "B"}, + {"req": 8192, "alloc": "B"}, + + {"req": 4000, "slop": 96, "alloc": "C", "num": 4}, + + {"req": 4096, "alloc": "E", "num": 4}, + + {"req": 2000, "slop": 48, "alloc": "F"}, + {"req": 1000, "slop": 24, "alloc": "F", "reps": ["R1"]}, + {"req": 512, "alloc": "F"}, + {"req": 512, "alloc": "F"}, + {"req": 512, "alloc": "F"}, + {"req": 512, "alloc": "F"}, + {"req": 128, "alloc": "F"}, + {"req": 63, "alloc": "F", "reps": ["R1", "R2"]}, + {"req": 64, "alloc": "F", "num": 4}, + {"req": 63, "alloc": "F"}, + + {"req": 4096, "num": 2 }, + {"req": 20480 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-diff-live-expected.txt b/memory/replace/dmd/test/script-diff-live-expected.txt new file mode 100644 index 0000000000..ecd291ad8c --- /dev/null +++ b/memory/replace/dmd/test/script-diff-live-expected.txt @@ -0,0 +1,81 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-diff-live-actual.txt script-diff-live1.json script-diff-live2.json + +Invocation 1 { + $DMD = '--mode=live' + Mode = 'live' +} + +Invocation 2 { + $DMD = '--mode=live --stacks=partial' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 4 blocks in heap block record 1 of 6 + 16,384 bytes (16,384 requested / 0 slop) + Individual block sizes: 4,096 x 4 + -232.76% of the heap (-232.76% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 6 + -13,183 bytes (-13,231 requested / 48 slop) + Individual block sizes: -15,360; 2,048; -1,024; 512 x 2; 128; -127 x 3; 64 x 4; 63 x 2 + 187.29% of the heap (-45.48% cumulative) + Allocated at { + #01: F (F.cpp:99) + } +} + +Live { + -3 blocks in heap block record 3 of 6 + -10,240 bytes (-10,192 requested / -48 slop) + Individual block sizes: -4,096 x 2; -2,048 + 145.48% of the heap (100.00% cumulative) + Allocated at { + #01: D (D.cpp:99) + } +} + +Live { + 0 blocks in heap block record 4 of 6 + 0 bytes (-384 requested / 384 slop) + Individual block sizes: (no change) + -0.00% of the heap (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Live { + 0 blocks in heap block record 5 of 6 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 20,480; -16,384; -8,192; 4,096 + -0.00% of the heap (100.00% cumulative) + Allocated at { + #01: (no stack trace recorded due to --stacks=partial) + } +} + +Live { + -2 blocks in heap block record 6 of 6 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 8,192 x 2; -4,096 x 4 + -0.00% of the heap (100.00% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: -7,039 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-diff-live1.json b/memory/replace/dmd/test/script-diff-live1.json new file mode 100644 index 0000000000..87e07aed5e --- /dev/null +++ b/memory/replace/dmd/test/script-diff-live1.json @@ -0,0 +1,51 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=live", + "mode": "live" + }, + "blockList": [ + {"req": 4096, "alloc": "A", "num": 4}, + + {"req": 4096, "alloc": "B", "num": 4}, + + {"req": 4096, "alloc": "C", "num": 4}, + + {"req": 4096, "alloc": "D"}, + {"req": 4096, "alloc": "D"}, + {"req": 2000, "slop": 48, "alloc": "D"}, + + {"req": 15360, "alloc": "F"}, + {"req": 512, "alloc": "F"}, + {"req": 512, "alloc": "F"}, + {"req": 127, "alloc": "F"}, + {"req": 1024, "alloc": "F"}, + {"req": 127, "alloc": "F"}, + {"req": 1000, "slop": 24, "alloc": "F"}, + {"req": 127, "alloc": "F"}, + + {"req": 4096 }, + {"req": 8192 }, + {"req": 16384 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-diff-live2.json b/memory/replace/dmd/test/script-diff-live2.json new file mode 100644 index 0000000000..4c7476f4c3 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-live2.json @@ -0,0 +1,53 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=live --stacks=partial", + "mode": "live" + }, + "blockList": [ + {"req": 4096, "alloc": "A", "num": 3}, + {"req": 4096, "alloc": "A"}, + + {"req": 8192, "alloc": "B"}, + {"req": 8192, "alloc": "B"}, + + {"req": 4000, "slop": 96, "alloc": "C", "num": 4}, + + {"req": 4096, "alloc": "E"}, + {"req": 4096, "alloc": "E"}, + {"req": 4096, "alloc": "E"}, + {"req": 4096, "alloc": "E"}, + + {"req": 2000, "slop": 48, "alloc": "F"}, + {"req": 1000, "slop": 24, "alloc": "F"}, + {"req": 512, "alloc": "F", "num": 4}, + {"req": 128, "alloc": "F"}, + {"req": 63, "alloc": "F"}, + {"req": 64, "alloc": "F", "num": 4}, + {"req": 63, "alloc": "F"}, + + {"req": 4096 }, + {"req": 4096 }, + {"req": 20480 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt b/memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt new file mode 100644 index 0000000000..9428ef45fb --- /dev/null +++ b/memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt @@ -0,0 +1,72 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-ignore-alloc-fns-actual.txt --ignore-alloc-fns script-ignore-alloc-fns.json + +Invocation { + $DMD = '1' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +# no twice-reported heap blocks + +#----------------------------------------------------------------- + +Unreported { + 1 block in heap block record 1 of 4 + 1,048,576 bytes (1,048,576 requested / 0 slop) + 93.22% of the heap (93.22% cumulative) + 93.22% of unreported (93.22% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +Unreported { + 1 block in heap block record 2 of 4 + 65,536 bytes (65,536 requested / 0 slop) + 5.83% of the heap (99.05% cumulative) + 5.83% of unreported (99.05% cumulative) + Allocated at { + #01: js::jit::JitRuntime::initialize(JSContext*) (Ion.cpp:301) + } +} + +Unreported { + 1 block in heap block record 3 of 4 + 8,192 bytes (8,000 requested / 192 slop) + 0.73% of the heap (99.78% cumulative) + 0.73% of unreported (99.78% cumulative) + Allocated at { + #01: mozilla::Vector::growStorageBy(unsigned long) (Vector.h:802) + #02: D (D.cpp:99) + } +} + +Unreported { + 1 block in heap block record 4 of 4 + 2,500 bytes (2,500 requested / 0 slop) + 0.22% of the heap (100.00% cumulative) + 0.22% of unreported (100.00% cumulative) + Allocated at { + #01: g_type_create_instance (/usr/lib/x86_64-linux-gnu/libgobject-2.0.so.0) + #02: not_an_alloc_function_so_alloc_functions_below_here_will_not_be_stripped (blah) + #03: replace_posix_memalign (replace_malloc.h:120) + #04: ??? (/lib/x86_64-linux-gnu/libglib-2.0.so.0) + #05: another_non_alloc_function (blah) + } +} + +#----------------------------------------------------------------- + +# no once-reported heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 1,124,804 bytes (100.00%) in 4 blocks (100.00%) + Unreported: 1,124,804 bytes (100.00%) in 4 blocks (100.00%) + Once-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) + Twice-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) +} + diff --git a/memory/replace/dmd/test/script-ignore-alloc-fns.json b/memory/replace/dmd/test/script-ignore-alloc-fns.json new file mode 100644 index 0000000000..900d33e759 --- /dev/null +++ b/memory/replace/dmd/test/script-ignore-alloc-fns.json @@ -0,0 +1,46 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "1", + "mode": "dark-matter" + }, + "blockList": [ + {"req": 1048576, "alloc": "A"}, + {"req": 65536, "alloc": "B"}, + {"req": 8000, "slop": 192, "alloc": "C"}, + {"req": 2500, "alloc": "D"} + ], + "traceTable": { + "A": ["AA", "AB", "AC", "AD"], + "B": ["BA", "BB", "BC"], + "C": ["CA", "CB", "CC", "CD"], + "D": ["DA", "DB", "DD", "DD", "DE", "DF", "DG", "DH", "DI", "DJ"] + }, + "frameTable": { + "AA": "#00: replace_malloc (DMD.cpp:1106)", + "AB": "#00: moz_xmalloc (mozalloc.cpp:68)", + "AC": "#00: operator new(unsigned long) (mozalloc.h:208)", + "AD": "#00: A (A.cpp:99)", + + "BA": "#00: replace_calloc (DMD.cpp:1125)", + "BB": "#00: js_calloc(unsigned long) (Utility.h:107)", + "BC": "#06: js::jit::JitRuntime::initialize(JSContext*) (Ion.cpp:301)", + + "CA": "#00: replace_realloc (DMD.cpp:1153)", + "CB": "#00: bool* mozilla::MallocAllocPolicy::pod_realloc<bool>(bool*, unsigned long, unsigned long) (AllocPolicy.h:74)", + "CC": "#00: mozilla::Vector::growStorageBy(unsigned long) (Vector.h:802)", + "CD": "#00: D (D.cpp:99)", + + "DA": "#00: replace_memalign (DMD.cpp:1181)", + "DB": "#00: replace_posix_memalign (replace_malloc.h:120)", + "DC": "#00: ??? (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DD": "#00: g_slice_alloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DE": "#00: g_slice_alloc0 (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DF": "#00: g_type_create_instance (/usr/lib/x86_64-linux-gnu/libgobject-2.0.so.0)", + "DG": "#00: not_an_alloc_function_so_alloc_functions_below_here_will_not_be_stripped (blah)", + "DH": "#00: replace_posix_memalign (replace_malloc.h:120)", + "DI": "#00: ??? (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DJ": "#00: another_non_alloc_function (blah)" + } +} + diff --git a/memory/replace/dmd/test/script-max-frames-1-expected.txt b/memory/replace/dmd/test/script-max-frames-1-expected.txt new file mode 100644 index 0000000000..65a00762bb --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames-1-expected.txt @@ -0,0 +1,26 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-max-frames-1-actual.txt --max-frames=1 script-max-frames.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 4 blocks in heap block record 1 of 1 + 4,416 bytes (4,404 requested / 12 slop) + Individual block sizes: 4,096; 128; 112; 80 + 100.00% of the heap (100.00% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 4,416 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-max-frames-3-expected.txt b/memory/replace/dmd/test/script-max-frames-3-expected.txt new file mode 100644 index 0000000000..5df4914738 --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames-3-expected.txt @@ -0,0 +1,48 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-max-frames-3-actual.txt --max-frames=3 --no-fix-stacks script-max-frames.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 2 blocks in heap block record 1 of 3 + 4,224 bytes (4,224 requested / 0 slop) + Individual block sizes: 4,096; 128 + 95.65% of the heap (95.65% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: F (F.cpp:99) + #03: G (G.cpp:99) + } +} + +Live { + 1 block in heap block record 2 of 3 + 112 bytes (100 requested / 12 slop) + 2.54% of the heap (98.19% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: X (X.cpp:99) + #03: Y (Y.cpp:99) + } +} + +Live { + 1 block in heap block record 3 of 3 + 80 bytes (80 requested / 0 slop) + 1.81% of the heap (100.00% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 4,416 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-max-frames-8-expected.txt b/memory/replace/dmd/test/script-max-frames-8-expected.txt new file mode 100644 index 0000000000..174992d5b8 --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames-8-expected.txt @@ -0,0 +1,69 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-max-frames-8-actual.txt script-max-frames.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 4 + 4,096 bytes (4,096 requested / 0 slop) + 92.75% of the heap (92.75% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: F (F.cpp:99) + #03: G (G.cpp:99) + #04: H (H.cpp:99) + #05: I (I.cpp:99) + #06: J (J.cpp:99) + #07: K (K.cpp:99) + #08: L (L.cpp:99) + } +} + +Live { + 1 block in heap block record 2 of 4 + 128 bytes (128 requested / 0 slop) + 2.90% of the heap (95.65% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: F (F.cpp:99) + #03: G (G.cpp:99) + #04: R (R.cpp:99) + #05: S (S.cpp:99) + #06: T (T.cpp:99) + #07: U (U.cpp:99) + #08: V (V.cpp:99) + } +} + +Live { + 1 block in heap block record 3 of 4 + 112 bytes (100 requested / 12 slop) + 2.54% of the heap (98.19% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: X (X.cpp:99) + #03: Y (Y.cpp:99) + #04: Z (Z.cpp:99) + } +} + +Live { + 1 block in heap block record 4 of 4 + 80 bytes (80 requested / 0 slop) + 1.81% of the heap (100.00% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 4,416 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-max-frames.json b/memory/replace/dmd/test/script-max-frames.json new file mode 100644 index 0000000000..690d50fa7e --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames.json @@ -0,0 +1,43 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=live --stacks=full", + "mode": "live" + }, + "blockList": [ + {"req": 4096, "alloc": "A"}, + {"req": 128, "alloc": "B"}, + {"req": 100, "slop":12, "alloc": "C"}, + {"req": 80, "alloc": "D"} + ], + "traceTable": { + "A": ["E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"], + "B": ["E", "F", "G", "R", "S", "T", "U", "V"], + "C": ["E", "X", "Y", "Z"], + "D": ["E"] + }, + "frameTable": { + "E": "#00: E (E.cpp:99)", + "F": "#00: F (F.cpp:99)", + "G": "#00: G (G.cpp:99)", + "H": "#00: H (H.cpp:99)", + "I": "#00: I (I.cpp:99)", + "J": "#00: J (J.cpp:99)", + "K": "#00: K (K.cpp:99)", + "L": "#00: L (L.cpp:99)", + "M": "#00: M (M.cpp:99)", + "N": "#00: N (N.cpp:99)", + "O": "#00: O (O.cpp:99)", + "P": "#00: P (P.cpp:99)", + "Q": "#00: Q (Q.cpp:99)", + "R": "#00: R (R.cpp:99)", + "S": "#00: S (S.cpp:99)", + "T": "#00: T (T.cpp:99)", + "U": "#00: U (U.cpp:99)", + "V": "#00: V (V.cpp:99)", + "W": "#00: W (W.cpp:99)", + "X": "#00: X (X.cpp:99)", + "Y": "#00: Y (Y.cpp:99)", + "Z": "#00: Z (Z.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt b/memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt new file mode 100644 index 0000000000..8de03d953b --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-num-blocks-actual.txt --sort-by=num-blocks script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 8 blocks in heap block record 1 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (33.32% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (66.67% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 5 blocks in heap block record 3 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (100.00% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by-req-expected.txt b/memory/replace/dmd/test/script-sort-by-req-expected.txt new file mode 100644 index 0000000000..3ab21ba8f7 --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-req-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-req-actual.txt --sort-by=req --no-fix-stacks script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 5 blocks in heap block record 1 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (33.33% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (66.68% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 8 blocks in heap block record 3 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by-slop-expected.txt b/memory/replace/dmd/test/script-sort-by-slop-expected.txt new file mode 100644 index 0000000000..c325c7ed40 --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-slop-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-slop-actual.txt --sort-by=slop script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 8 blocks in heap block record 1 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (33.32% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (66.67% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 5 blocks in heap block record 3 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (100.00% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by-usable-expected.txt b/memory/replace/dmd/test/script-sort-by-usable-expected.txt new file mode 100644 index 0000000000..8239a4759e --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-usable-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-usable-actual.txt --sort-by=usable script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 5 blocks in heap block record 1 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (33.35% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (66.68% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +Live { + 8 blocks in heap block record 3 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by.json.gz b/memory/replace/dmd/test/script-sort-by.json.gz Binary files differnew file mode 100644 index 0000000000..b2308bab40 --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by.json.gz diff --git a/memory/replace/dmd/test/test_dmd.js b/memory/replace/dmd/test/test_dmd.js new file mode 100644 index 0000000000..ec7ee49bfa --- /dev/null +++ b/memory/replace/dmd/test/test_dmd.js @@ -0,0 +1,228 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*-*/ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const { FileUtils } = ChromeUtils.import( + "resource://gre/modules/FileUtils.jsm" +); +const { Services } = ChromeUtils.import("resource://gre/modules/Services.jsm"); + +// The xpcshell test harness sets PYTHON so we can read it here. +var gEnv = Cc["@mozilla.org/process/environment;1"].getService( + Ci.nsIEnvironment +); +var gPythonName = gEnv.get("PYTHON"); + +// If we're testing locally, the executable file is in "CurProcD". Otherwise, +// it is in another location that we have to find. +function getExecutable(aFilename) { + let file = FileUtils.getFile("CurProcD", [aFilename]); + if (!file.exists()) { + file = FileUtils.getFile("CurWorkD", []); + while (file.path.includes("xpcshell")) { + file = file.parent; + } + file.append("bin"); + file.append(aFilename); + } + return file; +} + +var gIsWindows = Services.appinfo.OS === "WINNT"; +var gDmdTestFile = getExecutable("SmokeDMD" + (gIsWindows ? ".exe" : "")); + +var gDmdScriptFile = getExecutable("dmd.py"); + +var gScanTestFile = FileUtils.getFile("CurWorkD", ["scan-test.py"]); + +function readFile(aFile) { + let fstream = Cc["@mozilla.org/network/file-input-stream;1"].createInstance( + Ci.nsIFileInputStream + ); + let cstream = Cc["@mozilla.org/intl/converter-input-stream;1"].createInstance( + Ci.nsIConverterInputStream + ); + fstream.init(aFile, -1, 0, 0); + cstream.init(fstream, "UTF-8", 0, 0); + + let data = ""; + let str = {}; + let read = 0; + do { + // Read as much as we can and put it in str.value. + read = cstream.readString(0xffffffff, str); + data += str.value; + } while (read != 0); + + cstream.close(); // this closes fstream + return data.replace(/\r/g, ""); // normalize line endings +} + +function runProcess(aExeFile, aArgs) { + let process = Cc["@mozilla.org/process/util;1"].createInstance(Ci.nsIProcess); + process.init(aExeFile); + process.run(/* blocking = */ true, aArgs, aArgs.length); + return process.exitValue; +} + +function test(aPrefix, aArgs) { + // DMD writes the JSON files to CurWorkD, so we do likewise here with + // |actualFile| for consistency. It is removed once we've finished. + let expectedFile = FileUtils.getFile("CurWorkD", [aPrefix + "-expected.txt"]); + let actualFile = FileUtils.getFile("CurWorkD", [aPrefix + "-actual.txt"]); + + // Run dmd.py on the JSON file, producing |actualFile|. + + let args = [ + gDmdScriptFile.path, + "--filter-stacks-for-testing", + "-o", + actualFile.path, + ].concat(aArgs); + + runProcess(new FileUtils.File(gPythonName), args); + + // Compare |expectedFile| with |actualFile|. We produce nice diffs with + // /usr/bin/diff on systems that have it (Mac and Linux). Otherwise (Windows) + // we do a string compare of the file contents and then print them both if + // they don't match. + + let success; + try { + let rv = runProcess(new FileUtils.File("/usr/bin/diff"), [ + "-u", + expectedFile.path, + actualFile.path, + ]); + success = rv == 0; + } catch (e) { + let expectedData = readFile(expectedFile); + let actualData = readFile(actualFile); + success = expectedData === actualData; + if (!success) { + expectedData = expectedData.split("\n"); + actualData = actualData.split("\n"); + for (let i = 0; i < expectedData.length; i++) { + print("EXPECTED:" + expectedData[i]); + } + for (let i = 0; i < actualData.length; i++) { + print(" ACTUAL:" + actualData[i]); + } + } + } + + ok(success, aPrefix); + + actualFile.remove(true); +} + +// Run scan-test.py on the JSON file and see if it succeeds. +function scanTest(aJsonFilePath, aExtraArgs) { + let args = [gScanTestFile.path, aJsonFilePath].concat(aExtraArgs); + + return runProcess(new FileUtils.File(gPythonName), args) == 0; +} + +function run_test() { + let jsonFile, jsonFile2; + + // These tests do complete end-to-end testing of DMD, i.e. both the C++ code + // that generates the JSON output, and the script that post-processes that + // output. + // + // Run these synchronously, because test() updates the complete*.json files + // in-place (to fix stacks) when it runs dmd.py, and that's not safe to do + // asynchronously. + + gEnv.set("DMD", "1"); + + runProcess(gDmdTestFile, []); + + function test2(aTestName, aMode) { + let name = "complete-" + aTestName + "-" + aMode; + jsonFile = FileUtils.getFile("CurWorkD", [name + ".json"]); + test(name, [jsonFile.path]); + jsonFile.remove(true); + } + + // Please keep this in sync with RunTests() in SmokeDMD.cpp. + + test2("empty", "live"); + test2("empty", "dark-matter"); + test2("empty", "cumulative"); + + test2("full1", "live"); + test2("full1", "dark-matter"); + + test2("full2", "dark-matter"); + test2("full2", "cumulative"); + + test2("partial", "live"); + + // Heap scan testing. + jsonFile = FileUtils.getFile("CurWorkD", ["basic-scan.json"]); + ok(scanTest(jsonFile.path), "Basic scan test"); + + let is64Bit = Services.appinfo.is64Bit; + let basicScanFileName = "basic-scan-" + (is64Bit ? "64" : "32"); + test(basicScanFileName, ["--clamp-contents", jsonFile.path]); + ok( + scanTest(jsonFile.path, ["--clamp-contents"]), + "Scan with address clamping" + ); + + // Run the generic test a second time to ensure that the first time produced + // valid JSON output. "--clamp-contents" is passed in so we don't have to have + // more variants of the files. + test(basicScanFileName, ["--clamp-contents", jsonFile.path]); + jsonFile.remove(true); + + // These tests only test the post-processing script. They use hand-written + // JSON files as input. Ideally the JSON files would contain comments + // explaining how they work, but JSON doesn't allow comments, so I've put + // explanations here. + + // This just tests that stack traces of various lengths are truncated + // appropriately. The number of records in the output is different for each + // of the tested values. + jsonFile = FileUtils.getFile("CurWorkD", ["script-max-frames.json"]); + test("script-max-frames-8", [jsonFile.path]); // --max-frames=8 is the default + test("script-max-frames-3", [ + "--max-frames=3", + "--no-fix-stacks", + jsonFile.path, + ]); + test("script-max-frames-1", ["--max-frames=1", jsonFile.path]); + + // This file has three records that are shown in a different order for each + // of the different sort values. It also tests the handling of gzipped JSON + // files. + jsonFile = FileUtils.getFile("CurWorkD", ["script-sort-by.json.gz"]); + test("script-sort-by-usable", ["--sort-by=usable", jsonFile.path]); + test("script-sort-by-req", [ + "--sort-by=req", + "--no-fix-stacks", + jsonFile.path, + ]); + test("script-sort-by-slop", ["--sort-by=slop", jsonFile.path]); + test("script-sort-by-num-blocks", ["--sort-by=num-blocks", jsonFile.path]); + + // This file has several real stack traces taken from Firefox execution, each + // of which tests a different allocator function (or functions). + jsonFile = FileUtils.getFile("CurWorkD", ["script-ignore-alloc-fns.json"]); + test("script-ignore-alloc-fns", ["--ignore-alloc-fns", jsonFile.path]); + + // This tests "live"-mode diffs. + jsonFile = FileUtils.getFile("CurWorkD", ["script-diff-live1.json"]); + jsonFile2 = FileUtils.getFile("CurWorkD", ["script-diff-live2.json"]); + test("script-diff-live", [jsonFile.path, jsonFile2.path]); + + // This tests "dark-matter"-mode diffs. + jsonFile = FileUtils.getFile("CurWorkD", ["script-diff-dark-matter1.json"]); + jsonFile2 = FileUtils.getFile("CurWorkD", ["script-diff-dark-matter2.json"]); + test("script-diff-dark-matter", [jsonFile.path, jsonFile2.path]); +} diff --git a/memory/replace/dmd/test/xpcshell.ini b/memory/replace/dmd/test/xpcshell.ini new file mode 100644 index 0000000000..7b4bdb2dad --- /dev/null +++ b/memory/replace/dmd/test/xpcshell.ini @@ -0,0 +1,34 @@ +[DEFAULT] +support-files = + basic-scan-32-expected.txt + basic-scan-64-expected.txt + complete-empty-live-expected.txt + complete-empty-dark-matter-expected.txt + complete-empty-cumulative-expected.txt + complete-full1-live-expected.txt + complete-full1-dark-matter-expected.txt + complete-full2-dark-matter-expected.txt + complete-full2-cumulative-expected.txt + complete-partial-live-expected.txt + scan-test.py + script-max-frames.json + script-max-frames-8-expected.txt + script-max-frames-3-expected.txt + script-max-frames-1-expected.txt + script-sort-by.json.gz + script-sort-by-usable-expected.txt + script-sort-by-req-expected.txt + script-sort-by-slop-expected.txt + script-sort-by-num-blocks-expected.txt + script-ignore-alloc-fns.json + script-ignore-alloc-fns-expected.txt + script-diff-live1.json + script-diff-live2.json + script-diff-live-expected.txt + script-diff-dark-matter1.json + script-diff-dark-matter2.json + script-diff-dark-matter-expected.txt + +[test_dmd.js] +dmd = true +skip-if = !(os=='linux' || os=='mac' || (os=='win' && !pgo)) || (os == "win" && processor == "aarch64") # aarch64 due to 1536250 diff --git a/memory/replace/logalloc/FdPrintf.cpp b/memory/replace/logalloc/FdPrintf.cpp new file mode 100644 index 0000000000..d8e27ea133 --- /dev/null +++ b/memory/replace/logalloc/FdPrintf.cpp @@ -0,0 +1,199 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <cstdarg> + +#ifdef _WIN32 +# include <windows.h> +#else +# include <unistd.h> +#endif +#include <cmath> +#include <cstring> +#include "mozilla/Assertions.h" +#include "mozilla/Unused.h" + +/* Template class allowing a limited number of increments on a value */ +template <typename T> +class CheckedIncrement { + public: + CheckedIncrement(T aValue, size_t aMaxIncrement) + : mValue(aValue), mMaxIncrement(aMaxIncrement) {} + + T operator++(int) { + if (!mMaxIncrement) { + MOZ_CRASH("overflow detected"); + } + mMaxIncrement--; + return mValue++; + } + + T& operator++() { + (*this)++; + return mValue; + } + + void advance(T end) { + // Only makes sense if T is a pointer type. + size_t diff = end - mValue; + if (diff > mMaxIncrement) { + MOZ_CRASH("overflow detected"); + } + mMaxIncrement -= diff; + mValue = end; + }; + + void rewind(T pos) { + size_t diff = mValue - pos; + mMaxIncrement += diff; + mValue = pos; + } + + operator T() { return mValue; } + T value() { return mValue; } + + private: + T mValue; + size_t mMaxIncrement; +}; + +template <typename T> +static unsigned NumDigits(T n) { + if (n < 1) { + // We want one digit, it will be 0. + return 1; + } + + double l = log10(static_cast<double>(n)); + double cl = ceil(l); + return l == cl ? unsigned(cl) + 1 : unsigned(cl); +} + +static void LeftPad(CheckedIncrement<char*>& b, size_t pad) { + while (pad-- > 0) { + *(b++) = ' '; + } +} + +// Write the digits into the buffer. +static void WriteDigits(CheckedIncrement<char*>& b, size_t i, + size_t num_digits) { + size_t x = pow(10, double(num_digits - 1)); + do { + *(b++) = "0123456789"[(i / x) % 10]; + x /= 10; + } while (x > 0); +} + +void FdPrintf(intptr_t aFd, const char* aFormat, ...) { + if (aFd == 0) { + return; + } + char buf[256]; + CheckedIncrement<char*> b(buf, sizeof(buf)); + CheckedIncrement<const char*> f(aFormat, strlen(aFormat) + 1); + va_list ap; + va_start(ap, aFormat); + while (true) { + switch (*f) { + case '\0': + goto out; + + case '%': { + // The start of the format specifier is used if this specifier is + // invalid. + const char* start = f; + + // Read the field width + f++; + char* end = nullptr; + size_t width = strtoul(f, &end, 10); + // If strtol can't find a number that's okay, that means 0 in our + // case, but we must advance f). + f.advance(end); + + switch (*f) { + case 'z': { + if (*(++f) == 'u') { + size_t i = va_arg(ap, size_t); + + size_t num_digits = NumDigits(i); + LeftPad(b, width > num_digits ? width - num_digits : 0); + WriteDigits(b, i, num_digits); + } else { + // If the format specifier is unknown then write out '%' and + // rewind to the beginning of the specifier causing it to be + // printed normally. + *(b++) = '%'; + f.rewind(start); + } + break; + } + + case 'p': { + intptr_t ptr = va_arg(ap, intptr_t); + *(b++) = '0'; + *(b++) = 'x'; + int x = sizeof(intptr_t) * 8; + bool wrote_msb = false; + do { + x -= 4; + size_t hex_digit = ptr >> x & 0xf; + if (hex_digit || wrote_msb) { + *(b++) = "0123456789abcdef"[hex_digit]; + wrote_msb = true; + } + } while (x > 0); + if (!wrote_msb) { + *(b++) = '0'; + } + break; + } + + case 's': { + const char* str = va_arg(ap, const char*); + size_t len = strlen(str); + + LeftPad(b, width > len ? width - len : 0); + + while (*str) { + *(b++) = *(str++); + } + + break; + } + + case '%': + // Print a single raw '%'. + *(b++) = '%'; + break; + + default: + // If the format specifier is unknown then write out '%' and + // rewind to the beginning of the specifier causing it to be + // printed normally. + *(b++) = '%'; + f.rewind(start); + break; + } + break; + } + default: + *(b++) = *f; + break; + } + f++; + } +out: +#ifdef _WIN32 + // See comment in FdPrintf.h as to why WriteFile is used. + DWORD written; + WriteFile(reinterpret_cast<HANDLE>(aFd), buf, b - buf, &written, nullptr); +#else + MOZ_UNUSED(write(aFd, buf, b - buf)); +#endif + va_end(ap); +} diff --git a/memory/replace/logalloc/FdPrintf.h b/memory/replace/logalloc/FdPrintf.h new file mode 100644 index 0000000000..f390d57ed5 --- /dev/null +++ b/memory/replace/logalloc/FdPrintf.h @@ -0,0 +1,27 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __FdPrintf_h__ +#define __FdPrintf_h__ + +/* We can't use libc's (f)printf because it would reenter in replace_malloc, + * So use a custom and simplified version. Only %p, %zu, %s and %% are + * supported, %zu, %s, support width specifiers. + * + * /!\ This function used a fixed-size internal buffer. The caller is + * expected to not use a format string that may overflow. + * The aFd argument is a file descriptor on UNIX and a native win32 file + * handle on Windows (from CreateFile). We can't use the windows POSIX + * APIs is that they don't support O_APPEND in a multi-process-safe way, + * while CreateFile does. + */ +extern void FdPrintf(intptr_t aFd, const char* aFormat, ...) +#ifdef __GNUC__ + __attribute__((format(printf, 2, 3))) +#endif + ; + +#endif /* __FdPrintf_h__ */ diff --git a/memory/replace/logalloc/LogAlloc.cpp b/memory/replace/logalloc/LogAlloc.cpp new file mode 100644 index 0000000000..8d6aad0675 --- /dev/null +++ b/memory/replace/logalloc/LogAlloc.cpp @@ -0,0 +1,236 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <cstdlib> +#include <cstdio> +#include <fcntl.h> + +#ifdef _WIN32 +# include <windows.h> +# include <io.h> +# include <process.h> +#else +# include <unistd.h> +# include <pthread.h> +#endif + +#include "replace_malloc.h" +#include "FdPrintf.h" +#include "Mutex.h" + +static malloc_table_t sFuncs; +static intptr_t sFd = 0; +static bool sStdoutOrStderr = false; + +static Mutex sMutex; + +#ifndef _WIN32 +static void prefork() { sMutex.Lock(); } + +static void postfork() { sMutex.Unlock(); } +#endif + +static size_t GetPid() { return size_t(getpid()); } + +static size_t GetTid() { +#if defined(_WIN32) + return size_t(GetCurrentThreadId()); +#else + return size_t(pthread_self()); +#endif +} + +#ifdef ANDROID +/* Android doesn't have pthread_atfork defined in pthread.h */ +extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void), + void (*)(void)); +#endif + +class LogAllocBridge : public ReplaceMallocBridge { + virtual void InitDebugFd(mozilla::DebugFdRegistry& aRegistry) override { + if (!sStdoutOrStderr) { + aRegistry.RegisterHandle(sFd); + } + } +}; + +/* Do a simple, text-form, log of all calls to replace-malloc functions. + * Use locking to guarantee that an allocation that did happen is logged + * before any other allocation/free happens. + */ + +static void* replace_malloc(size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.malloc(aSize); + FdPrintf(sFd, "%zu %zu malloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr); + return ptr; +} + +static int replace_posix_memalign(void** aPtr, size_t aAlignment, + size_t aSize) { + MutexAutoLock lock(sMutex); + int ret = sFuncs.posix_memalign(aPtr, aAlignment, aSize); + FdPrintf(sFd, "%zu %zu posix_memalign(%zu,%zu)=%p\n", GetPid(), GetTid(), + aAlignment, aSize, (ret == 0) ? *aPtr : nullptr); + return ret; +} + +static void* replace_aligned_alloc(size_t aAlignment, size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.aligned_alloc(aAlignment, aSize); + FdPrintf(sFd, "%zu %zu aligned_alloc(%zu,%zu)=%p\n", GetPid(), GetTid(), + aAlignment, aSize, ptr); + return ptr; +} + +static void* replace_calloc(size_t aNum, size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.calloc(aNum, aSize); + FdPrintf(sFd, "%zu %zu calloc(%zu,%zu)=%p\n", GetPid(), GetTid(), aNum, aSize, + ptr); + return ptr; +} + +static void* replace_realloc(void* aPtr, size_t aSize) { + MutexAutoLock lock(sMutex); + void* new_ptr = sFuncs.realloc(aPtr, aSize); + FdPrintf(sFd, "%zu %zu realloc(%p,%zu)=%p\n", GetPid(), GetTid(), aPtr, aSize, + new_ptr); + return new_ptr; +} + +static void replace_free(void* aPtr) { + MutexAutoLock lock(sMutex); + FdPrintf(sFd, "%zu %zu free(%p)\n", GetPid(), GetTid(), aPtr); + sFuncs.free(aPtr); +} + +static void* replace_memalign(size_t aAlignment, size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.memalign(aAlignment, aSize); + FdPrintf(sFd, "%zu %zu memalign(%zu,%zu)=%p\n", GetPid(), GetTid(), + aAlignment, aSize, ptr); + return ptr; +} + +static void* replace_valloc(size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.valloc(aSize); + FdPrintf(sFd, "%zu %zu valloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr); + return ptr; +} + +static void replace_jemalloc_stats(jemalloc_stats_t* aStats, + jemalloc_bin_stats_t* aBinStats) { + MutexAutoLock lock(sMutex); + sFuncs.jemalloc_stats_internal(aStats, aBinStats); + FdPrintf(sFd, "%zu %zu jemalloc_stats()\n", GetPid(), GetTid()); +} + +void replace_init(malloc_table_t* aTable, ReplaceMallocBridge** aBridge) { + /* Initialize output file descriptor from the MALLOC_LOG environment + * variable. Numbers up to 9999 are considered as a preopened file + * descriptor number. Other values are considered as a file name. */ +#ifdef _WIN32 + wchar_t* log = _wgetenv(L"MALLOC_LOG"); +#else + char* log = getenv("MALLOC_LOG"); +#endif + if (log && *log) { + int fd = 0; + const auto* fd_num = log; + while (*fd_num) { + /* Reject non digits. */ + if (*fd_num < '0' || *fd_num > '9') { + fd = -1; + break; + } + fd = fd * 10 + (*fd_num - '0'); + /* Reject values >= 10000. */ + if (fd >= 10000) { + fd = -1; + break; + } + fd_num++; + } + if (fd == 1 || fd == 2) { + sStdoutOrStderr = true; + } +#ifdef _WIN32 + // See comment in FdPrintf.h as to why CreateFile is used. + HANDLE handle; + if (fd > 0) { + handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); + } else { + handle = + CreateFileW(log, FILE_APPEND_DATA, FILE_SHARE_READ | FILE_SHARE_WRITE, + nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr); + } + if (handle != INVALID_HANDLE_VALUE) { + sFd = reinterpret_cast<intptr_t>(handle); + } +#else + if (fd == -1) { + fd = open(log, O_WRONLY | O_CREAT | O_APPEND, 0644); + } + if (fd > 0) { + sFd = fd; + } +#endif + } + + // Don't initialize if we weren't passed a valid MALLOC_LOG. + if (sFd == 0) { + return; + } + + sMutex.Init(); + static LogAllocBridge bridge; + sFuncs = *aTable; +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#define MALLOC_DECL(name, ...) aTable->name = replace_##name; +#include "malloc_decls.h" + aTable->jemalloc_stats_internal = replace_jemalloc_stats; + if (!getenv("MALLOC_LOG_MINIMAL")) { + aTable->posix_memalign = replace_posix_memalign; + aTable->aligned_alloc = replace_aligned_alloc; + aTable->valloc = replace_valloc; + } + *aBridge = &bridge; + +#ifndef _WIN32 + /* When another thread has acquired a lock before forking, the child + * process will inherit the lock state but the thread, being nonexistent + * in the child process, will never release it, leading to a dead-lock + * whenever the child process gets the lock. We thus need to ensure no + * other thread is holding the lock before forking, by acquiring it + * ourselves, and releasing it after forking, both in the parent and child + * processes. + * Windows doesn't have this problem since there is no fork(). + * The real allocator, however, might be doing the same thing (jemalloc + * does). But pthread_atfork `prepare` handlers (first argument) are + * processed in reverse order they were established. But replace_init + * runs before the real allocator has had any chance to initialize and + * call pthread_atfork itself. This leads to its prefork running before + * ours. This leads to a race condition that can lead to a deadlock like + * the following: + * - thread A forks. + * - libc calls real allocator's prefork, so thread A holds the real + * allocator lock. + * - thread B calls malloc, which calls our replace_malloc. + * - consequently, thread B holds our lock. + * - thread B then proceeds to call the real allocator's malloc, and + * waits for the real allocator's lock, which thread A holds. + * - libc calls our prefork, so thread A waits for our lock, which + * thread B holds. + * To avoid this race condition, the real allocator's prefork must be + * called after ours, which means it needs to be registered before ours. + * So trick the real allocator into initializing itself without more side + * effects by calling malloc with a size it can't possibly allocate. */ + sFuncs.malloc(-1); + pthread_atfork(prefork, postfork, postfork); +#endif +} diff --git a/memory/replace/logalloc/README b/memory/replace/logalloc/README new file mode 100644 index 0000000000..c2e8cf66ce --- /dev/null +++ b/memory/replace/logalloc/README @@ -0,0 +1,95 @@ +Logalloc is a replace-malloc library for Firefox (see +memory/build/replace_malloc.h) that dumps a log of memory allocations to a +given file descriptor or file name. That log can then be replayed against +Firefox's default memory allocator independently or through another +replace-malloc library, allowing the testing of other allocators under the +exact same workload. + +To get an allocation log the following environment variable when starting +Firefox: + MALLOC_LOG=/path/to/log-file + or + MALLOC_LOG=number + +When MALLOC_LOG is a number below 10000, it is considered as a file +descriptor number that is fed to Firefox when it is started. Otherwise, +it is considered as a file name. + +As those allocation logs can grow large quite quickly, it can be useful +to pipe the output to a compression tool. + +MALLOC_LOG=1 would send to Firefox's stdout, MALLOC_LOG=2 would send to +its stderr. Since in both cases that could be mixed with other output +from Firefox, it is usually better to use another file descriptor +by shell redirections, such as: + + MALLOC_LOG=3 firefox 3>&1 1>&2 | gzip -c > log.gz + +(3>&1 copies the `| gzip` pipe file descriptor to file descriptor #3, 1>&2 +then copies stderr to stdout. This leads to: fd1 and fd2 sending to stderr +of the parent process (the shell), and fd3 sending to gzip.) + +Each line of the allocations log is formatted as follows: + <pid> <tid> <function>([<args>])[=<result>] +where <args> is a comma separated list of values. The number of <args> and +the presence of <result> depend on the <function>. + +Example log: + 18545 18545 malloc(32)=0x7f90495120e0 + 18545 18545 calloc(1,148)=0x7f9049537480 + 18545 18545 realloc(0x7f90495120e0,64)=0x7f9049536680 + 18545 18545 posix_memalign(256,240)=0x7f9049583300 + 18545 18545 jemalloc_stats() + 18545 18545 free(0x7f9049536680) + +This log can be replayed with the logalloc-replay tool in +memory/replace/logalloc/replay. However, as the goal of that tool is to +reproduce the recorded memory allocations, it needs to avoid as much as +possible doing its own allocations for bookkeeping. Reading the logs as +they are would require data structures and memory allocations. As a +consequence, the logs need to be preprocessed beforehand. + +The logalloc_munge.py script is responsible for that preprocessing. It simply +takes a raw log on its stdin, and outputs the preprocessed log on its stdout. +It replaces pointer addresses with indexes the logalloc-replay tool can use +in a large (almost) linear array of allocation tracking slots (prefixed with +'#'). It also replaces the pids with numbers starting from 1 (such as the +first seen pid number is 1, the second is 2, etc.). + +The above example log would become the following, once preprocessed: + 1 1 malloc(32)=#1 + 1 1 calloc(1,148)=#2 + 1 1 realloc(#1,64)=#1 + 1 1 posix_memalign(256,240)=#3 + 1 1 jemalloc_stats() + 1 1 free(#1) + +The logalloc-replay tool then takes the preprocessed log on its stdin and +replays the allocations printed there, but will only replay those with the +same process id as the first line (which normally is 1). + +As the log files are simple text files, though, it is easy to separate out +the different processes log with e.g. grep, and feed the separate processes +logs to logalloc-replay. + +The logalloc-replay program won't output anything unless jemalloc_stats +records appears in the log. You can expect those to be recorded when going +to about:memory in Firefox, but they can also be added after preprocessing. + +Here is an example of what one can do: + + gunzip -c log.gz | python logalloc_munge.py | \ + awk '$1 == "2" { print $0 } !(NR % 10000) { print "2 1 jemalloc_stats()" }' | \ + ./logalloc-replay + +The above command replays the allocations of process #2, with some stats +output every 10000 records. + +The logalloc-replay tool itself being hooked with replace-malloc, it is possible +to set LD_PRELOAD/DYLD_INSERT_LIBRARIES/MOZ_REPLACE_MALLOC_LIB and replay a log +through a different allocator. For example: + + LD_PRELOAD=libreplace_jemalloc.so logalloc-replay < log + +Will replay the log against jemalloc4 (which is, as of writing, what +libreplace_jemalloc.so contains). diff --git a/memory/replace/logalloc/moz.build b/memory/replace/logalloc/moz.build new file mode 100644 index 0000000000..c52d9e69e0 --- /dev/null +++ b/memory/replace/logalloc/moz.build @@ -0,0 +1,30 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ReplaceMalloc("logalloc") + +SOURCES += [ + "FdPrintf.cpp", + "LogAlloc.cpp", +] + +DisableStlWrapping() +NO_PGO = True +DEFINES["MOZ_NO_MOZALLOC"] = True + +LOCAL_INCLUDES += [ + "/memory/build", +] + +# Android doesn't have pthread_atfork, but we have our own in mozglue. +if CONFIG["OS_TARGET"] == "Android" and FORCE_SHARED_LIB: + USE_LIBS += [ + "mozglue", + ] + +DIRS += [ + "replay", +] diff --git a/memory/replace/logalloc/replay/Makefile.in b/memory/replace/logalloc/replay/Makefile.in new file mode 100644 index 0000000000..8f5b5589b6 --- /dev/null +++ b/memory/replace/logalloc/replay/Makefile.in @@ -0,0 +1,42 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ifndef CROSS_COMPILE +ifndef MOZ_CODE_COVERAGE + +ifeq ($(OS_TARGET),WINNT) +LOGALLOC_VAR = MOZ_REPLACE_MALLOC_LIB +else +ifeq ($(OS_TARGET),Darwin) +LOGALLOC_VAR = DYLD_INSERT_LIBRARIES +else +LOGALLOC_VAR = LD_PRELOAD +endif +endif + +ifndef MOZ_REPLACE_MALLOC_STATIC +LOGALLOC = $(LOGALLOC_VAR)=$(CURDIR)/../$(DLL_PREFIX)logalloc$(DLL_SUFFIX) +endif + +expected_output.log: $(srcdir)/replay.log +# The logalloc-replay program will only replay entries from the first pid, +# so the expected output only contains entries beginning with "1 " + grep "^1 " $< > $@ + +check:: $(srcdir)/replay.log expected_output.log $(srcdir)/expected_output_minimal.log +# Test with MALLOC_LOG as a file descriptor number +# We filter out anything happening before the first jemalloc_stats (first +# command in replay.log) because starting with libstdc++ 5, a static +# initializer in the STL allocates memory, which we obviously don't have +# in expected_output.log. + MALLOC_LOG=1 $(LOGALLOC) ./$(PROGRAM) < $< | sed -n '/jemalloc_stats/,$$p' | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - expected_output.log +# Test with MALLOC_LOG as a file name + $(RM) test_output.log + MALLOC_LOG=test_output.log $(LOGALLOC) ./$(PROGRAM) < $< + sed -n '/jemalloc_stats/,$$p' test_output.log | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - expected_output.log + + MALLOC_LOG=1 MALLOC_LOG_MINIMAL=1 $(LOGALLOC) ./$(PROGRAM) < $< | sed -n '/jemalloc_stats/,$$p' | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - $(srcdir)/expected_output_minimal.log + +endif +endif diff --git a/memory/replace/logalloc/replay/Replay.cpp b/memory/replace/logalloc/replay/Replay.cpp new file mode 100644 index 0000000000..daf0580389 --- /dev/null +++ b/memory/replace/logalloc/replay/Replay.cpp @@ -0,0 +1,739 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#define MOZ_MEMORY_IMPL +#include "mozmemory_wrap.h" + +#ifdef _WIN32 +# include <windows.h> +# include <io.h> +typedef intptr_t ssize_t; +#else +# include <sys/mman.h> +# include <unistd.h> +#endif +#include <algorithm> +#include <cmath> +#include <cstdio> +#include <cstring> + +#include "mozilla/Assertions.h" +#include "mozilla/MathAlgorithms.h" +#include "FdPrintf.h" + +static void die(const char* message) { + /* Here, it doesn't matter that fprintf may allocate memory. */ + fprintf(stderr, "%s\n", message); + exit(1); +} + +/* We don't want to be using malloc() to allocate our internal tracking + * data, because that would change the parameters of what is being measured, + * so we want to use data types that directly use mmap/VirtualAlloc. */ +template <typename T, size_t Len> +class MappedArray { + public: + MappedArray() : mPtr(nullptr) {} + + ~MappedArray() { + if (mPtr) { +#ifdef _WIN32 + VirtualFree(mPtr, sizeof(T) * Len, MEM_RELEASE); +#else + munmap(mPtr, sizeof(T) * Len); +#endif + } + } + + T& operator[](size_t aIndex) const { + if (mPtr) { + return mPtr[aIndex]; + } + +#ifdef _WIN32 + mPtr = reinterpret_cast<T*>(VirtualAlloc( + nullptr, sizeof(T) * Len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)); + if (mPtr == nullptr) { + die("VirtualAlloc error"); + } +#else + mPtr = reinterpret_cast<T*>(mmap(nullptr, sizeof(T) * Len, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0)); + if (mPtr == MAP_FAILED) { + die("Mmap error"); + } +#endif + return mPtr[aIndex]; + } + + private: + mutable T* mPtr; +}; + +/* Type for records of allocations. */ +struct MemSlot { + void* mPtr; + + // mRequest is only valid if mPtr is non-null. It doesn't need to be cleared + // when memory is freed or realloc()ed. + size_t mRequest; +}; + +/* An almost infinite list of slots. + * In essence, this is a linked list of arrays of groups of slots. + * Each group is 1MB. On 64-bits, one group allows to store 64k allocations. + * Each MemSlotList instance can store 1023 such groups, which means more + * than 67M allocations. In case more would be needed, we chain to another + * MemSlotList, and so on. + * Using 1023 groups makes the MemSlotList itself page sized on 32-bits + * and 2 pages-sized on 64-bits. + */ +class MemSlotList { + static const size_t kGroups = 1024 - 1; + static const size_t kGroupSize = (1024 * 1024) / sizeof(MemSlot); + + MappedArray<MemSlot, kGroupSize> mSlots[kGroups]; + MappedArray<MemSlotList, 1> mNext; + + public: + MemSlot& operator[](size_t aIndex) const { + if (aIndex < kGroupSize * kGroups) { + return mSlots[aIndex / kGroupSize][aIndex % kGroupSize]; + } + aIndex -= kGroupSize * kGroups; + return mNext[0][aIndex]; + } +}; + +/* Helper class for memory buffers */ +class Buffer { + public: + Buffer() : mBuf(nullptr), mLength(0) {} + + Buffer(const void* aBuf, size_t aLength) + : mBuf(reinterpret_cast<const char*>(aBuf)), mLength(aLength) {} + + /* Constructor for string literals. */ + template <size_t Size> + explicit Buffer(const char (&aStr)[Size]) : mBuf(aStr), mLength(Size - 1) {} + + /* Returns a sub-buffer up-to but not including the given aNeedle character. + * The "parent" buffer itself is altered to begin after the aNeedle + * character. + * If the aNeedle character is not found, return the entire buffer, and empty + * the "parent" buffer. */ + Buffer SplitChar(char aNeedle) { + char* buf = const_cast<char*>(mBuf); + char* c = reinterpret_cast<char*>(memchr(buf, aNeedle, mLength)); + if (!c) { + return Split(mLength); + } + + Buffer result = Split(c - buf); + // Remove the aNeedle character itself. + Split(1); + return result; + } + + /* Returns a sub-buffer of at most aLength characters. The "parent" buffer is + * amputated of those aLength characters. If the "parent" buffer is smaller + * than aLength, then its length is used instead. */ + Buffer Split(size_t aLength) { + Buffer result(mBuf, std::min(aLength, mLength)); + mLength -= result.mLength; + mBuf += result.mLength; + return result; + } + + /* Move the buffer (including its content) to the memory address of the aOther + * buffer. */ + void Slide(Buffer aOther) { + memmove(const_cast<char*>(aOther.mBuf), mBuf, mLength); + mBuf = aOther.mBuf; + } + + /* Returns whether the two involved buffers have the same content. */ + bool operator==(Buffer aOther) { + return mLength == aOther.mLength && + (mBuf == aOther.mBuf || !strncmp(mBuf, aOther.mBuf, mLength)); + } + + /* Returns whether the buffer is empty. */ + explicit operator bool() { return mLength; } + + /* Returns the memory location of the buffer. */ + const char* get() { return mBuf; } + + /* Returns the memory location of the end of the buffer (technically, the + * first byte after the buffer). */ + const char* GetEnd() { return mBuf + mLength; } + + /* Extend the buffer over the content of the other buffer, assuming it is + * adjacent. */ + void Extend(Buffer aOther) { + MOZ_ASSERT(aOther.mBuf == GetEnd()); + mLength += aOther.mLength; + } + + private: + const char* mBuf; + size_t mLength; +}; + +/* Helper class to read from a file descriptor line by line. */ +class FdReader { + public: + explicit FdReader(int aFd) + : mFd(aFd), mData(&mRawBuf, 0), mBuf(&mRawBuf, sizeof(mRawBuf)) {} + + /* Read a line from the file descriptor and returns it as a Buffer instance */ + Buffer ReadLine() { + while (true) { + Buffer result = mData.SplitChar('\n'); + + /* There are essentially three different cases here: + * - '\n' was found "early". In this case, the end of the result buffer + * is before the beginning of the mData buffer (since SplitChar + * amputated it). + * - '\n' was found as the last character of mData. In this case, mData + * is empty, but still points at the end of mBuf. result points to what + * used to be in mData, without the last character. + * - '\n' was not found. In this case too, mData is empty and points at + * the end of mBuf. But result points to the entire buffer that used to + * be pointed by mData. + * Only in the latter case do both result and mData's end match, and it's + * the only case where we need to refill the buffer. + */ + if (result.GetEnd() != mData.GetEnd()) { + return result; + } + + /* Since SplitChar emptied mData, make it point to what it had before. */ + mData = result; + + /* And move it to the beginning of the read buffer. */ + mData.Slide(mBuf); + + FillBuffer(); + + if (!mData) { + return Buffer(); + } + } + } + + private: + /* Fill the read buffer. */ + void FillBuffer() { + size_t size = mBuf.GetEnd() - mData.GetEnd(); + Buffer remainder(mData.GetEnd(), size); + + ssize_t len = 1; + while (remainder && len > 0) { + len = ::read(mFd, const_cast<char*>(remainder.get()), size); + if (len < 0) { + die("Read error"); + } + size -= len; + mData.Extend(remainder.Split(len)); + } + } + + /* File descriptor to read from. */ + int mFd; + /* Part of data that was read from the file descriptor but not returned with + * ReadLine yet. */ + Buffer mData; + /* Buffer representation of mRawBuf */ + Buffer mBuf; + /* read() buffer */ + char mRawBuf[4096]; +}; + +MOZ_BEGIN_EXTERN_C + +/* Function declarations for all the replace_malloc _impl functions. + * See memory/build/replace_malloc.c */ +#define MALLOC_DECL(name, return_type, ...) \ + return_type name##_impl(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC +#include "malloc_decls.h" + +#define MALLOC_DECL(name, return_type, ...) return_type name(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC +#include "malloc_decls.h" + +#ifdef ANDROID + +/* mozjemalloc and jemalloc use pthread_atfork, which Android doesn't have. + * While gecko has one in libmozglue, the replay program can't use that. + * Since we're not going to fork anyways, make it a dummy function. */ +int pthread_atfork(void (*aPrepare)(void), void (*aParent)(void), + void (*aChild)(void)) { + return 0; +} +#endif + +MOZ_END_EXTERN_C + +size_t parseNumber(Buffer aBuf) { + if (!aBuf) { + die("Malformed input"); + } + + size_t result = 0; + for (const char *c = aBuf.get(), *end = aBuf.GetEnd(); c < end; c++) { + if (*c < '0' || *c > '9') { + die("Malformed input"); + } + result *= 10; + result += *c - '0'; + } + return result; +} + +/* Class to handle dispatching the replay function calls to replace-malloc. */ +class Replay { + public: + Replay() + : mOps(0), + mNumUsedSlots(0), + mTotalRequestedSize(0), + mTotalAllocatedSize(0), + mCalculateSlop(false) { +#ifdef _WIN32 + // See comment in FdPrintf.h as to why native win32 handles are used. + mStdErr = reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE)); +#else + mStdErr = fileno(stderr); +#endif + } + + void enableSlopCalculation() { mCalculateSlop = true; } + + MemSlot& operator[](size_t index) const { return mSlots[index]; } + + void malloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::malloc_impl(size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void posix_memalign(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t alignment = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + void* ptr; + if (::posix_memalign_impl(&ptr, alignment, size) == 0) { + aSlot.mPtr = ptr; + aSlot.mRequest = size; + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } else { + aSlot.mPtr = nullptr; + } + } + + void aligned_alloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t alignment = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::aligned_alloc_impl(alignment, size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void calloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t num = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::calloc_impl(num, size); + if (aSlot.mPtr) { + aSlot.mRequest = num * size; + if (mCalculateSlop) { + mTotalRequestedSize += num * size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void realloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + Buffer dummy = aArgs.SplitChar('#'); + if (dummy) { + die("Malformed input"); + } + size_t slot_id = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + MemSlot& old_slot = (*this)[slot_id]; + void* old_ptr = old_slot.mPtr; + old_slot.mPtr = nullptr; + aSlot.mPtr = ::realloc_impl(old_ptr, size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void free(Buffer& aArgs, Buffer& aResult) { + if (aResult) { + die("Malformed input"); + } + mOps++; + Buffer dummy = aArgs.SplitChar('#'); + if (dummy) { + die("Malformed input"); + } + size_t slot_id = parseNumber(aArgs); + MemSlot& slot = (*this)[slot_id]; + ::free_impl(slot.mPtr); + slot.mPtr = nullptr; + } + + void memalign(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t alignment = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::memalign_impl(alignment, size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void valloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::valloc_impl(size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void jemalloc_stats(Buffer& aArgs, Buffer& aResult) { + if (aArgs || aResult) { + die("Malformed input"); + } + mOps++; + jemalloc_stats_t stats; + jemalloc_bin_stats_t bin_stats[JEMALLOC_MAX_STATS_BINS]; + ::jemalloc_stats_internal(&stats, bin_stats); + + size_t num_objects = 0; + size_t num_sloppy_objects = 0; + size_t total_allocated = 0; + size_t total_slop = 0; + size_t large_slop = 0; + size_t large_used = 0; + size_t huge_slop = 0; + size_t huge_used = 0; + size_t bin_slop[JEMALLOC_MAX_STATS_BINS] = {0}; + + for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) { + MemSlot& slot = mSlots[slot_id]; + if (slot.mPtr) { + size_t used = ::malloc_usable_size_impl(slot.mPtr); + size_t slop = used - slot.mRequest; + total_allocated += used; + total_slop += slop; + num_objects++; + if (slop) { + num_sloppy_objects++; + } + + if (used <= stats.page_size / 2) { + // We know that this is an inefficient linear search, but there's a + // small number of bins and this is simple. + for (unsigned i = 0; i < JEMALLOC_MAX_STATS_BINS; i++) { + auto& bin = bin_stats[i]; + if (used == bin.size) { + bin_slop[i] += slop; + break; + } + } + } else if (used <= stats.large_max) { + large_slop += slop; + large_used += used; + } else { + huge_slop += slop; + huge_used += used; + } + } + } + + FdPrintf(mStdErr, "\n"); + FdPrintf(mStdErr, "Objects: %9zu\n", num_objects); + FdPrintf(mStdErr, "Slots: %9zu\n", mNumUsedSlots); + FdPrintf(mStdErr, "Ops: %9zu\n", mOps); + FdPrintf(mStdErr, "mapped: %9zu\n", stats.mapped); + FdPrintf(mStdErr, "allocated: %9zu\n", stats.allocated); + FdPrintf(mStdErr, "waste: %9zu\n", stats.waste); + FdPrintf(mStdErr, "dirty: %9zu\n", stats.page_cache); + FdPrintf(mStdErr, "bookkeep: %9zu\n", stats.bookkeeping); + FdPrintf(mStdErr, "bin-unused: %9zu\n", stats.bin_unused); + FdPrintf(mStdErr, "quantum-max: %9zu\n", stats.quantum_max); + FdPrintf(mStdErr, "subpage-max: %9zu\n", stats.page_size / 2); + FdPrintf(mStdErr, "large-max: %9zu\n", stats.large_max); + if (mCalculateSlop) { + size_t slop = mTotalAllocatedSize - mTotalRequestedSize; + FdPrintf(mStdErr, + "Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n", + slop / 1024, mTotalAllocatedSize / 1024, + percent(slop, mTotalAllocatedSize)); + } + FdPrintf(mStdErr, "Live sloppy objects: %zu/%zu (%zu%%)\n", + num_sloppy_objects, num_objects, + percent(num_sloppy_objects, num_objects)); + FdPrintf(mStdErr, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n", + total_slop / 1024, total_allocated / 1024, + percent(total_slop, total_allocated)); + + FdPrintf(mStdErr, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size", + "unused (c)", "total (c)", "used (c)", "non-full (r)", "total (r)", + "used (r)"); + for (auto& bin : bin_stats) { + if (bin.size) { + FdPrintf(mStdErr, "%8zu %8zuKiB %7zuKiB %7zu%% %12zu %9zu %7zu%%\n", + bin.size, bin.bytes_unused / 1024, bin.bytes_total / 1024, + percent(bin.bytes_total - bin.bytes_unused, bin.bytes_total), + bin.num_non_full_runs, bin.num_runs, + percent(bin.num_runs - bin.num_non_full_runs, bin.num_runs)); + } + } + + FdPrintf(mStdErr, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent"); + for (unsigned i = 0; i < JEMALLOC_MAX_STATS_BINS; i++) { + auto& bin = bin_stats[i]; + if (bin.size) { + size_t used = bin.bytes_total - bin.bytes_unused; + FdPrintf(mStdErr, "%5zu %8zu %9zu %6zu%%\n", bin.size, bin_slop[i], + used, percent(bin_slop[i], used)); + } + } + FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "large", large_slop, large_used, + percent(large_slop, large_used)); + FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop, huge_used, + percent(huge_slop, huge_used)); + + unsigned last_size = 0; + for (auto& bin : bin_stats) { + if (bin.size == 0) { + break; + } + + if (bin.size <= 16) { + // 1 byte buckets. + print_distribution(bin.size, last_size, 1); + } else if (bin.size <= stats.quantum_max) { + // 4 buckets, (4 bytes per bucket with a 16 byte quantum). + print_distribution(bin.size, last_size, stats.quantum / 4); + } else { + // 16 buckets. + print_distribution(bin.size, last_size, (bin.size - last_size) / 16); + } + + last_size = bin.size; + } + + // 16 buckets. + print_distribution(stats.page_size, last_size, + (stats.page_size - last_size) / 16); + + // Buckets are 1/4 of the page size (12 buckets). + print_distribution(stats.page_size * 4, stats.page_size, + stats.page_size / 4); + + /* TODO: Add more data, like actual RSS as measured by OS, but compensated + * for the replay internal data. */ + } + + private: + const size_t MAX_NUM_BUCKETS = 16; + + /* + * Create and print frequency distributions of memory requests. + */ + void print_distribution(size_t size, size_t next_smallest, + size_t bucket_size) { + unsigned shift = mozilla::CeilingLog2(bucket_size); + + // The number of slots. + const unsigned array_slots = (size - next_smallest) >> shift; + + // The translation to turn a slot index into a memory request size. + const unsigned array_offset = 1 + next_smallest; + const size_t array_offset_add = (1 << shift) + next_smallest; + + // Avoid a variable length array. + MOZ_RELEASE_ASSERT(array_slots <= MAX_NUM_BUCKETS); + size_t requests[MAX_NUM_BUCKETS]; + memset(requests, 0, sizeof(size_t) * array_slots); + size_t total_requests = 0; + + for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) { + MemSlot& slot = mSlots[slot_id]; + if (slot.mPtr && slot.mRequest > next_smallest && slot.mRequest <= size) { + requests[(slot.mRequest - array_offset) >> shift]++; + total_requests++; + } + } + + FdPrintf(mStdErr, "\n%zu-bin Distribution:\n", size); + FdPrintf(mStdErr, " request : count percent\n"); + size_t range_start = next_smallest + 1; + for (size_t j = 0; j < array_slots; j++) { + size_t range_end = (j << shift) + array_offset_add; + FdPrintf(mStdErr, "%5zu - %5zu: %6zu %6zu%%\n", range_start, range_end, + requests[j], percent(requests[j], total_requests)); + range_start = range_end + 1; + } + } + + static size_t percent(size_t a, size_t b) { + if (!b) { + return 0; + } + return size_t(round(double(a) / double(b) * 100.0)); + } + + MemSlot& SlotForResult(Buffer& aResult) { + /* Parse result value and get the corresponding slot. */ + Buffer dummy = aResult.SplitChar('='); + Buffer dummy2 = aResult.SplitChar('#'); + if (dummy || dummy2) { + die("Malformed input"); + } + + size_t slot_id = parseNumber(aResult); + mNumUsedSlots = std::max(mNumUsedSlots, slot_id + 1); + + return mSlots[slot_id]; + } + + intptr_t mStdErr; + size_t mOps; + + // The number of slots that have been used. It is used to iterate over slots + // without accessing those we haven't initialised. + size_t mNumUsedSlots; + + MemSlotList mSlots; + size_t mTotalRequestedSize; + size_t mTotalAllocatedSize; + // Whether to calculate slop for all allocations over the runtime of a + // process. + bool mCalculateSlop; +}; + +int main(int argc, const char* argv[]) { + size_t first_pid = 0; + FdReader reader(0); + Replay replay; + + for (int i = 1; i < argc; i++) { + const char* option = argv[i]; + if (strcmp(option, "-s") == 0) { + replay.enableSlopCalculation(); + } else { + fprintf(stderr, "Unknown command line option: %s\n", option); + return EXIT_FAILURE; + } + } + + /* Read log from stdin and dispatch function calls to the Replay instance. + * The log format is essentially: + * <pid> <tid> <function>([<args>])[=<result>] + * <args> is a comma separated list of arguments. + * + * The logs are expected to be preprocessed so that allocations are + * attributed a tracking slot. The input is trusted not to have crazy + * values for these slot numbers. + * + * <result>, as well as some of the args to some of the function calls are + * such slot numbers. + */ + while (true) { + Buffer line = reader.ReadLine(); + + if (!line) { + break; + } + + size_t pid = parseNumber(line.SplitChar(' ')); + if (!first_pid) { + first_pid = pid; + } + + /* The log may contain data for several processes, only entries for the + * very first that appears are treated. */ + if (first_pid != pid) { + continue; + } + + /* The log contains thread ids for manual analysis, but we just ignore them + * for now. */ + parseNumber(line.SplitChar(' ')); + + Buffer func = line.SplitChar('('); + Buffer args = line.SplitChar(')'); + + if (func == Buffer("jemalloc_stats")) { + replay.jemalloc_stats(args, line); + } else if (func == Buffer("free")) { + replay.free(args, line); + } else if (func == Buffer("malloc")) { + replay.malloc(args, line); + } else if (func == Buffer("posix_memalign")) { + replay.posix_memalign(args, line); + } else if (func == Buffer("aligned_alloc")) { + replay.aligned_alloc(args, line); + } else if (func == Buffer("calloc")) { + replay.calloc(args, line); + } else if (func == Buffer("realloc")) { + replay.realloc(args, line); + } else if (func == Buffer("memalign")) { + replay.memalign(args, line); + } else if (func == Buffer("valloc")) { + replay.valloc(args, line); + } else { + die("Malformed input"); + } + } + + return 0; +} diff --git a/memory/replace/logalloc/replay/expected_output_minimal.log b/memory/replace/logalloc/replay/expected_output_minimal.log new file mode 100644 index 0000000000..332fe20957 --- /dev/null +++ b/memory/replace/logalloc/replay/expected_output_minimal.log @@ -0,0 +1,17 @@ +1 1 jemalloc_stats() +1 1 malloc(42)=#1 +1 1 malloc(24)=#2 +1 1 free(#1) +1 1 memalign(4096,1024)=#1 +1 1 calloc(4,42)=#3 +1 1 free(#2) +1 1 realloc(#3,84)=#2 +1 1 memalign(256,1024)=#3 +1 1 memalign(512,1024)=#4 +1 1 memalign(4096,1024)=#5 +1 1 jemalloc_stats() +1 1 free(#5) +1 1 free(#4) +1 1 free(#3) +1 1 free(#2) +1 1 free(#1) diff --git a/memory/replace/logalloc/replay/logalloc_munge.py b/memory/replace/logalloc/replay/logalloc_munge.py new file mode 100644 index 0000000000..bea397d505 --- /dev/null +++ b/memory/replace/logalloc/replay/logalloc_munge.py @@ -0,0 +1,151 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +This script takes a log from the replace-malloc logalloc library on stdin +and munges it so that it can be used with the logalloc-replay tool. + +Given the following output: + 13663 malloc(42)=0x7f0c33502040 + 13663 malloc(24)=0x7f0c33503040 + 13663 free(0x7f0c33502040) +The resulting output is: + 1 malloc(42)=#1 + 1 malloc(24)=#2 + 1 free(#1) + +See README for more details. +""" + +from __future__ import absolute_import, print_function +import sys +from collections import ( + defaultdict, + deque, +) + + +class IdMapping(object): + """Class to map values to ids. + + Each value is associated to an increasing id, starting from 1. + When a value is removed, its id is recycled and will be reused for + subsequent values. + """ + + def __init__(self): + self.id = 1 + self._values = {} + self._recycle = deque() + + def __getitem__(self, value): + if value not in self._values: + if self._recycle: + self._values[value] = self._recycle.popleft() + else: + self._values[value] = self.id + self.id += 1 + return self._values[value] + + def __delitem__(self, value): + if value == 0: + return + self._recycle.append(self._values[value]) + del self._values[value] + + def __contains__(self, value): + return value == 0 or value in self._values + + +class Ignored(Exception): + pass + + +def split_log_line(line): + try: + # The format for each line is: + # <pid> [<tid>] <function>([<args>])[=<result>] + # + # The original format didn't include the tid, so we try to parse + # lines whether they have one or not. + pid, func_call = line.split(" ", 1) + call, result = func_call.split(")") + func, args = call.split("(") + args = args.split(",") if args else [] + if result: + if result[0] != "=": + raise Ignored("Malformed input") + result = result[1:] + if " " in func: + tid, func = func.split(" ", 1) + else: + tid = pid + return pid, tid, func, args, result + except Exception: + raise Ignored("Malformed input") + + +NUM_ARGUMENTS = { + "jemalloc_stats": 0, + "free": 1, + "malloc": 1, + "posix_memalign": 2, + "aligned_alloc": 2, + "calloc": 2, + "realloc": 2, + "memalign": 2, + "valloc": 1, +} + + +def main(): + pids = IdMapping() + processes = defaultdict(lambda: {"pointers": IdMapping(), "tids": IdMapping()}) + for line in sys.stdin: + line = line.strip() + + try: + pid, tid, func, args, result = split_log_line(line) + + # Replace pid with an id. + pid = pids[int(pid)] + + process = processes[pid] + tid = process["tids"][int(tid)] + + pointers = process["pointers"] + + if func not in NUM_ARGUMENTS: + raise Ignored("Unknown function") + + if len(args) != NUM_ARGUMENTS[func]: + raise Ignored("Malformed input") + + if func in ("jemalloc_stats", "free") and result: + raise Ignored("Malformed input") + + if func in ("free", "realloc"): + ptr = int(args[0], 16) + if ptr and ptr not in pointers: + raise Ignored("Did not see an alloc for pointer") + args[0] = "#%d" % pointers[ptr] + del pointers[ptr] + + if result: + result = int(result, 16) + if not result: + raise Ignored("Result is NULL") + result = "#%d" % pointers[result] + + print( + "%d %d %s(%s)%s" + % (pid, tid, func, ",".join(args), "=%s" % result if result else "") + ) + + except Exception as e: + print('Ignored "%s": %s' % (line, e.message), file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/memory/replace/logalloc/replay/moz.build b/memory/replace/logalloc/replay/moz.build new file mode 100644 index 0000000000..3e0c8c395a --- /dev/null +++ b/memory/replace/logalloc/replay/moz.build @@ -0,0 +1,69 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Program("logalloc-replay") + +SOURCES += [ + "/mfbt/Assertions.cpp", + "/mfbt/Poison.cpp", + "/mfbt/RandomNum.cpp", + "/mfbt/TaggedAnonymousMemory.cpp", + "/mfbt/Unused.cpp", + "Replay.cpp", +] + +if CONFIG["OS_TARGET"] == "Darwin": + # Work around "warning: 'aligned_alloc' is only available on macOS 10.15 or newer" + # when building with MACOSX_DEPLOYMENT_TARGET < 10.15 with >= 10.15 SDK. + # We have our own definition of the function, so it doesn't matter what the SDK says. + SOURCES["Replay.cpp"].flags += ["-Wno-unguarded-availability-new"] + +if CONFIG["MOZ_REPLACE_MALLOC_STATIC"] and (CONFIG["MOZ_DMD"] or CONFIG["MOZ_PHC"]): + UNIFIED_SOURCES += [ + "/mfbt/HashFunctions.cpp", + "/mfbt/JSONWriter.cpp", + "/mozglue/misc/StackWalk.cpp", + ] + if CONFIG["OS_ARCH"] == "WINNT": + OS_LIBS += [ + "dbghelp", + ] + if CONFIG["MOZ_LINKER"] and CONFIG["MOZ_WIDGET_TOOLKIT"] == "android": + LOCAL_INCLUDES += [ + "/mozglue/linker", + ] + DEFINES["__wrap_dladdr"] = "dladdr" + + +if CONFIG["MOZ_DMD"] or CONFIG["MOZ_PHC"]: + if CONFIG["MOZ_BUILD_APP"] == "memory": + EXPORTS.mozilla += [ + "/mozglue/misc/StackWalk.h", + ] + +if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + SOURCES += [ + "../FdPrintf.cpp", + ] + +LOCAL_INCLUDES += [ + "..", +] + +# Link replace-malloc and the default allocator. +USE_LIBS += [ + "memory", +] + +# The memory library defines this, so it's needed here too. +DEFINES["IMPL_MFBT"] = True + +if CONFIG["MOZ_NEEDS_LIBATOMIC"]: + OS_LIBS += ["atomic"] + +DisableStlWrapping() + +include("/mozglue/build/replace_malloc.mozbuild") diff --git a/memory/replace/logalloc/replay/replay.log b/memory/replace/logalloc/replay/replay.log new file mode 100644 index 0000000000..f1e6de788b --- /dev/null +++ b/memory/replace/logalloc/replay/replay.log @@ -0,0 +1,18 @@ +1 1 jemalloc_stats() +1 1 malloc(42)=#1 +1 1 malloc(24)=#2 +2 2 malloc(42)=#1 +1 1 free(#1) +1 1 posix_memalign(4096,1024)=#1 +1 1 calloc(4,42)=#3 +1 1 free(#2) +1 1 realloc(#3,84)=#2 +1 1 aligned_alloc(256,1024)=#3 +1 1 memalign(512,1024)=#4 +1 1 valloc(1024)=#5 +1 1 jemalloc_stats() +1 1 free(#5) +1 1 free(#4) +1 1 free(#3) +1 1 free(#2) +1 1 free(#1) diff --git a/memory/replace/moz.build b/memory/replace/moz.build new file mode 100644 index 0000000000..a76c5aac58 --- /dev/null +++ b/memory/replace/moz.build @@ -0,0 +1,25 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +@template +def ReplaceMalloc(name): + if CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + DEFINES["MOZ_REPLACE_MALLOC_PREFIX"] = name.replace("-", "_") + FINAL_LIBRARY = "memory" + else: + SharedLibrary(name) + + +DIRS += [ + "logalloc", +] + +if CONFIG["MOZ_DMD"]: + DIRS += ["dmd"] + +if CONFIG["MOZ_PHC"]: + DIRS += ["phc"] diff --git a/memory/replace/phc/PHC.cpp b/memory/replace/phc/PHC.cpp new file mode 100644 index 0000000000..7ddeac75b7 --- /dev/null +++ b/memory/replace/phc/PHC.cpp @@ -0,0 +1,1592 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// PHC is a probabilistic heap checker. A tiny fraction of randomly chosen heap +// allocations are subject to some expensive checking via the use of OS page +// access protection. A failed check triggers a crash, whereupon useful +// information about the failure is put into the crash report. The cost and +// coverage for each user is minimal, but spread over the entire user base the +// coverage becomes significant. +// +// The idea comes from Chromium, where it is called GWP-ASAN. (Firefox uses PHC +// as the name because GWP-ASAN is long, awkward, and doesn't have any +// particular meaning.) +// +// In the current implementation up to 64 allocations per process can become +// PHC allocations. These allocations must be page-sized or smaller. Each PHC +// allocation gets its own page, and when the allocation is freed its page is +// marked inaccessible until the page is reused for another allocation. This +// means that a use-after-free defect (which includes double-frees) will be +// caught if the use occurs before the page is reused for another allocation. +// The crash report will contain stack traces for the allocation site, the free +// site, and the use-after-free site, which is often enough to diagnose the +// defect. +// +// Also, each PHC allocation is followed by a guard page. The PHC allocation is +// positioned so that its end abuts the guard page (or as close as possible, +// given alignment constraints). This means that a bounds violation at the end +// of the allocation (overflow) will be caught. The crash report will contain +// stack traces for the allocation site and the bounds violation use site, +// which is often enough to diagnose the defect. +// +// (A bounds violation at the start of the allocation (underflow) will not be +// caught, unless it is sufficiently large to hit the preceding allocation's +// guard page, which is not that likely. It would be possible to look more +// assiduously for underflow by randomly placing some allocations at the end of +// the page and some at the start of the page, and GWP-ASAN does this. PHC does +// not, however, because overflow is likely to be much more common than +// underflow in practice.) +// +// We use a simple heuristic to categorize a guard page access as overflow or +// underflow: if the address falls in the lower half of the guard page, we +// assume it is overflow, otherwise we assume it is underflow. More +// sophisticated heuristics are possible, but this one is very simple, and it is +// likely that most overflows/underflows in practice are very close to the page +// boundary. +// +// The design space for the randomization strategy is large. The current +// implementation has a large random delay before it starts operating, and a +// small random delay between each PHC allocation attempt. Each freed PHC +// allocation is quarantined for a medium random delay before being reused, in +// order to increase the chance of catching UAFs. +// +// The basic cost of PHC's operation is as follows. +// +// - The physical memory cost is 64 * 4 KiB = 256 KiB per process (assuming 4 +// KiB pages) plus some metadata (including stack traces) for each page. +// +// - The virtual memory cost is the physical memory cost plus the guard pages: +// another 64 * 4 KiB = 256 KiB per process. PHC is currently only enabled on +// 64-bit platforms so the impact of the virtual memory usage is negligible. +// +// - Every allocation requires a size check and a decrement-and-check of an +// atomic counter. When the counter reaches zero a PHC allocation can occur, +// which involves marking a page as accessible and getting a stack trace for +// the allocation site. Otherwise, mozjemalloc performs the allocation. +// +// - Every deallocation requires a range check on the pointer to see if it +// involves a PHC allocation. (The choice to only do PHC allocations that are +// a page or smaller enables this range check, because the 64 pages are +// contiguous. Allowing larger allocations would make this more complicated, +// and we definitely don't want something as slow as a hash table lookup on +// every deallocation.) PHC deallocations involve marking a page as +// inaccessible and getting a stack trace for the deallocation site. +// +// Note that calls to realloc(), free(), and malloc_usable_size() will +// immediately crash if the given pointer falls within a page allocation's +// page, but does not point to the start of the allocation itself. +// +// void* p = malloc(64); +// free(p + 1); // p+1 doesn't point to the allocation start; crash +// +// Such crashes will not have the PHC fields in the crash report. +// +// PHC-specific tests can be run with the following commands: +// - gtests: `./mach gtest '*PHC*'` +// - xpcshell-tests: `./mach test toolkit/crashreporter/test/unit` +// - This runs some non-PHC tests as well. + +#include "PHC.h" + +#include <stdlib.h> +#include <time.h> + +#include <algorithm> + +#ifdef XP_WIN +# include <process.h> +#else +# include <sys/mman.h> +# include <sys/types.h> +# include <pthread.h> +# include <unistd.h> +#endif + +#include "replace_malloc.h" +#include "FdPrintf.h" +#include "Mutex.h" +#include "mozilla/Assertions.h" +#include "mozilla/Atomics.h" +#include "mozilla/Attributes.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/Maybe.h" +#include "mozilla/StackWalk.h" +#include "mozilla/ThreadLocal.h" +#include "mozilla/XorShift128PlusRNG.h" + +using namespace mozilla; + +//--------------------------------------------------------------------------- +// Utilities +//--------------------------------------------------------------------------- + +#ifdef ANDROID +// Android doesn't have pthread_atfork defined in pthread.h. +extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void), + void (*)(void)); +#endif + +#ifndef DISALLOW_COPY_AND_ASSIGN +# define DISALLOW_COPY_AND_ASSIGN(T) \ + T(const T&); \ + void operator=(const T&) +#endif + +static malloc_table_t sMallocTable; + +// This class provides infallible operations for the small number of heap +// allocations that PHC does for itself. It would be nice if we could use the +// InfallibleAllocPolicy from mozalloc, but PHC cannot use mozalloc. +class InfallibleAllocPolicy { + public: + static void AbortOnFailure(const void* aP) { + if (!aP) { + MOZ_CRASH("PHC failed to allocate"); + } + } + + template <class T> + static T* new_() { + void* p = sMallocTable.malloc(sizeof(T)); + AbortOnFailure(p); + return new (p) T; + } +}; + +//--------------------------------------------------------------------------- +// Stack traces +//--------------------------------------------------------------------------- + +// This code is similar to the equivalent code within DMD. + +class StackTrace : public phc::StackTrace { + public: + StackTrace() : phc::StackTrace() {} + + void Clear() { mLength = 0; } + + void Fill(); + + private: + static void StackWalkCallback(uint32_t aFrameNumber, void* aPc, void* aSp, + void* aClosure) { + StackTrace* st = (StackTrace*)aClosure; + MOZ_ASSERT(st->mLength < kMaxFrames); + st->mPcs[st->mLength] = aPc; + st->mLength++; + MOZ_ASSERT(st->mLength == aFrameNumber); + } +}; + +// WARNING WARNING WARNING: this function must only be called when GMut::sMutex +// is *not* locked, otherwise we might get deadlocks. +// +// How? On Windows, MozStackWalk() can lock a mutex, M, from the shared library +// loader. Another thread might call malloc() while holding M locked (when +// loading a shared library) and try to lock GMut::sMutex, causing a deadlock. +// So GMut::sMutex can't be locked during the call to MozStackWalk(). (For +// details, see https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8. On +// Linux, something similar can happen; see bug 824340. So we just disallow it +// on all platforms.) +// +// In DMD, to avoid this problem we temporarily unlock the equivalent mutex for +// the MozStackWalk() call. But that's grotty, and things are a bit different +// here, so we just require that stack traces be obtained before locking +// GMut::sMutex. +// +// Unfortunately, there is no reliable way at compile-time or run-time to ensure +// this pre-condition. Hence this large comment. +// +void StackTrace::Fill() { + mLength = 0; + +#if defined(XP_WIN) && defined(_M_IX86) + // This avoids MozStackWalk(), which causes unusably slow startup on Win32 + // when it is called during static initialization (see bug 1241684). + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Win32: Registers::SyncPopulate() for the + // frame pointer, and GetStackTop() for the stack end. + CONTEXT context; + RtlCaptureContext(&context); + void** fp = reinterpret_cast<void**>(context.Ebp); + + PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb()); + void* stackEnd = static_cast<void*>(pTib->StackBase); + FramePointerStackWalk(StackWalkCallback, /* aSkipFrames = */ 0, kMaxFrames, + this, fp, stackEnd); +#elif defined(XP_MACOSX) + // This avoids MozStackWalk(), which has become unusably slow on Mac due to + // changes in libunwind. + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Mac: Registers::SyncPopulate() for the frame + // pointer, and GetStackTop() for the stack end. + void** fp; + asm( + // Dereference %rbp to get previous %rbp + "movq (%%rbp), %0\n\t" + : "=r"(fp)); + void* stackEnd = pthread_get_stackaddr_np(pthread_self()); + FramePointerStackWalk(StackWalkCallback, /* skipFrames = */ 0, kMaxFrames, + this, fp, stackEnd); +#else + MozStackWalk(StackWalkCallback, /* aSkipFrames = */ 0, kMaxFrames, this); +#endif +} + +//--------------------------------------------------------------------------- +// Logging +//--------------------------------------------------------------------------- + +// Change this to 1 to enable some PHC logging. Useful for debugging. +#define PHC_LOGGING 0 + +#if PHC_LOGGING + +static size_t GetPid() { return size_t(getpid()); } + +static size_t GetTid() { +# if defined(XP_WIN) + return size_t(GetCurrentThreadId()); +# else + return size_t(pthread_self()); +# endif +} + +# if defined(XP_WIN) +# define LOG_STDERR \ + reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE)) +# else +# define LOG_STDERR 2 +# endif +# define LOG(fmt, ...) \ + FdPrintf(LOG_STDERR, "PHC[%zu,%zu,~%zu] " fmt, GetPid(), GetTid(), \ + size_t(GAtomic::Now()), __VA_ARGS__) + +#else + +# define LOG(fmt, ...) + +#endif // PHC_LOGGING + +//--------------------------------------------------------------------------- +// Global state +//--------------------------------------------------------------------------- + +// Throughout this entire file time is measured as the number of sub-page +// allocations performed (by PHC and mozjemalloc combined). `Time` is 64-bit +// because we could have more than 2**32 allocations in a long-running session. +// `Delay` is 32-bit because the delays used within PHC are always much smaller +// than 2**32. +using Time = uint64_t; // A moment in time. +using Delay = uint32_t; // A time duration. + +// PHC only runs if the page size is 4 KiB; anything more is uncommon and would +// use too much memory. So we hardwire this size. +static const size_t kPageSize = 4096; + +// There are two kinds of page. +// - Allocation pages, from which allocations are made. +// - Guard pages, which are never touched by PHC. +// +// These page kinds are interleaved; each allocation page has a guard page on +// either side. +static const size_t kNumAllocPages = 64; +static const size_t kNumAllPages = kNumAllocPages * 2 + 1; + +// The total size of the allocation pages and guard pages. +static const size_t kAllPagesSize = kNumAllPages * kPageSize; + +// The junk value used to fill new allocation in debug builds. It's same value +// as the one used by mozjemalloc. PHC applies it unconditionally in debug +// builds. Unlike mozjemalloc, PHC doesn't consult the MALLOC_OPTIONS +// environment variable to possibly change that behaviour. +// +// Also note that, unlike mozjemalloc, PHC doesn't have a poison value for freed +// allocations because freed allocations are protected by OS page protection. +const uint8_t kAllocJunk = 0xe4; + +// The maximum time. +static const Time kMaxTime = ~(Time(0)); + +// The average delay before doing any page allocations at the start of a +// process. Note that roughly 1 million allocations occur in the main process +// while starting the browser. The delay range is 1..kAvgFirstAllocDelay*2. +static const Delay kAvgFirstAllocDelay = 512 * 1024; + +// The average delay until the next attempted page allocation, once we get past +// the first delay. The delay range is 1..kAvgAllocDelay*2. +static const Delay kAvgAllocDelay = 16 * 1024; + +// The average delay before reusing a freed page. Should be significantly larger +// than kAvgAllocDelay, otherwise there's not much point in having it. The delay +// range is (kAvgAllocDelay / 2)..(kAvgAllocDelay / 2 * 3). This is different to +// the other delay ranges in not having a minimum of 1, because that's such a +// short delay that there is a high likelihood of bad stacks in any crash +// report. +static const Delay kAvgPageReuseDelay = 256 * 1024; + +// Truncate aRnd to the range (1 .. AvgDelay*2). If aRnd is random, this +// results in an average value of aAvgDelay + 0.5, which is close enough to +// aAvgDelay. aAvgDelay must be a power-of-two (otherwise it will crash) for +// speed. +template <Delay AvgDelay> +constexpr Delay Rnd64ToDelay(uint64_t aRnd) { + static_assert(IsPowerOfTwo(AvgDelay), "must be a power of two"); + + return aRnd % (AvgDelay * 2) + 1; +} + +// Maps a pointer to a PHC-specific structure: +// - Nothing +// - A guard page (it is unspecified which one) +// - An allocation page (with an index < kNumAllocPages) +// +// The standard way of handling a PtrKind is to check IsNothing(), and if that +// fails, to check IsGuardPage(), and if that fails, to call AllocPage(). +class PtrKind { + private: + enum class Tag : uint8_t { + Nothing, + GuardPage, + AllocPage, + }; + + Tag mTag; + uintptr_t mIndex; // Only used if mTag == Tag::AllocPage. + + public: + // Detect what a pointer points to. This constructor must be fast because it + // is called for every call to free(), realloc(), malloc_usable_size(), and + // jemalloc_ptr_info(). + PtrKind(const void* aPtr, const uint8_t* aPagesStart, + const uint8_t* aPagesLimit) { + if (!(aPagesStart <= aPtr && aPtr < aPagesLimit)) { + mTag = Tag::Nothing; + } else { + uintptr_t offset = static_cast<const uint8_t*>(aPtr) - aPagesStart; + uintptr_t allPageIndex = offset / kPageSize; + MOZ_ASSERT(allPageIndex < kNumAllPages); + if (allPageIndex & 1) { + // Odd-indexed pages are allocation pages. + uintptr_t allocPageIndex = allPageIndex / 2; + MOZ_ASSERT(allocPageIndex < kNumAllocPages); + mTag = Tag::AllocPage; + mIndex = allocPageIndex; + } else { + // Even-numbered pages are guard pages. + mTag = Tag::GuardPage; + } + } + } + + bool IsNothing() const { return mTag == Tag::Nothing; } + bool IsGuardPage() const { return mTag == Tag::GuardPage; } + + // This should only be called after IsNothing() and IsGuardPage() have been + // checked and failed. + uintptr_t AllocPageIndex() const { + MOZ_RELEASE_ASSERT(mTag == Tag::AllocPage); + return mIndex; + } +}; + +// Shared, atomic, mutable global state. +class GAtomic { + public: + static void Init(Delay aFirstDelay) { + sAllocDelay = aFirstDelay; + + LOG("Initial sAllocDelay <- %zu\n", size_t(aFirstDelay)); + } + + static Time Now() { return sNow; } + + static void IncrementNow() { sNow++; } + + // Decrements the delay and returns the decremented value. + static int32_t DecrementDelay() { return --sAllocDelay; } + + static void SetAllocDelay(Delay aAllocDelay) { sAllocDelay = aAllocDelay; } + + private: + // The current time. Relaxed semantics because it's primarily used for + // determining if an allocation can be recycled yet and therefore it doesn't + // need to be exact. + static Atomic<Time, Relaxed> sNow; + + // Delay until the next attempt at a page allocation. See the comment in + // MaybePageAlloc() for an explanation of why it is a signed integer, and why + // it uses ReleaseAcquire semantics. + static Atomic<Delay, ReleaseAcquire> sAllocDelay; +}; + +Atomic<Time, Relaxed> GAtomic::sNow; +Atomic<Delay, ReleaseAcquire> GAtomic::sAllocDelay; + +// Shared, immutable global state. Initialized by replace_init() and never +// changed after that. replace_init() runs early enough that no synchronization +// is needed. +class GConst { + private: + // The bounds of the allocated pages. + uint8_t* const mPagesStart; + uint8_t* const mPagesLimit; + + // Allocates the allocation pages and the guard pages, contiguously. + uint8_t* AllocAllPages() { + // Allocate the pages so that they are inaccessible. They are never freed, + // because it would happen at process termination when it would be of little + // use. + void* pages = +#ifdef XP_WIN + VirtualAlloc(nullptr, kAllPagesSize, MEM_RESERVE, PAGE_NOACCESS); +#else + mmap(nullptr, kAllPagesSize, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, + 0); +#endif + if (!pages) { + MOZ_CRASH(); + } + + return static_cast<uint8_t*>(pages); + } + + public: + GConst() + : mPagesStart(AllocAllPages()), mPagesLimit(mPagesStart + kAllPagesSize) { + LOG("AllocAllPages at %p..%p\n", mPagesStart, mPagesLimit); + } + + class PtrKind PtrKind(const void* aPtr) { + class PtrKind pk(aPtr, mPagesStart, mPagesLimit); + return pk; + } + + bool IsInFirstGuardPage(const void* aPtr) { + return mPagesStart <= aPtr && aPtr < mPagesStart + kPageSize; + } + + // Get the address of the allocation page referred to via an index. Used when + // marking the page as accessible/inaccessible. + uint8_t* AllocPagePtr(uintptr_t aIndex) { + MOZ_ASSERT(aIndex < kNumAllocPages); + // Multiply by two and add one to account for allocation pages *and* guard + // pages. + return mPagesStart + (2 * aIndex + 1) * kPageSize; + } +}; + +static GConst* gConst; + +// On MacOS, the first __thread/thread_local access calls malloc, which leads +// to an infinite loop. So we use pthread-based TLS instead, which somehow +// doesn't have this problem. +#if !defined(XP_DARWIN) +# define PHC_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T) +#else +# define PHC_THREAD_LOCAL(T) \ + detail::ThreadLocal<T, detail::ThreadLocalKeyStorage> +#endif + +// Thread-local state. +class GTls { + GTls(const GTls&) = delete; + + const GTls& operator=(const GTls&) = delete; + + // When true, PHC does as little as possible. + // + // (a) It does not allocate any new page allocations. + // + // (b) It avoids doing any operations that might call malloc/free/etc., which + // would cause re-entry into PHC. (In practice, MozStackWalk() is the + // only such operation.) Note that calls to the functions in sMallocTable + // are ok. + // + // For example, replace_malloc() will just fall back to mozjemalloc. However, + // operations involving existing allocations are more complex, because those + // existing allocations may be page allocations. For example, if + // replace_free() is passed a page allocation on a PHC-disabled thread, it + // will free the page allocation in the usual way, but it will get a dummy + // freeStack in order to avoid calling MozStackWalk(), as per (b) above. + // + // This single disabling mechanism has two distinct uses. + // + // - It's used to prevent re-entry into PHC, which can cause correctness + // problems. For example, consider this sequence. + // + // 1. enter replace_free() + // 2. which calls PageFree() + // 3. which calls MozStackWalk() + // 4. which locks a mutex M, and then calls malloc + // 5. enter replace_malloc() + // 6. which calls MaybePageAlloc() + // 7. which calls MozStackWalk() + // 8. which (re)locks a mutex M --> deadlock + // + // We avoid this sequence by "disabling" the thread in PageFree() (at step + // 2), which causes MaybePageAlloc() to fail, avoiding the call to + // MozStackWalk() (at step 7). + // + // In practice, realloc or free of a PHC allocation is unlikely on a thread + // that is disabled because of this use: MozStackWalk() will probably only + // realloc/free allocations that it allocated itself, but those won't be + // page allocations because PHC is disabled before calling MozStackWalk(). + // + // (Note that MaybePageAlloc() could safely do a page allocation so long as + // it avoided calling MozStackWalk() by getting a dummy allocStack. But it + // wouldn't be useful, and it would prevent the second use below.) + // + // - It's used to prevent PHC allocations in some tests that rely on + // mozjemalloc's exact allocation behaviour, which PHC does not replicate + // exactly. (Note that (b) isn't necessary for this use -- MozStackWalk() + // could be safely called -- but it is necessary for the first use above.) + // + static PHC_THREAD_LOCAL(bool) tlsIsDisabled; + + public: + static void Init() { + if (!tlsIsDisabled.init()) { + MOZ_CRASH(); + } + } + + static void DisableOnCurrentThread() { + MOZ_ASSERT(!GTls::tlsIsDisabled.get()); + tlsIsDisabled.set(true); + } + + static void EnableOnCurrentThread() { + MOZ_ASSERT(GTls::tlsIsDisabled.get()); + tlsIsDisabled.set(false); + } + + static bool IsDisabledOnCurrentThread() { return tlsIsDisabled.get(); } +}; + +PHC_THREAD_LOCAL(bool) GTls::tlsIsDisabled; + +class AutoDisableOnCurrentThread { + AutoDisableOnCurrentThread(const AutoDisableOnCurrentThread&) = delete; + + const AutoDisableOnCurrentThread& operator=( + const AutoDisableOnCurrentThread&) = delete; + + public: + explicit AutoDisableOnCurrentThread() { GTls::DisableOnCurrentThread(); } + ~AutoDisableOnCurrentThread() { GTls::EnableOnCurrentThread(); } +}; + +// This type is used as a proof-of-lock token, to make it clear which functions +// require sMutex to be locked. +using GMutLock = const MutexAutoLock&; + +// Shared, mutable global state. Protected by sMutex; all accessing functions +// take a GMutLock as proof that sMutex is held. +class GMut { + enum class AllocPageState { + NeverAllocated = 0, + InUse = 1, + Freed = 2, + }; + + // Metadata for each allocation page. + class AllocPageInfo { + public: + AllocPageInfo() + : mState(AllocPageState::NeverAllocated), + mArenaId(), + mBaseAddr(nullptr), + mAllocStack(), + mFreeStack(), + mReuseTime(0) {} + + // The current allocation page state. + AllocPageState mState; + + // The arena that the allocation is nominally from. This isn't meaningful + // within PHC, which has no arenas. But it is necessary for reallocation of + // page allocations as normal allocations, such as in this code: + // + // p = moz_arena_malloc(arenaId, 4096); + // realloc(p, 8192); + // + // The realloc is more than one page, and thus too large for PHC to handle. + // Therefore, if PHC handles the first allocation, it must ask mozjemalloc + // to allocate the 8192 bytes in the correct arena, and to do that, it must + // call sMallocTable.moz_arena_malloc with the correct arenaId under the + // covers. Therefore it must record that arenaId. + // + // This field is also needed for jemalloc_ptr_info() to work, because it + // also returns the arena ID (but only in debug builds). + // + // - NeverAllocated: must be 0. + // - InUse | Freed: can be any valid arena ID value. + Maybe<arena_id_t> mArenaId; + + // The starting address of the allocation. Will not be the same as the page + // address unless the allocation is a full page. + // - NeverAllocated: must be 0. + // - InUse | Freed: must be within the allocation page. + uint8_t* mBaseAddr; + + // Usable size is computed as the number of bytes between the pointer and + // the end of the allocation page. This might be bigger than the requested + // size, especially if an outsized alignment is requested. + size_t UsableSize() const { + return mState == AllocPageState::NeverAllocated + ? 0 + : kPageSize - (reinterpret_cast<uintptr_t>(mBaseAddr) & + (kPageSize - 1)); + } + + // The allocation stack. + // - NeverAllocated: Nothing. + // - InUse | Freed: Some. + Maybe<StackTrace> mAllocStack; + + // The free stack. + // - NeverAllocated | InUse: Nothing. + // - Freed: Some. + Maybe<StackTrace> mFreeStack; + + // The time at which the page is available for reuse, as measured against + // GAtomic::sNow. When the page is in use this value will be kMaxTime. + // - NeverAllocated: must be 0. + // - InUse: must be kMaxTime. + // - Freed: must be > 0 and < kMaxTime. + Time mReuseTime; + }; + + public: + // The mutex that protects the other members. + static Mutex sMutex; + + GMut() + : mRNG(RandomSeed<0>(), RandomSeed<1>()), + mAllocPages(), + mNumPageAllocs(0), + mPageAllocHits(0), + mPageAllocMisses(0) { + sMutex.Init(); + } + + uint64_t Random64(GMutLock) { return mRNG.next(); } + + bool IsPageInUse(GMutLock, uintptr_t aIndex) { + return mAllocPages[aIndex].mState == AllocPageState::InUse; + } + + // Is the page free? And if so, has enough time passed that we can use it? + bool IsPageAllocatable(GMutLock, uintptr_t aIndex, Time aNow) { + const AllocPageInfo& page = mAllocPages[aIndex]; + return page.mState != AllocPageState::InUse && aNow >= page.mReuseTime; + } + + Maybe<arena_id_t> PageArena(GMutLock aLock, uintptr_t aIndex) { + const AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageInUse(aLock, page); + + return page.mArenaId; + } + + size_t PageUsableSize(GMutLock aLock, uintptr_t aIndex) { + const AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageInUse(aLock, page); + + return page.UsableSize(); + } + + void SetPageInUse(GMutLock aLock, uintptr_t aIndex, + const Maybe<arena_id_t>& aArenaId, uint8_t* aBaseAddr, + const StackTrace& aAllocStack) { + AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageNotInUse(aLock, page); + + page.mState = AllocPageState::InUse; + page.mArenaId = aArenaId; + page.mBaseAddr = aBaseAddr; + page.mAllocStack = Some(aAllocStack); + page.mFreeStack = Nothing(); + page.mReuseTime = kMaxTime; + + mNumPageAllocs++; + MOZ_RELEASE_ASSERT(mNumPageAllocs <= kNumAllocPages); + } + + void ResizePageInUse(GMutLock aLock, uintptr_t aIndex, + const Maybe<arena_id_t>& aArenaId, uint8_t* aNewBaseAddr, + const StackTrace& aAllocStack) { + AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageInUse(aLock, page); + + // page.mState is not changed. + if (aArenaId.isSome()) { + // Crash if the arenas don't match. + MOZ_RELEASE_ASSERT(page.mArenaId == aArenaId); + } + page.mBaseAddr = aNewBaseAddr; + // We could just keep the original alloc stack, but the realloc stack is + // more recent and therefore seems more useful. + page.mAllocStack = Some(aAllocStack); + // page.mFreeStack is not changed. + // page.mReuseTime is not changed. + }; + + void SetPageFreed(GMutLock aLock, uintptr_t aIndex, + const Maybe<arena_id_t>& aArenaId, + const StackTrace& aFreeStack, Delay aReuseDelay) { + AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageInUse(aLock, page); + + page.mState = AllocPageState::Freed; + + // page.mArenaId is left unchanged, for jemalloc_ptr_info() calls that + // occur after freeing (e.g. in the PtrInfo test in TestJemalloc.cpp). + if (aArenaId.isSome()) { + // Crash if the arenas don't match. + MOZ_RELEASE_ASSERT(page.mArenaId == aArenaId); + } + + // page.musableSize is left unchanged, for reporting on UAF, and for + // jemalloc_ptr_info() calls that occur after freeing (e.g. in the PtrInfo + // test in TestJemalloc.cpp). + + // page.mAllocStack is left unchanged, for reporting on UAF. + + page.mFreeStack = Some(aFreeStack); + page.mReuseTime = GAtomic::Now() + aReuseDelay; + + MOZ_RELEASE_ASSERT(mNumPageAllocs > 0); + mNumPageAllocs--; + } + + static void CrashOnGuardPage(void* aPtr) { + // An operation on a guard page? This is a bounds violation. Deliberately + // touch the page in question, to cause a crash that triggers the usual PHC + // machinery. + LOG("CrashOnGuardPage(%p), bounds violation\n", aPtr); + *static_cast<uint8_t*>(aPtr) = 0; + MOZ_CRASH("unreachable"); + } + + void EnsureValidAndInUse(GMutLock, void* aPtr, uintptr_t aIndex) { + const AllocPageInfo& page = mAllocPages[aIndex]; + + // The pointer must point to the start of the allocation. + MOZ_RELEASE_ASSERT(page.mBaseAddr == aPtr); + + if (page.mState == AllocPageState::Freed) { + // An operation on a freed page? This is a particular kind of + // use-after-free. Deliberately touch the page in question, in order to + // cause a crash that triggers the usual PHC machinery. But unlock sMutex + // first, because that self-same PHC machinery needs to re-lock it, and + // the crash causes non-local control flow so sMutex won't be unlocked + // the normal way in the caller. + LOG("EnsureValidAndInUse(%p), use-after-free\n", aPtr); + sMutex.Unlock(); + *static_cast<uint8_t*>(aPtr) = 0; + MOZ_CRASH("unreachable"); + } + } + + void FillAddrInfo(GMutLock, uintptr_t aIndex, const void* aBaseAddr, + bool isGuardPage, phc::AddrInfo& aOut) { + const AllocPageInfo& page = mAllocPages[aIndex]; + if (isGuardPage) { + aOut.mKind = phc::AddrInfo::Kind::GuardPage; + } else { + switch (page.mState) { + case AllocPageState::NeverAllocated: + aOut.mKind = phc::AddrInfo::Kind::NeverAllocatedPage; + break; + + case AllocPageState::InUse: + aOut.mKind = phc::AddrInfo::Kind::InUsePage; + break; + + case AllocPageState::Freed: + aOut.mKind = phc::AddrInfo::Kind::FreedPage; + break; + + default: + MOZ_CRASH(); + } + } + aOut.mBaseAddr = page.mBaseAddr; + aOut.mUsableSize = page.UsableSize(); + aOut.mAllocStack = page.mAllocStack; + aOut.mFreeStack = page.mFreeStack; + } + + void FillJemallocPtrInfo(GMutLock, const void* aPtr, uintptr_t aIndex, + jemalloc_ptr_info_t* aInfo) { + const AllocPageInfo& page = mAllocPages[aIndex]; + switch (page.mState) { + case AllocPageState::NeverAllocated: + break; + + case AllocPageState::InUse: { + // Only return TagLiveAlloc if the pointer is within the bounds of the + // allocation's usable size. + uint8_t* base = page.mBaseAddr; + uint8_t* limit = base + page.UsableSize(); + if (base <= aPtr && aPtr < limit) { + *aInfo = {TagLiveAlloc, page.mBaseAddr, page.UsableSize(), + page.mArenaId.valueOr(0)}; + return; + } + break; + } + + case AllocPageState::Freed: { + // Only return TagFreedAlloc if the pointer is within the bounds of the + // former allocation's usable size. + uint8_t* base = page.mBaseAddr; + uint8_t* limit = base + page.UsableSize(); + if (base <= aPtr && aPtr < limit) { + *aInfo = {TagFreedAlloc, page.mBaseAddr, page.UsableSize(), + page.mArenaId.valueOr(0)}; + return; + } + break; + } + + default: + MOZ_CRASH(); + } + + // Pointers into guard pages will end up here, as will pointers into + // allocation pages that aren't within the allocation's bounds. + *aInfo = {TagUnknown, nullptr, 0, 0}; + } + + static void prefork() { sMutex.Lock(); } + static void postfork() { sMutex.Unlock(); } + + void IncPageAllocHits(GMutLock) { mPageAllocHits++; } + void IncPageAllocMisses(GMutLock) { mPageAllocMisses++; } + + size_t NumPageAllocs(GMutLock) { return mNumPageAllocs; } + + size_t PageAllocHits(GMutLock) { return mPageAllocHits; } + size_t PageAllocAttempts(GMutLock) { + return mPageAllocHits + mPageAllocMisses; + } + + // This is an integer because FdPrintf only supports integer printing. + size_t PageAllocHitRate(GMutLock) { + return mPageAllocHits * 100 / (mPageAllocHits + mPageAllocMisses); + } + + private: + template <int N> + uint64_t RandomSeed() { + // An older version of this code used RandomUint64() here, but on Mac that + // function uses arc4random(), which can allocate, which would cause + // re-entry, which would be bad. So we just use time() and a local variable + // address. These are mediocre sources of entropy, but good enough for PHC. + static_assert(N == 0 || N == 1, "must be 0 or 1"); + uint64_t seed; + if (N == 0) { + time_t t = time(nullptr); + seed = t ^ (t << 32); + } else { + seed = uintptr_t(&seed) ^ (uintptr_t(&seed) << 32); + } + return seed; + } + + void AssertAllocPageInUse(GMutLock, const AllocPageInfo& aPage) { + MOZ_ASSERT(aPage.mState == AllocPageState::InUse); + // There is nothing to assert about aPage.mArenaId. + MOZ_ASSERT(aPage.mBaseAddr); + MOZ_ASSERT(aPage.UsableSize() > 0); + MOZ_ASSERT(aPage.mAllocStack.isSome()); + MOZ_ASSERT(aPage.mFreeStack.isNothing()); + MOZ_ASSERT(aPage.mReuseTime == kMaxTime); + } + + void AssertAllocPageNotInUse(GMutLock, const AllocPageInfo& aPage) { + // We can assert a lot about `NeverAllocated` pages, but not much about + // `Freed` pages. +#ifdef DEBUG + bool isFresh = aPage.mState == AllocPageState::NeverAllocated; + MOZ_ASSERT(isFresh || aPage.mState == AllocPageState::Freed); + MOZ_ASSERT_IF(isFresh, aPage.mArenaId == Nothing()); + MOZ_ASSERT(isFresh == (aPage.mBaseAddr == nullptr)); + MOZ_ASSERT(isFresh == (aPage.mAllocStack.isNothing())); + MOZ_ASSERT(isFresh == (aPage.mFreeStack.isNothing())); + MOZ_ASSERT(aPage.mReuseTime != kMaxTime); +#endif + } + + // RNG for deciding which allocations to treat specially. It doesn't need to + // be high quality. + // + // This is a raw pointer for the reason explained in the comment above + // GMut's constructor. Don't change it to UniquePtr or anything like that. + non_crypto::XorShift128PlusRNG mRNG; + + AllocPageInfo mAllocPages[kNumAllocPages]; + + // How many page allocs are currently in use (the max is kNumAllocPages). + size_t mNumPageAllocs; + + // How many allocations that could have been page allocs actually were? As + // constrained kNumAllocPages. If the hit ratio isn't close to 100% it's + // likely that the global constants are poorly chosen. + size_t mPageAllocHits; + size_t mPageAllocMisses; +}; + +Mutex GMut::sMutex; + +static GMut* gMut; + +//--------------------------------------------------------------------------- +// Page allocation operations +//--------------------------------------------------------------------------- + +// Attempt a page allocation if the time and the size are right. Allocated +// memory is zeroed if aZero is true. On failure, the caller should attempt a +// normal allocation via sMallocTable. Can be called in a context where +// GMut::sMutex is locked. +static void* MaybePageAlloc(const Maybe<arena_id_t>& aArenaId, size_t aReqSize, + size_t aAlignment, bool aZero) { + MOZ_ASSERT(IsPowerOfTwo(aAlignment)); + + if (aReqSize > kPageSize) { + return nullptr; + } + + GAtomic::IncrementNow(); + + // Decrement the delay. If it's zero, we do a page allocation and reset the + // delay to a random number. Because the assignment to the random number isn't + // atomic w.r.t. the decrement, we might have a sequence like this: + // + // Thread 1 Thread 2 Thread 3 + // -------- -------- -------- + // (a) newDelay = --sAllocDelay (-> 0) + // (b) --sAllocDelay (-> -1) + // (c) (newDelay != 0) fails + // (d) --sAllocDelay (-> -2) + // (e) sAllocDelay = new_random_number() + // + // It's critical that sAllocDelay has ReleaseAcquire semantics, because that + // guarantees that exactly one thread will see sAllocDelay have the value 0. + // (Relaxed semantics wouldn't guarantee that.) + // + // It's also nice that sAllocDelay is signed, given that we can decrement to + // below zero. (Strictly speaking, an unsigned integer would also work due + // to wrapping, but a signed integer is conceptually cleaner.) + // + // Finally, note that the decrements that occur between (a) and (e) above are + // effectively ignored, because (e) clobbers them. This shouldn't be a + // problem; it effectively just adds a little more randomness to + // new_random_number(). An early version of this code tried to account for + // these decrements by doing `sAllocDelay += new_random_number()`. However, if + // new_random_value() is small, the number of decrements between (a) and (e) + // can easily exceed it, whereupon sAllocDelay ends up negative after + // `sAllocDelay += new_random_number()`, and the zero-check never succeeds + // again. (At least, not until sAllocDelay wraps around on overflow, which + // would take a very long time indeed.) + // + int32_t newDelay = GAtomic::DecrementDelay(); + if (newDelay != 0) { + return nullptr; + } + + if (GTls::IsDisabledOnCurrentThread()) { + return nullptr; + } + + // Disable on this thread *before* getting the stack trace. + AutoDisableOnCurrentThread disable; + + // Get the stack trace *before* locking the mutex. If we return nullptr then + // it was a waste, but it's not so frequent, and doing a stack walk while + // the mutex is locked is problematic (see the big comment on + // StackTrace::Fill() for details). + StackTrace allocStack; + allocStack.Fill(); + + MutexAutoLock lock(GMut::sMutex); + + Time now = GAtomic::Now(); + Delay newAllocDelay = Rnd64ToDelay<kAvgAllocDelay>(gMut->Random64(lock)); + + // We start at a random page alloc and wrap around, to ensure pages get even + // amounts of use. + uint8_t* ptr = nullptr; + uint8_t* pagePtr = nullptr; + for (uintptr_t n = 0, i = size_t(gMut->Random64(lock)) % kNumAllocPages; + n < kNumAllocPages; n++, i = (i + 1) % kNumAllocPages) { + if (!gMut->IsPageAllocatable(lock, i, now)) { + continue; + } + + pagePtr = gConst->AllocPagePtr(i); + MOZ_ASSERT(pagePtr); + bool ok = +#ifdef XP_WIN + !!VirtualAlloc(pagePtr, kPageSize, MEM_COMMIT, PAGE_READWRITE); +#else + mprotect(pagePtr, kPageSize, PROT_READ | PROT_WRITE) == 0; +#endif + size_t usableSize = sMallocTable.malloc_good_size(aReqSize); + if (ok) { + MOZ_ASSERT(usableSize > 0); + + // Put the allocation as close to the end of the page as possible, + // allowing for alignment requirements. + ptr = pagePtr + kPageSize - usableSize; + if (aAlignment != 1) { + ptr = reinterpret_cast<uint8_t*>( + (reinterpret_cast<uintptr_t>(ptr) & ~(aAlignment - 1))); + } + + gMut->SetPageInUse(lock, i, aArenaId, ptr, allocStack); + + if (aZero) { + memset(ptr, 0, usableSize); + } else { +#ifdef DEBUG + memset(ptr, kAllocJunk, usableSize); +#endif + } + } + + gMut->IncPageAllocHits(lock); + LOG("PageAlloc(%zu, %zu) -> %p[%zu]/%p (%zu) (z%zu), sAllocDelay <- %zu, " + "fullness %zu/%zu, hits %zu/%zu (%zu%%)\n", + aReqSize, aAlignment, pagePtr, i, ptr, usableSize, size_t(aZero), + size_t(newAllocDelay), gMut->NumPageAllocs(lock), kNumAllocPages, + gMut->PageAllocHits(lock), gMut->PageAllocAttempts(lock), + gMut->PageAllocHitRate(lock)); + break; + } + + if (!pagePtr) { + // No pages are available, or VirtualAlloc/mprotect failed. + gMut->IncPageAllocMisses(lock); + LOG("No PageAlloc(%zu, %zu), sAllocDelay <- %zu, fullness %zu/%zu, hits " + "%zu/%zu " + "(%zu%%)\n", + aReqSize, aAlignment, size_t(newAllocDelay), gMut->NumPageAllocs(lock), + kNumAllocPages, gMut->PageAllocHits(lock), + gMut->PageAllocAttempts(lock), gMut->PageAllocHitRate(lock)); + } + + // Set the new alloc delay. + GAtomic::SetAllocDelay(newAllocDelay); + + return ptr; +} + +static void FreePage(GMutLock aLock, uintptr_t aIndex, + const Maybe<arena_id_t>& aArenaId, + const StackTrace& aFreeStack, Delay aReuseDelay) { + void* pagePtr = gConst->AllocPagePtr(aIndex); +#ifdef XP_WIN + if (!VirtualFree(pagePtr, kPageSize, MEM_DECOMMIT)) { + return; + } +#else + if (!mmap(pagePtr, kPageSize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, + -1, 0)) { + return; + } +#endif + + gMut->SetPageFreed(aLock, aIndex, aArenaId, aFreeStack, aReuseDelay); +} + +//--------------------------------------------------------------------------- +// replace-malloc machinery +//--------------------------------------------------------------------------- + +// This handles malloc, moz_arena_malloc, and realloc-with-a-nullptr. +MOZ_ALWAYS_INLINE static void* PageMalloc(const Maybe<arena_id_t>& aArenaId, + size_t aReqSize) { + void* ptr = MaybePageAlloc(aArenaId, aReqSize, /* aAlignment */ 1, + /* aZero */ false); + return ptr ? ptr + : (aArenaId.isSome() + ? sMallocTable.moz_arena_malloc(*aArenaId, aReqSize) + : sMallocTable.malloc(aReqSize)); +} + +static void* replace_malloc(size_t aReqSize) { + return PageMalloc(Nothing(), aReqSize); +} + +static Delay ReuseDelay(GMutLock aLock) { + return (kAvgPageReuseDelay / 2) + + Rnd64ToDelay<kAvgPageReuseDelay / 2>(gMut->Random64(aLock)); +} + +// This handles both calloc and moz_arena_calloc. +MOZ_ALWAYS_INLINE static void* PageCalloc(const Maybe<arena_id_t>& aArenaId, + size_t aNum, size_t aReqSize) { + CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aNum) * aReqSize; + if (!checkedSize.isValid()) { + return nullptr; + } + + void* ptr = MaybePageAlloc(aArenaId, checkedSize.value(), /* aAlignment */ 1, + /* aZero */ true); + return ptr ? ptr + : (aArenaId.isSome() + ? sMallocTable.moz_arena_calloc(*aArenaId, aNum, aReqSize) + : sMallocTable.calloc(aNum, aReqSize)); +} + +static void* replace_calloc(size_t aNum, size_t aReqSize) { + return PageCalloc(Nothing(), aNum, aReqSize); +} + +// This function handles both realloc and moz_arena_realloc. +// +// As always, realloc is complicated, and doubly so when there are two +// different kinds of allocations in play. Here are the possible transitions, +// and what we do in practice. +// +// - normal-to-normal: This is straightforward and obviously necessary. +// +// - normal-to-page: This is disallowed because it would require getting the +// arenaId of the normal allocation, which isn't possible in non-DEBUG builds +// for security reasons. +// +// - page-to-page: This is done whenever possible, i.e. whenever the new size +// is less than or equal to 4 KiB. This choice counterbalances the +// disallowing of normal-to-page allocations, in order to avoid biasing +// towards or away from page allocations. It always occurs in-place. +// +// - page-to-normal: this is done only when necessary, i.e. only when the new +// size is greater than 4 KiB. This choice naturally flows from the +// prior choice on page-to-page transitions. +// +// In summary: realloc doesn't change the allocation kind unless it must. +// +MOZ_ALWAYS_INLINE static void* PageRealloc(const Maybe<arena_id_t>& aArenaId, + void* aOldPtr, size_t aNewSize) { + if (!aOldPtr) { + // Null pointer. Treat like malloc(aNewSize). + return PageMalloc(aArenaId, aNewSize); + } + + PtrKind pk = gConst->PtrKind(aOldPtr); + if (pk.IsNothing()) { + // A normal-to-normal transition. + return aArenaId.isSome() + ? sMallocTable.moz_arena_realloc(*aArenaId, aOldPtr, aNewSize) + : sMallocTable.realloc(aOldPtr, aNewSize); + } + + if (pk.IsGuardPage()) { + GMut::CrashOnGuardPage(aOldPtr); + } + + // At this point we know we have an allocation page. + uintptr_t index = pk.AllocPageIndex(); + + // A page-to-something transition. + + // Note that `disable` has no effect unless it is emplaced below. + Maybe<AutoDisableOnCurrentThread> disable; + // Get the stack trace *before* locking the mutex. + StackTrace stack; + if (GTls::IsDisabledOnCurrentThread()) { + // PHC is disabled on this thread. Leave the stack empty. + } else { + // Disable on this thread *before* getting the stack trace. + disable.emplace(); + stack.Fill(); + } + + MutexAutoLock lock(GMut::sMutex); + + // Check for realloc() of a freed block. + gMut->EnsureValidAndInUse(lock, aOldPtr, index); + + if (aNewSize <= kPageSize) { + // A page-to-page transition. Just keep using the page allocation. We do + // this even if the thread is disabled, because it doesn't create a new + // page allocation. Note that ResizePageInUse() checks aArenaId. + // + // Move the bytes with memmove(), because the old allocation and the new + // allocation overlap. Move the usable size rather than the requested size, + // because the user might have used malloc_usable_size() and filled up the + // usable size. + size_t oldUsableSize = gMut->PageUsableSize(lock, index); + size_t newUsableSize = sMallocTable.malloc_good_size(aNewSize); + uint8_t* pagePtr = gConst->AllocPagePtr(index); + uint8_t* newPtr = pagePtr + kPageSize - newUsableSize; + memmove(newPtr, aOldPtr, std::min(oldUsableSize, aNewSize)); + gMut->ResizePageInUse(lock, index, aArenaId, newPtr, stack); + LOG("PageRealloc-Reuse(%p, %zu) -> %p\n", aOldPtr, aNewSize, newPtr); + return newPtr; + } + + // A page-to-normal transition (with the new size greater than page-sized). + // (Note that aArenaId is checked below.) + void* newPtr; + if (aArenaId.isSome()) { + newPtr = sMallocTable.moz_arena_malloc(*aArenaId, aNewSize); + } else { + Maybe<arena_id_t> oldArenaId = gMut->PageArena(lock, index); + newPtr = (oldArenaId.isSome() + ? sMallocTable.moz_arena_malloc(*oldArenaId, aNewSize) + : sMallocTable.malloc(aNewSize)); + } + if (!newPtr) { + return nullptr; + } + + MOZ_ASSERT(aNewSize > kPageSize); + + Delay reuseDelay = ReuseDelay(lock); + + // Copy the usable size rather than the requested size, because the user + // might have used malloc_usable_size() and filled up the usable size. Note + // that FreePage() checks aArenaId (via SetPageFreed()). + size_t oldUsableSize = gMut->PageUsableSize(lock, index); + memcpy(newPtr, aOldPtr, std::min(oldUsableSize, aNewSize)); + FreePage(lock, index, aArenaId, stack, reuseDelay); + LOG("PageRealloc-Free(%p[%zu], %zu) -> %p, %zu delay, reuse at ~%zu\n", + aOldPtr, index, aNewSize, newPtr, size_t(reuseDelay), + size_t(GAtomic::Now()) + reuseDelay); + + return newPtr; +} + +static void* replace_realloc(void* aOldPtr, size_t aNewSize) { + return PageRealloc(Nothing(), aOldPtr, aNewSize); +} + +// This handles both free and moz_arena_free. +MOZ_ALWAYS_INLINE static void PageFree(const Maybe<arena_id_t>& aArenaId, + void* aPtr) { + PtrKind pk = gConst->PtrKind(aPtr); + if (pk.IsNothing()) { + // Not a page allocation. + return aArenaId.isSome() ? sMallocTable.moz_arena_free(*aArenaId, aPtr) + : sMallocTable.free(aPtr); + } + + if (pk.IsGuardPage()) { + GMut::CrashOnGuardPage(aPtr); + } + + // At this point we know we have an allocation page. + uintptr_t index = pk.AllocPageIndex(); + + // Note that `disable` has no effect unless it is emplaced below. + Maybe<AutoDisableOnCurrentThread> disable; + // Get the stack trace *before* locking the mutex. + StackTrace freeStack; + if (GTls::IsDisabledOnCurrentThread()) { + // PHC is disabled on this thread. Leave the stack empty. + } else { + // Disable on this thread *before* getting the stack trace. + disable.emplace(); + freeStack.Fill(); + } + + MutexAutoLock lock(GMut::sMutex); + + // Check for a double-free. + gMut->EnsureValidAndInUse(lock, aPtr, index); + + // Note that FreePage() checks aArenaId (via SetPageFreed()). + Delay reuseDelay = ReuseDelay(lock); + FreePage(lock, index, aArenaId, freeStack, reuseDelay); + + LOG("PageFree(%p[%zu]), %zu delay, reuse at ~%zu, fullness %zu/%zu\n", aPtr, + index, size_t(reuseDelay), size_t(GAtomic::Now()) + reuseDelay, + gMut->NumPageAllocs(lock), kNumAllocPages); +} + +static void replace_free(void* aPtr) { return PageFree(Nothing(), aPtr); } + +// This handles memalign and moz_arena_memalign. +MOZ_ALWAYS_INLINE static void* PageMemalign(const Maybe<arena_id_t>& aArenaId, + size_t aAlignment, + size_t aReqSize) { + MOZ_RELEASE_ASSERT(IsPowerOfTwo(aAlignment)); + + // PHC can't satisfy an alignment greater than a page size, so fall back to + // mozjemalloc in that case. + void* ptr = nullptr; + if (aAlignment <= kPageSize) { + ptr = MaybePageAlloc(aArenaId, aReqSize, aAlignment, /* aZero */ false); + } + return ptr ? ptr + : (aArenaId.isSome() + ? sMallocTable.moz_arena_memalign(*aArenaId, aAlignment, + aReqSize) + : sMallocTable.memalign(aAlignment, aReqSize)); +} + +static void* replace_memalign(size_t aAlignment, size_t aReqSize) { + return PageMemalign(Nothing(), aAlignment, aReqSize); +} + +static size_t replace_malloc_usable_size(usable_ptr_t aPtr) { + PtrKind pk = gConst->PtrKind(aPtr); + if (pk.IsNothing()) { + // Not a page allocation. Measure it normally. + return sMallocTable.malloc_usable_size(aPtr); + } + + if (pk.IsGuardPage()) { + GMut::CrashOnGuardPage(const_cast<void*>(aPtr)); + } + + // At this point we know we have an allocation page. + uintptr_t index = pk.AllocPageIndex(); + + MutexAutoLock lock(GMut::sMutex); + + // Check for malloc_usable_size() of a freed block. + gMut->EnsureValidAndInUse(lock, const_cast<void*>(aPtr), index); + + return gMut->PageUsableSize(lock, index); +} + +void replace_jemalloc_stats(jemalloc_stats_t* aStats, + jemalloc_bin_stats_t* aBinStats) { + sMallocTable.jemalloc_stats_internal(aStats, aBinStats); + + // Add all the pages to `mapped`. + size_t mapped = kAllPagesSize; + aStats->mapped += mapped; + + size_t allocated = 0; + { + MutexAutoLock lock(GMut::sMutex); + + // Add usable space of in-use allocations to `allocated`. + for (size_t i = 0; i < kNumAllocPages; i++) { + if (gMut->IsPageInUse(lock, i)) { + allocated += gMut->PageUsableSize(lock, i); + } + } + } + aStats->allocated += allocated; + + // Waste is the gap between `allocated` and `mapped`. + size_t waste = mapped - allocated; + aStats->waste += waste; + + // aStats.page_cache and aStats.bin_unused are left unchanged because PHC + // doesn't have anything corresponding to those. + + // gConst and gMut are normal heap allocations, so they're measured by + // mozjemalloc as `allocated`. Move them into `bookkeeping`. + size_t bookkeeping = sMallocTable.malloc_usable_size(gConst) + + sMallocTable.malloc_usable_size(gMut); + aStats->allocated -= bookkeeping; + aStats->bookkeeping += bookkeeping; +} + +void replace_jemalloc_ptr_info(const void* aPtr, jemalloc_ptr_info_t* aInfo) { + // We need to implement this properly, because various code locations do + // things like checking that allocations are in the expected arena. + PtrKind pk = gConst->PtrKind(aPtr); + if (pk.IsNothing()) { + // Not a page allocation. + return sMallocTable.jemalloc_ptr_info(aPtr, aInfo); + } + + if (pk.IsGuardPage()) { + // Treat a guard page as unknown because there's no better alternative. + *aInfo = {TagUnknown, nullptr, 0, 0}; + return; + } + + // At this point we know we have an allocation page. + uintptr_t index = pk.AllocPageIndex(); + + MutexAutoLock lock(GMut::sMutex); + + gMut->FillJemallocPtrInfo(lock, aPtr, index, aInfo); +#if DEBUG + LOG("JemallocPtrInfo(%p[%zu]) -> {%zu, %p, %zu, %zu}\n", aPtr, index, + size_t(aInfo->tag), aInfo->addr, aInfo->size, aInfo->arenaId); +#else + LOG("JemallocPtrInfo(%p[%zu]) -> {%zu, %p, %zu}\n", aPtr, index, + size_t(aInfo->tag), aInfo->addr, aInfo->size); +#endif +} + +arena_id_t replace_moz_create_arena_with_params(arena_params_t* aParams) { + // No need to do anything special here. + return sMallocTable.moz_create_arena_with_params(aParams); +} + +void replace_moz_dispose_arena(arena_id_t aArenaId) { + // No need to do anything special here. + return sMallocTable.moz_dispose_arena(aArenaId); +} + +void* replace_moz_arena_malloc(arena_id_t aArenaId, size_t aReqSize) { + return PageMalloc(Some(aArenaId), aReqSize); +} + +void* replace_moz_arena_calloc(arena_id_t aArenaId, size_t aNum, + size_t aReqSize) { + return PageCalloc(Some(aArenaId), aNum, aReqSize); +} + +void* replace_moz_arena_realloc(arena_id_t aArenaId, void* aOldPtr, + size_t aNewSize) { + return PageRealloc(Some(aArenaId), aOldPtr, aNewSize); +} + +void replace_moz_arena_free(arena_id_t aArenaId, void* aPtr) { + return PageFree(Some(aArenaId), aPtr); +} + +void* replace_moz_arena_memalign(arena_id_t aArenaId, size_t aAlignment, + size_t aReqSize) { + return PageMemalign(Some(aArenaId), aAlignment, aReqSize); +} + +class PHCBridge : public ReplaceMallocBridge { + virtual bool IsPHCAllocation(const void* aPtr, phc::AddrInfo* aOut) override { + PtrKind pk = gConst->PtrKind(aPtr); + if (pk.IsNothing()) { + return false; + } + + bool isGuardPage = false; + if (pk.IsGuardPage()) { + if ((uintptr_t(aPtr) % kPageSize) < (kPageSize / 2)) { + // The address is in the lower half of a guard page, so it's probably an + // overflow. But first check that it is not on the very first guard + // page, in which case it cannot be an overflow, and we ignore it. + if (gConst->IsInFirstGuardPage(aPtr)) { + return false; + } + + // Get the allocation page preceding this guard page. + pk = gConst->PtrKind(static_cast<const uint8_t*>(aPtr) - kPageSize); + + } else { + // The address is in the upper half of a guard page, so it's probably an + // underflow. Get the allocation page following this guard page. + pk = gConst->PtrKind(static_cast<const uint8_t*>(aPtr) + kPageSize); + } + + // Make a note of the fact that we hit a guard page. + isGuardPage = true; + } + + // At this point we know we have an allocation page. + uintptr_t index = pk.AllocPageIndex(); + + if (aOut) { + MutexAutoLock lock(GMut::sMutex); + gMut->FillAddrInfo(lock, index, aPtr, isGuardPage, *aOut); + LOG("IsPHCAllocation: %zu, %p, %zu, %zu, %zu\n", size_t(aOut->mKind), + aOut->mBaseAddr, aOut->mUsableSize, + aOut->mAllocStack.isSome() ? aOut->mAllocStack->mLength : 0, + aOut->mFreeStack.isSome() ? aOut->mFreeStack->mLength : 0); + } + return true; + } + + virtual void DisablePHCOnCurrentThread() override { + GTls::DisableOnCurrentThread(); + LOG("DisablePHCOnCurrentThread: %zu\n", 0ul); + } + + virtual void ReenablePHCOnCurrentThread() override { + GTls::EnableOnCurrentThread(); + LOG("ReenablePHCOnCurrentThread: %zu\n", 0ul); + } + + virtual bool IsPHCEnabledOnCurrentThread() override { + bool enabled = !GTls::IsDisabledOnCurrentThread(); + LOG("IsPHCEnabledOnCurrentThread: %zu\n", size_t(enabled)); + return enabled; + } +}; + +// WARNING: this function runs *very* early -- before all static initializers +// have run. For this reason, non-scalar globals (gConst, gMut) are allocated +// dynamically (so we can guarantee their construction in this function) rather +// than statically. GAtomic and GTls contain simple static data that doesn't +// involve static initializers so they don't need to be allocated dynamically. +void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) { + // Don't run PHC if the page size isn't 4 KiB. + jemalloc_stats_t stats; + aMallocTable->jemalloc_stats_internal(&stats, nullptr); + if (stats.page_size != kPageSize) { + return; + } + + sMallocTable = *aMallocTable; + + // The choices of which functions to replace are complex enough that we set + // them individually instead of using MALLOC_FUNCS/malloc_decls.h. + + aMallocTable->malloc = replace_malloc; + aMallocTable->calloc = replace_calloc; + aMallocTable->realloc = replace_realloc; + aMallocTable->free = replace_free; + aMallocTable->memalign = replace_memalign; + + // posix_memalign, aligned_alloc & valloc: unset, which means they fall back + // to replace_memalign. + aMallocTable->malloc_usable_size = replace_malloc_usable_size; + // default malloc_good_size: the default suffices. + + aMallocTable->jemalloc_stats_internal = replace_jemalloc_stats; + // jemalloc_purge_freed_pages: the default suffices. + // jemalloc_free_dirty_pages: the default suffices. + // jemalloc_thread_local_arena: the default suffices. + aMallocTable->jemalloc_ptr_info = replace_jemalloc_ptr_info; + + aMallocTable->moz_create_arena_with_params = + replace_moz_create_arena_with_params; + aMallocTable->moz_dispose_arena = replace_moz_dispose_arena; + aMallocTable->moz_arena_malloc = replace_moz_arena_malloc; + aMallocTable->moz_arena_calloc = replace_moz_arena_calloc; + aMallocTable->moz_arena_realloc = replace_moz_arena_realloc; + aMallocTable->moz_arena_free = replace_moz_arena_free; + aMallocTable->moz_arena_memalign = replace_moz_arena_memalign; + + static PHCBridge bridge; + *aBridge = &bridge; + +#ifndef XP_WIN + // Avoid deadlocks when forking by acquiring our state lock prior to forking + // and releasing it after forking. See |LogAlloc|'s |replace_init| for + // in-depth details. + // + // Note: This must run after attempting an allocation so as to give the + // system malloc a chance to insert its own atfork handler. + sMallocTable.malloc(-1); + pthread_atfork(GMut::prefork, GMut::postfork, GMut::postfork); +#endif + + // gConst and gMut are never freed. They live for the life of the process. + gConst = InfallibleAllocPolicy::new_<GConst>(); + GTls::Init(); + gMut = InfallibleAllocPolicy::new_<GMut>(); + { + MutexAutoLock lock(GMut::sMutex); + Delay firstAllocDelay = + Rnd64ToDelay<kAvgFirstAllocDelay>(gMut->Random64(lock)); + GAtomic::Init(firstAllocDelay); + } +} diff --git a/memory/replace/phc/PHC.h b/memory/replace/phc/PHC.h new file mode 100644 index 0000000000..139a0e81c9 --- /dev/null +++ b/memory/replace/phc/PHC.h @@ -0,0 +1,102 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PHC_h +#define PHC_h + +#include "mozilla/Assertions.h" +#include "mozilla/Maybe.h" +#include <stdint.h> +#include <stdlib.h> + +namespace mozilla { +namespace phc { + +// Note: a stack trace may have no frames due to a collection problem. +// +// Also note: a more compact stack trace representation could be achieved with +// some effort. +struct StackTrace { + public: + static const size_t kMaxFrames = 16; + + // The number of PCs in the stack trace. + size_t mLength; + + // The PCs in the stack trace. Only the first mLength are initialized. + const void* mPcs[kMaxFrames]; + + public: + StackTrace() : mLength(0) {} +}; + +// Info from PHC about an address in memory. +class AddrInfo { + public: + enum class Kind { + // The address is not in PHC-managed memory. + Unknown = 0, + + // The address is within a PHC page that has never been allocated. A crash + // involving such an address is unlikely in practice, because it would + // require the crash to happen quite early. + NeverAllocatedPage = 1, + + // The address is within a PHC page that is in use. + InUsePage = 2, + + // The address is within a PHC page that has been allocated and then freed. + // A crash involving such an address most likely indicates a + // use-after-free. (A sufficiently wild write -- e.g. a large buffer + // overflow -- could also trigger it, but this is less likely.) + FreedPage = 3, + + // The address is within a PHC guard page. A crash involving such an + // address most likely indicates a buffer overflow. (Again, a sufficiently + // wild write could unluckily trigger it, but this is less likely.) + GuardPage = 4, + }; + + // The page kind. + Kind mKind; + + // The starting address of the allocation. + // - Unknown | NeverAllocatedPage: nullptr. + // - InUsePage | FreedPage: the address of the allocation within the page. + // - GuardPage: the mBaseAddr value from the preceding allocation page. + const void* mBaseAddr; + + // The usable size, which could be bigger than the requested size. + // - Unknown | NeverAllocatePage: 0. + // - InUsePage | FreedPage: the usable size of the allocation within the page. + // - GuardPage: the mUsableSize value from the preceding allocation page. + size_t mUsableSize; + + // The allocation stack. + // - Unknown | NeverAllocatedPage: Nothing. + // - InUsePage | FreedPage: Some. + // - GuardPage: the mAllocStack value from the preceding allocation page. + mozilla::Maybe<StackTrace> mAllocStack; + + // The free stack. + // - Unknown | NeverAllocatedPage | InUsePage: Nothing. + // - FreedPage: Some. + // - GuardPage: the mFreeStack value from the preceding allocation page. + mozilla::Maybe<StackTrace> mFreeStack; + + // Default to no PHC info. + AddrInfo() + : mKind(Kind::Unknown), + mBaseAddr(nullptr), + mUsableSize(0), + mAllocStack(), + mFreeStack() {} +}; + +} // namespace phc +} // namespace mozilla + +#endif /* PHC_h */ diff --git a/memory/replace/phc/moz.build b/memory/replace/phc/moz.build new file mode 100644 index 0000000000..1a55ab142b --- /dev/null +++ b/memory/replace/phc/moz.build @@ -0,0 +1,36 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ReplaceMalloc("phc") + +DEFINES["MOZ_NO_MOZALLOC"] = True + +LOCAL_INCLUDES += [ + "../logalloc", + "/memory/build", +] + +EXPORTS += [ + "PHC.h", +] + +UNIFIED_SOURCES += [ + "PHC.cpp", +] + +if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + SOURCES += [ + "../logalloc/FdPrintf.cpp", + "/mozglue/misc/StackWalk.cpp", + ] + if CONFIG["OS_ARCH"] == "WINNT": + OS_LIBS += [ + "dbghelp", + ] + +TEST_DIRS += ["test"] + +DisableStlWrapping() diff --git a/memory/replace/phc/test/gtest/TestPHC.cpp b/memory/replace/phc/test/gtest/TestPHC.cpp new file mode 100644 index 0000000000..b83db800f2 --- /dev/null +++ b/memory/replace/phc/test/gtest/TestPHC.cpp @@ -0,0 +1,303 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" + +#include "mozmemory.h" +#include "replace_malloc_bridge.h" +#include "mozilla/Assertions.h" +#include "mozilla/mozalloc.h" +#include "../../PHC.h" + +using namespace mozilla; + +bool PHCInfoEq(phc::AddrInfo& aInfo, phc::AddrInfo::Kind aKind, void* aBaseAddr, + size_t aUsableSize, bool aHasAllocStack, bool aHasFreeStack) { + return aInfo.mKind == aKind && aInfo.mBaseAddr == aBaseAddr && + aInfo.mUsableSize == aUsableSize && + // Proper stack traces will have at least 3 elements. + (aHasAllocStack ? (aInfo.mAllocStack->mLength > 2) + : (aInfo.mAllocStack.isNothing())) && + (aHasFreeStack ? (aInfo.mFreeStack->mLength > 2) + : (aInfo.mFreeStack.isNothing())); +} + +bool JeInfoEq(jemalloc_ptr_info_t& aInfo, PtrInfoTag aTag, void* aAddr, + size_t aSize, arena_id_t arenaId) { + return aInfo.tag == aTag && aInfo.addr == aAddr && aInfo.size == aSize +#ifdef MOZ_DEBUG + && aInfo.arenaId == arenaId +#endif + ; +} + +uint8_t* GetPHCAllocation(size_t aSize, size_t aAlignment = 1) { + // A crude but effective way to get a PHC allocation. + for (int i = 0; i < 2000000; i++) { + void* p = (aAlignment == 1) ? moz_xmalloc(aSize) + : moz_xmemalign(aAlignment, aSize); + if (ReplaceMalloc::IsPHCAllocation(p, nullptr)) { + return (uint8_t*)p; + } + free(p); + } + return nullptr; +} + +static const size_t kPageSize = 4096; + +TEST(PHC, TestPHCAllocations) +{ + // First, check that allocations of various sizes all get put at the end of + // their page as expected. Also, check their sizes are as expected. + +#define ASSERT_POS(n1, n2) \ + p = (uint8_t*)moz_xrealloc(p, (n1)); \ + ASSERT_EQ((reinterpret_cast<uintptr_t>(p) & (kPageSize - 1)), \ + kPageSize - (n2)); \ + ASSERT_EQ(moz_malloc_usable_size(p), (n2)); + + uint8_t* p = GetPHCAllocation(1); + if (!p) { + MOZ_CRASH("failed to get a PHC allocation"); + } + + // On Win64 the smallest possible allocation is 16 bytes. On other platforms + // it is 8 bytes. +#if defined(XP_WIN) && defined(HAVE_64BIT_BUILD) + ASSERT_POS(8U, 16U); +#else + ASSERT_POS(8U, 8U); +#endif + ASSERT_POS(16U, 16U); + ASSERT_POS(32U, 32U); + ASSERT_POS(64U, 64U); + ASSERT_POS(128U, 128U); + ASSERT_POS(256U, 256U); + ASSERT_POS(512U, 512U); + ASSERT_POS(1024U, 1024U); + ASSERT_POS(2048U, 2048U); + ASSERT_POS(4096U, 4096U); + + free(p); + +#undef ASSERT_POS + + // Second, do similar checking with allocations of various alignments. Also + // check that their sizes (which are different to allocations with normal + // alignment) are the same as the sizes of equivalent non-PHC allocations. + +#define ASSERT_ALIGN(a1, a2) \ + p = (uint8_t*)GetPHCAllocation(8, (a1)); \ + ASSERT_EQ((reinterpret_cast<uintptr_t>(p) & (kPageSize - 1)), \ + kPageSize - (a2)); \ + ASSERT_EQ(moz_malloc_usable_size(p), (a2)); \ + free(p); \ + p = (uint8_t*)moz_xmemalign((a1), 8); \ + ASSERT_EQ(moz_malloc_usable_size(p), (a2)); \ + free(p); + + // On Win64 the smallest possible allocation is 16 bytes. On other platforms + // it is 8 bytes. +#if defined(XP_WIN) && defined(HAVE_64BIT_BUILD) + ASSERT_ALIGN(8U, 16U); +#else + ASSERT_ALIGN(8U, 8U); +#endif + ASSERT_ALIGN(16U, 16U); + ASSERT_ALIGN(32U, 32U); + ASSERT_ALIGN(64U, 64U); + ASSERT_ALIGN(128U, 128U); + ASSERT_ALIGN(256U, 256U); + ASSERT_ALIGN(512U, 512U); + ASSERT_ALIGN(1024U, 1024U); + ASSERT_ALIGN(2048U, 2048U); + ASSERT_ALIGN(4096U, 4096U); + +#undef ASSERT_ALIGN +} + +TEST(PHC, TestPHCInfo) +{ + int stackVar; + phc::AddrInfo phcInfo; + jemalloc_ptr_info_t jeInfo; + + // Test a default AddrInfo. + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::Unknown, nullptr, 0ul, + false, false)); + + // Test some non-PHC allocation addresses. + ASSERT_FALSE(ReplaceMalloc::IsPHCAllocation(nullptr, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::Unknown, nullptr, 0, + false, false)); + ASSERT_FALSE(ReplaceMalloc::IsPHCAllocation(&stackVar, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::Unknown, nullptr, 0, + false, false)); + + uint8_t* p = GetPHCAllocation(32); + if (!p) { + MOZ_CRASH("failed to get a PHC allocation"); + } + + // Test an in-use PHC allocation: first byte within it. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::InUsePage, p, 32ul, true, false)); + ASSERT_EQ(moz_malloc_usable_size(p), 32ul); + jemalloc_ptr_info(p, &jeInfo); + + // Test an in-use PHC allocation: last byte within it. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p + 31, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::InUsePage, p, 32ul, true, false)); + ASSERT_TRUE(JeInfoEq(jeInfo, TagLiveAlloc, p, 32, 0)); + jemalloc_ptr_info(p + 31, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagLiveAlloc, p, 32, 0)); + + // Test an in-use PHC allocation: last byte before it. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p - 1, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::InUsePage, p, 32ul, true, false)); + jemalloc_ptr_info(p - 1, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: first byte on its allocation page. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p + 32 - kPageSize, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::InUsePage, p, 32ul, true, false)); + jemalloc_ptr_info(p + 32 - kPageSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: first byte in the following guard page. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p + 32, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, p, 32ul, true, false)); + jemalloc_ptr_info(p + 32, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: last byte in the lower half of the + // following guard page. + ASSERT_TRUE( + ReplaceMalloc::IsPHCAllocation(p + 32 + (kPageSize / 2 - 1), &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, p, 32ul, true, false)); + jemalloc_ptr_info(p + 32 + (kPageSize / 2 - 1), &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: last byte in the preceding guard page. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p + 31 - kPageSize, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, p, 32ul, true, false)); + jemalloc_ptr_info(p + 31 - kPageSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: first byte in the upper half of the + // preceding guard page. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation( + p + 31 - kPageSize - (kPageSize / 2 - 1), &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, p, 32ul, true, false)); + jemalloc_ptr_info(p + 31 - kPageSize - (kPageSize / 2 - 1), &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + free(p); + + // Test a freed PHC allocation: first byte within it. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::FreedPage, p, 32ul, true, true)); + jemalloc_ptr_info(p, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagFreedAlloc, p, 32, 0)); + + // Test a freed PHC allocation: last byte within it. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p + 31, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::FreedPage, p, 32ul, true, true)); + jemalloc_ptr_info(p + 31, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagFreedAlloc, p, 32, 0)); + + // Test a freed PHC allocation: last byte before it. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p - 1, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::FreedPage, p, 32ul, true, true)); + jemalloc_ptr_info(p - 1, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: first byte on its allocation page. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p + 32 - kPageSize, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::FreedPage, p, 32ul, true, true)); + jemalloc_ptr_info(p + 32 - kPageSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: first byte in the following guard page. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p + 32, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, p, 32ul, true, true)); + jemalloc_ptr_info(p + 32, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: last byte in the lower half of the following + // guard page. + ASSERT_TRUE( + ReplaceMalloc::IsPHCAllocation(p + 32 + (kPageSize / 2 - 1), &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, p, 32ul, true, true)); + jemalloc_ptr_info(p + 32 + (kPageSize / 2 - 1), &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: last byte in the preceding guard page. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p + 31 - kPageSize, &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, p, 32ul, true, true)); + jemalloc_ptr_info(p + 31 - kPageSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: first byte in the upper half of the preceding + // guard page. + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation( + p + 31 - kPageSize - (kPageSize / 2 - 1), &phcInfo)); + ASSERT_TRUE( + PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, p, 32ul, true, true)); + jemalloc_ptr_info(p + 31 - kPageSize - (kPageSize / 2 - 1), &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // There are no tests for `mKind == NeverAllocatedPage` because it's not + // possible to reliably get ahold of such a page. +} + +TEST(PHC, TestPHCDisabling) +{ + uint8_t* p = GetPHCAllocation(32); + uint8_t* q = GetPHCAllocation(32); + if (!p || !q) { + MOZ_CRASH("failed to get a PHC allocation"); + } + + ASSERT_TRUE(ReplaceMalloc::IsPHCEnabledOnCurrentThread()); + ReplaceMalloc::DisablePHCOnCurrentThread(); + ASSERT_FALSE(ReplaceMalloc::IsPHCEnabledOnCurrentThread()); + + // Test realloc() on a PHC allocation while PHC is disabled on the thread. + uint8_t* p2 = (uint8_t*)realloc(p, 128); + // The small realloc is fulfilled within the same page, but it does move. + ASSERT_TRUE(p2 == p - 96); + ASSERT_TRUE(ReplaceMalloc::IsPHCAllocation(p2, nullptr)); + uint8_t* p3 = (uint8_t*)realloc(p2, 8192); + // The big realloc is not in-place, and the result is not a PHC allocation. + ASSERT_TRUE(p3 != p2); + ASSERT_FALSE(ReplaceMalloc::IsPHCAllocation(p3, nullptr)); + free(p3); + + // Test free() on a PHC allocation while PHC is disabled on the thread. + free(q); + + // These must not be PHC allocations. + uint8_t* r = GetPHCAllocation(32); // This will fail. + ASSERT_FALSE(!!r); + + ReplaceMalloc::ReenablePHCOnCurrentThread(); + ASSERT_TRUE(ReplaceMalloc::IsPHCEnabledOnCurrentThread()); +} diff --git a/memory/replace/phc/test/gtest/moz.build b/memory/replace/phc/test/gtest/moz.build new file mode 100644 index 0000000000..82ccaaf9c6 --- /dev/null +++ b/memory/replace/phc/test/gtest/moz.build @@ -0,0 +1,15 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + "TestPHC.cpp", +] + +LOCAL_INCLUDES += [ + "../../", +] + +FINAL_LIBRARY = "xul-gtest" diff --git a/memory/replace/phc/test/moz.build b/memory/replace/phc/test/moz.build new file mode 100644 index 0000000000..8208ae849d --- /dev/null +++ b/memory/replace/phc/test/moz.build @@ -0,0 +1,9 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# The gtests won't work in a SpiderMonkey-only build. +if CONFIG["MOZ_WIDGET_TOOLKIT"]: + TEST_DIRS += ["gtest"] |