diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /memory/replace | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'memory/replace')
50 files changed, 8011 insertions, 0 deletions
diff --git a/memory/replace/dmd/DMD.cpp b/memory/replace/dmd/DMD.cpp new file mode 100644 index 0000000000..54a154e60e --- /dev/null +++ b/memory/replace/dmd/DMD.cpp @@ -0,0 +1,1884 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if !defined(MOZ_PROFILING) +# error "DMD requires MOZ_PROFILING" +#endif + +#ifdef XP_WIN +# include <windows.h> +# include <process.h> +#else +# include <pthread.h> +# include <sys/types.h> +# include <unistd.h> +#endif + +#ifdef ANDROID +# include <android/log.h> +#endif + +#include "nscore.h" + +#include "mozilla/Assertions.h" +#include "mozilla/FastBernoulliTrial.h" +#include "mozilla/HashFunctions.h" +#include "mozilla/HashTable.h" +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/JSONWriter.h" +#include "mozilla/Likely.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/PodOperations.h" +#include "mozilla/StackWalk.h" +#include "mozilla/ThreadLocal.h" + +// CodeAddressService is defined entirely in the header, so this does not make +// DMD depend on XPCOM's object file. +#include "CodeAddressService.h" + +// replace_malloc.h needs to be included before replace_malloc_bridge.h, +// which DMD.h includes, so DMD.h needs to be included after replace_malloc.h. +#include "replace_malloc.h" +#include "DMD.h" + +namespace mozilla { +namespace dmd { + +class DMDBridge : public ReplaceMallocBridge { + virtual DMDFuncs* GetDMDFuncs() override; +}; + +static DMDBridge* gDMDBridge; +static DMDFuncs gDMDFuncs; + +DMDFuncs* DMDBridge::GetDMDFuncs() { return &gDMDFuncs; } + +MOZ_FORMAT_PRINTF(1, 2) +inline void StatusMsg(const char* aFmt, ...) { + va_list ap; + va_start(ap, aFmt); + gDMDFuncs.StatusMsg(aFmt, ap); + va_end(ap); +} + +//--------------------------------------------------------------------------- +// Utilities +//--------------------------------------------------------------------------- + +#ifndef DISALLOW_COPY_AND_ASSIGN +# define DISALLOW_COPY_AND_ASSIGN(T) \ + T(const T&); \ + void operator=(const T&) +#endif + +static malloc_table_t gMallocTable; + +// This provides infallible allocations (they abort on OOM). We use it for all +// of DMD's own allocations, which fall into the following three cases. +// +// - Direct allocations (the easy case). +// +// - Indirect allocations in mozilla::{Vector,HashSet,HashMap} -- this class +// serves as their AllocPolicy. +// +// - Other indirect allocations (e.g. MozStackWalk) -- see the comments on +// Thread::mBlockIntercepts and in replace_malloc for how these work. +// +// It would be nice if we could use the InfallibleAllocPolicy from mozalloc, +// but DMD cannot use mozalloc. +// +class InfallibleAllocPolicy { + static void ExitOnFailure(const void* aP); + + public: + template <typename T> + static T* maybe_pod_malloc(size_t aNumElems) { + if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) + return nullptr; + return (T*)gMallocTable.malloc(aNumElems * sizeof(T)); + } + + template <typename T> + static T* maybe_pod_calloc(size_t aNumElems) { + return (T*)gMallocTable.calloc(aNumElems, sizeof(T)); + } + + template <typename T> + static T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) + return nullptr; + return (T*)gMallocTable.realloc(aPtr, aNewSize * sizeof(T)); + } + + static void* malloc_(size_t aSize) { + void* p = gMallocTable.malloc(aSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_malloc(size_t aNumElems) { + T* p = maybe_pod_malloc<T>(aNumElems); + ExitOnFailure(p); + return p; + } + + static void* calloc_(size_t aCount, size_t aSize) { + void* p = gMallocTable.calloc(aCount, aSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_calloc(size_t aNumElems) { + T* p = maybe_pod_calloc<T>(aNumElems); + ExitOnFailure(p); + return p; + } + + static void* realloc_(void* aPtr, size_t aNewSize) { + void* p = gMallocTable.realloc(aPtr, aNewSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + T* p = maybe_pod_realloc(aPtr, aOldSize, aNewSize); + ExitOnFailure(p); + return p; + } + + static void* memalign_(size_t aAlignment, size_t aSize) { + void* p = gMallocTable.memalign(aAlignment, aSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static void free_(T* aPtr, size_t aSize = 0) { + gMallocTable.free(aPtr); + } + + static char* strdup_(const char* aStr) { + char* s = (char*)InfallibleAllocPolicy::malloc_(strlen(aStr) + 1); + strcpy(s, aStr); + return s; + } + + template <class T> + static T* new_() { + void* mem = malloc_(sizeof(T)); + return new (mem) T; + } + + template <class T, typename P1> + static T* new_(const P1& aP1) { + void* mem = malloc_(sizeof(T)); + return new (mem) T(aP1); + } + + template <class T> + static void delete_(T* aPtr) { + if (aPtr) { + aPtr->~T(); + InfallibleAllocPolicy::free_(aPtr); + } + } + + static void reportAllocOverflow() { ExitOnFailure(nullptr); } + bool checkSimulatedOOM() const { return true; } +}; + +// This is only needed because of the |const void*| vs |void*| arg mismatch. +static size_t MallocSizeOf(const void* aPtr) { + return gMallocTable.malloc_usable_size(const_cast<void*>(aPtr)); +} + +void DMDFuncs::StatusMsg(const char* aFmt, va_list aAp) { +#ifdef ANDROID + __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, aAp); +#else + // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL. + size_t size = strlen(aFmt) + 64; + char* fmt = (char*)InfallibleAllocPolicy::malloc_(size); + snprintf(fmt, size, "DMD[%d] %s", getpid(), aFmt); + vfprintf(stderr, fmt, aAp); + InfallibleAllocPolicy::free_(fmt); +#endif +} + +/* static */ +void InfallibleAllocPolicy::ExitOnFailure(const void* aP) { + if (!aP) { + MOZ_CRASH("DMD out of memory; aborting"); + } +} + +static double Percent(size_t part, size_t whole) { + return (whole == 0) ? 0 : 100 * (double)part / whole; +} + +// Commifies the number. +static char* Show(size_t n, char* buf, size_t buflen) { + int nc = 0, i = 0, lasti = buflen - 2; + buf[lasti + 1] = '\0'; + if (n == 0) { + buf[lasti - i] = '0'; + i++; + } else { + while (n > 0) { + if (((i - nc) % 3) == 0 && i != 0) { + buf[lasti - i] = ','; + i++; + nc++; + } + buf[lasti - i] = static_cast<char>((n % 10) + '0'); + i++; + n /= 10; + } + } + int firstCharIndex = lasti - i + 1; + + MOZ_ASSERT(firstCharIndex >= 0); + return &buf[firstCharIndex]; +} + +//--------------------------------------------------------------------------- +// Options (Part 1) +//--------------------------------------------------------------------------- + +class Options { + template <typename T> + struct NumOption { + const T mDefault; + const T mMax; + T mActual; + NumOption(T aDefault, T aMax) + : mDefault(aDefault), mMax(aMax), mActual(aDefault) {} + }; + + // DMD has several modes. These modes affect what data is recorded and + // written to the output file, and the written data affects the + // post-processing that dmd.py can do. + // + // Users specify the mode as soon as DMD starts. This leads to minimal memory + // usage and log file size. It has the disadvantage that is inflexible -- if + // you want to change modes you have to re-run DMD. But in practice changing + // modes seems to be rare, so it's not much of a problem. + // + // An alternative possibility would be to always record and output *all* the + // information needed for all modes. This would let you choose the mode when + // running dmd.py, and so you could do multiple kinds of profiling on a + // single DMD run. But if you are only interested in one of the simpler + // modes, you'd pay the price of (a) increased memory usage and (b) *very* + // large log files. + // + // Finally, another alternative possibility would be to do mode selection + // partly at DMD startup or recording, and then partly in dmd.py. This would + // give some extra flexibility at moderate memory and file size cost. But + // certain mode pairs wouldn't work, which would be confusing. + // + enum class Mode { + // For each live block, this mode outputs: size (usable and slop) and + // (possibly) and allocation stack. This mode is good for live heap + // profiling. + Live, + + // Like "Live", but for each live block it also outputs: zero or more + // report stacks. This mode is good for identifying where memory reporters + // should be added. This is the default mode. + DarkMatter, + + // Like "Live", but also outputs the same data for dead blocks. This mode + // does cumulative heap profiling, which is good for identifying where large + // amounts of short-lived allocations ("heap churn") occur. + Cumulative, + + // Like "Live", but this mode also outputs for each live block the address + // of the block and the values contained in the blocks. This mode is useful + // for investigating leaks, by helping to figure out which blocks refer to + // other blocks. This mode force-enables full stacks coverage. + Scan + }; + + // With full stacks, every heap block gets a stack trace recorded for it. + // This is complete but slow. + // + // With partial stacks, not all heap blocks will get a stack trace recorded. + // A Bernoulli trial (see mfbt/FastBernoulliTrial.h for details) is performed + // for each heap block to decide if it gets one. Because bigger heap blocks + // are more likely to get a stack trace, even though most heap *blocks* won't + // get a stack trace, most heap *bytes* will. + enum class Stacks { Full, Partial }; + + char* mDMDEnvVar; // a saved copy, for later printing + + Mode mMode; + Stacks mStacks; + bool mShowDumpStats; + + void BadArg(const char* aArg); + static const char* ValueIfMatch(const char* aArg, const char* aOptionName); + static bool GetLong(const char* aArg, const char* aOptionName, long aMin, + long aMax, long* aValue); + static bool GetBool(const char* aArg, const char* aOptionName, bool* aValue); + + public: + explicit Options(const char* aDMDEnvVar); + + bool IsLiveMode() const { return mMode == Mode::Live; } + bool IsDarkMatterMode() const { return mMode == Mode::DarkMatter; } + bool IsCumulativeMode() const { return mMode == Mode::Cumulative; } + bool IsScanMode() const { return mMode == Mode::Scan; } + + const char* ModeString() const; + + const char* DMDEnvVar() const { return mDMDEnvVar; } + + bool DoFullStacks() const { return mStacks == Stacks::Full; } + size_t ShowDumpStats() const { return mShowDumpStats; } +}; + +static Options* gOptions; + +//--------------------------------------------------------------------------- +// The global lock +//--------------------------------------------------------------------------- + +// MutexBase implements the platform-specific parts of a mutex. + +#ifdef XP_WIN + +class MutexBase { + CRITICAL_SECTION mCS; + + DISALLOW_COPY_AND_ASSIGN(MutexBase); + + public: + MutexBase() { InitializeCriticalSection(&mCS); } + ~MutexBase() { DeleteCriticalSection(&mCS); } + + void Lock() { EnterCriticalSection(&mCS); } + void Unlock() { LeaveCriticalSection(&mCS); } +}; + +#else + +class MutexBase { + pthread_mutex_t mMutex; + + MutexBase(const MutexBase&) = delete; + + const MutexBase& operator=(const MutexBase&) = delete; + + public: + MutexBase() { pthread_mutex_init(&mMutex, nullptr); } + + void Lock() { pthread_mutex_lock(&mMutex); } + void Unlock() { pthread_mutex_unlock(&mMutex); } +}; + +#endif + +class Mutex : private MutexBase { + bool mIsLocked; + + Mutex(const Mutex&) = delete; + + const Mutex& operator=(const Mutex&) = delete; + + public: + Mutex() : mIsLocked(false) {} + + void Lock() { + MutexBase::Lock(); + MOZ_ASSERT(!mIsLocked); + mIsLocked = true; + } + + void Unlock() { + MOZ_ASSERT(mIsLocked); + mIsLocked = false; + MutexBase::Unlock(); + } + + bool IsLocked() { return mIsLocked; } +}; + +// This lock must be held while manipulating global state such as +// gStackTraceTable, gLiveBlockTable, gDeadBlockTable. Note that gOptions is +// *not* protected by this lock because it is only written to by Options(), +// which is only invoked at start-up and in ResetEverything(), which is only +// used by SmokeDMD.cpp. +static Mutex* gStateLock = nullptr; + +class AutoLockState { + AutoLockState(const AutoLockState&) = delete; + + const AutoLockState& operator=(const AutoLockState&) = delete; + + public: + AutoLockState() { gStateLock->Lock(); } + ~AutoLockState() { gStateLock->Unlock(); } +}; + +class AutoUnlockState { + AutoUnlockState(const AutoUnlockState&) = delete; + + const AutoUnlockState& operator=(const AutoUnlockState&) = delete; + + public: + AutoUnlockState() { gStateLock->Unlock(); } + ~AutoUnlockState() { gStateLock->Lock(); } +}; + +//--------------------------------------------------------------------------- +// Per-thread blocking of intercepts +//--------------------------------------------------------------------------- + +// On MacOS, the first __thread/thread_local access calls malloc, which leads +// to an infinite loop. So we use pthread-based TLS instead, which somehow +// doesn't have this problem. +#if !defined(XP_DARWIN) +# define DMD_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T) +#else +# define DMD_THREAD_LOCAL(T) \ + detail::ThreadLocal<T, detail::ThreadLocalKeyStorage> +#endif + +class Thread { + // Required for allocation via InfallibleAllocPolicy::new_. + friend class InfallibleAllocPolicy; + + // When true, this blocks intercepts, which allows malloc interception + // functions to themselves call malloc. (Nb: for direct calls to malloc we + // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes + // indirectly call vanilla malloc via functions like MozStackWalk.) + bool mBlockIntercepts; + + Thread() : mBlockIntercepts(false) {} + + Thread(const Thread&) = delete; + + const Thread& operator=(const Thread&) = delete; + + static DMD_THREAD_LOCAL(Thread*) tlsThread; + + public: + static void Init() { + if (!tlsThread.init()) { + MOZ_CRASH(); + } + } + + static Thread* Fetch() { + Thread* t = tlsThread.get(); + if (MOZ_UNLIKELY(!t)) { + // This memory is never freed, even if the thread dies. It's a leak, but + // only a tiny one. + t = InfallibleAllocPolicy::new_<Thread>(); + tlsThread.set(t); + } + + return t; + } + + bool BlockIntercepts() { + MOZ_ASSERT(!mBlockIntercepts); + return mBlockIntercepts = true; + } + + bool UnblockIntercepts() { + MOZ_ASSERT(mBlockIntercepts); + return mBlockIntercepts = false; + } + + bool InterceptsAreBlocked() const { return mBlockIntercepts; } +}; + +DMD_THREAD_LOCAL(Thread*) Thread::tlsThread; + +// An object of this class must be created (on the stack) before running any +// code that might allocate. +class AutoBlockIntercepts { + Thread* const mT; + + AutoBlockIntercepts(const AutoBlockIntercepts&) = delete; + + const AutoBlockIntercepts& operator=(const AutoBlockIntercepts&) = delete; + + public: + explicit AutoBlockIntercepts(Thread* aT) : mT(aT) { mT->BlockIntercepts(); } + ~AutoBlockIntercepts() { + MOZ_ASSERT(mT->InterceptsAreBlocked()); + mT->UnblockIntercepts(); + } +}; + +//--------------------------------------------------------------------------- +// Location service +//--------------------------------------------------------------------------- + +struct DescribeCodeAddressLock { + static void Unlock() { gStateLock->Unlock(); } + static void Lock() { gStateLock->Lock(); } + static bool IsLocked() { return gStateLock->IsLocked(); } +}; + +typedef CodeAddressService<InfallibleAllocPolicy, DescribeCodeAddressLock> + CodeAddressService; + +//--------------------------------------------------------------------------- +// Stack traces +//--------------------------------------------------------------------------- + +class StackTrace { + public: + static const uint32_t MaxFrames = 24; + + private: + uint32_t mLength; // The number of PCs. + const void* mPcs[MaxFrames]; // The PCs themselves. + + public: + StackTrace() : mLength(0) {} + StackTrace(const StackTrace& aOther) : mLength(aOther.mLength) { + PodCopy(mPcs, aOther.mPcs, mLength); + } + + uint32_t Length() const { return mLength; } + const void* Pc(uint32_t i) const { + MOZ_ASSERT(i < mLength); + return mPcs[i]; + } + + uint32_t Size() const { return mLength * sizeof(mPcs[0]); } + + // The stack trace returned by this function is interned in gStackTraceTable, + // and so is immortal and unmovable. + static const StackTrace* Get(Thread* aT); + + // Hash policy. + + typedef StackTrace* Lookup; + + static mozilla::HashNumber hash(const StackTrace* const& aSt) { + return mozilla::HashBytes(aSt->mPcs, aSt->Size()); + } + + static bool match(const StackTrace* const& aA, const StackTrace* const& aB) { + return aA->mLength == aB->mLength && + memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0; + } + + private: + static void StackWalkCallback(uint32_t aFrameNumber, void* aPc, void* aSp, + void* aClosure) { + StackTrace* st = (StackTrace*)aClosure; + MOZ_ASSERT(st->mLength < MaxFrames); + st->mPcs[st->mLength] = aPc; + st->mLength++; + MOZ_ASSERT(st->mLength == aFrameNumber); + } +}; + +typedef mozilla::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy> + StackTraceTable; +static StackTraceTable* gStackTraceTable = nullptr; + +typedef mozilla::HashSet<const StackTrace*, + mozilla::DefaultHasher<const StackTrace*>, + InfallibleAllocPolicy> + StackTraceSet; + +typedef mozilla::HashSet<const void*, mozilla::DefaultHasher<const void*>, + InfallibleAllocPolicy> + PointerSet; +typedef mozilla::HashMap<const void*, uint32_t, + mozilla::DefaultHasher<const void*>, + InfallibleAllocPolicy> + PointerIdMap; + +// We won't GC the stack trace table until it this many elements. +static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024; + +/* static */ const StackTrace* StackTrace::Get(Thread* aT) { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(aT->InterceptsAreBlocked()); + + // On Windows, MozStackWalk can acquire a lock from the shared library + // loader. Another thread might call malloc while holding that lock (when + // loading a shared library). So we can't be in gStateLock during the call + // to MozStackWalk. For details, see + // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8 + // On Linux, something similar can happen; see bug 824340. + // So let's just release it on all platforms. + StackTrace tmp; + { + AutoUnlockState unlock; + // In each of the following cases, skipFrames is chosen so that the + // first frame in each stack trace is a replace_* function (or as close as + // possible, given the vagaries of inlining on different platforms). +#if defined(XP_WIN) && defined(_M_IX86) + // This avoids MozStackWalk(), which causes unusably slow startup on Win32 + // when it is called during static initialization (see bug 1241684). + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Win32: REGISTERS_SYNC_POPULATE() for the + // frame pointer, and GetStackTop() for the stack end. + CONTEXT context; + RtlCaptureContext(&context); + void** fp = reinterpret_cast<void**>(context.Ebp); + + PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb()); + void* stackEnd = static_cast<void*>(pTib->StackBase); + FramePointerStackWalk(StackWalkCallback, MaxFrames, &tmp, fp, stackEnd); +#elif defined(XP_MACOSX) + // This avoids MozStackWalk(), which has become unusably slow on Mac due to + // changes in libunwind. + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Mac: REGISTERS_SYNC_POPULATE() for the frame + // pointer, and GetStackTop() for the stack end. +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wframe-address" + void** fp = reinterpret_cast<void**>(__builtin_frame_address(1)); +# pragma GCC diagnostic pop + void* stackEnd = pthread_get_stackaddr_np(pthread_self()); + FramePointerStackWalk(StackWalkCallback, MaxFrames, &tmp, fp, stackEnd); +#else + MozStackWalk(StackWalkCallback, nullptr, MaxFrames, &tmp); +#endif + } + + StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp); + if (!p) { + StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp); + MOZ_ALWAYS_TRUE(gStackTraceTable->add(p, stnew)); + } + return *p; +} + +//--------------------------------------------------------------------------- +// Heap blocks +//--------------------------------------------------------------------------- + +// This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit +// is zero) with a 1-bit tag. +// +// |T| is the pointer type, e.g. |int*|, not the pointed-to type. This makes +// is easier to have const pointers, e.g. |TaggedPtr<const int*>|. +template <typename T> +class TaggedPtr { + union { + T mPtr; + uintptr_t mUint; + }; + + static const uintptr_t kTagMask = uintptr_t(0x1); + static const uintptr_t kPtrMask = ~kTagMask; + + static bool IsTwoByteAligned(T aPtr) { + return (uintptr_t(aPtr) & kTagMask) == 0; + } + + public: + TaggedPtr() : mPtr(nullptr) {} + + TaggedPtr(T aPtr, bool aBool) : mPtr(aPtr) { + MOZ_ASSERT(IsTwoByteAligned(aPtr)); + uintptr_t tag = uintptr_t(aBool); + MOZ_ASSERT(tag <= kTagMask); + mUint |= (tag & kTagMask); + } + + void Set(T aPtr, bool aBool) { + MOZ_ASSERT(IsTwoByteAligned(aPtr)); + mPtr = aPtr; + uintptr_t tag = uintptr_t(aBool); + MOZ_ASSERT(tag <= kTagMask); + mUint |= (tag & kTagMask); + } + + T Ptr() const { return reinterpret_cast<T>(mUint & kPtrMask); } + + bool Tag() const { return bool(mUint & kTagMask); } +}; + +// A live heap block. Stores both basic data and data about reports, if we're +// in DarkMatter mode. +class LiveBlock { + const void* mPtr; + const size_t mReqSize; // size requested + + // The stack trace where this block was allocated, or nullptr if we didn't + // record one. + const StackTrace* const mAllocStackTrace; + + // This array has two elements because we record at most two reports of a + // block. + // - Ptr: |mReportStackTrace| - stack trace where this block was reported. + // nullptr if not reported. + // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on + // allocation? If so, DMD must not clear the report at the end of + // Analyze(). Only relevant if |mReportStackTrace| is non-nullptr. + // + // |mPtr| is used as the key in LiveBlockTable, so it's ok for this member + // to be |mutable|. + // + // Only used in DarkMatter mode. + mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2]; + + public: + LiveBlock(const void* aPtr, size_t aReqSize, + const StackTrace* aAllocStackTrace) + : mPtr(aPtr), mReqSize(aReqSize), mAllocStackTrace(aAllocStackTrace) {} + + const void* Address() const { return mPtr; } + + size_t ReqSize() const { return mReqSize; } + + size_t SlopSize() const { return MallocSizeOf(mPtr) - mReqSize; } + + const StackTrace* AllocStackTrace() const { return mAllocStackTrace; } + + const StackTrace* ReportStackTrace1() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[0].Ptr(); + } + + const StackTrace* ReportStackTrace2() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[1].Ptr(); + } + + bool ReportedOnAlloc1() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[0].Tag(); + } + + bool ReportedOnAlloc2() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[1].Tag(); + } + + void AddStackTracesToTable(StackTraceSet& aStackTraces) const { + if (AllocStackTrace()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(AllocStackTrace())); + } + if (gOptions->IsDarkMatterMode()) { + if (ReportStackTrace1()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(ReportStackTrace1())); + } + if (ReportStackTrace2()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(ReportStackTrace2())); + } + } + } + + uint32_t NumReports() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + if (ReportStackTrace2()) { + MOZ_ASSERT(ReportStackTrace1()); + return 2; + } + if (ReportStackTrace1()) { + return 1; + } + return 0; + } + + // This is |const| thanks to the |mutable| fields above. + void Report(Thread* aT, bool aReportedOnAlloc) const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + // We don't bother recording reports after the 2nd one. + uint32_t numReports = NumReports(); + if (numReports < 2) { + mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT), + aReportedOnAlloc); + } + } + + void UnreportIfNotReportedOnAlloc() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) { + mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0); + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); + + } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) { + // Shift the 2nd report down to the 1st one. + mReportStackTrace_mReportedOnAlloc[0] = + mReportStackTrace_mReportedOnAlloc[1]; + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); + + } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) { + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); + } + } + + // Hash policy. + + typedef const void* Lookup; + + static mozilla::HashNumber hash(const void* const& aPtr) { + return mozilla::HashGeneric(aPtr); + } + + static bool match(const LiveBlock& aB, const void* const& aPtr) { + return aB.mPtr == aPtr; + } +}; + +// A table of live blocks where the lookup key is the block address. +typedef mozilla::HashSet<LiveBlock, LiveBlock, InfallibleAllocPolicy> + LiveBlockTable; +static LiveBlockTable* gLiveBlockTable = nullptr; + +class AggregatedLiveBlockHashPolicy { + public: + typedef const LiveBlock* const Lookup; + + static mozilla::HashNumber hash(const LiveBlock* const& aB) { + return gOptions->IsDarkMatterMode() + ? mozilla::HashGeneric( + aB->ReqSize(), aB->SlopSize(), aB->AllocStackTrace(), + aB->ReportedOnAlloc1(), aB->ReportedOnAlloc2()) + : mozilla::HashGeneric(aB->ReqSize(), aB->SlopSize(), + aB->AllocStackTrace()); + } + + static bool match(const LiveBlock* const& aA, const LiveBlock* const& aB) { + return gOptions->IsDarkMatterMode() + ? aA->ReqSize() == aB->ReqSize() && + aA->SlopSize() == aB->SlopSize() && + aA->AllocStackTrace() == aB->AllocStackTrace() && + aA->ReportStackTrace1() == aB->ReportStackTrace1() && + aA->ReportStackTrace2() == aB->ReportStackTrace2() + : aA->ReqSize() == aB->ReqSize() && + aA->SlopSize() == aB->SlopSize() && + aA->AllocStackTrace() == aB->AllocStackTrace(); + } +}; + +// A table of live blocks where the lookup key is everything but the block +// address. For aggregating similar live blocks at output time. +typedef mozilla::HashMap<const LiveBlock*, size_t, + AggregatedLiveBlockHashPolicy, InfallibleAllocPolicy> + AggregatedLiveBlockTable; + +// A freed heap block. +class DeadBlock { + const size_t mReqSize; // size requested + const size_t mSlopSize; // slop above size requested + + // The stack trace where this block was allocated. + const StackTrace* const mAllocStackTrace; + + public: + DeadBlock() : mReqSize(0), mSlopSize(0), mAllocStackTrace(nullptr) {} + + explicit DeadBlock(const LiveBlock& aLb) + : mReqSize(aLb.ReqSize()), + mSlopSize(aLb.SlopSize()), + mAllocStackTrace(aLb.AllocStackTrace()) {} + + ~DeadBlock() = default; + + size_t ReqSize() const { return mReqSize; } + size_t SlopSize() const { return mSlopSize; } + + const StackTrace* AllocStackTrace() const { return mAllocStackTrace; } + + void AddStackTracesToTable(StackTraceSet& aStackTraces) const { + if (AllocStackTrace()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(AllocStackTrace())); + } + } + + // Hash policy. + + typedef DeadBlock Lookup; + + static mozilla::HashNumber hash(const DeadBlock& aB) { + return mozilla::HashGeneric(aB.ReqSize(), aB.SlopSize(), + aB.AllocStackTrace()); + } + + static bool match(const DeadBlock& aA, const DeadBlock& aB) { + return aA.ReqSize() == aB.ReqSize() && aA.SlopSize() == aB.SlopSize() && + aA.AllocStackTrace() == aB.AllocStackTrace(); + } +}; + +// For each unique DeadBlock value we store a count of how many actual dead +// blocks have that value. +typedef mozilla::HashMap<DeadBlock, size_t, DeadBlock, InfallibleAllocPolicy> + DeadBlockTable; +static DeadBlockTable* gDeadBlockTable = nullptr; + +// Add the dead block to the dead block table, if that's appropriate. +void MaybeAddToDeadBlockTable(const DeadBlock& aDb) { + if (gOptions->IsCumulativeMode() && aDb.AllocStackTrace()) { + AutoLockState lock; + if (DeadBlockTable::AddPtr p = gDeadBlockTable->lookupForAdd(aDb)) { + p->value() += 1; + } else { + MOZ_ALWAYS_TRUE(gDeadBlockTable->add(p, aDb, 1)); + } + } +} + +// Add a pointer to each live stack trace into the given StackTraceSet. (A +// stack trace is live if it's used by one of the live blocks.) +static void GatherUsedStackTraces(StackTraceSet& aStackTraces) { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); + + aStackTraces.clear(); + MOZ_ALWAYS_TRUE(aStackTraces.reserve(512)); + + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + iter.get().AddStackTracesToTable(aStackTraces); + } + + for (auto iter = gDeadBlockTable->iter(); !iter.done(); iter.next()) { + iter.get().key().AddStackTracesToTable(aStackTraces); + } +} + +// Delete stack traces that we aren't using, and compact our hashtable. +static void GCStackTraces() { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); + + StackTraceSet usedStackTraces; + GatherUsedStackTraces(usedStackTraces); + + // Delete all unused stack traces from gStackTraceTable. The ModIterator + // destructor will automatically rehash and compact the table. + for (auto iter = gStackTraceTable->modIter(); !iter.done(); iter.next()) { + StackTrace* const& st = iter.get(); + if (!usedStackTraces.has(st)) { + iter.remove(); + InfallibleAllocPolicy::delete_(st); + } + } + + // Schedule a GC when we have twice as many stack traces as we had right after + // this GC finished. + gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count(); +} + +//--------------------------------------------------------------------------- +// malloc/free callbacks +//--------------------------------------------------------------------------- + +static FastBernoulliTrial* gBernoulli; + +// In testing, a probability of 0.003 resulted in ~25% of heap blocks getting +// a stack trace and ~80% of heap bytes getting a stack trace. (This is +// possible because big heap blocks are more likely to get a stack trace.) +// +// We deliberately choose not to give the user control over this probability +// (other than effectively setting it to 1 via --stacks=full) because it's +// quite inscrutable and generally the user just wants "faster and imprecise" +// or "slower and precise". +// +// The random number seeds are arbitrary and were obtained from random.org. If +// you change them you'll need to change the tests as well, because their +// expected output is based on the particular sequence of trial results that we +// get with these seeds. +static void ResetBernoulli() { + new (gBernoulli) + FastBernoulliTrial(0.003, 0x8e26eeee166bc8ca, 0x56820f304a9c9ae0); +} + +static void AllocCallback(void* aPtr, size_t aReqSize, Thread* aT) { + if (!aPtr) { + return; + } + + AutoLockState lock; + AutoBlockIntercepts block(aT); + + size_t actualSize = gMallocTable.malloc_usable_size(aPtr); + + // We may or may not record the allocation stack trace, depending on the + // options and the outcome of a Bernoulli trial. + bool getTrace = gOptions->DoFullStacks() || gBernoulli->trial(actualSize); + LiveBlock b(aPtr, aReqSize, getTrace ? StackTrace::Get(aT) : nullptr); + LiveBlockTable::AddPtr p = gLiveBlockTable->lookupForAdd(aPtr); + if (!p) { + // Most common case: there wasn't a record already. + MOZ_ALWAYS_TRUE(gLiveBlockTable->add(p, b)); + } else { + // Edge-case: there was a record for the same address. We'll assume the + // allocator is not giving out a pointer to an existing allocation, so + // this means the previously recorded allocation was freed while we were + // blocking interceptions. This can happen while processing the data in + // e.g. AnalyzeImpl. + if (gOptions->IsCumulativeMode()) { + // Copy it out so it can be added to the dead block list later. + DeadBlock db(*p); + MaybeAddToDeadBlockTable(db); + } + gLiveBlockTable->remove(p); + MOZ_ALWAYS_TRUE(gLiveBlockTable->putNew(aPtr, b)); + } +} + +static void FreeCallback(void* aPtr, Thread* aT, DeadBlock* aDeadBlock) { + if (!aPtr) { + return; + } + + AutoLockState lock; + AutoBlockIntercepts block(aT); + + if (LiveBlockTable::Ptr lb = gLiveBlockTable->lookup(aPtr)) { + if (gOptions->IsCumulativeMode()) { + // Copy it out so it can be added to the dead block list later. + new (aDeadBlock) DeadBlock(*lb); + } + gLiveBlockTable->remove(lb); + } else { + // We have no record of the block. It must be a bogus pointer, or one that + // DMD wasn't able to see allocated. This should be extremely rare. + } + + if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) { + GCStackTraces(); + } +} + +//--------------------------------------------------------------------------- +// malloc/free interception +//--------------------------------------------------------------------------- + +static bool Init(malloc_table_t* aMallocTable); + +} // namespace dmd +} // namespace mozilla + +static void* replace_malloc(size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + // Intercepts are blocked, which means this must be a call to malloc + // triggered indirectly by DMD (e.g. via MozStackWalk). Be infallible. + return InfallibleAllocPolicy::malloc_(aSize); + } + + // This must be a call to malloc from outside DMD. Intercept it. + void* ptr = gMallocTable.malloc(aSize); + AllocCallback(ptr, aSize, t); + return ptr; +} + +static void* replace_calloc(size_t aCount, size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::calloc_(aCount, aSize); + } + + // |aCount * aSize| could overflow, but if that happens then + // |gMallocTable.calloc()| will return nullptr and |AllocCallback()| will + // return immediately without using the overflowed value. + void* ptr = gMallocTable.calloc(aCount, aSize); + AllocCallback(ptr, aCount * aSize, t); + return ptr; +} + +static void* replace_realloc(void* aOldPtr, size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::realloc_(aOldPtr, aSize); + } + + // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|. + if (!aOldPtr) { + return replace_malloc(aSize); + } + + // Be very careful here! Must remove the block from the table before doing + // the realloc to avoid races, just like in replace_free(). + // Nb: This does an unnecessary hashtable remove+add if the block doesn't + // move, but doing better isn't worth the effort. + DeadBlock db; + FreeCallback(aOldPtr, t, &db); + void* ptr = gMallocTable.realloc(aOldPtr, aSize); + if (ptr) { + AllocCallback(ptr, aSize, t); + MaybeAddToDeadBlockTable(db); + } else { + // If realloc fails, we undo the prior operations by re-inserting the old + // pointer into the live block table. We don't have to do anything with the + // dead block list because the dead block hasn't yet been inserted. The + // block will end up looking like it was allocated for the first time here, + // which is untrue, and the slop bytes will be zero, which may be untrue. + // But this case is rare and doing better isn't worth the effort. + AllocCallback(aOldPtr, gMallocTable.malloc_usable_size(aOldPtr), t); + } + return ptr; +} + +static void* replace_memalign(size_t aAlignment, size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::memalign_(aAlignment, aSize); + } + + void* ptr = gMallocTable.memalign(aAlignment, aSize); + AllocCallback(ptr, aSize, t); + return ptr; +} + +static void replace_free(void* aPtr) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::free_(aPtr); + } + + // Do the actual free after updating the table. Otherwise, another thread + // could call malloc and get the freed block and update the table, and then + // our update here would remove the newly-malloc'd block. + DeadBlock db; + FreeCallback(aPtr, t, &db); + MaybeAddToDeadBlockTable(db); + gMallocTable.free(aPtr); +} + +void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) { + if (mozilla::dmd::Init(aMallocTable)) { +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#define MALLOC_DECL(name, ...) aMallocTable->name = replace_##name; +#include "malloc_decls.h" + *aBridge = mozilla::dmd::gDMDBridge; + } +} + +namespace mozilla { +namespace dmd { + +//--------------------------------------------------------------------------- +// Options (Part 2) +//--------------------------------------------------------------------------- + +// Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah" +// (where "blah" is non-empty) and return the pointer to "blah". |aArg| can +// have leading space chars (but not other whitespace). +const char* Options::ValueIfMatch(const char* aArg, const char* aOptionName) { + MOZ_ASSERT(!isspace(*aArg)); // any leading whitespace should not remain + size_t optionLen = strlen(aOptionName); + if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' && + aArg[optionLen + 1]) { + return aArg + optionLen + 1; + } + return nullptr; +} + +// Extracts a |long| value for an option from an argument. It must be within +// the range |aMin..aMax| (inclusive). +bool Options::GetLong(const char* aArg, const char* aOptionName, long aMin, + long aMax, long* aValue) { + if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) { + char* endPtr; + *aValue = strtol(optionValue, &endPtr, /* base */ 10); + if (!*endPtr && aMin <= *aValue && *aValue <= aMax && *aValue != LONG_MIN && + *aValue != LONG_MAX) { + return true; + } + } + return false; +} + +// Extracts a |bool| value for an option -- encoded as "yes" or "no" -- from an +// argument. +bool Options::GetBool(const char* aArg, const char* aOptionName, bool* aValue) { + if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) { + if (strcmp(optionValue, "yes") == 0) { + *aValue = true; + return true; + } + if (strcmp(optionValue, "no") == 0) { + *aValue = false; + return true; + } + } + return false; +} + +Options::Options(const char* aDMDEnvVar) + : mDMDEnvVar(aDMDEnvVar ? InfallibleAllocPolicy::strdup_(aDMDEnvVar) + : nullptr), + mMode(Mode::DarkMatter), + mStacks(Stacks::Partial), + mShowDumpStats(false) { + char* e = mDMDEnvVar; + if (e && strcmp(e, "1") != 0) { + bool isEnd = false; + while (!isEnd) { + // Consume leading whitespace. + while (isspace(*e)) { + e++; + } + + // Save the start of the arg. + const char* arg = e; + + // Find the first char after the arg, and temporarily change it to '\0' + // to isolate the arg. + while (!isspace(*e) && *e != '\0') { + e++; + } + char replacedChar = *e; + isEnd = replacedChar == '\0'; + *e = '\0'; + + // Handle arg + bool myBool; + if (strcmp(arg, "--mode=live") == 0) { + mMode = Mode::Live; + } else if (strcmp(arg, "--mode=dark-matter") == 0) { + mMode = Mode::DarkMatter; + } else if (strcmp(arg, "--mode=cumulative") == 0) { + mMode = Mode::Cumulative; + } else if (strcmp(arg, "--mode=scan") == 0) { + mMode = Mode::Scan; + + } else if (strcmp(arg, "--stacks=full") == 0) { + mStacks = Stacks::Full; + } else if (strcmp(arg, "--stacks=partial") == 0) { + mStacks = Stacks::Partial; + + } else if (GetBool(arg, "--show-dump-stats", &myBool)) { + mShowDumpStats = myBool; + + } else if (strcmp(arg, "") == 0) { + // This can only happen if there is trailing whitespace. Ignore. + MOZ_ASSERT(isEnd); + + } else { + BadArg(arg); + } + + // Undo the temporary isolation. + *e = replacedChar; + } + } + + if (mMode == Mode::Scan) { + mStacks = Stacks::Full; + } +} + +void Options::BadArg(const char* aArg) { + StatusMsg("\n"); + StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg); + StatusMsg("See the output of |mach help run| for the allowed options.\n"); + exit(1); +} + +const char* Options::ModeString() const { + switch (mMode) { + case Mode::Live: + return "live"; + case Mode::DarkMatter: + return "dark-matter"; + case Mode::Cumulative: + return "cumulative"; + case Mode::Scan: + return "scan"; + default: + MOZ_ASSERT(false); + return "(unknown DMD mode)"; + } +} + +//--------------------------------------------------------------------------- +// DMD start-up +//--------------------------------------------------------------------------- + +#ifndef XP_WIN +static void prefork() { + if (gStateLock) { + gStateLock->Lock(); + } +} + +static void postfork() { + if (gStateLock) { + gStateLock->Unlock(); + } +} +#endif + +// WARNING: this function runs *very* early -- before all static initializers +// have run. For this reason, non-scalar globals such as gStateLock and +// gStackTraceTable are allocated dynamically (so we can guarantee their +// construction in this function) rather than statically. +static bool Init(malloc_table_t* aMallocTable) { + // DMD is controlled by the |DMD| environment variable. + const char* e = getenv("DMD"); + + if (!e) { + return false; + } + // Initialize the function table first, because StatusMsg uses + // InfallibleAllocPolicy::malloc_, which uses it. + gMallocTable = *aMallocTable; + + StatusMsg("$DMD = '%s'\n", e); + + gDMDBridge = InfallibleAllocPolicy::new_<DMDBridge>(); + +#ifndef XP_WIN + // Avoid deadlocks when forking by acquiring our state lock prior to forking + // and releasing it after forking. See |LogAlloc|'s |replace_init| for + // in-depth details. + // + // Note: This must run after attempting an allocation so as to give the + // system malloc a chance to insert its own atfork handler. + pthread_atfork(prefork, postfork, postfork); +#endif + // Parse $DMD env var. + gOptions = InfallibleAllocPolicy::new_<Options>(e); + + gStateLock = InfallibleAllocPolicy::new_<Mutex>(); + + gBernoulli = (FastBernoulliTrial*)InfallibleAllocPolicy::malloc_( + sizeof(FastBernoulliTrial)); + ResetBernoulli(); + + Thread::Init(); + + { + AutoLockState lock; + + gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>(8192); + gLiveBlockTable = InfallibleAllocPolicy::new_<LiveBlockTable>(8192); + + // Create this even if the mode isn't Cumulative (albeit with a small + // size), in case the mode is changed later on (as is done by SmokeDMD.cpp, + // for example). + size_t tableSize = gOptions->IsCumulativeMode() ? 8192 : 4; + gDeadBlockTable = InfallibleAllocPolicy::new_<DeadBlockTable>(tableSize); + } + + return true; +} + +//--------------------------------------------------------------------------- +// Block reporting and unreporting +//--------------------------------------------------------------------------- + +static void ReportHelper(const void* aPtr, bool aReportedOnAlloc) { + if (!gOptions->IsDarkMatterMode() || !aPtr) { + return; + } + + Thread* t = Thread::Fetch(); + + AutoBlockIntercepts block(t); + AutoLockState lock; + + if (LiveBlockTable::Ptr p = gLiveBlockTable->lookup(aPtr)) { + p->Report(t, aReportedOnAlloc); + } else { + // We have no record of the block. It must be a bogus pointer. This should + // be extremely rare because Report() is almost always called in + // conjunction with a malloc_size_of-style function. Print a message so + // that we get some feedback. + StatusMsg("Unknown pointer %p\n", aPtr); + } +} + +void DMDFuncs::Report(const void* aPtr) { + ReportHelper(aPtr, /* onAlloc */ false); +} + +void DMDFuncs::ReportOnAlloc(const void* aPtr) { + ReportHelper(aPtr, /* onAlloc */ true); +} + +//--------------------------------------------------------------------------- +// DMD output +//--------------------------------------------------------------------------- + +// The version number of the output format. Increment this if you make +// backwards-incompatible changes to the format. See DMD.h for the version +// history. +static const int kOutputVersionNumber = 5; + +// Note that, unlike most SizeOf* functions, this function does not take a +// |mozilla::MallocSizeOf| argument. That's because those arguments are +// primarily to aid DMD track heap blocks... but DMD deliberately doesn't track +// heap blocks it allocated for itself! +// +// SizeOfInternal should be called while you're holding the state lock and +// while intercepts are blocked; SizeOf acquires the lock and blocks +// intercepts. + +static void SizeOfInternal(Sizes* aSizes) { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); + + aSizes->Clear(); + + StackTraceSet usedStackTraces; + GatherUsedStackTraces(usedStackTraces); + + for (auto iter = gStackTraceTable->iter(); !iter.done(); iter.next()) { + StackTrace* const& st = iter.get(); + + if (usedStackTraces.has(st)) { + aSizes->mStackTracesUsed += MallocSizeOf(st); + } else { + aSizes->mStackTracesUnused += MallocSizeOf(st); + } + } + + aSizes->mStackTraceTable = + gStackTraceTable->shallowSizeOfIncludingThis(MallocSizeOf); + + aSizes->mLiveBlockTable = + gLiveBlockTable->shallowSizeOfIncludingThis(MallocSizeOf); + + aSizes->mDeadBlockTable = + gDeadBlockTable->shallowSizeOfIncludingThis(MallocSizeOf); +} + +void DMDFuncs::SizeOf(Sizes* aSizes) { + aSizes->Clear(); + + AutoBlockIntercepts block(Thread::Fetch()); + AutoLockState lock; + SizeOfInternal(aSizes); +} + +void DMDFuncs::ClearReports() { + if (!gOptions->IsDarkMatterMode()) { + return; + } + + AutoLockState lock; + + // Unreport all blocks that were marked reported by a memory reporter. This + // excludes those that were reported on allocation, because they need to keep + // their reported marking. + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + iter.get().UnreportIfNotReportedOnAlloc(); + } +} + +class ToIdStringConverter final { + public: + ToIdStringConverter() : mIdMap(512), mNextId(0) {} + + // Converts a pointer to a unique ID. Reuses the existing ID for the pointer + // if it's been seen before. + const char* ToIdString(const void* aPtr) { + uint32_t id; + PointerIdMap::AddPtr p = mIdMap.lookupForAdd(aPtr); + if (!p) { + id = mNextId++; + MOZ_ALWAYS_TRUE(mIdMap.add(p, aPtr, id)); + } else { + id = p->value(); + } + return Base32(id); + } + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const { + return mIdMap.shallowSizeOfExcludingThis(aMallocSizeOf); + } + + private: + // This function converts an integer to base-32. We use base-32 values for + // indexing into the traceTable and the frameTable, for the following reasons. + // + // - Base-32 gives more compact indices than base-16. + // + // - 32 is a power-of-two, which makes the necessary div/mod calculations + // fast. + // + // - We can (and do) choose non-numeric digits for base-32. When + // inspecting/debugging the JSON output, non-numeric indices are easier to + // search for than numeric indices. + // + char* Base32(uint32_t aN) { + static const char digits[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef"; + + char* b = mIdBuf + kIdBufLen - 1; + *b = '\0'; + do { + b--; + if (b == mIdBuf) { + MOZ_CRASH("Base32 buffer too small"); + } + *b = digits[aN % 32]; + aN /= 32; + } while (aN); + + return b; + } + + PointerIdMap mIdMap; + uint32_t mNextId; + + // |mIdBuf| must have space for at least eight chars, which is the space + // needed to hold 'Dffffff' (including the terminating null char), which is + // the base-32 representation of 0xffffffff. + static const size_t kIdBufLen = 16; + char mIdBuf[kIdBufLen]; +}; + +// Helper class for converting a pointer value to a string. +class ToStringConverter { + public: + const char* ToPtrString(const void* aPtr) { + snprintf(kPtrBuf, sizeof(kPtrBuf) - 1, "%" PRIxPTR, (uintptr_t)aPtr); + return kPtrBuf; + } + + private: + char kPtrBuf[32]; +}; + +static void WriteBlockContents(JSONWriter& aWriter, const LiveBlock& aBlock) { + size_t numWords = aBlock.ReqSize() / sizeof(uintptr_t*); + if (numWords == 0) { + return; + } + + aWriter.StartArrayProperty("contents", aWriter.SingleLineStyle); + { + const uintptr_t** block = (const uintptr_t**)aBlock.Address(); + ToStringConverter sc; + for (size_t i = 0; i < numWords; ++i) { + aWriter.StringElement(MakeStringSpan(sc.ToPtrString(block[i]))); + } + } + aWriter.EndArray(); +} + +static void AnalyzeImpl(UniquePtr<JSONWriteFunc> aWriter) { + // Some blocks may have been allocated while creating |aWriter|. Those blocks + // will be freed at the end of this function when |write| is destroyed. The + // allocations will have occurred while intercepts were not blocked, so the + // frees better be as well, otherwise we'll get assertion failures. + // Therefore, this declaration must precede the AutoBlockIntercepts + // declaration, to ensure that |write| is destroyed *after* intercepts are + // unblocked. + JSONWriter writer(std::move(aWriter)); + + AutoBlockIntercepts block(Thread::Fetch()); + AutoLockState lock; + + // Allocate this on the heap instead of the stack because it's fairly large. + auto locService = InfallibleAllocPolicy::new_<CodeAddressService>(); + + StackTraceSet usedStackTraces(512); + PointerSet usedPcs(512); + + size_t iscSize; + + static int analysisCount = 1; + StatusMsg("Dump %d {\n", analysisCount++); + + writer.Start(); + { + writer.IntProperty("version", kOutputVersionNumber); + + writer.StartObjectProperty("invocation"); + { + const char* var = gOptions->DMDEnvVar(); + if (var) { + writer.StringProperty("dmdEnvVar", MakeStringSpan(var)); + } else { + writer.NullProperty("dmdEnvVar"); + } + + writer.StringProperty("mode", MakeStringSpan(gOptions->ModeString())); + } + writer.EndObject(); + + StatusMsg(" Constructing the heap block list...\n"); + + ToIdStringConverter isc; + ToStringConverter sc; + + writer.StartArrayProperty("blockList"); + { + // Lambda that writes out a live block. + auto writeLiveBlock = [&](const LiveBlock& aB, size_t aNum) { + aB.AddStackTracesToTable(usedStackTraces); + + MOZ_ASSERT_IF(gOptions->IsScanMode(), aNum == 1); + + writer.StartObjectElement(writer.SingleLineStyle); + { + if (gOptions->IsScanMode()) { + writer.StringProperty("addr", + MakeStringSpan(sc.ToPtrString(aB.Address()))); + WriteBlockContents(writer, aB); + } + writer.IntProperty("req", aB.ReqSize()); + if (aB.SlopSize() > 0) { + writer.IntProperty("slop", aB.SlopSize()); + } + + if (aB.AllocStackTrace()) { + writer.StringProperty( + "alloc", MakeStringSpan(isc.ToIdString(aB.AllocStackTrace()))); + } + + if (gOptions->IsDarkMatterMode() && aB.NumReports() > 0) { + writer.StartArrayProperty("reps"); + { + if (aB.ReportStackTrace1()) { + writer.StringElement( + MakeStringSpan(isc.ToIdString(aB.ReportStackTrace1()))); + } + if (aB.ReportStackTrace2()) { + writer.StringElement( + MakeStringSpan(isc.ToIdString(aB.ReportStackTrace2()))); + } + } + writer.EndArray(); + } + + if (aNum > 1) { + writer.IntProperty("num", aNum); + } + } + writer.EndObject(); + }; + + // Live blocks. + if (!gOptions->IsScanMode()) { + // At this point we typically have many LiveBlocks that differ only in + // their address. Aggregate them to reduce the size of the output file. + AggregatedLiveBlockTable agg(8192); + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + const LiveBlock& b = iter.get(); + b.AddStackTracesToTable(usedStackTraces); + + if (AggregatedLiveBlockTable::AddPtr p = agg.lookupForAdd(&b)) { + p->value() += 1; + } else { + MOZ_ALWAYS_TRUE(agg.add(p, &b, 1)); + } + } + + // Now iterate over the aggregated table. + for (auto iter = agg.iter(); !iter.done(); iter.next()) { + const LiveBlock& b = *iter.get().key(); + size_t num = iter.get().value(); + writeLiveBlock(b, num); + } + + } else { + // In scan mode we cannot aggregate because we print each live block's + // address and contents. + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + const LiveBlock& b = iter.get(); + b.AddStackTracesToTable(usedStackTraces); + + writeLiveBlock(b, 1); + } + } + + // Dead blocks. + for (auto iter = gDeadBlockTable->iter(); !iter.done(); iter.next()) { + const DeadBlock& b = iter.get().key(); + b.AddStackTracesToTable(usedStackTraces); + + size_t num = iter.get().value(); + MOZ_ASSERT(num > 0); + + writer.StartObjectElement(writer.SingleLineStyle); + { + writer.IntProperty("req", b.ReqSize()); + if (b.SlopSize() > 0) { + writer.IntProperty("slop", b.SlopSize()); + } + if (b.AllocStackTrace()) { + writer.StringProperty( + "alloc", MakeStringSpan(isc.ToIdString(b.AllocStackTrace()))); + } + + if (num > 1) { + writer.IntProperty("num", num); + } + } + writer.EndObject(); + } + } + writer.EndArray(); + + StatusMsg(" Constructing the stack trace table...\n"); + + writer.StartObjectProperty("traceTable"); + { + for (auto iter = usedStackTraces.iter(); !iter.done(); iter.next()) { + const StackTrace* const st = iter.get(); + writer.StartArrayProperty(MakeStringSpan(isc.ToIdString(st)), + writer.SingleLineStyle); + { + for (uint32_t i = 0; i < st->Length(); i++) { + const void* pc = st->Pc(i); + writer.StringElement(MakeStringSpan(isc.ToIdString(pc))); + MOZ_ALWAYS_TRUE(usedPcs.put(pc)); + } + } + writer.EndArray(); + } + } + writer.EndObject(); + + StatusMsg(" Constructing the stack frame table...\n"); + + writer.StartObjectProperty("frameTable"); + { + static const size_t locBufLen = 1024; + char locBuf[locBufLen]; + + for (auto iter = usedPcs.iter(); !iter.done(); iter.next()) { + const void* const pc = iter.get(); + + // Use 0 for the frame number. See the JSON format description comment + // in DMD.h to understand why. + locService->GetLocation(0, pc, locBuf, locBufLen); + writer.StringProperty(MakeStringSpan(isc.ToIdString(pc)), + MakeStringSpan(locBuf)); + } + } + writer.EndObject(); + + iscSize = isc.sizeOfExcludingThis(MallocSizeOf); + } + writer.End(); + + if (gOptions->ShowDumpStats()) { + Sizes sizes; + SizeOfInternal(&sizes); + + static const size_t kBufLen = 64; + char buf1[kBufLen]; + char buf2[kBufLen]; + char buf3[kBufLen]; + + StatusMsg(" Execution measurements {\n"); + + StatusMsg(" Data structures that persist after Dump() ends {\n"); + + StatusMsg(" Used stack traces: %10s bytes\n", + Show(sizes.mStackTracesUsed, buf1, kBufLen)); + + StatusMsg(" Unused stack traces: %10s bytes\n", + Show(sizes.mStackTracesUnused, buf1, kBufLen)); + + StatusMsg(" Stack trace table: %10s bytes (%s entries, %s used)\n", + Show(sizes.mStackTraceTable, buf1, kBufLen), + Show(gStackTraceTable->capacity(), buf2, kBufLen), + Show(gStackTraceTable->count(), buf3, kBufLen)); + + StatusMsg(" Live block table: %10s bytes (%s entries, %s used)\n", + Show(sizes.mLiveBlockTable, buf1, kBufLen), + Show(gLiveBlockTable->capacity(), buf2, kBufLen), + Show(gLiveBlockTable->count(), buf3, kBufLen)); + + StatusMsg(" Dead block table: %10s bytes (%s entries, %s used)\n", + Show(sizes.mDeadBlockTable, buf1, kBufLen), + Show(gDeadBlockTable->capacity(), buf2, kBufLen), + Show(gDeadBlockTable->count(), buf3, kBufLen)); + + StatusMsg(" }\n"); + StatusMsg(" Data structures that are destroyed after Dump() ends {\n"); + + StatusMsg( + " Location service: %10s bytes\n", + Show(locService->SizeOfIncludingThis(MallocSizeOf), buf1, kBufLen)); + StatusMsg(" Used stack traces set: %10s bytes\n", + Show(usedStackTraces.shallowSizeOfExcludingThis(MallocSizeOf), + buf1, kBufLen)); + StatusMsg( + " Used PCs set: %10s bytes\n", + Show(usedPcs.shallowSizeOfExcludingThis(MallocSizeOf), buf1, kBufLen)); + StatusMsg(" Pointer ID map: %10s bytes\n", + Show(iscSize, buf1, kBufLen)); + + StatusMsg(" }\n"); + StatusMsg(" Counts {\n"); + + size_t hits = locService->NumCacheHits(); + size_t misses = locService->NumCacheMisses(); + size_t requests = hits + misses; + StatusMsg(" Location service: %10s requests\n", + Show(requests, buf1, kBufLen)); + + size_t count = locService->CacheCount(); + size_t capacity = locService->CacheCapacity(); + StatusMsg( + " Location service cache: " + "%4.1f%% hit rate, %.1f%% occupancy at end\n", + Percent(hits, requests), Percent(count, capacity)); + + StatusMsg(" }\n"); + StatusMsg(" }\n"); + } + + InfallibleAllocPolicy::delete_(locService); + + StatusMsg("}\n"); +} + +void DMDFuncs::Analyze(UniquePtr<JSONWriteFunc> aWriter) { + AnalyzeImpl(std::move(aWriter)); + ClearReports(); +} + +//--------------------------------------------------------------------------- +// Testing +//--------------------------------------------------------------------------- + +void DMDFuncs::ResetEverything(const char* aOptions) { + AutoLockState lock; + + // Reset options. + InfallibleAllocPolicy::delete_(gOptions); + gOptions = InfallibleAllocPolicy::new_<Options>(aOptions); + + // Clear all existing blocks. + gLiveBlockTable->clear(); + gDeadBlockTable->clear(); + + // Reset gBernoulli to a deterministic state. (Its current state depends on + // all previous trials.) + ResetBernoulli(); +} + +} // namespace dmd +} // namespace mozilla diff --git a/memory/replace/dmd/DMD.h b/memory/replace/dmd/DMD.h new file mode 100644 index 0000000000..c057047800 --- /dev/null +++ b/memory/replace/dmd/DMD.h @@ -0,0 +1,291 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DMD_h___ +#define DMD_h___ + +#include <stdarg.h> +#include <string.h> + +#include <utility> + +#include "mozilla/DebugOnly.h" +#include "mozilla/Types.h" +#include "mozilla/UniquePtr.h" +#include "replace_malloc_bridge.h" + +namespace mozilla { + +class JSONWriteFunc; + +namespace dmd { + +struct Sizes { + size_t mStackTracesUsed; + size_t mStackTracesUnused; + size_t mStackTraceTable; + size_t mLiveBlockTable; + size_t mDeadBlockTable; + + Sizes() { Clear(); } + void Clear() { memset(this, 0, sizeof(Sizes)); } +}; + +// See further below for a description of each method. The DMDFuncs class +// should contain a virtual method for each of them (except IsRunning, +// which can be inferred from the DMDFuncs singleton existing). +struct DMDFuncs { + virtual void Report(const void*); + + virtual void ReportOnAlloc(const void*); + + virtual void ClearReports(); + + virtual void Analyze(UniquePtr<JSONWriteFunc>); + + virtual void SizeOf(Sizes*); + + virtual void StatusMsg(const char*, va_list) MOZ_FORMAT_PRINTF(2, 0); + + virtual void ResetEverything(const char*); + +#ifndef REPLACE_MALLOC_IMPL + // We deliberately don't use ReplaceMalloc::GetDMDFuncs here, because if we + // did, the following would happen. + // - The code footprint of each call to Get() larger as GetDMDFuncs ends + // up inlined. + // - When no replace-malloc library is loaded, the number of instructions + // executed is equivalent, but don't necessarily fit in the same cache + // line. + // - When a non-DMD replace-malloc library is loaded, the overhead is + // higher because there is first a check for the replace malloc bridge + // and then for the DMDFuncs singleton. + // Initializing the DMDFuncs singleton on the first access makes the + // overhead even worse. Either Get() is inlined and massive, or it isn't + // and a simple value check becomes a function call. + static DMDFuncs* Get() { return sSingleton.Get(); } + + private: + // Wrapper class keeping a pointer to the DMD functions. It is statically + // initialized because it needs to be set early enough. + // Debug builds also check that it's never accessed before the static + // initialization actually occured, which could be the case if some other + // static initializer ended up calling into DMD. + class Singleton { + public: + Singleton() + : mValue(ReplaceMalloc::GetDMDFuncs()) +# ifdef DEBUG + , + mInitialized(true) +# endif + { + } + + DMDFuncs* Get() { + MOZ_ASSERT(mInitialized); + return mValue; + } + + private: + DMDFuncs* mValue; +# ifdef DEBUG + bool mInitialized; +# endif + }; + + // This singleton pointer must be defined on the program side. In Gecko, + // this is done in xpcom/base/nsMemoryInfoDumper.cpp. + static /* DMDFuncs:: */ Singleton sSingleton; +#endif +}; + +#ifndef REPLACE_MALLOC_IMPL +// Mark a heap block as reported by a memory reporter. +inline void Report(const void* aPtr) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->Report(aPtr); + } +} + +// Mark a heap block as reported immediately on allocation. +inline void ReportOnAlloc(const void* aPtr) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->ReportOnAlloc(aPtr); + } +} + +// Clears existing reportedness data from any prior runs of the memory +// reporters. The following sequence should be used. +// - ClearReports() +// - run the memory reporters +// - Analyze() +// This sequence avoids spurious twice-reported warnings. +inline void ClearReports() { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->ClearReports(); + } +} + +// Determines which heap blocks have been reported, and dumps JSON output +// (via |aWriter|) describing the heap. +// +// The following sample output contains comments that explain the format and +// design choices. The output files can be quite large, so a number of +// decisions were made to minimize size, such as using short property names and +// omitting properties whenever possible. +// +// { +// // The version number of the format, which will be incremented each time +// // backwards-incompatible changes are made. A mandatory integer. +// // +// // Version history: +// // - 1: Bug 1044709 +// // - 2: Bug 1094552 +// // - 3: Bug 1100851 +// // - 4: Bug 1121830 +// // - 5: Bug 1253512 +// "version": 5, +// +// // Information about how DMD was invoked. A mandatory object. +// "invocation": { +// // The contents of the $DMD environment variable. A string, or |null| if +// // $DMD is undefined. +// "dmdEnvVar": "--mode=dark-matter", +// +// // The profiling mode. A mandatory string taking one of the following +// // values: "live", "dark-matter", "cumulative", "scan". +// "mode": "dark-matter", +// }, +// +// // Details of all analyzed heap blocks. A mandatory array. +// "blockList": [ +// // An example of a heap block. +// { +// // Requested size, in bytes. This is a mandatory integer. +// "req": 3584, +// +// // Requested slop size, in bytes. This is mandatory if it is non-zero, +// // but omitted otherwise. +// "slop": 512, +// +// // The stack trace at which the block was allocated. An optional +// // string that indexes into the "traceTable" object. If omitted, no +// // allocation stack trace was recorded for the block. +// "alloc": "A", +// +// // One or more stack traces at which this heap block was reported by a +// // memory reporter. An optional array that will only be present in +// // "dark-matter" mode. The elements are strings that index into +// // the "traceTable" object. +// "reps": ["B"] +// +// // The number of heap blocks with exactly the above properties. This +// // is mandatory if it is greater than one, but omitted otherwise. +// // (Blocks with identical properties don't have to be aggregated via +// // this property, but it can greatly reduce output file size.) +// "num": 5, +// +// // The address of the block. This is mandatory in "scan" mode, but +// // omitted otherwise. +// "addr": "4e4e4e4e", +// +// // The contents of the block, read one word at a time. This is +// // mandatory in "scan" mode for blocks at least one word long, but +// // omitted otherwise. +// "contents": ["0", "6", "7f7f7f7f", "0"] +// } +// ], +// +// // The stack traces referenced by elements of the "blockList" array. This +// // could be an array, but making it an object makes it easier to see +// // which stacks correspond to which references in the "blockList" array. +// "traceTable": { +// // Each property corresponds to a stack trace mentioned in the "blocks" +// // object. Each element is an index into the "frameTable" object. +// "A": ["D", "E"], +// "B": ["F", "G"] +// }, +// +// // The stack frames referenced by the "traceTable" object. The +// // descriptions can be quite long, so they are stored separately from the +// // "traceTable" object so that each one only has to be written once. +// // This could also be an array, but again, making it an object makes it +// // easier to see which frames correspond to which references in the +// // "traceTable" object. +// "frameTable": { +// // Each property key is a frame key mentioned in the "traceTable" object. +// // Each property value is a string containing a frame description. Each +// // frame description must be in a format recognized by `fix_stacks.py`, +// // which requires a frame number at the start. Because each stack frame +// // description in this table can be shared between multiple stack +// // traces, we use a dummy value of #00. The proper frame number can be +// // reconstructed later by scripts that output stack traces in a +// // conventional non-shared format. +// "D": "#00: foo (Foo.cpp:123)", +// "E": "#00: bar (Bar.cpp:234)", +// "F": "#00: baz (Baz.cpp:345)", +// "G": "#00: quux (Quux.cpp:456)" +// } +// } +// +// Implementation note: normally, this function wouldn't be templated, but in +// that case, the function is compiled, which makes the destructor for the +// UniquePtr fire up, and that needs JSONWriteFunc to be fully defined. That, +// in turn, requires to include JSONWriter.h, which includes +// double-conversion.h, which ends up breaking various things built with +// -Werror for various reasons. +// +template <typename JSONWriteFunc> +inline void Analyze(UniquePtr<JSONWriteFunc> aWriteFunc) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->Analyze(std::move(aWriteFunc)); + } +} + +// Gets the size of various data structures. Used to implement a memory +// reporter for DMD. +inline void SizeOf(Sizes* aSizes) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->SizeOf(aSizes); + } +} + +// Prints a status message prefixed with "DMD[<pid>]". Use sparingly. +MOZ_FORMAT_PRINTF(1, 2) +inline void StatusMsg(const char* aFmt, ...) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + va_list ap; + va_start(ap, aFmt); + funcs->StatusMsg(aFmt, ap); + va_end(ap); + } +} + +// Indicates whether or not DMD is running. +inline bool IsRunning() { return !!DMDFuncs::Get(); } + +// Resets all DMD options and then sets new ones according to those specified +// in |aOptions|. Also clears all recorded data about allocations. Only used +// for testing purposes. +inline void ResetEverything(const char* aOptions) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->ResetEverything(aOptions); + } +} +#endif + +} // namespace dmd +} // namespace mozilla + +#endif /* DMD_h___ */ diff --git a/memory/replace/dmd/README b/memory/replace/dmd/README new file mode 100644 index 0000000000..537893358a --- /dev/null +++ b/memory/replace/dmd/README @@ -0,0 +1,3 @@ +This is DMD. See +https://firefox-source-docs.mozilla.org/performance/memory/dmd.html for +details on how to use it. diff --git a/memory/replace/dmd/block_analyzer.py b/memory/replace/dmd/block_analyzer.py new file mode 100644 index 0000000000..1f907b38a7 --- /dev/null +++ b/memory/replace/dmd/block_analyzer.py @@ -0,0 +1,292 @@ +#!/usr/bin/env python3 + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# From a scan mode DMD log, extract some information about a +# particular block, such as its allocation stack or which other blocks +# contain pointers to it. This can be useful when investigating leaks +# caused by unknown references to refcounted objects. + +import argparse +import gzip +import json +import re +import sys + +# The DMD output version this script handles. +outputVersion = 5 + +# If --ignore-alloc-fns is specified, stack frames containing functions that +# match these strings will be removed from the *start* of stack traces. (Once +# we hit a non-matching frame, any subsequent frames won't be removed even if +# they do match.) +allocatorFns = [ + "malloc (", + "replace_malloc", + "replace_calloc", + "replace_realloc", + "replace_memalign", + "replace_posix_memalign", + "malloc_zone_malloc", + "moz_xmalloc", + "moz_xcalloc", + "moz_xrealloc", + "operator new(", + "operator new[](", + "g_malloc", + "g_slice_alloc", + "callocCanGC", + "reallocCanGC", + "vpx_malloc", + "vpx_calloc", + "vpx_realloc", + "vpx_memalign", + "js_malloc", + "js_calloc", + "js_realloc", + "pod_malloc", + "pod_calloc", + "pod_realloc", + "nsTArrayInfallibleAllocator::Malloc", + "Allocator<ReplaceMallocBase>::malloc(", + "mozilla::dmd::StackTrace::Get(", + "mozilla::dmd::AllocCallback(", + "mozilla::dom::DOMArena::Allocate(", + # This one necessary to fully filter some sequences of allocation functions + # that happen in practice. Note that ??? entries that follow non-allocation + # functions won't be stripped, as explained above. + "???", +] + +#### + +# Command line arguments + + +def range_1_24(string): + value = int(string) + if value < 1 or value > 24: + msg = "{:s} is not in the range 1..24".format(string) + raise argparse.ArgumentTypeError(msg) + return value + + +parser = argparse.ArgumentParser( + description="Analyze the heap graph to find out things about an object. \ +By default this prints out information about blocks that point to the given block." +) + +parser.add_argument("dmd_log_file_name", help="clamped DMD log file name") + +parser.add_argument("block", help="address of the block of interest") + +parser.add_argument( + "--info", + dest="info", + action="store_true", + default=False, + help="Print out information about the block.", +) + +parser.add_argument( + "-sfl", + "--max-stack-frame-length", + type=int, + default=300, + help="Maximum number of characters to print from each stack frame", +) + +parser.add_argument( + "-a", + "--ignore-alloc-fns", + action="store_true", + help="ignore allocation functions at the start of traces", +) + +parser.add_argument( + "-f", + "--max-frames", + type=range_1_24, + default=8, + help="maximum number of frames to consider in each trace", +) + +parser.add_argument( + "-c", + "--chain-reports", + action="store_true", + help="if only one block is found to hold onto the object, report " + "the next one, too", +) + + +#### + + +class BlockData: + def __init__(self, json_block): + self.addr = json_block["addr"] + + if "contents" in json_block: + contents = json_block["contents"] + else: + contents = [] + self.contents = [] + for c in contents: + self.contents.append(int(c, 16)) + + self.req_size = json_block["req"] + + self.alloc_stack = json_block["alloc"] + + +def print_trace_segment(args, stacks, block): + (traceTable, frameTable) = stacks + + for l in traceTable[block.alloc_stack]: + # The 5: is to remove the bogus leading "#00: " from the stack frame. + print(" " + frameTable[l][5 : args.max_stack_frame_length]) + + +def show_referrers(args, blocks, stacks, block): + visited = set([]) + + anyFound = False + + while True: + referrers = {} + + for b, data in blocks.items(): + which_edge = 0 + for e in data.contents: + if e == block: + # 8 is the number of bytes per word on a 64-bit system. + # XXX This means that this output will be wrong for logs from 32-bit systems! + referrers.setdefault(b, []).append(8 * which_edge) + anyFound = True + which_edge += 1 + + for r in referrers: + sys.stdout.write( + "0x{} size = {} bytes".format(blocks[r].addr, blocks[r].req_size) + ) + plural = "s" if len(referrers[r]) > 1 else "" + print( + " at byte offset" + + plural + + " " + + (", ".join(str(x) for x in referrers[r])) + ) + print_trace_segment(args, stacks, blocks[r]) + print("") + + if args.chain_reports: + if len(referrers) == 0: + sys.stdout.write("Found no more referrers.\n") + break + if len(referrers) > 1: + sys.stdout.write("Found too many referrers.\n") + break + + sys.stdout.write("Chaining to next referrer.\n\n") + for r in referrers: + block = r + if block in visited: + sys.stdout.write("Found a loop.\n") + break + visited.add(block) + else: + break + + if not anyFound: + print("No referrers found.") + + +def show_block_info(args, blocks, stacks, block): + b = blocks[block] + sys.stdout.write("block: 0x{}\n".format(b.addr)) + sys.stdout.write("requested size: {} bytes\n".format(b.req_size)) + sys.stdout.write("\n") + sys.stdout.write("block contents: ") + for c in b.contents: + v = "0" if c == 0 else blocks[c].addr + sys.stdout.write("0x{} ".format(v)) + sys.stdout.write("\n\n") + sys.stdout.write("allocation stack:\n") + print_trace_segment(args, stacks, b) + return + + +def cleanupTraceTable(args, frameTable, traceTable): + # Remove allocation functions at the start of traces. + if args.ignore_alloc_fns: + # Build a regexp that matches every function in allocatorFns. + escapedAllocatorFns = map(re.escape, allocatorFns) + fn_re = re.compile("|".join(escapedAllocatorFns)) + + # Remove allocator fns from each stack trace. + for traceKey, frameKeys in traceTable.items(): + numSkippedFrames = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if re.search(fn_re, frameDesc): + numSkippedFrames += 1 + else: + break + if numSkippedFrames > 0: + traceTable[traceKey] = frameKeys[numSkippedFrames:] + + # Trim the number of frames. + for traceKey, frameKeys in traceTable.items(): + if len(frameKeys) > args.max_frames: + traceTable[traceKey] = frameKeys[: args.max_frames] + + +def loadGraph(options): + # Handle gzipped input if necessary. + isZipped = options.dmd_log_file_name.endswith(".gz") + opener = gzip.open if isZipped else open + + with opener(options.dmd_log_file_name, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + block_list = j["blockList"] + blocks = {} + + for json_block in block_list: + blocks[int(json_block["addr"], 16)] = BlockData(json_block) + + traceTable = j["traceTable"] + frameTable = j["frameTable"] + + cleanupTraceTable(options, frameTable, traceTable) + + return (blocks, (traceTable, frameTable)) + + +def analyzeLogs(): + options = parser.parse_args() + + (blocks, stacks) = loadGraph(options) + + block = int(options.block, 16) + + if block not in blocks: + print("Object " + options.block + " not found in traces.") + print("It could still be the target of some nodes.") + return + + if options.info: + show_block_info(options, blocks, stacks, block) + return + + show_referrers(options, blocks, stacks, block) + + +if __name__ == "__main__": + analyzeLogs() diff --git a/memory/replace/dmd/dmd.py b/memory/replace/dmd/dmd.py new file mode 100755 index 0000000000..3e20d8d6bd --- /dev/null +++ b/memory/replace/dmd/dmd.py @@ -0,0 +1,1028 @@ +#! /usr/bin/env python3 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""This script analyzes a JSON file emitted by DMD.""" + +import argparse +import collections +import gzip +import io +import json +import os +import platform +import re +import shutil +import sys +import tempfile +from bisect import bisect_right +from functools import cmp_to_key +from typing import Callable + +# The DMD output version this script handles. +outputVersion = 5 + +# If --ignore-alloc-fns is specified, stack frames containing functions that +# match these strings will be removed from the *start* of stack traces. (Once +# we hit a non-matching frame, any subsequent frames won't be removed even if +# they do match.) +allocatorFns = [ + # Matches malloc, replace_malloc, moz_xmalloc, vpx_malloc, js_malloc, + # pod_malloc, malloc_zone_*, g_malloc. + "malloc", + # Matches calloc, replace_calloc, moz_xcalloc, vpx_calloc, js_calloc, + # pod_calloc, malloc_zone_calloc, pod_callocCanGC. + "calloc", + # Matches realloc, replace_realloc, moz_xrealloc, vpx_realloc, js_realloc, + # pod_realloc, pod_reallocCanGC. + "realloc", + # Matches memalign, posix_memalign, replace_memalign, replace_posix_memalign, + # moz_xmemalign, vpx_memalign, malloc_zone_memalign. + "memalign", + "operator new(", + "operator new[](", + "g_slice_alloc", + # This one is necessary to fully filter some sequences of allocation + # functions that happen in practice. Note that ??? entries that follow + # non-allocation functions won't be stripped, as explained above. + "???", + # Match DMD internals. + "mozilla::dmd::AllocCallback", + "mozilla::dmd::StackTrace::Get", +] + + +def cmp(a, b): + return (a > b) - (a < b) + + +class Record(object): + """A record is an aggregation of heap blocks that have identical stack + traces. It can also be used to represent the difference between two + records.""" + + def __init__(self): + self.numBlocks = 0 + self.reqSize = 0 + self.slopSize = 0 + self.usableSize = 0 + self.allocatedAtDesc = None + self.reportedAtDescs = [] + self.usableSizes = collections.defaultdict(int) + + def isZero(self, args): + return ( + self.numBlocks == 0 + and self.reqSize == 0 + and self.slopSize == 0 + and self.usableSize == 0 + and len(self.usableSizes) == 0 + ) + + def negate(self): + self.numBlocks = -self.numBlocks + self.reqSize = -self.reqSize + self.slopSize = -self.slopSize + self.usableSize = -self.usableSize + + negatedUsableSizes = collections.defaultdict(int) + for usableSize, count in self.usableSizes.items(): + negatedUsableSizes[-usableSize] = count + self.usableSizes = negatedUsableSizes + + def subtract(self, r): + # We should only be calling this on records with matching stack traces. + # Check this. + assert self.allocatedAtDesc == r.allocatedAtDesc + assert self.reportedAtDescs == r.reportedAtDescs + + self.numBlocks -= r.numBlocks + self.reqSize -= r.reqSize + self.slopSize -= r.slopSize + self.usableSize -= r.usableSize + + usableSizes1 = self.usableSizes + usableSizes2 = r.usableSizes + usableSizes3 = collections.defaultdict(int) + for usableSize in usableSizes1: + counts1 = usableSizes1[usableSize] + if usableSize in usableSizes2: + counts2 = usableSizes2[usableSize] + del usableSizes2[usableSize] + counts3 = counts1 - counts2 + if counts3 != 0: + if counts3 < 0: + usableSize = -usableSize + counts3 = -counts3 + usableSizes3[usableSize] = counts3 + else: + usableSizes3[usableSize] = counts1 + + for usableSize in usableSizes2: + usableSizes3[-usableSize] = usableSizes2[usableSize] + + self.usableSizes = usableSizes3 + + @staticmethod + def cmpByUsableSize(r1, r2): + # Sort by usable size, then by req size. + return cmp(abs(r1.usableSize), abs(r2.usableSize)) or Record.cmpByReqSize( + r1, r2 + ) + + @staticmethod + def cmpByReqSize(r1, r2): + # Sort by req size. + return cmp(abs(r1.reqSize), abs(r2.reqSize)) + + @staticmethod + def cmpBySlopSize(r1, r2): + # Sort by slop size. + return cmp(abs(r1.slopSize), abs(r2.slopSize)) + + @staticmethod + def cmpByNumBlocks(r1, r2): + # Sort by block counts, then by usable size. + return cmp(abs(r1.numBlocks), abs(r2.numBlocks)) or Record.cmpByUsableSize( + r1, r2 + ) + + +sortByChoices = { + "usable": Record.cmpByUsableSize, # the default + "req": Record.cmpByReqSize, + "slop": Record.cmpBySlopSize, + "num-blocks": Record.cmpByNumBlocks, +} + + +def parseCommandLine(): + # 24 is the maximum number of frames that DMD will produce. + def range_1_24(string): + value = int(string) + if value < 1 or value > 24: + msg = "{:s} is not in the range 1..24".format(string) + raise argparse.ArgumentTypeError(msg) + return value + + description = """ +Analyze heap data produced by DMD. +If one file is specified, analyze it; if two files are specified, analyze the +difference. +Input files can be gzipped. +Write to stdout unless -o/--output is specified. +Stack traces are fixed to show function names, filenames and line numbers +unless --no-fix-stacks is specified; stack fixing modifies the original file +and may take some time. If specified, the BREAKPAD_SYMBOLS_PATH environment +variable is used to find breakpad symbols for stack fixing. +""" + p = argparse.ArgumentParser(description=description) + + p.add_argument( + "-o", + "--output", + type=argparse.FileType("w"), + help="output file; stdout if unspecified", + ) + + p.add_argument( + "-f", + "--max-frames", + type=range_1_24, + default=8, + help="maximum number of frames to consider in each trace", + ) + + p.add_argument( + "-s", + "--sort-by", + choices=sortByChoices.keys(), + default="usable", + help="sort the records by a particular metric", + ) + + p.add_argument( + "-a", + "--ignore-alloc-fns", + action="store_true", + help="ignore allocation functions at the start of traces", + ) + + p.add_argument("--no-fix-stacks", action="store_true", help="do not fix stacks") + + p.add_argument( + "--clamp-contents", + action="store_true", + help="for a scan mode log, clamp addresses to the start of live blocks, " + "or zero if not in one", + ) + + p.add_argument( + "--print-clamp-stats", + action="store_true", + help="print information about the results of pointer clamping; mostly " + "useful for debugging clamping", + ) + + p.add_argument( + "--filter-stacks-for-testing", + action="store_true", + help="filter stack traces; only useful for testing purposes", + ) + + p.add_argument( + "--filter", + default=[], + action="append", + help="Only print entries that have a stack that matches the filter. " + "A filter may be negated by prefixing it with `!`. " + "If multiple filters are specified, all of them must match.", + ) + + p.add_argument("input_file", help="a file produced by DMD") + + p.add_argument( + "input_file2", + nargs="?", + help="a file produced by DMD; if present, it is diff'd with input_file", + ) + + return p.parse_args(sys.argv[1:]) + + +# Fix stacks if necessary: first write the output to a tempfile, then replace +# the original file with it. +def fixStackTraces(inputFilename, isZipped, opener): + # This append() call is needed to make the import statements work when this + # script is installed as a symlink. + sys.path.append(os.path.dirname(__file__)) + + bpsyms = os.environ.get("BREAKPAD_SYMBOLS_PATH", None) + sysname = platform.system() + if bpsyms and os.path.exists(bpsyms): + import fix_stacks as fixModule + + def fix(line): + return fixModule.fixSymbols(line, jsonMode=True, breakpadSymsDir=bpsyms) + + elif sysname in ("Linux", "Darwin", "Windows"): + import fix_stacks as fixModule + + def fix(line): + return fixModule.fixSymbols(line, jsonMode=True) + + else: + return + + # Fix stacks, writing output to a temporary file, and then overwrite the + # original file. + tmpFile = tempfile.NamedTemporaryFile(delete=False) + + # If the input is gzipped, then the output (written initially to |tmpFile|) + # should be gzipped as well. + # + # And we want to set its pre-gzipped filename to '' rather than the name of + # the temporary file, so that programs like the Unix 'file' utility don't + # say that it was called 'tmp6ozTxE' (or something like that) before it was + # zipped. So that explains the |filename=''| parameter. + # + # But setting the filename like that clobbers |tmpFile.name|, so we must + # get that now in order to move |tmpFile| at the end. + tmpFilename = tmpFile.name + if isZipped: + tmpFile = gzip.GzipFile(filename="", fileobj=tmpFile, mode="wb") + + with opener(inputFilename, "rb") as inputFile: + for line in inputFile: + tmpFile.write(fix(line)) + + tmpFile.close() + + shutil.move(tmpFilename, inputFilename) + + +def getDigestFromFile(args, inputFile): + # Handle gzipped input if necessary. + isZipped = inputFile.endswith(".gz") + opener = gzip.open if isZipped else open + + # Fix stack traces unless otherwise instructed. + if not args.no_fix_stacks: + fixStackTraces(inputFile, isZipped, opener) + + if args.clamp_contents: + clampBlockList(args, inputFile, isZipped, opener) + + with opener(inputFile, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + # Extract the main parts of the JSON object. + invocation = j["invocation"] + dmdEnvVar = invocation["dmdEnvVar"] + mode = invocation["mode"] + blockList = j["blockList"] + traceTable = j["traceTable"] + frameTable = j["frameTable"] + + # Insert the necessary entries for unrecorded stack traces. Note that 'ut' + # and 'uf' will not overlap with any keys produced by DMD's + # ToIdStringConverter::Base32() function. + unrecordedTraceID = "ut" + unrecordedFrameID = "uf" + traceTable[unrecordedTraceID] = [unrecordedFrameID] + frameTable[ + unrecordedFrameID + ] = "#00: (no stack trace recorded due to --stacks=partial)" + + # For the purposes of this script, 'scan' behaves like 'live'. + if mode == "scan": + mode = "live" + + if mode not in ["live", "dark-matter", "cumulative"]: + raise Exception("bad 'mode' property: '{:s}'".format(mode)) + + # Remove allocation functions at the start of traces. + if args.ignore_alloc_fns: + # Build a regexp that matches every function in allocatorFns. + escapedAllocatorFns = map(re.escape, allocatorFns) + fn_re = re.compile("|".join(escapedAllocatorFns)) + + # Remove allocator fns from each stack trace. + for traceKey, frameKeys in traceTable.items(): + numSkippedFrames = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if re.search(fn_re, frameDesc): + numSkippedFrames += 1 + else: + break + if numSkippedFrames > 0: + traceTable[traceKey] = frameKeys[numSkippedFrames:] + + # Trim the number of frames. + for traceKey, frameKeys in traceTable.items(): + if len(frameKeys) > args.max_frames: + del frameKeys[args.max_frames :] + + def buildTraceDescription(traceTable, frameTable, traceKey): + frameKeys = traceTable[traceKey] + fmt = " #{:02d}{:}" + + if args.filter_stacks_for_testing: + # This option is used by `test_dmd.js`, which runs the code in + # `SmokeDMD.cpp`. When running that test, there is too much + # variation in the stack traces across different machines and + # platforms to do exact output matching. However, every stack trace + # should have at least three frames that contain `DMD` (in one of + # `DMD.cpp`, `SmokeDMD.cpp`, `SmokeDMD`, or `SmokeDMD.exe`). Some + # example frames from automation (where `..` indicates excised path + # segments): + # + # Linux debug, with stack fixing using breakpad syms: + # `#01: replace_realloc(void*, unsigned long) [../dmd/DMD.cpp:1110]` + # + # Linux opt, with native stack fixing: + # `#02: TestFull(char const*, int, char const*, int) (../dmd/test/SmokeDMD.cpp:165)` + # + # Mac opt, with native stack fixing: + # `#03: RunTests() (../build/tests/bin/SmokeDMD + 0x21f9)` + # + # Windows opt, with native stack fixing failing due to a missing PDB: + # `#04: ??? (..\\build\\tests\\bin\\SmokeDMD.exe + 0x1c58)` + # + # If we see three such frames, we replace the entire stack trace + # with a single, predictable frame. This imprecise matching will at + # least detect if stack fixing fails completely. + dmd_frame_matches = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if "DMD" in frameDesc: + dmd_frame_matches += 1 + if dmd_frame_matches >= 3: + return [fmt.format(1, ": ... DMD.cpp ...")] + + # The frame number is always '#00' (see DMD.h for why), so we have to + # replace that with the correct frame number. + desc = [] + for n, frameKey in enumerate(traceTable[traceKey], start=1): + desc.append(fmt.format(n, frameTable[frameKey][3:])) + return desc + + # Aggregate blocks into records. All sufficiently similar blocks go into a + # single record. + + if mode in ["live", "cumulative"]: + liveOrCumulativeRecords = collections.defaultdict(Record) + elif mode == "dark-matter": + unreportedRecords = collections.defaultdict(Record) + onceReportedRecords = collections.defaultdict(Record) + twiceReportedRecords = collections.defaultdict(Record) + + heapUsableSize = 0 + heapBlocks = 0 + + recordKeyPartCache = {} + + for block in blockList: + # For each block we compute a |recordKey|, and all blocks with the same + # |recordKey| are aggregated into a single record. The |recordKey| is + # derived from the block's 'alloc' and 'reps' (if present) stack + # traces. + # + # We use frame descriptions (e.g. "#00: foo (X.cpp:99)") when comparing + # traces for equality. We can't use trace keys or frame keys because + # they're not comparable across different DMD runs (which is relevant + # when doing diffs). + # + # Using frame descriptions also fits in with the stack trimming done + # for --max-frames, which requires that stack traces with common + # beginnings but different endings to be considered equivalent. E.g. if + # we have distinct traces T1:[A:D1,B:D2,C:D3] and T2:[X:D1,Y:D2,Z:D4] + # and we trim the final frame of each they should be considered + # equivalent because the untrimmed frame descriptions (D1 and D2) + # match. + # + # Having said all that, during a single invocation of dmd.py on a + # single DMD file, for a single frameKey value the record key will + # always be the same, and we might encounter it 1000s of times. So we + # cache prior results for speed. + def makeRecordKeyPart(traceKey): + if traceKey in recordKeyPartCache: + return recordKeyPartCache[traceKey] + + recordKeyPart = str( + list(map(lambda frameKey: frameTable[frameKey], traceTable[traceKey])) + ) + recordKeyPartCache[traceKey] = recordKeyPart + return recordKeyPart + + allocatedAtTraceKey = block.get("alloc", unrecordedTraceID) + if mode in ["live", "cumulative"]: + recordKey = makeRecordKeyPart(allocatedAtTraceKey) + records = liveOrCumulativeRecords + elif mode == "dark-matter": + recordKey = makeRecordKeyPart(allocatedAtTraceKey) + if "reps" in block: + reportedAtTraceKeys = block["reps"] + for reportedAtTraceKey in reportedAtTraceKeys: + recordKey += makeRecordKeyPart(reportedAtTraceKey) + if len(reportedAtTraceKeys) == 1: + records = onceReportedRecords + else: + records = twiceReportedRecords + else: + records = unreportedRecords + + record = records[recordKey] + + if "req" not in block: + raise Exception("'req' property missing in block'") + + reqSize = block["req"] + slopSize = block.get("slop", 0) + + if "num" in block: + num = block["num"] + else: + num = 1 + + usableSize = reqSize + slopSize + heapUsableSize += num * usableSize + heapBlocks += num + + record.numBlocks += num + record.reqSize += num * reqSize + record.slopSize += num * slopSize + record.usableSize += num * usableSize + if record.allocatedAtDesc is None: + record.allocatedAtDesc = buildTraceDescription( + traceTable, frameTable, allocatedAtTraceKey + ) + + if mode in ["live", "cumulative"]: + pass + elif mode == "dark-matter": + if "reps" in block and record.reportedAtDescs == []: + + def f(k): + return buildTraceDescription(traceTable, frameTable, k) + + record.reportedAtDescs = list(map(f, reportedAtTraceKeys)) + record.usableSizes[usableSize] += num + + # All the processed data for a single DMD file is called a "digest". + digest = {} + digest["dmdEnvVar"] = dmdEnvVar + digest["mode"] = mode + digest["heapUsableSize"] = heapUsableSize + digest["heapBlocks"] = heapBlocks + if mode in ["live", "cumulative"]: + digest["liveOrCumulativeRecords"] = liveOrCumulativeRecords + elif mode == "dark-matter": + digest["unreportedRecords"] = unreportedRecords + digest["onceReportedRecords"] = onceReportedRecords + digest["twiceReportedRecords"] = twiceReportedRecords + return digest + + +def diffRecords(args, records1, records2): + records3 = {} + + # Process records1. + for k in records1: + r1 = records1[k] + if k in records2: + # This record is present in both records1 and records2. + r2 = records2[k] + del records2[k] + r2.subtract(r1) + if not r2.isZero(args): + records3[k] = r2 + else: + # This record is present only in records1. + r1.negate() + records3[k] = r1 + + for k in records2: + # This record is present only in records2. + records3[k] = records2[k] + + return records3 + + +def diffDigests(args, d1, d2): + if d1["mode"] != d2["mode"]: + raise Exception("the input files have different 'mode' properties") + + d3 = {} + d3["dmdEnvVar"] = (d1["dmdEnvVar"], d2["dmdEnvVar"]) + d3["mode"] = d1["mode"] + d3["heapUsableSize"] = d2["heapUsableSize"] - d1["heapUsableSize"] + d3["heapBlocks"] = d2["heapBlocks"] - d1["heapBlocks"] + if d1["mode"] in ["live", "cumulative"]: + d3["liveOrCumulativeRecords"] = diffRecords( + args, d1["liveOrCumulativeRecords"], d2["liveOrCumulativeRecords"] + ) + elif d1["mode"] == "dark-matter": + d3["unreportedRecords"] = diffRecords( + args, d1["unreportedRecords"], d2["unreportedRecords"] + ) + d3["onceReportedRecords"] = diffRecords( + args, d1["onceReportedRecords"], d2["onceReportedRecords"] + ) + d3["twiceReportedRecords"] = diffRecords( + args, d1["twiceReportedRecords"], d2["twiceReportedRecords"] + ) + return d3 + + +def printDigest(args, digest): + dmdEnvVar = digest["dmdEnvVar"] + mode = digest["mode"] + heapUsableSize = digest["heapUsableSize"] + heapBlocks = digest["heapBlocks"] + if mode in ["live", "cumulative"]: + liveOrCumulativeRecords = digest["liveOrCumulativeRecords"] + elif mode == "dark-matter": + unreportedRecords = digest["unreportedRecords"] + onceReportedRecords = digest["onceReportedRecords"] + twiceReportedRecords = digest["twiceReportedRecords"] + + separator = "#" + "-" * 65 + "\n" + + def number(n): + """Format a number with comma as a separator.""" + return "{:,d}".format(n) + + def perc(m, n): + return 0 if n == 0 else (100 * m / n) + + def plural(n): + return "" if n == 1 else "s" + + # Prints to stdout, or to file if -o/--output was specified. + def out(*arguments, **kwargs): + print(*arguments, file=args.output, **kwargs) + + def printStack(traceDesc): + for frameDesc in traceDesc: + out(frameDesc) + + def printRecords(recordKind, records, heapUsableSize): + RecordKind = recordKind.capitalize() + out(separator) + numRecords = len(records) + cmpRecords = sortByChoices[args.sort_by] + sortedRecords = sorted( + records.values(), key=cmp_to_key(cmpRecords), reverse=True + ) + kindBlocks = 0 + kindUsableSize = 0 + maxRecord = 1000 + + def is_match(rec: Record, key: str): + return any(key in desc for desc in rec.allocatedAtDesc) + + for arg in args.filter: + key: str + cond: Callable[[Record], bool] + if arg.startswith("\\"): + # just in case you really need to start a filter with '!' (or '\') + key = arg[1:] + cond = is_match + elif arg.startswith("!"): + key = arg[1:] + + def cond(rec, key): + return not is_match(rec, key) # noqa: E731 + + else: + key = arg + cond = is_match + sortedRecords = [rec for rec in sortedRecords if cond(rec, key)] + + # First iteration: get totals, etc. + for record in sortedRecords: + kindBlocks += record.numBlocks + kindUsableSize += record.usableSize + + # Second iteration: print. + if numRecords == 0: + out("# no {:} heap blocks\n".format(recordKind)) + + kindCumulativeUsableSize = 0 + for i, record in enumerate(sortedRecords, start=1): + # Stop printing at the |maxRecord|th record. + if i == maxRecord: + out( + "# {:}: stopping after {:,d} heap block records\n".format( + RecordKind, i + ) + ) + break + + kindCumulativeUsableSize += record.usableSize + + out(RecordKind + " {") + out( + " {:} block{:} in heap block record {:,d} of {:,d}".format( + number(record.numBlocks), plural(record.numBlocks), i, numRecords + ) + ) + out( + " {:} bytes ({:} requested / {:} slop)".format( + number(record.usableSize), + number(record.reqSize), + number(record.slopSize), + ) + ) + + usableSizes = sorted( + record.usableSizes.items(), key=lambda x: abs(x[0]), reverse=True + ) + hasSingleBlock = len(usableSizes) == 1 and usableSizes[0][1] == 1 + + if not hasSingleBlock: + out(" Individual block sizes: ", end="") + if len(usableSizes) == 0: + out("(no change)", end="") + else: + isFirst = True + for usableSize, count in usableSizes: + if not isFirst: + out("; ", end="") + out("{:}".format(number(usableSize)), end="") + if count > 1: + out(" x {:,d}".format(count), end="") + isFirst = False + out() + + out( + " {:4.2f}% of the heap ({:4.2f}% cumulative)".format( + perc(record.usableSize, heapUsableSize), + perc(kindCumulativeUsableSize, heapUsableSize), + ) + ) + if mode in ["live", "cumulative"]: + pass + elif mode == "dark-matter": + out( + " {:4.2f}% of {:} ({:4.2f}% cumulative)".format( + perc(record.usableSize, kindUsableSize), + recordKind, + perc(kindCumulativeUsableSize, kindUsableSize), + ) + ) + out(" Allocated at {") + printStack(record.allocatedAtDesc) + out(" }") + if mode in ["live", "cumulative"]: + pass + elif mode == "dark-matter": + for n, reportedAtDesc in enumerate(record.reportedAtDescs): + again = "again " if n > 0 else "" + out(" Reported {:}at {{".format(again)) + printStack(reportedAtDesc) + out(" }") + out("}\n") + + return (kindUsableSize, kindBlocks) + + def printInvocation(n, dmdEnvVar, mode): + out("Invocation{:} {{".format(n)) + if dmdEnvVar is None: + out(" $DMD is undefined") + else: + out(" $DMD = '" + dmdEnvVar + "'") + out(" Mode = '" + mode + "'") + out("}\n") + + # Print command line. Strip dirs so the output is deterministic, which is + # needed for testing. + out(separator, end="") + out("# " + " ".join(map(os.path.basename, sys.argv)) + "\n") + + # Print invocation(s). + if type(dmdEnvVar) is not tuple: + printInvocation("", dmdEnvVar, mode) + else: + printInvocation(" 1", dmdEnvVar[0], mode) + printInvocation(" 2", dmdEnvVar[1], mode) + + # Print records. + if mode in ["live", "cumulative"]: + liveOrCumulativeUsableSize, liveOrCumulativeBlocks = printRecords( + mode, liveOrCumulativeRecords, heapUsableSize + ) + elif mode == "dark-matter": + twiceReportedUsableSize, twiceReportedBlocks = printRecords( + "twice-reported", twiceReportedRecords, heapUsableSize + ) + + unreportedUsableSize, unreportedBlocks = printRecords( + "unreported", unreportedRecords, heapUsableSize + ) + + onceReportedUsableSize, onceReportedBlocks = printRecords( + "once-reported", onceReportedRecords, heapUsableSize + ) + + # Print summary. + out(separator) + out("Summary {") + if mode in ["live", "cumulative"]: + out( + " Total: {:} bytes in {:} blocks".format( + number(liveOrCumulativeUsableSize), number(liveOrCumulativeBlocks) + ) + ) + elif mode == "dark-matter": + fmt = " {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)" + out(fmt.format("Total:", number(heapUsableSize), 100, number(heapBlocks), 100)) + out( + fmt.format( + "Unreported:", + number(unreportedUsableSize), + perc(unreportedUsableSize, heapUsableSize), + number(unreportedBlocks), + perc(unreportedBlocks, heapBlocks), + ) + ) + out( + fmt.format( + "Once-reported:", + number(onceReportedUsableSize), + perc(onceReportedUsableSize, heapUsableSize), + number(onceReportedBlocks), + perc(onceReportedBlocks, heapBlocks), + ) + ) + out( + fmt.format( + "Twice-reported:", + number(twiceReportedUsableSize), + perc(twiceReportedUsableSize, heapUsableSize), + number(twiceReportedBlocks), + perc(twiceReportedBlocks, heapBlocks), + ) + ) + out("}\n") + + +############################# +# Pretty printer for DMD JSON +############################# + + +def prettyPrintDmdJson(out, j): + out.write("{\n") + + out.write(' "version": {0},\n'.format(j["version"])) + out.write(' "invocation": ') + json.dump(j["invocation"], out, sort_keys=True) + out.write(",\n") + + out.write(' "blockList": [') + first = True + for b in j["blockList"]: + out.write("" if first else ",") + out.write("\n ") + json.dump(b, out, sort_keys=True) + first = False + out.write("\n ],\n") + + out.write(' "traceTable": {') + first = True + for k, l in j["traceTable"].items(): + out.write("" if first else ",") + out.write('\n "{0}": {1}'.format(k, json.dumps(l))) + first = False + out.write("\n },\n") + + out.write(' "frameTable": {') + first = True + for k, v in j["frameTable"].items(): + out.write("" if first else ",") + out.write('\n "{0}": {1}'.format(k, json.dumps(v))) + first = False + out.write("\n }\n") + + out.write("}\n") + + +################################################################## +# Code for clamping addresses using conservative pointer analysis. +################################################################## + + +# Start is the address of the first byte of the block, while end is +# the address of the first byte after the final byte in the block. +class AddrRange: + def __init__(self, block, length): + self.block = block + self.start = int(block, 16) + self.length = length + self.end = self.start + self.length + + assert self.start > 0 + assert length >= 0 + + +class ClampStats: + def __init__(self): + # Number of pointers already pointing to the start of a block. + self.startBlockPtr = 0 + + # Number of pointers pointing to the middle of a block. These + # are clamped to the start of the block they point into. + self.midBlockPtr = 0 + + # Number of null pointers. + self.nullPtr = 0 + + # Number of non-null pointers that didn't point into the middle + # of any blocks. These are clamped to null. + self.nonNullNonBlockPtr = 0 + + def clampedBlockAddr(self, sameAddress): + if sameAddress: + self.startBlockPtr += 1 + else: + self.midBlockPtr += 1 + + def nullAddr(self): + self.nullPtr += 1 + + def clampedNonBlockAddr(self): + self.nonNullNonBlockPtr += 1 + + def log(self): + sys.stderr.write("Results:\n") + sys.stderr.write( + " Number of pointers already pointing to start of blocks: " + + str(self.startBlockPtr) + + "\n" + ) + sys.stderr.write( + " Number of pointers clamped to start of blocks: " + + str(self.midBlockPtr) + + "\n" + ) + sys.stderr.write( + " Number of non-null pointers not pointing into blocks " + "clamped to null: " + str(self.nonNullNonBlockPtr) + "\n" + ) + sys.stderr.write(" Number of null pointers: " + str(self.nullPtr) + "\n") + + +# Search the block ranges array for a block that address points into. +# The search is carried out in an array of starting addresses for each blocks +# because it is faster. +def clampAddress(blockRanges, blockStarts, clampStats, address): + i = bisect_right(blockStarts, address) + + # Any addresses completely out of the range should have been eliminated already. + assert i > 0 + r = blockRanges[i - 1] + assert r.start <= address + + if address >= r.end: + assert address < blockRanges[i].start + clampStats.clampedNonBlockAddr() + return "0" + + clampStats.clampedBlockAddr(r.start == address) + return r.block + + +def clampBlockList(args, inputFileName, isZipped, opener): + # XXX This isn't very efficient because we end up reading and writing + # the file multiple times. + with opener(inputFileName, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + # Check that the invocation is reasonable for contents clamping. + invocation = j["invocation"] + if invocation["mode"] != "scan": + raise Exception("Log was taken in mode " + invocation["mode"] + " not scan") + + sys.stderr.write("Creating block range list.\n") + blockList = j["blockList"] + blockRanges = [] + for block in blockList: + blockRanges.append(AddrRange(block["addr"], block["req"])) + blockRanges.sort(key=lambda r: r.start) + + # Make sure there are no overlapping blocks. + prevRange = blockRanges[0] + for currRange in blockRanges[1:]: + assert prevRange.end <= currRange.start + prevRange = currRange + + sys.stderr.write("Clamping block contents.\n") + clampStats = ClampStats() + firstAddr = blockRanges[0].start + lastAddr = blockRanges[-1].end + + blockStarts = [] + for r in blockRanges: + blockStarts.append(r.start) + + for block in blockList: + # Small blocks don't have any contents. + if "contents" not in block: + continue + + cont = block["contents"] + for i in range(len(cont)): + address = int(cont[i], 16) + + if address == 0: + clampStats.nullAddr() + continue + + # If the address is before the first block or after the last + # block then it can't be within a block. + if address < firstAddr or address >= lastAddr: + clampStats.clampedNonBlockAddr() + cont[i] = "0" + continue + + cont[i] = clampAddress(blockRanges, blockStarts, clampStats, address) + + # Remove any trailing nulls. + while len(cont) and cont[-1] == "0": + cont.pop() + + if args.print_clamp_stats: + clampStats.log() + + sys.stderr.write("Saving file.\n") + tmpFile = tempfile.NamedTemporaryFile(delete=False) + tmpFilename = tmpFile.name + if isZipped: + tmpFile = gzip.GzipFile(filename="", fileobj=tmpFile, mode="wb") + prettyPrintDmdJson(io.TextIOWrapper(tmpFile, encoding="utf-8"), j) + tmpFile.close() + shutil.move(tmpFilename, inputFileName) + + +def main(): + args = parseCommandLine() + digest = getDigestFromFile(args, args.input_file) + if args.input_file2: + digest2 = getDigestFromFile(args, args.input_file2) + digest = diffDigests(args, digest, digest2) + printDigest(args, digest) + + +if __name__ == "__main__": + main() diff --git a/memory/replace/dmd/moz.build b/memory/replace/dmd/moz.build new file mode 100644 index 0000000000..6f3121df48 --- /dev/null +++ b/memory/replace/dmd/moz.build @@ -0,0 +1,37 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + "DMD.h", +] + +UNIFIED_SOURCES += [ + "DMD.cpp", +] + +if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + UNIFIED_SOURCES += [ + "/mfbt/HashFunctions.cpp", + "/mfbt/JSONWriter.cpp", + "/mfbt/Poison.cpp", + "/mozglue/misc/StackWalk.cpp", + ] + if CONFIG["OS_ARCH"] == "WINNT": + OS_LIBS += [ + "dbghelp", + ] + +ReplaceMalloc("dmd") + +DEFINES["MOZ_NO_MOZALLOC"] = True +DEFINES["IMPL_MFBT"] = True + +if CONFIG["MOZ_OPTIMIZE"]: + DEFINES["MOZ_OPTIMIZE"] = True + +DisableStlWrapping() + +TEST_DIRS += ["test"] diff --git a/memory/replace/dmd/test/SmokeDMD.cpp b/memory/replace/dmd/test/SmokeDMD.cpp new file mode 100644 index 0000000000..c72e92a543 --- /dev/null +++ b/memory/replace/dmd/test/SmokeDMD.cpp @@ -0,0 +1,378 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This program is used by the DMD xpcshell test. It is run under DMD and +// produces some output. The xpcshell test then post-processes and checks this +// output. +// +// Note that this file does not have "Test" or "test" in its name, because that +// will cause the build system to not record breakpad symbols for it, which +// will stop the post-processing (which includes stack fixing) from working +// correctly. + +// This is required on some systems such as Fedora to allow +// building with -O0 together with --warnings-as-errors due to +// a check in /usr/include/features.h +#undef _FORTIFY_SOURCE + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mozilla/Assertions.h" +#include "mozilla/JSONWriter.h" +#include "mozilla/Sprintf.h" +#include "mozilla/UniquePtr.h" +#include "DMD.h" + +using mozilla::MakeUnique; +using namespace mozilla::dmd; + +DMDFuncs::Singleton DMDFuncs::sSingleton; + +class FpWriteFunc final : public mozilla::JSONWriteFunc { + public: + explicit FpWriteFunc(const char* aFilename) { + mFp = fopen(aFilename, "w"); + if (!mFp) { + fprintf(stderr, "SmokeDMD: can't create %s file: %s\n", aFilename, + strerror(errno)); + exit(1); + } + } + + ~FpWriteFunc() { fclose(mFp); } + + void Write(const mozilla::Span<const char>& aStr) final { + for (const char c : aStr) { + fputc(c, mFp); + } + } + + private: + FILE* mFp; +}; + +// This stops otherwise-unused variables from being optimized away. +static void UseItOrLoseIt(void* aPtr, int aSeven) { + char buf[64]; + int n = SprintfLiteral(buf, "%p\n", aPtr); + if (n == 20 + aSeven) { + fprintf(stderr, "well, that is surprising"); + } +} + +// This function checks that heap blocks that have the same stack trace but +// different (or no) reporters get aggregated separately. +void Foo(int aSeven) { + char* a[6]; + for (int i = 0; i < aSeven - 1; i++) { + a[i] = (char*)malloc(128 - 16 * i); + UseItOrLoseIt(a[i], aSeven); + } + + // Oddly, some versions of clang will cause identical stack traces to be + // generated for adjacent calls to Report(), which breaks the test. Inserting + // the UseItOrLoseIt() calls in between is enough to prevent this. + + Report(a[2]); // reported + + UseItOrLoseIt(a[2], aSeven); + + for (int i = 0; i < aSeven - 5; i++) { + Report(a[i]); // reported + UseItOrLoseIt(a[i], aSeven); + } + + UseItOrLoseIt(a[2], aSeven); + + Report(a[3]); // reported + + // a[4], a[5] unreported +} + +void TestEmpty(const char* aTestName, const char* aMode) { + char filename[128]; + SprintfLiteral(filename, "complete-%s-%s.json", aTestName, aMode); + auto f = MakeUnique<FpWriteFunc>(filename); + + char options[128]; + SprintfLiteral(options, "--mode=%s --stacks=full", aMode); + ResetEverything(options); + + // Zero for everything. + Analyze(std::move(f)); +} + +void TestFull(const char* aTestName, int aNum, const char* aMode, int aSeven) { + char filename[128]; + SprintfLiteral(filename, "complete-%s%d-%s.json", aTestName, aNum, aMode); + auto f = MakeUnique<FpWriteFunc>(filename); + + // The --show-dump-stats=yes is there just to give that option some basic + // testing, e.g. ensure it doesn't crash. It's hard to test much beyond that. + char options[128]; + SprintfLiteral(options, "--mode=%s --stacks=full --show-dump-stats=yes", + aMode); + ResetEverything(options); + + // Analyze 1: 1 freed, 9 out of 10 unreported. + // Analyze 2: still present and unreported. + int i; + char* a = nullptr; + for (i = 0; i < aSeven + 3; i++) { + a = (char*)malloc(100); + UseItOrLoseIt(a, aSeven); + } + free(a); + + // A no-op. + free(nullptr); + + // Note: 16 bytes is the smallest requested size that gives consistent + // behaviour across all platforms with jemalloc. + // Analyze 1: reported. + // Analyze 2: thrice-reported. + char* a2 = (char*)malloc(16); + Report(a2); + + // Analyze 1: reported. + // Analyze 2: reportedness carries over, due to ReportOnAlloc. + char* b = (char*)malloc(10); + ReportOnAlloc(b); + + // ReportOnAlloc, then freed. + // Analyze 1: freed, irrelevant. + // Analyze 2: freed, irrelevant. + char* b2 = (char*)malloc(16); + ReportOnAlloc(b2); + free(b2); + + // Analyze 1: reported 4 times. + // Analyze 2: freed, irrelevant. + char* c = (char*)calloc(10, 3); + Report(c); + for (int i = 0; i < aSeven - 4; i++) { + Report(c); + } + + // Analyze 1: ignored. + // Analyze 2: irrelevant. + Report((void*)(intptr_t)i); + + // jemalloc rounds this up to 8192. + // Analyze 1: reported. + // Analyze 2: freed. + char* e = (char*)malloc(4096); + e = (char*)realloc(e, 7169); + Report(e); + + // First realloc is like malloc; second realloc is shrinking. + // Analyze 1: reported. + // Analyze 2: re-reported. + char* e2 = (char*)realloc(nullptr, 1024); + e2 = (char*)realloc(e2, 512); + Report(e2); + + // First realloc is like malloc; second realloc creates a min-sized block. + // XXX: on Windows, second realloc frees the block. + // Analyze 1: reported. + // Analyze 2: freed, irrelevant. + char* e3 = (char*)realloc(nullptr, 1023); + // e3 = (char*) realloc(e3, 0); + MOZ_ASSERT(e3); + Report(e3); + + // Analyze 1: freed, irrelevant. + // Analyze 2: freed, irrelevant. + char* f1 = (char*)malloc(64); + UseItOrLoseIt(f1, aSeven); + free(f1); + + // Analyze 1: ignored. + // Analyze 2: irrelevant. + Report((void*)(intptr_t)0x0); + + // Analyze 1: mixture of reported and unreported. + // Analyze 2: all unreported. + Foo(aSeven); + + // Analyze 1: twice-reported. + // Analyze 2: twice-reported. + char* g1 = (char*)malloc(77); + ReportOnAlloc(g1); + ReportOnAlloc(g1); + + // Analyze 1: mixture of reported and unreported. + // Analyze 2: all unreported. + // Nb: this Foo() call is deliberately not adjacent to the previous one. See + // the comment about adjacent calls in Foo() for more details. + Foo(aSeven); + + // Analyze 1: twice-reported. + // Analyze 2: once-reported. + char* g2 = (char*)malloc(78); + Report(g2); + ReportOnAlloc(g2); + + // Analyze 1: twice-reported. + // Analyze 2: once-reported. + char* g3 = (char*)malloc(79); + ReportOnAlloc(g3); + Report(g3); + + // All the odd-ball ones. + // Analyze 1: all unreported. + // Analyze 2: all freed, irrelevant. + // XXX: no memalign on Mac + // void* w = memalign(64, 65); // rounds up to 128 + // UseItOrLoseIt(w, aSeven); + + // XXX: posix_memalign doesn't work on B2G + // void* x; + // posix_memalign(&y, 128, 129); // rounds up to 256 + // UseItOrLoseIt(x, aSeven); + + // XXX: valloc doesn't work on Windows. + // void* y = valloc(1); // rounds up to 4096 + // UseItOrLoseIt(y, aSeven); + + // XXX: C11 only + // void* z = aligned_alloc(64, 256); + // UseItOrLoseIt(z, aSeven); + + if (aNum == 1) { + // Analyze 1. + Analyze(std::move(f)); + } + + ClearReports(); + + //--------- + + Report(a2); + Report(a2); + free(c); + free(e); + Report(e2); + free(e3); + // free(w); + // free(x); + // free(y); + // free(z); + + // Do some allocations that will only show up in cumulative mode. + for (int i = 0; i < 100; i++) { + void* v = malloc(128); + UseItOrLoseIt(v, aSeven); + free(v); + } + + if (aNum == 2) { + // Analyze 2. + Analyze(std::move(f)); + } +} + +void TestPartial(const char* aTestName, const char* aMode, int aSeven) { + char filename[128]; + SprintfLiteral(filename, "complete-%s-%s.json", aTestName, aMode); + auto f = MakeUnique<FpWriteFunc>(filename); + + char options[128]; + SprintfLiteral(options, "--mode=%s", aMode); + ResetEverything(options); + + int kTenThousand = aSeven + 9993; + char* s; + + // The output of this function is deterministic but it relies on the + // probability and seeds given to the FastBernoulliTrial instance in + // ResetBernoulli(). If they change, the output will change too. + + // Expected fraction with stacks: (1 - (1 - 0.003) ** 16) = 0.0469. + // So we expect about 0.0469 * 10000 == 469. + // We actually get 511. + for (int i = 0; i < kTenThousand; i++) { + s = (char*)malloc(16); + UseItOrLoseIt(s, aSeven); + } + + // Expected fraction with stacks: (1 - (1 - 0.003) ** 128) = 0.3193. + // So we expect about 0.3193 * 10000 == 3193. + // We actually get 3136. + for (int i = 0; i < kTenThousand; i++) { + s = (char*)malloc(128); + UseItOrLoseIt(s, aSeven); + } + + // Expected fraction with stacks: (1 - (1 - 0.003) ** 1024) = 0.9539. + // So we expect about 0.9539 * 10000 == 9539. + // We actually get 9531. + for (int i = 0; i < kTenThousand; i++) { + s = (char*)malloc(1024); + UseItOrLoseIt(s, aSeven); + } + + Analyze(std::move(f)); +} + +void TestScan(int aSeven) { + auto f = MakeUnique<FpWriteFunc>("basic-scan.json"); + + ResetEverything("--mode=scan"); + + uintptr_t* p = (uintptr_t*)malloc(6 * sizeof(uintptr_t)); + UseItOrLoseIt(p, aSeven); + + // Hard-coded values checked by scan-test.py + p[0] = 0x123; // outside a block, small value + p[1] = 0x0; // null + p[2] = (uintptr_t)((uint8_t*)p - 1); // pointer outside a block, but nearby + p[3] = (uintptr_t)p; // pointer to start of a block + p[4] = (uintptr_t)((uint8_t*)p + 1); // pointer into a block + p[5] = 0x0; // trailing null + + Analyze(std::move(f)); +} + +void RunTests() { + // This test relies on the compiler not doing various optimizations, such as + // eliding unused malloc() calls or unrolling loops with fixed iteration + // counts. So we compile it with -O0 (or equivalent), which probably prevents + // that. We also use the following variable for various loop iteration + // counts, just in case compilers might unroll very small loops even with + // -O0. + int seven = 7; + + // Make sure that DMD is actually running; it is initialized on the first + // allocation. + int* x = (int*)malloc(100); + UseItOrLoseIt(x, seven); + MOZ_RELEASE_ASSERT(IsRunning()); + + // Please keep this in sync with run_test in test_dmd.js. + + TestEmpty("empty", "live"); + TestEmpty("empty", "dark-matter"); + TestEmpty("empty", "cumulative"); + + TestFull("full", 1, "live", seven); + TestFull("full", 1, "dark-matter", seven); + + TestFull("full", 2, "dark-matter", seven); + TestFull("full", 2, "cumulative", seven); + + TestPartial("partial", "live", seven); + + TestScan(seven); +} + +int main() { + RunTests(); + + return 0; +} diff --git a/memory/replace/dmd/test/basic-scan-32-expected.txt b/memory/replace/dmd/test/basic-scan-32-expected.txt new file mode 100644 index 0000000000..9f6f4db325 --- /dev/null +++ b/memory/replace/dmd/test/basic-scan-32-expected.txt @@ -0,0 +1,25 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o basic-scan-32-actual.txt --clamp-contents basic-scan.json + +Invocation { + $DMD = '--mode=scan' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 1 + 32 bytes (24 requested / 8 slop) + 100.00% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 32 bytes in 1 blocks +} + diff --git a/memory/replace/dmd/test/basic-scan-64-expected.txt b/memory/replace/dmd/test/basic-scan-64-expected.txt new file mode 100644 index 0000000000..59effc07b7 --- /dev/null +++ b/memory/replace/dmd/test/basic-scan-64-expected.txt @@ -0,0 +1,25 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o basic-scan-64-actual.txt --clamp-contents basic-scan.json + +Invocation { + $DMD = '--mode=scan' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 1 + 48 bytes (48 requested / 0 slop) + 100.00% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 48 bytes in 1 blocks +} + diff --git a/memory/replace/dmd/test/complete-empty-cumulative-expected.txt b/memory/replace/dmd/test/complete-empty-cumulative-expected.txt new file mode 100644 index 0000000000..2486015d0b --- /dev/null +++ b/memory/replace/dmd/test/complete-empty-cumulative-expected.txt @@ -0,0 +1,18 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-empty-cumulative-actual.txt complete-empty-cumulative.json + +Invocation { + $DMD = '--mode=cumulative --stacks=full' + Mode = 'cumulative' +} + +#----------------------------------------------------------------- + +# no cumulative heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 0 bytes in 0 blocks +} + diff --git a/memory/replace/dmd/test/complete-empty-dark-matter-expected.txt b/memory/replace/dmd/test/complete-empty-dark-matter-expected.txt new file mode 100644 index 0000000000..0020cddde3 --- /dev/null +++ b/memory/replace/dmd/test/complete-empty-dark-matter-expected.txt @@ -0,0 +1,29 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-empty-dark-matter-actual.txt complete-empty-dark-matter.json + +Invocation { + $DMD = '--mode=dark-matter --stacks=full' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +# no twice-reported heap blocks + +#----------------------------------------------------------------- + +# no unreported heap blocks + +#----------------------------------------------------------------- + +# no once-reported heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 0 bytes (100.00%) in 0 blocks (100.00%) + Unreported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) + Once-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) + Twice-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) +} + diff --git a/memory/replace/dmd/test/complete-empty-live-expected.txt b/memory/replace/dmd/test/complete-empty-live-expected.txt new file mode 100644 index 0000000000..d0d1721965 --- /dev/null +++ b/memory/replace/dmd/test/complete-empty-live-expected.txt @@ -0,0 +1,18 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-empty-live-actual.txt complete-empty-live.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +# no live heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 0 bytes in 0 blocks +} + diff --git a/memory/replace/dmd/test/complete-full1-dark-matter-expected.txt b/memory/replace/dmd/test/complete-full1-dark-matter-expected.txt new file mode 100644 index 0000000000..2c7d6b6343 --- /dev/null +++ b/memory/replace/dmd/test/complete-full1-dark-matter-expected.txt @@ -0,0 +1,265 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full1-dark-matter-actual.txt complete-full1-dark-matter.json + +Invocation { + $DMD = '--mode=dark-matter --stacks=full --show-dump-stats=yes' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +Twice-reported { + 1 block in heap block record 1 of 4 + 80 bytes (79 requested / 1 slop) + 0.66% of the heap (0.66% cumulative) + 29.41% of twice-reported (29.41% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 2 of 4 + 80 bytes (78 requested / 2 slop) + 0.66% of the heap (1.32% cumulative) + 29.41% of twice-reported (58.82% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 3 of 4 + 80 bytes (77 requested / 3 slop) + 0.66% of the heap (1.98% cumulative) + 29.41% of twice-reported (88.24% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 4 of 4 + 32 bytes (30 requested / 2 slop) + 0.26% of the heap (2.25% cumulative) + 11.76% of twice-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Unreported { + 9 blocks in heap block record 1 of 3 + 1,008 bytes (900 requested / 108 slop) + Individual block sizes: 112 x 9 + 8.33% of the heap (8.33% cumulative) + 81.82% of unreported (81.82% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 2 blocks in heap block record 2 of 3 + 112 bytes (112 requested / 0 slop) + Individual block sizes: 64; 48 + 0.93% of the heap (9.26% cumulative) + 9.09% of unreported (90.91% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 2 blocks in heap block record 3 of 3 + 112 bytes (112 requested / 0 slop) + Individual block sizes: 64; 48 + 0.93% of the heap (10.19% cumulative) + 9.09% of unreported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Once-reported { + 1 block in heap block record 1 of 11 + 8,192 bytes (7,169 requested / 1,023 slop) + 67.72% of the heap (67.72% cumulative) + 77.34% of once-reported (77.34% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 2 of 11 + 1,024 bytes (1,023 requested / 1 slop) + 8.47% of the heap (76.19% cumulative) + 9.67% of once-reported (87.01% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 3 of 11 + 512 bytes (512 requested / 0 slop) + 4.23% of the heap (80.42% cumulative) + 4.83% of once-reported (91.84% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 2 blocks in heap block record 4 of 11 + 240 bytes (240 requested / 0 slop) + Individual block sizes: 128; 112 + 1.98% of the heap (82.41% cumulative) + 2.27% of once-reported (94.11% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 2 blocks in heap block record 5 of 11 + 240 bytes (240 requested / 0 slop) + Individual block sizes: 128; 112 + 1.98% of the heap (84.39% cumulative) + 2.27% of once-reported (96.37% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 6 of 11 + 96 bytes (96 requested / 0 slop) + 0.79% of the heap (85.19% cumulative) + 0.91% of once-reported (97.28% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 7 of 11 + 96 bytes (96 requested / 0 slop) + 0.79% of the heap (85.98% cumulative) + 0.91% of once-reported (98.19% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 8 of 11 + 80 bytes (80 requested / 0 slop) + 0.66% of the heap (86.64% cumulative) + 0.76% of once-reported (98.94% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 9 of 11 + 80 bytes (80 requested / 0 slop) + 0.66% of the heap (87.30% cumulative) + 0.76% of once-reported (99.70% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 10 of 11 + 16 bytes (16 requested / 0 slop) + 0.13% of the heap (87.43% cumulative) + 0.15% of once-reported (99.85% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 11 of 11 + 16 bytes (10 requested / 6 slop) + 0.13% of the heap (87.57% cumulative) + 0.15% of once-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 12,096 bytes (100.00%) in 30 blocks (100.00%) + Unreported: 1,232 bytes ( 10.19%) in 13 blocks ( 43.33%) + Once-reported: 10,592 bytes ( 87.57%) in 13 blocks ( 43.33%) + Twice-reported: 272 bytes ( 2.25%) in 4 blocks ( 13.33%) +} + diff --git a/memory/replace/dmd/test/complete-full1-live-expected.txt b/memory/replace/dmd/test/complete-full1-live-expected.txt new file mode 100644 index 0000000000..eaa1883e1f --- /dev/null +++ b/memory/replace/dmd/test/complete-full1-live-expected.txt @@ -0,0 +1,127 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full1-live-actual.txt complete-full1-live.json + +Invocation { + $DMD = '--mode=live --stacks=full --show-dump-stats=yes' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 12 + 8,192 bytes (7,169 requested / 1,023 slop) + 67.72% of the heap (67.72% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 2 of 12 + 1,024 bytes (1,023 requested / 1 slop) + 8.47% of the heap (76.19% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 9 blocks in heap block record 3 of 12 + 1,008 bytes (900 requested / 108 slop) + Individual block sizes: 112 x 9 + 8.33% of the heap (84.52% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 6 blocks in heap block record 4 of 12 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 4.37% of the heap (88.89% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 6 blocks in heap block record 5 of 12 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 4.37% of the heap (93.25% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 6 of 12 + 512 bytes (512 requested / 0 slop) + 4.23% of the heap (97.49% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 7 of 12 + 80 bytes (79 requested / 1 slop) + 0.66% of the heap (98.15% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 8 of 12 + 80 bytes (78 requested / 2 slop) + 0.66% of the heap (98.81% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 9 of 12 + 80 bytes (77 requested / 3 slop) + 0.66% of the heap (99.47% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 10 of 12 + 32 bytes (30 requested / 2 slop) + 0.26% of the heap (99.74% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 11 of 12 + 16 bytes (16 requested / 0 slop) + 0.13% of the heap (99.87% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 12 of 12 + 16 bytes (10 requested / 6 slop) + 0.13% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 12,096 bytes in 30 blocks +} + diff --git a/memory/replace/dmd/test/complete-full2-cumulative-expected.txt b/memory/replace/dmd/test/complete-full2-cumulative-expected.txt new file mode 100644 index 0000000000..5a225b9b8e --- /dev/null +++ b/memory/replace/dmd/test/complete-full2-cumulative-expected.txt @@ -0,0 +1,173 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full2-cumulative-actual.txt complete-full2-cumulative.json + +Invocation { + $DMD = '--mode=cumulative --stacks=full --show-dump-stats=yes' + Mode = 'cumulative' +} + +#----------------------------------------------------------------- + +Cumulative { + 100 blocks in heap block record 1 of 17 + 12,800 bytes (12,800 requested / 0 slop) + Individual block sizes: 128 x 100 + 42.37% of the heap (42.37% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 2 of 17 + 8,192 bytes (7,169 requested / 1,023 slop) + 27.12% of the heap (69.49% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 3 of 17 + 4,096 bytes (4,096 requested / 0 slop) + 13.56% of the heap (83.05% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 10 blocks in heap block record 4 of 17 + 1,120 bytes (1,000 requested / 120 slop) + Individual block sizes: 112 x 10 + 3.71% of the heap (86.76% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 5 of 17 + 1,024 bytes (1,024 requested / 0 slop) + 3.39% of the heap (90.15% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 6 of 17 + 1,024 bytes (1,023 requested / 1 slop) + 3.39% of the heap (93.54% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 6 blocks in heap block record 7 of 17 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 1.75% of the heap (95.29% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 6 blocks in heap block record 8 of 17 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 1.75% of the heap (97.03% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 9 of 17 + 512 bytes (512 requested / 0 slop) + 1.69% of the heap (98.73% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 10 of 17 + 80 bytes (79 requested / 1 slop) + 0.26% of the heap (98.99% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 11 of 17 + 80 bytes (78 requested / 2 slop) + 0.26% of the heap (99.26% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 12 of 17 + 80 bytes (77 requested / 3 slop) + 0.26% of the heap (99.52% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 13 of 17 + 64 bytes (64 requested / 0 slop) + 0.21% of the heap (99.74% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 14 of 17 + 32 bytes (30 requested / 2 slop) + 0.11% of the heap (99.84% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 15 of 17 + 16 bytes (16 requested / 0 slop) + 0.05% of the heap (99.89% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 16 of 17 + 16 bytes (16 requested / 0 slop) + 0.05% of the heap (99.95% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 17 of 17 + 16 bytes (10 requested / 6 slop) + 0.05% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 30,208 bytes in 135 blocks +} + diff --git a/memory/replace/dmd/test/complete-full2-dark-matter-expected.txt b/memory/replace/dmd/test/complete-full2-dark-matter-expected.txt new file mode 100644 index 0000000000..5f9585a8c6 --- /dev/null +++ b/memory/replace/dmd/test/complete-full2-dark-matter-expected.txt @@ -0,0 +1,140 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full2-dark-matter-actual.txt complete-full2-dark-matter.json + +Invocation { + $DMD = '--mode=dark-matter --stacks=full --show-dump-stats=yes' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +Twice-reported { + 1 block in heap block record 1 of 2 + 80 bytes (77 requested / 3 slop) + 2.81% of the heap (2.81% cumulative) + 83.33% of twice-reported (83.33% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 2 of 2 + 16 bytes (16 requested / 0 slop) + 0.56% of the heap (3.37% cumulative) + 16.67% of twice-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Unreported { + 9 blocks in heap block record 1 of 3 + 1,008 bytes (900 requested / 108 slop) + Individual block sizes: 112 x 9 + 35.39% of the heap (35.39% cumulative) + 48.84% of unreported (48.84% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 6 blocks in heap block record 2 of 3 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 18.54% of the heap (53.93% cumulative) + 25.58% of unreported (74.42% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 6 blocks in heap block record 3 of 3 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 18.54% of the heap (72.47% cumulative) + 25.58% of unreported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Once-reported { + 1 block in heap block record 1 of 4 + 512 bytes (512 requested / 0 slop) + 17.98% of the heap (17.98% cumulative) + 74.42% of once-reported (74.42% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 2 of 4 + 80 bytes (79 requested / 1 slop) + 2.81% of the heap (20.79% cumulative) + 11.63% of once-reported (86.05% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 3 of 4 + 80 bytes (78 requested / 2 slop) + 2.81% of the heap (23.60% cumulative) + 11.63% of once-reported (97.67% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 4 of 4 + 16 bytes (10 requested / 6 slop) + 0.56% of the heap (24.16% cumulative) + 2.33% of once-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 2,848 bytes (100.00%) in 27 blocks (100.00%) + Unreported: 2,064 bytes ( 72.47%) in 21 blocks ( 77.78%) + Once-reported: 688 bytes ( 24.16%) in 4 blocks ( 14.81%) + Twice-reported: 96 bytes ( 3.37%) in 2 blocks ( 7.41%) +} + diff --git a/memory/replace/dmd/test/complete-partial-live-expected.txt b/memory/replace/dmd/test/complete-partial-live-expected.txt new file mode 100644 index 0000000000..e7f27b0ee6 --- /dev/null +++ b/memory/replace/dmd/test/complete-partial-live-expected.txt @@ -0,0 +1,56 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-partial-live-actual.txt complete-partial-live.json + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 9,531 blocks in heap block record 1 of 4 + 9,759,744 bytes (9,759,744 requested / 0 slop) + Individual block sizes: 1,024 x 9,531 + 83.56% of the heap (83.56% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 16,822 blocks in heap block record 2 of 4 + 1,510,672 bytes (1,510,672 requested / 0 slop) + Individual block sizes: 1,024 x 469; 128 x 6,864; 16 x 9,489 + 12.93% of the heap (96.49% cumulative) + Allocated at { + #01: (no stack trace recorded due to --stacks=partial) + } +} + +Live { + 3,136 blocks in heap block record 3 of 4 + 401,408 bytes (401,408 requested / 0 slop) + Individual block sizes: 128 x 3,136 + 3.44% of the heap (99.93% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 511 blocks in heap block record 4 of 4 + 8,176 bytes (8,176 requested / 0 slop) + Individual block sizes: 16 x 511 + 0.07% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 11,680,000 bytes in 30,000 blocks +} + diff --git a/memory/replace/dmd/test/moz.build b/memory/replace/dmd/test/moz.build new file mode 100644 index 0000000000..77e4fa12a0 --- /dev/null +++ b/memory/replace/dmd/test/moz.build @@ -0,0 +1,26 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +GeckoSimplePrograms( + [ + "SmokeDMD", + ], + linkage=None, +) + +# See the comment at the top of SmokeDMD.cpp:RunTests(). +if CONFIG["CC_TYPE"] == "clang-cl": + CXXFLAGS += ["-Od", "-clang:-fno-lto"] +else: + CXXFLAGS += ["-O0", "-fno-lto"] + +DEFINES["MOZ_NO_MOZALLOC"] = True + +DisableStlWrapping() + +XPCSHELL_TESTS_MANIFESTS += [ + "xpcshell.toml", +] diff --git a/memory/replace/dmd/test/scan-test.py b/memory/replace/dmd/test/scan-test.py new file mode 100644 index 0000000000..c282b02693 --- /dev/null +++ b/memory/replace/dmd/test/scan-test.py @@ -0,0 +1,102 @@ +#! /usr/bin/env python +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Testing for the JSON file emitted by DMD heap scan mode when running SmokeDMD.""" + +import argparse +import gzip +import json +import sys + +# The DMD output version this script handles. +outputVersion = 5 + + +def parseCommandLine(): + description = """ +Ensure that DMD heap scan mode creates the correct output when run with SmokeDMD. +This is only for testing. Input files can be gzipped. +""" + p = argparse.ArgumentParser(description=description) + + p.add_argument( + "--clamp-contents", + action="store_true", + help="expect that the contents of the JSON input file have had " + "their addresses clamped", + ) + + p.add_argument("input_file", help="a file produced by DMD") + + return p.parse_args(sys.argv[1:]) + + +def checkScanContents(contents, expected): + if len(contents) != len(expected): + raise Exception( + "Expected " + + str(len(expected)) + + " things in contents but found " + + str(len(contents)) + ) + + for i in range(len(expected)): + if contents[i] != expected[i]: + raise Exception( + "Expected to find " + + expected[i] + + " at offset " + + str(i) + + " but found " + + contents[i] + ) + + +def main(): + args = parseCommandLine() + + # Handle gzipped input if necessary. + isZipped = args.input_file.endswith(".gz") + opener = gzip.open if isZipped else open + + with opener(args.input_file, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + invocation = j["invocation"] + + mode = invocation["mode"] + if mode != "scan": + raise Exception("bad 'mode' property: '{:s}'".format(mode)) + + blockList = j["blockList"] + + if len(blockList) != 1: + raise Exception("Expected only one block") + + b = blockList[0] + + # The expected values are based on hard-coded values in SmokeDMD.cpp. + if args.clamp_contents: + expected = ["0", "0", "0", b["addr"], b["addr"]] + else: + addr = int(b["addr"], 16) + expected = [ + "123", + "0", + str(format(addr - 1, "x")), + b["addr"], + str(format(addr + 1, "x")), + "0", + ] + + checkScanContents(b["contents"], expected) + + +if __name__ == "__main__": + main() diff --git a/memory/replace/dmd/test/script-diff-dark-matter-expected.txt b/memory/replace/dmd/test/script-diff-dark-matter-expected.txt new file mode 100644 index 0000000000..b1fc28bac5 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-dark-matter-expected.txt @@ -0,0 +1,127 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-diff-dark-matter-actual.txt script-diff-dark-matter1.json script-diff-dark-matter2.json + +Invocation 1 { + $DMD = '--mode=dark-matter' + Mode = 'dark-matter' +} + +Invocation 2 { + $DMD = '1' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +Twice-reported { + -1 blocks in heap block record 1 of 1 + -1,088 bytes (-1,064 requested / -24 slop) + Individual block sizes: -1,024; -127; 63 + 15.46% of the heap (15.46% cumulative) + 100.00% of twice-reported (100.00% cumulative) + Allocated at { + #01: F (F.cpp:99) + } + Reported at { + #01: R1 (R1.cpp:99) + } + Reported again at { + #01: R2 (R2.cpp:99) + } +} + +#----------------------------------------------------------------- + +Unreported { + 4 blocks in heap block record 1 of 5 + 16,384 bytes (16,384 requested / 0 slop) + Individual block sizes: 4,096 x 4 + -232.76% of the heap (-232.76% cumulative) + 371.01% of unreported (371.01% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +Unreported { + 7 blocks in heap block record 2 of 5 + -11,968 bytes (-12,016 requested / 48 slop) + Individual block sizes: -15,360; 2,048; 512 x 2; 128; -127; 64 x 4; 63 + 170.02% of the heap (-62.74% cumulative) + -271.01% of unreported (100.00% cumulative) + Allocated at { + #01: F (F.cpp:99) + } +} + +Unreported { + 0 blocks in heap block record 3 of 5 + 0 bytes (-384 requested / 384 slop) + Individual block sizes: (no change) + -0.00% of the heap (-62.74% cumulative) + 0.00% of unreported (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Unreported { + -2 blocks in heap block record 4 of 5 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 8,192 x 2; -4,096 x 4 + -0.00% of the heap (-62.74% cumulative) + 0.00% of unreported (100.00% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Unreported { + 0 blocks in heap block record 5 of 5 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 20,480; -16,384; -8,192; 4,096 + -0.00% of the heap (-62.74% cumulative) + 0.00% of unreported (100.00% cumulative) + Allocated at { + #01: (no stack trace recorded due to --stacks=partial) + } +} + +#----------------------------------------------------------------- + +Once-reported { + -3 blocks in heap block record 1 of 2 + -10,240 bytes (-10,192 requested / -48 slop) + Individual block sizes: -4,096 x 2; -2,048 + 145.48% of the heap (145.48% cumulative) + 98.77% of once-reported (98.77% cumulative) + Allocated at { + #01: D (D.cpp:99) + } + Reported at { + #01: R1 (R1.cpp:99) + } +} + +Once-reported { + -1 blocks in heap block record 2 of 2 + -127 bytes (-151 requested / 24 slop) + 1.80% of the heap (147.28% cumulative) + 1.23% of once-reported (100.00% cumulative) + Allocated at { + #01: F (F.cpp:99) + } + Reported at { + #01: R1 (R1.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: -7,039 bytes (100.00%) in 4 blocks (100.00%) + Unreported: 4,416 bytes (-62.74%) in 9 blocks (225.00%) + Once-reported: -10,367 bytes (147.28%) in -4 blocks (-100.00%) + Twice-reported: -1,088 bytes ( 15.46%) in -1 blocks (-25.00%) +} + diff --git a/memory/replace/dmd/test/script-diff-dark-matter1.json b/memory/replace/dmd/test/script-diff-dark-matter1.json new file mode 100644 index 0000000000..1175394150 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-dark-matter1.json @@ -0,0 +1,51 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=dark-matter", + "mode": "dark-matter" + }, + "blockList": [ + { "req": 4096, "alloc": "A", "num": 4 }, + + { "req": 4096, "alloc": "B", "num": 3 }, + { "req": 4096, "alloc": "B" }, + + { "req": 4096, "alloc": "C", "num": 2 }, + { "req": 4096, "alloc": "C", "num": 2 }, + + { "req": 4096, "alloc": "D", "reps": ["R1"], "num": 2 }, + { "req": 2000, "slop": 48, "alloc": "D", "reps": ["R1"] }, + + { "req": 15360, "alloc": "F" }, + { "req": 512, "alloc": "F", "num": 2 }, + { "req": 127, "alloc": "F" }, + { "req": 1024, "alloc": "F", "reps": ["R1"] }, + { "req": 127, "alloc": "F", "reps": ["R1"] }, + { "req": 1000, "slop": 24, "alloc": "F", "reps": ["R1", "R2"] }, + { "req": 127, "alloc": "F", "reps": ["R1", "R2"] }, + + { "req": 4096 }, + { "req": 8192 }, + { "req": 16384 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-diff-dark-matter2.json b/memory/replace/dmd/test/script-diff-dark-matter2.json new file mode 100644 index 0000000000..2c3061223f --- /dev/null +++ b/memory/replace/dmd/test/script-diff-dark-matter2.json @@ -0,0 +1,51 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "1", + "mode": "dark-matter" + }, + "blockList": [ + { "req": 4096, "alloc": "A", "num": 4 }, + + { "req": 8192, "alloc": "B" }, + { "req": 8192, "alloc": "B" }, + + { "req": 4000, "slop": 96, "alloc": "C", "num": 4 }, + + { "req": 4096, "alloc": "E", "num": 4 }, + + { "req": 2000, "slop": 48, "alloc": "F" }, + { "req": 1000, "slop": 24, "alloc": "F", "reps": ["R1"] }, + { "req": 512, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 128, "alloc": "F" }, + { "req": 63, "alloc": "F", "reps": ["R1", "R2"] }, + { "req": 64, "alloc": "F", "num": 4 }, + { "req": 63, "alloc": "F" }, + + { "req": 4096, "num": 2 }, + { "req": 20480 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-diff-live-expected.txt b/memory/replace/dmd/test/script-diff-live-expected.txt new file mode 100644 index 0000000000..20208c0768 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-live-expected.txt @@ -0,0 +1,81 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-diff-live-actual.txt script-diff-live1.json script-diff-live2.json + +Invocation 1 { + $DMD = '--mode=live' + Mode = 'live' +} + +Invocation 2 { + $DMD = '--mode=live --stacks=partial' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 4 blocks in heap block record 1 of 6 + 16,384 bytes (16,384 requested / 0 slop) + Individual block sizes: 4,096 x 4 + -232.76% of the heap (-232.76% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 6 + -13,183 bytes (-13,231 requested / 48 slop) + Individual block sizes: -15,360; 2,048; -1,024; 512 x 2; 128; -127 x 3; 64 x 4; 63 x 2 + 187.29% of the heap (-45.48% cumulative) + Allocated at { + #01: F (F.cpp:99) + } +} + +Live { + -3 blocks in heap block record 3 of 6 + -10,240 bytes (-10,192 requested / -48 slop) + Individual block sizes: -4,096 x 2; -2,048 + 145.48% of the heap (100.00% cumulative) + Allocated at { + #01: D (D.cpp:99) + } +} + +Live { + 0 blocks in heap block record 4 of 6 + 0 bytes (-384 requested / 384 slop) + Individual block sizes: (no change) + -0.00% of the heap (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Live { + -2 blocks in heap block record 5 of 6 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 8,192 x 2; -4,096 x 4 + -0.00% of the heap (100.00% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 0 blocks in heap block record 6 of 6 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 20,480; -16,384; -8,192; 4,096 + -0.00% of the heap (100.00% cumulative) + Allocated at { + #01: (no stack trace recorded due to --stacks=partial) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: -7,039 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-diff-live1.json b/memory/replace/dmd/test/script-diff-live1.json new file mode 100644 index 0000000000..1296b9ea09 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-live1.json @@ -0,0 +1,51 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=live", + "mode": "live" + }, + "blockList": [ + { "req": 4096, "alloc": "A", "num": 4 }, + + { "req": 4096, "alloc": "B", "num": 4 }, + + { "req": 4096, "alloc": "C", "num": 4 }, + + { "req": 4096, "alloc": "D" }, + { "req": 4096, "alloc": "D" }, + { "req": 2000, "slop": 48, "alloc": "D" }, + + { "req": 15360, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 127, "alloc": "F" }, + { "req": 1024, "alloc": "F" }, + { "req": 127, "alloc": "F" }, + { "req": 1000, "slop": 24, "alloc": "F" }, + { "req": 127, "alloc": "F" }, + + { "req": 4096 }, + { "req": 8192 }, + { "req": 16384 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-diff-live2.json b/memory/replace/dmd/test/script-diff-live2.json new file mode 100644 index 0000000000..723ea5ff35 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-live2.json @@ -0,0 +1,53 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=live --stacks=partial", + "mode": "live" + }, + "blockList": [ + { "req": 4096, "alloc": "A", "num": 3 }, + { "req": 4096, "alloc": "A" }, + + { "req": 8192, "alloc": "B" }, + { "req": 8192, "alloc": "B" }, + + { "req": 4000, "slop": 96, "alloc": "C", "num": 4 }, + + { "req": 4096, "alloc": "E" }, + { "req": 4096, "alloc": "E" }, + { "req": 4096, "alloc": "E" }, + { "req": 4096, "alloc": "E" }, + + { "req": 2000, "slop": 48, "alloc": "F" }, + { "req": 1000, "slop": 24, "alloc": "F" }, + { "req": 512, "alloc": "F", "num": 4 }, + { "req": 128, "alloc": "F" }, + { "req": 63, "alloc": "F" }, + { "req": 64, "alloc": "F", "num": 4 }, + { "req": 63, "alloc": "F" }, + + { "req": 4096 }, + { "req": 4096 }, + { "req": 20480 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt b/memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt new file mode 100644 index 0000000000..9428ef45fb --- /dev/null +++ b/memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt @@ -0,0 +1,72 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-ignore-alloc-fns-actual.txt --ignore-alloc-fns script-ignore-alloc-fns.json + +Invocation { + $DMD = '1' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +# no twice-reported heap blocks + +#----------------------------------------------------------------- + +Unreported { + 1 block in heap block record 1 of 4 + 1,048,576 bytes (1,048,576 requested / 0 slop) + 93.22% of the heap (93.22% cumulative) + 93.22% of unreported (93.22% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +Unreported { + 1 block in heap block record 2 of 4 + 65,536 bytes (65,536 requested / 0 slop) + 5.83% of the heap (99.05% cumulative) + 5.83% of unreported (99.05% cumulative) + Allocated at { + #01: js::jit::JitRuntime::initialize(JSContext*) (Ion.cpp:301) + } +} + +Unreported { + 1 block in heap block record 3 of 4 + 8,192 bytes (8,000 requested / 192 slop) + 0.73% of the heap (99.78% cumulative) + 0.73% of unreported (99.78% cumulative) + Allocated at { + #01: mozilla::Vector::growStorageBy(unsigned long) (Vector.h:802) + #02: D (D.cpp:99) + } +} + +Unreported { + 1 block in heap block record 4 of 4 + 2,500 bytes (2,500 requested / 0 slop) + 0.22% of the heap (100.00% cumulative) + 0.22% of unreported (100.00% cumulative) + Allocated at { + #01: g_type_create_instance (/usr/lib/x86_64-linux-gnu/libgobject-2.0.so.0) + #02: not_an_alloc_function_so_alloc_functions_below_here_will_not_be_stripped (blah) + #03: replace_posix_memalign (replace_malloc.h:120) + #04: ??? (/lib/x86_64-linux-gnu/libglib-2.0.so.0) + #05: another_non_alloc_function (blah) + } +} + +#----------------------------------------------------------------- + +# no once-reported heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 1,124,804 bytes (100.00%) in 4 blocks (100.00%) + Unreported: 1,124,804 bytes (100.00%) in 4 blocks (100.00%) + Once-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) + Twice-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) +} + diff --git a/memory/replace/dmd/test/script-ignore-alloc-fns.json b/memory/replace/dmd/test/script-ignore-alloc-fns.json new file mode 100644 index 0000000000..a6c7c8419a --- /dev/null +++ b/memory/replace/dmd/test/script-ignore-alloc-fns.json @@ -0,0 +1,45 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "1", + "mode": "dark-matter" + }, + "blockList": [ + { "req": 1048576, "alloc": "A" }, + { "req": 65536, "alloc": "B" }, + { "req": 8000, "slop": 192, "alloc": "C" }, + { "req": 2500, "alloc": "D" } + ], + "traceTable": { + "A": ["AA", "AB", "AC", "AD"], + "B": ["BA", "BB", "BC"], + "C": ["CA", "CB", "CC", "CD"], + "D": ["DA", "DB", "DD", "DD", "DE", "DF", "DG", "DH", "DI", "DJ"] + }, + "frameTable": { + "AA": "#00: replace_malloc (DMD.cpp:1106)", + "AB": "#00: moz_xmalloc (mozalloc.cpp:68)", + "AC": "#00: operator new(unsigned long) (mozalloc.h:208)", + "AD": "#00: A (A.cpp:99)", + + "BA": "#00: replace_calloc (DMD.cpp:1125)", + "BB": "#00: js_calloc(unsigned long) (Utility.h:107)", + "BC": "#06: js::jit::JitRuntime::initialize(JSContext*) (Ion.cpp:301)", + + "CA": "#00: replace_realloc (DMD.cpp:1153)", + "CB": "#00: bool* mozilla::MallocAllocPolicy::pod_realloc<bool>(bool*, unsigned long, unsigned long) (AllocPolicy.h:74)", + "CC": "#00: mozilla::Vector::growStorageBy(unsigned long) (Vector.h:802)", + "CD": "#00: D (D.cpp:99)", + + "DA": "#00: replace_memalign (DMD.cpp:1181)", + "DB": "#00: replace_posix_memalign (replace_malloc.h:120)", + "DC": "#00: ??? (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DD": "#00: g_slice_alloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DE": "#00: g_slice_alloc0 (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DF": "#00: g_type_create_instance (/usr/lib/x86_64-linux-gnu/libgobject-2.0.so.0)", + "DG": "#00: not_an_alloc_function_so_alloc_functions_below_here_will_not_be_stripped (blah)", + "DH": "#00: replace_posix_memalign (replace_malloc.h:120)", + "DI": "#00: ??? (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DJ": "#00: another_non_alloc_function (blah)" + } +} diff --git a/memory/replace/dmd/test/script-max-frames-1-expected.txt b/memory/replace/dmd/test/script-max-frames-1-expected.txt new file mode 100644 index 0000000000..65a00762bb --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames-1-expected.txt @@ -0,0 +1,26 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-max-frames-1-actual.txt --max-frames=1 script-max-frames.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 4 blocks in heap block record 1 of 1 + 4,416 bytes (4,404 requested / 12 slop) + Individual block sizes: 4,096; 128; 112; 80 + 100.00% of the heap (100.00% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 4,416 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-max-frames-3-expected.txt b/memory/replace/dmd/test/script-max-frames-3-expected.txt new file mode 100644 index 0000000000..5df4914738 --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames-3-expected.txt @@ -0,0 +1,48 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-max-frames-3-actual.txt --max-frames=3 --no-fix-stacks script-max-frames.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 2 blocks in heap block record 1 of 3 + 4,224 bytes (4,224 requested / 0 slop) + Individual block sizes: 4,096; 128 + 95.65% of the heap (95.65% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: F (F.cpp:99) + #03: G (G.cpp:99) + } +} + +Live { + 1 block in heap block record 2 of 3 + 112 bytes (100 requested / 12 slop) + 2.54% of the heap (98.19% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: X (X.cpp:99) + #03: Y (Y.cpp:99) + } +} + +Live { + 1 block in heap block record 3 of 3 + 80 bytes (80 requested / 0 slop) + 1.81% of the heap (100.00% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 4,416 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-max-frames-8-expected.txt b/memory/replace/dmd/test/script-max-frames-8-expected.txt new file mode 100644 index 0000000000..174992d5b8 --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames-8-expected.txt @@ -0,0 +1,69 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-max-frames-8-actual.txt script-max-frames.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 4 + 4,096 bytes (4,096 requested / 0 slop) + 92.75% of the heap (92.75% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: F (F.cpp:99) + #03: G (G.cpp:99) + #04: H (H.cpp:99) + #05: I (I.cpp:99) + #06: J (J.cpp:99) + #07: K (K.cpp:99) + #08: L (L.cpp:99) + } +} + +Live { + 1 block in heap block record 2 of 4 + 128 bytes (128 requested / 0 slop) + 2.90% of the heap (95.65% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: F (F.cpp:99) + #03: G (G.cpp:99) + #04: R (R.cpp:99) + #05: S (S.cpp:99) + #06: T (T.cpp:99) + #07: U (U.cpp:99) + #08: V (V.cpp:99) + } +} + +Live { + 1 block in heap block record 3 of 4 + 112 bytes (100 requested / 12 slop) + 2.54% of the heap (98.19% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: X (X.cpp:99) + #03: Y (Y.cpp:99) + #04: Z (Z.cpp:99) + } +} + +Live { + 1 block in heap block record 4 of 4 + 80 bytes (80 requested / 0 slop) + 1.81% of the heap (100.00% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 4,416 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-max-frames.json b/memory/replace/dmd/test/script-max-frames.json new file mode 100644 index 0000000000..6de17a88c3 --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames.json @@ -0,0 +1,43 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=live --stacks=full", + "mode": "live" + }, + "blockList": [ + { "req": 4096, "alloc": "A" }, + { "req": 128, "alloc": "B" }, + { "req": 100, "slop": 12, "alloc": "C" }, + { "req": 80, "alloc": "D" } + ], + "traceTable": { + "A": ["E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"], + "B": ["E", "F", "G", "R", "S", "T", "U", "V"], + "C": ["E", "X", "Y", "Z"], + "D": ["E"] + }, + "frameTable": { + "E": "#00: E (E.cpp:99)", + "F": "#00: F (F.cpp:99)", + "G": "#00: G (G.cpp:99)", + "H": "#00: H (H.cpp:99)", + "I": "#00: I (I.cpp:99)", + "J": "#00: J (J.cpp:99)", + "K": "#00: K (K.cpp:99)", + "L": "#00: L (L.cpp:99)", + "M": "#00: M (M.cpp:99)", + "N": "#00: N (N.cpp:99)", + "O": "#00: O (O.cpp:99)", + "P": "#00: P (P.cpp:99)", + "Q": "#00: Q (Q.cpp:99)", + "R": "#00: R (R.cpp:99)", + "S": "#00: S (S.cpp:99)", + "T": "#00: T (T.cpp:99)", + "U": "#00: U (U.cpp:99)", + "V": "#00: V (V.cpp:99)", + "W": "#00: W (W.cpp:99)", + "X": "#00: X (X.cpp:99)", + "Y": "#00: Y (Y.cpp:99)", + "Z": "#00: Z (Z.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt b/memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt new file mode 100644 index 0000000000..8de03d953b --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-num-blocks-actual.txt --sort-by=num-blocks script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 8 blocks in heap block record 1 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (33.32% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (66.67% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 5 blocks in heap block record 3 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (100.00% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by-req-expected.txt b/memory/replace/dmd/test/script-sort-by-req-expected.txt new file mode 100644 index 0000000000..3ab21ba8f7 --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-req-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-req-actual.txt --sort-by=req --no-fix-stacks script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 5 blocks in heap block record 1 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (33.33% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (66.68% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 8 blocks in heap block record 3 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by-slop-expected.txt b/memory/replace/dmd/test/script-sort-by-slop-expected.txt new file mode 100644 index 0000000000..c325c7ed40 --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-slop-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-slop-actual.txt --sort-by=slop script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 8 blocks in heap block record 1 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (33.32% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (66.67% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 5 blocks in heap block record 3 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (100.00% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by-usable-expected.txt b/memory/replace/dmd/test/script-sort-by-usable-expected.txt new file mode 100644 index 0000000000..8239a4759e --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-usable-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-usable-actual.txt --sort-by=usable script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 5 blocks in heap block record 1 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (33.35% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (66.68% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +Live { + 8 blocks in heap block record 3 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by.json.gz b/memory/replace/dmd/test/script-sort-by.json.gz Binary files differnew file mode 100644 index 0000000000..f665c945c2 --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by.json.gz diff --git a/memory/replace/dmd/test/test_dmd.js b/memory/replace/dmd/test/test_dmd.js new file mode 100644 index 0000000000..e092c79a02 --- /dev/null +++ b/memory/replace/dmd/test/test_dmd.js @@ -0,0 +1,232 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*-*/ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const { FileUtils } = ChromeUtils.importESModule( + "resource://gre/modules/FileUtils.sys.mjs" +); + +// The xpcshell test harness sets PYTHON so we can read it here. +var gPythonName = Services.env.get("PYTHON"); + +const gCwd = Services.dirsvc.get("CurWorkD", Ci.nsIFile); + +function getRelativeFile(...components) { + return new FileUtils.File(PathUtils.join(gCwd.path, ...components)); +} + +// If we're testing locally, the executable file is in "CurProcD". Otherwise, +// it is in another location that we have to find. +function getExecutable(aFilename) { + let file = new FileUtils.File( + PathUtils.join(Services.dirsvc.get("CurProcD", Ci.nsIFile).path, aFilename) + ); + if (!file.exists()) { + file = gCwd.clone(); + while (file.path.includes("xpcshell")) { + file = file.parent; + } + file.append("bin"); + file.append(aFilename); + } + return file; +} + +var gIsWindows = Services.appinfo.OS === "WINNT"; +var gDmdTestFile = getExecutable("SmokeDMD" + (gIsWindows ? ".exe" : "")); + +var gDmdScriptFile = getExecutable("dmd.py"); + +var gScanTestFile = getRelativeFile("scan-test.py"); + +function readFile(aFile) { + let fstream = Cc["@mozilla.org/network/file-input-stream;1"].createInstance( + Ci.nsIFileInputStream + ); + let cstream = Cc["@mozilla.org/intl/converter-input-stream;1"].createInstance( + Ci.nsIConverterInputStream + ); + fstream.init(aFile, -1, 0, 0); + cstream.init(fstream, "UTF-8", 0, 0); + + let data = ""; + let str = {}; + let read = 0; + do { + // Read as much as we can and put it in str.value. + read = cstream.readString(0xffffffff, str); + data += str.value; + } while (read != 0); + + cstream.close(); // this closes fstream + return data.replace(/\r/g, ""); // normalize line endings +} + +function runProcess(aExeFile, aArgs) { + let process = Cc["@mozilla.org/process/util;1"].createInstance(Ci.nsIProcess); + process.init(aExeFile); + process.run(/* blocking = */ true, aArgs, aArgs.length); + return process.exitValue; +} + +function test(aPrefix, aArgs) { + // DMD writes the JSON files to CurWorkD, so we do likewise here with + // |actualFile| for consistency. It is removed once we've finished. + let expectedFile = getRelativeFile(`${aPrefix}-expected.txt`); + let actualFile = getRelativeFile(`${aPrefix}-actual.txt`); + + // Run dmd.py on the JSON file, producing |actualFile|. + + let args = [ + gDmdScriptFile.path, + "--filter-stacks-for-testing", + "-o", + actualFile.path, + ].concat(aArgs); + + runProcess(new FileUtils.File(gPythonName), args); + + // Compare |expectedFile| with |actualFile|. We produce nice diffs with + // /usr/bin/diff on systems that have it (Mac and Linux). Otherwise (Windows) + // we do a string compare of the file contents and then print them both if + // they don't match. + + let success; + try { + let rv = runProcess(new FileUtils.File("/usr/bin/diff"), [ + "-u", + expectedFile.path, + actualFile.path, + ]); + success = rv == 0; + } catch (e) { + let expectedData = readFile(expectedFile); + let actualData = readFile(actualFile); + success = expectedData === actualData; + if (!success) { + expectedData = expectedData.split("\n"); + actualData = actualData.split("\n"); + for (let i = 0; i < expectedData.length; i++) { + print("EXPECTED:" + expectedData[i]); + } + for (let i = 0; i < actualData.length; i++) { + print(" ACTUAL:" + actualData[i]); + } + } + } + + ok(success, aPrefix); + + actualFile.remove(true); +} + +// Run scan-test.py on the JSON file and see if it succeeds. +function scanTest(aJsonFilePath, aExtraArgs) { + let args = [gScanTestFile.path, aJsonFilePath].concat(aExtraArgs); + + return runProcess(new FileUtils.File(gPythonName), args) == 0; +} + +function run_test() { + let jsonFile, jsonFile2; + + // These tests do complete end-to-end testing of DMD, i.e. both the C++ code + // that generates the JSON output, and the script that post-processes that + // output. + // + // Run these synchronously, because test() updates the complete*.json files + // in-place (to fix stacks) when it runs dmd.py, and that's not safe to do + // asynchronously. + + Services.env.set("DMD", "1"); + + runProcess(gDmdTestFile, []); + + function test2(aTestName, aMode) { + let name = "complete-" + aTestName + "-" + aMode; + jsonFile = getRelativeFile(`${name}.json`); + test(name, [jsonFile.path]); + jsonFile.remove(true); + } + + // Please keep this in sync with RunTests() in SmokeDMD.cpp. + + test2("empty", "live"); + test2("empty", "dark-matter"); + test2("empty", "cumulative"); + + test2("full1", "live"); + test2("full1", "dark-matter"); + + test2("full2", "dark-matter"); + test2("full2", "cumulative"); + + test2("partial", "live"); + + // Heap scan testing. + jsonFile = getRelativeFile("basic-scan.json"); + ok(scanTest(jsonFile.path), "Basic scan test"); + + let is64Bit = Services.appinfo.is64Bit; + let basicScanFileName = "basic-scan-" + (is64Bit ? "64" : "32"); + test(basicScanFileName, ["--clamp-contents", jsonFile.path]); + ok( + scanTest(jsonFile.path, ["--clamp-contents"]), + "Scan with address clamping" + ); + + // Run the generic test a second time to ensure that the first time produced + // valid JSON output. "--clamp-contents" is passed in so we don't have to have + // more variants of the files. + test(basicScanFileName, ["--clamp-contents", jsonFile.path]); + jsonFile.remove(true); + + // These tests only test the post-processing script. They use hand-written + // JSON files as input. Ideally the JSON files would contain comments + // explaining how they work, but JSON doesn't allow comments, so I've put + // explanations here. + + // This just tests that stack traces of various lengths are truncated + // appropriately. The number of records in the output is different for each + // of the tested values. + jsonFile = getRelativeFile("script-max-frames.json"); + test("script-max-frames-8", [jsonFile.path]); // --max-frames=8 is the default + test("script-max-frames-3", [ + "--max-frames=3", + "--no-fix-stacks", + jsonFile.path, + ]); + test("script-max-frames-1", ["--max-frames=1", jsonFile.path]); + + // This file has three records that are shown in a different order for each + // of the different sort values. It also tests the handling of gzipped JSON + // files. + jsonFile = getRelativeFile("script-sort-by.json.gz"); + test("script-sort-by-usable", ["--sort-by=usable", jsonFile.path]); + test("script-sort-by-req", [ + "--sort-by=req", + "--no-fix-stacks", + jsonFile.path, + ]); + test("script-sort-by-slop", ["--sort-by=slop", jsonFile.path]); + test("script-sort-by-num-blocks", ["--sort-by=num-blocks", jsonFile.path]); + + // This file has several real stack traces taken from Firefox execution, each + // of which tests a different allocator function (or functions). + jsonFile = getRelativeFile("script-ignore-alloc-fns.json"); + test("script-ignore-alloc-fns", ["--ignore-alloc-fns", jsonFile.path]); + + // This tests "live"-mode diffs. + jsonFile = getRelativeFile("script-diff-live1.json"); + jsonFile2 = getRelativeFile("script-diff-live2.json"); + test("script-diff-live", [jsonFile.path, jsonFile2.path]); + + // This tests "dark-matter"-mode diffs. + jsonFile = getRelativeFile("script-diff-dark-matter1.json"); + jsonFile2 = getRelativeFile("script-diff-dark-matter2.json"); + test("script-diff-dark-matter", [jsonFile.path, jsonFile2.path]); +} diff --git a/memory/replace/dmd/test/xpcshell.toml b/memory/replace/dmd/test/xpcshell.toml new file mode 100644 index 0000000000..ef05a8ec26 --- /dev/null +++ b/memory/replace/dmd/test/xpcshell.toml @@ -0,0 +1,35 @@ +[DEFAULT] +support-files = [ + "basic-scan-32-expected.txt", + "basic-scan-64-expected.txt", + "complete-empty-live-expected.txt", + "complete-empty-dark-matter-expected.txt", + "complete-empty-cumulative-expected.txt", + "complete-full1-live-expected.txt", + "complete-full1-dark-matter-expected.txt", + "complete-full2-dark-matter-expected.txt", + "complete-full2-cumulative-expected.txt", + "complete-partial-live-expected.txt", + "scan-test.py", + "script-max-frames.json", + "script-max-frames-8-expected.txt", + "script-max-frames-3-expected.txt", + "script-max-frames-1-expected.txt", + "script-sort-by.json.gz", + "script-sort-by-usable-expected.txt", + "script-sort-by-req-expected.txt", + "script-sort-by-slop-expected.txt", + "script-sort-by-num-blocks-expected.txt", + "script-ignore-alloc-fns.json", + "script-ignore-alloc-fns-expected.txt", + "script-diff-live1.json", + "script-diff-live2.json", + "script-diff-live-expected.txt", + "script-diff-dark-matter1.json", + "script-diff-dark-matter2.json", + "script-diff-dark-matter-expected.txt", +] + +["test_dmd.js"] +dmd = true +skip-if = ["!(os=='linux' || os=='mac' || (os=='win' && !pgo))"] diff --git a/memory/replace/logalloc/LogAlloc.cpp b/memory/replace/logalloc/LogAlloc.cpp new file mode 100644 index 0000000000..a976b0c674 --- /dev/null +++ b/memory/replace/logalloc/LogAlloc.cpp @@ -0,0 +1,238 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <cstdlib> +#include <fcntl.h> + +#ifdef _WIN32 +# include <windows.h> +# include <io.h> +# include <process.h> +#else +# include <unistd.h> +# include <pthread.h> +#endif + +#include "replace_malloc.h" +#include "FdPrintf.h" +#include "Mutex.h" + +static malloc_table_t sFuncs; +static intptr_t sFd = 0; +static bool sStdoutOrStderr = false; + +static Mutex sMutex MOZ_UNANNOTATED; + +#ifndef _WIN32 +static void prefork() MOZ_NO_THREAD_SAFETY_ANALYSIS { sMutex.Lock(); } +static void postfork_parent() MOZ_NO_THREAD_SAFETY_ANALYSIS { sMutex.Unlock(); } +static void postfork_child() { sMutex.Init(); } +#endif + +static size_t GetPid() { return size_t(getpid()); } + +static size_t GetTid() { +#if defined(_WIN32) + return size_t(GetCurrentThreadId()); +#else + return size_t(pthread_self()); +#endif +} + +#ifdef ANDROID +/* Android doesn't have pthread_atfork defined in pthread.h */ +extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void), + void (*)(void)); +#endif + +class LogAllocBridge : public ReplaceMallocBridge { + virtual void InitDebugFd(mozilla::DebugFdRegistry& aRegistry) override { + if (!sStdoutOrStderr) { + aRegistry.RegisterHandle(sFd); + } + } +}; + +/* Do a simple, text-form, log of all calls to replace-malloc functions. + * Use locking to guarantee that an allocation that did happen is logged + * before any other allocation/free happens. + */ + +static void* replace_malloc(size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.malloc(aSize); + FdPrintf(sFd, "%zu %zu malloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr); + return ptr; +} + +static int replace_posix_memalign(void** aPtr, size_t aAlignment, + size_t aSize) { + MutexAutoLock lock(sMutex); + int ret = sFuncs.posix_memalign(aPtr, aAlignment, aSize); + FdPrintf(sFd, "%zu %zu posix_memalign(%zu,%zu)=%p\n", GetPid(), GetTid(), + aAlignment, aSize, (ret == 0) ? *aPtr : nullptr); + return ret; +} + +static void* replace_aligned_alloc(size_t aAlignment, size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.aligned_alloc(aAlignment, aSize); + FdPrintf(sFd, "%zu %zu aligned_alloc(%zu,%zu)=%p\n", GetPid(), GetTid(), + aAlignment, aSize, ptr); + return ptr; +} + +static void* replace_calloc(size_t aNum, size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.calloc(aNum, aSize); + FdPrintf(sFd, "%zu %zu calloc(%zu,%zu)=%p\n", GetPid(), GetTid(), aNum, aSize, + ptr); + return ptr; +} + +static void* replace_realloc(void* aPtr, size_t aSize) { + MutexAutoLock lock(sMutex); + void* new_ptr = sFuncs.realloc(aPtr, aSize); + FdPrintf(sFd, "%zu %zu realloc(%p,%zu)=%p\n", GetPid(), GetTid(), aPtr, aSize, + new_ptr); + return new_ptr; +} + +static void replace_free(void* aPtr) { + MutexAutoLock lock(sMutex); + FdPrintf(sFd, "%zu %zu free(%p)\n", GetPid(), GetTid(), aPtr); + sFuncs.free(aPtr); +} + +static void* replace_memalign(size_t aAlignment, size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.memalign(aAlignment, aSize); + FdPrintf(sFd, "%zu %zu memalign(%zu,%zu)=%p\n", GetPid(), GetTid(), + aAlignment, aSize, ptr); + return ptr; +} + +static void* replace_valloc(size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.valloc(aSize); + FdPrintf(sFd, "%zu %zu valloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr); + return ptr; +} + +static void replace_jemalloc_stats(jemalloc_stats_t* aStats, + jemalloc_bin_stats_t* aBinStats) { + MutexAutoLock lock(sMutex); + sFuncs.jemalloc_stats_internal(aStats, aBinStats); + FdPrintf(sFd, "%zu %zu jemalloc_stats()\n", GetPid(), GetTid()); +} + +void replace_init(malloc_table_t* aTable, ReplaceMallocBridge** aBridge) { + /* Initialize output file descriptor from the MALLOC_LOG environment + * variable. Numbers up to 9999 are considered as a preopened file + * descriptor number. Other values are considered as a file name. */ +#ifdef _WIN32 + wchar_t* log = _wgetenv(L"MALLOC_LOG"); +#else + char* log = getenv("MALLOC_LOG"); +#endif + if (log && *log) { + int fd = 0; + const auto* fd_num = log; + while (*fd_num) { + /* Reject non digits. */ + if (*fd_num < '0' || *fd_num > '9') { + fd = -1; + break; + } + fd = fd * 10 + (*fd_num - '0'); + /* Reject values >= 10000. */ + if (fd >= 10000) { + fd = -1; + break; + } + fd_num++; + } + if (fd == 1 || fd == 2) { + sStdoutOrStderr = true; + } +#ifdef _WIN32 + // See comment in FdPrintf.h as to why CreateFile is used. + HANDLE handle; + if (fd > 0) { + handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); + } else { + handle = + CreateFileW(log, FILE_APPEND_DATA, FILE_SHARE_READ | FILE_SHARE_WRITE, + nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr); + } + if (handle != INVALID_HANDLE_VALUE) { + sFd = reinterpret_cast<intptr_t>(handle); + } +#else + if (fd == -1) { + fd = open(log, O_WRONLY | O_CREAT | O_APPEND, 0644); + } + if (fd > 0) { + sFd = fd; + } +#endif + } + + // Don't initialize if we weren't passed a valid MALLOC_LOG. + if (sFd == 0) { + return; + } + + sMutex.Init(); + static LogAllocBridge bridge; + sFuncs = *aTable; +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#define MALLOC_DECL(name, ...) aTable->name = replace_##name; +#include "malloc_decls.h" + aTable->jemalloc_stats_internal = replace_jemalloc_stats; + if (!getenv("MALLOC_LOG_MINIMAL")) { + aTable->posix_memalign = replace_posix_memalign; + aTable->aligned_alloc = replace_aligned_alloc; + aTable->valloc = replace_valloc; + } + *aBridge = &bridge; + +#ifndef _WIN32 + /* When another thread has acquired a lock before forking, the child + * process will inherit the lock state but the thread, being nonexistent + * in the child process, will never release it, leading to a dead-lock + * whenever the child process gets the lock. We thus need to ensure no + * other thread is holding the lock before forking, by acquiring it + * ourselves, and releasing it after forking in the parent process and + * resetting it to its initial state in the child process. The latter is + * important because some implementations (notably macOS) prevent a lock from + * being unlocked by a different thread than the one which locked it in the + * first place. + * Windows doesn't have this problem since there is no fork(). + * The real allocator, however, might be doing the same thing (jemalloc + * does). But pthread_atfork `prepare` handlers (first argument) are + * processed in reverse order they were established. But replace_init + * runs before the real allocator has had any chance to initialize and + * call pthread_atfork itself. This leads to its prefork running before + * ours. This leads to a race condition that can lead to a deadlock like + * the following: + * - thread A forks. + * - libc calls real allocator's prefork, so thread A holds the real + * allocator lock. + * - thread B calls malloc, which calls our replace_malloc. + * - consequently, thread B holds our lock. + * - thread B then proceeds to call the real allocator's malloc, and + * waits for the real allocator's lock, which thread A holds. + * - libc calls our prefork, so thread A waits for our lock, which + * thread B holds. + * To avoid this race condition, the real allocator's prefork must be + * called after ours, which means it needs to be registered before ours. + * So trick the real allocator into initializing itself without more side + * effects by calling malloc with a size it can't possibly allocate. */ + sFuncs.malloc(-1); + pthread_atfork(prefork, postfork_parent, postfork_child); +#endif +} diff --git a/memory/replace/logalloc/README b/memory/replace/logalloc/README new file mode 100644 index 0000000000..c2e8cf66ce --- /dev/null +++ b/memory/replace/logalloc/README @@ -0,0 +1,95 @@ +Logalloc is a replace-malloc library for Firefox (see +memory/build/replace_malloc.h) that dumps a log of memory allocations to a +given file descriptor or file name. That log can then be replayed against +Firefox's default memory allocator independently or through another +replace-malloc library, allowing the testing of other allocators under the +exact same workload. + +To get an allocation log the following environment variable when starting +Firefox: + MALLOC_LOG=/path/to/log-file + or + MALLOC_LOG=number + +When MALLOC_LOG is a number below 10000, it is considered as a file +descriptor number that is fed to Firefox when it is started. Otherwise, +it is considered as a file name. + +As those allocation logs can grow large quite quickly, it can be useful +to pipe the output to a compression tool. + +MALLOC_LOG=1 would send to Firefox's stdout, MALLOC_LOG=2 would send to +its stderr. Since in both cases that could be mixed with other output +from Firefox, it is usually better to use another file descriptor +by shell redirections, such as: + + MALLOC_LOG=3 firefox 3>&1 1>&2 | gzip -c > log.gz + +(3>&1 copies the `| gzip` pipe file descriptor to file descriptor #3, 1>&2 +then copies stderr to stdout. This leads to: fd1 and fd2 sending to stderr +of the parent process (the shell), and fd3 sending to gzip.) + +Each line of the allocations log is formatted as follows: + <pid> <tid> <function>([<args>])[=<result>] +where <args> is a comma separated list of values. The number of <args> and +the presence of <result> depend on the <function>. + +Example log: + 18545 18545 malloc(32)=0x7f90495120e0 + 18545 18545 calloc(1,148)=0x7f9049537480 + 18545 18545 realloc(0x7f90495120e0,64)=0x7f9049536680 + 18545 18545 posix_memalign(256,240)=0x7f9049583300 + 18545 18545 jemalloc_stats() + 18545 18545 free(0x7f9049536680) + +This log can be replayed with the logalloc-replay tool in +memory/replace/logalloc/replay. However, as the goal of that tool is to +reproduce the recorded memory allocations, it needs to avoid as much as +possible doing its own allocations for bookkeeping. Reading the logs as +they are would require data structures and memory allocations. As a +consequence, the logs need to be preprocessed beforehand. + +The logalloc_munge.py script is responsible for that preprocessing. It simply +takes a raw log on its stdin, and outputs the preprocessed log on its stdout. +It replaces pointer addresses with indexes the logalloc-replay tool can use +in a large (almost) linear array of allocation tracking slots (prefixed with +'#'). It also replaces the pids with numbers starting from 1 (such as the +first seen pid number is 1, the second is 2, etc.). + +The above example log would become the following, once preprocessed: + 1 1 malloc(32)=#1 + 1 1 calloc(1,148)=#2 + 1 1 realloc(#1,64)=#1 + 1 1 posix_memalign(256,240)=#3 + 1 1 jemalloc_stats() + 1 1 free(#1) + +The logalloc-replay tool then takes the preprocessed log on its stdin and +replays the allocations printed there, but will only replay those with the +same process id as the first line (which normally is 1). + +As the log files are simple text files, though, it is easy to separate out +the different processes log with e.g. grep, and feed the separate processes +logs to logalloc-replay. + +The logalloc-replay program won't output anything unless jemalloc_stats +records appears in the log. You can expect those to be recorded when going +to about:memory in Firefox, but they can also be added after preprocessing. + +Here is an example of what one can do: + + gunzip -c log.gz | python logalloc_munge.py | \ + awk '$1 == "2" { print $0 } !(NR % 10000) { print "2 1 jemalloc_stats()" }' | \ + ./logalloc-replay + +The above command replays the allocations of process #2, with some stats +output every 10000 records. + +The logalloc-replay tool itself being hooked with replace-malloc, it is possible +to set LD_PRELOAD/DYLD_INSERT_LIBRARIES/MOZ_REPLACE_MALLOC_LIB and replay a log +through a different allocator. For example: + + LD_PRELOAD=libreplace_jemalloc.so logalloc-replay < log + +Will replay the log against jemalloc4 (which is, as of writing, what +libreplace_jemalloc.so contains). diff --git a/memory/replace/logalloc/moz.build b/memory/replace/logalloc/moz.build new file mode 100644 index 0000000000..e0588cfb16 --- /dev/null +++ b/memory/replace/logalloc/moz.build @@ -0,0 +1,36 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ReplaceMalloc("logalloc") + +SOURCES += [ + "LogAlloc.cpp", +] + +# FdPrintf is statically linked if we're using static linking to mozjemalloc +# and PHC is also compiled-in. +if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"] or not CONFIG["MOZ_PHC"]: + SOURCES += [ + "/memory/build/FdPrintf.cpp", + ] + +DisableStlWrapping() +NO_PGO = True +DEFINES["MOZ_NO_MOZALLOC"] = True + +LOCAL_INCLUDES += [ + "/memory/build", +] + +# Android doesn't have pthread_atfork, but we have our own in mozglue. +if CONFIG["OS_TARGET"] == "Android" and FORCE_SHARED_LIB: + USE_LIBS += [ + "mozglue", + ] + +DIRS += [ + "replay", +] diff --git a/memory/replace/logalloc/replay/Makefile.in b/memory/replace/logalloc/replay/Makefile.in new file mode 100644 index 0000000000..73659add98 --- /dev/null +++ b/memory/replace/logalloc/replay/Makefile.in @@ -0,0 +1,48 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ifdef MOZ_CODE_COVERAGE +SKIP = 1 +endif + +ifdef CROSS_COMPILE +SKIP = 1 +endif + +ifneq ($(SKIP),1) + +ifeq ($(OS_TARGET),WINNT) +LOGALLOC_VAR = MOZ_REPLACE_MALLOC_LIB +else +ifeq ($(OS_TARGET),Darwin) +LOGALLOC_VAR = DYLD_INSERT_LIBRARIES +else +LOGALLOC_VAR = LD_PRELOAD +endif +endif + +ifndef MOZ_REPLACE_MALLOC_STATIC +LOGALLOC = $(LOGALLOC_VAR)=$(CURDIR)/../$(DLL_PREFIX)logalloc$(DLL_SUFFIX) +endif + +expected_output.log: $(srcdir)/replay.log +# The logalloc-replay program will only replay entries from the first pid, +# so the expected output only contains entries beginning with "1 " + grep "^1 " $< > $@ + +check:: $(srcdir)/replay.log expected_output.log $(srcdir)/expected_output_minimal.log +# Test with MALLOC_LOG as a file descriptor number +# We filter out anything happening before the first jemalloc_stats (first +# command in replay.log) because starting with libstdc++ 5, a static +# initializer in the STL allocates memory, which we obviously don't have +# in expected_output.log. + MALLOC_LOG=1 $(LOGALLOC) ./$(PROGRAM) < $< | sed -n '/jemalloc_stats/,$$p' | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - expected_output.log +# Test with MALLOC_LOG as a file name + $(RM) test_output.log + MALLOC_LOG=test_output.log $(LOGALLOC) ./$(PROGRAM) < $< + sed -n '/jemalloc_stats/,$$p' test_output.log | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - expected_output.log + + MALLOC_LOG=1 MALLOC_LOG_MINIMAL=1 $(LOGALLOC) ./$(PROGRAM) < $< | sed -n '/jemalloc_stats/,$$p' | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - $(srcdir)/expected_output_minimal.log + +endif diff --git a/memory/replace/logalloc/replay/Replay.cpp b/memory/replace/logalloc/replay/Replay.cpp new file mode 100644 index 0000000000..b5ad0c540e --- /dev/null +++ b/memory/replace/logalloc/replay/Replay.cpp @@ -0,0 +1,1159 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#define MOZ_MEMORY_IMPL +#include "mozmemory_wrap.h" + +#ifdef _WIN32 +# include <windows.h> +# include <io.h> +typedef intptr_t ssize_t; +#else +# include <sys/mman.h> +# include <unistd.h> +#endif +#ifdef XP_LINUX +# include <fcntl.h> +# include <stdlib.h> +#endif +#include <algorithm> +#include <cmath> +#include <cstdio> +#include <cstring> + +#include "mozilla/Assertions.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/Maybe.h" +#include "FdPrintf.h" + +using namespace mozilla; + +static void die(const char* message) { + /* Here, it doesn't matter that fprintf may allocate memory. */ + fprintf(stderr, "%s\n", message); + exit(1); +} + +#ifdef XP_LINUX +static size_t sPageSize = []() { return sysconf(_SC_PAGESIZE); }(); +#endif + +/* We don't want to be using malloc() to allocate our internal tracking + * data, because that would change the parameters of what is being measured, + * so we want to use data types that directly use mmap/VirtualAlloc. */ +template <typename T, size_t Len> +class MappedArray { + public: + MappedArray() : mPtr(nullptr) { +#ifdef XP_LINUX + MOZ_RELEASE_ASSERT(!((sizeof(T) * Len) & (sPageSize - 1)), + "MappedArray size must be a multiple of the page size"); +#endif + } + + ~MappedArray() { + if (mPtr) { +#ifdef _WIN32 + VirtualFree(mPtr, sizeof(T) * Len, MEM_RELEASE); +#elif defined(XP_LINUX) + munmap(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(mPtr) - + sPageSize), + sizeof(T) * Len + sPageSize * 2); +#else + munmap(mPtr, sizeof(T) * Len); +#endif + } + } + + T& operator[](size_t aIndex) const { + if (mPtr) { + return mPtr[aIndex]; + } + +#ifdef _WIN32 + mPtr = reinterpret_cast<T*>(VirtualAlloc( + nullptr, sizeof(T) * Len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)); + if (mPtr == nullptr) { + die("VirtualAlloc error"); + } +#else + size_t data_size = sizeof(T) * Len; + size_t size = data_size; +# ifdef XP_LINUX + // See below + size += sPageSize * 2; +# endif + mPtr = reinterpret_cast<T*>(mmap(nullptr, size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0)); + if (mPtr == MAP_FAILED) { + die("Mmap error"); + } +# ifdef XP_LINUX + // On Linux we request a page on either side of the allocation and + // mprotect them. This prevents mappings in /proc/self/smaps from being + // merged and allows us to parse this file to calculate the allocator's RSS. + MOZ_ASSERT(0 == mprotect(mPtr, sPageSize, 0)); + MOZ_ASSERT(0 == mprotect(reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(mPtr) + data_size + + sPageSize), + sPageSize, 0)); + mPtr = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(mPtr) + sPageSize); +# endif +#endif + return mPtr[aIndex]; + } + + bool ownsMapping(uintptr_t addr) const { return addr == (uintptr_t)mPtr; } + + bool allocated() const { return !!mPtr; } + + private: + mutable T* mPtr; +}; + +/* Type for records of allocations. */ +struct MemSlot { + void* mPtr; + + // mRequest is only valid if mPtr is non-null. It doesn't need to be cleared + // when memory is freed or realloc()ed. + size_t mRequest; +}; + +/* An almost infinite list of slots. + * In essence, this is a linked list of arrays of groups of slots. + * Each group is 1MB. On 64-bits, one group allows to store 64k allocations. + * Each MemSlotList instance can store 1023 such groups, which means more + * than 67M allocations. In case more would be needed, we chain to another + * MemSlotList, and so on. + * Using 1023 groups makes the MemSlotList itself page sized on 32-bits + * and 2 pages-sized on 64-bits. + */ +class MemSlotList { + static constexpr size_t kGroups = 1024 - 1; + static constexpr size_t kGroupSize = (1024 * 1024) / sizeof(MemSlot); + + MappedArray<MemSlot, kGroupSize> mSlots[kGroups]; + MappedArray<MemSlotList, 1> mNext; + + public: + MemSlot& operator[](size_t aIndex) const { + if (aIndex < kGroupSize * kGroups) { + return mSlots[aIndex / kGroupSize][aIndex % kGroupSize]; + } + aIndex -= kGroupSize * kGroups; + return mNext[0][aIndex]; + } + + // Ask if any of the memory-mapped buffers use this range. + bool ownsMapping(uintptr_t aStart) const { + for (const auto& slot : mSlots) { + if (slot.allocated() && slot.ownsMapping(aStart)) { + return true; + } + } + return mNext.ownsMapping(aStart) || + (mNext.allocated() && mNext[0].ownsMapping(aStart)); + } +}; + +/* Helper class for memory buffers */ +class Buffer { + public: + Buffer() : mBuf(nullptr), mLength(0) {} + + Buffer(const void* aBuf, size_t aLength) + : mBuf(reinterpret_cast<const char*>(aBuf)), mLength(aLength) {} + + /* Constructor for string literals. */ + template <size_t Size> + explicit Buffer(const char (&aStr)[Size]) : mBuf(aStr), mLength(Size - 1) {} + + /* Returns a sub-buffer up-to but not including the given aNeedle character. + * The "parent" buffer itself is altered to begin after the aNeedle + * character. + * If the aNeedle character is not found, return the entire buffer, and empty + * the "parent" buffer. */ + Buffer SplitChar(char aNeedle) { + char* buf = const_cast<char*>(mBuf); + char* c = reinterpret_cast<char*>(memchr(buf, aNeedle, mLength)); + if (!c) { + return Split(mLength); + } + + Buffer result = Split(c - buf); + // Remove the aNeedle character itself. + Split(1); + return result; + } + + // Advance to the position after aNeedle. This is like SplitChar but does not + // return the skipped portion. + void Skip(char aNeedle, unsigned nTimes = 1) { + for (unsigned i = 0; i < nTimes; i++) { + SplitChar(aNeedle); + } + } + + void SkipWhitespace() { + while (mLength > 0) { + if (!IsSpace(mBuf[0])) { + break; + } + mBuf++; + mLength--; + } + } + + static bool IsSpace(char c) { + switch (c) { + case ' ': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + return true; + } + return false; + } + + /* Returns a sub-buffer of at most aLength characters. The "parent" buffer is + * amputated of those aLength characters. If the "parent" buffer is smaller + * than aLength, then its length is used instead. */ + Buffer Split(size_t aLength) { + Buffer result(mBuf, std::min(aLength, mLength)); + mLength -= result.mLength; + mBuf += result.mLength; + return result; + } + + /* Move the buffer (including its content) to the memory address of the aOther + * buffer. */ + void Slide(Buffer aOther) { + memmove(const_cast<char*>(aOther.mBuf), mBuf, mLength); + mBuf = aOther.mBuf; + } + + /* Returns whether the two involved buffers have the same content. */ + bool operator==(Buffer aOther) { + return mLength == aOther.mLength && + (mBuf == aOther.mBuf || !strncmp(mBuf, aOther.mBuf, mLength)); + } + + bool operator!=(Buffer aOther) { return !(*this == aOther); } + + /* Returns true if the buffer is not empty. */ + explicit operator bool() { return mLength; } + + char operator[](size_t n) const { return mBuf[n]; } + + /* Returns the memory location of the buffer. */ + const char* get() { return mBuf; } + + /* Returns the memory location of the end of the buffer (technically, the + * first byte after the buffer). */ + const char* GetEnd() { return mBuf + mLength; } + + /* Extend the buffer over the content of the other buffer, assuming it is + * adjacent. */ + void Extend(Buffer aOther) { + MOZ_ASSERT(aOther.mBuf == GetEnd()); + mLength += aOther.mLength; + } + + size_t Length() const { return mLength; } + + private: + const char* mBuf; + size_t mLength; +}; + +/* Helper class to read from a file descriptor line by line. */ +class FdReader { + public: + explicit FdReader(int aFd, bool aNeedClose = false) + : mFd(aFd), + mNeedClose(aNeedClose), + mData(&mRawBuf, 0), + mBuf(&mRawBuf, sizeof(mRawBuf)) {} + + FdReader(FdReader&& aOther) noexcept + : mFd(aOther.mFd), + mNeedClose(aOther.mNeedClose), + mData(&mRawBuf, 0), + mBuf(&mRawBuf, sizeof(mRawBuf)) { + memcpy(mRawBuf, aOther.mRawBuf, sizeof(mRawBuf)); + aOther.mFd = -1; + aOther.mNeedClose = false; + aOther.mData = Buffer(); + aOther.mBuf = Buffer(); + } + + FdReader& operator=(const FdReader&) = delete; + FdReader(const FdReader&) = delete; + + ~FdReader() { + if (mNeedClose) { + close(mFd); + } + } + + /* Read a line from the file descriptor and returns it as a Buffer instance */ + Buffer ReadLine() { + while (true) { + Buffer result = mData.SplitChar('\n'); + + /* There are essentially three different cases here: + * - '\n' was found "early". In this case, the end of the result buffer + * is before the beginning of the mData buffer (since SplitChar + * amputated it). + * - '\n' was found as the last character of mData. In this case, mData + * is empty, but still points at the end of mBuf. result points to what + * used to be in mData, without the last character. + * - '\n' was not found. In this case too, mData is empty and points at + * the end of mBuf. But result points to the entire buffer that used to + * be pointed by mData. + * Only in the latter case do both result and mData's end match, and it's + * the only case where we need to refill the buffer. + */ + if (result.GetEnd() != mData.GetEnd()) { + return result; + } + + /* Since SplitChar emptied mData, make it point to what it had before. */ + mData = result; + + /* And move it to the beginning of the read buffer. */ + mData.Slide(mBuf); + + FillBuffer(); + + if (!mData) { + return Buffer(); + } + } + } + + private: + /* Fill the read buffer. */ + void FillBuffer() { + size_t size = mBuf.GetEnd() - mData.GetEnd(); + Buffer remainder(mData.GetEnd(), size); + + ssize_t len = 1; + while (remainder && len > 0) { + len = ::read(mFd, const_cast<char*>(remainder.get()), size); + if (len < 0) { + die("Read error"); + } + size -= len; + mData.Extend(remainder.Split(len)); + } + } + + /* File descriptor to read from. */ + int mFd; + bool mNeedClose; + + /* Part of data that was read from the file descriptor but not returned with + * ReadLine yet. */ + Buffer mData; + /* Buffer representation of mRawBuf */ + Buffer mBuf; + /* read() buffer */ + char mRawBuf[4096]; +}; + +MOZ_BEGIN_EXTERN_C + +/* Function declarations for all the replace_malloc _impl functions. + * See memory/build/replace_malloc.c */ +#define MALLOC_DECL(name, return_type, ...) \ + return_type name##_impl(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC +#include "malloc_decls.h" + +#define MALLOC_DECL(name, return_type, ...) return_type name(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC +#include "malloc_decls.h" + +#ifdef ANDROID + +/* mozjemalloc and jemalloc use pthread_atfork, which Android doesn't have. + * While gecko has one in libmozglue, the replay program can't use that. + * Since we're not going to fork anyways, make it a dummy function. */ +int pthread_atfork(void (*aPrepare)(void), void (*aParent)(void), + void (*aChild)(void)) { + return 0; +} +#endif + +MOZ_END_EXTERN_C + +template <unsigned Base = 10> +size_t parseNumber(Buffer aBuf) { + if (!aBuf) { + die("Malformed input"); + } + + size_t result = 0; + for (const char *c = aBuf.get(), *end = aBuf.GetEnd(); c < end; c++) { + result *= Base; + if ((*c >= '0' && *c <= '9')) { + result += *c - '0'; + } else if (Base == 16 && *c >= 'a' && *c <= 'f') { + result += *c - 'a' + 10; + } else if (Base == 16 && *c >= 'A' && *c <= 'F') { + result += *c - 'A' + 10; + } else { + die("Malformed input"); + } + } + return result; +} + +static size_t percent(size_t a, size_t b) { + if (!b) { + return 0; + } + return size_t(round(double(a) / double(b) * 100.0)); +} + +class Distribution { + public: + // Default constructor used for array initialisation. + Distribution() + : mMaxSize(0), + mNextSmallest(0), + mShift(0), + mArrayOffset(0), + mArraySlots(0), + mTotalRequests(0), + mRequests{0} {} + + Distribution(size_t max_size, size_t next_smallest, size_t bucket_size) + : mMaxSize(max_size), + mNextSmallest(next_smallest), + mShift(CeilingLog2(bucket_size)), + mArrayOffset(1 + next_smallest), + mArraySlots((max_size - next_smallest) >> mShift), + mTotalRequests(0), + mRequests{ + 0, + } { + MOZ_ASSERT(mMaxSize); + MOZ_RELEASE_ASSERT(mArraySlots <= MAX_NUM_BUCKETS); + } + + Distribution& operator=(const Distribution& aOther) = default; + + void addRequest(size_t request) { + MOZ_ASSERT(mMaxSize); + + mRequests[(request - mArrayOffset) >> mShift]++; + mTotalRequests++; + } + + void printDist(intptr_t std_err) { + MOZ_ASSERT(mMaxSize); + + // The translation to turn a slot index into a memory request size. + const size_t array_offset_add = (1 << mShift) + mNextSmallest; + + FdPrintf(std_err, "\n%zu-bin Distribution:\n", mMaxSize); + FdPrintf(std_err, " request : count percent\n"); + size_t range_start = mNextSmallest + 1; + for (size_t j = 0; j < mArraySlots; j++) { + size_t range_end = (j << mShift) + array_offset_add; + FdPrintf(std_err, "%5zu - %5zu: %6zu %6zu%%\n", range_start, range_end, + mRequests[j], percent(mRequests[j], mTotalRequests)); + range_start = range_end + 1; + } + } + + size_t maxSize() const { return mMaxSize; } + + private: + static constexpr size_t MAX_NUM_BUCKETS = 16; + + // If size is zero this distribution is uninitialised. + size_t mMaxSize; + size_t mNextSmallest; + + // Parameters to convert a size into a slot number. + unsigned mShift; + unsigned mArrayOffset; + + // The number of slots. + unsigned mArraySlots; + + size_t mTotalRequests; + size_t mRequests[MAX_NUM_BUCKETS]; +}; + +#ifdef XP_LINUX +struct MemoryMap { + uintptr_t mStart; + uintptr_t mEnd; + bool mReadable; + bool mPrivate; + bool mAnon; + bool mIsStack; + bool mIsSpecial; + size_t mRSS; + + bool IsCandidate() const { + // Candidates mappings are: + // * anonymous + // * they are private (not shared), + // * anonymous or "[heap]" (not another area such as stack), + // + // The only mappings we're falsely including are the .bss segments for + // shared libraries. + return mReadable && mPrivate && mAnon && !mIsStack && !mIsSpecial; + } +}; + +class SMapsReader : private FdReader { + private: + explicit SMapsReader(FdReader&& reader) : FdReader(std::move(reader)) {} + + public: + static Maybe<SMapsReader> open() { + int fd = ::open(FILENAME, O_RDONLY); + if (fd < 0) { + perror(FILENAME); + return mozilla::Nothing(); + } + + return Some(SMapsReader(FdReader(fd, true))); + } + + Maybe<MemoryMap> readMap(intptr_t aStdErr) { + // This is not very tolerant of format changes because things like + // parseNumber will crash if they get a bad value. TODO: make this + // soft-fail. + + Buffer line = ReadLine(); + if (!line) { + return Nothing(); + } + + // We're going to be at the start of an entry, start tokenising the first + // line. + + // Range + Buffer range = line.SplitChar(' '); + uintptr_t range_start = parseNumber<16>(range.SplitChar('-')); + uintptr_t range_end = parseNumber<16>(range); + + // Mode. + Buffer mode = line.SplitChar(' '); + if (mode.Length() != 4) { + FdPrintf(aStdErr, "Couldn't parse SMAPS file\n"); + return Nothing(); + } + bool readable = mode[0] == 'r'; + bool private_ = mode[3] == 'p'; + + // Offset, device and inode. + line.SkipWhitespace(); + bool zero_offset = !parseNumber<16>(line.SplitChar(' ')); + line.SkipWhitespace(); + bool no_device = line.SplitChar(' ') == Buffer("00:00"); + line.SkipWhitespace(); + bool zero_inode = !parseNumber(line.SplitChar(' ')); + bool is_anon = zero_offset && no_device && zero_inode; + + // Filename, or empty for anon mappings. + line.SkipWhitespace(); + Buffer filename = line.SplitChar(' '); + + bool is_stack; + bool is_special; + if (filename && filename[0] == '[') { + is_stack = filename == Buffer("[stack]"); + is_special = filename == Buffer("[vdso]") || + filename == Buffer("[vvar]") || + filename == Buffer("[vsyscall]"); + } else { + is_stack = false; + is_special = false; + } + + size_t rss = 0; + while ((line = ReadLine())) { + Buffer field = line.SplitChar(':'); + if (field == Buffer("VmFlags")) { + // This is the last field, at least in the current format. Break this + // loop to read the next mapping. + break; + } + + if (field == Buffer("Rss")) { + line.SkipWhitespace(); + Buffer value = line.SplitChar(' '); + rss = parseNumber(value) * 1024; + } + } + + return Some(MemoryMap({range_start, range_end, readable, private_, is_anon, + is_stack, is_special, rss})); + } + + static constexpr char FILENAME[] = "/proc/self/smaps"; +}; +#endif // XP_LINUX + +/* Class to handle dispatching the replay function calls to replace-malloc. */ +class Replay { + public: + Replay() { +#ifdef _WIN32 + // See comment in FdPrintf.h as to why native win32 handles are used. + mStdErr = reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE)); +#else + mStdErr = fileno(stderr); +#endif +#ifdef XP_LINUX + BuildInitialMapInfo(); +#endif + } + + void enableSlopCalculation() { mCalculateSlop = true; } + void enableMemset() { mDoMemset = true; } + + MemSlot& operator[](size_t index) const { return mSlots[index]; } + + void malloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::malloc_impl(size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void posix_memalign(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t alignment = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + void* ptr; + if (::posix_memalign_impl(&ptr, alignment, size) == 0) { + aSlot.mPtr = ptr; + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } else { + aSlot.mPtr = nullptr; + } + } + + void aligned_alloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t alignment = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::aligned_alloc_impl(alignment, size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void calloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t num = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::calloc_impl(num, size); + if (aSlot.mPtr) { + aSlot.mRequest = num * size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += num * size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void realloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + Buffer dummy = aArgs.SplitChar('#'); + if (dummy) { + die("Malformed input"); + } + size_t slot_id = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + MemSlot& old_slot = (*this)[slot_id]; + void* old_ptr = old_slot.mPtr; + old_slot.mPtr = nullptr; + aSlot.mPtr = ::realloc_impl(old_ptr, size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void free(Buffer& aArgs, Buffer& aResult) { + if (aResult) { + die("Malformed input"); + } + mOps++; + Buffer dummy = aArgs.SplitChar('#'); + if (dummy) { + die("Malformed input"); + } + size_t slot_id = parseNumber(aArgs); + MemSlot& slot = (*this)[slot_id]; + ::free_impl(slot.mPtr); + slot.mPtr = nullptr; + } + + void memalign(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t alignment = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::memalign_impl(alignment, size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void valloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::valloc_impl(size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void jemalloc_stats(Buffer& aArgs, Buffer& aResult) { + if (aArgs || aResult) { + die("Malformed input"); + } + mOps++; + jemalloc_stats_t stats; + // Using a variable length array here is a GCC & Clang extension. But it + // allows us to place this on the stack and not alter jemalloc's profiling. + const size_t num_bins = ::jemalloc_stats_num_bins(); + const size_t MAX_NUM_BINS = 100; + if (num_bins > MAX_NUM_BINS) { + die("Exceeded maximum number of jemalloc stats bins"); + } + jemalloc_bin_stats_t bin_stats[MAX_NUM_BINS] = {{0}}; + ::jemalloc_stats_internal(&stats, bin_stats); + +#ifdef XP_LINUX + size_t rss = get_rss(); +#endif + + size_t num_objects = 0; + size_t num_sloppy_objects = 0; + size_t total_allocated = 0; + size_t total_slop = 0; + size_t large_slop = 0; + size_t large_used = 0; + size_t huge_slop = 0; + size_t huge_used = 0; + size_t bin_slop[MAX_NUM_BINS] = {0}; + + for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) { + MemSlot& slot = mSlots[slot_id]; + if (slot.mPtr) { + size_t used = ::malloc_usable_size_impl(slot.mPtr); + size_t slop = used - slot.mRequest; + total_allocated += used; + total_slop += slop; + num_objects++; + if (slop) { + num_sloppy_objects++; + } + + if (used <= + (stats.subpage_max ? stats.subpage_max : stats.quantum_wide_max)) { + // We know that this is an inefficient linear search, but there's a + // small number of bins and this is simple. + for (unsigned i = 0; i < num_bins; i++) { + auto& bin = bin_stats[i]; + if (used == bin.size) { + bin_slop[i] += slop; + break; + } + } + } else if (used <= stats.large_max) { + large_slop += slop; + large_used += used; + } else { + huge_slop += slop; + huge_used += used; + } + } + } + + // This formula corresponds to the calculation of wasted (from committed and + // the other parameters) within jemalloc_stats() + size_t committed = stats.allocated + stats.waste + stats.page_cache + + stats.bookkeeping + stats.bin_unused; + + FdPrintf(mStdErr, "\n"); + FdPrintf(mStdErr, "Objects: %9zu\n", num_objects); + FdPrintf(mStdErr, "Slots: %9zu\n", mNumUsedSlots); + FdPrintf(mStdErr, "Ops: %9zu\n", mOps); + FdPrintf(mStdErr, "mapped: %9zu\n", stats.mapped); + FdPrintf(mStdErr, "committed: %9zu\n", committed); +#ifdef XP_LINUX + if (rss) { + FdPrintf(mStdErr, "rss: %9zu\n", rss); + } +#endif + FdPrintf(mStdErr, "allocated: %9zu\n", stats.allocated); + FdPrintf(mStdErr, "waste: %9zu\n", stats.waste); + FdPrintf(mStdErr, "dirty: %9zu\n", stats.page_cache); + FdPrintf(mStdErr, "bookkeep: %9zu\n", stats.bookkeeping); + FdPrintf(mStdErr, "bin-unused: %9zu\n", stats.bin_unused); + FdPrintf(mStdErr, "quantum-max: %9zu\n", stats.quantum_max); + FdPrintf(mStdErr, "quantum-wide-max: %9zu\n", stats.quantum_wide_max); + FdPrintf(mStdErr, "subpage-max: %9zu\n", stats.subpage_max); + FdPrintf(mStdErr, "large-max: %9zu\n", stats.large_max); + if (mCalculateSlop) { + size_t slop = mTotalAllocatedSize - mTotalRequestedSize; + FdPrintf(mStdErr, + "Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n", + slop / 1024, mTotalAllocatedSize / 1024, + percent(slop, mTotalAllocatedSize)); + } + FdPrintf(mStdErr, "Live sloppy objects: %zu/%zu (%zu%%)\n", + num_sloppy_objects, num_objects, + percent(num_sloppy_objects, num_objects)); + FdPrintf(mStdErr, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n", + total_slop / 1024, total_allocated / 1024, + percent(total_slop, total_allocated)); + + FdPrintf(mStdErr, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size", + "unused (c)", "total (c)", "used (c)", "non-full (r)", "total (r)", + "used (r)"); + for (unsigned i = 0; i < num_bins; i++) { + auto& bin = bin_stats[i]; + MOZ_ASSERT(bin.size); + FdPrintf(mStdErr, "%8zu %8zuKiB %7zuKiB %7zu%% %12zu %9zu %7zu%%\n", + bin.size, bin.bytes_unused / 1024, bin.bytes_total / 1024, + percent(bin.bytes_total - bin.bytes_unused, bin.bytes_total), + bin.num_non_full_runs, bin.num_runs, + percent(bin.num_runs - bin.num_non_full_runs, bin.num_runs)); + } + + FdPrintf(mStdErr, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent"); + for (unsigned i = 0; i < num_bins; i++) { + auto& bin = bin_stats[i]; + size_t used = bin.bytes_total - bin.bytes_unused; + FdPrintf(mStdErr, "%5zu %8zu %9zu %6zu%%\n", bin.size, bin_slop[i], used, + percent(bin_slop[i], used)); + } + FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "large", large_slop, large_used, + percent(large_slop, large_used)); + FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop, huge_used, + percent(huge_slop, huge_used)); + + print_distributions(stats, bin_stats); + } + + private: + /* + * Create and print frequency distributions of memory requests. + */ + void print_distributions(jemalloc_stats_t& stats, + jemalloc_bin_stats_t* bin_stats) { + const size_t num_bins = ::jemalloc_stats_num_bins(); + + // We compute distributions for all of the bins for small allocations + // (num_bins) plus two more distributions for larger allocations. + Distribution dists[num_bins + 2]; + + unsigned last_size = 0; + unsigned num_dists = 0; + for (unsigned i = 0; i < num_bins; i++) { + auto& bin = bin_stats[i]; + auto& dist = dists[num_dists++]; + + MOZ_ASSERT(bin.size); + if (bin.size <= 16) { + // 1 byte buckets. + dist = Distribution(bin.size, last_size, 1); + } else if (bin.size <= stats.quantum_max) { + // 4 buckets, (4 bytes per bucket with a 16 byte quantum). + dist = Distribution(bin.size, last_size, stats.quantum / 4); + } else if (bin.size <= stats.quantum_wide_max) { + // 8 buckets, (32 bytes per bucket with a 256 byte quantum-wide). + dist = Distribution(bin.size, last_size, stats.quantum_wide / 8); + } else { + // 16 buckets. + dist = Distribution(bin.size, last_size, (bin.size - last_size) / 16); + } + last_size = bin.size; + } + + // 16 buckets. + dists[num_dists] = Distribution(stats.page_size, last_size, + (stats.page_size - last_size) / 16); + num_dists++; + + // Buckets are 1/4 of the page size (12 buckets). + dists[num_dists] = + Distribution(stats.page_size * 4, stats.page_size, stats.page_size / 4); + num_dists++; + + MOZ_RELEASE_ASSERT(num_dists <= num_bins + 2); + + for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) { + MemSlot& slot = mSlots[slot_id]; + if (slot.mPtr) { + for (size_t i = 0; i < num_dists; i++) { + if (slot.mRequest <= dists[i].maxSize()) { + dists[i].addRequest(slot.mRequest); + break; + } + } + } + } + + for (unsigned i = 0; i < num_dists; i++) { + dists[i].printDist(mStdErr); + } + } + +#ifdef XP_LINUX + size_t get_rss() { + if (mGetRSSFailed) { + return 0; + } + + // On Linux we can determine the RSS of the heap area by examining the + // smaps file. + mozilla::Maybe<SMapsReader> reader = SMapsReader::open(); + if (!reader) { + mGetRSSFailed = true; + return 0; + } + + size_t rss = 0; + while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) { + if (map->IsCandidate() && !mSlots.ownsMapping(map->mStart) && + !InitialMapsContains(map->mStart)) { + rss += map->mRSS; + } + } + + return rss; + } + + bool InitialMapsContains(uintptr_t aRangeStart) { + for (unsigned i = 0; i < mNumInitialMaps; i++) { + MOZ_ASSERT(i < MAX_INITIAL_MAPS); + + if (mInitialMaps[i] == aRangeStart) { + return true; + } + } + return false; + } + + public: + void BuildInitialMapInfo() { + if (mGetRSSFailed) { + return; + } + + Maybe<SMapsReader> reader = SMapsReader::open(); + if (!reader) { + mGetRSSFailed = true; + return; + } + + while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) { + if (map->IsCandidate()) { + if (mNumInitialMaps >= MAX_INITIAL_MAPS) { + FdPrintf(mStdErr, "Too many initial mappings, can't compute RSS\n"); + mGetRSSFailed = false; + return; + } + + mInitialMaps[mNumInitialMaps++] = map->mStart; + } + } + } +#endif + + private: + MemSlot& SlotForResult(Buffer& aResult) { + /* Parse result value and get the corresponding slot. */ + Buffer dummy = aResult.SplitChar('='); + Buffer dummy2 = aResult.SplitChar('#'); + if (dummy || dummy2) { + die("Malformed input"); + } + + size_t slot_id = parseNumber(aResult); + mNumUsedSlots = std::max(mNumUsedSlots, slot_id + 1); + + return mSlots[slot_id]; + } + + void MaybeCommit(MemSlot& aSlot) { + if (mDoMemset) { + // Write any byte, 0x55 isn't significant. + memset(aSlot.mPtr, 0x55, aSlot.mRequest); + } + } + + intptr_t mStdErr; + size_t mOps = 0; + + // The number of slots that have been used. It is used to iterate over slots + // without accessing those we haven't initialised. + size_t mNumUsedSlots = 0; + + MemSlotList mSlots; + size_t mTotalRequestedSize = 0; + size_t mTotalAllocatedSize = 0; + // Whether to calculate slop for all allocations over the runtime of a + // process. + bool mCalculateSlop = false; + bool mDoMemset = false; + +#ifdef XP_LINUX + // If we have a failure reading smaps info then this is used to disable that + // feature. + bool mGetRSSFailed = false; + + // The initial memory mappings are recorded here at start up. We exclude + // memory in these mappings when computing RSS. We assume they do not grow + // and that no regions are allocated near them, this is true because they'll + // only record the .bss and .data segments from our binary and shared objects + // or regions that logalloc-replay has created for MappedArrays. + // + // 64 should be enough for anybody. + static constexpr unsigned MAX_INITIAL_MAPS = 64; + uintptr_t mInitialMaps[MAX_INITIAL_MAPS]; + unsigned mNumInitialMaps = 0; +#endif // XP_LINUX +}; + +static Replay replay; + +int main(int argc, const char* argv[]) { + size_t first_pid = 0; + FdReader reader(0); + + for (int i = 1; i < argc; i++) { + const char* option = argv[i]; + if (strcmp(option, "-s") == 0) { + // Do accounting to calculate allocation slop. + replay.enableSlopCalculation(); + } else if (strcmp(option, "-c") == 0) { + // Touch memory as we allocate it. + replay.enableMemset(); + } else { + fprintf(stderr, "Unknown command line option: %s\n", option); + return EXIT_FAILURE; + } + } + + /* Read log from stdin and dispatch function calls to the Replay instance. + * The log format is essentially: + * <pid> <tid> <function>([<args>])[=<result>] + * <args> is a comma separated list of arguments. + * + * The logs are expected to be preprocessed so that allocations are + * attributed a tracking slot. The input is trusted not to have crazy + * values for these slot numbers. + * + * <result>, as well as some of the args to some of the function calls are + * such slot numbers. + */ + while (true) { + Buffer line = reader.ReadLine(); + + if (!line) { + break; + } + + size_t pid = parseNumber(line.SplitChar(' ')); + if (!first_pid) { + first_pid = pid; + } + + /* The log may contain data for several processes, only entries for the + * very first that appears are treated. */ + if (first_pid != pid) { + continue; + } + + /* The log contains thread ids for manual analysis, but we just ignore them + * for now. */ + parseNumber(line.SplitChar(' ')); + + Buffer func = line.SplitChar('('); + Buffer args = line.SplitChar(')'); + + if (func == Buffer("jemalloc_stats")) { + replay.jemalloc_stats(args, line); + } else if (func == Buffer("free")) { + replay.free(args, line); + } else if (func == Buffer("malloc")) { + replay.malloc(args, line); + } else if (func == Buffer("posix_memalign")) { + replay.posix_memalign(args, line); + } else if (func == Buffer("aligned_alloc")) { + replay.aligned_alloc(args, line); + } else if (func == Buffer("calloc")) { + replay.calloc(args, line); + } else if (func == Buffer("realloc")) { + replay.realloc(args, line); + } else if (func == Buffer("memalign")) { + replay.memalign(args, line); + } else if (func == Buffer("valloc")) { + replay.valloc(args, line); + } else { + die("Malformed input"); + } + } + + return 0; +} diff --git a/memory/replace/logalloc/replay/expected_output_minimal.log b/memory/replace/logalloc/replay/expected_output_minimal.log new file mode 100644 index 0000000000..332fe20957 --- /dev/null +++ b/memory/replace/logalloc/replay/expected_output_minimal.log @@ -0,0 +1,17 @@ +1 1 jemalloc_stats() +1 1 malloc(42)=#1 +1 1 malloc(24)=#2 +1 1 free(#1) +1 1 memalign(4096,1024)=#1 +1 1 calloc(4,42)=#3 +1 1 free(#2) +1 1 realloc(#3,84)=#2 +1 1 memalign(256,1024)=#3 +1 1 memalign(512,1024)=#4 +1 1 memalign(4096,1024)=#5 +1 1 jemalloc_stats() +1 1 free(#5) +1 1 free(#4) +1 1 free(#3) +1 1 free(#2) +1 1 free(#1) diff --git a/memory/replace/logalloc/replay/logalloc_munge.py b/memory/replace/logalloc/replay/logalloc_munge.py new file mode 100644 index 0000000000..52d0032463 --- /dev/null +++ b/memory/replace/logalloc/replay/logalloc_munge.py @@ -0,0 +1,147 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +This script takes a log from the replace-malloc logalloc library on stdin +and munges it so that it can be used with the logalloc-replay tool. + +Given the following output: + 13663 malloc(42)=0x7f0c33502040 + 13663 malloc(24)=0x7f0c33503040 + 13663 free(0x7f0c33502040) +The resulting output is: + 1 malloc(42)=#1 + 1 malloc(24)=#2 + 1 free(#1) + +See README for more details. +""" + +import sys +from collections import defaultdict, deque + + +class IdMapping(object): + """Class to map values to ids. + + Each value is associated to an increasing id, starting from 1. + When a value is removed, its id is recycled and will be reused for + subsequent values. + """ + + def __init__(self): + self.id = 1 + self._values = {} + self._recycle = deque() + + def __getitem__(self, value): + if value not in self._values: + if self._recycle: + self._values[value] = self._recycle.popleft() + else: + self._values[value] = self.id + self.id += 1 + return self._values[value] + + def __delitem__(self, value): + if value == 0: + return + self._recycle.append(self._values[value]) + del self._values[value] + + def __contains__(self, value): + return value == 0 or value in self._values + + +class Ignored(Exception): + pass + + +def split_log_line(line): + try: + # The format for each line is: + # <pid> [<tid>] <function>([<args>])[=<result>] + # + # The original format didn't include the tid, so we try to parse + # lines whether they have one or not. + pid, func_call = line.split(" ", 1) + call, result = func_call.split(")") + func, args = call.split("(") + args = args.split(",") if args else [] + if result: + if result[0] != "=": + raise Ignored("Malformed input") + result = result[1:] + if " " in func: + tid, func = func.split(" ", 1) + else: + tid = pid + return pid, tid, func, args, result + except Exception: + raise Ignored("Malformed input") + + +NUM_ARGUMENTS = { + "jemalloc_stats": 0, + "free": 1, + "malloc": 1, + "posix_memalign": 2, + "aligned_alloc": 2, + "calloc": 2, + "realloc": 2, + "memalign": 2, + "valloc": 1, +} + + +def main(): + pids = IdMapping() + processes = defaultdict(lambda: {"pointers": IdMapping(), "tids": IdMapping()}) + for line in sys.stdin: + line = line.strip() + + try: + pid, tid, func, args, result = split_log_line(line) + + # Replace pid with an id. + pid = pids[int(pid)] + + process = processes[pid] + tid = process["tids"][int(tid)] + + pointers = process["pointers"] + + if func not in NUM_ARGUMENTS: + raise Ignored("Unknown function") + + if len(args) != NUM_ARGUMENTS[func]: + raise Ignored("Malformed input") + + if func in ("jemalloc_stats", "free") and result: + raise Ignored("Malformed input") + + if func in ("free", "realloc"): + ptr = int(args[0], 16) + if ptr and ptr not in pointers: + raise Ignored("Did not see an alloc for pointer") + args[0] = "#%d" % pointers[ptr] + del pointers[ptr] + + if result: + result = int(result, 16) + if not result: + raise Ignored("Result is NULL") + result = "#%d" % pointers[result] + + print( + "%d %d %s(%s)%s" + % (pid, tid, func, ",".join(args), "=%s" % result if result else "") + ) + + except Exception as e: + print('Ignored "%s": %s' % (line, e), file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/memory/replace/logalloc/replay/moz.build b/memory/replace/logalloc/replay/moz.build new file mode 100644 index 0000000000..4ebf48f842 --- /dev/null +++ b/memory/replace/logalloc/replay/moz.build @@ -0,0 +1,87 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Program("logalloc-replay") + +SOURCES += [ + "/mfbt/Assertions.cpp", + "/mfbt/Poison.cpp", + "/mfbt/RandomNum.cpp", + "/mfbt/TaggedAnonymousMemory.cpp", + "/mfbt/Unused.cpp", + "/mozglue/misc/StackWalk.cpp", + "Replay.cpp", +] + +if CONFIG["OS_TARGET"] == "WINNT": + SOURCES += [ + "/mozglue/misc/ProcessType.cpp", + ] + +if CONFIG["OS_TARGET"] == "Linux": + LDFLAGS += ["-static-libstdc++"] + +if CONFIG["OS_TARGET"] == "Darwin": + # Work around "warning: 'aligned_alloc' is only available on macOS 10.15 or newer" + # when building with MACOSX_DEPLOYMENT_TARGET < 10.15 with >= 10.15 SDK. + # We have our own definition of the function, so it doesn't matter what the SDK says. + SOURCES["Replay.cpp"].flags += ["-Wno-unguarded-availability-new"] + +if CONFIG["MOZ_REPLACE_MALLOC_STATIC"] and (CONFIG["MOZ_DMD"] or CONFIG["MOZ_PHC"]): + UNIFIED_SOURCES += [ + "/mfbt/HashFunctions.cpp", + "/mfbt/JSONWriter.cpp", + ] + +if CONFIG["OS_ARCH"] == "WINNT": + OS_LIBS += [ + "advapi32", + "dbghelp", + ] + +if CONFIG["MOZ_LINKER"] and CONFIG["MOZ_WIDGET_TOOLKIT"] == "android": + LOCAL_INCLUDES += [ + "/mozglue/linker", + ] + DEFINES["__wrap_dladdr"] = "dladdr" + + +if CONFIG["MOZ_BUILD_APP"] == "memory": + EXPORTS.mozilla += [ + "/mozglue/misc/StackWalk.h", + ] + +if CONFIG["MOZ_BUILD_APP"] == "memory" or CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + UNIFIED_SOURCES += [ + "/mfbt/double-conversion/double-conversion/bignum-dtoa.cc", + "/mfbt/double-conversion/double-conversion/bignum.cc", + "/mfbt/double-conversion/double-conversion/cached-powers.cc", + "/mfbt/double-conversion/double-conversion/double-to-string.cc", + "/mfbt/double-conversion/double-conversion/fast-dtoa.cc", + "/mfbt/double-conversion/double-conversion/fixed-dtoa.cc", + "/mfbt/double-conversion/double-conversion/string-to-double.cc", + "/mfbt/double-conversion/double-conversion/strtod.cc", + "/mozglue/misc/Printf.cpp", + ] + +LOCAL_INCLUDES += [ + "/memory/build", +] + +# Link replace-malloc and the default allocator. +USE_LIBS += [ + "memory", +] + +# The memory library defines this, so it's needed here too. +DEFINES["IMPL_MFBT"] = True + +if CONFIG["MOZ_NEEDS_LIBATOMIC"]: + OS_LIBS += ["atomic"] + +DisableStlWrapping() + +include("/mozglue/build/replace_malloc.mozbuild") diff --git a/memory/replace/logalloc/replay/replay.log b/memory/replace/logalloc/replay/replay.log new file mode 100644 index 0000000000..f1e6de788b --- /dev/null +++ b/memory/replace/logalloc/replay/replay.log @@ -0,0 +1,18 @@ +1 1 jemalloc_stats() +1 1 malloc(42)=#1 +1 1 malloc(24)=#2 +2 2 malloc(42)=#1 +1 1 free(#1) +1 1 posix_memalign(4096,1024)=#1 +1 1 calloc(4,42)=#3 +1 1 free(#2) +1 1 realloc(#3,84)=#2 +1 1 aligned_alloc(256,1024)=#3 +1 1 memalign(512,1024)=#4 +1 1 valloc(1024)=#5 +1 1 jemalloc_stats() +1 1 free(#5) +1 1 free(#4) +1 1 free(#3) +1 1 free(#2) +1 1 free(#1) diff --git a/memory/replace/moz.build b/memory/replace/moz.build new file mode 100644 index 0000000000..7385faceb5 --- /dev/null +++ b/memory/replace/moz.build @@ -0,0 +1,22 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +@template +def ReplaceMalloc(name): + if CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + DEFINES["MOZ_REPLACE_MALLOC_PREFIX"] = name.replace("-", "_") + FINAL_LIBRARY = "memory" + else: + SharedLibrary(name) + + +DIRS += [ + "logalloc", +] + +if CONFIG["MOZ_DMD"]: + DIRS += ["dmd"] diff --git a/memory/replace/phc/moz.build b/memory/replace/phc/moz.build new file mode 100644 index 0000000000..e2d233642c --- /dev/null +++ b/memory/replace/phc/moz.build @@ -0,0 +1,50 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ReplaceMalloc("phc") + +DEFINES["MOZ_NO_MOZALLOC"] = True +DEFINES["IMPL_MFBT"] = True + +LOCAL_INCLUDES += [ + "../logalloc", + "/memory/build", +] + +EXPORTS += [ + "PHC.h", +] + +UNIFIED_SOURCES += [ + "PHC.cpp", +] + +if CONFIG["MOZ_BUILD_APP"] == "memory": + UNIFIED_SOURCES += [ + "/mfbt/double-conversion/double-conversion/bignum-dtoa.cc", + "/mfbt/double-conversion/double-conversion/bignum.cc", + "/mfbt/double-conversion/double-conversion/cached-powers.cc", + "/mfbt/double-conversion/double-conversion/double-to-string.cc", + "/mfbt/double-conversion/double-conversion/fast-dtoa.cc", + "/mfbt/double-conversion/double-conversion/fixed-dtoa.cc", + "/mfbt/double-conversion/double-conversion/string-to-double.cc", + "/mfbt/double-conversion/double-conversion/strtod.cc", + "/mozglue/misc/Printf.cpp", + ] + +if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + UNIFIED_SOURCES += [ + "../logalloc/FdPrintf.cpp", + "/mozglue/misc/StackWalk.cpp", + ] + if CONFIG["OS_ARCH"] == "WINNT": + OS_LIBS += [ + "dbghelp", + ] + +TEST_DIRS += ["test"] + +DisableStlWrapping() diff --git a/memory/replace/phc/test/moz.build b/memory/replace/phc/test/moz.build new file mode 100644 index 0000000000..8208ae849d --- /dev/null +++ b/memory/replace/phc/test/moz.build @@ -0,0 +1,9 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# The gtests won't work in a SpiderMonkey-only build. +if CONFIG["MOZ_WIDGET_TOOLKIT"]: + TEST_DIRS += ["gtest"] |