diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /memory | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'memory')
103 files changed, 21814 insertions, 0 deletions
diff --git a/memory/app.mozbuild b/memory/app.mozbuild new file mode 100644 index 0000000000..fc8d4f8002 --- /dev/null +++ b/memory/app.mozbuild @@ -0,0 +1,27 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Indirectly necessary for chromium's lock.h, included from LogAlloc.cpp +EXPORTS.mozilla += [ + "/mozglue/misc/Printf.h", + "/xpcom/base/Logging.h", +] + +DIRS += [ + "/memory", + "/mfbt", +] + +EXPORTS.mozilla += [ + "/mozglue/misc/IntegerPrintfMacros.h", + "/mozglue/misc/Sprintf.h", +] + +if CONFIG["OS_ARCH"] == "WINNT": + EXPORTS.mozilla += [ + "/mozglue/misc/StackWalk_windows.h", + "/mozglue/misc/StackWalkThread.h", + ] diff --git a/memory/build/FdPrintf.cpp b/memory/build/FdPrintf.cpp new file mode 100644 index 0000000000..272f3d0db8 --- /dev/null +++ b/memory/build/FdPrintf.cpp @@ -0,0 +1,200 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <cstdarg> + +#ifdef _WIN32 +# include <windows.h> +#else +# include <unistd.h> +#endif +#include <cmath> +#include <cstring> +#include "mozilla/Assertions.h" +#include "mozilla/Unused.h" +#include "FdPrintf.h" + +/* Template class allowing a limited number of increments on a value */ +template <typename T> +class CheckedIncrement { + public: + CheckedIncrement(T aValue, size_t aMaxIncrement) + : mValue(aValue), mMaxIncrement(aMaxIncrement) {} + + T operator++(int) { + if (!mMaxIncrement) { + MOZ_CRASH("overflow detected"); + } + mMaxIncrement--; + return mValue++; + } + + T& operator++() { + (*this)++; + return mValue; + } + + void advance(T end) { + // Only makes sense if T is a pointer type. + size_t diff = end - mValue; + if (diff > mMaxIncrement) { + MOZ_CRASH("overflow detected"); + } + mMaxIncrement -= diff; + mValue = end; + } + + void rewind(T pos) { + size_t diff = mValue - pos; + mMaxIncrement += diff; + mValue = pos; + } + + operator T() { return mValue; } + T value() { return mValue; } + + private: + T mValue; + size_t mMaxIncrement; +}; + +template <typename T> +static unsigned NumDigits(T n) { + if (n < 1) { + // We want one digit, it will be 0. + return 1; + } + + double l = log10(static_cast<double>(n)); + double cl = ceil(l); + return l == cl ? unsigned(cl) + 1 : unsigned(cl); +} + +static void LeftPad(CheckedIncrement<char*>& b, size_t pad) { + while (pad-- > 0) { + *(b++) = ' '; + } +} + +// Write the digits into the buffer. +static void WriteDigits(CheckedIncrement<char*>& b, size_t i, + size_t num_digits) { + size_t x = pow(10, double(num_digits - 1)); + do { + *(b++) = "0123456789"[(i / x) % 10]; + x /= 10; + } while (x > 0); +} + +void FdPrintf(intptr_t aFd, const char* aFormat, ...) { + if (aFd == 0) { + return; + } + char buf[256]; + CheckedIncrement<char*> b(buf, sizeof(buf)); + CheckedIncrement<const char*> f(aFormat, strlen(aFormat) + 1); + va_list ap; + va_start(ap, aFormat); + while (true) { + switch (*f) { + case '\0': + goto out; + + case '%': { + // The start of the format specifier is used if this specifier is + // invalid. + const char* start = f; + + // Read the field width + f++; + char* end = nullptr; + size_t width = strtoul(f, &end, 10); + // If strtol can't find a number that's okay, that means 0 in our + // case, but we must advance f). + f.advance(end); + + switch (*f) { + case 'z': { + if (*(++f) == 'u') { + size_t i = va_arg(ap, size_t); + + size_t num_digits = NumDigits(i); + LeftPad(b, width > num_digits ? width - num_digits : 0); + WriteDigits(b, i, num_digits); + } else { + // If the format specifier is unknown then write out '%' and + // rewind to the beginning of the specifier causing it to be + // printed normally. + *(b++) = '%'; + f.rewind(start); + } + break; + } + + case 'p': { + intptr_t ptr = va_arg(ap, intptr_t); + *(b++) = '0'; + *(b++) = 'x'; + int x = sizeof(intptr_t) * 8; + bool wrote_msb = false; + do { + x -= 4; + size_t hex_digit = ptr >> x & 0xf; + if (hex_digit || wrote_msb) { + *(b++) = "0123456789abcdef"[hex_digit]; + wrote_msb = true; + } + } while (x > 0); + if (!wrote_msb) { + *(b++) = '0'; + } + break; + } + + case 's': { + const char* str = va_arg(ap, const char*); + size_t len = strlen(str); + + LeftPad(b, width > len ? width - len : 0); + + while (*str) { + *(b++) = *(str++); + } + + break; + } + + case '%': + // Print a single raw '%'. + *(b++) = '%'; + break; + + default: + // If the format specifier is unknown then write out '%' and + // rewind to the beginning of the specifier causing it to be + // printed normally. + *(b++) = '%'; + f.rewind(start); + break; + } + break; + } + default: + *(b++) = *f; + break; + } + f++; + } +out: +#ifdef _WIN32 + // See comment in FdPrintf.h as to why WriteFile is used. + DWORD written; + WriteFile(reinterpret_cast<HANDLE>(aFd), buf, b - buf, &written, nullptr); +#else + MOZ_UNUSED(write(aFd, buf, b - buf)); +#endif + va_end(ap); +} diff --git a/memory/build/FdPrintf.h b/memory/build/FdPrintf.h new file mode 100644 index 0000000000..257084243b --- /dev/null +++ b/memory/build/FdPrintf.h @@ -0,0 +1,27 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __FdPrintf_h__ +#define __FdPrintf_h__ + +/* We can't use libc's (f)printf because it would reenter in replace_malloc, + * So use a custom and simplified version. Only %p, %zu, %s and %% are + * supported, %zu, %s, support width specifiers. + * + * /!\ This function used a fixed-size internal buffer. The caller is + * expected to not use a format string that may overflow. + * The aFd argument is a file descriptor on UNIX and a native win32 file + * handle on Windows (from CreateFile). We can't use the windows POSIX + * APIs is that they don't support O_APPEND in a multi-process-safe way, + * while CreateFile does. + */ +void FdPrintf(intptr_t aFd, const char* aFormat, ...) +#ifdef __GNUC__ + __attribute__((format(printf, 2, 3))) +#endif + ; + +#endif /* __FdPrintf_h__ */ diff --git a/memory/build/Makefile.in b/memory/build/Makefile.in new file mode 100644 index 0000000000..1ddd1121be --- /dev/null +++ b/memory/build/Makefile.in @@ -0,0 +1,9 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Workaround for alignment problems in gcov code. See +# https://bugzilla.mozilla.org/show_bug.cgi?id=1413570#c2. +ifeq ($(TARGET_CPU),x86) +PROFILE_GEN_CFLAGS += -mno-sse2 +endif diff --git a/memory/build/Mutex.cpp b/memory/build/Mutex.cpp new file mode 100644 index 0000000000..1450210293 --- /dev/null +++ b/memory/build/Mutex.cpp @@ -0,0 +1,26 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Mutex.h" + +#include <errno.h> + +#include "mozilla/Assertions.h" + +bool Mutex::TryLock() { +#if defined(XP_WIN) + return !!TryEnterCriticalSection(&mMutex); +#elif defined(XP_DARWIN) + return os_unfair_lock_trylock(&mMutex); +#else + switch (pthread_mutex_trylock(&mMutex)) { + case 0: + return true; + case EBUSY: + return false; + default: + MOZ_CRASH("pthread_mutex_trylock error."); + } +#endif +} diff --git a/memory/build/Mutex.h b/memory/build/Mutex.h new file mode 100644 index 0000000000..f360084f64 --- /dev/null +++ b/memory/build/Mutex.h @@ -0,0 +1,263 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef Mutex_h +#define Mutex_h + +#if defined(XP_WIN) +# include <windows.h> +#else +# include <pthread.h> +#endif +#if defined(XP_DARWIN) +# include <os/lock.h> +#endif + +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#include "mozilla/ThreadSafety.h" + +#if defined(XP_DARWIN) +// For information about the following undocumented flags and functions see +// https://github.com/apple/darwin-xnu/blob/main/bsd/sys/ulock.h and +// https://github.com/apple/darwin-libplatform/blob/main/private/os/lock_private.h +# define OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION (0x00010000) +# define OS_UNFAIR_LOCK_ADAPTIVE_SPIN (0x00040000) + +extern "C" { + +typedef uint32_t os_unfair_lock_options_t; +OS_UNFAIR_LOCK_AVAILABILITY +OS_EXPORT OS_NOTHROW OS_NONNULL_ALL void os_unfair_lock_lock_with_options( + os_unfair_lock_t lock, os_unfair_lock_options_t options); +} +#endif // defined(XP_DARWIN) + +// Mutexes based on spinlocks. We can't use normal pthread spinlocks in all +// places, because they require malloc()ed memory, which causes bootstrapping +// issues in some cases. We also can't use constructors, because for statics, +// they would fire after the first use of malloc, resetting the locks. +struct MOZ_CAPABILITY("mutex") Mutex { +#if defined(XP_WIN) + CRITICAL_SECTION mMutex; +#elif defined(XP_DARWIN) + os_unfair_lock mMutex; +#else + pthread_mutex_t mMutex; +#endif + + // Initializes a mutex. Returns whether initialization succeeded. + inline bool Init() { +#if defined(XP_WIN) + if (!InitializeCriticalSectionAndSpinCount(&mMutex, 5000)) { + return false; + } +#elif defined(XP_DARWIN) + mMutex = OS_UNFAIR_LOCK_INIT; +#elif defined(XP_LINUX) && !defined(ANDROID) + pthread_mutexattr_t attr; + if (pthread_mutexattr_init(&attr) != 0) { + return false; + } + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); + if (pthread_mutex_init(&mMutex, &attr) != 0) { + pthread_mutexattr_destroy(&attr); + return false; + } + pthread_mutexattr_destroy(&attr); +#else + if (pthread_mutex_init(&mMutex, nullptr) != 0) { + return false; + } +#endif + return true; + } + + inline void Lock() MOZ_CAPABILITY_ACQUIRE() { +#if defined(XP_WIN) + EnterCriticalSection(&mMutex); +#elif defined(XP_DARWIN) + // We rely on a non-public function to improve performance here. + // The OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION flag informs the kernel that + // the calling thread is able to make progress even in absence of actions + // from other threads and the OS_UNFAIR_LOCK_ADAPTIVE_SPIN one causes the + // kernel to spin on a contested lock if the owning thread is running on + // the same physical core (presumably only on x86 CPUs given that ARM + // macs don't have cores capable of SMT). + os_unfair_lock_lock_with_options( + &mMutex, + OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION | OS_UNFAIR_LOCK_ADAPTIVE_SPIN); +#else + pthread_mutex_lock(&mMutex); +#endif + } + + [[nodiscard]] bool TryLock() MOZ_TRY_ACQUIRE(true); + + inline void Unlock() MOZ_CAPABILITY_RELEASE() { +#if defined(XP_WIN) + LeaveCriticalSection(&mMutex); +#elif defined(XP_DARWIN) + os_unfair_lock_unlock(&mMutex); +#else + pthread_mutex_unlock(&mMutex); +#endif + } + +#if defined(XP_DARWIN) + static bool SpinInKernelSpace(); + static const bool gSpinInKernelSpace; +#endif // XP_DARWIN +}; + +// Mutex that can be used for static initialization. +// On Windows, CRITICAL_SECTION requires a function call to be initialized, +// but for the initialization lock, a static initializer calling the +// function would be called too late. We need no-function-call +// initialization, which SRWLock provides. +// Ideally, we'd use the same type of locks everywhere, but SRWLocks +// everywhere incur a performance penalty. See bug 1418389. +#if defined(XP_WIN) +struct MOZ_CAPABILITY("mutex") StaticMutex { + SRWLOCK mMutex; + + inline void Lock() MOZ_CAPABILITY_ACQUIRE() { + AcquireSRWLockExclusive(&mMutex); + } + + inline void Unlock() MOZ_CAPABILITY_RELEASE() { + ReleaseSRWLockExclusive(&mMutex); + } +}; + +// Normally, we'd use a constexpr constructor, but MSVC likes to create +// static initializers anyways. +# define STATIC_MUTEX_INIT SRWLOCK_INIT + +#else +typedef Mutex StaticMutex; + +# if defined(XP_DARWIN) +# define STATIC_MUTEX_INIT OS_UNFAIR_LOCK_INIT +# elif defined(XP_LINUX) && !defined(ANDROID) +# define STATIC_MUTEX_INIT PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# else +# define STATIC_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER +# endif + +#endif + +#ifdef XP_WIN +typedef DWORD ThreadId; +inline ThreadId GetThreadId() { return GetCurrentThreadId(); } +#else +typedef pthread_t ThreadId; +inline ThreadId GetThreadId() { return pthread_self(); } +#endif + +class MOZ_CAPABILITY("mutex") MaybeMutex : public Mutex { + public: + enum DoLock { + MUST_LOCK, + AVOID_LOCK_UNSAFE, + }; + + bool Init(DoLock aDoLock) { + mDoLock = aDoLock; +#ifdef MOZ_DEBUG + mThreadId = GetThreadId(); +#endif + return Mutex::Init(); + } + +#ifndef XP_WIN + // Re initialise after fork(), assumes that mDoLock is already initialised. + void Reinit(pthread_t aForkingThread) { + if (mDoLock == MUST_LOCK) { + Mutex::Init(); + return; + } +# ifdef MOZ_DEBUG + // If this is an eluded lock we can only safely re-initialise it if the + // thread that called fork is the one that owns the lock. + if (pthread_equal(mThreadId, aForkingThread)) { + mThreadId = GetThreadId(); + Mutex::Init(); + } else { + // We can't guantee that whatever resource this lock protects (probably a + // jemalloc arena) is in a consistent state. + mDeniedAfterFork = true; + } +# endif + } +#endif + + inline void Lock() MOZ_CAPABILITY_ACQUIRE() { + if (ShouldLock()) { + Mutex::Lock(); + } + } + + inline void Unlock() MOZ_CAPABILITY_RELEASE() { + if (ShouldLock()) { + Mutex::Unlock(); + } + } + + // Return true if we can use this resource from this thread, either because + // we'll use the lock or because this is the only thread that will access the + // protected resource. +#ifdef MOZ_DEBUG + bool SafeOnThisThread() const { + return mDoLock == MUST_LOCK || GetThreadId() == mThreadId; + } +#endif + + bool LockIsEnabled() const { return mDoLock == MUST_LOCK; } + + private: + bool ShouldLock() { +#ifndef XP_WIN + MOZ_ASSERT(!mDeniedAfterFork); +#endif + + if (mDoLock == MUST_LOCK) { + return true; + } + + MOZ_ASSERT(GetThreadId() == mThreadId); + return false; + } + + DoLock mDoLock; +#ifdef MOZ_DEBUG + ThreadId mThreadId; +# ifndef XP_WIN + bool mDeniedAfterFork = false; +# endif +#endif +}; + +template <typename T> +struct MOZ_SCOPED_CAPABILITY MOZ_RAII AutoLock { + explicit AutoLock(T& aMutex) MOZ_CAPABILITY_ACQUIRE(aMutex) : mMutex(aMutex) { + mMutex.Lock(); + } + + ~AutoLock() MOZ_CAPABILITY_RELEASE() { mMutex.Unlock(); } + + AutoLock(const AutoLock&) = delete; + AutoLock(AutoLock&&) = delete; + + private: + T& mMutex; +}; + +using MutexAutoLock = AutoLock<Mutex>; + +using MaybeMutexAutoLock = AutoLock<MaybeMutex>; + +#endif diff --git a/memory/build/PHC.cpp b/memory/build/PHC.cpp new file mode 100644 index 0000000000..94cb5596a5 --- /dev/null +++ b/memory/build/PHC.cpp @@ -0,0 +1,1837 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// PHC is a probabilistic heap checker. A tiny fraction of randomly chosen heap +// allocations are subject to some expensive checking via the use of OS page +// access protection. A failed check triggers a crash, whereupon useful +// information about the failure is put into the crash report. The cost and +// coverage for each user is minimal, but spread over the entire user base the +// coverage becomes significant. +// +// The idea comes from Chromium, where it is called GWP-ASAN. (Firefox uses PHC +// as the name because GWP-ASAN is long, awkward, and doesn't have any +// particular meaning.) +// +// In the current implementation up to 64 allocations per process can become +// PHC allocations. These allocations must be page-sized or smaller. Each PHC +// allocation gets its own page, and when the allocation is freed its page is +// marked inaccessible until the page is reused for another allocation. This +// means that a use-after-free defect (which includes double-frees) will be +// caught if the use occurs before the page is reused for another allocation. +// The crash report will contain stack traces for the allocation site, the free +// site, and the use-after-free site, which is often enough to diagnose the +// defect. +// +// Also, each PHC allocation is followed by a guard page. The PHC allocation is +// positioned so that its end abuts the guard page (or as close as possible, +// given alignment constraints). This means that a bounds violation at the end +// of the allocation (overflow) will be caught. The crash report will contain +// stack traces for the allocation site and the bounds violation use site, +// which is often enough to diagnose the defect. +// +// (A bounds violation at the start of the allocation (underflow) will not be +// caught, unless it is sufficiently large to hit the preceding allocation's +// guard page, which is not that likely. It would be possible to look more +// assiduously for underflow by randomly placing some allocations at the end of +// the page and some at the start of the page, and GWP-ASAN does this. PHC does +// not, however, because overflow is likely to be much more common than +// underflow in practice.) +// +// We use a simple heuristic to categorize a guard page access as overflow or +// underflow: if the address falls in the lower half of the guard page, we +// assume it is overflow, otherwise we assume it is underflow. More +// sophisticated heuristics are possible, but this one is very simple, and it is +// likely that most overflows/underflows in practice are very close to the page +// boundary. +// +// The design space for the randomization strategy is large. The current +// implementation has a large random delay before it starts operating, and a +// small random delay between each PHC allocation attempt. Each freed PHC +// allocation is quarantined for a medium random delay before being reused, in +// order to increase the chance of catching UAFs. +// +// The basic cost of PHC's operation is as follows. +// +// - The physical memory cost is 64 pages plus some metadata (including stack +// traces) for each page. This amounts to 256 KiB per process on +// architectures with 4 KiB pages and 1024 KiB on macOS/AArch64 which uses +// 16 KiB pages. +// +// - The virtual memory cost is the physical memory cost plus the guard pages: +// another 64 pages. This amounts to another 256 KiB per process on +// architectures with 4 KiB pages and 1024 KiB on macOS/AArch64 which uses +// 16 KiB pages. PHC is currently only enabled on 64-bit platforms so the +// impact of the virtual memory usage is negligible. +// +// - Every allocation requires a size check and a decrement-and-check of an +// atomic counter. When the counter reaches zero a PHC allocation can occur, +// which involves marking a page as accessible and getting a stack trace for +// the allocation site. Otherwise, mozjemalloc performs the allocation. +// +// - Every deallocation requires a range check on the pointer to see if it +// involves a PHC allocation. (The choice to only do PHC allocations that are +// a page or smaller enables this range check, because the 64 pages are +// contiguous. Allowing larger allocations would make this more complicated, +// and we definitely don't want something as slow as a hash table lookup on +// every deallocation.) PHC deallocations involve marking a page as +// inaccessible and getting a stack trace for the deallocation site. +// +// Note that calls to realloc(), free(), and malloc_usable_size() will +// immediately crash if the given pointer falls within a page allocation's +// page, but does not point to the start of the allocation itself. +// +// void* p = malloc(64); +// free(p + 1); // p+1 doesn't point to the allocation start; crash +// +// Such crashes will not have the PHC fields in the crash report. +// +// PHC-specific tests can be run with the following commands: +// - gtests: `./mach gtest '*PHC*'` +// - xpcshell-tests: `./mach test toolkit/crashreporter/test/unit` +// - This runs some non-PHC tests as well. + +#include "PHC.h" + +#include <stdlib.h> +#include <time.h> + +#include <algorithm> + +#ifdef XP_WIN +# include <process.h> +#else +# include <sys/mman.h> +# include <sys/types.h> +# include <pthread.h> +# include <unistd.h> +#endif + +#include "mozjemalloc.h" + +#include "mozjemalloc.h" +#include "FdPrintf.h" +#include "Mutex.h" +#include "mozilla/Assertions.h" +#include "mozilla/Atomics.h" +#include "mozilla/Attributes.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/Maybe.h" +#include "mozilla/StackWalk.h" +#include "mozilla/ThreadLocal.h" +#include "mozilla/XorShift128PlusRNG.h" + +using namespace mozilla; + +//--------------------------------------------------------------------------- +// Utilities +//--------------------------------------------------------------------------- + +#ifdef ANDROID +// Android doesn't have pthread_atfork defined in pthread.h. +extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void), + void (*)(void)); +#endif + +#ifndef DISALLOW_COPY_AND_ASSIGN +# define DISALLOW_COPY_AND_ASSIGN(T) \ + T(const T&); \ + void operator=(const T&) +#endif + +// This class provides infallible operations for the small number of heap +// allocations that PHC does for itself. It would be nice if we could use the +// InfallibleAllocPolicy from mozalloc, but PHC cannot use mozalloc. +class InfallibleAllocPolicy { + public: + static void AbortOnFailure(const void* aP) { + if (!aP) { + MOZ_CRASH("PHC failed to allocate"); + } + } + + template <class T> + static T* new_() { + void* p = MozJemalloc::malloc(sizeof(T)); + AbortOnFailure(p); + return new (p) T; + } +}; + +//--------------------------------------------------------------------------- +// Stack traces +//--------------------------------------------------------------------------- + +// This code is similar to the equivalent code within DMD. + +class StackTrace : public phc::StackTrace { + public: + StackTrace() = default; + + void Clear() { mLength = 0; } + + void Fill(); + + private: + static void StackWalkCallback(uint32_t aFrameNumber, void* aPc, void* aSp, + void* aClosure) { + StackTrace* st = (StackTrace*)aClosure; + MOZ_ASSERT(st->mLength < kMaxFrames); + st->mPcs[st->mLength] = aPc; + st->mLength++; + MOZ_ASSERT(st->mLength == aFrameNumber); + } +}; + +// WARNING WARNING WARNING: this function must only be called when GMut::sMutex +// is *not* locked, otherwise we might get deadlocks. +// +// How? On Windows, MozStackWalk() can lock a mutex, M, from the shared library +// loader. Another thread might call malloc() while holding M locked (when +// loading a shared library) and try to lock GMut::sMutex, causing a deadlock. +// So GMut::sMutex can't be locked during the call to MozStackWalk(). (For +// details, see https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8. On +// Linux, something similar can happen; see bug 824340. So we just disallow it +// on all platforms.) +// +// In DMD, to avoid this problem we temporarily unlock the equivalent mutex for +// the MozStackWalk() call. But that's grotty, and things are a bit different +// here, so we just require that stack traces be obtained before locking +// GMut::sMutex. +// +// Unfortunately, there is no reliable way at compile-time or run-time to ensure +// this pre-condition. Hence this large comment. +// +void StackTrace::Fill() { + mLength = 0; + +// These ifdefs should be kept in sync with the conditions in +// phc_implies_frame_pointers in build/moz.configure/memory.configure +#if defined(XP_WIN) && defined(_M_IX86) + // This avoids MozStackWalk(), which causes unusably slow startup on Win32 + // when it is called during static initialization (see bug 1241684). + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Win32: Registers::SyncPopulate() for the + // frame pointer, and GetStackTop() for the stack end. + CONTEXT context; + RtlCaptureContext(&context); + void** fp = reinterpret_cast<void**>(context.Ebp); + + PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb()); + void* stackEnd = static_cast<void*>(pTib->StackBase); + FramePointerStackWalk(StackWalkCallback, kMaxFrames, this, fp, stackEnd); +#elif defined(XP_DARWIN) + // This avoids MozStackWalk(), which has become unusably slow on Mac due to + // changes in libunwind. + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Mac: Registers::SyncPopulate() for the frame + // pointer, and GetStackTop() for the stack end. +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wframe-address" + void** fp = reinterpret_cast<void**>(__builtin_frame_address(1)); +# pragma GCC diagnostic pop + void* stackEnd = pthread_get_stackaddr_np(pthread_self()); + FramePointerStackWalk(StackWalkCallback, kMaxFrames, this, fp, stackEnd); +#else + MozStackWalk(StackWalkCallback, nullptr, kMaxFrames, this); +#endif +} + +//--------------------------------------------------------------------------- +// Logging +//--------------------------------------------------------------------------- + +// Change this to 1 to enable some PHC logging. Useful for debugging. +#define PHC_LOGGING 0 + +#if PHC_LOGGING + +static size_t GetPid() { return size_t(getpid()); } + +static size_t GetTid() { +# if defined(XP_WIN) + return size_t(GetCurrentThreadId()); +# else + return size_t(pthread_self()); +# endif +} + +# if defined(XP_WIN) +# define LOG_STDERR \ + reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE)) +# else +# define LOG_STDERR 2 +# endif +# define LOG(fmt, ...) \ + FdPrintf(LOG_STDERR, "PHC[%zu,%zu,~%zu] " fmt, GetPid(), GetTid(), \ + size_t(GAtomic::Now()), ##__VA_ARGS__) + +#else + +# define LOG(fmt, ...) + +#endif // PHC_LOGGING + +//--------------------------------------------------------------------------- +// Global state +//--------------------------------------------------------------------------- + +// Throughout this entire file time is measured as the number of sub-page +// allocations performed (by PHC and mozjemalloc combined). `Time` is 64-bit +// because we could have more than 2**32 allocations in a long-running session. +// `Delay` is 32-bit because the delays used within PHC are always much smaller +// than 2**32. +using Time = uint64_t; // A moment in time. +using Delay = uint32_t; // A time duration. + +// PHC only runs if the page size is 4 KiB; anything more is uncommon and would +// use too much memory. So we hardwire this size for all platforms but macOS +// on ARM processors. For the latter we make an exception because the minimum +// page size supported is 16KiB so there's no way to go below that. +static const size_t kPageSize = +#if defined(XP_DARWIN) && defined(__aarch64__) + 16384 +#else + 4096 +#endif + ; + +// We align the PHC area to a multiple of the jemalloc and JS GC chunk size +// (both use 1MB aligned chunks) so that their address computations don't lead +// from non-PHC memory into PHC memory causing misleading PHC stacks to be +// attached to a crash report. +static const size_t kPhcAlign = 1024 * 1024; + +static_assert(IsPowerOfTwo(kPhcAlign)); +static_assert((kPhcAlign % kPageSize) == 0); + +// There are two kinds of page. +// - Allocation pages, from which allocations are made. +// - Guard pages, which are never touched by PHC. +// +// These page kinds are interleaved; each allocation page has a guard page on +// either side. +#ifdef EARLY_BETA_OR_EARLIER +static const size_t kNumAllocPages = kPageSize == 4096 ? 4096 : 1024; +#else +// This will use between 82KiB and 1.1MiB per process (depending on how many +// objects are currently allocated). We will tune this in the future. +static const size_t kNumAllocPages = kPageSize == 4096 ? 256 : 64; +#endif +static const size_t kNumAllPages = kNumAllocPages * 2 + 1; + +// The total size of the allocation pages and guard pages. +static const size_t kAllPagesSize = kNumAllPages * kPageSize; + +// jemalloc adds a guard page to the end of our allocation, see the comment in +// AllocAllPages() for more information. +static const size_t kAllPagesJemallocSize = kAllPagesSize - kPageSize; + +// The default state for PHC. Either Enabled or OnlyFree. +#define DEFAULT_STATE mozilla::phc::OnlyFree + +// The maximum time. +static const Time kMaxTime = ~(Time(0)); + +// Truncate aRnd to the range (1 .. aAvgDelay*2). If aRnd is random, this +// results in an average value of aAvgDelay + 0.5, which is close enough to +// aAvgDelay. aAvgDelay must be a power-of-two for speed. +constexpr Delay Rnd64ToDelay(Delay aAvgDelay, uint64_t aRnd) { + MOZ_ASSERT(IsPowerOfTwo(aAvgDelay), "must be a power of two"); + + return (aRnd & (uint64_t(aAvgDelay) * 2 - 1)) + 1; +} + +static Delay CheckProbability(int64_t aProb) { + // Limit delays calculated from prefs to 0x80000000, this is the largest + // power-of-two that fits in a Delay since it is a uint32_t. + // The minimum is 2 that way not every allocation goes straight to PHC. + return RoundUpPow2( + std::min(std::max(aProb, int64_t(2)), int64_t(0x80000000))); +} + +// Maps a pointer to a PHC-specific structure: +// - Nothing +// - A guard page (it is unspecified which one) +// - An allocation page (with an index < kNumAllocPages) +// +// The standard way of handling a PtrKind is to check IsNothing(), and if that +// fails, to check IsGuardPage(), and if that fails, to call AllocPage(). +class PtrKind { + private: + enum class Tag : uint8_t { + Nothing, + GuardPage, + AllocPage, + }; + + Tag mTag; + uintptr_t mIndex; // Only used if mTag == Tag::AllocPage. + + public: + // Detect what a pointer points to. This constructor must be fast because it + // is called for every call to free(), realloc(), malloc_usable_size(), and + // jemalloc_ptr_info(). + PtrKind(const void* aPtr, const uint8_t* aPagesStart, + const uint8_t* aPagesLimit) { + if (!(aPagesStart <= aPtr && aPtr < aPagesLimit)) { + mTag = Tag::Nothing; + } else { + uintptr_t offset = static_cast<const uint8_t*>(aPtr) - aPagesStart; + uintptr_t allPageIndex = offset / kPageSize; + MOZ_ASSERT(allPageIndex < kNumAllPages); + if (allPageIndex & 1) { + // Odd-indexed pages are allocation pages. + uintptr_t allocPageIndex = allPageIndex / 2; + MOZ_ASSERT(allocPageIndex < kNumAllocPages); + mTag = Tag::AllocPage; + mIndex = allocPageIndex; + } else { + // Even-numbered pages are guard pages. + mTag = Tag::GuardPage; + } + } + } + + bool IsNothing() const { return mTag == Tag::Nothing; } + bool IsGuardPage() const { return mTag == Tag::GuardPage; } + + // This should only be called after IsNothing() and IsGuardPage() have been + // checked and failed. + uintptr_t AllocPageIndex() const { + MOZ_RELEASE_ASSERT(mTag == Tag::AllocPage); + return mIndex; + } +}; + +// Shared, atomic, mutable global state. +class GAtomic { + public: + static void Init(Delay aFirstDelay) { + sAllocDelay = aFirstDelay; + + LOG("Initial sAllocDelay <- %zu\n", size_t(aFirstDelay)); + } + + static Time Now() { return sNow; } + + static void IncrementNow() { sNow++; } + + // Decrements the delay and returns the decremented value. + static int32_t DecrementDelay() { return --sAllocDelay; } + + static void SetAllocDelay(Delay aAllocDelay) { sAllocDelay = aAllocDelay; } + + static bool AllocDelayHasWrapped(Delay aAvgAllocDelay, + Delay aAvgFirstAllocDelay) { + // Delay is unsigned so we can't test for less that zero. Instead test if + // it has wrapped around by comparing with the maximum value we ever use. + return sAllocDelay > 2 * std::max(aAvgAllocDelay, aAvgFirstAllocDelay); + } + + private: + // The current time. Relaxed semantics because it's primarily used for + // determining if an allocation can be recycled yet and therefore it doesn't + // need to be exact. + static Atomic<Time, Relaxed> sNow; + + // Delay until the next attempt at a page allocation. See the comment in + // MaybePageAlloc() for an explanation of why it uses ReleaseAcquire + // semantics. + static Atomic<Delay, ReleaseAcquire> sAllocDelay; +}; + +Atomic<Time, Relaxed> GAtomic::sNow; +Atomic<Delay, ReleaseAcquire> GAtomic::sAllocDelay; + +// Shared, immutable global state. Initialized by replace_init() and never +// changed after that. replace_init() runs early enough that no synchronization +// is needed. +class GConst { + private: + // The bounds of the allocated pages. + uint8_t* const mPagesStart; + uint8_t* const mPagesLimit; + + // Allocates the allocation pages and the guard pages, contiguously. + uint8_t* AllocAllPages() { + // The memory allocated here is never freed, because it would happen at + // process termination when it would be of little use. + + // We can rely on jemalloc's behaviour that when it allocates memory aligned + // with its own chunk size it will over-allocate and guarantee that the + // memory after the end of our allocation, but before the next chunk, is + // decommitted and inaccessible. Elsewhere in PHC we assume that we own + // that page (so that memory errors in it get caught by PHC) but here we + // use kAllPagesJemallocSize which subtracts jemalloc's guard page. + void* pages = MozJemalloc::memalign(kPhcAlign, kAllPagesJemallocSize); + if (!pages) { + MOZ_CRASH(); + } + + // Make the pages inaccessible. +#ifdef XP_WIN + if (!VirtualFree(pages, kAllPagesJemallocSize, MEM_DECOMMIT)) { + MOZ_CRASH("VirtualFree failed"); + } +#else + if (mmap(pages, kAllPagesJemallocSize, PROT_NONE, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == MAP_FAILED) { + MOZ_CRASH("mmap failed"); + } +#endif + + return static_cast<uint8_t*>(pages); + } + + public: + GConst() + : mPagesStart(AllocAllPages()), mPagesLimit(mPagesStart + kAllPagesSize) { + LOG("AllocAllPages at %p..%p\n", mPagesStart, mPagesLimit); + } + + class PtrKind PtrKind(const void* aPtr) { + class PtrKind pk(aPtr, mPagesStart, mPagesLimit); + return pk; + } + + bool IsInFirstGuardPage(const void* aPtr) { + return mPagesStart <= aPtr && aPtr < mPagesStart + kPageSize; + } + + // Get the address of the allocation page referred to via an index. Used when + // marking the page as accessible/inaccessible. + uint8_t* AllocPagePtr(uintptr_t aIndex) { + MOZ_ASSERT(aIndex < kNumAllocPages); + // Multiply by two and add one to account for allocation pages *and* guard + // pages. + return mPagesStart + (2 * aIndex + 1) * kPageSize; + } +}; + +static GConst* gConst; + +// This type is used as a proof-of-lock token, to make it clear which functions +// require sMutex to be locked. +using GMutLock = const MutexAutoLock&; + +// Shared, mutable global state. Protected by sMutex; all accessing functions +// take a GMutLock as proof that sMutex is held. +class GMut { + enum class AllocPageState { + NeverAllocated = 0, + InUse = 1, + Freed = 2, + }; + + // Metadata for each allocation page. + class AllocPageInfo { + public: + AllocPageInfo() + : mState(AllocPageState::NeverAllocated), + mBaseAddr(nullptr), + mReuseTime(0) {} + + // The current allocation page state. + AllocPageState mState; + + // The arena that the allocation is nominally from. This isn't meaningful + // within PHC, which has no arenas. But it is necessary for reallocation of + // page allocations as normal allocations, such as in this code: + // + // p = moz_arena_malloc(arenaId, 4096); + // realloc(p, 8192); + // + // The realloc is more than one page, and thus too large for PHC to handle. + // Therefore, if PHC handles the first allocation, it must ask mozjemalloc + // to allocate the 8192 bytes in the correct arena, and to do that, it must + // call MozJemalloc::moz_arena_malloc with the correct arenaId under the + // covers. Therefore it must record that arenaId. + // + // This field is also needed for jemalloc_ptr_info() to work, because it + // also returns the arena ID (but only in debug builds). + // + // - NeverAllocated: must be 0. + // - InUse | Freed: can be any valid arena ID value. + Maybe<arena_id_t> mArenaId; + + // The starting address of the allocation. Will not be the same as the page + // address unless the allocation is a full page. + // - NeverAllocated: must be 0. + // - InUse | Freed: must be within the allocation page. + uint8_t* mBaseAddr; + + // Usable size is computed as the number of bytes between the pointer and + // the end of the allocation page. This might be bigger than the requested + // size, especially if an outsized alignment is requested. + size_t UsableSize() const { + return mState == AllocPageState::NeverAllocated + ? 0 + : kPageSize - (reinterpret_cast<uintptr_t>(mBaseAddr) & + (kPageSize - 1)); + } + + // The internal fragmentation for this allocation. + size_t FragmentationBytes() const { + MOZ_ASSERT(kPageSize >= UsableSize()); + return mState == AllocPageState::InUse ? kPageSize - UsableSize() : 0; + } + + // The allocation stack. + // - NeverAllocated: Nothing. + // - InUse | Freed: Some. + Maybe<StackTrace> mAllocStack; + + // The free stack. + // - NeverAllocated | InUse: Nothing. + // - Freed: Some. + Maybe<StackTrace> mFreeStack; + + // The time at which the page is available for reuse, as measured against + // GAtomic::sNow. When the page is in use this value will be kMaxTime. + // - NeverAllocated: must be 0. + // - InUse: must be kMaxTime. + // - Freed: must be > 0 and < kMaxTime. + Time mReuseTime; + }; + + public: + // The mutex that protects the other members. + static Mutex sMutex MOZ_UNANNOTATED; + + GMut() : mRNG(RandomSeed<0>(), RandomSeed<1>()) { sMutex.Init(); } + + uint64_t Random64(GMutLock) { return mRNG.next(); } + + bool IsPageInUse(GMutLock, uintptr_t aIndex) { + return mAllocPages[aIndex].mState == AllocPageState::InUse; + } + + // Is the page free? And if so, has enough time passed that we can use it? + bool IsPageAllocatable(GMutLock, uintptr_t aIndex, Time aNow) { + const AllocPageInfo& page = mAllocPages[aIndex]; + return page.mState != AllocPageState::InUse && aNow >= page.mReuseTime; + } + + // Get the address of the allocation page referred to via an index. Used + // when checking pointers against page boundaries. + uint8_t* AllocPageBaseAddr(GMutLock, uintptr_t aIndex) { + return mAllocPages[aIndex].mBaseAddr; + } + + Maybe<arena_id_t> PageArena(GMutLock aLock, uintptr_t aIndex) { + const AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageInUse(aLock, page); + + return page.mArenaId; + } + + size_t PageUsableSize(GMutLock aLock, uintptr_t aIndex) { + const AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageInUse(aLock, page); + + return page.UsableSize(); + } + + // The total fragmentation in PHC + size_t FragmentationBytes() const { + size_t sum = 0; + for (const auto& page : mAllocPages) { + sum += page.FragmentationBytes(); + } + return sum; + } + + void SetPageInUse(GMutLock aLock, uintptr_t aIndex, + const Maybe<arena_id_t>& aArenaId, uint8_t* aBaseAddr, + const StackTrace& aAllocStack) { + AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageNotInUse(aLock, page); + + page.mState = AllocPageState::InUse; + page.mArenaId = aArenaId; + page.mBaseAddr = aBaseAddr; + page.mAllocStack = Some(aAllocStack); + page.mFreeStack = Nothing(); + page.mReuseTime = kMaxTime; + } + +#if PHC_LOGGING + Time GetFreeTime(uintptr_t aIndex) const { return mFreeTime[aIndex]; } +#endif + + void ResizePageInUse(GMutLock aLock, uintptr_t aIndex, + const Maybe<arena_id_t>& aArenaId, uint8_t* aNewBaseAddr, + const StackTrace& aAllocStack) { + AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageInUse(aLock, page); + + // page.mState is not changed. + if (aArenaId.isSome()) { + // Crash if the arenas don't match. + MOZ_RELEASE_ASSERT(page.mArenaId == aArenaId); + } + page.mBaseAddr = aNewBaseAddr; + // We could just keep the original alloc stack, but the realloc stack is + // more recent and therefore seems more useful. + page.mAllocStack = Some(aAllocStack); + // page.mFreeStack is not changed. + // page.mReuseTime is not changed. + }; + + void SetPageFreed(GMutLock aLock, uintptr_t aIndex, + const Maybe<arena_id_t>& aArenaId, + const StackTrace& aFreeStack, Delay aReuseDelay) { + AllocPageInfo& page = mAllocPages[aIndex]; + AssertAllocPageInUse(aLock, page); + + page.mState = AllocPageState::Freed; + + // page.mArenaId is left unchanged, for jemalloc_ptr_info() calls that + // occur after freeing (e.g. in the PtrInfo test in TestJemalloc.cpp). + if (aArenaId.isSome()) { + // Crash if the arenas don't match. + MOZ_RELEASE_ASSERT(page.mArenaId == aArenaId); + } + + // page.musableSize is left unchanged, for reporting on UAF, and for + // jemalloc_ptr_info() calls that occur after freeing (e.g. in the PtrInfo + // test in TestJemalloc.cpp). + + // page.mAllocStack is left unchanged, for reporting on UAF. + + page.mFreeStack = Some(aFreeStack); + Time now = GAtomic::Now(); +#if PHC_LOGGING + mFreeTime[aIndex] = now; +#endif + page.mReuseTime = now + aReuseDelay; + } + + static void CrashOnGuardPage(void* aPtr) { + // An operation on a guard page? This is a bounds violation. Deliberately + // touch the page in question to cause a crash that triggers the usual PHC + // machinery. + LOG("CrashOnGuardPage(%p), bounds violation\n", aPtr); + *static_cast<uint8_t*>(aPtr) = 0; + MOZ_CRASH("unreachable"); + } + + void EnsureValidAndInUse(GMutLock, void* aPtr, uintptr_t aIndex) + MOZ_REQUIRES(sMutex) { + const AllocPageInfo& page = mAllocPages[aIndex]; + + // The pointer must point to the start of the allocation. + MOZ_RELEASE_ASSERT(page.mBaseAddr == aPtr); + + if (page.mState == AllocPageState::Freed) { + LOG("EnsureValidAndInUse(%p), use-after-free\n", aPtr); + // An operation on a freed page? This is a particular kind of + // use-after-free. Deliberately touch the page in question, in order to + // cause a crash that triggers the usual PHC machinery. But unlock sMutex + // first, because that self-same PHC machinery needs to re-lock it, and + // the crash causes non-local control flow so sMutex won't be unlocked + // the normal way in the caller. + sMutex.Unlock(); + *static_cast<uint8_t*>(aPtr) = 0; + MOZ_CRASH("unreachable"); + } + } + + // This expects GMUt::sMutex to be locked but can't check it with a parameter + // since we try-lock it. + void FillAddrInfo(uintptr_t aIndex, const void* aBaseAddr, bool isGuardPage, + phc::AddrInfo& aOut) { + const AllocPageInfo& page = mAllocPages[aIndex]; + if (isGuardPage) { + aOut.mKind = phc::AddrInfo::Kind::GuardPage; + } else { + switch (page.mState) { + case AllocPageState::NeverAllocated: + aOut.mKind = phc::AddrInfo::Kind::NeverAllocatedPage; + break; + + case AllocPageState::InUse: + aOut.mKind = phc::AddrInfo::Kind::InUsePage; + break; + + case AllocPageState::Freed: + aOut.mKind = phc::AddrInfo::Kind::FreedPage; + break; + + default: + MOZ_CRASH(); + } + } + aOut.mBaseAddr = page.mBaseAddr; + aOut.mUsableSize = page.UsableSize(); + aOut.mAllocStack = page.mAllocStack; + aOut.mFreeStack = page.mFreeStack; + } + + void FillJemallocPtrInfo(GMutLock, const void* aPtr, uintptr_t aIndex, + jemalloc_ptr_info_t* aInfo) { + const AllocPageInfo& page = mAllocPages[aIndex]; + switch (page.mState) { + case AllocPageState::NeverAllocated: + break; + + case AllocPageState::InUse: { + // Only return TagLiveAlloc if the pointer is within the bounds of the + // allocation's usable size. + uint8_t* base = page.mBaseAddr; + uint8_t* limit = base + page.UsableSize(); + if (base <= aPtr && aPtr < limit) { + *aInfo = {TagLiveAlloc, page.mBaseAddr, page.UsableSize(), + page.mArenaId.valueOr(0)}; + return; + } + break; + } + + case AllocPageState::Freed: { + // Only return TagFreedAlloc if the pointer is within the bounds of the + // former allocation's usable size. + uint8_t* base = page.mBaseAddr; + uint8_t* limit = base + page.UsableSize(); + if (base <= aPtr && aPtr < limit) { + *aInfo = {TagFreedAlloc, page.mBaseAddr, page.UsableSize(), + page.mArenaId.valueOr(0)}; + return; + } + break; + } + + default: + MOZ_CRASH(); + } + + // Pointers into guard pages will end up here, as will pointers into + // allocation pages that aren't within the allocation's bounds. + *aInfo = {TagUnknown, nullptr, 0, 0}; + } + +#ifndef XP_WIN + static void prefork() MOZ_NO_THREAD_SAFETY_ANALYSIS { sMutex.Lock(); } + static void postfork_parent() MOZ_NO_THREAD_SAFETY_ANALYSIS { + sMutex.Unlock(); + } + static void postfork_child() { sMutex.Init(); } +#endif + +#if PHC_LOGGING + void IncPageAllocHits(GMutLock) { mPageAllocHits++; } + void IncPageAllocMisses(GMutLock) { mPageAllocMisses++; } +#else + void IncPageAllocHits(GMutLock) {} + void IncPageAllocMisses(GMutLock) {} +#endif + + phc::PHCStats GetPageStats(GMutLock) { + phc::PHCStats stats; + + for (const auto& page : mAllocPages) { + stats.mSlotsAllocated += page.mState == AllocPageState::InUse ? 1 : 0; + stats.mSlotsFreed += page.mState == AllocPageState::Freed ? 1 : 0; + } + stats.mSlotsUnused = + kNumAllocPages - stats.mSlotsAllocated - stats.mSlotsFreed; + + return stats; + } + +#if PHC_LOGGING + size_t PageAllocHits(GMutLock) { return mPageAllocHits; } + size_t PageAllocAttempts(GMutLock) { + return mPageAllocHits + mPageAllocMisses; + } + + // This is an integer because FdPrintf only supports integer printing. + size_t PageAllocHitRate(GMutLock) { + return mPageAllocHits * 100 / (mPageAllocHits + mPageAllocMisses); + } +#endif + + // Should we make new PHC allocations? + bool ShouldMakeNewAllocations() const { + return mPhcState == mozilla::phc::Enabled; + } + + using PHCState = mozilla::phc::PHCState; + void SetState(PHCState aState) { + if (mPhcState != PHCState::Enabled && aState == PHCState::Enabled) { + MutexAutoLock lock(GMut::sMutex); + GAtomic::Init(Rnd64ToDelay(mAvgFirstAllocDelay, Random64(lock))); + } + + mPhcState = aState; + } + + void SetProbabilities(int64_t aAvgDelayFirst, int64_t aAvgDelayNormal, + int64_t aAvgDelayPageReuse) { + MutexAutoLock lock(GMut::sMutex); + + mAvgFirstAllocDelay = CheckProbability(aAvgDelayFirst); + mAvgAllocDelay = CheckProbability(aAvgDelayNormal); + mAvgPageReuseDelay = CheckProbability(aAvgDelayPageReuse); + } + + private: + template <int N> + uint64_t RandomSeed() { + // An older version of this code used RandomUint64() here, but on Mac that + // function uses arc4random(), which can allocate, which would cause + // re-entry, which would be bad. So we just use time() and a local variable + // address. These are mediocre sources of entropy, but good enough for PHC. + static_assert(N == 0 || N == 1, "must be 0 or 1"); + uint64_t seed; + if (N == 0) { + time_t t = time(nullptr); + seed = t ^ (t << 32); + } else { + seed = uintptr_t(&seed) ^ (uintptr_t(&seed) << 32); + } + return seed; + } + + void AssertAllocPageInUse(GMutLock, const AllocPageInfo& aPage) { + MOZ_ASSERT(aPage.mState == AllocPageState::InUse); + // There is nothing to assert about aPage.mArenaId. + MOZ_ASSERT(aPage.mBaseAddr); + MOZ_ASSERT(aPage.UsableSize() > 0); + MOZ_ASSERT(aPage.mAllocStack.isSome()); + MOZ_ASSERT(aPage.mFreeStack.isNothing()); + MOZ_ASSERT(aPage.mReuseTime == kMaxTime); + } + + void AssertAllocPageNotInUse(GMutLock, const AllocPageInfo& aPage) { + // We can assert a lot about `NeverAllocated` pages, but not much about + // `Freed` pages. +#ifdef DEBUG + bool isFresh = aPage.mState == AllocPageState::NeverAllocated; + MOZ_ASSERT(isFresh || aPage.mState == AllocPageState::Freed); + MOZ_ASSERT_IF(isFresh, aPage.mArenaId == Nothing()); + MOZ_ASSERT(isFresh == (aPage.mBaseAddr == nullptr)); + MOZ_ASSERT(isFresh == (aPage.mAllocStack.isNothing())); + MOZ_ASSERT(isFresh == (aPage.mFreeStack.isNothing())); + MOZ_ASSERT(aPage.mReuseTime != kMaxTime); +#endif + } + + // RNG for deciding which allocations to treat specially. It doesn't need to + // be high quality. + // + // This is a raw pointer for the reason explained in the comment above + // GMut's constructor. Don't change it to UniquePtr or anything like that. + non_crypto::XorShift128PlusRNG mRNG; + + AllocPageInfo mAllocPages[kNumAllocPages]; +#if PHC_LOGGING + Time mFreeTime[kNumAllocPages]; + + // How many allocations that could have been page allocs actually were? As + // constrained kNumAllocPages. If the hit ratio isn't close to 100% it's + // likely that the global constants are poorly chosen. + size_t mPageAllocHits = 0; + size_t mPageAllocMisses = 0; +#endif + + // This will only ever be updated from one thread. The other threads should + // eventually get the update. + Atomic<PHCState, Relaxed> mPhcState = + Atomic<PHCState, Relaxed>(DEFAULT_STATE); + + // The average delay before doing any page allocations at the start of a + // process. Note that roughly 1 million allocations occur in the main process + // while starting the browser. The delay range is 1..gAvgFirstAllocDelay*2. + Delay mAvgFirstAllocDelay = 64 * 1024; + + // The average delay until the next attempted page allocation, once we get + // past the first delay. The delay range is 1..kAvgAllocDelay*2. + Delay mAvgAllocDelay = 16 * 1024; + + // The average delay before reusing a freed page. Should be significantly + // larger than kAvgAllocDelay, otherwise there's not much point in having it. + // The delay range is (kAvgAllocDelay / 2)..(kAvgAllocDelay / 2 * 3). This is + // different to the other delay ranges in not having a minimum of 1, because + // that's such a short delay that there is a high likelihood of bad stacks in + // any crash report. + Delay mAvgPageReuseDelay = 256 * 1024; + + public: + Delay GetAvgAllocDelay(const MutexAutoLock&) { return mAvgAllocDelay; } + Delay GetAvgFirstAllocDelay(const MutexAutoLock&) { + return mAvgFirstAllocDelay; + } + Delay GetAvgPageReuseDelay(const MutexAutoLock&) { + return mAvgPageReuseDelay; + } +}; + +Mutex GMut::sMutex; + +static GMut* gMut; + +// When PHC wants to crash we first have to unlock so that the crash reporter +// can call into PHC to lockup its pointer. That also means that before calling +// PHCCrash please ensure that state is consistent. Because this can report an +// arbitrary string, use of it must be reviewed by Firefox data stewards. +static void PHCCrash(GMutLock, const char* aMessage) + MOZ_REQUIRES(GMut::sMutex) { + GMut::sMutex.Unlock(); + MOZ_CRASH_UNSAFE(aMessage); +} + +// On MacOS, the first __thread/thread_local access calls malloc, which leads +// to an infinite loop. So we use pthread-based TLS instead, which somehow +// doesn't have this problem. +#if !defined(XP_DARWIN) +# define PHC_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T) +#else +# define PHC_THREAD_LOCAL(T) \ + detail::ThreadLocal<T, detail::ThreadLocalKeyStorage> +#endif + +// Thread-local state. +class GTls { + public: + GTls(const GTls&) = delete; + + const GTls& operator=(const GTls&) = delete; + + // When true, PHC does as little as possible. + // + // (a) It does not allocate any new page allocations. + // + // (b) It avoids doing any operations that might call malloc/free/etc., which + // would cause re-entry into PHC. (In practice, MozStackWalk() is the + // only such operation.) Note that calls to the functions in MozJemalloc + // are ok. + // + // For example, replace_malloc() will just fall back to mozjemalloc. However, + // operations involving existing allocations are more complex, because those + // existing allocations may be page allocations. For example, if + // replace_free() is passed a page allocation on a PHC-disabled thread, it + // will free the page allocation in the usual way, but it will get a dummy + // freeStack in order to avoid calling MozStackWalk(), as per (b) above. + // + // This single disabling mechanism has two distinct uses. + // + // - It's used to prevent re-entry into PHC, which can cause correctness + // problems. For example, consider this sequence. + // + // 1. enter replace_free() + // 2. which calls PageFree() + // 3. which calls MozStackWalk() + // 4. which locks a mutex M, and then calls malloc + // 5. enter replace_malloc() + // 6. which calls MaybePageAlloc() + // 7. which calls MozStackWalk() + // 8. which (re)locks a mutex M --> deadlock + // + // We avoid this sequence by "disabling" the thread in PageFree() (at step + // 2), which causes MaybePageAlloc() to fail, avoiding the call to + // MozStackWalk() (at step 7). + // + // In practice, realloc or free of a PHC allocation is unlikely on a thread + // that is disabled because of this use: MozStackWalk() will probably only + // realloc/free allocations that it allocated itself, but those won't be + // page allocations because PHC is disabled before calling MozStackWalk(). + // + // (Note that MaybePageAlloc() could safely do a page allocation so long as + // it avoided calling MozStackWalk() by getting a dummy allocStack. But it + // wouldn't be useful, and it would prevent the second use below.) + // + // - It's used to prevent PHC allocations in some tests that rely on + // mozjemalloc's exact allocation behaviour, which PHC does not replicate + // exactly. (Note that (b) isn't necessary for this use -- MozStackWalk() + // could be safely called -- but it is necessary for the first use above.) + // + + static void Init() { + if (!tlsIsDisabled.init()) { + MOZ_CRASH(); + } + } + + static void DisableOnCurrentThread() { + MOZ_ASSERT(!GTls::tlsIsDisabled.get()); + tlsIsDisabled.set(true); + } + + static void EnableOnCurrentThread() { + MOZ_ASSERT(GTls::tlsIsDisabled.get()); + MutexAutoLock lock(GMut::sMutex); + Delay avg_delay = gMut->GetAvgAllocDelay(lock); + Delay avg_first_delay = gMut->GetAvgFirstAllocDelay(lock); + if (GAtomic::AllocDelayHasWrapped(avg_delay, avg_first_delay)) { + GAtomic::SetAllocDelay(Rnd64ToDelay(avg_delay, gMut->Random64(lock))); + } + tlsIsDisabled.set(false); + } + + static bool IsDisabledOnCurrentThread() { return tlsIsDisabled.get(); } + + private: + static PHC_THREAD_LOCAL(bool) tlsIsDisabled; +}; + +PHC_THREAD_LOCAL(bool) GTls::tlsIsDisabled; + +class AutoDisableOnCurrentThread { + public: + AutoDisableOnCurrentThread(const AutoDisableOnCurrentThread&) = delete; + + const AutoDisableOnCurrentThread& operator=( + const AutoDisableOnCurrentThread&) = delete; + + explicit AutoDisableOnCurrentThread() { GTls::DisableOnCurrentThread(); } + ~AutoDisableOnCurrentThread() { GTls::EnableOnCurrentThread(); } +}; + +//--------------------------------------------------------------------------- +// Initialisation +//--------------------------------------------------------------------------- + +// WARNING: this function runs *very* early -- before all static initializers +// have run. For this reason, non-scalar globals (gConst, gMut) are allocated +// dynamically (so we can guarantee their construction in this function) rather +// than statically. GAtomic and GTls contain simple static data that doesn't +// involve static initializers so they don't need to be allocated dynamically. +static bool phc_init() { + if (GetKernelPageSize() != kPageSize) { + return false; + } + + // gConst and gMut are never freed. They live for the life of the process. + gConst = InfallibleAllocPolicy::new_<GConst>(); + + GTls::Init(); + gMut = InfallibleAllocPolicy::new_<GMut>(); + +#ifndef XP_WIN + // Avoid deadlocks when forking by acquiring our state lock prior to forking + // and releasing it after forking. See |LogAlloc|'s |replace_init| for + // in-depth details. + pthread_atfork(GMut::prefork, GMut::postfork_parent, GMut::postfork_child); +#endif + + return true; +} + +static inline bool maybe_init() { + static bool sInitSuccess = []() { return phc_init(); }(); + return sInitSuccess; +} + +//--------------------------------------------------------------------------- +// Page allocation operations +//--------------------------------------------------------------------------- + +// Attempt a page allocation if the time and the size are right. Allocated +// memory is zeroed if aZero is true. On failure, the caller should attempt a +// normal allocation via MozJemalloc. Can be called in a context where +// GMut::sMutex is locked. +static void* MaybePageAlloc(const Maybe<arena_id_t>& aArenaId, size_t aReqSize, + size_t aAlignment, bool aZero) { + MOZ_ASSERT(IsPowerOfTwo(aAlignment)); + + if (!maybe_init()) { + return nullptr; + } + + if (aReqSize > kPageSize) { + return nullptr; + } + + MOZ_ASSERT(gMut); + if (!gMut->ShouldMakeNewAllocations()) { + return nullptr; + } + + GAtomic::IncrementNow(); + + // Decrement the delay. If it's zero, we do a page allocation and reset the + // delay to a random number. Because the assignment to the random number isn't + // atomic w.r.t. the decrement, we might have a sequence like this: + // + // Thread 1 Thread 2 Thread 3 + // -------- -------- -------- + // (a) newDelay = --sAllocDelay (-> 0) + // (b) --sAllocDelay (-> -1) + // (c) (newDelay != 0) fails + // (d) --sAllocDelay (-> -2) + // (e) sAllocDelay = new_random_number() + // + // It's critical that sAllocDelay has ReleaseAcquire semantics, because that + // guarantees that exactly one thread will see sAllocDelay have the value 0. + // (Relaxed semantics wouldn't guarantee that.) + // + // Note that sAllocDelay is unsigned and we expect that it will wrap after + // being decremented "below" zero. It must be unsigned so that IsPowerOfTwo() + // can work on some Delay values. + // + // Finally, note that the decrements that occur between (a) and (e) above are + // effectively ignored, because (e) clobbers them. This shouldn't be a + // problem; it effectively just adds a little more randomness to + // new_random_number(). An early version of this code tried to account for + // these decrements by doing `sAllocDelay += new_random_number()`. However, if + // new_random_value() is small, the number of decrements between (a) and (e) + // can easily exceed it, whereupon sAllocDelay ends up negative after + // `sAllocDelay += new_random_number()`, and the zero-check never succeeds + // again. (At least, not until sAllocDelay wraps around on overflow, which + // would take a very long time indeed.) + // + int32_t newDelay = GAtomic::DecrementDelay(); + if (newDelay != 0) { + return nullptr; + } + + if (GTls::IsDisabledOnCurrentThread()) { + return nullptr; + } + + // Disable on this thread *before* getting the stack trace. + AutoDisableOnCurrentThread disable; + + // Get the stack trace *before* locking the mutex. If we return nullptr then + // it was a waste, but it's not so frequent, and doing a stack walk while + // the mutex is locked is problematic (see the big comment on + // StackTrace::Fill() for details). + StackTrace allocStack; + allocStack.Fill(); + + MutexAutoLock lock(GMut::sMutex); + + Time now = GAtomic::Now(); + Delay newAllocDelay = + Rnd64ToDelay(gMut->GetAvgAllocDelay(lock), gMut->Random64(lock)); + + // We start at a random page alloc and wrap around, to ensure pages get even + // amounts of use. + uint8_t* ptr = nullptr; + uint8_t* pagePtr = nullptr; + for (uintptr_t n = 0, i = size_t(gMut->Random64(lock)) % kNumAllocPages; + n < kNumAllocPages; n++, i = (i + 1) % kNumAllocPages) { + if (!gMut->IsPageAllocatable(lock, i, now)) { + continue; + } + +#if PHC_LOGGING + Time lifetime = 0; +#endif + pagePtr = gConst->AllocPagePtr(i); + MOZ_ASSERT(pagePtr); + bool ok = +#ifdef XP_WIN + !!VirtualAlloc(pagePtr, kPageSize, MEM_COMMIT, PAGE_READWRITE); +#else + mprotect(pagePtr, kPageSize, PROT_READ | PROT_WRITE) == 0; +#endif + + if (!ok) { + pagePtr = nullptr; + continue; + } + + size_t usableSize = MozJemalloc::malloc_good_size(aReqSize); + MOZ_ASSERT(usableSize > 0); + + // Put the allocation as close to the end of the page as possible, + // allowing for alignment requirements. + ptr = pagePtr + kPageSize - usableSize; + if (aAlignment != 1) { + ptr = reinterpret_cast<uint8_t*>( + (reinterpret_cast<uintptr_t>(ptr) & ~(aAlignment - 1))); + } + +#if PHC_LOGGING + Time then = gMut->GetFreeTime(i); + lifetime = then != 0 ? now - then : 0; +#endif + + gMut->SetPageInUse(lock, i, aArenaId, ptr, allocStack); + + if (aZero) { + memset(ptr, 0, usableSize); + } else { +#ifdef DEBUG + memset(ptr, kAllocJunk, usableSize); +#endif + } + + gMut->IncPageAllocHits(lock); +#if PHC_LOGGING + phc::PHCStats stats = gMut->GetPageStats(lock); +#endif + LOG("PageAlloc(%zu, %zu) -> %p[%zu]/%p (%zu) (z%zu), sAllocDelay <- %zu, " + "fullness %zu/%zu/%zu, hits %zu/%zu (%zu%%), lifetime %zu\n", + aReqSize, aAlignment, pagePtr, i, ptr, usableSize, size_t(aZero), + size_t(newAllocDelay), stats.mSlotsAllocated, stats.mSlotsFreed, + kNumAllocPages, gMut->PageAllocHits(lock), + gMut->PageAllocAttempts(lock), gMut->PageAllocHitRate(lock), lifetime); + break; + } + + if (!pagePtr) { + // No pages are available, or VirtualAlloc/mprotect failed. + gMut->IncPageAllocMisses(lock); +#if PHC_LOGGING + phc::PHCStats stats = gMut->GetPageStats(lock); +#endif + LOG("No PageAlloc(%zu, %zu), sAllocDelay <- %zu, fullness %zu/%zu/%zu, " + "hits %zu/%zu (%zu%%)\n", + aReqSize, aAlignment, size_t(newAllocDelay), stats.mSlotsAllocated, + stats.mSlotsFreed, kNumAllocPages, gMut->PageAllocHits(lock), + gMut->PageAllocAttempts(lock), gMut->PageAllocHitRate(lock)); + } + + // Set the new alloc delay. + GAtomic::SetAllocDelay(newAllocDelay); + + return ptr; +} + +static void FreePage(GMutLock aLock, uintptr_t aIndex, + const Maybe<arena_id_t>& aArenaId, + const StackTrace& aFreeStack, Delay aReuseDelay) + MOZ_REQUIRES(GMut::sMutex) { + void* pagePtr = gConst->AllocPagePtr(aIndex); + +#ifdef XP_WIN + if (!VirtualFree(pagePtr, kPageSize, MEM_DECOMMIT)) { + PHCCrash(aLock, "VirtualFree failed"); + } +#else + if (mmap(pagePtr, kPageSize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, + -1, 0) == MAP_FAILED) { + PHCCrash(aLock, "mmap failed"); + } +#endif + + gMut->SetPageFreed(aLock, aIndex, aArenaId, aFreeStack, aReuseDelay); +} + +//--------------------------------------------------------------------------- +// replace-malloc machinery +//--------------------------------------------------------------------------- + +// This handles malloc, moz_arena_malloc, and realloc-with-a-nullptr. +MOZ_ALWAYS_INLINE static void* PageMalloc(const Maybe<arena_id_t>& aArenaId, + size_t aReqSize) { + void* ptr = MaybePageAlloc(aArenaId, aReqSize, /* aAlignment */ 1, + /* aZero */ false); + return ptr ? ptr + : (aArenaId.isSome() + ? MozJemalloc::moz_arena_malloc(*aArenaId, aReqSize) + : MozJemalloc::malloc(aReqSize)); +} + +inline void* MozJemallocPHC::malloc(size_t aReqSize) { + return PageMalloc(Nothing(), aReqSize); +} + +static Delay ReuseDelay(GMutLock aLock) { + Delay avg_reuse_delay = gMut->GetAvgPageReuseDelay(aLock); + return (avg_reuse_delay / 2) + + Rnd64ToDelay(avg_reuse_delay / 2, gMut->Random64(aLock)); +} + +// This handles both calloc and moz_arena_calloc. +MOZ_ALWAYS_INLINE static void* PageCalloc(const Maybe<arena_id_t>& aArenaId, + size_t aNum, size_t aReqSize) { + CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aNum) * aReqSize; + if (!checkedSize.isValid()) { + return nullptr; + } + + void* ptr = MaybePageAlloc(aArenaId, checkedSize.value(), /* aAlignment */ 1, + /* aZero */ true); + return ptr ? ptr + : (aArenaId.isSome() + ? MozJemalloc::moz_arena_calloc(*aArenaId, aNum, aReqSize) + : MozJemalloc::calloc(aNum, aReqSize)); +} + +inline void* MozJemallocPHC::calloc(size_t aNum, size_t aReqSize) { + return PageCalloc(Nothing(), aNum, aReqSize); +} + +// This function handles both realloc and moz_arena_realloc. +// +// As always, realloc is complicated, and doubly so when there are two +// different kinds of allocations in play. Here are the possible transitions, +// and what we do in practice. +// +// - normal-to-normal: This is straightforward and obviously necessary. +// +// - normal-to-page: This is disallowed because it would require getting the +// arenaId of the normal allocation, which isn't possible in non-DEBUG builds +// for security reasons. +// +// - page-to-page: This is done whenever possible, i.e. whenever the new size +// is less than or equal to 4 KiB. This choice counterbalances the +// disallowing of normal-to-page allocations, in order to avoid biasing +// towards or away from page allocations. It always occurs in-place. +// +// - page-to-normal: this is done only when necessary, i.e. only when the new +// size is greater than 4 KiB. This choice naturally flows from the +// prior choice on page-to-page transitions. +// +// In summary: realloc doesn't change the allocation kind unless it must. +// +MOZ_ALWAYS_INLINE static void* MaybePageRealloc( + const Maybe<arena_id_t>& aArenaId, void* aOldPtr, size_t aNewSize) { + if (!aOldPtr) { + // Null pointer. Treat like malloc(aNewSize). + return PageMalloc(aArenaId, aNewSize); + } + + if (!maybe_init()) { + return nullptr; + } + + PtrKind pk = gConst->PtrKind(aOldPtr); + if (pk.IsNothing()) { + // A normal-to-normal transition. + return nullptr; + } + + if (pk.IsGuardPage()) { + GMut::CrashOnGuardPage(aOldPtr); + } + + // At this point we know we have an allocation page. + uintptr_t index = pk.AllocPageIndex(); + + // A page-to-something transition. + + // Note that `disable` has no effect unless it is emplaced below. + Maybe<AutoDisableOnCurrentThread> disable; + // Get the stack trace *before* locking the mutex. + StackTrace stack; + if (GTls::IsDisabledOnCurrentThread()) { + // PHC is disabled on this thread. Leave the stack empty. + } else { + // Disable on this thread *before* getting the stack trace. + disable.emplace(); + stack.Fill(); + } + + MutexAutoLock lock(GMut::sMutex); + + // Check for realloc() of a freed block. + gMut->EnsureValidAndInUse(lock, aOldPtr, index); + + if (aNewSize <= kPageSize && gMut->ShouldMakeNewAllocations()) { + // A page-to-page transition. Just keep using the page allocation. We do + // this even if the thread is disabled, because it doesn't create a new + // page allocation. Note that ResizePageInUse() checks aArenaId. + // + // Move the bytes with memmove(), because the old allocation and the new + // allocation overlap. Move the usable size rather than the requested size, + // because the user might have used malloc_usable_size() and filled up the + // usable size. + size_t oldUsableSize = gMut->PageUsableSize(lock, index); + size_t newUsableSize = MozJemalloc::malloc_good_size(aNewSize); + uint8_t* pagePtr = gConst->AllocPagePtr(index); + uint8_t* newPtr = pagePtr + kPageSize - newUsableSize; + memmove(newPtr, aOldPtr, std::min(oldUsableSize, aNewSize)); + gMut->ResizePageInUse(lock, index, aArenaId, newPtr, stack); + LOG("PageRealloc-Reuse(%p, %zu) -> %p\n", aOldPtr, aNewSize, newPtr); + return newPtr; + } + + // A page-to-normal transition (with the new size greater than page-sized). + // (Note that aArenaId is checked below.) + void* newPtr; + if (aArenaId.isSome()) { + newPtr = MozJemalloc::moz_arena_malloc(*aArenaId, aNewSize); + } else { + Maybe<arena_id_t> oldArenaId = gMut->PageArena(lock, index); + newPtr = (oldArenaId.isSome() + ? MozJemalloc::moz_arena_malloc(*oldArenaId, aNewSize) + : MozJemalloc::malloc(aNewSize)); + } + if (!newPtr) { + return nullptr; + } + + Delay reuseDelay = ReuseDelay(lock); + + // Copy the usable size rather than the requested size, because the user + // might have used malloc_usable_size() and filled up the usable size. Note + // that FreePage() checks aArenaId (via SetPageFreed()). + size_t oldUsableSize = gMut->PageUsableSize(lock, index); + memcpy(newPtr, aOldPtr, std::min(oldUsableSize, aNewSize)); + FreePage(lock, index, aArenaId, stack, reuseDelay); + LOG("PageRealloc-Free(%p[%zu], %zu) -> %p, %zu delay, reuse at ~%zu\n", + aOldPtr, index, aNewSize, newPtr, size_t(reuseDelay), + size_t(GAtomic::Now()) + reuseDelay); + + return newPtr; +} + +MOZ_ALWAYS_INLINE static void* PageRealloc(const Maybe<arena_id_t>& aArenaId, + void* aOldPtr, size_t aNewSize) { + void* ptr = MaybePageRealloc(aArenaId, aOldPtr, aNewSize); + + return ptr ? ptr + : (aArenaId.isSome() ? MozJemalloc::moz_arena_realloc( + *aArenaId, aOldPtr, aNewSize) + : MozJemalloc::realloc(aOldPtr, aNewSize)); +} + +inline void* MozJemallocPHC::realloc(void* aOldPtr, size_t aNewSize) { + return PageRealloc(Nothing(), aOldPtr, aNewSize); +} + +// This handles both free and moz_arena_free. +MOZ_ALWAYS_INLINE static bool MaybePageFree(const Maybe<arena_id_t>& aArenaId, + void* aPtr) { + if (!maybe_init()) { + return false; + } + + PtrKind pk = gConst->PtrKind(aPtr); + if (pk.IsNothing()) { + // Not a page allocation. + return false; + } + + if (pk.IsGuardPage()) { + GMut::CrashOnGuardPage(aPtr); + } + + // At this point we know we have an allocation page. + uintptr_t index = pk.AllocPageIndex(); + + // Note that `disable` has no effect unless it is emplaced below. + Maybe<AutoDisableOnCurrentThread> disable; + // Get the stack trace *before* locking the mutex. + StackTrace freeStack; + if (GTls::IsDisabledOnCurrentThread()) { + // PHC is disabled on this thread. Leave the stack empty. + } else { + // Disable on this thread *before* getting the stack trace. + disable.emplace(); + freeStack.Fill(); + } + + MutexAutoLock lock(GMut::sMutex); + + // Check for a double-free. + gMut->EnsureValidAndInUse(lock, aPtr, index); + + // Note that FreePage() checks aArenaId (via SetPageFreed()). + Delay reuseDelay = ReuseDelay(lock); + FreePage(lock, index, aArenaId, freeStack, reuseDelay); + +#if PHC_LOGGING + phc::PHCStats stats = gMut->GetPageStats(lock); +#endif + LOG("PageFree(%p[%zu]), %zu delay, reuse at ~%zu, fullness %zu/%zu/%zu\n", + aPtr, index, size_t(reuseDelay), size_t(GAtomic::Now()) + reuseDelay, + stats.mSlotsAllocated, stats.mSlotsFreed, kNumAllocPages); + + return true; +} + +MOZ_ALWAYS_INLINE static void PageFree(const Maybe<arena_id_t>& aArenaId, + void* aPtr) { + bool res = MaybePageFree(aArenaId, aPtr); + if (!res) { + aArenaId.isSome() ? MozJemalloc::moz_arena_free(*aArenaId, aPtr) + : MozJemalloc::free(aPtr); + } +} + +inline void MozJemallocPHC::free(void* aPtr) { PageFree(Nothing(), aPtr); } + +// This handles memalign and moz_arena_memalign. +MOZ_ALWAYS_INLINE static void* PageMemalign(const Maybe<arena_id_t>& aArenaId, + size_t aAlignment, + size_t aReqSize) { + MOZ_RELEASE_ASSERT(IsPowerOfTwo(aAlignment)); + + // PHC can't satisfy an alignment greater than a page size, so fall back to + // mozjemalloc in that case. + void* ptr = nullptr; + if (aAlignment <= kPageSize) { + ptr = MaybePageAlloc(aArenaId, aReqSize, aAlignment, /* aZero */ false); + } + return ptr ? ptr + : (aArenaId.isSome() + ? MozJemalloc::moz_arena_memalign(*aArenaId, aAlignment, + aReqSize) + : MozJemalloc::memalign(aAlignment, aReqSize)); +} + +inline void* MozJemallocPHC::memalign(size_t aAlignment, size_t aReqSize) { + return PageMemalign(Nothing(), aAlignment, aReqSize); +} + +inline size_t MozJemallocPHC::malloc_usable_size(usable_ptr_t aPtr) { + if (!maybe_init()) { + return MozJemalloc::malloc_usable_size(aPtr); + } + + PtrKind pk = gConst->PtrKind(aPtr); + if (pk.IsNothing()) { + // Not a page allocation. Measure it normally. + return MozJemalloc::malloc_usable_size(aPtr); + } + + if (pk.IsGuardPage()) { + GMut::CrashOnGuardPage(const_cast<void*>(aPtr)); + } + + // At this point we know aPtr lands within an allocation page, due to the + // math done in the PtrKind constructor. But if aPtr points to memory + // before the base address of the allocation, we return 0. + uintptr_t index = pk.AllocPageIndex(); + + MutexAutoLock lock(GMut::sMutex); + + void* pageBaseAddr = gMut->AllocPageBaseAddr(lock, index); + + if (MOZ_UNLIKELY(aPtr < pageBaseAddr)) { + return 0; + } + + return gMut->PageUsableSize(lock, index); +} + +static size_t metadata_size() { + return MozJemalloc::malloc_usable_size(gConst) + + MozJemalloc::malloc_usable_size(gMut); +} + +inline void MozJemallocPHC::jemalloc_stats_internal( + jemalloc_stats_t* aStats, jemalloc_bin_stats_t* aBinStats) { + MozJemalloc::jemalloc_stats_internal(aStats, aBinStats); + + if (!maybe_init()) { + // If we're not initialised, then we're not using any additional memory and + // have nothing to add to the report. + return; + } + + // We allocate our memory from jemalloc so it has already counted our memory + // usage within "mapped" and "allocated", we must subtract the memory we + // allocated from jemalloc from allocated before adding in only the parts that + // we have allocated out to Firefox. + + aStats->allocated -= kAllPagesJemallocSize; + + size_t allocated = 0; + { + MutexAutoLock lock(GMut::sMutex); + + // Add usable space of in-use allocations to `allocated`. + for (size_t i = 0; i < kNumAllocPages; i++) { + if (gMut->IsPageInUse(lock, i)) { + allocated += gMut->PageUsableSize(lock, i); + } + } + } + aStats->allocated += allocated; + + // guards is the gap between `allocated` and `mapped`. In some ways this + // almost fits into aStats->wasted since it feels like wasted memory. However + // wasted should only include committed memory and these guard pages are + // uncommitted. Therefore we don't include it anywhere. + // size_t guards = mapped - allocated; + + // aStats.page_cache and aStats.bin_unused are left unchanged because PHC + // doesn't have anything corresponding to those. + + // The metadata is stored in normal heap allocations, so they're measured by + // mozjemalloc as `allocated`. Move them into `bookkeeping`. + // They're also reported under explicit/heap-overhead/phc/fragmentation in + // about:memory. + size_t bookkeeping = metadata_size(); + aStats->allocated -= bookkeeping; + aStats->bookkeeping += bookkeeping; +} + +inline void MozJemallocPHC::jemalloc_ptr_info(const void* aPtr, + jemalloc_ptr_info_t* aInfo) { + if (!maybe_init()) { + return MozJemalloc::jemalloc_ptr_info(aPtr, aInfo); + } + + // We need to implement this properly, because various code locations do + // things like checking that allocations are in the expected arena. + PtrKind pk = gConst->PtrKind(aPtr); + if (pk.IsNothing()) { + // Not a page allocation. + return MozJemalloc::jemalloc_ptr_info(aPtr, aInfo); + } + + if (pk.IsGuardPage()) { + // Treat a guard page as unknown because there's no better alternative. + *aInfo = {TagUnknown, nullptr, 0, 0}; + return; + } + + // At this point we know we have an allocation page. + uintptr_t index = pk.AllocPageIndex(); + + MutexAutoLock lock(GMut::sMutex); + + gMut->FillJemallocPtrInfo(lock, aPtr, index, aInfo); +#if DEBUG + LOG("JemallocPtrInfo(%p[%zu]) -> {%zu, %p, %zu, %zu}\n", aPtr, index, + size_t(aInfo->tag), aInfo->addr, aInfo->size, aInfo->arenaId); +#else + LOG("JemallocPtrInfo(%p[%zu]) -> {%zu, %p, %zu}\n", aPtr, index, + size_t(aInfo->tag), aInfo->addr, aInfo->size); +#endif +} + +inline void* MozJemallocPHC::moz_arena_malloc(arena_id_t aArenaId, + size_t aReqSize) { + return PageMalloc(Some(aArenaId), aReqSize); +} + +inline void* MozJemallocPHC::moz_arena_calloc(arena_id_t aArenaId, size_t aNum, + size_t aReqSize) { + return PageCalloc(Some(aArenaId), aNum, aReqSize); +} + +inline void* MozJemallocPHC::moz_arena_realloc(arena_id_t aArenaId, + void* aOldPtr, size_t aNewSize) { + return PageRealloc(Some(aArenaId), aOldPtr, aNewSize); +} + +inline void MozJemallocPHC::moz_arena_free(arena_id_t aArenaId, void* aPtr) { + return PageFree(Some(aArenaId), aPtr); +} + +inline void* MozJemallocPHC::moz_arena_memalign(arena_id_t aArenaId, + size_t aAlignment, + size_t aReqSize) { + return PageMemalign(Some(aArenaId), aAlignment, aReqSize); +} + +namespace mozilla::phc { + +bool IsPHCAllocation(const void* aPtr, AddrInfo* aOut) { + if (!maybe_init()) { + return false; + } + + PtrKind pk = gConst->PtrKind(aPtr); + if (pk.IsNothing()) { + return false; + } + + bool isGuardPage = false; + if (pk.IsGuardPage()) { + if ((uintptr_t(aPtr) % kPageSize) < (kPageSize / 2)) { + // The address is in the lower half of a guard page, so it's probably an + // overflow. But first check that it is not on the very first guard + // page, in which case it cannot be an overflow, and we ignore it. + if (gConst->IsInFirstGuardPage(aPtr)) { + return false; + } + + // Get the allocation page preceding this guard page. + pk = gConst->PtrKind(static_cast<const uint8_t*>(aPtr) - kPageSize); + + } else { + // The address is in the upper half of a guard page, so it's probably an + // underflow. Get the allocation page following this guard page. + pk = gConst->PtrKind(static_cast<const uint8_t*>(aPtr) + kPageSize); + } + + // Make a note of the fact that we hit a guard page. + isGuardPage = true; + } + + // At this point we know we have an allocation page. + uintptr_t index = pk.AllocPageIndex(); + + if (aOut) { + if (GMut::sMutex.TryLock()) { + gMut->FillAddrInfo(index, aPtr, isGuardPage, *aOut); + LOG("IsPHCAllocation: %zu, %p, %zu, %zu, %zu\n", size_t(aOut->mKind), + aOut->mBaseAddr, aOut->mUsableSize, + aOut->mAllocStack.isSome() ? aOut->mAllocStack->mLength : 0, + aOut->mFreeStack.isSome() ? aOut->mFreeStack->mLength : 0); + GMut::sMutex.Unlock(); + } else { + LOG("IsPHCAllocation: PHC is locked\n"); + aOut->mPhcWasLocked = true; + } + } + return true; +} + +void DisablePHCOnCurrentThread() { + GTls::DisableOnCurrentThread(); + LOG("DisablePHCOnCurrentThread: %zu\n", 0ul); +} + +void ReenablePHCOnCurrentThread() { + GTls::EnableOnCurrentThread(); + LOG("ReenablePHCOnCurrentThread: %zu\n", 0ul); +} + +bool IsPHCEnabledOnCurrentThread() { + bool enabled = !GTls::IsDisabledOnCurrentThread(); + LOG("IsPHCEnabledOnCurrentThread: %zu\n", size_t(enabled)); + return enabled; +} + +void PHCMemoryUsage(MemoryUsage& aMemoryUsage) { + if (!maybe_init()) { + aMemoryUsage = MemoryUsage(); + return; + } + + aMemoryUsage.mMetadataBytes = metadata_size(); + if (gMut) { + MutexAutoLock lock(GMut::sMutex); + aMemoryUsage.mFragmentationBytes = gMut->FragmentationBytes(); + } else { + aMemoryUsage.mFragmentationBytes = 0; + } +} + +void GetPHCStats(PHCStats& aStats) { + if (!maybe_init()) { + aStats = PHCStats(); + return; + } + + MutexAutoLock lock(GMut::sMutex); + + aStats = gMut->GetPageStats(lock); +} + +// Enable or Disable PHC at runtime. If PHC is disabled it will still trap +// bad uses of previous allocations, but won't track any new allocations. +void SetPHCState(PHCState aState) { + if (!maybe_init()) { + return; + } + + gMut->SetState(aState); +} + +void SetPHCProbabilities(int64_t aAvgDelayFirst, int64_t aAvgDelayNormal, + int64_t aAvgDelayPageReuse) { + if (!maybe_init()) { + return; + } + + gMut->SetProbabilities(aAvgDelayFirst, aAvgDelayNormal, aAvgDelayPageReuse); +} + +} // namespace mozilla::phc diff --git a/memory/build/PHC.h b/memory/build/PHC.h new file mode 100644 index 0000000000..78820a5cf3 --- /dev/null +++ b/memory/build/PHC.h @@ -0,0 +1,160 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PHC_h +#define PHC_h + +#include "mozilla/Assertions.h" +#include "mozilla/Maybe.h" +#include <stdint.h> +#include <stdlib.h> + +#include "mozmemory_wrap.h" + +namespace mozilla { +namespace phc { + +// Note: a stack trace may have no frames due to a collection problem. +// +// Also note: a more compact stack trace representation could be achieved with +// some effort. +struct StackTrace { + public: + static const size_t kMaxFrames = 16; + + // The number of PCs in the stack trace. + size_t mLength; + + // The PCs in the stack trace. Only the first mLength are initialized. + const void* mPcs[kMaxFrames]; + + public: + StackTrace() : mLength(0) {} +}; + +// Info from PHC about an address in memory. +class AddrInfo { + public: + enum class Kind { + // The address is not in PHC-managed memory. + Unknown = 0, + + // The address is within a PHC page that has never been allocated. A crash + // involving such an address is unlikely in practice, because it would + // require the crash to happen quite early. + NeverAllocatedPage = 1, + + // The address is within a PHC page that is in use. + InUsePage = 2, + + // The address is within a PHC page that has been allocated and then freed. + // A crash involving such an address most likely indicates a + // use-after-free. (A sufficiently wild write -- e.g. a large buffer + // overflow -- could also trigger it, but this is less likely.) + FreedPage = 3, + + // The address is within a PHC guard page. A crash involving such an + // address most likely indicates a buffer overflow. (Again, a sufficiently + // wild write could unluckily trigger it, but this is less likely.) + GuardPage = 4, + }; + + // The page kind. + Kind mKind; + + // The starting address of the allocation. + // - Unknown | NeverAllocatedPage: nullptr. + // - InUsePage | FreedPage: the address of the allocation within the page. + // - GuardPage: the mBaseAddr value from the preceding allocation page. + const void* mBaseAddr; + + // The usable size, which could be bigger than the requested size. + // - Unknown | NeverAllocatePage: 0. + // - InUsePage | FreedPage: the usable size of the allocation within the page. + // - GuardPage: the mUsableSize value from the preceding allocation page. + size_t mUsableSize; + + // The allocation stack. + // - Unknown | NeverAllocatedPage: Nothing. + // - InUsePage | FreedPage: Some. + // - GuardPage: the mAllocStack value from the preceding allocation page. + mozilla::Maybe<StackTrace> mAllocStack; + + // The free stack. + // - Unknown | NeverAllocatedPage | InUsePage: Nothing. + // - FreedPage: Some. + // - GuardPage: the mFreeStack value from the preceding allocation page. + mozilla::Maybe<StackTrace> mFreeStack; + + // True if PHC was locked and therefore we couldn't retrive some infomation. + bool mPhcWasLocked = false; + + // Default to no PHC info. + AddrInfo() : mKind(Kind::Unknown), mBaseAddr(nullptr), mUsableSize(0) {} +}; + +// Global instance that is retrieved by the process generating the crash report +extern AddrInfo gAddrInfo; + +// If this is a PHC-handled address, return true, and if an AddrInfo is +// provided, fill in all of its fields. Otherwise, return false and leave +// AddrInfo unchanged. +MOZ_JEMALLOC_API bool IsPHCAllocation(const void*, AddrInfo*); + +// Disable PHC allocations on the current thread. Only useful for tests. Note +// that PHC deallocations will still occur as needed. +MOZ_JEMALLOC_API void DisablePHCOnCurrentThread(); + +// Re-enable PHC allocations on the current thread. Only useful for tests. +MOZ_JEMALLOC_API void ReenablePHCOnCurrentThread(); + +// Test whether PHC allocations are enabled on the current thread. Only +// useful for tests. +MOZ_JEMALLOC_API bool IsPHCEnabledOnCurrentThread(); + +// PHC has three different states: +// * Not compiled in +// * OnlyFree - The memory allocator is hooked but new allocations +// requests will be forwarded to mozjemalloc, free() will +// correctly free any PHC allocations and realloc() will +// "move" PHC allocations to mozjemalloc allocations. +// * Enabled - Full use. +enum PHCState { + OnlyFree, + Enabled, +}; + +MOZ_JEMALLOC_API void SetPHCState(PHCState aState); + +MOZ_JEMALLOC_API void SetPHCProbabilities(int64_t aAvgDelayFirst, + int64_t aAvgDelayNormal, + int64_t aAvgDelayPageReuse); + +struct MemoryUsage { + // The amount of memory used for PHC metadata, eg information about each + // allocation including stacks. + size_t mMetadataBytes = 0; + + // The amount of memory lost due to rounding allocation sizes up to the + // nearest page. AKA internal fragmentation. + size_t mFragmentationBytes = 0; +}; + +MOZ_JEMALLOC_API void PHCMemoryUsage(MemoryUsage& aMemoryUsage); + +struct PHCStats { + size_t mSlotsAllocated = 0; + size_t mSlotsFreed = 0; + size_t mSlotsUnused = 0; +}; + +// Return PHC memory usage information by filling in the supplied structure. +MOZ_JEMALLOC_API void GetPHCStats(PHCStats& aStats); + +} // namespace phc +} // namespace mozilla + +#endif /* PHC_h */ diff --git a/memory/build/Utils.h b/memory/build/Utils.h new file mode 100644 index 0000000000..b1a05e7ae3 --- /dev/null +++ b/memory/build/Utils.h @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef Utils_h +#define Utils_h + +#include <type_traits> + +#include "mozilla/CheckedInt.h" +#include "mozilla/TemplateLib.h" + +// Helper for log2 of powers of 2 at compile time. +template <size_t N> +struct Log2 : mozilla::tl::CeilingLog2<N> { + using mozilla::tl::CeilingLog2<N>::value; + static_assert(1ULL << value == N, "Number is not a power of 2"); +}; +#define LOG2(N) Log2<N>::value + +enum class Order { + eLess = -1, + eEqual = 0, + eGreater = 1, +}; + +// Compare two integers. Returns whether the first integer is Less, +// Equal or Greater than the second integer. +template <typename T> +Order CompareInt(T aValue1, T aValue2) { + static_assert(std::is_integral_v<T>, "Type must be integral"); + if (aValue1 < aValue2) { + return Order::eLess; + } + if (aValue1 > aValue2) { + return Order::eGreater; + } + return Order::eEqual; +} + +// Compare two addresses. Returns whether the first address is Less, +// Equal or Greater than the second address. +template <typename T> +Order CompareAddr(T* aAddr1, T* aAddr2) { + return CompareInt(uintptr_t(aAddr1), uintptr_t(aAddr2)); +} + +// Helper for (fast) comparison of fractions without involving divisions or +// floats. +class Fraction { + public: + explicit constexpr Fraction(size_t aNumerator, size_t aDenominator) + : mNumerator(aNumerator), mDenominator(aDenominator) {} + + MOZ_IMPLICIT constexpr Fraction(long double aValue) + // We use an arbitrary power of two as denominator that provides enough + // precision for our use case. + : mNumerator(aValue * 4096), mDenominator(4096) {} + + inline bool operator<(const Fraction& aOther) const { +#ifndef MOZ_DEBUG + // We are comparing A / B < C / D, with all A, B, C and D being positive + // numbers. Multiplying both sides with B * D, we have: + // (A * B * D) / B < (C * B * D) / D, which can then be simplified as + // A * D < C * B. When can thus compare our fractions without actually + // doing any division. + // This however assumes the multiplied quantities are small enough not + // to overflow the multiplication. We use CheckedInt on debug builds + // to enforce the assumption. + return mNumerator * aOther.mDenominator < aOther.mNumerator * mDenominator; +#else + mozilla::CheckedInt<size_t> numerator(mNumerator); + mozilla::CheckedInt<size_t> denominator(mDenominator); + // value() asserts when the multiplication overflowed. + size_t lhs = (numerator * aOther.mDenominator).value(); + size_t rhs = (aOther.mNumerator * denominator).value(); + return lhs < rhs; +#endif + } + + inline bool operator>(const Fraction& aOther) const { return aOther < *this; } + + inline bool operator>=(const Fraction& aOther) const { + return !(*this < aOther); + } + + inline bool operator<=(const Fraction& aOther) const { + return !(*this > aOther); + } + + inline bool operator==(const Fraction& aOther) const { +#ifndef MOZ_DEBUG + // Same logic as operator< + return mNumerator * aOther.mDenominator == aOther.mNumerator * mDenominator; +#else + mozilla::CheckedInt<size_t> numerator(mNumerator); + mozilla::CheckedInt<size_t> denominator(mDenominator); + size_t lhs = (numerator * aOther.mDenominator).value(); + size_t rhs = (aOther.mNumerator * denominator).value(); + return lhs == rhs; +#endif + } + + inline bool operator!=(const Fraction& aOther) const { + return !(*this == aOther); + } + + private: + size_t mNumerator; + size_t mDenominator; +}; + +#endif diff --git a/memory/build/fallback.cpp b/memory/build/fallback.cpp new file mode 100644 index 0000000000..6f1791bed9 --- /dev/null +++ b/memory/build/fallback.cpp @@ -0,0 +1,40 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozmemory.h" +#include "mozjemalloc.h" +#include <stdlib.h> + +#ifndef HAVE_MEMALIGN +MOZ_MEMORY_API void* memalign(size_t aAlignment, size_t aSize) { +# ifdef XP_WIN + return _aligned_malloc(aSize, aAlignment); +# else + void* ret; + if (posix_memalign(&ret, aAlignment, aSize) != 0) { + return nullptr; + } + return ret; +# endif +} +#endif + +struct SystemMalloc { +#define MALLOC_DECL(name, return_type, ...) \ + static inline return_type name(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \ + return ::name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \ + } +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#include "malloc_decls.h" +}; + +#define MALLOC_DECL(name, return_type, ...) \ + MOZ_JEMALLOC_API return_type name(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \ + return DummyArenaAllocator<SystemMalloc>::name( \ + ARGS_HELPER(ARGS, ##__VA_ARGS__)); \ + } +#define MALLOC_FUNCS MALLOC_FUNCS_ARENA +#include "malloc_decls.h" diff --git a/memory/build/malloc_decls.h b/memory/build/malloc_decls.h new file mode 100644 index 0000000000..ec56344a37 --- /dev/null +++ b/memory/build/malloc_decls.h @@ -0,0 +1,161 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Helper header to declare all the supported malloc functions. +// MALLOC_DECL arguments are: +// - function name +// - return type +// - argument types + +#ifndef malloc_decls_h +# define malloc_decls_h + +# include "mozjemalloc_types.h" + +# define MALLOC_FUNCS_MALLOC_BASE 1 +# define MALLOC_FUNCS_MALLOC_EXTRA 2 +# define MALLOC_FUNCS_MALLOC \ + (MALLOC_FUNCS_MALLOC_BASE | MALLOC_FUNCS_MALLOC_EXTRA) +# define MALLOC_FUNCS_JEMALLOC 4 +# define MALLOC_FUNCS_ARENA_BASE 8 +# define MALLOC_FUNCS_ARENA_ALLOC 16 +# define MALLOC_FUNCS_ARENA \ + (MALLOC_FUNCS_ARENA_BASE | MALLOC_FUNCS_ARENA_ALLOC) +# define MALLOC_FUNCS_ALL \ + (MALLOC_FUNCS_MALLOC | MALLOC_FUNCS_JEMALLOC | MALLOC_FUNCS_ARENA) + +#endif // malloc_decls_h + +#ifndef MALLOC_FUNCS +# define MALLOC_FUNCS MALLOC_FUNCS_ALL +#endif + +#ifdef MALLOC_DECL +// NOTHROW_MALLOC_DECL is intended for functions where the standard library +// declares the functions in question as `throw()`. Not all platforms +// consistent declare certain functions as `throw()`, though. + +// Bionic and OS X don't seem to care about `throw()`ness. +# if defined(ANDROID) || defined(XP_DARWIN) +# undef NOTHROW_MALLOC_DECL +# define NOTHROW_MALLOC_DECL MALLOC_DECL +// Some places don't care about the distinction. +# elif !defined(NOTHROW_MALLOC_DECL) +# define NOTHROW_MALLOC_DECL MALLOC_DECL +# endif + +# if MALLOC_FUNCS & MALLOC_FUNCS_MALLOC_BASE +MALLOC_DECL(malloc, void*, size_t) +MALLOC_DECL(calloc, void*, size_t, size_t) +MALLOC_DECL(realloc, void*, void*, size_t) +NOTHROW_MALLOC_DECL(free, void, void*) +NOTHROW_MALLOC_DECL(memalign, void*, size_t, size_t) +# endif +# if MALLOC_FUNCS & MALLOC_FUNCS_MALLOC_EXTRA +NOTHROW_MALLOC_DECL(posix_memalign, int, void**, size_t, size_t) +NOTHROW_MALLOC_DECL(aligned_alloc, void*, size_t, size_t) +NOTHROW_MALLOC_DECL(valloc, void*, size_t) +NOTHROW_MALLOC_DECL(malloc_usable_size, size_t, usable_ptr_t) +MALLOC_DECL(malloc_good_size, size_t, size_t) +# endif + +# if MALLOC_FUNCS & MALLOC_FUNCS_JEMALLOC +// The 2nd argument points to an optional array exactly +// jemalloc_stats_num_bins() long to be filled in (if non-null). +// This must only be called on the main thread. +MALLOC_DECL(jemalloc_stats_internal, void, jemalloc_stats_t*, + jemalloc_bin_stats_t*) + +// Return the size of the jemalloc_bin_stats_t array. +MALLOC_DECL(jemalloc_stats_num_bins, size_t) + +// Tell jemalloc this is the main thread. jemalloc will use this to validate +// that main thread only arenas are only used on the main thread. +MALLOC_DECL(jemalloc_set_main_thread, void) + +// On some operating systems (Mac), we use madvise(MADV_FREE) to hand pages +// back to the operating system. On Mac, the operating system doesn't take +// this memory back immediately; instead, the OS takes it back only when the +// machine is running out of physical memory. +// +// This is great from the standpoint of efficiency, but it makes measuring our +// actual RSS difficult, because pages which we've MADV_FREE'd shouldn't count +// against our RSS. +// +// This function explicitly purges any MADV_FREE'd pages from physical memory, +// causing our reported RSS match the amount of memory we're actually using. +// +// Note that this call is expensive in two ways. First, it may be slow to +// execute, because it may make a number of slow syscalls to free memory. This +// function holds the big jemalloc locks, so basically all threads are blocked +// while this function runs. +// +// This function is also expensive in that the next time we go to access a page +// which we've just explicitly decommitted, the operating system has to attach +// to it a physical page! If we hadn't run this function, the OS would have +// less work to do. +// +// If MALLOC_DOUBLE_PURGE is not defined, this function does nothing. +// +// It may only be used from the main thread. +MALLOC_DECL(jemalloc_purge_freed_pages, void) + +// Free all unused dirty pages in all arenas. Calling this function will slow +// down subsequent allocations so it is recommended to use it only when +// memory needs to be reclaimed at all costs (see bug 805855). This function +// provides functionality similar to mallctl("arenas.purge") in jemalloc 3. +// It may only be used from the main thread. +MALLOC_DECL(jemalloc_free_dirty_pages, void) + +// Opt in or out of a thread local arena (bool argument is whether to opt-in +// (true) or out (false)). +MALLOC_DECL(jemalloc_thread_local_arena, void, bool) + +// Provide information about any allocation enclosing the given address. +MALLOC_DECL(jemalloc_ptr_info, void, const void*, jemalloc_ptr_info_t*) +# endif + +# if MALLOC_FUNCS & MALLOC_FUNCS_ARENA_BASE + +// Creates a separate arena, and returns its id, valid to use with moz_arena_* +// functions. A helper is provided in mozmemory.h that doesn't take any +// arena_params_t: moz_create_arena. +MALLOC_DECL(moz_create_arena_with_params, arena_id_t, arena_params_t*) + +// Dispose of the given arena. Subsequent uses of the arena will crash. +// Passing an invalid id (inexistent or already disposed) to this function +// will crash. The arena must be empty prior to calling this function. +MALLOC_DECL(moz_dispose_arena, void, arena_id_t) + +// Set the default modifier for mMaxDirty. The value is the number of shifts +// applied to the value. Positive value is handled as <<, negative >>. +// Arenas may override the default modifier. +MALLOC_DECL(moz_set_max_dirty_page_modifier, void, int32_t) + +# endif + +# if MALLOC_FUNCS & MALLOC_FUNCS_ARENA_ALLOC +// Same as the functions without the moz_arena_ prefix, but using arenas +// created with moz_create_arena. +// The contract, even if not enforced at runtime in some configurations, +// is that moz_arena_realloc and moz_arena_free will crash if the given +// arena doesn't own the given pointer. All functions will crash if the +// arena id is invalid. +// Although discouraged, plain realloc and free can still be used on +// pointers allocated with these functions. Realloc will properly keep +// new pointers in the same arena as the original. +MALLOC_DECL(moz_arena_malloc, void*, arena_id_t, size_t) +MALLOC_DECL(moz_arena_calloc, void*, arena_id_t, size_t, size_t) +MALLOC_DECL(moz_arena_realloc, void*, arena_id_t, void*, size_t) +MALLOC_DECL(moz_arena_free, void, arena_id_t, void*) +MALLOC_DECL(moz_arena_memalign, void*, arena_id_t, size_t, size_t) +# endif + +#endif // MALLOC_DECL + +#undef NOTHROW_MALLOC_DECL +#undef MALLOC_DECL +#undef MALLOC_FUNCS diff --git a/memory/build/moz.build b/memory/build/moz.build new file mode 100644 index 0000000000..da67977330 --- /dev/null +++ b/memory/build/moz.build @@ -0,0 +1,68 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + "malloc_decls.h", + "mozjemalloc_types.h", + "mozmemory.h", + "mozmemory_utils.h", + "mozmemory_wrap.h", + "PHC.h", +] + +LIBRARY_DEFINES["MOZ_HAS_MOZGLUE"] = True +DEFINES["MOZ_MEMORY_IMPL"] = True + +if CONFIG["MOZ_REPLACE_MALLOC"]: + EXPORTS += [ + "replace_malloc.h", + "replace_malloc_bridge.h", + ] + +if CONFIG["MOZ_PHC"]: + DEFINES["MOZ_PHC"] = True + UNIFIED_SOURCES += [ + "FdPrintf.cpp", + # PHC.cpp is #included in mozjemalloc.cpp + ] + +if CONFIG["MOZ_MEMORY"]: + UNIFIED_SOURCES += [ + "mozjemalloc.cpp", + "mozmemory_wrap.cpp", + "Mutex.cpp", + ] +else: + UNIFIED_SOURCES += [ + "fallback.cpp", + ] + +if CONFIG["OS_TARGET"] == "Darwin" and ( + CONFIG["MOZ_REPLACE_MALLOC"] or CONFIG["MOZ_MEMORY"] +): + SOURCES += [ + "zone.c", + ] + +Library("memory") + +if CONFIG["OS_TARGET"] == "Android" and CONFIG["CC_TYPE"] == "clang": + CXXFLAGS += [ + "-Wno-tautological-pointer-compare", + ] + +if CONFIG["MOZ_BUILD_APP"] != "memory": + FINAL_LIBRARY = "mozglue" + +if CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + DEFINES["MOZ_REPLACE_MALLOC_STATIC"] = True + +DisableStlWrapping() + +if CONFIG["CC_TYPE"] == "clang-cl": + AllowCompilerWarnings() # workaround for bug 1090497 + +TEST_DIRS += ["test"] diff --git a/memory/build/mozjemalloc.cpp b/memory/build/mozjemalloc.cpp new file mode 100644 index 0000000000..7a8211747f --- /dev/null +++ b/memory/build/mozjemalloc.cpp @@ -0,0 +1,5397 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Portions of this file were originally under the following license: +// +// Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>. +// All rights reserved. +// Copyright (C) 2007-2017 Mozilla Foundation. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice(s), this list of conditions and the following disclaimer as +// the first lines of this file unmodified other than the possible +// addition of one or more copyright notices. +// 2. Redistributions in binary form must reproduce the above copyright +// notice(s), this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ***************************************************************************** +// +// This allocator implementation is designed to provide scalable performance +// for multi-threaded programs on multi-processor systems. The following +// features are included for this purpose: +// +// + Multiple arenas are used if there are multiple CPUs, which reduces lock +// contention and cache sloshing. +// +// + Cache line sharing between arenas is avoided for internal data +// structures. +// +// + Memory is managed in chunks and runs (chunks can be split into runs), +// rather than as individual pages. This provides a constant-time +// mechanism for associating allocations with particular arenas. +// +// Allocation requests are rounded up to the nearest size class, and no record +// of the original request size is maintained. Allocations are broken into +// categories according to size class. Assuming runtime defaults, the size +// classes in each category are as follows (for x86, x86_64 and Apple Silicon): +// +// |=========================================================| +// | Category | Subcategory | x86 | x86_64 | Mac ARM | +// |---------------------------+---------+---------+---------| +// | Word size | 32 bit | 64 bit | 64 bit | +// | Page size | 4 Kb | 4 Kb | 16 Kb | +// |=========================================================| +// | Small | Tiny | 4/-w | -w | - | +// | | | 8 | 8/-w | 8 | +// | |----------------+---------|---------|---------| +// | | Quantum-spaced | 16 | 16 | 16 | +// | | | 32 | 32 | 32 | +// | | | 48 | 48 | 48 | +// | | | ... | ... | ... | +// | | | 480 | 480 | 480 | +// | | | 496 | 496 | 496 | +// | |----------------+---------|---------|---------| +// | | Quantum-wide- | 512 | 512 | 512 | +// | | spaced | 768 | 768 | 768 | +// | | | ... | ... | ... | +// | | | 3584 | 3584 | 3584 | +// | | | 3840 | 3840 | 3840 | +// | |----------------+---------|---------|---------| +// | | Sub-page | - | - | 4096 | +// | | | - | - | 8 kB | +// |=========================================================| +// | Large | 4 kB | 4 kB | - | +// | | 8 kB | 8 kB | - | +// | | 12 kB | 12 kB | - | +// | | 16 kB | 16 kB | 16 kB | +// | | ... | ... | - | +// | | 32 kB | 32 kB | 32 kB | +// | | ... | ... | ... | +// | | 1008 kB | 1008 kB | 1008 kB | +// | | 1012 kB | 1012 kB | - | +// | | 1016 kB | 1016 kB | - | +// | | 1020 kB | 1020 kB | - | +// |=========================================================| +// | Huge | 1 MB | 1 MB | 1 MB | +// | | 2 MB | 2 MB | 2 MB | +// | | 3 MB | 3 MB | 3 MB | +// | | ... | ... | ... | +// |=========================================================| +// +// Legend: +// n: Size class exists for this platform. +// n/-w: This size class doesn't exist on Windows (see kMinTinyClass). +// -: This size class doesn't exist for this platform. +// ...: Size classes follow a pattern here. +// +// NOTE: Due to Mozilla bug 691003, we cannot reserve less than one word for an +// allocation on Linux or Mac. So on 32-bit *nix, the smallest bucket size is +// 4 bytes, and on 64-bit, the smallest bucket size is 8 bytes. +// +// A different mechanism is used for each category: +// +// Small : Each size class is segregated into its own set of runs. Each run +// maintains a bitmap of which regions are free/allocated. +// +// Large : Each allocation is backed by a dedicated run. Metadata are stored +// in the associated arena chunk header maps. +// +// Huge : Each allocation is backed by a dedicated contiguous set of chunks. +// Metadata are stored in a separate red-black tree. +// +// ***************************************************************************** + +#include "mozmemory_wrap.h" +#include "mozjemalloc.h" +#include "mozjemalloc_types.h" + +#include <cstring> +#include <cerrno> +#include <optional> +#include <type_traits> +#ifdef XP_WIN +# include <io.h> +# include <windows.h> +#else +# include <sys/mman.h> +# include <unistd.h> +#endif +#ifdef XP_DARWIN +# include <libkern/OSAtomic.h> +# include <mach/mach_init.h> +# include <mach/vm_map.h> +#endif + +#include "mozilla/Atomics.h" +#include "mozilla/Alignment.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/Assertions.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/DoublyLinkedList.h" +#include "mozilla/HelperMacros.h" +#include "mozilla/Likely.h" +#include "mozilla/Literals.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/RandomNum.h" +// Note: MozTaggedAnonymousMmap() could call an LD_PRELOADed mmap +// instead of the one defined here; use only MozTagAnonymousMemory(). +#include "mozilla/TaggedAnonymousMemory.h" +#include "mozilla/ThreadLocal.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Unused.h" +#include "mozilla/XorShift128PlusRNG.h" +#include "mozilla/fallible.h" +#include "rb.h" +#include "Mutex.h" +#include "PHC.h" +#include "Utils.h" + +#if defined(XP_WIN) +# include "mozmemory_utils.h" +#endif + +// For GetGeckoProcessType(), when it's used. +#if defined(XP_WIN) && !defined(JS_STANDALONE) +# include "mozilla/ProcessType.h" +#endif + +using namespace mozilla; + +// On Linux, we use madvise(MADV_DONTNEED) to release memory back to the +// operating system. If we release 1MB of live pages with MADV_DONTNEED, our +// RSS will decrease by 1MB (almost) immediately. +// +// On Mac, we use madvise(MADV_FREE). Unlike MADV_DONTNEED on Linux, MADV_FREE +// on Mac doesn't cause the OS to release the specified pages immediately; the +// OS keeps them in our process until the machine comes under memory pressure. +// +// It's therefore difficult to measure the process's RSS on Mac, since, in the +// absence of memory pressure, the contribution from the heap to RSS will not +// decrease due to our madvise calls. +// +// We therefore define MALLOC_DOUBLE_PURGE on Mac. This causes jemalloc to +// track which pages have been MADV_FREE'd. You can then call +// jemalloc_purge_freed_pages(), which will force the OS to release those +// MADV_FREE'd pages, making the process's RSS reflect its true memory usage. +// +// The jemalloc_purge_freed_pages definition in memory/build/mozmemory.h needs +// to be adjusted if MALLOC_DOUBLE_PURGE is ever enabled on Linux. + +#ifdef XP_DARWIN +# define MALLOC_DOUBLE_PURGE +#endif + +#ifdef XP_WIN +# define MALLOC_DECOMMIT +#endif + +// Define MALLOC_RUNTIME_CONFIG depending on MOZ_DEBUG. Overriding this as +// a build option allows us to build mozjemalloc/firefox without runtime asserts +// but with runtime configuration. Making some testing easier. + +#ifdef MOZ_DEBUG +# define MALLOC_RUNTIME_CONFIG +#endif + +// When MALLOC_STATIC_PAGESIZE is defined, the page size is fixed at +// compile-time for better performance, as opposed to determined at +// runtime. Some platforms can have different page sizes at runtime +// depending on kernel configuration, so they are opted out by default. +// Debug builds are opted out too, for test coverage. +#ifndef MALLOC_RUNTIME_CONFIG +# if !defined(__ia64__) && !defined(__sparc__) && !defined(__mips__) && \ + !defined(__aarch64__) && !defined(__powerpc__) && !defined(XP_MACOSX) && \ + !defined(__loongarch__) +# define MALLOC_STATIC_PAGESIZE 1 +# endif +#endif + +#ifdef XP_WIN +# define STDERR_FILENO 2 + +// Implement getenv without using malloc. +static char mozillaMallocOptionsBuf[64]; + +# define getenv xgetenv +static char* getenv(const char* name) { + if (GetEnvironmentVariableA(name, mozillaMallocOptionsBuf, + sizeof(mozillaMallocOptionsBuf)) > 0) { + return mozillaMallocOptionsBuf; + } + + return nullptr; +} +#endif + +#ifndef XP_WIN +// Newer Linux systems support MADV_FREE, but we're not supporting +// that properly. bug #1406304. +# if defined(XP_LINUX) && defined(MADV_FREE) +# undef MADV_FREE +# endif +# ifndef MADV_FREE +# define MADV_FREE MADV_DONTNEED +# endif +#endif + +// Some tools, such as /dev/dsp wrappers, LD_PRELOAD libraries that +// happen to override mmap() and call dlsym() from their overridden +// mmap(). The problem is that dlsym() calls malloc(), and this ends +// up in a dead lock in jemalloc. +// On these systems, we prefer to directly use the system call. +// We do that for Linux systems and kfreebsd with GNU userland. +// Note sanity checks are not done (alignment of offset, ...) because +// the uses of mmap are pretty limited, in jemalloc. +// +// On Alpha, glibc has a bug that prevents syscall() to work for system +// calls with 6 arguments. +#if (defined(XP_LINUX) && !defined(__alpha__)) || \ + (defined(__FreeBSD_kernel__) && defined(__GLIBC__)) +# include <sys/syscall.h> +# if defined(SYS_mmap) || defined(SYS_mmap2) +static inline void* _mmap(void* addr, size_t length, int prot, int flags, + int fd, off_t offset) { +// S390 only passes one argument to the mmap system call, which is a +// pointer to a structure containing the arguments. +# ifdef __s390__ + struct { + void* addr; + size_t length; + long prot; + long flags; + long fd; + off_t offset; + } args = {addr, length, prot, flags, fd, offset}; + return (void*)syscall(SYS_mmap, &args); +# else +# if defined(ANDROID) && defined(__aarch64__) && defined(SYS_mmap2) + // Android NDK defines SYS_mmap2 for AArch64 despite it not supporting mmap2. +# undef SYS_mmap2 +# endif +# ifdef SYS_mmap2 + return (void*)syscall(SYS_mmap2, addr, length, prot, flags, fd, offset >> 12); +# else + return (void*)syscall(SYS_mmap, addr, length, prot, flags, fd, offset); +# endif +# endif +} +# define mmap _mmap +# define munmap(a, l) syscall(SYS_munmap, a, l) +# endif +#endif + +// *************************************************************************** +// Structures for chunk headers for chunks used for non-huge allocations. + +struct arena_t; + +// Each element of the chunk map corresponds to one page within the chunk. +struct arena_chunk_map_t { + // Linkage for run trees. There are two disjoint uses: + // + // 1) arena_t's tree or available runs. + // 2) arena_run_t conceptually uses this linkage for in-use non-full + // runs, rather than directly embedding linkage. + RedBlackTreeNode<arena_chunk_map_t> link; + + // Run address (or size) and various flags are stored together. The bit + // layout looks like (assuming 32-bit system): + // + // ???????? ???????? ????---- -mckdzla + // + // ? : Unallocated: Run address for first/last pages, unset for internal + // pages. + // Small: Run address. + // Large: Run size for first page, unset for trailing pages. + // - : Unused. + // m : MADV_FREE/MADV_DONTNEED'ed? + // c : decommitted? + // k : key? + // d : dirty? + // z : zeroed? + // l : large? + // a : allocated? + // + // Following are example bit patterns for the three types of runs. + // + // r : run address + // s : run size + // x : don't care + // - : 0 + // [cdzla] : bit set + // + // Unallocated: + // ssssssss ssssssss ssss---- --c----- + // xxxxxxxx xxxxxxxx xxxx---- ----d--- + // ssssssss ssssssss ssss---- -----z-- + // + // Small: + // rrrrrrrr rrrrrrrr rrrr---- -------a + // rrrrrrrr rrrrrrrr rrrr---- -------a + // rrrrrrrr rrrrrrrr rrrr---- -------a + // + // Large: + // ssssssss ssssssss ssss---- ------la + // -------- -------- -------- ------la + // -------- -------- -------- ------la + size_t bits; + +// Note that CHUNK_MAP_DECOMMITTED's meaning varies depending on whether +// MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are defined. +// +// If MALLOC_DECOMMIT is defined, a page which is CHUNK_MAP_DECOMMITTED must be +// re-committed with pages_commit() before it may be touched. If +// MALLOC_DECOMMIT is defined, MALLOC_DOUBLE_PURGE may not be defined. +// +// If neither MALLOC_DECOMMIT nor MALLOC_DOUBLE_PURGE is defined, pages which +// are madvised (with either MADV_DONTNEED or MADV_FREE) are marked with +// CHUNK_MAP_MADVISED. +// +// Otherwise, if MALLOC_DECOMMIT is not defined and MALLOC_DOUBLE_PURGE is +// defined, then a page which is madvised is marked as CHUNK_MAP_MADVISED. +// When it's finally freed with jemalloc_purge_freed_pages, the page is marked +// as CHUNK_MAP_DECOMMITTED. +#define CHUNK_MAP_MADVISED ((size_t)0x40U) +#define CHUNK_MAP_DECOMMITTED ((size_t)0x20U) +#define CHUNK_MAP_MADVISED_OR_DECOMMITTED \ + (CHUNK_MAP_MADVISED | CHUNK_MAP_DECOMMITTED) +#define CHUNK_MAP_KEY ((size_t)0x10U) +#define CHUNK_MAP_DIRTY ((size_t)0x08U) +#define CHUNK_MAP_ZEROED ((size_t)0x04U) +#define CHUNK_MAP_LARGE ((size_t)0x02U) +#define CHUNK_MAP_ALLOCATED ((size_t)0x01U) +}; + +// Arena chunk header. +struct arena_chunk_t { + // Arena that owns the chunk. + arena_t* arena; + + // Linkage for the arena's tree of dirty chunks. + RedBlackTreeNode<arena_chunk_t> link_dirty; + +#ifdef MALLOC_DOUBLE_PURGE + // If we're double-purging, we maintain a linked list of chunks which + // have pages which have been madvise(MADV_FREE)'d but not explicitly + // purged. + // + // We're currently lazy and don't remove a chunk from this list when + // all its madvised pages are recommitted. + DoublyLinkedListElement<arena_chunk_t> chunks_madvised_elem; +#endif + + // Number of dirty pages. + size_t ndirty; + + // Map of pages within chunk that keeps track of free/large/small. + arena_chunk_map_t map[1]; // Dynamically sized. +}; + +// *************************************************************************** +// Constants defining allocator size classes and behavior. + +// Maximum size of L1 cache line. This is used to avoid cache line aliasing, +// so over-estimates are okay (up to a point), but under-estimates will +// negatively affect performance. +static const size_t kCacheLineSize = 64; + +// Our size classes are inclusive ranges of memory sizes. By describing the +// minimums and how memory is allocated in each range the maximums can be +// calculated. + +// Smallest size class to support. On Windows the smallest allocation size +// must be 8 bytes on 32-bit, 16 bytes on 64-bit. On Linux and Mac, even +// malloc(1) must reserve a word's worth of memory (see Mozilla bug 691003). +#ifdef XP_WIN +static const size_t kMinTinyClass = sizeof(void*) * 2; +#else +static const size_t kMinTinyClass = sizeof(void*); +#endif + +// Maximum tiny size class. +static const size_t kMaxTinyClass = 8; + +// Smallest quantum-spaced size classes. It could actually also be labelled a +// tiny allocation, and is spaced as such from the largest tiny size class. +// Tiny classes being powers of 2, this is twice as large as the largest of +// them. +static const size_t kMinQuantumClass = kMaxTinyClass * 2; +static const size_t kMinQuantumWideClass = 512; +static const size_t kMinSubPageClass = 4_KiB; + +// Amount (quantum) separating quantum-spaced size classes. +static const size_t kQuantum = 16; +static const size_t kQuantumMask = kQuantum - 1; +static const size_t kQuantumWide = 256; +static const size_t kQuantumWideMask = kQuantumWide - 1; + +static const size_t kMaxQuantumClass = kMinQuantumWideClass - kQuantum; +static const size_t kMaxQuantumWideClass = kMinSubPageClass - kQuantumWide; + +// We can optimise some divisions to shifts if these are powers of two. +static_assert(mozilla::IsPowerOfTwo(kQuantum), + "kQuantum is not a power of two"); +static_assert(mozilla::IsPowerOfTwo(kQuantumWide), + "kQuantumWide is not a power of two"); + +static_assert(kMaxQuantumClass % kQuantum == 0, + "kMaxQuantumClass is not a multiple of kQuantum"); +static_assert(kMaxQuantumWideClass % kQuantumWide == 0, + "kMaxQuantumWideClass is not a multiple of kQuantumWide"); +static_assert(kQuantum < kQuantumWide, + "kQuantum must be smaller than kQuantumWide"); +static_assert(mozilla::IsPowerOfTwo(kMinSubPageClass), + "kMinSubPageClass is not a power of two"); + +// Number of (2^n)-spaced tiny classes. +static const size_t kNumTinyClasses = + LOG2(kMaxTinyClass) - LOG2(kMinTinyClass) + 1; + +// Number of quantum-spaced classes. We add kQuantum(Max) before subtracting to +// avoid underflow when a class is empty (Max<Min). +static const size_t kNumQuantumClasses = + (kMaxQuantumClass + kQuantum - kMinQuantumClass) / kQuantum; +static const size_t kNumQuantumWideClasses = + (kMaxQuantumWideClass + kQuantumWide - kMinQuantumWideClass) / kQuantumWide; + +// Size and alignment of memory chunks that are allocated by the OS's virtual +// memory system. +static const size_t kChunkSize = 1_MiB; +static const size_t kChunkSizeMask = kChunkSize - 1; + +#ifdef MALLOC_STATIC_PAGESIZE +// VM page size. It must divide the runtime CPU page size or the code +// will abort. +// Platform specific page size conditions copied from js/public/HeapAPI.h +# if defined(__powerpc64__) +static const size_t gPageSize = 64_KiB; +# elif defined(__loongarch64) +static const size_t gPageSize = 16_KiB; +# else +static const size_t gPageSize = 4_KiB; +# endif +static const size_t gRealPageSize = gPageSize; + +#else +// When MALLOC_OPTIONS contains one or several `P`s, the page size used +// across the allocator is multiplied by 2 for each `P`, but we also keep +// the real page size for code paths that need it. gPageSize is thus a +// power of two greater or equal to gRealPageSize. +static size_t gRealPageSize; +static size_t gPageSize; +#endif + +#ifdef MALLOC_STATIC_PAGESIZE +# define DECLARE_GLOBAL(type, name) +# define DEFINE_GLOBALS +# define END_GLOBALS +# define DEFINE_GLOBAL(type) static const type +# define GLOBAL_LOG2 LOG2 +# define GLOBAL_ASSERT_HELPER1(x) static_assert(x, #x) +# define GLOBAL_ASSERT_HELPER2(x, y) static_assert(x, y) +# define GLOBAL_ASSERT(...) \ + MACRO_CALL( \ + MOZ_PASTE_PREFIX_AND_ARG_COUNT(GLOBAL_ASSERT_HELPER, __VA_ARGS__), \ + (__VA_ARGS__)) +# define GLOBAL_CONSTEXPR constexpr +#else +# define DECLARE_GLOBAL(type, name) static type name; +# define DEFINE_GLOBALS static void DefineGlobals() { +# define END_GLOBALS } +# define DEFINE_GLOBAL(type) +# define GLOBAL_LOG2 FloorLog2 +# define GLOBAL_ASSERT MOZ_RELEASE_ASSERT +# define GLOBAL_CONSTEXPR +#endif + +DECLARE_GLOBAL(size_t, gMaxSubPageClass) +DECLARE_GLOBAL(uint8_t, gNumSubPageClasses) +DECLARE_GLOBAL(uint8_t, gPageSize2Pow) +DECLARE_GLOBAL(size_t, gPageSizeMask) +DECLARE_GLOBAL(size_t, gChunkNumPages) +DECLARE_GLOBAL(size_t, gChunkHeaderNumPages) +DECLARE_GLOBAL(size_t, gMaxLargeClass) + +DEFINE_GLOBALS + +// Largest sub-page size class, or zero if there are none +DEFINE_GLOBAL(size_t) +gMaxSubPageClass = gPageSize / 2 >= kMinSubPageClass ? gPageSize / 2 : 0; + +// Max size class for bins. +#define gMaxBinClass \ + (gMaxSubPageClass ? gMaxSubPageClass : kMaxQuantumWideClass) + +// Number of sub-page bins. +DEFINE_GLOBAL(uint8_t) +gNumSubPageClasses = []() GLOBAL_CONSTEXPR -> uint8_t { + if GLOBAL_CONSTEXPR (gMaxSubPageClass != 0) { + return FloorLog2(gMaxSubPageClass) - LOG2(kMinSubPageClass) + 1; + } + return 0; +}(); + +DEFINE_GLOBAL(uint8_t) gPageSize2Pow = GLOBAL_LOG2(gPageSize); +DEFINE_GLOBAL(size_t) gPageSizeMask = gPageSize - 1; + +// Number of pages in a chunk. +DEFINE_GLOBAL(size_t) gChunkNumPages = kChunkSize >> gPageSize2Pow; + +// Number of pages necessary for a chunk header plus a guard page. +DEFINE_GLOBAL(size_t) +gChunkHeaderNumPages = + 1 + (((sizeof(arena_chunk_t) + + sizeof(arena_chunk_map_t) * (gChunkNumPages - 1) + gPageSizeMask) & + ~gPageSizeMask) >> + gPageSize2Pow); + +// One chunk, minus the header, minus a guard page +DEFINE_GLOBAL(size_t) +gMaxLargeClass = + kChunkSize - gPageSize - (gChunkHeaderNumPages << gPageSize2Pow); + +// Various sanity checks that regard configuration. +GLOBAL_ASSERT(1ULL << gPageSize2Pow == gPageSize, + "Page size is not a power of two"); +GLOBAL_ASSERT(kQuantum >= sizeof(void*)); +GLOBAL_ASSERT(kQuantum <= kQuantumWide); +GLOBAL_ASSERT(!kNumQuantumWideClasses || + kQuantumWide <= (kMinSubPageClass - kMaxQuantumClass)); + +GLOBAL_ASSERT(kQuantumWide <= kMaxQuantumClass); + +GLOBAL_ASSERT(gMaxSubPageClass >= kMinSubPageClass || gMaxSubPageClass == 0); +GLOBAL_ASSERT(gMaxLargeClass >= gMaxSubPageClass); +GLOBAL_ASSERT(kChunkSize >= gPageSize); +GLOBAL_ASSERT(kQuantum * 4 <= kChunkSize); + +END_GLOBALS + +// Recycle at most 128 MiB of chunks. This means we retain at most +// 6.25% of the process address space on a 32-bit OS for later use. +static const size_t gRecycleLimit = 128_MiB; + +// The current amount of recycled bytes, updated atomically. +static Atomic<size_t, ReleaseAcquire> gRecycledSize; + +// Maximum number of dirty pages per arena. +#define DIRTY_MAX_DEFAULT (1U << 8) + +static size_t opt_dirty_max = DIRTY_MAX_DEFAULT; + +// Return the smallest chunk multiple that is >= s. +#define CHUNK_CEILING(s) (((s) + kChunkSizeMask) & ~kChunkSizeMask) + +// Return the smallest cacheline multiple that is >= s. +#define CACHELINE_CEILING(s) \ + (((s) + (kCacheLineSize - 1)) & ~(kCacheLineSize - 1)) + +// Return the smallest quantum multiple that is >= a. +#define QUANTUM_CEILING(a) (((a) + (kQuantumMask)) & ~(kQuantumMask)) +#define QUANTUM_WIDE_CEILING(a) \ + (((a) + (kQuantumWideMask)) & ~(kQuantumWideMask)) + +// Return the smallest sub page-size that is >= a. +#define SUBPAGE_CEILING(a) (RoundUpPow2(a)) + +// Return the smallest pagesize multiple that is >= s. +#define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask) + +// Number of all the small-allocated classes +#define NUM_SMALL_CLASSES \ + (kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses + \ + gNumSubPageClasses) + +// *************************************************************************** +// MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive. +#if defined(MALLOC_DECOMMIT) && defined(MALLOC_DOUBLE_PURGE) +# error MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive. +#endif + +static void* base_alloc(size_t aSize); + +// Set to true once the allocator has been initialized. +#if defined(_MSC_VER) && !defined(__clang__) +// MSVC may create a static initializer for an Atomic<bool>, which may actually +// run after `malloc_init` has been called once, which triggers multiple +// initializations. +// We work around the problem by not using an Atomic<bool> at all. There is a +// theoretical problem with using `malloc_initialized` non-atomically, but +// practically, this is only true if `malloc_init` is never called before +// threads are created. +static bool malloc_initialized; +#else +static Atomic<bool, MemoryOrdering::ReleaseAcquire> malloc_initialized; +#endif + +static StaticMutex gInitLock MOZ_UNANNOTATED = {STATIC_MUTEX_INIT}; + +// *************************************************************************** +// Statistics data structures. + +struct arena_stats_t { + // Number of bytes currently mapped. + size_t mapped; + + // Current number of committed pages. + size_t committed; + + // Per-size-category statistics. + size_t allocated_small; + + size_t allocated_large; +}; + +// *************************************************************************** +// Extent data structures. + +enum ChunkType { + UNKNOWN_CHUNK, + ZEROED_CHUNK, // chunk only contains zeroes. + ARENA_CHUNK, // used to back arena runs created by arena_t::AllocRun. + HUGE_CHUNK, // used to back huge allocations (e.g. arena_t::MallocHuge). + RECYCLED_CHUNK, // chunk has been stored for future use by chunk_recycle. +}; + +// Tree of extents. +struct extent_node_t { + union { + // Linkage for the size/address-ordered tree for chunk recycling. + RedBlackTreeNode<extent_node_t> mLinkBySize; + // Arena id for huge allocations. It's meant to match mArena->mId, + // which only holds true when the arena hasn't been disposed of. + arena_id_t mArenaId; + }; + + // Linkage for the address-ordered tree. + RedBlackTreeNode<extent_node_t> mLinkByAddr; + + // Pointer to the extent that this tree node is responsible for. + void* mAddr; + + // Total region size. + size_t mSize; + + union { + // What type of chunk is there; used for chunk recycling. + ChunkType mChunkType; + + // A pointer to the associated arena, for huge allocations. + arena_t* mArena; + }; +}; + +struct ExtentTreeSzTrait { + static RedBlackTreeNode<extent_node_t>& GetTreeNode(extent_node_t* aThis) { + return aThis->mLinkBySize; + } + + static inline Order Compare(extent_node_t* aNode, extent_node_t* aOther) { + Order ret = CompareInt(aNode->mSize, aOther->mSize); + return (ret != Order::eEqual) ? ret + : CompareAddr(aNode->mAddr, aOther->mAddr); + } +}; + +struct ExtentTreeTrait { + static RedBlackTreeNode<extent_node_t>& GetTreeNode(extent_node_t* aThis) { + return aThis->mLinkByAddr; + } + + static inline Order Compare(extent_node_t* aNode, extent_node_t* aOther) { + return CompareAddr(aNode->mAddr, aOther->mAddr); + } +}; + +struct ExtentTreeBoundsTrait : public ExtentTreeTrait { + static inline Order Compare(extent_node_t* aKey, extent_node_t* aNode) { + uintptr_t key_addr = reinterpret_cast<uintptr_t>(aKey->mAddr); + uintptr_t node_addr = reinterpret_cast<uintptr_t>(aNode->mAddr); + size_t node_size = aNode->mSize; + + // Is aKey within aNode? + if (node_addr <= key_addr && key_addr < node_addr + node_size) { + return Order::eEqual; + } + + return CompareAddr(aKey->mAddr, aNode->mAddr); + } +}; + +// Describe size classes to which allocations are rounded up to. +// TODO: add large and huge types when the arena allocation code +// changes in a way that allows it to be beneficial. +class SizeClass { + public: + enum ClassType { + Tiny, + Quantum, + QuantumWide, + SubPage, + Large, + }; + + explicit inline SizeClass(size_t aSize) { + if (aSize <= kMaxTinyClass) { + mType = Tiny; + mSize = std::max(RoundUpPow2(aSize), kMinTinyClass); + } else if (aSize <= kMaxQuantumClass) { + mType = Quantum; + mSize = QUANTUM_CEILING(aSize); + } else if (aSize <= kMaxQuantumWideClass) { + mType = QuantumWide; + mSize = QUANTUM_WIDE_CEILING(aSize); + } else if (aSize <= gMaxSubPageClass) { + mType = SubPage; + mSize = SUBPAGE_CEILING(aSize); + } else if (aSize <= gMaxLargeClass) { + mType = Large; + mSize = PAGE_CEILING(aSize); + } else { + MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Invalid size"); + } + } + + SizeClass& operator=(const SizeClass& aOther) = default; + + bool operator==(const SizeClass& aOther) { return aOther.mSize == mSize; } + + size_t Size() { return mSize; } + + ClassType Type() { return mType; } + + SizeClass Next() { return SizeClass(mSize + 1); } + + private: + ClassType mType; + size_t mSize; +}; + +// Fast division +// +// During deallocation we want to divide by the size class. This class +// provides a routine and sets up a constant as follows. +// +// To divide by a number D that is not a power of two we multiply by (2^17 / +// D) and then right shift by 17 positions. +// +// X / D +// +// becomes +// +// (X * m) >> p +// +// Where m is calculated during the FastDivisor constructor similarly to: +// +// m = 2^p / D +// +template <typename T> +class FastDivisor { + private: + // The shift amount (p) is chosen to minimise the size of m while + // working for divisors up to 65536 in steps of 16. I arrived at 17 + // experimentally. I wanted a low number to minimise the range of m + // so it can fit in a uint16_t, 16 didn't work but 17 worked perfectly. + // + // We'd need to increase this if we allocated memory on smaller boundaries + // than 16. + static const unsigned p = 17; + + // We can fit the inverted divisor in 16 bits, but we template it here for + // convenience. + T m; + + public: + // Needed so mBins can be constructed. + FastDivisor() : m(0) {} + + FastDivisor(unsigned div, unsigned max) { + MOZ_ASSERT(div <= max); + + // divide_inv_shift is large enough. + MOZ_ASSERT((1U << p) >= div); + + // The calculation here for m is formula 26 from Section + // 10-9 "Unsigned Division by Divisors >= 1" in + // Henry S. Warren, Jr.'s Hacker's Delight, 2nd Ed. + unsigned m_ = ((1U << p) + div - 1 - (((1U << p) - 1) % div)) / div; + + // Make sure that max * m does not overflow. + MOZ_DIAGNOSTIC_ASSERT(max < UINT_MAX / m_); + + MOZ_ASSERT(m_ <= std::numeric_limits<T>::max()); + m = static_cast<T>(m_); + + // Initialisation made m non-zero. + MOZ_ASSERT(m); + + // Test that all the divisions in the range we expected would work. +#ifdef MOZ_DEBUG + for (unsigned num = 0; num < max; num += div) { + MOZ_ASSERT(num / div == divide(num)); + } +#endif + } + + // Note that this always occurs in uint32_t regardless of m's type. If m is + // a uint16_t it will be zero-extended before the multiplication. We also use + // uint32_t rather than something that could possibly be larger because it is + // most-likely the cheapest multiplication. + inline uint32_t divide(uint32_t num) const { + // Check that m was initialised. + MOZ_ASSERT(m); + return (num * m) >> p; + } +}; + +template <typename T> +unsigned inline operator/(unsigned num, FastDivisor<T> divisor) { + return divisor.divide(num); +} + +// *************************************************************************** +// Radix tree data structures. +// +// The number of bits passed to the template is the number of significant bits +// in an address to do a radix lookup with. +// +// An address is looked up by splitting it in kBitsPerLevel bit chunks, except +// the most significant bits, where the bit chunk is kBitsAtLevel1 which can be +// different if Bits is not a multiple of kBitsPerLevel. +// +// With e.g. sizeof(void*)=4, Bits=16 and kBitsPerLevel=8, an address is split +// like the following: +// 0x12345678 -> mRoot[0x12][0x34] +template <size_t Bits> +class AddressRadixTree { +// Size of each radix tree node (as a power of 2). +// This impacts tree depth. +#ifdef HAVE_64BIT_BUILD + static const size_t kNodeSize = kCacheLineSize; +#else + static const size_t kNodeSize = 16_KiB; +#endif + static const size_t kBitsPerLevel = LOG2(kNodeSize) - LOG2(sizeof(void*)); + static const size_t kBitsAtLevel1 = + (Bits % kBitsPerLevel) ? Bits % kBitsPerLevel : kBitsPerLevel; + static const size_t kHeight = (Bits + kBitsPerLevel - 1) / kBitsPerLevel; + static_assert(kBitsAtLevel1 + (kHeight - 1) * kBitsPerLevel == Bits, + "AddressRadixTree parameters don't work out"); + + Mutex mLock MOZ_UNANNOTATED; + void** mRoot; + + public: + bool Init(); + + inline void* Get(void* aAddr); + + // Returns whether the value was properly set. + inline bool Set(void* aAddr, void* aValue); + + inline bool Unset(void* aAddr) { return Set(aAddr, nullptr); } + + private: + inline void** GetSlot(void* aAddr, bool aCreate = false); +}; + +// *************************************************************************** +// Arena data structures. + +struct arena_bin_t; + +struct ArenaChunkMapLink { + static RedBlackTreeNode<arena_chunk_map_t>& GetTreeNode( + arena_chunk_map_t* aThis) { + return aThis->link; + } +}; + +struct ArenaRunTreeTrait : public ArenaChunkMapLink { + static inline Order Compare(arena_chunk_map_t* aNode, + arena_chunk_map_t* aOther) { + MOZ_ASSERT(aNode); + MOZ_ASSERT(aOther); + return CompareAddr(aNode, aOther); + } +}; + +struct ArenaAvailTreeTrait : public ArenaChunkMapLink { + static inline Order Compare(arena_chunk_map_t* aNode, + arena_chunk_map_t* aOther) { + size_t size1 = aNode->bits & ~gPageSizeMask; + size_t size2 = aOther->bits & ~gPageSizeMask; + Order ret = CompareInt(size1, size2); + return (ret != Order::eEqual) + ? ret + : CompareAddr((aNode->bits & CHUNK_MAP_KEY) ? nullptr : aNode, + aOther); + } +}; + +struct ArenaDirtyChunkTrait { + static RedBlackTreeNode<arena_chunk_t>& GetTreeNode(arena_chunk_t* aThis) { + return aThis->link_dirty; + } + + static inline Order Compare(arena_chunk_t* aNode, arena_chunk_t* aOther) { + MOZ_ASSERT(aNode); + MOZ_ASSERT(aOther); + return CompareAddr(aNode, aOther); + } +}; + +#ifdef MALLOC_DOUBLE_PURGE +namespace mozilla { + +template <> +struct GetDoublyLinkedListElement<arena_chunk_t> { + static DoublyLinkedListElement<arena_chunk_t>& Get(arena_chunk_t* aThis) { + return aThis->chunks_madvised_elem; + } +}; +} // namespace mozilla +#endif + +struct arena_run_t { +#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED) + uint32_t mMagic; +# define ARENA_RUN_MAGIC 0x384adf93 + + // On 64-bit platforms, having the arena_bin_t pointer following + // the mMagic field means there's padding between both fields, making + // the run header larger than necessary. + // But when MOZ_DIAGNOSTIC_ASSERT_ENABLED is not set, starting the + // header with this field followed by the arena_bin_t pointer yields + // the same padding. We do want the mMagic field to appear first, so + // depending whether MOZ_DIAGNOSTIC_ASSERT_ENABLED is set or not, we + // move some field to avoid padding. + + // Number of free regions in run. + unsigned mNumFree; +#endif + + // Bin this run is associated with. + arena_bin_t* mBin; + + // Index of first element that might have a free region. + unsigned mRegionsMinElement; + +#if !defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED) + // Number of free regions in run. + unsigned mNumFree; +#endif + + // Bitmask of in-use regions (0: in use, 1: free). + unsigned mRegionsMask[1]; // Dynamically sized. +}; + +struct arena_bin_t { + // Current run being used to service allocations of this bin's size + // class. + arena_run_t* mCurrentRun; + + // Tree of non-full runs. This tree is used when looking for an + // existing run when mCurrentRun is no longer usable. We choose the + // non-full run that is lowest in memory; this policy tends to keep + // objects packed well, and it can also help reduce the number of + // almost-empty chunks. + RedBlackTree<arena_chunk_map_t, ArenaRunTreeTrait> mNonFullRuns; + + // Bin's size class. + size_t mSizeClass; + + // Total number of regions in a run for this bin's size class. + uint32_t mRunNumRegions; + + // Number of elements in a run's mRegionsMask for this bin's size class. + uint32_t mRunNumRegionsMask; + + // Offset of first region in a run for this bin's size class. + uint32_t mRunFirstRegionOffset; + + // Current number of runs in this bin, full or otherwise. + uint32_t mNumRuns; + + // A constant for fast division by size class. This value is 16 bits wide so + // it is placed last. + FastDivisor<uint16_t> mSizeDivisor; + + // Total number of pages in a run for this bin's size class. + uint8_t mRunSizePages; + + // Amount of overhead runs are allowed to have. + static constexpr double kRunOverhead = 1.6_percent; + static constexpr double kRunRelaxedOverhead = 2.4_percent; + + // Initialize a bin for the given size class. + // The generated run sizes, for a page size of 4 KiB, are: + // size|run size|run size|run size|run + // class|size class|size class|size class|size + // 4 4 KiB 8 4 KiB 16 4 KiB 32 4 KiB + // 48 4 KiB 64 4 KiB 80 4 KiB 96 4 KiB + // 112 4 KiB 128 8 KiB 144 4 KiB 160 8 KiB + // 176 4 KiB 192 4 KiB 208 8 KiB 224 4 KiB + // 240 8 KiB 256 16 KiB 272 8 KiB 288 4 KiB + // 304 12 KiB 320 12 KiB 336 4 KiB 352 8 KiB + // 368 4 KiB 384 8 KiB 400 20 KiB 416 16 KiB + // 432 12 KiB 448 4 KiB 464 16 KiB 480 8 KiB + // 496 20 KiB 512 32 KiB 768 16 KiB 1024 64 KiB + // 1280 24 KiB 1536 32 KiB 1792 16 KiB 2048 128 KiB + // 2304 16 KiB 2560 48 KiB 2816 36 KiB 3072 64 KiB + // 3328 36 KiB 3584 32 KiB 3840 64 KiB + inline void Init(SizeClass aSizeClass); +}; + +// We try to keep the above structure aligned with common cache lines sizes, +// often that's 64 bytes on x86 and ARM, we don't make assumptions for other +// architectures. +#if defined(__x86_64__) || defined(__aarch64__) +// On 64bit platforms this structure is often 48 bytes +// long, which means every other array element will be properly aligned. +static_assert(sizeof(arena_bin_t) == 48); +#elif defined(__x86__) || defined(__arm__) +static_assert(sizeof(arena_bin_t) == 32); +#endif + +struct arena_t { +#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED) + uint32_t mMagic; +# define ARENA_MAGIC 0x947d3d24 +#endif + + // Linkage for the tree of arenas by id. + RedBlackTreeNode<arena_t> mLink; + + // Arena id, that we keep away from the beginning of the struct so that + // free list pointers in TypedBaseAlloc<arena_t> don't overflow in it, + // and it keeps the value it had after the destructor. + arena_id_t mId; + + // All operations on this arena require that lock be locked. The MaybeMutex + // class well elude locking if the arena is accessed from a single thread + // only. + MaybeMutex mLock MOZ_UNANNOTATED; + + arena_stats_t mStats; + + private: + // Tree of dirty-page-containing chunks this arena manages. + RedBlackTree<arena_chunk_t, ArenaDirtyChunkTrait> mChunksDirty; + +#ifdef MALLOC_DOUBLE_PURGE + // Head of a linked list of MADV_FREE'd-page-containing chunks this + // arena manages. + DoublyLinkedList<arena_chunk_t> mChunksMAdvised; +#endif + + // In order to avoid rapid chunk allocation/deallocation when an arena + // oscillates right on the cusp of needing a new chunk, cache the most + // recently freed chunk. The spare is left in the arena's chunk trees + // until it is deleted. + // + // There is one spare chunk per arena, rather than one spare total, in + // order to avoid interactions between multiple threads that could make + // a single spare inadequate. + arena_chunk_t* mSpare; + + // A per-arena opt-in to randomize the offset of small allocations + bool mRandomizeSmallAllocations; + + // Whether this is a private arena. Multiple public arenas are just a + // performance optimization and not a safety feature. + // + // Since, for example, we don't want thread-local arenas to grow too much, we + // use the default arena for bigger allocations. We use this member to allow + // realloc() to switch out of our arena if needed (which is not allowed for + // private arenas for security). + bool mIsPrivate; + + // A pseudorandom number generator. Initially null, it gets initialized + // on first use to avoid recursive malloc initialization (e.g. on OSX + // arc4random allocates memory). + mozilla::non_crypto::XorShift128PlusRNG* mPRNG; + + public: + // Current count of pages within unused runs that are potentially + // dirty, and for which madvise(... MADV_FREE) has not been called. By + // tracking this, we can institute a limit on how much dirty unused + // memory is mapped for each arena. + size_t mNumDirty; + + // Maximum value allowed for mNumDirty. + size_t mMaxDirty; + + int32_t mMaxDirtyIncreaseOverride; + int32_t mMaxDirtyDecreaseOverride; + + private: + // Size/address-ordered tree of this arena's available runs. This tree + // is used for first-best-fit run allocation. + RedBlackTree<arena_chunk_map_t, ArenaAvailTreeTrait> mRunsAvail; + + public: + // mBins is used to store rings of free regions of the following sizes, + // assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS. + // + // | mBins[i] | size | + // +----------+------+ + // | 0 | 2 | + // | 1 | 4 | + // | 2 | 8 | + // +----------+------+ + // | 3 | 16 | + // | 4 | 32 | + // | 5 | 48 | + // | 6 | 64 | + // | : : + // | : : + // | 33 | 496 | + // | 34 | 512 | + // +----------+------+ + // | 35 | 768 | + // | 36 | 1024 | + // | : : + // | : : + // | 46 | 3584 | + // | 47 | 3840 | + // +----------+------+ + arena_bin_t mBins[1]; // Dynamically sized. + + explicit arena_t(arena_params_t* aParams, bool aIsPrivate); + ~arena_t(); + + private: + void InitChunk(arena_chunk_t* aChunk); + + // This may return a chunk that should be destroyed with chunk_dealloc outside + // of the arena lock. It is not the same chunk as was passed in (since that + // chunk now becomes mSpare). + [[nodiscard]] arena_chunk_t* DeallocChunk(arena_chunk_t* aChunk); + + arena_run_t* AllocRun(size_t aSize, bool aLarge, bool aZero); + + arena_chunk_t* DallocRun(arena_run_t* aRun, bool aDirty); + + [[nodiscard]] bool SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge, + bool aZero); + + void TrimRunHead(arena_chunk_t* aChunk, arena_run_t* aRun, size_t aOldSize, + size_t aNewSize); + + void TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun, size_t aOldSize, + size_t aNewSize, bool dirty); + + arena_run_t* GetNonFullBinRun(arena_bin_t* aBin); + + inline uint8_t FindFreeBitInMask(uint32_t aMask, uint32_t& aRng); + + inline void* ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin); + + inline void* MallocSmall(size_t aSize, bool aZero); + + void* MallocLarge(size_t aSize, bool aZero); + + void* MallocHuge(size_t aSize, bool aZero); + + void* PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize); + + void* PallocHuge(size_t aSize, size_t aAlignment, bool aZero); + + void RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize, + size_t aOldSize); + + bool RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize, + size_t aOldSize); + + void* RallocSmallOrLarge(void* aPtr, size_t aSize, size_t aOldSize); + + void* RallocHuge(void* aPtr, size_t aSize, size_t aOldSize); + + public: + inline void* Malloc(size_t aSize, bool aZero); + + void* Palloc(size_t aAlignment, size_t aSize); + + // This may return a chunk that should be destroyed with chunk_dealloc outside + // of the arena lock. It is not the same chunk as was passed in (since that + // chunk now becomes mSpare). + [[nodiscard]] inline arena_chunk_t* DallocSmall(arena_chunk_t* aChunk, + void* aPtr, + arena_chunk_map_t* aMapElm); + + [[nodiscard]] arena_chunk_t* DallocLarge(arena_chunk_t* aChunk, void* aPtr); + + void* Ralloc(void* aPtr, size_t aSize, size_t aOldSize); + + size_t EffectiveMaxDirty(); + + // Passing one means purging all. + void Purge(size_t aMaxDirty); + + void HardPurge(); + + bool IsMainThreadOnly() const { return !mLock.LockIsEnabled(); } + + void* operator new(size_t aCount) = delete; + + void* operator new(size_t aCount, const fallible_t&) noexcept; + + void operator delete(void*); +}; + +struct ArenaTreeTrait { + static RedBlackTreeNode<arena_t>& GetTreeNode(arena_t* aThis) { + return aThis->mLink; + } + + static inline Order Compare(arena_t* aNode, arena_t* aOther) { + MOZ_ASSERT(aNode); + MOZ_ASSERT(aOther); + return CompareInt(aNode->mId, aOther->mId); + } +}; + +// Bookkeeping for all the arenas used by the allocator. +// Arenas are separated in two categories: +// - "private" arenas, used through the moz_arena_* API +// - all the other arenas: the default arena, and thread-local arenas, +// used by the standard API. +class ArenaCollection { + public: + bool Init() { + mArenas.Init(); + mPrivateArenas.Init(); + mMainThreadArenas.Init(); + arena_params_t params; + // The main arena allows more dirty pages than the default for other arenas. + params.mMaxDirty = opt_dirty_max; + mDefaultArena = + mLock.Init() ? CreateArena(/* aIsPrivate = */ false, ¶ms) : nullptr; + return bool(mDefaultArena); + } + + inline arena_t* GetById(arena_id_t aArenaId, bool aIsPrivate); + + arena_t* CreateArena(bool aIsPrivate, arena_params_t* aParams); + + void DisposeArena(arena_t* aArena) { + MutexAutoLock lock(mLock); + Tree& tree = + aArena->IsMainThreadOnly() ? mMainThreadArenas : mPrivateArenas; + + MOZ_RELEASE_ASSERT(tree.Search(aArena), "Arena not in tree"); + tree.Remove(aArena); + delete aArena; + } + + void SetDefaultMaxDirtyPageModifier(int32_t aModifier) { + mDefaultMaxDirtyPageModifier = aModifier; + } + int32_t DefaultMaxDirtyPageModifier() { return mDefaultMaxDirtyPageModifier; } + + using Tree = RedBlackTree<arena_t, ArenaTreeTrait>; + + struct Iterator : Tree::Iterator { + explicit Iterator(Tree* aTree, Tree* aSecondTree, + Tree* aThirdTree = nullptr) + : Tree::Iterator(aTree), + mSecondTree(aSecondTree), + mThirdTree(aThirdTree) {} + + Item<Iterator> begin() { + return Item<Iterator>(this, *Tree::Iterator::begin()); + } + + Item<Iterator> end() { return Item<Iterator>(this, nullptr); } + + arena_t* Next() { + arena_t* result = Tree::Iterator::Next(); + if (!result && mSecondTree) { + new (this) Iterator(mSecondTree, mThirdTree); + result = *Tree::Iterator::begin(); + } + return result; + } + + private: + Tree* mSecondTree; + Tree* mThirdTree; + }; + + Iterator iter() { + if (IsOnMainThreadWeak()) { + return Iterator(&mArenas, &mPrivateArenas, &mMainThreadArenas); + } + return Iterator(&mArenas, &mPrivateArenas); + } + + inline arena_t* GetDefault() { return mDefaultArena; } + + Mutex mLock MOZ_UNANNOTATED; + + // We're running on the main thread which is set by a call to SetMainThread(). + bool IsOnMainThread() const { + return mMainThreadId.isSome() && mMainThreadId.value() == GetThreadId(); + } + + // We're running on the main thread or SetMainThread() has never been called. + bool IsOnMainThreadWeak() const { + return mMainThreadId.isNothing() || IsOnMainThread(); + } + + // After a fork set the new thread ID in the child. + void PostForkFixMainThread() { + if (mMainThreadId.isSome()) { + // Only if the main thread has been defined. + mMainThreadId = Some(GetThreadId()); + } + } + + void SetMainThread() { + MutexAutoLock lock(mLock); + MOZ_ASSERT(mMainThreadId.isNothing()); + mMainThreadId = Some(GetThreadId()); + } + + private: + const static arena_id_t MAIN_THREAD_ARENA_BIT = 0x1; + + inline arena_t* GetByIdInternal(Tree& aTree, arena_id_t aArenaId); + + arena_id_t MakeRandArenaId(bool aIsMainThreadOnly) const; + static bool ArenaIdIsMainThreadOnly(arena_id_t aArenaId) { + return aArenaId & MAIN_THREAD_ARENA_BIT; + } + + arena_t* mDefaultArena; + arena_id_t mLastPublicArenaId; + + // Accessing mArenas and mPrivateArenas can only be done while holding mLock. + // Since mMainThreadArenas can only be used from the main thread, it can be + // accessed without a lock which is why it is a seperate tree. + Tree mArenas; + Tree mPrivateArenas; + Tree mMainThreadArenas; + Atomic<int32_t, MemoryOrdering::Relaxed> mDefaultMaxDirtyPageModifier; + Maybe<ThreadId> mMainThreadId; +}; + +static ArenaCollection gArenas; + +// ****** +// Chunks. +static AddressRadixTree<(sizeof(void*) << 3) - LOG2(kChunkSize)> gChunkRTree; + +// Protects chunk-related data structures. +static Mutex chunks_mtx; + +// Trees of chunks that were previously allocated (trees differ only in node +// ordering). These are used when allocating chunks, in an attempt to re-use +// address space. Depending on function, different tree orderings are needed, +// which is why there are two trees with the same contents. +static RedBlackTree<extent_node_t, ExtentTreeSzTrait> gChunksBySize + MOZ_GUARDED_BY(chunks_mtx); +static RedBlackTree<extent_node_t, ExtentTreeTrait> gChunksByAddress + MOZ_GUARDED_BY(chunks_mtx); + +// Protects huge allocation-related data structures. +static Mutex huge_mtx; + +// Tree of chunks that are stand-alone huge allocations. +static RedBlackTree<extent_node_t, ExtentTreeTrait> huge + MOZ_GUARDED_BY(huge_mtx); + +// Huge allocation statistics. +static size_t huge_allocated MOZ_GUARDED_BY(huge_mtx); +static size_t huge_mapped MOZ_GUARDED_BY(huge_mtx); + +// ************************** +// base (internal allocation). + +static Mutex base_mtx; + +// Current pages that are being used for internal memory allocations. These +// pages are carved up in cacheline-size quanta, so that there is no chance of +// false cache line sharing. +static void* base_pages MOZ_GUARDED_BY(base_mtx); +static void* base_next_addr MOZ_GUARDED_BY(base_mtx); +static void* base_next_decommitted MOZ_GUARDED_BY(base_mtx); +// Address immediately past base_pages. +static void* base_past_addr MOZ_GUARDED_BY(base_mtx); +static size_t base_mapped MOZ_GUARDED_BY(base_mtx); +static size_t base_committed MOZ_GUARDED_BY(base_mtx); + +// ****** +// Arenas. + +// The arena associated with the current thread (per +// jemalloc_thread_local_arena) On OSX, __thread/thread_local circles back +// calling malloc to allocate storage on first access on each thread, which +// leads to an infinite loop, but pthread-based TLS somehow doesn't have this +// problem. +#if !defined(XP_DARWIN) +static MOZ_THREAD_LOCAL(arena_t*) thread_arena; +#else +static detail::ThreadLocal<arena_t*, detail::ThreadLocalKeyStorage> + thread_arena; +#endif + +// ***************************** +// Runtime configuration options. + +#ifdef MALLOC_RUNTIME_CONFIG +# define MALLOC_RUNTIME_VAR static +#else +# define MALLOC_RUNTIME_VAR static const +#endif + +enum PoisonType { + NONE, + SOME, + ALL, +}; + +MALLOC_RUNTIME_VAR bool opt_junk = false; +MALLOC_RUNTIME_VAR bool opt_zero = false; + +#ifdef EARLY_BETA_OR_EARLIER +MALLOC_RUNTIME_VAR PoisonType opt_poison = ALL; +#else +MALLOC_RUNTIME_VAR PoisonType opt_poison = SOME; +#endif + +MALLOC_RUNTIME_VAR size_t opt_poison_size = kCacheLineSize * 4; + +static bool opt_randomize_small = true; + +// *************************************************************************** +// Begin forward declarations. + +static void* chunk_alloc(size_t aSize, size_t aAlignment, bool aBase); +static void chunk_dealloc(void* aChunk, size_t aSize, ChunkType aType); +#ifdef MOZ_DEBUG +static void chunk_assert_zero(void* aPtr, size_t aSize); +#endif +static void huge_dalloc(void* aPtr, arena_t* aArena); +static bool malloc_init_hard(); + +#ifndef XP_WIN +# ifdef XP_DARWIN +# define FORK_HOOK extern "C" +# else +# define FORK_HOOK static +# endif +FORK_HOOK void _malloc_prefork(void); +FORK_HOOK void _malloc_postfork_parent(void); +FORK_HOOK void _malloc_postfork_child(void); +#endif + +// End forward declarations. +// *************************************************************************** + +// FreeBSD's pthreads implementation calls malloc(3), so the malloc +// implementation has to take pains to avoid infinite recursion during +// initialization. +// Returns whether the allocator was successfully initialized. +static inline bool malloc_init() { + if (!malloc_initialized) { + return malloc_init_hard(); + } + return true; +} + +static void _malloc_message(const char* p) { +#if !defined(XP_WIN) +# define _write write +#endif + // Pretend to check _write() errors to suppress gcc warnings about + // warn_unused_result annotations in some versions of glibc headers. + if (_write(STDERR_FILENO, p, (unsigned int)strlen(p)) < 0) { + return; + } +} + +template <typename... Args> +static void _malloc_message(const char* p, Args... args) { + _malloc_message(p); + _malloc_message(args...); +} + +#ifdef ANDROID +// Android's pthread.h does not declare pthread_atfork() until SDK 21. +extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void), + void (*)(void)); +#endif + +// *************************************************************************** +// Begin Utility functions/macros. + +// Return the chunk address for allocation address a. +static inline arena_chunk_t* GetChunkForPtr(const void* aPtr) { + return (arena_chunk_t*)(uintptr_t(aPtr) & ~kChunkSizeMask); +} + +// Return the chunk offset of address a. +static inline size_t GetChunkOffsetForPtr(const void* aPtr) { + return (size_t)(uintptr_t(aPtr) & kChunkSizeMask); +} + +static inline const char* _getprogname(void) { return "<jemalloc>"; } + +static inline void MaybePoison(void* aPtr, size_t aSize) { + size_t size; + switch (opt_poison) { + case NONE: + return; + case SOME: + size = std::min(aSize, opt_poison_size); + break; + case ALL: + size = aSize; + break; + } + MOZ_ASSERT(size != 0 && size <= aSize); + memset(aPtr, kAllocPoison, size); +} + +// Fill the given range of memory with zeroes or junk depending on opt_junk and +// opt_zero. +static inline void ApplyZeroOrJunk(void* aPtr, size_t aSize) { + if (opt_junk) { + memset(aPtr, kAllocJunk, aSize); + } else if (opt_zero) { + memset(aPtr, 0, aSize); + } +} + +// On Windows, delay crashing on OOM. +#ifdef XP_WIN + +// Implementation of VirtualAlloc wrapper (bug 1716727). +namespace MozAllocRetries { + +// Maximum retry count on OOM. +constexpr size_t kMaxAttempts = 10; +// Minimum delay time between retries. (The actual delay time may be larger. See +// Microsoft's documentation for ::Sleep() for details.) +constexpr size_t kDelayMs = 50; + +using StallSpecs = ::mozilla::StallSpecs; + +static constexpr StallSpecs maxStall = {.maxAttempts = kMaxAttempts, + .delayMs = kDelayMs}; + +static inline StallSpecs GetStallSpecs() { +# if defined(JS_STANDALONE) + // GetGeckoProcessType() isn't available in this configuration. (SpiderMonkey + // on Windows mostly skips this in favor of directly calling ::VirtualAlloc(), + // though, so it's probably not going to matter whether we stall here or not.) + return maxStall; +# else + switch (GetGeckoProcessType()) { + // For the main process, stall for the maximum permissible time period. (The + // main process is the most important one to keep alive.) + case GeckoProcessType::GeckoProcessType_Default: + return maxStall; + + // For all other process types, stall for at most half as long. + default: + return {.maxAttempts = maxStall.maxAttempts / 2, + .delayMs = maxStall.delayMs}; + } +# endif +} + +// Drop-in wrapper around VirtualAlloc. When out of memory, may attempt to stall +// and retry rather than returning immediately, in hopes that the page file is +// about to be expanded by Windows. +// +// Ref: +// https://docs.microsoft.com/en-us/troubleshoot/windows-client/performance/slow-page-file-growth-memory-allocation-errors +[[nodiscard]] void* MozVirtualAlloc(LPVOID lpAddress, SIZE_T dwSize, + DWORD flAllocationType, DWORD flProtect) { + DWORD const lastError = ::GetLastError(); + + constexpr auto IsOOMError = [] { + switch (::GetLastError()) { + // This is the usual error result from VirtualAlloc for OOM. + case ERROR_COMMITMENT_LIMIT: + // Although rare, this has also been observed in low-memory situations. + // (Presumably this means Windows can't allocate enough kernel-side space + // for its own internal representation of the process's virtual address + // space.) + case ERROR_NOT_ENOUGH_MEMORY: + return true; + } + return false; + }; + + { + void* ptr = ::VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect); + if (MOZ_LIKELY(ptr)) return ptr; + + // We can't do anything for errors other than OOM... + if (!IsOOMError()) return nullptr; + // ... or if this wasn't a request to commit memory in the first place. + // (This function has no strategy for resolving MEM_RESERVE failures.) + if (!(flAllocationType & MEM_COMMIT)) return nullptr; + } + + // Retry as many times as desired (possibly zero). + const StallSpecs stallSpecs = GetStallSpecs(); + + const auto ret = + stallSpecs.StallAndRetry(&::Sleep, [&]() -> std::optional<void*> { + void* ptr = + ::VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect); + + if (ptr) { + // The OOM status has been handled, and should not be reported to + // telemetry. + if (IsOOMError()) { + ::SetLastError(lastError); + } + return ptr; + } + + // Failure for some reason other than OOM. + if (!IsOOMError()) { + return nullptr; + } + + return std::nullopt; + }); + + return ret.value_or(nullptr); +} +} // namespace MozAllocRetries + +using MozAllocRetries::MozVirtualAlloc; + +namespace mozilla { +MOZ_JEMALLOC_API StallSpecs GetAllocatorStallSpecs() { + return ::MozAllocRetries::GetStallSpecs(); +} +} // namespace mozilla + +#endif // XP_WIN + +// *************************************************************************** + +static inline void pages_decommit(void* aAddr, size_t aSize) { +#ifdef XP_WIN + // The region starting at addr may have been allocated in multiple calls + // to VirtualAlloc and recycled, so decommitting the entire region in one + // go may not be valid. However, since we allocate at least a chunk at a + // time, we may touch any region in chunksized increments. + size_t pages_size = std::min(aSize, kChunkSize - GetChunkOffsetForPtr(aAddr)); + while (aSize > 0) { + // This will cause Access Violation on read and write and thus act as a + // guard page or region as well. + if (!VirtualFree(aAddr, pages_size, MEM_DECOMMIT)) { + MOZ_CRASH(); + } + aAddr = (void*)((uintptr_t)aAddr + pages_size); + aSize -= pages_size; + pages_size = std::min(aSize, kChunkSize); + } +#else + if (mmap(aAddr, aSize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, + 0) == MAP_FAILED) { + // We'd like to report the OOM for our tooling, but we can't allocate + // memory at this point, so avoid the use of printf. + const char out_of_mappings[] = + "[unhandlable oom] Failed to mmap, likely no more mappings " + "available " __FILE__ " : " MOZ_STRINGIFY(__LINE__); + if (errno == ENOMEM) { +# ifndef ANDROID + fputs(out_of_mappings, stderr); + fflush(stderr); +# endif + MOZ_CRASH_ANNOTATE(out_of_mappings); + } + MOZ_REALLY_CRASH(__LINE__); + } + MozTagAnonymousMemory(aAddr, aSize, "jemalloc-decommitted"); +#endif +} + +// Commit pages. Returns whether pages were committed. +[[nodiscard]] static inline bool pages_commit(void* aAddr, size_t aSize) { +#ifdef XP_WIN + // The region starting at addr may have been allocated in multiple calls + // to VirtualAlloc and recycled, so committing the entire region in one + // go may not be valid. However, since we allocate at least a chunk at a + // time, we may touch any region in chunksized increments. + size_t pages_size = std::min(aSize, kChunkSize - GetChunkOffsetForPtr(aAddr)); + while (aSize > 0) { + if (!MozVirtualAlloc(aAddr, pages_size, MEM_COMMIT, PAGE_READWRITE)) { + return false; + } + aAddr = (void*)((uintptr_t)aAddr + pages_size); + aSize -= pages_size; + pages_size = std::min(aSize, kChunkSize); + } +#else + if (mmap(aAddr, aSize, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == MAP_FAILED) { + return false; + } + MozTagAnonymousMemory(aAddr, aSize, "jemalloc"); +#endif + return true; +} + +static bool base_pages_alloc(size_t minsize) MOZ_REQUIRES(base_mtx) { + size_t csize; + size_t pminsize; + + MOZ_ASSERT(minsize != 0); + csize = CHUNK_CEILING(minsize); + base_pages = chunk_alloc(csize, kChunkSize, true); + if (!base_pages) { + return true; + } + base_next_addr = base_pages; + base_past_addr = (void*)((uintptr_t)base_pages + csize); + // Leave enough pages for minsize committed, since otherwise they would + // have to be immediately recommitted. + pminsize = PAGE_CEILING(minsize); + base_next_decommitted = (void*)((uintptr_t)base_pages + pminsize); + if (pminsize < csize) { + pages_decommit(base_next_decommitted, csize - pminsize); + } + base_mapped += csize; + base_committed += pminsize; + + return false; +} + +static void* base_alloc(size_t aSize) { + void* ret; + size_t csize; + + // Round size up to nearest multiple of the cacheline size. + csize = CACHELINE_CEILING(aSize); + + MutexAutoLock lock(base_mtx); + // Make sure there's enough space for the allocation. + if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) { + if (base_pages_alloc(csize)) { + return nullptr; + } + } + // Allocate. + ret = base_next_addr; + base_next_addr = (void*)((uintptr_t)base_next_addr + csize); + // Make sure enough pages are committed for the new allocation. + if ((uintptr_t)base_next_addr > (uintptr_t)base_next_decommitted) { + void* pbase_next_addr = (void*)(PAGE_CEILING((uintptr_t)base_next_addr)); + + if (!pages_commit( + base_next_decommitted, + (uintptr_t)pbase_next_addr - (uintptr_t)base_next_decommitted)) { + return nullptr; + } + + base_committed += + (uintptr_t)pbase_next_addr - (uintptr_t)base_next_decommitted; + base_next_decommitted = pbase_next_addr; + } + + return ret; +} + +static void* base_calloc(size_t aNumber, size_t aSize) { + void* ret = base_alloc(aNumber * aSize); + if (ret) { + memset(ret, 0, aNumber * aSize); + } + return ret; +} + +// A specialization of the base allocator with a free list. +template <typename T> +struct TypedBaseAlloc { + static T* sFirstFree; + + static size_t size_of() { return sizeof(T); } + + static T* alloc() { + T* ret; + + base_mtx.Lock(); + if (sFirstFree) { + ret = sFirstFree; + sFirstFree = *(T**)ret; + base_mtx.Unlock(); + } else { + base_mtx.Unlock(); + ret = (T*)base_alloc(size_of()); + } + + return ret; + } + + static void dealloc(T* aNode) { + MutexAutoLock lock(base_mtx); + *(T**)aNode = sFirstFree; + sFirstFree = aNode; + } +}; + +using ExtentAlloc = TypedBaseAlloc<extent_node_t>; + +template <> +extent_node_t* ExtentAlloc::sFirstFree = nullptr; + +template <> +arena_t* TypedBaseAlloc<arena_t>::sFirstFree = nullptr; + +template <> +size_t TypedBaseAlloc<arena_t>::size_of() { + // Allocate enough space for trailing bins. + return sizeof(arena_t) + (sizeof(arena_bin_t) * (NUM_SMALL_CLASSES - 1)); +} + +template <typename T> +struct BaseAllocFreePolicy { + void operator()(T* aPtr) { TypedBaseAlloc<T>::dealloc(aPtr); } +}; + +using UniqueBaseNode = + UniquePtr<extent_node_t, BaseAllocFreePolicy<extent_node_t>>; + +// End Utility functions/macros. +// *************************************************************************** +// Begin chunk management functions. + +#ifdef XP_WIN + +static void* pages_map(void* aAddr, size_t aSize) { + void* ret = nullptr; + ret = MozVirtualAlloc(aAddr, aSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + return ret; +} + +static void pages_unmap(void* aAddr, size_t aSize) { + if (VirtualFree(aAddr, 0, MEM_RELEASE) == 0) { + _malloc_message(_getprogname(), ": (malloc) Error in VirtualFree()\n"); + } +} +#else + +static void pages_unmap(void* aAddr, size_t aSize) { + if (munmap(aAddr, aSize) == -1) { + char buf[64]; + + if (strerror_r(errno, buf, sizeof(buf)) == 0) { + _malloc_message(_getprogname(), ": (malloc) Error in munmap(): ", buf, + "\n"); + } + } +} + +static void* pages_map(void* aAddr, size_t aSize) { + void* ret; +# if defined(__ia64__) || \ + (defined(__sparc__) && defined(__arch64__) && defined(__linux__)) + // The JS engine assumes that all allocated pointers have their high 17 bits + // clear, which ia64's mmap doesn't support directly. However, we can emulate + // it by passing mmap an "addr" parameter with those bits clear. The mmap will + // return that address, or the nearest available memory above that address, + // providing a near-guarantee that those bits are clear. If they are not, we + // return nullptr below to indicate out-of-memory. + // + // The addr is chosen as 0x0000070000000000, which still allows about 120TB of + // virtual address space. + // + // See Bug 589735 for more information. + bool check_placement = true; + if (!aAddr) { + aAddr = (void*)0x0000070000000000; + check_placement = false; + } +# endif + +# if defined(__sparc__) && defined(__arch64__) && defined(__linux__) + const uintptr_t start = 0x0000070000000000ULL; + const uintptr_t end = 0x0000800000000000ULL; + + // Copied from js/src/gc/Memory.cpp and adapted for this source + uintptr_t hint; + void* region = MAP_FAILED; + for (hint = start; region == MAP_FAILED && hint + aSize <= end; + hint += kChunkSize) { + region = mmap((void*)hint, aSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (region != MAP_FAILED) { + if (((size_t)region + (aSize - 1)) & 0xffff800000000000) { + if (munmap(region, aSize)) { + MOZ_ASSERT(errno == ENOMEM); + } + region = MAP_FAILED; + } + } + } + ret = region; +# else + // We don't use MAP_FIXED here, because it can cause the *replacement* + // of existing mappings, and we only want to create new mappings. + ret = + mmap(aAddr, aSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + MOZ_ASSERT(ret); +# endif + if (ret == MAP_FAILED) { + ret = nullptr; + } +# if defined(__ia64__) || \ + (defined(__sparc__) && defined(__arch64__) && defined(__linux__)) + // If the allocated memory doesn't have its upper 17 bits clear, consider it + // as out of memory. + else if ((long long)ret & 0xffff800000000000) { + munmap(ret, aSize); + ret = nullptr; + } + // If the caller requested a specific memory location, verify that's what mmap + // returned. + else if (check_placement && ret != aAddr) { +# else + else if (aAddr && ret != aAddr) { +# endif + // We succeeded in mapping memory, but not in the right place. + pages_unmap(ret, aSize); + ret = nullptr; + } + if (ret) { + MozTagAnonymousMemory(ret, aSize, "jemalloc"); + } + +# if defined(__ia64__) || \ + (defined(__sparc__) && defined(__arch64__) && defined(__linux__)) + MOZ_ASSERT(!ret || (!check_placement && ret) || + (check_placement && ret == aAddr)); +# else + MOZ_ASSERT(!ret || (!aAddr && ret != aAddr) || (aAddr && ret == aAddr)); +# endif + return ret; +} +#endif + +#ifdef XP_DARWIN +# define VM_COPY_MIN kChunkSize +static inline void pages_copy(void* dest, const void* src, size_t n) { + MOZ_ASSERT((void*)((uintptr_t)dest & ~gPageSizeMask) == dest); + MOZ_ASSERT(n >= VM_COPY_MIN); + MOZ_ASSERT((void*)((uintptr_t)src & ~gPageSizeMask) == src); + + kern_return_t r = vm_copy(mach_task_self(), (vm_address_t)src, (vm_size_t)n, + (vm_address_t)dest); + if (r != KERN_SUCCESS) { + MOZ_CRASH("vm_copy() failed"); + } +} + +#endif + +template <size_t Bits> +bool AddressRadixTree<Bits>::Init() { + mLock.Init(); + mRoot = (void**)base_calloc(1 << kBitsAtLevel1, sizeof(void*)); + return mRoot; +} + +template <size_t Bits> +void** AddressRadixTree<Bits>::GetSlot(void* aKey, bool aCreate) { + uintptr_t key = reinterpret_cast<uintptr_t>(aKey); + uintptr_t subkey; + unsigned i, lshift, height, bits; + void** node; + void** child; + + for (i = lshift = 0, height = kHeight, node = mRoot; i < height - 1; + i++, lshift += bits, node = child) { + bits = i ? kBitsPerLevel : kBitsAtLevel1; + subkey = (key << lshift) >> ((sizeof(void*) << 3) - bits); + child = (void**)node[subkey]; + if (!child && aCreate) { + child = (void**)base_calloc(1 << kBitsPerLevel, sizeof(void*)); + if (child) { + node[subkey] = child; + } + } + if (!child) { + return nullptr; + } + } + + // node is a leaf, so it contains values rather than node + // pointers. + bits = i ? kBitsPerLevel : kBitsAtLevel1; + subkey = (key << lshift) >> ((sizeof(void*) << 3) - bits); + return &node[subkey]; +} + +template <size_t Bits> +void* AddressRadixTree<Bits>::Get(void* aKey) { + void* ret = nullptr; + + void** slot = GetSlot(aKey); + + if (slot) { + ret = *slot; + } +#ifdef MOZ_DEBUG + MutexAutoLock lock(mLock); + + // Suppose that it were possible for a jemalloc-allocated chunk to be + // munmap()ped, followed by a different allocator in another thread re-using + // overlapping virtual memory, all without invalidating the cached rtree + // value. The result would be a false positive (the rtree would claim that + // jemalloc owns memory that it had actually discarded). I don't think this + // scenario is possible, but the following assertion is a prudent sanity + // check. + if (!slot) { + // In case a slot has been created in the meantime. + slot = GetSlot(aKey); + } + if (slot) { + // The MutexAutoLock above should act as a memory barrier, forcing + // the compiler to emit a new read instruction for *slot. + MOZ_ASSERT(ret == *slot); + } else { + MOZ_ASSERT(ret == nullptr); + } +#endif + return ret; +} + +template <size_t Bits> +bool AddressRadixTree<Bits>::Set(void* aKey, void* aValue) { + MutexAutoLock lock(mLock); + void** slot = GetSlot(aKey, /* aCreate = */ true); + if (slot) { + *slot = aValue; + } + return slot; +} + +// pages_trim, chunk_alloc_mmap_slow and chunk_alloc_mmap were cherry-picked +// from upstream jemalloc 3.4.1 to fix Mozilla bug 956501. + +// Return the offset between a and the nearest aligned address at or below a. +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & ((alignment)-1))) + +// Return the smallest alignment multiple that is >= s. +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + ((alignment)-1)) & (~((alignment)-1))) + +static void* pages_trim(void* addr, size_t alloc_size, size_t leadsize, + size_t size) { + void* ret = (void*)((uintptr_t)addr + leadsize); + + MOZ_ASSERT(alloc_size >= leadsize + size); +#ifdef XP_WIN + { + void* new_addr; + + pages_unmap(addr, alloc_size); + new_addr = pages_map(ret, size); + if (new_addr == ret) { + return ret; + } + if (new_addr) { + pages_unmap(new_addr, size); + } + return nullptr; + } +#else + { + size_t trailsize = alloc_size - leadsize - size; + + if (leadsize != 0) { + pages_unmap(addr, leadsize); + } + if (trailsize != 0) { + pages_unmap((void*)((uintptr_t)ret + size), trailsize); + } + return ret; + } +#endif +} + +static void* chunk_alloc_mmap_slow(size_t size, size_t alignment) { + void *ret, *pages; + size_t alloc_size, leadsize; + + alloc_size = size + alignment - gRealPageSize; + // Beware size_t wrap-around. + if (alloc_size < size) { + return nullptr; + } + do { + pages = pages_map(nullptr, alloc_size); + if (!pages) { + return nullptr; + } + leadsize = + ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages; + ret = pages_trim(pages, alloc_size, leadsize, size); + } while (!ret); + + MOZ_ASSERT(ret); + return ret; +} + +static void* chunk_alloc_mmap(size_t size, size_t alignment) { + void* ret; + size_t offset; + + // Ideally, there would be a way to specify alignment to mmap() (like + // NetBSD has), but in the absence of such a feature, we have to work + // hard to efficiently create aligned mappings. The reliable, but + // slow method is to create a mapping that is over-sized, then trim the + // excess. However, that always results in one or two calls to + // pages_unmap(). + // + // Optimistically try mapping precisely the right amount before falling + // back to the slow method, with the expectation that the optimistic + // approach works most of the time. + ret = pages_map(nullptr, size); + if (!ret) { + return nullptr; + } + offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); + if (offset != 0) { + pages_unmap(ret, size); + return chunk_alloc_mmap_slow(size, alignment); + } + + MOZ_ASSERT(ret); + return ret; +} + +// Purge and release the pages in the chunk of length `length` at `addr` to +// the OS. +// Returns whether the pages are guaranteed to be full of zeroes when the +// function returns. +// The force_zero argument explicitly requests that the memory is guaranteed +// to be full of zeroes when the function returns. +static bool pages_purge(void* addr, size_t length, bool force_zero) { + pages_decommit(addr, length); + return true; +} + +static void* chunk_recycle(size_t aSize, size_t aAlignment) { + extent_node_t key; + + size_t alloc_size = aSize + aAlignment - kChunkSize; + // Beware size_t wrap-around. + if (alloc_size < aSize) { + return nullptr; + } + key.mAddr = nullptr; + key.mSize = alloc_size; + chunks_mtx.Lock(); + extent_node_t* node = gChunksBySize.SearchOrNext(&key); + if (!node) { + chunks_mtx.Unlock(); + return nullptr; + } + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)node->mAddr, aAlignment) - + (uintptr_t)node->mAddr; + MOZ_ASSERT(node->mSize >= leadsize + aSize); + size_t trailsize = node->mSize - leadsize - aSize; + void* ret = (void*)((uintptr_t)node->mAddr + leadsize); + + // All recycled chunks are zeroed (because they're purged) before being + // recycled. + MOZ_ASSERT(node->mChunkType == ZEROED_CHUNK); + + // Remove node from the tree. + gChunksBySize.Remove(node); + gChunksByAddress.Remove(node); + if (leadsize != 0) { + // Insert the leading space as a smaller chunk. + node->mSize = leadsize; + gChunksBySize.Insert(node); + gChunksByAddress.Insert(node); + node = nullptr; + } + if (trailsize != 0) { + // Insert the trailing space as a smaller chunk. + if (!node) { + // An additional node is required, but + // TypedBaseAlloc::alloc() can cause a new base chunk to be + // allocated. Drop chunks_mtx in order to avoid + // deadlock, and if node allocation fails, deallocate + // the result before returning an error. + chunks_mtx.Unlock(); + node = ExtentAlloc::alloc(); + if (!node) { + chunk_dealloc(ret, aSize, ZEROED_CHUNK); + return nullptr; + } + chunks_mtx.Lock(); + } + node->mAddr = (void*)((uintptr_t)(ret) + aSize); + node->mSize = trailsize; + node->mChunkType = ZEROED_CHUNK; + gChunksBySize.Insert(node); + gChunksByAddress.Insert(node); + node = nullptr; + } + + gRecycledSize -= aSize; + + chunks_mtx.Unlock(); + + if (node) { + ExtentAlloc::dealloc(node); + } + if (!pages_commit(ret, aSize)) { + return nullptr; + } + + return ret; +} + +#ifdef XP_WIN +// On Windows, calls to VirtualAlloc and VirtualFree must be matched, making it +// awkward to recycle allocations of varying sizes. Therefore we only allow +// recycling when the size equals the chunksize, unless deallocation is entirely +// disabled. +# define CAN_RECYCLE(size) ((size) == kChunkSize) +#else +# define CAN_RECYCLE(size) true +#endif + +// Allocates `size` bytes of system memory aligned for `alignment`. +// `base` indicates whether the memory will be used for the base allocator +// (e.g. base_alloc). +// `zeroed` is an outvalue that returns whether the allocated memory is +// guaranteed to be full of zeroes. It can be omitted when the caller doesn't +// care about the result. +static void* chunk_alloc(size_t aSize, size_t aAlignment, bool aBase) { + void* ret = nullptr; + + MOZ_ASSERT(aSize != 0); + MOZ_ASSERT((aSize & kChunkSizeMask) == 0); + MOZ_ASSERT(aAlignment != 0); + MOZ_ASSERT((aAlignment & kChunkSizeMask) == 0); + + // Base allocations can't be fulfilled by recycling because of + // possible deadlock or infinite recursion. + if (CAN_RECYCLE(aSize) && !aBase) { + ret = chunk_recycle(aSize, aAlignment); + } + if (!ret) { + ret = chunk_alloc_mmap(aSize, aAlignment); + } + if (ret && !aBase) { + if (!gChunkRTree.Set(ret, ret)) { + chunk_dealloc(ret, aSize, UNKNOWN_CHUNK); + return nullptr; + } + } + + MOZ_ASSERT(GetChunkOffsetForPtr(ret) == 0); + return ret; +} + +#ifdef MOZ_DEBUG +static void chunk_assert_zero(void* aPtr, size_t aSize) { + size_t i; + size_t* p = (size_t*)(uintptr_t)aPtr; + + for (i = 0; i < aSize / sizeof(size_t); i++) { + MOZ_ASSERT(p[i] == 0); + } +} +#endif + +static void chunk_record(void* aChunk, size_t aSize, ChunkType aType) { + extent_node_t key; + + if (aType != ZEROED_CHUNK) { + if (pages_purge(aChunk, aSize, aType == HUGE_CHUNK)) { + aType = ZEROED_CHUNK; + } + } + + // Allocate a node before acquiring chunks_mtx even though it might not + // be needed, because TypedBaseAlloc::alloc() may cause a new base chunk to + // be allocated, which could cause deadlock if chunks_mtx were already + // held. + UniqueBaseNode xnode(ExtentAlloc::alloc()); + // Use xprev to implement conditional deferred deallocation of prev. + UniqueBaseNode xprev; + + // RAII deallocates xnode and xprev defined above after unlocking + // in order to avoid potential dead-locks + MutexAutoLock lock(chunks_mtx); + key.mAddr = (void*)((uintptr_t)aChunk + aSize); + extent_node_t* node = gChunksByAddress.SearchOrNext(&key); + // Try to coalesce forward. + if (node && node->mAddr == key.mAddr) { + // Coalesce chunk with the following address range. This does + // not change the position within gChunksByAddress, so only + // remove/insert from/into gChunksBySize. + gChunksBySize.Remove(node); + node->mAddr = aChunk; + node->mSize += aSize; + if (node->mChunkType != aType) { + node->mChunkType = RECYCLED_CHUNK; + } + gChunksBySize.Insert(node); + } else { + // Coalescing forward failed, so insert a new node. + if (!xnode) { + // TypedBaseAlloc::alloc() failed, which is an exceedingly + // unlikely failure. Leak chunk; its pages have + // already been purged, so this is only a virtual + // memory leak. + return; + } + node = xnode.release(); + node->mAddr = aChunk; + node->mSize = aSize; + node->mChunkType = aType; + gChunksByAddress.Insert(node); + gChunksBySize.Insert(node); + } + + // Try to coalesce backward. + extent_node_t* prev = gChunksByAddress.Prev(node); + if (prev && (void*)((uintptr_t)prev->mAddr + prev->mSize) == aChunk) { + // Coalesce chunk with the previous address range. This does + // not change the position within gChunksByAddress, so only + // remove/insert node from/into gChunksBySize. + gChunksBySize.Remove(prev); + gChunksByAddress.Remove(prev); + + gChunksBySize.Remove(node); + node->mAddr = prev->mAddr; + node->mSize += prev->mSize; + if (node->mChunkType != prev->mChunkType) { + node->mChunkType = RECYCLED_CHUNK; + } + gChunksBySize.Insert(node); + + xprev.reset(prev); + } + + gRecycledSize += aSize; +} + +static void chunk_dealloc(void* aChunk, size_t aSize, ChunkType aType) { + MOZ_ASSERT(aChunk); + MOZ_ASSERT(GetChunkOffsetForPtr(aChunk) == 0); + MOZ_ASSERT(aSize != 0); + MOZ_ASSERT((aSize & kChunkSizeMask) == 0); + + gChunkRTree.Unset(aChunk); + + if (CAN_RECYCLE(aSize)) { + size_t recycled_so_far = gRecycledSize; + // In case some race condition put us above the limit. + if (recycled_so_far < gRecycleLimit) { + size_t recycle_remaining = gRecycleLimit - recycled_so_far; + size_t to_recycle; + if (aSize > recycle_remaining) { + to_recycle = recycle_remaining; + // Drop pages that would overflow the recycle limit + pages_trim(aChunk, aSize, 0, to_recycle); + } else { + to_recycle = aSize; + } + chunk_record(aChunk, to_recycle, aType); + return; + } + } + + pages_unmap(aChunk, aSize); +} + +#undef CAN_RECYCLE + +// End chunk management functions. +// *************************************************************************** +// Begin arena. + +static inline arena_t* thread_local_arena(bool enabled) { + arena_t* arena; + + if (enabled) { + // The arena will essentially be leaked if this function is + // called with `false`, but it doesn't matter at the moment. + // because in practice nothing actually calls this function + // with `false`, except maybe at shutdown. + arena = + gArenas.CreateArena(/* aIsPrivate = */ false, /* aParams = */ nullptr); + } else { + arena = gArenas.GetDefault(); + } + thread_arena.set(arena); + return arena; +} + +inline void MozJemalloc::jemalloc_thread_local_arena(bool aEnabled) { + if (malloc_init()) { + thread_local_arena(aEnabled); + } +} + +// Choose an arena based on a per-thread value. +static inline arena_t* choose_arena(size_t size) { + arena_t* ret = nullptr; + + // We can only use TLS if this is a PIC library, since for the static + // library version, libc's malloc is used by TLS allocation, which + // introduces a bootstrapping issue. + + if (size > kMaxQuantumClass) { + // Force the default arena for larger allocations. + ret = gArenas.GetDefault(); + } else { + // Check TLS to see if our thread has requested a pinned arena. + ret = thread_arena.get(); + // If ret is non-null, it must not be in the first page. + MOZ_DIAGNOSTIC_ASSERT_IF(ret, (size_t)ret >= gPageSize); + if (!ret) { + // Nothing in TLS. Pin this thread to the default arena. + ret = thread_local_arena(false); + } + } + + MOZ_DIAGNOSTIC_ASSERT(ret); + return ret; +} + +inline uint8_t arena_t::FindFreeBitInMask(uint32_t aMask, uint32_t& aRng) { + if (mPRNG != nullptr) { + if (aRng == UINT_MAX) { + aRng = mPRNG->next() % 32; + } + uint8_t bitIndex; + // RotateRight asserts when provided bad input. + aMask = aRng ? RotateRight(aMask, aRng) + : aMask; // Rotate the mask a random number of slots + bitIndex = CountTrailingZeroes32(aMask); + return (bitIndex + aRng) % 32; + } + return CountTrailingZeroes32(aMask); +} + +inline void* arena_t::ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin) { + void* ret; + unsigned i, mask, bit, regind; + uint32_t rndPos = UINT_MAX; + + MOZ_DIAGNOSTIC_ASSERT(aRun->mMagic == ARENA_RUN_MAGIC); + MOZ_ASSERT(aRun->mRegionsMinElement < aBin->mRunNumRegionsMask); + + // Move the first check outside the loop, so that aRun->mRegionsMinElement can + // be updated unconditionally, without the possibility of updating it + // multiple times. + i = aRun->mRegionsMinElement; + mask = aRun->mRegionsMask[i]; + if (mask != 0) { + bit = FindFreeBitInMask(mask, rndPos); + + regind = ((i << (LOG2(sizeof(int)) + 3)) + bit); + MOZ_ASSERT(regind < aBin->mRunNumRegions); + ret = (void*)(((uintptr_t)aRun) + aBin->mRunFirstRegionOffset + + (aBin->mSizeClass * regind)); + + // Clear bit. + mask ^= (1U << bit); + aRun->mRegionsMask[i] = mask; + + return ret; + } + + for (i++; i < aBin->mRunNumRegionsMask; i++) { + mask = aRun->mRegionsMask[i]; + if (mask != 0) { + bit = FindFreeBitInMask(mask, rndPos); + + regind = ((i << (LOG2(sizeof(int)) + 3)) + bit); + MOZ_ASSERT(regind < aBin->mRunNumRegions); + ret = (void*)(((uintptr_t)aRun) + aBin->mRunFirstRegionOffset + + (aBin->mSizeClass * regind)); + + // Clear bit. + mask ^= (1U << bit); + aRun->mRegionsMask[i] = mask; + + // Make a note that nothing before this element + // contains a free region. + aRun->mRegionsMinElement = i; // Low payoff: + (mask == 0); + + return ret; + } + } + // Not reached. + MOZ_DIAGNOSTIC_ASSERT(0); + return nullptr; +} + +static inline void arena_run_reg_dalloc(arena_run_t* run, arena_bin_t* bin, + void* ptr, size_t size) { + uint32_t diff, regind; + unsigned elm, bit; + + MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC); + + // Avoid doing division with a variable divisor if possible. Using + // actual division here can reduce allocator throughput by over 20%! + diff = + (uint32_t)((uintptr_t)ptr - (uintptr_t)run - bin->mRunFirstRegionOffset); + + MOZ_ASSERT(diff <= + (static_cast<unsigned>(bin->mRunSizePages) << gPageSize2Pow)); + regind = diff / bin->mSizeDivisor; + + MOZ_DIAGNOSTIC_ASSERT(diff == regind * size); + MOZ_DIAGNOSTIC_ASSERT(regind < bin->mRunNumRegions); + + elm = regind >> (LOG2(sizeof(int)) + 3); + if (elm < run->mRegionsMinElement) { + run->mRegionsMinElement = elm; + } + bit = regind - (elm << (LOG2(sizeof(int)) + 3)); + MOZ_RELEASE_ASSERT((run->mRegionsMask[elm] & (1U << bit)) == 0, + "Double-free?"); + run->mRegionsMask[elm] |= (1U << bit); +} + +bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge, + bool aZero) { + arena_chunk_t* chunk; + size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; + + chunk = GetChunkForPtr(aRun); + old_ndirty = chunk->ndirty; + run_ind = (unsigned)((uintptr_t(aRun) - uintptr_t(chunk)) >> gPageSize2Pow); + total_pages = (chunk->map[run_ind].bits & ~gPageSizeMask) >> gPageSize2Pow; + need_pages = (aSize >> gPageSize2Pow); + MOZ_ASSERT(need_pages > 0); + MOZ_ASSERT(need_pages <= total_pages); + rem_pages = total_pages - need_pages; + + for (i = 0; i < need_pages; i++) { + // Commit decommitted pages if necessary. If a decommitted + // page is encountered, commit all needed adjacent decommitted + // pages in one operation, in order to reduce system call + // overhead. + if (chunk->map[run_ind + i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) { + size_t j; + + // Advance i+j to just past the index of the last page + // to commit. Clear CHUNK_MAP_DECOMMITTED and + // CHUNK_MAP_MADVISED along the way. + for (j = 0; i + j < need_pages && (chunk->map[run_ind + i + j].bits & + CHUNK_MAP_MADVISED_OR_DECOMMITTED); + j++) { + // DECOMMITTED and MADVISED are mutually exclusive. + MOZ_ASSERT(!(chunk->map[run_ind + i + j].bits & CHUNK_MAP_DECOMMITTED && + chunk->map[run_ind + i + j].bits & CHUNK_MAP_MADVISED)); + + chunk->map[run_ind + i + j].bits &= ~CHUNK_MAP_MADVISED_OR_DECOMMITTED; + } + +#ifdef MALLOC_DECOMMIT + bool committed = pages_commit( + (void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)), + j << gPageSize2Pow); + // pages_commit zeroes pages, so mark them as such if it succeeded. + // That's checked further below to avoid manually zeroing the pages. + for (size_t k = 0; k < j; k++) { + chunk->map[run_ind + i + k].bits |= + committed ? CHUNK_MAP_ZEROED : CHUNK_MAP_DECOMMITTED; + } + if (!committed) { + return false; + } +#endif + + mStats.committed += j; + } + } + + mRunsAvail.Remove(&chunk->map[run_ind]); + + // Keep track of trailing unused pages for later use. + if (rem_pages > 0) { + chunk->map[run_ind + need_pages].bits = + (rem_pages << gPageSize2Pow) | + (chunk->map[run_ind + need_pages].bits & gPageSizeMask); + chunk->map[run_ind + total_pages - 1].bits = + (rem_pages << gPageSize2Pow) | + (chunk->map[run_ind + total_pages - 1].bits & gPageSizeMask); + mRunsAvail.Insert(&chunk->map[run_ind + need_pages]); + } + + for (i = 0; i < need_pages; i++) { + // Zero if necessary. + if (aZero) { + if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED) == 0) { + memset((void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)), 0, + gPageSize); + // CHUNK_MAP_ZEROED is cleared below. + } + } + + // Update dirty page accounting. + if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) { + chunk->ndirty--; + mNumDirty--; + // CHUNK_MAP_DIRTY is cleared below. + } + + // Initialize the chunk map. + if (aLarge) { + chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + } else { + chunk->map[run_ind + i].bits = size_t(aRun) | CHUNK_MAP_ALLOCATED; + } + } + + // Set the run size only in the first element for large runs. This is + // primarily a debugging aid, since the lack of size info for trailing + // pages only matters if the application tries to operate on an + // interior pointer. + if (aLarge) { + chunk->map[run_ind].bits |= aSize; + } + + if (chunk->ndirty == 0 && old_ndirty > 0) { + mChunksDirty.Remove(chunk); + } + return true; +} + +void arena_t::InitChunk(arena_chunk_t* aChunk) { + size_t i; + // WARNING: The following relies on !aZeroed meaning "used to be an arena + // chunk". + // When the chunk we're initializating as an arena chunk is zeroed, we + // mark all runs are decommitted and zeroed. + // When it is not, which we can assume means it's a recycled arena chunk, + // all it can contain is an arena chunk header (which we're overwriting), + // and zeroed or poisoned memory (because a recycled arena chunk will + // have been emptied before being recycled). In that case, we can get + // away with reusing the chunk as-is, marking all runs as madvised. + + size_t flags = CHUNK_MAP_DECOMMITTED | CHUNK_MAP_ZEROED; + + mStats.mapped += kChunkSize; + + aChunk->arena = this; + + // Claim that no pages are in use, since the header is merely overhead. + aChunk->ndirty = 0; + + // Initialize the map to contain one maximal free untouched run. + arena_run_t* run = (arena_run_t*)(uintptr_t(aChunk) + + (gChunkHeaderNumPages << gPageSize2Pow)); + + // Clear the bits for the real header pages. + for (i = 0; i < gChunkHeaderNumPages - 1; i++) { + aChunk->map[i].bits = 0; + } + // Mark the leading guard page (last header page) as decommitted. + aChunk->map[i++].bits = CHUNK_MAP_DECOMMITTED; + + // Mark the area usable for runs as available, note size at start and end + aChunk->map[i++].bits = gMaxLargeClass | flags; + for (; i < gChunkNumPages - 2; i++) { + aChunk->map[i].bits = flags; + } + aChunk->map[gChunkNumPages - 2].bits = gMaxLargeClass | flags; + + // Mark the trailing guard page as decommitted. + aChunk->map[gChunkNumPages - 1].bits = CHUNK_MAP_DECOMMITTED; + +#ifdef MALLOC_DECOMMIT + // Start out decommitted, in order to force a closer correspondence + // between dirty pages and committed untouched pages. This includes + // leading and trailing guard pages. + pages_decommit((void*)(uintptr_t(run) - gPageSize), + gMaxLargeClass + 2 * gPageSize); +#else + // Decommit the last header page (=leading page) as a guard. + pages_decommit((void*)(uintptr_t(run) - gPageSize), gPageSize); + // Decommit the last page as a guard. + pages_decommit((void*)(uintptr_t(aChunk) + kChunkSize - gPageSize), + gPageSize); +#endif + + mStats.committed += gChunkHeaderNumPages - 1; + + // Insert the run into the tree of available runs. + mRunsAvail.Insert(&aChunk->map[gChunkHeaderNumPages]); + +#ifdef MALLOC_DOUBLE_PURGE + new (&aChunk->chunks_madvised_elem) DoublyLinkedListElement<arena_chunk_t>(); +#endif +} + +arena_chunk_t* arena_t::DeallocChunk(arena_chunk_t* aChunk) { + if (mSpare) { + if (mSpare->ndirty > 0) { + aChunk->arena->mChunksDirty.Remove(mSpare); + mNumDirty -= mSpare->ndirty; + mStats.committed -= mSpare->ndirty; + } + +#ifdef MALLOC_DOUBLE_PURGE + if (mChunksMAdvised.ElementProbablyInList(mSpare)) { + mChunksMAdvised.remove(mSpare); + } +#endif + + mStats.mapped -= kChunkSize; + mStats.committed -= gChunkHeaderNumPages - 1; + } + + // Remove run from the tree of available runs, so that the arena does not use + // it. Dirty page flushing only uses the tree of dirty chunks, so leaving this + // chunk in the chunks_* trees is sufficient for that purpose. + mRunsAvail.Remove(&aChunk->map[gChunkHeaderNumPages]); + + arena_chunk_t* chunk_dealloc = mSpare; + mSpare = aChunk; + return chunk_dealloc; +} + +arena_run_t* arena_t::AllocRun(size_t aSize, bool aLarge, bool aZero) { + arena_run_t* run; + arena_chunk_map_t* mapelm; + arena_chunk_map_t key; + + MOZ_ASSERT(aSize <= gMaxLargeClass); + MOZ_ASSERT((aSize & gPageSizeMask) == 0); + + // Search the arena's chunks for the lowest best fit. + key.bits = aSize | CHUNK_MAP_KEY; + mapelm = mRunsAvail.SearchOrNext(&key); + if (mapelm) { + arena_chunk_t* chunk = GetChunkForPtr(mapelm); + size_t pageind = + (uintptr_t(mapelm) - uintptr_t(chunk->map)) / sizeof(arena_chunk_map_t); + + run = (arena_run_t*)(uintptr_t(chunk) + (pageind << gPageSize2Pow)); + } else if (mSpare) { + // Use the spare. + arena_chunk_t* chunk = mSpare; + mSpare = nullptr; + run = (arena_run_t*)(uintptr_t(chunk) + + (gChunkHeaderNumPages << gPageSize2Pow)); + // Insert the run into the tree of available runs. + mRunsAvail.Insert(&chunk->map[gChunkHeaderNumPages]); + } else { + // No usable runs. Create a new chunk from which to allocate + // the run. + arena_chunk_t* chunk = + (arena_chunk_t*)chunk_alloc(kChunkSize, kChunkSize, false); + if (!chunk) { + return nullptr; + } + + InitChunk(chunk); + run = (arena_run_t*)(uintptr_t(chunk) + + (gChunkHeaderNumPages << gPageSize2Pow)); + } + // Update page map. + return SplitRun(run, aSize, aLarge, aZero) ? run : nullptr; +} + +size_t arena_t::EffectiveMaxDirty() { + int32_t modifier = gArenas.DefaultMaxDirtyPageModifier(); + if (modifier) { + int32_t arenaOverride = + modifier > 0 ? mMaxDirtyIncreaseOverride : mMaxDirtyDecreaseOverride; + if (arenaOverride) { + modifier = arenaOverride; + } + } + + return modifier >= 0 ? mMaxDirty << modifier : mMaxDirty >> -modifier; +} + +void arena_t::Purge(size_t aMaxDirty) { + arena_chunk_t* chunk; + size_t i, npages; + +#ifdef MOZ_DEBUG + size_t ndirty = 0; + for (auto chunk : mChunksDirty.iter()) { + ndirty += chunk->ndirty; + } + MOZ_ASSERT(ndirty == mNumDirty); +#endif + MOZ_DIAGNOSTIC_ASSERT(aMaxDirty == 1 || (mNumDirty > aMaxDirty)); + + // Iterate downward through chunks until enough dirty memory has been + // purged. Terminate as soon as possible in order to minimize the + // number of system calls, even if a chunk has only been partially + // purged. + while (mNumDirty > (aMaxDirty >> 1)) { +#ifdef MALLOC_DOUBLE_PURGE + bool madvised = false; +#endif + chunk = mChunksDirty.Last(); + MOZ_DIAGNOSTIC_ASSERT(chunk); + // Last page is DECOMMITTED as a guard page. + MOZ_ASSERT((chunk->map[gChunkNumPages - 1].bits & CHUNK_MAP_DECOMMITTED) != + 0); + for (i = gChunkNumPages - 2; chunk->ndirty > 0; i--) { + MOZ_DIAGNOSTIC_ASSERT(i >= gChunkHeaderNumPages); + + if (chunk->map[i].bits & CHUNK_MAP_DIRTY) { +#ifdef MALLOC_DECOMMIT + const size_t free_operation = CHUNK_MAP_DECOMMITTED; +#else + const size_t free_operation = CHUNK_MAP_MADVISED; +#endif + MOZ_ASSERT((chunk->map[i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) == + 0); + chunk->map[i].bits ^= free_operation | CHUNK_MAP_DIRTY; + // Find adjacent dirty run(s). + for (npages = 1; i > gChunkHeaderNumPages && + (chunk->map[i - 1].bits & CHUNK_MAP_DIRTY); + npages++) { + i--; + MOZ_ASSERT((chunk->map[i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) == + 0); + chunk->map[i].bits ^= free_operation | CHUNK_MAP_DIRTY; + } + chunk->ndirty -= npages; + mNumDirty -= npages; + +#ifdef MALLOC_DECOMMIT + pages_decommit((void*)(uintptr_t(chunk) + (i << gPageSize2Pow)), + (npages << gPageSize2Pow)); +#else +# ifdef XP_SOLARIS + posix_madvise((void*)(uintptr_t(chunk) + (i << gPageSize2Pow)), + (npages << gPageSize2Pow), MADV_FREE); +# else + madvise((void*)(uintptr_t(chunk) + (i << gPageSize2Pow)), + (npages << gPageSize2Pow), MADV_FREE); +# endif +# ifdef MALLOC_DOUBLE_PURGE + madvised = true; +# endif +#endif + mStats.committed -= npages; + + if (mNumDirty <= (aMaxDirty >> 1)) { + break; + } + } + } + + if (chunk->ndirty == 0) { + mChunksDirty.Remove(chunk); + } +#ifdef MALLOC_DOUBLE_PURGE + if (madvised) { + // The chunk might already be in the list, but this + // makes sure it's at the front. + if (mChunksMAdvised.ElementProbablyInList(chunk)) { + mChunksMAdvised.remove(chunk); + } + mChunksMAdvised.pushFront(chunk); + } +#endif + } +} + +arena_chunk_t* arena_t::DallocRun(arena_run_t* aRun, bool aDirty) { + arena_chunk_t* chunk; + size_t size, run_ind, run_pages; + + chunk = GetChunkForPtr(aRun); + run_ind = (size_t)((uintptr_t(aRun) - uintptr_t(chunk)) >> gPageSize2Pow); + MOZ_DIAGNOSTIC_ASSERT(run_ind >= gChunkHeaderNumPages); + MOZ_RELEASE_ASSERT(run_ind < gChunkNumPages - 1); + if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0) { + size = chunk->map[run_ind].bits & ~gPageSizeMask; + run_pages = (size >> gPageSize2Pow); + } else { + run_pages = aRun->mBin->mRunSizePages; + size = run_pages << gPageSize2Pow; + } + + // Mark pages as unallocated in the chunk map. + if (aDirty) { + size_t i; + + for (i = 0; i < run_pages; i++) { + MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) == + 0); + chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY; + } + + if (chunk->ndirty == 0) { + mChunksDirty.Insert(chunk); + } + chunk->ndirty += run_pages; + mNumDirty += run_pages; + } else { + size_t i; + + for (i = 0; i < run_pages; i++) { + chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED); + } + } + chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & gPageSizeMask); + chunk->map[run_ind + run_pages - 1].bits = + size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask); + + // Try to coalesce forward. + if (run_ind + run_pages < gChunkNumPages - 1 && + (chunk->map[run_ind + run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) { + size_t nrun_size = chunk->map[run_ind + run_pages].bits & ~gPageSizeMask; + + // Remove successor from tree of available runs; the coalesced run is + // inserted later. + mRunsAvail.Remove(&chunk->map[run_ind + run_pages]); + + size += nrun_size; + run_pages = size >> gPageSize2Pow; + + MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind + run_pages - 1].bits & + ~gPageSizeMask) == nrun_size); + chunk->map[run_ind].bits = + size | (chunk->map[run_ind].bits & gPageSizeMask); + chunk->map[run_ind + run_pages - 1].bits = + size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask); + } + + // Try to coalesce backward. + if (run_ind > gChunkHeaderNumPages && + (chunk->map[run_ind - 1].bits & CHUNK_MAP_ALLOCATED) == 0) { + size_t prun_size = chunk->map[run_ind - 1].bits & ~gPageSizeMask; + + run_ind -= prun_size >> gPageSize2Pow; + + // Remove predecessor from tree of available runs; the coalesced run is + // inserted later. + mRunsAvail.Remove(&chunk->map[run_ind]); + + size += prun_size; + run_pages = size >> gPageSize2Pow; + + MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind].bits & ~gPageSizeMask) == + prun_size); + chunk->map[run_ind].bits = + size | (chunk->map[run_ind].bits & gPageSizeMask); + chunk->map[run_ind + run_pages - 1].bits = + size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask); + } + + // Insert into tree of available runs, now that coalescing is complete. + mRunsAvail.Insert(&chunk->map[run_ind]); + + // Deallocate chunk if it is now completely unused. + arena_chunk_t* chunk_dealloc = nullptr; + if ((chunk->map[gChunkHeaderNumPages].bits & + (~gPageSizeMask | CHUNK_MAP_ALLOCATED)) == gMaxLargeClass) { + chunk_dealloc = DeallocChunk(chunk); + } + + size_t maxDirty = EffectiveMaxDirty(); + if (mNumDirty > maxDirty) { + Purge(maxDirty); + } + + return chunk_dealloc; +} + +void arena_t::TrimRunHead(arena_chunk_t* aChunk, arena_run_t* aRun, + size_t aOldSize, size_t aNewSize) { + size_t pageind = (uintptr_t(aRun) - uintptr_t(aChunk)) >> gPageSize2Pow; + size_t head_npages = (aOldSize - aNewSize) >> gPageSize2Pow; + + MOZ_ASSERT(aOldSize > aNewSize); + + // Update the chunk map so that arena_t::RunDalloc() can treat the + // leading run as separately allocated. + aChunk->map[pageind].bits = + (aOldSize - aNewSize) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + aChunk->map[pageind + head_npages].bits = + aNewSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + +#ifdef MOZ_DEBUG + arena_chunk_t* no_chunk = +#endif + DallocRun(aRun, false); + // This will never release a chunk as there's still at least one allocated + // run. + MOZ_ASSERT(!no_chunk); +} + +void arena_t::TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun, + size_t aOldSize, size_t aNewSize, bool aDirty) { + size_t pageind = (uintptr_t(aRun) - uintptr_t(aChunk)) >> gPageSize2Pow; + size_t npages = aNewSize >> gPageSize2Pow; + + MOZ_ASSERT(aOldSize > aNewSize); + + // Update the chunk map so that arena_t::RunDalloc() can treat the + // trailing run as separately allocated. + aChunk->map[pageind].bits = aNewSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + aChunk->map[pageind + npages].bits = + (aOldSize - aNewSize) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + +#ifdef MOZ_DEBUG + arena_chunk_t* no_chunk = +#endif + DallocRun((arena_run_t*)(uintptr_t(aRun) + aNewSize), aDirty); + + // This will never release a chunk as there's still at least one allocated + // run. + MOZ_ASSERT(!no_chunk); +} + +arena_run_t* arena_t::GetNonFullBinRun(arena_bin_t* aBin) { + arena_chunk_map_t* mapelm; + arena_run_t* run; + unsigned i, remainder; + + // Look for a usable run. + mapelm = aBin->mNonFullRuns.First(); + if (mapelm) { + // run is guaranteed to have available space. + aBin->mNonFullRuns.Remove(mapelm); + run = (arena_run_t*)(mapelm->bits & ~gPageSizeMask); + return run; + } + // No existing runs have any space available. + + // Allocate a new run. + run = AllocRun(static_cast<size_t>(aBin->mRunSizePages) << gPageSize2Pow, + false, false); + if (!run) { + return nullptr; + } + // Don't initialize if a race in arena_t::RunAlloc() allowed an existing + // run to become usable. + if (run == aBin->mCurrentRun) { + return run; + } + + // Initialize run internals. + run->mBin = aBin; + + for (i = 0; i < aBin->mRunNumRegionsMask - 1; i++) { + run->mRegionsMask[i] = UINT_MAX; + } + remainder = aBin->mRunNumRegions & ((1U << (LOG2(sizeof(int)) + 3)) - 1); + if (remainder == 0) { + run->mRegionsMask[i] = UINT_MAX; + } else { + // The last element has spare bits that need to be unset. + run->mRegionsMask[i] = + (UINT_MAX >> ((1U << (LOG2(sizeof(int)) + 3)) - remainder)); + } + + run->mRegionsMinElement = 0; + + run->mNumFree = aBin->mRunNumRegions; +#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED) + run->mMagic = ARENA_RUN_MAGIC; +#endif + + aBin->mNumRuns++; + return run; +} + +void arena_bin_t::Init(SizeClass aSizeClass) { + size_t try_run_size; + unsigned try_nregs, try_mask_nelms, try_reg0_offset; + // Size of the run header, excluding mRegionsMask. + static const size_t kFixedHeaderSize = offsetof(arena_run_t, mRegionsMask); + + MOZ_ASSERT(aSizeClass.Size() <= gMaxBinClass); + + try_run_size = gPageSize; + + mCurrentRun = nullptr; + mNonFullRuns.Init(); + mSizeClass = aSizeClass.Size(); + mNumRuns = 0; + + // Run size expansion loop. + while (true) { + try_nregs = ((try_run_size - kFixedHeaderSize) / mSizeClass) + + 1; // Counter-act try_nregs-- in loop. + + // The do..while loop iteratively reduces the number of regions until + // the run header and the regions no longer overlap. A closed formula + // would be quite messy, since there is an interdependency between the + // header's mask length and the number of regions. + do { + try_nregs--; + try_mask_nelms = + (try_nregs >> (LOG2(sizeof(int)) + 3)) + + ((try_nregs & ((1U << (LOG2(sizeof(int)) + 3)) - 1)) ? 1 : 0); + try_reg0_offset = try_run_size - (try_nregs * mSizeClass); + } while (kFixedHeaderSize + (sizeof(unsigned) * try_mask_nelms) > + try_reg0_offset); + + // Try to keep the run overhead below kRunOverhead. + if (Fraction(try_reg0_offset, try_run_size) <= kRunOverhead) { + break; + } + + // If the overhead is larger than the size class, it means the size class + // is small and doesn't align very well with the header. It's desirable to + // have smaller run sizes for them, so relax the overhead requirement. + if (try_reg0_offset > mSizeClass) { + if (Fraction(try_reg0_offset, try_run_size) <= kRunRelaxedOverhead) { + break; + } + } + + // The run header includes one bit per region of the given size. For sizes + // small enough, the number of regions is large enough that growing the run + // size barely moves the needle for the overhead because of all those bits. + // For example, for a size of 8 bytes, adding 4KiB to the run size adds + // close to 512 bits to the header, which is 64 bytes. + // With such overhead, there is no way to get to the wanted overhead above, + // so we give up if the required size for mRegionsMask more than doubles the + // size of the run header. + if (try_mask_nelms * sizeof(unsigned) >= kFixedHeaderSize) { + break; + } + + // If next iteration is going to be larger than the largest possible large + // size class, then we didn't find a setup where the overhead is small + // enough, and we can't do better than the current settings, so just use + // that. + if (try_run_size + gPageSize > gMaxLargeClass) { + break; + } + + // Try more aggressive settings. + try_run_size += gPageSize; + } + + MOZ_ASSERT(kFixedHeaderSize + (sizeof(unsigned) * try_mask_nelms) <= + try_reg0_offset); + MOZ_ASSERT((try_mask_nelms << (LOG2(sizeof(int)) + 3)) >= try_nregs); + + // Copy final settings. + MOZ_ASSERT((try_run_size >> gPageSize2Pow) <= UINT8_MAX); + mRunSizePages = static_cast<uint8_t>(try_run_size >> gPageSize2Pow); + mRunNumRegions = try_nregs; + mRunNumRegionsMask = try_mask_nelms; + mRunFirstRegionOffset = try_reg0_offset; + mSizeDivisor = FastDivisor<uint16_t>(aSizeClass.Size(), try_run_size); +} + +void* arena_t::MallocSmall(size_t aSize, bool aZero) { + void* ret; + arena_bin_t* bin; + arena_run_t* run; + SizeClass sizeClass(aSize); + aSize = sizeClass.Size(); + + switch (sizeClass.Type()) { + case SizeClass::Tiny: + bin = &mBins[FloorLog2(aSize / kMinTinyClass)]; + break; + case SizeClass::Quantum: + // Although we divide 2 things by kQuantum, the compiler will + // reduce `kMinQuantumClass / kQuantum` and `kNumTinyClasses` to a + // single constant. + bin = &mBins[kNumTinyClasses + (aSize / kQuantum) - + (kMinQuantumClass / kQuantum)]; + break; + case SizeClass::QuantumWide: + bin = + &mBins[kNumTinyClasses + kNumQuantumClasses + (aSize / kQuantumWide) - + (kMinQuantumWideClass / kQuantumWide)]; + break; + case SizeClass::SubPage: + bin = + &mBins[kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses + + (FloorLog2(aSize) - LOG2(kMinSubPageClass))]; + break; + default: + MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Unexpected size class type"); + } + MOZ_DIAGNOSTIC_ASSERT(aSize == bin->mSizeClass); + + { + // Before we lock, we determine if we need to randomize the allocation + // because if we do, we need to create the PRNG which might require + // allocating memory (arc4random on OSX for example) and we need to + // avoid the deadlock + if (MOZ_UNLIKELY(mRandomizeSmallAllocations && mPRNG == nullptr)) { + // This is frustrating. Because the code backing RandomUint64 (arc4random + // for example) may allocate memory, and because + // mRandomizeSmallAllocations is true and we haven't yet initilized mPRNG, + // we would re-enter this same case and cause a deadlock inside e.g. + // arc4random. So we temporarily disable mRandomizeSmallAllocations to + // skip this case and then re-enable it + mRandomizeSmallAllocations = false; + mozilla::Maybe<uint64_t> prngState1 = mozilla::RandomUint64(); + mozilla::Maybe<uint64_t> prngState2 = mozilla::RandomUint64(); + void* backing = + base_alloc(sizeof(mozilla::non_crypto::XorShift128PlusRNG)); + mPRNG = new (backing) mozilla::non_crypto::XorShift128PlusRNG( + prngState1.valueOr(0), prngState2.valueOr(0)); + mRandomizeSmallAllocations = true; + } + MOZ_ASSERT(!mRandomizeSmallAllocations || mPRNG); + + MaybeMutexAutoLock lock(mLock); + run = bin->mCurrentRun; + if (MOZ_UNLIKELY(!run || run->mNumFree == 0)) { + run = bin->mCurrentRun = GetNonFullBinRun(bin); + } + if (MOZ_UNLIKELY(!run)) { + return nullptr; + } + MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC); + MOZ_DIAGNOSTIC_ASSERT(run->mNumFree > 0); + ret = ArenaRunRegAlloc(run, bin); + MOZ_DIAGNOSTIC_ASSERT(ret); + run->mNumFree--; + if (!ret) { + return nullptr; + } + + mStats.allocated_small += aSize; + } + + if (!aZero) { + ApplyZeroOrJunk(ret, aSize); + } else { + memset(ret, 0, aSize); + } + + return ret; +} + +void* arena_t::MallocLarge(size_t aSize, bool aZero) { + void* ret; + + // Large allocation. + aSize = PAGE_CEILING(aSize); + + { + MaybeMutexAutoLock lock(mLock); + ret = AllocRun(aSize, true, aZero); + if (!ret) { + return nullptr; + } + mStats.allocated_large += aSize; + } + + if (!aZero) { + ApplyZeroOrJunk(ret, aSize); + } + + return ret; +} + +void* arena_t::Malloc(size_t aSize, bool aZero) { + MOZ_DIAGNOSTIC_ASSERT(mMagic == ARENA_MAGIC); + MOZ_ASSERT(aSize != 0); + + if (aSize <= gMaxBinClass) { + return MallocSmall(aSize, aZero); + } + if (aSize <= gMaxLargeClass) { + return MallocLarge(aSize, aZero); + } + return MallocHuge(aSize, aZero); +} + +// Only handles large allocations that require more than page alignment. +void* arena_t::PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize) { + void* ret; + size_t offset; + arena_chunk_t* chunk; + + MOZ_ASSERT((aSize & gPageSizeMask) == 0); + MOZ_ASSERT((aAlignment & gPageSizeMask) == 0); + + { + MaybeMutexAutoLock lock(mLock); + ret = AllocRun(aAllocSize, true, false); + if (!ret) { + return nullptr; + } + + chunk = GetChunkForPtr(ret); + + offset = uintptr_t(ret) & (aAlignment - 1); + MOZ_ASSERT((offset & gPageSizeMask) == 0); + MOZ_ASSERT(offset < aAllocSize); + if (offset == 0) { + TrimRunTail(chunk, (arena_run_t*)ret, aAllocSize, aSize, false); + } else { + size_t leadsize, trailsize; + + leadsize = aAlignment - offset; + if (leadsize > 0) { + TrimRunHead(chunk, (arena_run_t*)ret, aAllocSize, + aAllocSize - leadsize); + ret = (void*)(uintptr_t(ret) + leadsize); + } + + trailsize = aAllocSize - leadsize - aSize; + if (trailsize != 0) { + // Trim trailing space. + MOZ_ASSERT(trailsize < aAllocSize); + TrimRunTail(chunk, (arena_run_t*)ret, aSize + trailsize, aSize, false); + } + } + + mStats.allocated_large += aSize; + } + + ApplyZeroOrJunk(ret, aSize); + return ret; +} + +void* arena_t::Palloc(size_t aAlignment, size_t aSize) { + void* ret; + size_t ceil_size; + + // Round size up to the nearest multiple of alignment. + // + // This done, we can take advantage of the fact that for each small + // size class, every object is aligned at the smallest power of two + // that is non-zero in the base two representation of the size. For + // example: + // + // Size | Base 2 | Minimum alignment + // -----+----------+------------------ + // 96 | 1100000 | 32 + // 144 | 10100000 | 32 + // 192 | 11000000 | 64 + // + // Depending on runtime settings, it is possible that arena_malloc() + // will further round up to a power of two, but that never causes + // correctness issues. + ceil_size = ALIGNMENT_CEILING(aSize, aAlignment); + + // (ceil_size < aSize) protects against the combination of maximal + // alignment and size greater than maximal alignment. + if (ceil_size < aSize) { + // size_t overflow. + return nullptr; + } + + if (ceil_size <= gPageSize || + (aAlignment <= gPageSize && ceil_size <= gMaxLargeClass)) { + ret = Malloc(ceil_size, false); + } else { + size_t run_size; + + // We can't achieve sub-page alignment, so round up alignment + // permanently; it makes later calculations simpler. + aAlignment = PAGE_CEILING(aAlignment); + ceil_size = PAGE_CEILING(aSize); + + // (ceil_size < aSize) protects against very large sizes within + // pagesize of SIZE_T_MAX. + // + // (ceil_size + aAlignment < ceil_size) protects against the + // combination of maximal alignment and ceil_size large enough + // to cause overflow. This is similar to the first overflow + // check above, but it needs to be repeated due to the new + // ceil_size value, which may now be *equal* to maximal + // alignment, whereas before we only detected overflow if the + // original size was *greater* than maximal alignment. + if (ceil_size < aSize || ceil_size + aAlignment < ceil_size) { + // size_t overflow. + return nullptr; + } + + // Calculate the size of the over-size run that arena_palloc() + // would need to allocate in order to guarantee the alignment. + if (ceil_size >= aAlignment) { + run_size = ceil_size + aAlignment - gPageSize; + } else { + // It is possible that (aAlignment << 1) will cause + // overflow, but it doesn't matter because we also + // subtract pagesize, which in the case of overflow + // leaves us with a very large run_size. That causes + // the first conditional below to fail, which means + // that the bogus run_size value never gets used for + // anything important. + run_size = (aAlignment << 1) - gPageSize; + } + + if (run_size <= gMaxLargeClass) { + ret = PallocLarge(aAlignment, ceil_size, run_size); + } else if (aAlignment <= kChunkSize) { + ret = MallocHuge(ceil_size, false); + } else { + ret = PallocHuge(ceil_size, aAlignment, false); + } + } + + MOZ_ASSERT((uintptr_t(ret) & (aAlignment - 1)) == 0); + return ret; +} + +class AllocInfo { + public: + template <bool Validate = false> + static inline AllocInfo Get(const void* aPtr) { + // If the allocator is not initialized, the pointer can't belong to it. + if (Validate && !malloc_initialized) { + return AllocInfo(); + } + + auto chunk = GetChunkForPtr(aPtr); + if (Validate) { + if (!chunk || !gChunkRTree.Get(chunk)) { + return AllocInfo(); + } + } + + if (chunk != aPtr) { + MOZ_DIAGNOSTIC_ASSERT(chunk->arena->mMagic == ARENA_MAGIC); + size_t pageind = (((uintptr_t)aPtr - (uintptr_t)chunk) >> gPageSize2Pow); + return GetInChunk(aPtr, chunk, pageind); + } + + extent_node_t key; + + // Huge allocation + key.mAddr = chunk; + MutexAutoLock lock(huge_mtx); + extent_node_t* node = huge.Search(&key); + if (Validate && !node) { + return AllocInfo(); + } + return AllocInfo(node->mSize, node); + } + + // Get the allocation information for a pointer we know is within a chunk + // (Small or large, not huge). + static inline AllocInfo GetInChunk(const void* aPtr, arena_chunk_t* aChunk, + size_t pageind) { + size_t mapbits = aChunk->map[pageind].bits; + MOZ_DIAGNOSTIC_ASSERT((mapbits & CHUNK_MAP_ALLOCATED) != 0); + + size_t size; + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + arena_run_t* run = (arena_run_t*)(mapbits & ~gPageSizeMask); + MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC); + size = run->mBin->mSizeClass; + } else { + size = mapbits & ~gPageSizeMask; + MOZ_DIAGNOSTIC_ASSERT(size != 0); + } + + return AllocInfo(size, aChunk); + } + + // Validate ptr before assuming that it points to an allocation. Currently, + // the following validation is performed: + // + // + Check that ptr is not nullptr. + // + // + Check that ptr lies within a mapped chunk. + static inline AllocInfo GetValidated(const void* aPtr) { + return Get<true>(aPtr); + } + + AllocInfo() : mSize(0), mChunk(nullptr) {} + + explicit AllocInfo(size_t aSize, arena_chunk_t* aChunk) + : mSize(aSize), mChunk(aChunk) { + MOZ_ASSERT(mSize <= gMaxLargeClass); + } + + explicit AllocInfo(size_t aSize, extent_node_t* aNode) + : mSize(aSize), mNode(aNode) { + MOZ_ASSERT(mSize > gMaxLargeClass); + } + + size_t Size() { return mSize; } + + arena_t* Arena() { + if (mSize <= gMaxLargeClass) { + return mChunk->arena; + } + // Best effort detection that we're not trying to access an already + // disposed arena. In the case of a disposed arena, the memory location + // pointed by mNode->mArena is either free (but still a valid memory + // region, per TypedBaseAlloc<arena_t>), in which case its id was reset, + // or has been reallocated for a new region, and its id is very likely + // different (per randomness). In both cases, the id is unlikely to + // match what it was for the disposed arena. + MOZ_RELEASE_ASSERT(mNode->mArenaId == mNode->mArena->mId); + return mNode->mArena; + } + + bool IsValid() const { return !!mSize; } + + private: + size_t mSize; + union { + // Pointer to the chunk associated with the allocation for small + // and large allocations. + arena_chunk_t* mChunk; + + // Pointer to the extent node for huge allocations. + extent_node_t* mNode; + }; +}; + +inline void MozJemalloc::jemalloc_ptr_info(const void* aPtr, + jemalloc_ptr_info_t* aInfo) { + arena_chunk_t* chunk = GetChunkForPtr(aPtr); + + // Is the pointer null, or within one chunk's size of null? + // Alternatively, if the allocator is not initialized yet, the pointer + // can't be known. + if (!chunk || !malloc_initialized) { + *aInfo = {TagUnknown, nullptr, 0, 0}; + return; + } + + // Look for huge allocations before looking for |chunk| in gChunkRTree. + // This is necessary because |chunk| won't be in gChunkRTree if it's + // the second or subsequent chunk in a huge allocation. + extent_node_t* node; + extent_node_t key; + { + MutexAutoLock lock(huge_mtx); + key.mAddr = const_cast<void*>(aPtr); + node = + reinterpret_cast<RedBlackTree<extent_node_t, ExtentTreeBoundsTrait>*>( + &huge) + ->Search(&key); + if (node) { + *aInfo = {TagLiveAlloc, node->mAddr, node->mSize, node->mArena->mId}; + return; + } + } + + // It's not a huge allocation. Check if we have a known chunk. + if (!gChunkRTree.Get(chunk)) { + *aInfo = {TagUnknown, nullptr, 0, 0}; + return; + } + + MOZ_DIAGNOSTIC_ASSERT(chunk->arena->mMagic == ARENA_MAGIC); + + // Get the page number within the chunk. + size_t pageind = (((uintptr_t)aPtr - (uintptr_t)chunk) >> gPageSize2Pow); + if (pageind < gChunkHeaderNumPages) { + // Within the chunk header. + *aInfo = {TagUnknown, nullptr, 0, 0}; + return; + } + + size_t mapbits = chunk->map[pageind].bits; + + if (!(mapbits & CHUNK_MAP_ALLOCATED)) { + void* pageaddr = (void*)(uintptr_t(aPtr) & ~gPageSizeMask); + *aInfo = {TagFreedPage, pageaddr, gPageSize, chunk->arena->mId}; + return; + } + + if (mapbits & CHUNK_MAP_LARGE) { + // It's a large allocation. Only the first page of a large + // allocation contains its size, so if the address is not in + // the first page, scan back to find the allocation size. + size_t size; + while (true) { + size = mapbits & ~gPageSizeMask; + if (size != 0) { + break; + } + + // The following two return paths shouldn't occur in + // practice unless there is heap corruption. + pageind--; + MOZ_DIAGNOSTIC_ASSERT(pageind >= gChunkHeaderNumPages); + if (pageind < gChunkHeaderNumPages) { + *aInfo = {TagUnknown, nullptr, 0, 0}; + return; + } + + mapbits = chunk->map[pageind].bits; + MOZ_DIAGNOSTIC_ASSERT(mapbits & CHUNK_MAP_LARGE); + if (!(mapbits & CHUNK_MAP_LARGE)) { + *aInfo = {TagUnknown, nullptr, 0, 0}; + return; + } + } + + void* addr = ((char*)chunk) + (pageind << gPageSize2Pow); + *aInfo = {TagLiveAlloc, addr, size, chunk->arena->mId}; + return; + } + + // It must be a small allocation. + auto run = (arena_run_t*)(mapbits & ~gPageSizeMask); + MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC); + + // The allocation size is stored in the run metadata. + size_t size = run->mBin->mSizeClass; + + // Address of the first possible pointer in the run after its headers. + uintptr_t reg0_addr = (uintptr_t)run + run->mBin->mRunFirstRegionOffset; + if (aPtr < (void*)reg0_addr) { + // In the run header. + *aInfo = {TagUnknown, nullptr, 0, 0}; + return; + } + + // Position in the run. + unsigned regind = ((uintptr_t)aPtr - reg0_addr) / size; + + // Pointer to the allocation's base address. + void* addr = (void*)(reg0_addr + regind * size); + + // Check if the allocation has been freed. + unsigned elm = regind >> (LOG2(sizeof(int)) + 3); + unsigned bit = regind - (elm << (LOG2(sizeof(int)) + 3)); + PtrInfoTag tag = + ((run->mRegionsMask[elm] & (1U << bit))) ? TagFreedAlloc : TagLiveAlloc; + + *aInfo = {tag, addr, size, chunk->arena->mId}; +} + +namespace Debug { +// Helper for debuggers. We don't want it to be inlined and optimized out. +MOZ_NEVER_INLINE jemalloc_ptr_info_t* jemalloc_ptr_info(const void* aPtr) { + static jemalloc_ptr_info_t info; + MozJemalloc::jemalloc_ptr_info(aPtr, &info); + return &info; +} +} // namespace Debug + +arena_chunk_t* arena_t::DallocSmall(arena_chunk_t* aChunk, void* aPtr, + arena_chunk_map_t* aMapElm) { + arena_run_t* run; + arena_bin_t* bin; + size_t size; + + run = (arena_run_t*)(aMapElm->bits & ~gPageSizeMask); + MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC); + bin = run->mBin; + size = bin->mSizeClass; + MOZ_DIAGNOSTIC_ASSERT(uintptr_t(aPtr) >= + uintptr_t(run) + bin->mRunFirstRegionOffset); + + arena_run_reg_dalloc(run, bin, aPtr, size); + run->mNumFree++; + arena_chunk_t* dealloc_chunk = nullptr; + + if (run->mNumFree == bin->mRunNumRegions) { + // Deallocate run. + if (run == bin->mCurrentRun) { + bin->mCurrentRun = nullptr; + } else if (bin->mRunNumRegions != 1) { + size_t run_pageind = + (uintptr_t(run) - uintptr_t(aChunk)) >> gPageSize2Pow; + arena_chunk_map_t* run_mapelm = &aChunk->map[run_pageind]; + + // This block's conditional is necessary because if the + // run only contains one region, then it never gets + // inserted into the non-full runs tree. + MOZ_DIAGNOSTIC_ASSERT(bin->mNonFullRuns.Search(run_mapelm) == run_mapelm); + bin->mNonFullRuns.Remove(run_mapelm); + } +#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED) + run->mMagic = 0; +#endif + dealloc_chunk = DallocRun(run, true); + bin->mNumRuns--; + } else if (run->mNumFree == 1 && run != bin->mCurrentRun) { + // Make sure that bin->mCurrentRun always refers to the lowest + // non-full run, if one exists. + if (!bin->mCurrentRun) { + bin->mCurrentRun = run; + } else if (uintptr_t(run) < uintptr_t(bin->mCurrentRun)) { + // Switch mCurrentRun. + if (bin->mCurrentRun->mNumFree > 0) { + arena_chunk_t* runcur_chunk = GetChunkForPtr(bin->mCurrentRun); + size_t runcur_pageind = + (uintptr_t(bin->mCurrentRun) - uintptr_t(runcur_chunk)) >> + gPageSize2Pow; + arena_chunk_map_t* runcur_mapelm = &runcur_chunk->map[runcur_pageind]; + + // Insert runcur. + MOZ_DIAGNOSTIC_ASSERT(!bin->mNonFullRuns.Search(runcur_mapelm)); + bin->mNonFullRuns.Insert(runcur_mapelm); + } + bin->mCurrentRun = run; + } else { + size_t run_pageind = + (uintptr_t(run) - uintptr_t(aChunk)) >> gPageSize2Pow; + arena_chunk_map_t* run_mapelm = &aChunk->map[run_pageind]; + + MOZ_DIAGNOSTIC_ASSERT(bin->mNonFullRuns.Search(run_mapelm) == nullptr); + bin->mNonFullRuns.Insert(run_mapelm); + } + } + mStats.allocated_small -= size; + + return dealloc_chunk; +} + +arena_chunk_t* arena_t::DallocLarge(arena_chunk_t* aChunk, void* aPtr) { + MOZ_DIAGNOSTIC_ASSERT((uintptr_t(aPtr) & gPageSizeMask) == 0); + size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow; + size_t size = aChunk->map[pageind].bits & ~gPageSizeMask; + + mStats.allocated_large -= size; + + return DallocRun((arena_run_t*)aPtr, true); +} + +static inline void arena_dalloc(void* aPtr, size_t aOffset, arena_t* aArena) { + MOZ_ASSERT(aPtr); + MOZ_ASSERT(aOffset != 0); + MOZ_ASSERT(GetChunkOffsetForPtr(aPtr) == aOffset); + + auto chunk = (arena_chunk_t*)((uintptr_t)aPtr - aOffset); + auto arena = chunk->arena; + MOZ_ASSERT(arena); + MOZ_DIAGNOSTIC_ASSERT(arena->mMagic == ARENA_MAGIC); + MOZ_RELEASE_ASSERT(!aArena || arena == aArena); + + size_t pageind = aOffset >> gPageSize2Pow; + if (opt_poison) { + AllocInfo info = AllocInfo::GetInChunk(aPtr, chunk, pageind); + MOZ_ASSERT(info.IsValid()); + MaybePoison(aPtr, info.Size()); + } + + arena_chunk_t* chunk_dealloc_delay = nullptr; + + { + MaybeMutexAutoLock lock(arena->mLock); + arena_chunk_map_t* mapelm = &chunk->map[pageind]; + MOZ_RELEASE_ASSERT((mapelm->bits & CHUNK_MAP_DECOMMITTED) == 0, + "Freeing in decommitted page."); + MOZ_RELEASE_ASSERT((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0, + "Double-free?"); + if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { + // Small allocation. + chunk_dealloc_delay = arena->DallocSmall(chunk, aPtr, mapelm); + } else { + // Large allocation. + chunk_dealloc_delay = arena->DallocLarge(chunk, aPtr); + } + } + + if (chunk_dealloc_delay) { + chunk_dealloc((void*)chunk_dealloc_delay, kChunkSize, ARENA_CHUNK); + } +} + +static inline void idalloc(void* ptr, arena_t* aArena) { + size_t offset; + + MOZ_ASSERT(ptr); + + offset = GetChunkOffsetForPtr(ptr); + if (offset != 0) { + arena_dalloc(ptr, offset, aArena); + } else { + huge_dalloc(ptr, aArena); + } +} + +void arena_t::RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize, + size_t aOldSize) { + MOZ_ASSERT(aSize < aOldSize); + + // Shrink the run, and make trailing pages available for other + // allocations. + MaybeMutexAutoLock lock(mLock); + TrimRunTail(aChunk, (arena_run_t*)aPtr, aOldSize, aSize, true); + mStats.allocated_large -= aOldSize - aSize; +} + +// Returns whether reallocation was successful. +bool arena_t::RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize, + size_t aOldSize) { + size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow; + size_t npages = aOldSize >> gPageSize2Pow; + + MaybeMutexAutoLock lock(mLock); + MOZ_DIAGNOSTIC_ASSERT(aOldSize == + (aChunk->map[pageind].bits & ~gPageSizeMask)); + + // Try to extend the run. + MOZ_ASSERT(aSize > aOldSize); + if (pageind + npages < gChunkNumPages - 1 && + (aChunk->map[pageind + npages].bits & CHUNK_MAP_ALLOCATED) == 0 && + (aChunk->map[pageind + npages].bits & ~gPageSizeMask) >= + aSize - aOldSize) { + // The next run is available and sufficiently large. Split the + // following run, then merge the first part with the existing + // allocation. + if (!SplitRun((arena_run_t*)(uintptr_t(aChunk) + + ((pageind + npages) << gPageSize2Pow)), + aSize - aOldSize, true, false)) { + return false; + } + + aChunk->map[pageind].bits = aSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + aChunk->map[pageind + npages].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + mStats.allocated_large += aSize - aOldSize; + return true; + } + + return false; +} + +void* arena_t::RallocSmallOrLarge(void* aPtr, size_t aSize, size_t aOldSize) { + void* ret; + size_t copysize; + SizeClass sizeClass(aSize); + + // Try to avoid moving the allocation. + if (aOldSize <= gMaxLargeClass && sizeClass.Size() == aOldSize) { + if (aSize < aOldSize) { + MaybePoison((void*)(uintptr_t(aPtr) + aSize), aOldSize - aSize); + } + return aPtr; + } + if (sizeClass.Type() == SizeClass::Large && aOldSize > gMaxBinClass && + aOldSize <= gMaxLargeClass) { + arena_chunk_t* chunk = GetChunkForPtr(aPtr); + if (sizeClass.Size() < aOldSize) { + // Fill before shrinking in order to avoid a race. + MaybePoison((void*)((uintptr_t)aPtr + aSize), aOldSize - aSize); + RallocShrinkLarge(chunk, aPtr, sizeClass.Size(), aOldSize); + return aPtr; + } + if (RallocGrowLarge(chunk, aPtr, sizeClass.Size(), aOldSize)) { + ApplyZeroOrJunk((void*)((uintptr_t)aPtr + aOldSize), aSize - aOldSize); + return aPtr; + } + } + + // If we get here, then aSize and aOldSize are different enough that we + // need to move the object. In that case, fall back to allocating new + // space and copying. Allow non-private arenas to switch arenas. + ret = (mIsPrivate ? this : choose_arena(aSize))->Malloc(aSize, false); + if (!ret) { + return nullptr; + } + + // Junk/zero-filling were already done by arena_t::Malloc(). + copysize = (aSize < aOldSize) ? aSize : aOldSize; +#ifdef VM_COPY_MIN + if (copysize >= VM_COPY_MIN) { + pages_copy(ret, aPtr, copysize); + } else +#endif + { + memcpy(ret, aPtr, copysize); + } + idalloc(aPtr, this); + return ret; +} + +void* arena_t::Ralloc(void* aPtr, size_t aSize, size_t aOldSize) { + MOZ_DIAGNOSTIC_ASSERT(mMagic == ARENA_MAGIC); + MOZ_ASSERT(aPtr); + MOZ_ASSERT(aSize != 0); + + return (aSize <= gMaxLargeClass) ? RallocSmallOrLarge(aPtr, aSize, aOldSize) + : RallocHuge(aPtr, aSize, aOldSize); +} + +void* arena_t::operator new(size_t aCount, const fallible_t&) noexcept { + MOZ_ASSERT(aCount == sizeof(arena_t)); + return TypedBaseAlloc<arena_t>::alloc(); +} + +void arena_t::operator delete(void* aPtr) { + TypedBaseAlloc<arena_t>::dealloc((arena_t*)aPtr); +} + +arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) { + unsigned i; + + memset(&mLink, 0, sizeof(mLink)); + memset(&mStats, 0, sizeof(arena_stats_t)); + mId = 0; + + // Initialize chunks. + mChunksDirty.Init(); +#ifdef MALLOC_DOUBLE_PURGE + new (&mChunksMAdvised) DoublyLinkedList<arena_chunk_t>(); +#endif + mSpare = nullptr; + + mRandomizeSmallAllocations = opt_randomize_small; + MaybeMutex::DoLock doLock = MaybeMutex::MUST_LOCK; + if (aParams) { + uint32_t randFlags = aParams->mFlags & ARENA_FLAG_RANDOMIZE_SMALL_MASK; + switch (randFlags) { + case ARENA_FLAG_RANDOMIZE_SMALL_ENABLED: + mRandomizeSmallAllocations = true; + break; + case ARENA_FLAG_RANDOMIZE_SMALL_DISABLED: + mRandomizeSmallAllocations = false; + break; + case ARENA_FLAG_RANDOMIZE_SMALL_DEFAULT: + default: + break; + } + + uint32_t threadFlags = aParams->mFlags & ARENA_FLAG_THREAD_MASK; + if (threadFlags == ARENA_FLAG_THREAD_MAIN_THREAD_ONLY) { + // At the moment we require that any ARENA_FLAG_THREAD_MAIN_THREAD_ONLY + // arenas are created and therefore always accessed by the main thread. + // This is for two reasons: + // * it allows jemalloc_stats to read their statistics (we also require + // that jemalloc_stats is only used on the main thread). + // * Only main-thread or threadsafe arenas can be guanteed to be in a + // consistent state after a fork() from the main thread. If fork() + // occurs off-thread then the new child process cannot use these arenas + // (new children should usually exec() or exit() since other data may + // also be inconsistent). + MOZ_ASSERT(gArenas.IsOnMainThread()); + MOZ_ASSERT(aIsPrivate); + doLock = MaybeMutex::AVOID_LOCK_UNSAFE; + } + + mMaxDirtyIncreaseOverride = aParams->mMaxDirtyIncreaseOverride; + mMaxDirtyDecreaseOverride = aParams->mMaxDirtyDecreaseOverride; + } else { + mMaxDirtyIncreaseOverride = 0; + mMaxDirtyDecreaseOverride = 0; + } + + MOZ_RELEASE_ASSERT(mLock.Init(doLock)); + + mPRNG = nullptr; + + mIsPrivate = aIsPrivate; + + mNumDirty = 0; + // The default maximum amount of dirty pages allowed on arenas is a fraction + // of opt_dirty_max. + mMaxDirty = (aParams && aParams->mMaxDirty) ? aParams->mMaxDirty + : (opt_dirty_max / 8); + + mRunsAvail.Init(); + + // Initialize bins. + SizeClass sizeClass(1); + + for (i = 0;; i++) { + arena_bin_t& bin = mBins[i]; + bin.Init(sizeClass); + + // SizeClass doesn't want sizes larger than gMaxBinClass for now. + if (sizeClass.Size() == gMaxBinClass) { + break; + } + sizeClass = sizeClass.Next(); + } + MOZ_ASSERT(i == NUM_SMALL_CLASSES - 1); + +#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED) + mMagic = ARENA_MAGIC; +#endif +} + +arena_t::~arena_t() { + size_t i; + MaybeMutexAutoLock lock(mLock); + MOZ_RELEASE_ASSERT(!mLink.Left() && !mLink.Right(), + "Arena is still registered"); + MOZ_RELEASE_ASSERT(!mStats.allocated_small && !mStats.allocated_large, + "Arena is not empty"); + if (mSpare) { + chunk_dealloc(mSpare, kChunkSize, ARENA_CHUNK); + } + for (i = 0; i < NUM_SMALL_CLASSES; i++) { + MOZ_RELEASE_ASSERT(!mBins[i].mNonFullRuns.First(), "Bin is not empty"); + } +#ifdef MOZ_DEBUG + { + MutexAutoLock lock(huge_mtx); + // This is an expensive check, so we only do it on debug builds. + for (auto node : huge.iter()) { + MOZ_RELEASE_ASSERT(node->mArenaId != mId, "Arena has huge allocations"); + } + } +#endif + mId = 0; +} + +arena_t* ArenaCollection::CreateArena(bool aIsPrivate, + arena_params_t* aParams) { + arena_t* ret = new (fallible) arena_t(aParams, aIsPrivate); + if (!ret) { + // Only reached if there is an OOM error. + + // OOM here is quite inconvenient to propagate, since dealing with it + // would require a check for failure in the fast path. Instead, punt + // by using the first arena. + // In practice, this is an extremely unlikely failure. + _malloc_message(_getprogname(), ": (malloc) Error initializing arena\n"); + + return mDefaultArena; + } + + MutexAutoLock lock(mLock); + + // For public arenas, it's fine to just use incrementing arena id + if (!aIsPrivate) { + ret->mId = mLastPublicArenaId++; + mArenas.Insert(ret); + return ret; + } + + // For private arenas, generate a cryptographically-secure random id for the + // new arena. If an attacker manages to get control of the process, this + // should make it more difficult for them to "guess" the ID of a memory + // arena, stopping them from getting data they may want + Tree& tree = (ret->IsMainThreadOnly()) ? mMainThreadArenas : mPrivateArenas; + arena_id_t arena_id; + do { + arena_id = MakeRandArenaId(ret->IsMainThreadOnly()); + // Keep looping until we ensure that the random number we just generated + // isn't already in use by another active arena + } while (GetByIdInternal(tree, arena_id)); + + ret->mId = arena_id; + tree.Insert(ret); + return ret; +} + +arena_id_t ArenaCollection::MakeRandArenaId(bool aIsMainThreadOnly) const { + uint64_t rand; + do { + mozilla::Maybe<uint64_t> maybeRandomId = mozilla::RandomUint64(); + MOZ_RELEASE_ASSERT(maybeRandomId.isSome()); + + rand = maybeRandomId.value(); + + // Set or clear the least significant bit depending on if this is a + // main-thread-only arena. We use this in GetById. + if (aIsMainThreadOnly) { + rand = rand | MAIN_THREAD_ARENA_BIT; + } else { + rand = rand & ~MAIN_THREAD_ARENA_BIT; + } + + // Avoid 0 as an arena Id. We use 0 for disposed arenas. + } while (rand == 0); + + return arena_id_t(rand); +} + +// End arena. +// *************************************************************************** +// Begin general internal functions. + +void* arena_t::MallocHuge(size_t aSize, bool aZero) { + return PallocHuge(aSize, kChunkSize, aZero); +} + +void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) { + void* ret; + size_t csize; + size_t psize; + extent_node_t* node; + + // We're going to configure guard pages in the region between the + // page-aligned size and the chunk-aligned size, so if those are the same + // then we need to force that region into existence. + csize = CHUNK_CEILING(aSize + gPageSize); + if (csize < aSize) { + // size is large enough to cause size_t wrap-around. + return nullptr; + } + + // Allocate an extent node with which to track the chunk. + node = ExtentAlloc::alloc(); + if (!node) { + return nullptr; + } + + // Allocate one or more contiguous chunks for this request. + ret = chunk_alloc(csize, aAlignment, false); + if (!ret) { + ExtentAlloc::dealloc(node); + return nullptr; + } + psize = PAGE_CEILING(aSize); +#ifdef MOZ_DEBUG + if (aZero) { + chunk_assert_zero(ret, psize); + } +#endif + + // Insert node into huge. + node->mAddr = ret; + node->mSize = psize; + node->mArena = this; + node->mArenaId = mId; + + { + MutexAutoLock lock(huge_mtx); + huge.Insert(node); + + // Although we allocated space for csize bytes, we indicate that we've + // allocated only psize bytes. + // + // If DECOMMIT is defined, this is a reasonable thing to do, since + // we'll explicitly decommit the bytes in excess of psize. + // + // If DECOMMIT is not defined, then we're relying on the OS to be lazy + // about how it allocates physical pages to mappings. If we never + // touch the pages in excess of psize, the OS won't allocate a physical + // page, and we won't use more than psize bytes of physical memory. + // + // A correct program will only touch memory in excess of how much it + // requested if it first calls malloc_usable_size and finds out how + // much space it has to play with. But because we set node->mSize = + // psize above, malloc_usable_size will return psize, not csize, and + // the program will (hopefully) never touch bytes in excess of psize. + // Thus those bytes won't take up space in physical memory, and we can + // reasonably claim we never "allocated" them in the first place. + huge_allocated += psize; + huge_mapped += csize; + } + + pages_decommit((void*)((uintptr_t)ret + psize), csize - psize); + + if (!aZero) { + ApplyZeroOrJunk(ret, psize); + } + + return ret; +} + +void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) { + void* ret; + size_t copysize; + + // Avoid moving the allocation if the size class would not change. + if (aOldSize > gMaxLargeClass && + CHUNK_CEILING(aSize + gPageSize) == CHUNK_CEILING(aOldSize + gPageSize)) { + size_t psize = PAGE_CEILING(aSize); + if (aSize < aOldSize) { + MaybePoison((void*)((uintptr_t)aPtr + aSize), aOldSize - aSize); + } + if (psize < aOldSize) { + extent_node_t key; + + pages_decommit((void*)((uintptr_t)aPtr + psize), aOldSize - psize); + + // Update recorded size. + MutexAutoLock lock(huge_mtx); + key.mAddr = const_cast<void*>(aPtr); + extent_node_t* node = huge.Search(&key); + MOZ_ASSERT(node); + MOZ_ASSERT(node->mSize == aOldSize); + MOZ_RELEASE_ASSERT(node->mArena == this); + huge_allocated -= aOldSize - psize; + // No need to change huge_mapped, because we didn't (un)map anything. + node->mSize = psize; + } else if (psize > aOldSize) { + if (!pages_commit((void*)((uintptr_t)aPtr + aOldSize), + psize - aOldSize)) { + return nullptr; + } + + // We need to update the recorded size if the size increased, + // so malloc_usable_size doesn't return a value smaller than + // what was requested via realloc(). + extent_node_t key; + MutexAutoLock lock(huge_mtx); + key.mAddr = const_cast<void*>(aPtr); + extent_node_t* node = huge.Search(&key); + MOZ_ASSERT(node); + MOZ_ASSERT(node->mSize == aOldSize); + MOZ_RELEASE_ASSERT(node->mArena == this); + huge_allocated += psize - aOldSize; + // No need to change huge_mapped, because we didn't + // (un)map anything. + node->mSize = psize; + } + + if (aSize > aOldSize) { + ApplyZeroOrJunk((void*)((uintptr_t)aPtr + aOldSize), aSize - aOldSize); + } + return aPtr; + } + + // If we get here, then aSize and aOldSize are different enough that we + // need to use a different size class. In that case, fall back to allocating + // new space and copying. Allow non-private arenas to switch arenas. + ret = (mIsPrivate ? this : choose_arena(aSize))->MallocHuge(aSize, false); + if (!ret) { + return nullptr; + } + + copysize = (aSize < aOldSize) ? aSize : aOldSize; +#ifdef VM_COPY_MIN + if (copysize >= VM_COPY_MIN) { + pages_copy(ret, aPtr, copysize); + } else +#endif + { + memcpy(ret, aPtr, copysize); + } + idalloc(aPtr, this); + return ret; +} + +static void huge_dalloc(void* aPtr, arena_t* aArena) { + extent_node_t* node; + size_t mapped = 0; + { + extent_node_t key; + MutexAutoLock lock(huge_mtx); + + // Extract from tree of huge allocations. + key.mAddr = aPtr; + node = huge.Search(&key); + MOZ_RELEASE_ASSERT(node, "Double-free?"); + MOZ_ASSERT(node->mAddr == aPtr); + MOZ_RELEASE_ASSERT(!aArena || node->mArena == aArena); + // See AllocInfo::Arena. + MOZ_RELEASE_ASSERT(node->mArenaId == node->mArena->mId); + huge.Remove(node); + + mapped = CHUNK_CEILING(node->mSize + gPageSize); + huge_allocated -= node->mSize; + huge_mapped -= mapped; + } + + // Unmap chunk. + chunk_dealloc(node->mAddr, mapped, HUGE_CHUNK); + + ExtentAlloc::dealloc(node); +} + +size_t GetKernelPageSize() { + static size_t kernel_page_size = ([]() { +#ifdef XP_WIN + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwPageSize; +#else + long result = sysconf(_SC_PAGESIZE); + MOZ_ASSERT(result != -1); + return result; +#endif + })(); + return kernel_page_size; +} + +// Returns whether the allocator was successfully initialized. +static bool malloc_init_hard() { + unsigned i; + const char* opts; + + AutoLock<StaticMutex> lock(gInitLock); + + if (malloc_initialized) { + // Another thread initialized the allocator before this one + // acquired gInitLock. + return true; + } + + if (!thread_arena.init()) { + return true; + } + + // Get page size and number of CPUs + const size_t page_size = GetKernelPageSize(); + // We assume that the page size is a power of 2. + MOZ_ASSERT(IsPowerOfTwo(page_size)); +#ifdef MALLOC_STATIC_PAGESIZE + if (gPageSize % page_size) { + _malloc_message( + _getprogname(), + "Compile-time page size does not divide the runtime one.\n"); + MOZ_CRASH(); + } +#else + gRealPageSize = gPageSize = page_size; +#endif + + // Get runtime configuration. + if ((opts = getenv("MALLOC_OPTIONS"))) { + for (i = 0; opts[i] != '\0'; i++) { + // All options are single letters, some take a *prefix* numeric argument. + + // Parse the argument. + unsigned prefix_arg = 0; + while (opts[i] >= '0' && opts[i] <= '9') { + prefix_arg *= 10; + prefix_arg += opts[i] - '0'; + i++; + } + + switch (opts[i]) { + case 'f': + opt_dirty_max >>= prefix_arg ? prefix_arg : 1; + break; + case 'F': + prefix_arg = prefix_arg ? prefix_arg : 1; + if (opt_dirty_max == 0) { + opt_dirty_max = 1; + prefix_arg--; + } + opt_dirty_max <<= prefix_arg; + if (opt_dirty_max == 0) { + // If the shift above overflowed all the bits then clamp the result + // instead. If we started with DIRTY_MAX_DEFAULT then this will + // always be a power of two so choose the maximum power of two that + // fits in a size_t. + opt_dirty_max = size_t(1) << (sizeof(size_t) * CHAR_BIT - 1); + } + break; +#ifdef MALLOC_RUNTIME_CONFIG + case 'j': + opt_junk = false; + break; + case 'J': + opt_junk = true; + break; + case 'q': + // The argument selects how much poisoning to do. + opt_poison = NONE; + break; + case 'Q': + if (opts[i + 1] == 'Q') { + // Maximum poisoning. + i++; + opt_poison = ALL; + } else { + opt_poison = SOME; + opt_poison_size = kCacheLineSize * prefix_arg; + } + break; + case 'z': + opt_zero = false; + break; + case 'Z': + opt_zero = true; + break; +# ifndef MALLOC_STATIC_PAGESIZE + case 'P': + MOZ_ASSERT(gPageSize >= 4_KiB); + MOZ_ASSERT(gPageSize <= 64_KiB); + prefix_arg = prefix_arg ? prefix_arg : 1; + gPageSize <<= prefix_arg; + // We know that if the shift causes gPageSize to be zero then it's + // because it shifted all the bits off. We didn't start with zero. + // Therefore if gPageSize is out of bounds we set it to 64KiB. + if (gPageSize < 4_KiB || gPageSize > 64_KiB) { + gPageSize = 64_KiB; + } + break; +# endif +#endif + case 'r': + opt_randomize_small = false; + break; + case 'R': + opt_randomize_small = true; + break; + default: { + char cbuf[2]; + + cbuf[0] = opts[i]; + cbuf[1] = '\0'; + _malloc_message(_getprogname(), + ": (malloc) Unsupported character " + "in malloc options: '", + cbuf, "'\n"); + } + } + } + } + +#ifndef MALLOC_STATIC_PAGESIZE + DefineGlobals(); +#endif + gRecycledSize = 0; + + // Initialize chunks data. + chunks_mtx.Init(); + MOZ_PUSH_IGNORE_THREAD_SAFETY + gChunksBySize.Init(); + gChunksByAddress.Init(); + MOZ_POP_THREAD_SAFETY + + // Initialize huge allocation data. + huge_mtx.Init(); + MOZ_PUSH_IGNORE_THREAD_SAFETY + huge.Init(); + huge_allocated = 0; + huge_mapped = 0; + MOZ_POP_THREAD_SAFETY + + // Initialize base allocation data structures. + base_mtx.Init(); + MOZ_PUSH_IGNORE_THREAD_SAFETY + base_mapped = 0; + base_committed = 0; + MOZ_POP_THREAD_SAFETY + + // Initialize arenas collection here. + if (!gArenas.Init()) { + return false; + } + + // Assign the default arena to the initial thread. + thread_arena.set(gArenas.GetDefault()); + + if (!gChunkRTree.Init()) { + return false; + } + + malloc_initialized = true; + + // Dummy call so that the function is not removed by dead-code elimination + Debug::jemalloc_ptr_info(nullptr); + +#if !defined(XP_WIN) && !defined(XP_DARWIN) + // Prevent potential deadlock on malloc locks after fork. + pthread_atfork(_malloc_prefork, _malloc_postfork_parent, + _malloc_postfork_child); +#endif + + return true; +} + +// End general internal functions. +// *************************************************************************** +// Begin malloc(3)-compatible functions. + +// The BaseAllocator class is a helper class that implements the base allocator +// functions (malloc, calloc, realloc, free, memalign) for a given arena, +// or an appropriately chosen arena (per choose_arena()) when none is given. +struct BaseAllocator { +#define MALLOC_DECL(name, return_type, ...) \ + inline return_type name(__VA_ARGS__); + +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#include "malloc_decls.h" + + explicit BaseAllocator(arena_t* aArena) : mArena(aArena) {} + + private: + arena_t* mArena; +}; + +#define MALLOC_DECL(name, return_type, ...) \ + inline return_type MozJemalloc::name( \ + ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \ + BaseAllocator allocator(nullptr); \ + return allocator.name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \ + } +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#include "malloc_decls.h" + +inline void* BaseAllocator::malloc(size_t aSize) { + void* ret; + arena_t* arena; + + if (!malloc_init()) { + ret = nullptr; + goto RETURN; + } + + if (aSize == 0) { + aSize = 1; + } + // If mArena is non-null, it must not be in the first page. + MOZ_DIAGNOSTIC_ASSERT_IF(mArena, (size_t)mArena >= gPageSize); + arena = mArena ? mArena : choose_arena(aSize); + ret = arena->Malloc(aSize, /* aZero = */ false); + +RETURN: + if (!ret) { + errno = ENOMEM; + } + + return ret; +} + +inline void* BaseAllocator::memalign(size_t aAlignment, size_t aSize) { + MOZ_ASSERT(((aAlignment - 1) & aAlignment) == 0); + + if (!malloc_init()) { + return nullptr; + } + + if (aSize == 0) { + aSize = 1; + } + + aAlignment = aAlignment < sizeof(void*) ? sizeof(void*) : aAlignment; + arena_t* arena = mArena ? mArena : choose_arena(aSize); + return arena->Palloc(aAlignment, aSize); +} + +inline void* BaseAllocator::calloc(size_t aNum, size_t aSize) { + void* ret; + + if (malloc_init()) { + CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aNum) * aSize; + if (checkedSize.isValid()) { + size_t allocSize = checkedSize.value(); + if (allocSize == 0) { + allocSize = 1; + } + arena_t* arena = mArena ? mArena : choose_arena(allocSize); + ret = arena->Malloc(allocSize, /* aZero = */ true); + } else { + ret = nullptr; + } + } else { + ret = nullptr; + } + + if (!ret) { + errno = ENOMEM; + } + + return ret; +} + +inline void* BaseAllocator::realloc(void* aPtr, size_t aSize) { + void* ret; + + if (aSize == 0) { + aSize = 1; + } + + if (aPtr) { + MOZ_RELEASE_ASSERT(malloc_initialized); + + auto info = AllocInfo::Get(aPtr); + auto arena = info.Arena(); + MOZ_RELEASE_ASSERT(!mArena || arena == mArena); + ret = arena->Ralloc(aPtr, aSize, info.Size()); + } else { + if (!malloc_init()) { + ret = nullptr; + } else { + arena_t* arena = mArena ? mArena : choose_arena(aSize); + ret = arena->Malloc(aSize, /* aZero = */ false); + } + } + + if (!ret) { + errno = ENOMEM; + } + return ret; +} + +inline void BaseAllocator::free(void* aPtr) { + size_t offset; + + // A version of idalloc that checks for nullptr pointer. + offset = GetChunkOffsetForPtr(aPtr); + if (offset != 0) { + MOZ_RELEASE_ASSERT(malloc_initialized); + arena_dalloc(aPtr, offset, mArena); + } else if (aPtr) { + MOZ_RELEASE_ASSERT(malloc_initialized); + huge_dalloc(aPtr, mArena); + } +} + +inline int MozJemalloc::posix_memalign(void** aMemPtr, size_t aAlignment, + size_t aSize) { + return AlignedAllocator<memalign>::posix_memalign(aMemPtr, aAlignment, aSize); +} + +inline void* MozJemalloc::aligned_alloc(size_t aAlignment, size_t aSize) { + return AlignedAllocator<memalign>::aligned_alloc(aAlignment, aSize); +} + +inline void* MozJemalloc::valloc(size_t aSize) { + return AlignedAllocator<memalign>::valloc(aSize); +} + +// End malloc(3)-compatible functions. +// *************************************************************************** +// Begin non-standard functions. + +// This was added by Mozilla for use by SQLite. +inline size_t MozJemalloc::malloc_good_size(size_t aSize) { + if (aSize <= gMaxLargeClass) { + // Small or large + aSize = SizeClass(aSize).Size(); + } else { + // Huge. We use PAGE_CEILING to get psize, instead of using + // CHUNK_CEILING to get csize. This ensures that this + // malloc_usable_size(malloc(n)) always matches + // malloc_good_size(n). + aSize = PAGE_CEILING(aSize); + } + return aSize; +} + +inline size_t MozJemalloc::malloc_usable_size(usable_ptr_t aPtr) { + return AllocInfo::GetValidated(aPtr).Size(); +} + +inline void MozJemalloc::jemalloc_stats_internal( + jemalloc_stats_t* aStats, jemalloc_bin_stats_t* aBinStats) { + size_t non_arena_mapped, chunk_header_size; + + if (!aStats) { + return; + } + if (!malloc_init()) { + memset(aStats, 0, sizeof(*aStats)); + return; + } + if (aBinStats) { + memset(aBinStats, 0, sizeof(jemalloc_bin_stats_t) * NUM_SMALL_CLASSES); + } + + // Gather runtime settings. + aStats->opt_junk = opt_junk; + aStats->opt_zero = opt_zero; + aStats->quantum = kQuantum; + aStats->quantum_max = kMaxQuantumClass; + aStats->quantum_wide = kQuantumWide; + aStats->quantum_wide_max = kMaxQuantumWideClass; + aStats->subpage_max = gMaxSubPageClass; + aStats->large_max = gMaxLargeClass; + aStats->chunksize = kChunkSize; + aStats->page_size = gPageSize; + aStats->dirty_max = opt_dirty_max; + + // Gather current memory usage statistics. + aStats->narenas = 0; + aStats->mapped = 0; + aStats->allocated = 0; + aStats->waste = 0; + aStats->page_cache = 0; + aStats->bookkeeping = 0; + aStats->bin_unused = 0; + + non_arena_mapped = 0; + + // Get huge mapped/allocated. + { + MutexAutoLock lock(huge_mtx); + non_arena_mapped += huge_mapped; + aStats->allocated += huge_allocated; + MOZ_ASSERT(huge_mapped >= huge_allocated); + } + + // Get base mapped/allocated. + { + MutexAutoLock lock(base_mtx); + non_arena_mapped += base_mapped; + aStats->bookkeeping += base_committed; + MOZ_ASSERT(base_mapped >= base_committed); + } + + gArenas.mLock.Lock(); + + // Stats can only read complete information if its run on the main thread. + MOZ_ASSERT(gArenas.IsOnMainThreadWeak()); + + // Iterate over arenas. + for (auto arena : gArenas.iter()) { + // Cannot safely read stats for this arena and therefore stats would be + // incomplete. + MOZ_ASSERT(arena->mLock.SafeOnThisThread()); + + size_t arena_mapped, arena_allocated, arena_committed, arena_dirty, j, + arena_unused, arena_headers; + + arena_headers = 0; + arena_unused = 0; + + { + MaybeMutexAutoLock lock(arena->mLock); + + arena_mapped = arena->mStats.mapped; + + // "committed" counts dirty and allocated memory. + arena_committed = arena->mStats.committed << gPageSize2Pow; + + arena_allocated = + arena->mStats.allocated_small + arena->mStats.allocated_large; + + arena_dirty = arena->mNumDirty << gPageSize2Pow; + + for (j = 0; j < NUM_SMALL_CLASSES; j++) { + arena_bin_t* bin = &arena->mBins[j]; + size_t bin_unused = 0; + size_t num_non_full_runs = 0; + + for (auto mapelm : bin->mNonFullRuns.iter()) { + arena_run_t* run = (arena_run_t*)(mapelm->bits & ~gPageSizeMask); + bin_unused += run->mNumFree * bin->mSizeClass; + num_non_full_runs++; + } + + if (bin->mCurrentRun) { + bin_unused += bin->mCurrentRun->mNumFree * bin->mSizeClass; + num_non_full_runs++; + } + + arena_unused += bin_unused; + arena_headers += bin->mNumRuns * bin->mRunFirstRegionOffset; + if (aBinStats) { + aBinStats[j].size = bin->mSizeClass; + aBinStats[j].num_non_full_runs += num_non_full_runs; + aBinStats[j].num_runs += bin->mNumRuns; + aBinStats[j].bytes_unused += bin_unused; + size_t bytes_per_run = static_cast<size_t>(bin->mRunSizePages) + << gPageSize2Pow; + aBinStats[j].bytes_total += + bin->mNumRuns * (bytes_per_run - bin->mRunFirstRegionOffset); + aBinStats[j].bytes_per_run = bytes_per_run; + } + } + } + + MOZ_ASSERT(arena_mapped >= arena_committed); + MOZ_ASSERT(arena_committed >= arena_allocated + arena_dirty); + + aStats->mapped += arena_mapped; + aStats->allocated += arena_allocated; + aStats->page_cache += arena_dirty; + // "waste" is committed memory that is neither dirty nor + // allocated. If you change this definition please update + // memory/replace/logalloc/replay/Replay.cpp's jemalloc_stats calculation of + // committed. + MOZ_ASSERT(arena_committed >= + (arena_allocated + arena_dirty + arena_unused + arena_headers)); + aStats->waste += arena_committed - arena_allocated - arena_dirty - + arena_unused - arena_headers; + aStats->bin_unused += arena_unused; + aStats->bookkeeping += arena_headers; + aStats->narenas++; + } + gArenas.mLock.Unlock(); + + // Account for arena chunk headers in bookkeeping rather than waste. + chunk_header_size = + ((aStats->mapped / aStats->chunksize) * (gChunkHeaderNumPages - 1)) + << gPageSize2Pow; + + aStats->mapped += non_arena_mapped; + aStats->bookkeeping += chunk_header_size; + aStats->waste -= chunk_header_size; + + MOZ_ASSERT(aStats->mapped >= aStats->allocated + aStats->waste + + aStats->page_cache + aStats->bookkeeping); +} + +inline size_t MozJemalloc::jemalloc_stats_num_bins() { + return NUM_SMALL_CLASSES; +} + +inline void MozJemalloc::jemalloc_set_main_thread() { + MOZ_ASSERT(malloc_initialized); + gArenas.SetMainThread(); +} + +#ifdef MALLOC_DOUBLE_PURGE + +// Explicitly remove all of this chunk's MADV_FREE'd pages from memory. +static void hard_purge_chunk(arena_chunk_t* aChunk) { + // See similar logic in arena_t::Purge(). + for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages; i++) { + // Find all adjacent pages with CHUNK_MAP_MADVISED set. + size_t npages; + for (npages = 0; aChunk->map[i + npages].bits & CHUNK_MAP_MADVISED && + i + npages < gChunkNumPages; + npages++) { + // Turn off the chunk's MADV_FREED bit and turn on its + // DECOMMITTED bit. + MOZ_DIAGNOSTIC_ASSERT( + !(aChunk->map[i + npages].bits & CHUNK_MAP_DECOMMITTED)); + aChunk->map[i + npages].bits ^= CHUNK_MAP_MADVISED_OR_DECOMMITTED; + } + + // We could use mincore to find out which pages are actually + // present, but it's not clear that's better. + if (npages > 0) { + pages_decommit(((char*)aChunk) + (i << gPageSize2Pow), + npages << gPageSize2Pow); + Unused << pages_commit(((char*)aChunk) + (i << gPageSize2Pow), + npages << gPageSize2Pow); + } + i += npages; + } +} + +// Explicitly remove all of this arena's MADV_FREE'd pages from memory. +void arena_t::HardPurge() { + MaybeMutexAutoLock lock(mLock); + + while (!mChunksMAdvised.isEmpty()) { + arena_chunk_t* chunk = mChunksMAdvised.popFront(); + hard_purge_chunk(chunk); + } +} + +inline void MozJemalloc::jemalloc_purge_freed_pages() { + if (malloc_initialized) { + MutexAutoLock lock(gArenas.mLock); + MOZ_ASSERT(gArenas.IsOnMainThreadWeak()); + for (auto arena : gArenas.iter()) { + arena->HardPurge(); + } + } +} + +#else // !defined MALLOC_DOUBLE_PURGE + +inline void MozJemalloc::jemalloc_purge_freed_pages() { + // Do nothing. +} + +#endif // defined MALLOC_DOUBLE_PURGE + +inline void MozJemalloc::jemalloc_free_dirty_pages(void) { + if (malloc_initialized) { + MutexAutoLock lock(gArenas.mLock); + MOZ_ASSERT(gArenas.IsOnMainThreadWeak()); + for (auto arena : gArenas.iter()) { + MaybeMutexAutoLock arena_lock(arena->mLock); + arena->Purge(1); + } + } +} + +inline arena_t* ArenaCollection::GetByIdInternal(Tree& aTree, + arena_id_t aArenaId) { + // Use AlignedStorage2 to avoid running the arena_t constructor, while + // we only need it as a placeholder for mId. + mozilla::AlignedStorage2<arena_t> key; + key.addr()->mId = aArenaId; + return aTree.Search(key.addr()); +} + +inline arena_t* ArenaCollection::GetById(arena_id_t aArenaId, bool aIsPrivate) { + if (!malloc_initialized) { + return nullptr; + } + + Tree* tree = nullptr; + if (aIsPrivate) { + if (ArenaIdIsMainThreadOnly(aArenaId)) { + // Main thread only arena. Do the lookup here without taking the lock. + arena_t* result = GetByIdInternal(mMainThreadArenas, aArenaId); + MOZ_RELEASE_ASSERT(result); + return result; + } + tree = &mPrivateArenas; + } else { + tree = &mArenas; + } + + MutexAutoLock lock(mLock); + arena_t* result = GetByIdInternal(*tree, aArenaId); + MOZ_RELEASE_ASSERT(result); + return result; +} + +inline arena_id_t MozJemalloc::moz_create_arena_with_params( + arena_params_t* aParams) { + if (malloc_init()) { + arena_t* arena = gArenas.CreateArena(/* IsPrivate = */ true, aParams); + return arena->mId; + } + return 0; +} + +inline void MozJemalloc::moz_dispose_arena(arena_id_t aArenaId) { + arena_t* arena = gArenas.GetById(aArenaId, /* IsPrivate = */ true); + MOZ_RELEASE_ASSERT(arena); + gArenas.DisposeArena(arena); +} + +inline void MozJemalloc::moz_set_max_dirty_page_modifier(int32_t aModifier) { + gArenas.SetDefaultMaxDirtyPageModifier(aModifier); +} + +#define MALLOC_DECL(name, return_type, ...) \ + inline return_type MozJemalloc::moz_arena_##name( \ + arena_id_t aArenaId, ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \ + BaseAllocator allocator( \ + gArenas.GetById(aArenaId, /* IsPrivate = */ true)); \ + return allocator.name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \ + } +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#include "malloc_decls.h" + +// End non-standard functions. +// *************************************************************************** +#ifndef XP_WIN +// Begin library-private functions, used by threading libraries for protection +// of malloc during fork(). These functions are only called if the program is +// running in threaded mode, so there is no need to check whether the program +// is threaded here. +// +// Note that the only way to keep the main-thread-only arenas in a consistent +// state for the child is if fork is called from the main thread only. Or the +// child must not use them, eg it should call exec(). We attempt to prevent the +// child for accessing these arenas by refusing to re-initialise them. +static pthread_t gForkingThread; + +FORK_HOOK +void _malloc_prefork(void) MOZ_NO_THREAD_SAFETY_ANALYSIS { + // Acquire all mutexes in a safe order. + gArenas.mLock.Lock(); + gForkingThread = pthread_self(); + + for (auto arena : gArenas.iter()) { + if (arena->mLock.LockIsEnabled()) { + arena->mLock.Lock(); + } + } + + base_mtx.Lock(); + + huge_mtx.Lock(); +} + +FORK_HOOK +void _malloc_postfork_parent(void) MOZ_NO_THREAD_SAFETY_ANALYSIS { + // Release all mutexes, now that fork() has completed. + huge_mtx.Unlock(); + + base_mtx.Unlock(); + + for (auto arena : gArenas.iter()) { + if (arena->mLock.LockIsEnabled()) { + arena->mLock.Unlock(); + } + } + + gArenas.mLock.Unlock(); +} + +FORK_HOOK +void _malloc_postfork_child(void) { + // Reinitialize all mutexes, now that fork() has completed. + huge_mtx.Init(); + + base_mtx.Init(); + + for (auto arena : gArenas.iter()) { + arena->mLock.Reinit(gForkingThread); + } + + gArenas.PostForkFixMainThread(); + gArenas.mLock.Init(); +} +#endif // XP_WIN + +// End library-private functions. +// *************************************************************************** +#ifdef MOZ_REPLACE_MALLOC +// Windows doesn't come with weak imports as they are possible with +// LD_PRELOAD or DYLD_INSERT_LIBRARIES on Linux/OSX. On this platform, +// the replacement functions are defined as variable pointers to the +// function resolved with GetProcAddress() instead of weak definitions +// of functions. On Android, the same needs to happen as well, because +// the Android linker doesn't handle weak linking with non LD_PRELOADed +// libraries, but LD_PRELOADing is not very convenient on Android, with +// the zygote. +# ifdef XP_DARWIN +# define MOZ_REPLACE_WEAK __attribute__((weak_import)) +# elif defined(XP_WIN) || defined(ANDROID) +# define MOZ_DYNAMIC_REPLACE_INIT +# define replace_init replace_init_decl +# elif defined(__GNUC__) +# define MOZ_REPLACE_WEAK __attribute__((weak)) +# endif + +# include "replace_malloc.h" + +# define MALLOC_DECL(name, return_type, ...) CanonicalMalloc::name, + +// The default malloc table, i.e. plain allocations. It never changes. It's +// used by init(), and not used after that. +static const malloc_table_t gDefaultMallocTable = { +# include "malloc_decls.h" +}; + +// The malloc table installed by init(). It never changes from that point +// onward. It will be the same as gDefaultMallocTable if no replace-malloc tool +// is enabled at startup. +static malloc_table_t gOriginalMallocTable = { +# include "malloc_decls.h" +}; + +// The malloc table installed by jemalloc_replace_dynamic(). (Read the +// comments above that function for more details.) +static malloc_table_t gDynamicMallocTable = { +# include "malloc_decls.h" +}; + +// This briefly points to gDefaultMallocTable at startup. After that, it points +// to either gOriginalMallocTable or gDynamicMallocTable. It's atomic to avoid +// races when switching between tables. +static Atomic<malloc_table_t const*, mozilla::MemoryOrdering::Relaxed> + gMallocTablePtr; + +# ifdef MOZ_DYNAMIC_REPLACE_INIT +# undef replace_init +typedef decltype(replace_init_decl) replace_init_impl_t; +static replace_init_impl_t* replace_init = nullptr; +# endif + +# ifdef XP_WIN +typedef HMODULE replace_malloc_handle_t; + +static replace_malloc_handle_t replace_malloc_handle() { + wchar_t replace_malloc_lib[1024]; + if (GetEnvironmentVariableW(L"MOZ_REPLACE_MALLOC_LIB", replace_malloc_lib, + ArrayLength(replace_malloc_lib)) > 0) { + return LoadLibraryW(replace_malloc_lib); + } + return nullptr; +} + +# define REPLACE_MALLOC_GET_INIT_FUNC(handle) \ + (replace_init_impl_t*)GetProcAddress(handle, "replace_init") + +# elif defined(ANDROID) +# include <dlfcn.h> + +typedef void* replace_malloc_handle_t; + +static replace_malloc_handle_t replace_malloc_handle() { + const char* replace_malloc_lib = getenv("MOZ_REPLACE_MALLOC_LIB"); + if (replace_malloc_lib && *replace_malloc_lib) { + return dlopen(replace_malloc_lib, RTLD_LAZY); + } + return nullptr; +} + +# define REPLACE_MALLOC_GET_INIT_FUNC(handle) \ + (replace_init_impl_t*)dlsym(handle, "replace_init") + +# endif + +static void replace_malloc_init_funcs(malloc_table_t*); + +# ifdef MOZ_REPLACE_MALLOC_STATIC +extern "C" void logalloc_init(malloc_table_t*, ReplaceMallocBridge**); + +extern "C" void dmd_init(malloc_table_t*, ReplaceMallocBridge**); +# endif + +void phc_init(malloc_table_t*, ReplaceMallocBridge**); + +bool Equals(const malloc_table_t& aTable1, const malloc_table_t& aTable2) { + return memcmp(&aTable1, &aTable2, sizeof(malloc_table_t)) == 0; +} + +// Below is the malloc implementation overriding jemalloc and calling the +// replacement functions if they exist. +static ReplaceMallocBridge* gReplaceMallocBridge = nullptr; +static void init() { + malloc_table_t tempTable = gDefaultMallocTable; + +# ifdef MOZ_DYNAMIC_REPLACE_INIT + replace_malloc_handle_t handle = replace_malloc_handle(); + if (handle) { + replace_init = REPLACE_MALLOC_GET_INIT_FUNC(handle); + } +# endif + + // Set this *before* calling replace_init, otherwise if replace_init calls + // malloc() we'll get an infinite loop. + gMallocTablePtr = &gDefaultMallocTable; + + // Pass in the default allocator table so replace functions can copy and use + // it for their allocations. The replace_init() function should modify the + // table if it wants to be active, otherwise leave it unmodified. + if (replace_init) { + replace_init(&tempTable, &gReplaceMallocBridge); + } +# ifdef MOZ_REPLACE_MALLOC_STATIC + if (Equals(tempTable, gDefaultMallocTable)) { + logalloc_init(&tempTable, &gReplaceMallocBridge); + } +# ifdef MOZ_DMD + if (Equals(tempTable, gDefaultMallocTable)) { + dmd_init(&tempTable, &gReplaceMallocBridge); + } +# endif +# endif + if (!Equals(tempTable, gDefaultMallocTable)) { + replace_malloc_init_funcs(&tempTable); + } + gOriginalMallocTable = tempTable; + gMallocTablePtr = &gOriginalMallocTable; +} + +// WARNING WARNING WARNING: this function should be used with extreme care. It +// is not as general-purpose as it looks. It is currently used by +// tools/profiler/core/memory_hooks.cpp for counting allocations and probably +// should not be used for any other purpose. +// +// This function allows the original malloc table to be temporarily replaced by +// a different malloc table. Or, if the argument is nullptr, it switches back to +// the original malloc table. +// +// Limitations: +// +// - It is not threadsafe. If multiple threads pass it the same +// `replace_init_func` at the same time, there will be data races writing to +// the malloc_table_t within that function. +// +// - Only one replacement can be installed. No nesting is allowed. +// +// - The new malloc table must be able to free allocations made by the original +// malloc table, and upon removal the original malloc table must be able to +// free allocations made by the new malloc table. This means the new malloc +// table can only do simple things like recording extra information, while +// delegating actual allocation/free operations to the original malloc table. +// +MOZ_JEMALLOC_API void jemalloc_replace_dynamic( + jemalloc_init_func replace_init_func) { + if (replace_init_func) { + malloc_table_t tempTable = gOriginalMallocTable; + (*replace_init_func)(&tempTable, &gReplaceMallocBridge); + if (!Equals(tempTable, gOriginalMallocTable)) { + replace_malloc_init_funcs(&tempTable); + + // Temporarily switch back to the original malloc table. In the + // (supported) non-nested case, this is a no-op. But just in case this is + // a (unsupported) nested call, it makes the overwriting of + // gDynamicMallocTable less racy, because ongoing calls to malloc() and + // friends won't go through gDynamicMallocTable. + gMallocTablePtr = &gOriginalMallocTable; + + gDynamicMallocTable = tempTable; + gMallocTablePtr = &gDynamicMallocTable; + // We assume that dynamic replaces don't occur close enough for a + // thread to still have old copies of the table pointer when the 2nd + // replace occurs. + } + } else { + // Switch back to the original malloc table. + gMallocTablePtr = &gOriginalMallocTable; + } +} + +# define MALLOC_DECL(name, return_type, ...) \ + inline return_type ReplaceMalloc::name( \ + ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \ + if (MOZ_UNLIKELY(!gMallocTablePtr)) { \ + init(); \ + } \ + return (*gMallocTablePtr).name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \ + } +# include "malloc_decls.h" + +MOZ_JEMALLOC_API struct ReplaceMallocBridge* get_bridge(void) { + if (MOZ_UNLIKELY(!gMallocTablePtr)) { + init(); + } + return gReplaceMallocBridge; +} + +// posix_memalign, aligned_alloc, memalign and valloc all implement some kind +// of aligned memory allocation. For convenience, a replace-malloc library can +// skip defining replace_posix_memalign, replace_aligned_alloc and +// replace_valloc, and default implementations will be automatically derived +// from replace_memalign. +static void replace_malloc_init_funcs(malloc_table_t* table) { + if (table->posix_memalign == CanonicalMalloc::posix_memalign && + table->memalign != CanonicalMalloc::memalign) { + table->posix_memalign = + AlignedAllocator<ReplaceMalloc::memalign>::posix_memalign; + } + if (table->aligned_alloc == CanonicalMalloc::aligned_alloc && + table->memalign != CanonicalMalloc::memalign) { + table->aligned_alloc = + AlignedAllocator<ReplaceMalloc::memalign>::aligned_alloc; + } + if (table->valloc == CanonicalMalloc::valloc && + table->memalign != CanonicalMalloc::memalign) { + table->valloc = AlignedAllocator<ReplaceMalloc::memalign>::valloc; + } + if (table->moz_create_arena_with_params == + CanonicalMalloc::moz_create_arena_with_params && + table->malloc != CanonicalMalloc::malloc) { +# define MALLOC_DECL(name, ...) \ + table->name = DummyArenaAllocator<ReplaceMalloc>::name; +# define MALLOC_FUNCS MALLOC_FUNCS_ARENA_BASE +# include "malloc_decls.h" + } + if (table->moz_arena_malloc == CanonicalMalloc::moz_arena_malloc && + table->malloc != CanonicalMalloc::malloc) { +# define MALLOC_DECL(name, ...) \ + table->name = DummyArenaAllocator<ReplaceMalloc>::name; +# define MALLOC_FUNCS MALLOC_FUNCS_ARENA_ALLOC +# include "malloc_decls.h" + } +} + +#endif // MOZ_REPLACE_MALLOC +// *************************************************************************** +// Definition of all the _impl functions +// GENERIC_MALLOC_DECL2_MINGW is only used for the MinGW build, and aliases +// the malloc funcs (e.g. malloc) to the je_ versions. It does not generate +// aliases for the other functions (jemalloc and arena functions). +// +// We do need aliases for the other mozglue.def-redirected functions though, +// these are done at the bottom of mozmemory_wrap.cpp +#define GENERIC_MALLOC_DECL2_MINGW(name, name_impl, return_type, ...) \ + return_type name(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) \ + __attribute__((alias(MOZ_STRINGIFY(name_impl)))); + +#define GENERIC_MALLOC_DECL2(attributes, name, name_impl, return_type, ...) \ + return_type name_impl(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) attributes { \ + return DefaultMalloc::name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \ + } + +#ifndef __MINGW32__ +# define GENERIC_MALLOC_DECL(attributes, name, return_type, ...) \ + GENERIC_MALLOC_DECL2(attributes, name, name##_impl, return_type, \ + ##__VA_ARGS__) +#else +# define GENERIC_MALLOC_DECL(attributes, name, return_type, ...) \ + GENERIC_MALLOC_DECL2(attributes, name, name##_impl, return_type, \ + ##__VA_ARGS__) \ + GENERIC_MALLOC_DECL2_MINGW(name, name##_impl, return_type, ##__VA_ARGS__) +#endif + +#define NOTHROW_MALLOC_DECL(...) \ + MOZ_MEMORY_API MACRO_CALL(GENERIC_MALLOC_DECL, (noexcept(true), __VA_ARGS__)) +#define MALLOC_DECL(...) \ + MOZ_MEMORY_API MACRO_CALL(GENERIC_MALLOC_DECL, (, __VA_ARGS__)) +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC +#include "malloc_decls.h" + +#undef GENERIC_MALLOC_DECL +#define GENERIC_MALLOC_DECL(attributes, name, return_type, ...) \ + GENERIC_MALLOC_DECL2(attributes, name, name, return_type, ##__VA_ARGS__) + +#define MALLOC_DECL(...) \ + MOZ_JEMALLOC_API MACRO_CALL(GENERIC_MALLOC_DECL, (, __VA_ARGS__)) +#define MALLOC_FUNCS (MALLOC_FUNCS_JEMALLOC | MALLOC_FUNCS_ARENA) +#include "malloc_decls.h" +// *************************************************************************** + +#ifdef HAVE_DLOPEN +# include <dlfcn.h> +#endif + +#if defined(__GLIBC__) && !defined(__UCLIBC__) +// glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible +// to inconsistently reference libc's malloc(3)-compatible functions +// (bug 493541). +// +// These definitions interpose hooks in glibc. The functions are actually +// passed an extra argument for the caller return address, which will be +// ignored. + +extern "C" { +MOZ_EXPORT void (*__free_hook)(void*) = free_impl; +MOZ_EXPORT void* (*__malloc_hook)(size_t) = malloc_impl; +MOZ_EXPORT void* (*__realloc_hook)(void*, size_t) = realloc_impl; +MOZ_EXPORT void* (*__memalign_hook)(size_t, size_t) = memalign_impl; +} + +#elif defined(RTLD_DEEPBIND) +// XXX On systems that support RTLD_GROUP or DF_1_GROUP, do their +// implementations permit similar inconsistencies? Should STV_SINGLETON +// visibility be used for interposition where available? +# error \ + "Interposing malloc is unsafe on this system without libc malloc hooks." +#endif + +#ifdef XP_WIN +MOZ_EXPORT void* _recalloc(void* aPtr, size_t aCount, size_t aSize) { + size_t oldsize = aPtr ? AllocInfo::Get(aPtr).Size() : 0; + CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aCount) * aSize; + + if (!checkedSize.isValid()) { + return nullptr; + } + + size_t newsize = checkedSize.value(); + + // In order for all trailing bytes to be zeroed, the caller needs to + // use calloc(), followed by recalloc(). However, the current calloc() + // implementation only zeros the bytes requested, so if recalloc() is + // to work 100% correctly, calloc() will need to change to zero + // trailing bytes. + aPtr = DefaultMalloc::realloc(aPtr, newsize); + if (aPtr && oldsize < newsize) { + memset((void*)((uintptr_t)aPtr + oldsize), 0, newsize - oldsize); + } + + return aPtr; +} + +// This impl of _expand doesn't ever actually expand or shrink blocks: it +// simply replies that you may continue using a shrunk block. +MOZ_EXPORT void* _expand(void* aPtr, size_t newsize) { + if (AllocInfo::Get(aPtr).Size() >= newsize) { + return aPtr; + } + + return nullptr; +} + +MOZ_EXPORT size_t _msize(void* aPtr) { + return DefaultMalloc::malloc_usable_size(aPtr); +} +#endif + +#ifdef MOZ_PHC +// Compile PHC and mozjemalloc together so that PHC can inline mozjemalloc. +# include "PHC.cpp" +#endif diff --git a/memory/build/mozjemalloc.h b/memory/build/mozjemalloc.h new file mode 100644 index 0000000000..df3fbc31b3 --- /dev/null +++ b/memory/build/mozjemalloc.h @@ -0,0 +1,168 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozjemalloc_h +#define mozjemalloc_h + +#include <errno.h> + +#include "mozjemalloc_types.h" +#include "mozilla/MacroArgs.h" + +// Macro helpers + +#define MACRO_CALL(a, b) a b +// Can't use macros recursively, so we need another one doing the same as above. +#define MACRO_CALL2(a, b) a b + +#define ARGS_HELPER(name, ...) \ + MACRO_CALL2(MOZ_PASTE_PREFIX_AND_ARG_COUNT(name, ##__VA_ARGS__), \ + (__VA_ARGS__)) +#define TYPED_ARGS0() +#define TYPED_ARGS1(t1) t1 arg1 +#define TYPED_ARGS2(t1, t2) TYPED_ARGS1(t1), t2 arg2 +#define TYPED_ARGS3(t1, t2, t3) TYPED_ARGS2(t1, t2), t3 arg3 + +#define ARGS0() +#define ARGS1(t1) arg1 +#define ARGS2(t1, t2) ARGS1(t1), arg2 +#define ARGS3(t1, t2, t3) ARGS2(t1, t2), arg3 + +#ifdef MOZ_MEMORY + +size_t GetKernelPageSize(); + +// Implement the set of alignment functions in terms of memalign. +template <void* (*memalign)(size_t, size_t)> +struct AlignedAllocator { + static inline int posix_memalign(void** aMemPtr, size_t aAlignment, + size_t aSize) { + void* result; + + // alignment must be a power of two and a multiple of sizeof(void*) + if (((aAlignment - 1) & aAlignment) != 0 || aAlignment < sizeof(void*)) { + return EINVAL; + } + + // The 0-->1 size promotion is done in the memalign() call below + result = memalign(aAlignment, aSize); + + if (!result) { + return ENOMEM; + } + + *aMemPtr = result; + return 0; + } + + static inline void* aligned_alloc(size_t aAlignment, size_t aSize) { + if (aSize % aAlignment) { + return nullptr; + } + return memalign(aAlignment, aSize); + } + + static inline void* valloc(size_t aSize) { + return memalign(GetKernelPageSize(), aSize); + } +}; + +// These classes each implement the same interface. Writing out the +// interface for each one rather than using inheritance makes things more +// explicit. +// +// Note: compilers are expected to be able to optimize out `this`. + +// The MozJemalloc allocator +struct MozJemalloc { +# define MALLOC_DECL(name, return_type, ...) \ + static inline return_type name(__VA_ARGS__); +# include "malloc_decls.h" +}; + +# ifdef MOZ_PHC +struct MozJemallocPHC : public MozJemalloc { +# define MALLOC_DECL(name, return_type, ...) \ + static return_type name(__VA_ARGS__); +# define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +# include "malloc_decls.h" + + static inline int posix_memalign(void** aMemPtr, size_t aAlignment, + size_t aSize) { + return AlignedAllocator<memalign>::posix_memalign(aMemPtr, aAlignment, + aSize); + } + + static inline void* aligned_alloc(size_t aAlignment, size_t aSize) { + return AlignedAllocator<memalign>::aligned_alloc(aAlignment, aSize); + } + + static inline void* valloc(size_t aSize) { + return AlignedAllocator<memalign>::valloc(aSize); + } + + static size_t malloc_usable_size(usable_ptr_t); + + static void jemalloc_stats_internal(jemalloc_stats_t*, jemalloc_bin_stats_t*); + + static void jemalloc_ptr_info(const void*, jemalloc_ptr_info_t*); + +# define MALLOC_DECL(name, return_type, ...) \ + static return_type name(__VA_ARGS__); +# define MALLOC_FUNCS MALLOC_FUNCS_ARENA_ALLOC +# include "malloc_decls.h" +}; +# endif + +# ifdef MOZ_REPLACE_MALLOC +// The replace-malloc allocator +struct ReplaceMalloc { +# define MALLOC_DECL(name, return_type, ...) \ + static return_type name(__VA_ARGS__); +# include "malloc_decls.h" +}; +# endif + +# ifdef MOZ_PHC +using CanonicalMalloc = MozJemallocPHC; +# else +using CanonicalMalloc = MozJemalloc; +# endif + +# ifdef MOZ_REPLACE_MALLOC +using DefaultMalloc = ReplaceMalloc; +# else +using DefaultMalloc = CanonicalMalloc; +# endif + +// Poison - write "poison" to cells upon deallocation. +constexpr uint8_t kAllocPoison = 0xe5; + +// Junk - write this junk value to freshly allocated cells. +constexpr uint8_t kAllocJunk = 0xe4; + +#endif // MOZ_MEMORY + +// Dummy implementation of the moz_arena_* API, falling back to a given +// implementation of the base allocator. +template <typename T> +struct DummyArenaAllocator { + static arena_id_t moz_create_arena_with_params(arena_params_t*) { return 0; } + + static void moz_dispose_arena(arena_id_t) {} + + static void moz_set_max_dirty_page_modifier(int32_t) {} + +#define MALLOC_DECL(name, return_type, ...) \ + static return_type moz_arena_##name( \ + arena_id_t, ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \ + return T::name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \ + } +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#include "malloc_decls.h" +}; + +#endif diff --git a/memory/build/mozjemalloc_types.h b/memory/build/mozjemalloc_types.h new file mode 100644 index 0000000000..1aff0789d2 --- /dev/null +++ b/memory/build/mozjemalloc_types.h @@ -0,0 +1,200 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Portions of this file were originally under the following license: +// +// Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice(s), this list of conditions and the following disclaimer as +// the first lines of this file unmodified other than the possible +// addition of one or more copyright notices. +// 2. Redistributions in binary form must reproduce the above copyright +// notice(s), this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef _JEMALLOC_TYPES_H_ +#define _JEMALLOC_TYPES_H_ + +#include <stdint.h> + +// grab size_t +#ifdef _MSC_VER +# include <crtdefs.h> +#else +# include <stddef.h> +#endif +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef MALLOC_USABLE_SIZE_CONST_PTR +# define MALLOC_USABLE_SIZE_CONST_PTR const +#endif + +typedef MALLOC_USABLE_SIZE_CONST_PTR void* usable_ptr_t; + +typedef size_t arena_id_t; + +#define ARENA_FLAG_RANDOMIZE_SMALL_MASK 0x3 +#define ARENA_FLAG_RANDOMIZE_SMALL_DEFAULT 0 +#define ARENA_FLAG_RANDOMIZE_SMALL_ENABLED 1 +#define ARENA_FLAG_RANDOMIZE_SMALL_DISABLED 2 + +// Arenas are usually protected by a lock (ARENA_FLAG_THREAD_SAFE) however some +// arenas are accessed by only the main thread +// (ARENA_FLAG_THREAD_MAIN_THREAD_ONLY) and their locking can be skipped. +#define ARENA_FLAG_THREAD_MASK 0x4 +#define ARENA_FLAG_THREAD_MAIN_THREAD_ONLY 0x4 +#define ARENA_FLAG_THREAD_SAFE 0x0 + +typedef struct arena_params_s { + size_t mMaxDirty; + // Arena specific modifiers which override the value passed to + // moz_set_max_dirty_page_modifier. If value > 0 is passed to that function, + // and mMaxDirtyIncreaseOverride != 0, mMaxDirtyIncreaseOverride will be used + // instead, and similarly if value < 0 is passed and mMaxDirtyDecreaseOverride + // != 0, mMaxDirtyDecreaseOverride will be used as the modifier. + int32_t mMaxDirtyIncreaseOverride; + int32_t mMaxDirtyDecreaseOverride; + + uint32_t mFlags; + +#ifdef __cplusplus + arena_params_s() + : mMaxDirty(0), + mMaxDirtyIncreaseOverride(0), + mMaxDirtyDecreaseOverride(0), + mFlags(0) {} +#endif +} arena_params_t; + +// jemalloc_stats() is not a stable interface. When using jemalloc_stats_t, be +// sure that the compiled results of jemalloc.c are in sync with this header +// file. +typedef struct { + // Run-time configuration settings. + bool opt_junk; // Fill allocated memory with kAllocJunk? + bool opt_zero; // Fill allocated memory with 0x0? + size_t narenas; // Number of arenas. + size_t quantum; // Allocation quantum. + size_t quantum_max; // Max quantum-spaced allocation size. + size_t quantum_wide; // Allocation quantum (QuantuWide). + size_t quantum_wide_max; // Max quantum-wide-spaced allocation size. + size_t subpage_max; // Max subpage allocation size. + size_t large_max; // Max sub-chunksize allocation size. + size_t chunksize; // Size of each virtual memory mapping. + size_t page_size; // Size of pages. + size_t dirty_max; // Max dirty pages per arena. + + // Current memory usage statistics. + size_t mapped; // Bytes mapped (not necessarily committed). + size_t allocated; // Bytes allocated (committed, in use by application). + size_t waste; // Bytes committed, not in use by the + // application, and not intentionally left + // unused (i.e., not dirty). + size_t page_cache; // Committed, unused pages kept around as a + // cache. (jemalloc calls these "dirty".) + size_t bookkeeping; // Committed bytes used internally by the + // allocator. + size_t bin_unused; // Bytes committed to a bin but currently unused. +} jemalloc_stats_t; + +typedef struct { + size_t size; // The size of objects in this bin, zero if this + // bin stats array entry is unused (no more bins). + size_t num_non_full_runs; // The number of non-full runs + size_t num_runs; // The number of runs in this bin + size_t bytes_unused; // The unallocated bytes across all these bins + size_t bytes_total; // The total storage area for runs in this bin, + size_t bytes_per_run; // The number of bytes per run, including headers. +} jemalloc_bin_stats_t; + +enum PtrInfoTag { + // The pointer is not currently known to the allocator. + // 'addr', 'size', and 'arenaId' are always 0. + TagUnknown, + + // The pointer is within a live allocation. + // 'addr', 'size', and 'arenaId' describe the allocation. + TagLiveAlloc, + + // The pointer is within a small freed allocation. + // 'addr', 'size', and 'arenaId' describe the allocation. + TagFreedAlloc, + + // The pointer is within a freed page. Details about the original + // allocation, including its size, are not available. + // 'addr', 'size', and 'arenaId' describe the page. + TagFreedPage, +}; + +// The information in jemalloc_ptr_info_t could be represented in a variety of +// ways. The chosen representation has the following properties. +// - The number of fields is minimized. +// - The 'tag' field unambiguously defines the meaning of the subsequent fields. +// Helper functions are used to group together related categories of tags. +typedef struct jemalloc_ptr_info_s { + enum PtrInfoTag tag; + void* addr; // meaning depends on tag; see above + size_t size; // meaning depends on tag; see above + +#ifdef MOZ_DEBUG + arena_id_t arenaId; // meaning depends on tag; see above +#endif + +#ifdef __cplusplus + jemalloc_ptr_info_s() = default; + jemalloc_ptr_info_s(enum PtrInfoTag aTag, void* aAddr, size_t aSize, + arena_id_t aArenaId) + : tag(aTag), + addr(aAddr), + size(aSize) +# ifdef MOZ_DEBUG + , + arenaId(aArenaId) +# endif + { + } +#endif +} jemalloc_ptr_info_t; + +static inline bool jemalloc_ptr_is_live(jemalloc_ptr_info_t* info) { + return info->tag == TagLiveAlloc; +} + +static inline bool jemalloc_ptr_is_freed(jemalloc_ptr_info_t* info) { + return info->tag == TagFreedAlloc || info->tag == TagFreedPage; +} + +static inline bool jemalloc_ptr_is_freed_page(jemalloc_ptr_info_t* info) { + return info->tag == TagFreedPage; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // _JEMALLOC_TYPES_H_ diff --git a/memory/build/mozmemory.h b/memory/build/mozmemory.h new file mode 100644 index 0000000000..bd009052a0 --- /dev/null +++ b/memory/build/mozmemory.h @@ -0,0 +1,81 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozmemory_h +#define mozmemory_h + +// This header is meant to be used when the following functions are +// necessary: +// - malloc_good_size (used to be called je_malloc_usable_in_advance) +// - jemalloc_stats +// - jemalloc_stats_num_bins +// - jemalloc_purge_freed_pages +// - jemalloc_free_dirty_pages +// - jemalloc_thread_local_arena +// - jemalloc_ptr_info + +#ifdef MALLOC_H +# include MALLOC_H +#endif +#include "mozmemory_wrap.h" +#include "mozilla/Attributes.h" +#include "mozilla/Types.h" +#include "mozjemalloc_types.h" +#include "stdbool.h" + +#ifdef MOZ_MEMORY +// On OSX, malloc/malloc.h contains the declaration for malloc_good_size, +// which will call back in jemalloc, through the zone allocator so just use it. +# ifndef XP_DARWIN +MOZ_MEMORY_API size_t malloc_good_size_impl(size_t size); + +// Note: the MOZ_GLUE_IN_PROGRAM ifdef below is there to avoid -Werror turning +// the protective if into errors. MOZ_GLUE_IN_PROGRAM is what triggers MFBT_API +// to use weak imports. +static inline size_t _malloc_good_size(size_t size) { +# if defined(MOZ_GLUE_IN_PROGRAM) && !defined(IMPL_MFBT) + if (!malloc_good_size) return size; +# endif + return malloc_good_size_impl(size); +} + +# define malloc_good_size _malloc_good_size +# endif + +# define MALLOC_DECL(name, return_type, ...) \ + MOZ_JEMALLOC_API return_type name(__VA_ARGS__); +# define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC +# include "malloc_decls.h" + +// jemalloc_stats may only be called on the main thread so that it can access +// main thread only arenas. +# ifdef __cplusplus +static inline void jemalloc_stats(jemalloc_stats_t* aStats, + jemalloc_bin_stats_t* aBinStats = nullptr) { + jemalloc_stats_internal(aStats, aBinStats); +} +# else +static inline void jemalloc_stats(jemalloc_stats_t* aStats) { + jemalloc_stats_internal(aStats, NULL); +} +# endif + +#endif // MOZ_MEMORY + +#define NOTHROW_MALLOC_DECL(name, return_type, ...) \ + MOZ_JEMALLOC_API return_type name(__VA_ARGS__) noexcept(true); +#define MALLOC_DECL(name, return_type, ...) \ + MOZ_JEMALLOC_API return_type name(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_ARENA +#include "malloc_decls.h" + +#ifdef __cplusplus +# define moz_create_arena() moz_create_arena_with_params(nullptr) +#else +# define moz_create_arena() moz_create_arena_with_params(NULL) +#endif + +#endif // mozmemory_h diff --git a/memory/build/mozmemory_utils.h b/memory/build/mozmemory_utils.h new file mode 100644 index 0000000000..af9e96b4c7 --- /dev/null +++ b/memory/build/mozmemory_utils.h @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozjemalloc_utils_h +#define mozjemalloc_utils_h + +#include <optional> +#include <type_traits> + +#if defined(MOZ_MEMORY) && defined(XP_WIN) +# include "mozmemory_wrap.h" +#endif + +namespace mozilla { + +namespace detail { +// Helper for StallAndRetry error messages. +template <typename T> +constexpr bool is_std_optional = false; +template <typename T> +constexpr bool is_std_optional<std::optional<T>> = true; +} // namespace detail + +struct StallSpecs { + // Maximum number of retry-attempts before giving up. + size_t maxAttempts; + // Delay time between successive events. + size_t delayMs; + + // Retry a fallible operation until it succeeds or until we've run out of + // retries. + // + // Note that this invokes `aDelayFunc` immediately upon being called! It's + // intended for use in the unhappy path, after an initial attempt has failed. + // + // The function type here may be read: + // ``` + // fn StallAndRetry<R>( + // delay_func: impl Fn(usize) -> (), + // operation: impl Fn() -> Option<R>, + // ) -> Option<R>; + // ``` + // + template <typename DelayFunc, typename OpFunc> + auto StallAndRetry(DelayFunc&& aDelayFunc, OpFunc&& aOperation) const + -> decltype(aOperation()) { + { + // Explicit typecheck for OpFunc, to provide an explicit error message. + using detail::is_std_optional; + static_assert(is_std_optional<decltype(aOperation())>, + "aOperation() must return std::optional"); + + // (clang's existing error messages suffice for aDelayFunc.) + } + + for (size_t i = 0; i < maxAttempts; ++i) { + aDelayFunc(delayMs); + if (const auto opt = aOperation()) { + return opt; + } + } + return std::nullopt; + } +}; + +#if defined(MOZ_MEMORY) && defined(XP_WIN) +MOZ_JEMALLOC_API StallSpecs GetAllocatorStallSpecs(); +#endif + +} // namespace mozilla + +#endif // mozjemalloc_utils_h diff --git a/memory/build/mozmemory_wrap.cpp b/memory/build/mozmemory_wrap.cpp new file mode 100644 index 0000000000..3420fd9693 --- /dev/null +++ b/memory/build/mozmemory_wrap.cpp @@ -0,0 +1,145 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <string.h> +#include "mozmemory_wrap.h" +#include "mozilla/Types.h" + +// Declare malloc implementation functions with the right return and +// argument types. +#define NOTHROW_MALLOC_DECL(name, return_type, ...) \ + MOZ_MEMORY_API return_type name##_impl(__VA_ARGS__) noexcept(true); +#define MALLOC_DECL(name, return_type, ...) \ + MOZ_MEMORY_API return_type name##_impl(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC +#include "malloc_decls.h" + +// strndup and strdup may be defined as macros in string.h, which would +// clash with the definitions below. +#undef strndup +#undef strdup + +MOZ_MEMORY_API char* strndup_impl(const char* src, size_t len) { + char* dst = (char*)malloc_impl(len + 1); + if (dst) { + strncpy(dst, src, len); + dst[len] = '\0'; + } + return dst; +} + +MOZ_MEMORY_API char* strdup_impl(const char* src) { + size_t len = strlen(src); + return strndup_impl(src, len); +} + +#ifdef ANDROID +# include <stdarg.h> +# include <stdio.h> + +MOZ_MEMORY_API int vasprintf_impl(char** str, const char* fmt, va_list ap) { + char *ptr, *_ptr; + int ret; + + if (str == NULL || fmt == NULL) { + return -1; + } + + ptr = (char*)malloc_impl(128); + if (ptr == NULL) { + *str = NULL; + return -1; + } + + ret = vsnprintf(ptr, 128, fmt, ap); + if (ret < 0) { + free_impl(ptr); + *str = NULL; + return -1; + } + + _ptr = reinterpret_cast<char*>(realloc_impl(ptr, ret + 1)); + if (_ptr == NULL) { + free_impl(ptr); + *str = NULL; + return -1; + } + + *str = _ptr; + + return ret; +} + +MOZ_MEMORY_API int asprintf_impl(char** str, const char* fmt, ...) { + int ret; + va_list ap; + va_start(ap, fmt); + + ret = vasprintf_impl(str, fmt, ap); + + va_end(ap); + + return ret; +} +#endif + +#ifdef XP_WIN +# include <wchar.h> + +// We also need to provide our own impl of wcsdup so that we don't ask +// the CRT for memory from its heap (which will then be unfreeable). +MOZ_MEMORY_API wchar_t* wcsdup_impl(const wchar_t* src) { + size_t len = wcslen(src); + wchar_t* dst = (wchar_t*)malloc_impl((len + 1) * sizeof(wchar_t)); + if (dst) wcsncpy(dst, src, len + 1); + return dst; +} + +MOZ_MEMORY_API void* _aligned_malloc_impl(size_t size, size_t alignment) { + return memalign_impl(alignment, size); +} + +# ifdef __MINGW32__ +MOZ_BEGIN_EXTERN_C +// As in mozjemalloc.cpp, we generate aliases for functions +// redirected in mozglue.def +void* _aligned_malloc(size_t size, size_t alignment) + __attribute__((alias(MOZ_STRINGIFY(_aligned_malloc_impl)))); +void _aligned_free(void* aPtr) __attribute__((alias(MOZ_STRINGIFY(free_impl)))); + +char* strndup(const char* src, size_t len) + __attribute__((alias(MOZ_STRINGIFY(strdup_impl)))); +char* strdup(const char* src) + __attribute__((alias(MOZ_STRINGIFY(strdup_impl)))); +char* _strdup(const char* src) + __attribute__((alias(MOZ_STRINGIFY(strdup_impl)))); +wchar_t* wcsdup(const wchar_t* src) + __attribute__((alias(MOZ_STRINGIFY(wcsdup_impl)))); +wchar_t* _wcsdup(const wchar_t* src) + __attribute__((alias(MOZ_STRINGIFY(wcsdup_impl)))); + +// jemalloc has _aligned_malloc, and friends. libc++.a contains +// references to __imp__aligned_malloc (and friends) because it +// is declared dllimport in the headers. +// +// The linker sees jemalloc's _aligned_malloc symbol in our objects, +// but then libc++.a comes along and needs __imp__aligned_malloc, which +// pulls in those parts of libucrt.a (or libmsvcrt.a in practice), +// which define both __imp__aligned_malloc and _aligned_malloc, and +// this causes a conflict. (And repeat for each of the symbols defined +// here.) +// +// The fix is to define not only an _aligned_malloc symbol (via an +// alias), but also define the __imp__aligned_malloc pointer to it. +// This prevents those parts of libucrt from being pulled in and causing +// conflicts. +// This is done with __MINGW_IMP_SYMBOL to handle x86/x64 differences. +void (*__MINGW_IMP_SYMBOL(_aligned_free))(void*) = _aligned_free; +void* (*__MINGW_IMP_SYMBOL(_aligned_malloc))(size_t, size_t) = _aligned_malloc; +char* (*__MINGW_IMP_SYMBOL(_strdup))(const char* src) = _strdup; +MOZ_END_EXTERN_C +# endif +#endif // XP_WIN diff --git a/memory/build/mozmemory_wrap.h b/memory/build/mozmemory_wrap.h new file mode 100644 index 0000000000..92d0a1176a --- /dev/null +++ b/memory/build/mozmemory_wrap.h @@ -0,0 +1,166 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozmemory_wrap_h +#define mozmemory_wrap_h + +// This header contains #defines which tweak the names of various memory +// allocation functions. +// +// There are several types of functions related to memory allocation +// that are meant to be used publicly by the Gecko codebase: +// +// - malloc implementation functions: +// - malloc +// - posix_memalign +// - aligned_alloc +// - calloc +// - realloc +// - free +// - memalign +// - valloc +// - malloc_usable_size +// - malloc_good_size +// Some of these functions are specific to some systems, but for +// convenience, they are treated as being cross-platform, and available +// as such. +// +// - duplication functions: +// - strndup +// - strdup +// - wcsdup (Windows only) +// +// - jemalloc specific functions: +// - jemalloc_stats +// - jemalloc_stats_num_bins +// - jemalloc_purge_freed_pages +// - jemalloc_free_dirty_pages +// - jemalloc_thread_local_arena +// - jemalloc_ptr_info +// (these functions are native to mozjemalloc) +// +// These functions are all exported as part of libmozglue (see +// $(topsrcdir)/mozglue/build/Makefile.in), with a few implementation +// peculiarities: +// +// - On Windows, the malloc implementation functions are all prefixed with +// "je_", the duplication functions are prefixed with "wrap_", and jemalloc +// specific functions are left unprefixed. All these functions are however +// aliased when exporting them, such that the resulting mozglue.dll exports +// them unprefixed (see $(topsrcdir)/mozglue/build/mozglue.def.in). The +// prefixed malloc implementation and duplication functions are not +// exported. +// +// - On MacOSX, the system libc has a zone allocator, which allows us to +// hook custom malloc implementation functions without exporting them. +// However, since we want things in Firefox to skip the system zone +// allocator, the malloc implementation functions are all exported +// unprefixed, as well as duplication functions. +// Jemalloc-specific functions are also left unprefixed. +// +// - On Android all functions are left unprefixed. +// +// - On other systems (mostly Linux), all functions are left unprefixed. +// +// On all platforms, C++ allocation functions are also exported. +// +// Proper exporting of the various functions is done with the MOZ_MEMORY_API +// and MOZ_JEMALLOC_API macros. MOZ_MEMORY_API is meant to be used for malloc +// implementation and duplication functions, while MOZ_JEMALLOC_API is +// dedicated to jemalloc specific functions. +// +// +// All these functions are meant to be called with no prefix from Gecko code. +// In most cases, this is because that's how they are available at runtime. +// However, on Android, this relies on faulty.lib (the custom dynamic linker) +// resolving mozglue symbols before libc symbols, which is guaranteed by the +// way faulty.lib works (it respects the DT_NEEDED order, and libc always +// appears after mozglue ; which we double check when building anyways) +// +// +// Within libmozglue (when MOZ_MEMORY_IMPL is defined), all the functions +// should be suffixed with "_impl" both for declarations and use. +// That is, the implementation declaration for e.g. strdup would look like: +// char* strdup_impl(const char *) +// That implementation would call malloc by using "malloc_impl". + +#if defined(MOZ_MEMORY_IMPL) && !defined(IMPL_MFBT) +# ifdef MFBT_API // mozilla/Types.h was already included +# error mozmemory_wrap.h has to be included before mozilla/Types.h when MOZ_MEMORY_IMPL is set and IMPL_MFBT is not. +# endif +# define IMPL_MFBT +#endif + +#include "mozilla/Types.h" + +#ifndef MOZ_EXTERN_C +# ifdef __cplusplus +# define MOZ_EXTERN_C extern "C" +# else +# define MOZ_EXTERN_C +# endif +#endif + +#ifdef MOZ_MEMORY_IMPL +# define MOZ_JEMALLOC_API MOZ_EXTERN_C MFBT_API +# if defined(XP_WIN) +# define mozmem_malloc_impl(a) je_##a +# else +# define MOZ_MEMORY_API MOZ_EXTERN_C MFBT_API +# endif +#endif +#ifdef XP_WIN +# define mozmem_dup_impl(a) wrap_##a +#endif + +#if !defined(MOZ_MEMORY_IMPL) +# define MOZ_MEMORY_API MOZ_EXTERN_C MFBT_API +# define MOZ_JEMALLOC_API MOZ_EXTERN_C MFBT_API +#endif + +#ifndef MOZ_MEMORY_API +# define MOZ_MEMORY_API MOZ_EXTERN_C +#endif +#ifndef MOZ_JEMALLOC_API +# define MOZ_JEMALLOC_API MOZ_EXTERN_C +#endif + +#ifndef mozmem_malloc_impl +# define mozmem_malloc_impl(a) a +#endif +#ifndef mozmem_dup_impl +# define mozmem_dup_impl(a) a +#endif + +// Malloc implementation functions +#define malloc_impl mozmem_malloc_impl(malloc) +#define posix_memalign_impl mozmem_malloc_impl(posix_memalign) +#define aligned_alloc_impl mozmem_malloc_impl(aligned_alloc) +#define calloc_impl mozmem_malloc_impl(calloc) +#define realloc_impl mozmem_malloc_impl(realloc) +#define free_impl mozmem_malloc_impl(free) +#define memalign_impl mozmem_malloc_impl(memalign) +#define valloc_impl mozmem_malloc_impl(valloc) +#define malloc_usable_size_impl mozmem_malloc_impl(malloc_usable_size) +#define malloc_good_size_impl mozmem_malloc_impl(malloc_good_size) + +// Duplication functions +#define strndup_impl mozmem_dup_impl(strndup) +#define strdup_impl mozmem_dup_impl(strdup) +#ifdef XP_WIN +# define wcsdup_impl mozmem_dup_impl(wcsdup) +# define _aligned_malloc_impl mozmem_dup_impl(_aligned_malloc) +#endif + +// String functions +#ifdef ANDROID +// Bug 801571 and Bug 879668, libstagefright uses vasprintf, causing malloc()/ +// free() to be mismatched between bionic and mozglue implementation. +# define vasprintf_impl mozmem_dup_impl(vasprintf) +# define asprintf_impl mozmem_dup_impl(asprintf) +#endif + +#endif // mozmemory_wrap_h diff --git a/memory/build/rb.h b/memory/build/rb.h new file mode 100644 index 0000000000..418d206911 --- /dev/null +++ b/memory/build/rb.h @@ -0,0 +1,741 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Portions of this file were originally under the following license: +// +// Copyright (C) 2008 Jason Evans <jasone@FreeBSD.org>. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice(s), this list of conditions and the following disclaimer +// unmodified other than the allowable addition of one or more +// copyright notices. +// 2. Redistributions in binary form must reproduce the above copyright +// notice(s), this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// **************************************************************************** +// +// C++ template implementation of left-leaning red-black trees. +// +// All operations are done non-recursively. Parent pointers are not used, and +// color bits are stored in the least significant bit of right-child pointers, +// thus making node linkage as compact as is possible for red-black trees. +// +// The RedBlackTree template expects two type arguments: the type of the nodes, +// containing a RedBlackTreeNode, and a trait providing two methods: +// - a GetTreeNode method that returns a reference to the RedBlackTreeNode +// corresponding to a given node with the following signature: +// static RedBlackTreeNode<T>& GetTreeNode(T*) +// - a Compare function with the following signature: +// static Order Compare(T* aNode, T* aOther) +// ^^^^^ +// or aKey +// +// Interpretation of comparision function return values: +// +// Order::eLess: aNode < aOther +// Order::eEqual: aNode == aOther +// Order::eGreater: aNode > aOther +// +// In all cases, the aNode or aKey argument is the first argument to the +// comparison function, which makes it possible to write comparison functions +// that treat the first argument specially. +// +// *************************************************************************** + +#ifndef RB_H_ +#define RB_H_ + +#include "mozilla/Alignment.h" +#include "mozilla/Assertions.h" +#include "Utils.h" + +enum NodeColor { + Black = 0, + Red = 1, +}; + +// Node structure. +template <typename T> +class RedBlackTreeNode { + T* mLeft; + // The lowest bit is the color + T* mRightAndColor; + + public: + T* Left() { return mLeft; } + + void SetLeft(T* aValue) { mLeft = aValue; } + + T* Right() { + return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(mRightAndColor) & + uintptr_t(~1)); + } + + void SetRight(T* aValue) { + mRightAndColor = reinterpret_cast<T*>( + (reinterpret_cast<uintptr_t>(aValue) & uintptr_t(~1)) | Color()); + } + + NodeColor Color() { + return static_cast<NodeColor>(reinterpret_cast<uintptr_t>(mRightAndColor) & + 1); + } + + bool IsBlack() { return Color() == NodeColor::Black; } + + bool IsRed() { return Color() == NodeColor::Red; } + + void SetColor(NodeColor aColor) { + mRightAndColor = reinterpret_cast<T*>( + (reinterpret_cast<uintptr_t>(mRightAndColor) & uintptr_t(~1)) | aColor); + } +}; + +// Tree structure. +template <typename T, typename Trait> +class RedBlackTree { + public: + void Init() { mRoot = nullptr; } + + T* First(T* aStart = nullptr) { return First(TreeNode(aStart)).Get(); } + + T* Last(T* aStart = nullptr) { return Last(TreeNode(aStart)).Get(); } + + T* Next(T* aNode) { return Next(TreeNode(aNode)).Get(); } + + T* Prev(T* aNode) { return Prev(TreeNode(aNode)).Get(); } + + T* Search(T* aKey) { return Search(TreeNode(aKey)).Get(); } + + // Find a match if it exists. Otherwise, find the next greater node, if one + // exists. + T* SearchOrNext(T* aKey) { return SearchOrNext(TreeNode(aKey)).Get(); } + + void Insert(T* aNode) { Insert(TreeNode(aNode)); } + + void Remove(T* aNode) { Remove(TreeNode(aNode)); } + + // Helper class to avoid having all the tree traversal code further below + // have to use Trait::GetTreeNode and do manual null pointer checks, adding + // visual noise. Practically speaking TreeNode(nullptr) acts as a virtual + // sentinel, that loops back to itself for Left() and Right() and is always + // black. + class TreeNode { + public: + constexpr TreeNode() : mNode(nullptr) {} + + MOZ_IMPLICIT TreeNode(T* aNode) : mNode(aNode) {} + + TreeNode& operator=(TreeNode aOther) { + mNode = aOther.mNode; + return *this; + } + + TreeNode Left() { + return TreeNode(mNode ? Trait::GetTreeNode(mNode).Left() : nullptr); + } + + void SetLeft(TreeNode aNode) { + MOZ_RELEASE_ASSERT(mNode); + Trait::GetTreeNode(mNode).SetLeft(aNode.mNode); + } + + TreeNode Right() { + return TreeNode(mNode ? Trait::GetTreeNode(mNode).Right() : nullptr); + } + + void SetRight(TreeNode aNode) { + MOZ_RELEASE_ASSERT(mNode); + Trait::GetTreeNode(mNode).SetRight(aNode.mNode); + } + + NodeColor Color() { + return mNode ? Trait::GetTreeNode(mNode).Color() : NodeColor::Black; + } + + bool IsRed() { return Color() == NodeColor::Red; } + + bool IsBlack() { return Color() == NodeColor::Black; } + + void SetColor(NodeColor aColor) { + MOZ_RELEASE_ASSERT(mNode); + Trait::GetTreeNode(mNode).SetColor(aColor); + } + + T* Get() { return mNode; } + + MOZ_IMPLICIT operator bool() { return !!mNode; } + + bool operator==(TreeNode& aOther) { return mNode == aOther.mNode; } + + private: + T* mNode; + }; + + private: + // Ideally we'd use a TreeNode for mRoot, but we need RedBlackTree to stay + // a POD type to avoid a static initializer for gArenas. + T* mRoot; + + TreeNode First(TreeNode aStart) { + TreeNode ret; + for (ret = aStart ? aStart : mRoot; ret.Left(); ret = ret.Left()) { + } + return ret; + } + + TreeNode Last(TreeNode aStart) { + TreeNode ret; + for (ret = aStart ? aStart : mRoot; ret.Right(); ret = ret.Right()) { + } + return ret; + } + + TreeNode Next(TreeNode aNode) { + TreeNode ret; + if (aNode.Right()) { + ret = First(aNode.Right()); + } else { + TreeNode rbp_n_t = mRoot; + MOZ_ASSERT(rbp_n_t); + ret = nullptr; + while (true) { + Order rbp_n_cmp = Trait::Compare(aNode.Get(), rbp_n_t.Get()); + if (rbp_n_cmp == Order::eLess) { + ret = rbp_n_t; + rbp_n_t = rbp_n_t.Left(); + } else if (rbp_n_cmp == Order::eGreater) { + rbp_n_t = rbp_n_t.Right(); + } else { + break; + } + MOZ_ASSERT(rbp_n_t); + } + } + return ret; + } + + TreeNode Prev(TreeNode aNode) { + TreeNode ret; + if (aNode.Left()) { + ret = Last(aNode.Left()); + } else { + TreeNode rbp_p_t = mRoot; + MOZ_ASSERT(rbp_p_t); + ret = nullptr; + while (true) { + Order rbp_p_cmp = Trait::Compare(aNode.Get(), rbp_p_t.Get()); + if (rbp_p_cmp == Order::eLess) { + rbp_p_t = rbp_p_t.Left(); + } else if (rbp_p_cmp == Order::eGreater) { + ret = rbp_p_t; + rbp_p_t = rbp_p_t.Right(); + } else { + break; + } + MOZ_ASSERT(rbp_p_t); + } + } + return ret; + } + + TreeNode Search(TreeNode aKey) { + TreeNode ret = mRoot; + Order rbp_se_cmp; + while (ret && (rbp_se_cmp = Trait::Compare(aKey.Get(), ret.Get())) != + Order::eEqual) { + if (rbp_se_cmp == Order::eLess) { + ret = ret.Left(); + } else { + ret = ret.Right(); + } + } + return ret; + } + + TreeNode SearchOrNext(TreeNode aKey) { + TreeNode ret = nullptr; + TreeNode rbp_ns_t = mRoot; + while (rbp_ns_t) { + Order rbp_ns_cmp = Trait::Compare(aKey.Get(), rbp_ns_t.Get()); + if (rbp_ns_cmp == Order::eLess) { + ret = rbp_ns_t; + rbp_ns_t = rbp_ns_t.Left(); + } else if (rbp_ns_cmp == Order::eGreater) { + rbp_ns_t = rbp_ns_t.Right(); + } else { + ret = rbp_ns_t; + break; + } + } + return ret; + } + + void Insert(TreeNode aNode) { + // rbp_i_s is only used as a placeholder for its RedBlackTreeNode. Use + // AlignedStorage2 to avoid running the TreeNode base class constructor. + mozilla::AlignedStorage2<T> rbp_i_s; + TreeNode rbp_i_g, rbp_i_p, rbp_i_c, rbp_i_t, rbp_i_u; + Order rbp_i_cmp = Order::eEqual; + rbp_i_g = nullptr; + rbp_i_p = rbp_i_s.addr(); + rbp_i_p.SetLeft(mRoot); + rbp_i_p.SetRight(nullptr); + rbp_i_p.SetColor(NodeColor::Black); + rbp_i_c = mRoot; + // Iteratively search down the tree for the insertion point, + // splitting 4-nodes as they are encountered. At the end of each + // iteration, rbp_i_g->rbp_i_p->rbp_i_c is a 3-level path down + // the tree, assuming a sufficiently deep tree. + while (rbp_i_c) { + rbp_i_t = rbp_i_c.Left(); + rbp_i_u = rbp_i_t.Left(); + if (rbp_i_t.IsRed() && rbp_i_u.IsRed()) { + // rbp_i_c is the top of a logical 4-node, so split it. + // This iteration does not move down the tree, due to the + // disruptiveness of node splitting. + // + // Rotate right. + rbp_i_t = RotateRight(rbp_i_c); + // Pass red links up one level. + rbp_i_u = rbp_i_t.Left(); + rbp_i_u.SetColor(NodeColor::Black); + if (rbp_i_p.Left() == rbp_i_c) { + rbp_i_p.SetLeft(rbp_i_t); + rbp_i_c = rbp_i_t; + } else { + // rbp_i_c was the right child of rbp_i_p, so rotate + // left in order to maintain the left-leaning invariant. + MOZ_ASSERT(rbp_i_p.Right() == rbp_i_c); + rbp_i_p.SetRight(rbp_i_t); + rbp_i_u = LeanLeft(rbp_i_p); + if (rbp_i_g.Left() == rbp_i_p) { + rbp_i_g.SetLeft(rbp_i_u); + } else { + MOZ_ASSERT(rbp_i_g.Right() == rbp_i_p); + rbp_i_g.SetRight(rbp_i_u); + } + rbp_i_p = rbp_i_u; + rbp_i_cmp = Trait::Compare(aNode.Get(), rbp_i_p.Get()); + if (rbp_i_cmp == Order::eLess) { + rbp_i_c = rbp_i_p.Left(); + } else { + MOZ_ASSERT(rbp_i_cmp == Order::eGreater); + rbp_i_c = rbp_i_p.Right(); + } + continue; + } + } + rbp_i_g = rbp_i_p; + rbp_i_p = rbp_i_c; + rbp_i_cmp = Trait::Compare(aNode.Get(), rbp_i_c.Get()); + if (rbp_i_cmp == Order::eLess) { + rbp_i_c = rbp_i_c.Left(); + } else { + MOZ_ASSERT(rbp_i_cmp == Order::eGreater); + rbp_i_c = rbp_i_c.Right(); + } + } + // rbp_i_p now refers to the node under which to insert. + aNode.SetLeft(nullptr); + aNode.SetRight(nullptr); + aNode.SetColor(NodeColor::Red); + if (rbp_i_cmp == Order::eGreater) { + rbp_i_p.SetRight(aNode); + rbp_i_t = LeanLeft(rbp_i_p); + if (rbp_i_g.Left() == rbp_i_p) { + rbp_i_g.SetLeft(rbp_i_t); + } else if (rbp_i_g.Right() == rbp_i_p) { + rbp_i_g.SetRight(rbp_i_t); + } + } else { + rbp_i_p.SetLeft(aNode); + } + // Update the root and make sure that it is black. + TreeNode root = TreeNode(rbp_i_s.addr()).Left(); + root.SetColor(NodeColor::Black); + mRoot = root.Get(); + } + + void Remove(TreeNode aNode) { + // rbp_r_s is only used as a placeholder for its RedBlackTreeNode. Use + // AlignedStorage2 to avoid running the TreeNode base class constructor. + mozilla::AlignedStorage2<T> rbp_r_s; + TreeNode rbp_r_p, rbp_r_c, rbp_r_xp, rbp_r_t, rbp_r_u; + Order rbp_r_cmp; + rbp_r_p = TreeNode(rbp_r_s.addr()); + rbp_r_p.SetLeft(mRoot); + rbp_r_p.SetRight(nullptr); + rbp_r_p.SetColor(NodeColor::Black); + rbp_r_c = mRoot; + rbp_r_xp = nullptr; + // Iterate down the tree, but always transform 2-nodes to 3- or + // 4-nodes in order to maintain the invariant that the current + // node is not a 2-node. This allows simple deletion once a leaf + // is reached. Handle the root specially though, since there may + // be no way to convert it from a 2-node to a 3-node. + rbp_r_cmp = Trait::Compare(aNode.Get(), rbp_r_c.Get()); + if (rbp_r_cmp == Order::eLess) { + rbp_r_t = rbp_r_c.Left(); + rbp_r_u = rbp_r_t.Left(); + if (rbp_r_t.IsBlack() && rbp_r_u.IsBlack()) { + // Apply standard transform to prepare for left move. + rbp_r_t = MoveRedLeft(rbp_r_c); + rbp_r_t.SetColor(NodeColor::Black); + rbp_r_p.SetLeft(rbp_r_t); + rbp_r_c = rbp_r_t; + } else { + // Move left. + rbp_r_p = rbp_r_c; + rbp_r_c = rbp_r_c.Left(); + } + } else { + if (rbp_r_cmp == Order::eEqual) { + MOZ_ASSERT(aNode == rbp_r_c); + if (!rbp_r_c.Right()) { + // Delete root node (which is also a leaf node). + if (rbp_r_c.Left()) { + rbp_r_t = LeanRight(rbp_r_c); + rbp_r_t.SetRight(nullptr); + } else { + rbp_r_t = nullptr; + } + rbp_r_p.SetLeft(rbp_r_t); + } else { + // This is the node we want to delete, but we will + // instead swap it with its successor and delete the + // successor. Record enough information to do the + // swap later. rbp_r_xp is the aNode's parent. + rbp_r_xp = rbp_r_p; + rbp_r_cmp = Order::eGreater; // Note that deletion is incomplete. + } + } + if (rbp_r_cmp == Order::eGreater) { + if (rbp_r_c.Right().Left().IsBlack()) { + rbp_r_t = rbp_r_c.Left(); + if (rbp_r_t.IsRed()) { + // Standard transform. + rbp_r_t = MoveRedRight(rbp_r_c); + } else { + // Root-specific transform. + rbp_r_c.SetColor(NodeColor::Red); + rbp_r_u = rbp_r_t.Left(); + if (rbp_r_u.IsRed()) { + rbp_r_u.SetColor(NodeColor::Black); + rbp_r_t = RotateRight(rbp_r_c); + rbp_r_u = RotateLeft(rbp_r_c); + rbp_r_t.SetRight(rbp_r_u); + } else { + rbp_r_t.SetColor(NodeColor::Red); + rbp_r_t = RotateLeft(rbp_r_c); + } + } + rbp_r_p.SetLeft(rbp_r_t); + rbp_r_c = rbp_r_t; + } else { + // Move right. + rbp_r_p = rbp_r_c; + rbp_r_c = rbp_r_c.Right(); + } + } + } + if (rbp_r_cmp != Order::eEqual) { + while (true) { + MOZ_ASSERT(rbp_r_p); + rbp_r_cmp = Trait::Compare(aNode.Get(), rbp_r_c.Get()); + if (rbp_r_cmp == Order::eLess) { + rbp_r_t = rbp_r_c.Left(); + if (!rbp_r_t) { + // rbp_r_c now refers to the successor node to + // relocate, and rbp_r_xp/aNode refer to the + // context for the relocation. + if (rbp_r_xp.Left() == aNode) { + rbp_r_xp.SetLeft(rbp_r_c); + } else { + MOZ_ASSERT(rbp_r_xp.Right() == (aNode)); + rbp_r_xp.SetRight(rbp_r_c); + } + rbp_r_c.SetLeft(aNode.Left()); + rbp_r_c.SetRight(aNode.Right()); + rbp_r_c.SetColor(aNode.Color()); + if (rbp_r_p.Left() == rbp_r_c) { + rbp_r_p.SetLeft(nullptr); + } else { + MOZ_ASSERT(rbp_r_p.Right() == rbp_r_c); + rbp_r_p.SetRight(nullptr); + } + break; + } + rbp_r_u = rbp_r_t.Left(); + if (rbp_r_t.IsBlack() && rbp_r_u.IsBlack()) { + rbp_r_t = MoveRedLeft(rbp_r_c); + if (rbp_r_p.Left() == rbp_r_c) { + rbp_r_p.SetLeft(rbp_r_t); + } else { + rbp_r_p.SetRight(rbp_r_t); + } + rbp_r_c = rbp_r_t; + } else { + rbp_r_p = rbp_r_c; + rbp_r_c = rbp_r_c.Left(); + } + } else { + // Check whether to delete this node (it has to be + // the correct node and a leaf node). + if (rbp_r_cmp == Order::eEqual) { + MOZ_ASSERT(aNode == rbp_r_c); + if (!rbp_r_c.Right()) { + // Delete leaf node. + if (rbp_r_c.Left()) { + rbp_r_t = LeanRight(rbp_r_c); + rbp_r_t.SetRight(nullptr); + } else { + rbp_r_t = nullptr; + } + if (rbp_r_p.Left() == rbp_r_c) { + rbp_r_p.SetLeft(rbp_r_t); + } else { + rbp_r_p.SetRight(rbp_r_t); + } + break; + } + // This is the node we want to delete, but we + // will instead swap it with its successor + // and delete the successor. Record enough + // information to do the swap later. + // rbp_r_xp is aNode's parent. + rbp_r_xp = rbp_r_p; + } + rbp_r_t = rbp_r_c.Right(); + rbp_r_u = rbp_r_t.Left(); + if (rbp_r_u.IsBlack()) { + rbp_r_t = MoveRedRight(rbp_r_c); + if (rbp_r_p.Left() == rbp_r_c) { + rbp_r_p.SetLeft(rbp_r_t); + } else { + rbp_r_p.SetRight(rbp_r_t); + } + rbp_r_c = rbp_r_t; + } else { + rbp_r_p = rbp_r_c; + rbp_r_c = rbp_r_c.Right(); + } + } + } + } + // Update root. + mRoot = TreeNode(rbp_r_s.addr()).Left().Get(); + aNode.SetLeft(nullptr); + aNode.SetRight(nullptr); + aNode.SetColor(NodeColor::Black); + } + + TreeNode RotateLeft(TreeNode aNode) { + TreeNode node = aNode.Right(); + aNode.SetRight(node.Left()); + node.SetLeft(aNode); + return node; + } + + TreeNode RotateRight(TreeNode aNode) { + TreeNode node = aNode.Left(); + aNode.SetLeft(node.Right()); + node.SetRight(aNode); + return node; + } + + TreeNode LeanLeft(TreeNode aNode) { + TreeNode node = RotateLeft(aNode); + NodeColor color = aNode.Color(); + node.SetColor(color); + aNode.SetColor(NodeColor::Red); + return node; + } + + TreeNode LeanRight(TreeNode aNode) { + TreeNode node = RotateRight(aNode); + NodeColor color = aNode.Color(); + node.SetColor(color); + aNode.SetColor(NodeColor::Red); + return node; + } + + TreeNode MoveRedLeft(TreeNode aNode) { + TreeNode node; + TreeNode rbp_mrl_t, rbp_mrl_u; + rbp_mrl_t = aNode.Left(); + rbp_mrl_t.SetColor(NodeColor::Red); + rbp_mrl_t = aNode.Right(); + rbp_mrl_u = rbp_mrl_t.Left(); + if (rbp_mrl_u.IsRed()) { + rbp_mrl_u = RotateRight(rbp_mrl_t); + aNode.SetRight(rbp_mrl_u); + node = RotateLeft(aNode); + rbp_mrl_t = aNode.Right(); + if (rbp_mrl_t.IsRed()) { + rbp_mrl_t.SetColor(NodeColor::Black); + aNode.SetColor(NodeColor::Red); + rbp_mrl_t = RotateLeft(aNode); + node.SetLeft(rbp_mrl_t); + } else { + aNode.SetColor(NodeColor::Black); + } + } else { + aNode.SetColor(NodeColor::Red); + node = RotateLeft(aNode); + } + return node; + } + + TreeNode MoveRedRight(TreeNode aNode) { + TreeNode node; + TreeNode rbp_mrr_t; + rbp_mrr_t = aNode.Left(); + if (rbp_mrr_t.IsRed()) { + TreeNode rbp_mrr_u, rbp_mrr_v; + rbp_mrr_u = rbp_mrr_t.Right(); + rbp_mrr_v = rbp_mrr_u.Left(); + if (rbp_mrr_v.IsRed()) { + rbp_mrr_u.SetColor(aNode.Color()); + rbp_mrr_v.SetColor(NodeColor::Black); + rbp_mrr_u = RotateLeft(rbp_mrr_t); + aNode.SetLeft(rbp_mrr_u); + node = RotateRight(aNode); + rbp_mrr_t = RotateLeft(aNode); + node.SetRight(rbp_mrr_t); + } else { + rbp_mrr_t.SetColor(aNode.Color()); + rbp_mrr_u.SetColor(NodeColor::Red); + node = RotateRight(aNode); + rbp_mrr_t = RotateLeft(aNode); + node.SetRight(rbp_mrr_t); + } + aNode.SetColor(NodeColor::Red); + } else { + rbp_mrr_t.SetColor(NodeColor::Red); + rbp_mrr_t = rbp_mrr_t.Left(); + if (rbp_mrr_t.IsRed()) { + rbp_mrr_t.SetColor(NodeColor::Black); + node = RotateRight(aNode); + rbp_mrr_t = RotateLeft(aNode); + node.SetRight(rbp_mrr_t); + } else { + node = RotateLeft(aNode); + } + } + return node; + } + + // The iterator simulates recursion via an array of pointers that store the + // current path. This is critical to performance, since a series of calls to + // rb_{next,prev}() would require time proportional to (n lg n), whereas this + // implementation only requires time proportional to (n). + // + // Since the iterator caches a path down the tree, any tree modification may + // cause the cached path to become invalid. Don't modify the tree during an + // iteration. + + // Size the path arrays such that they are always large enough, even if a + // tree consumes all of memory. Since each node must contain a minimum of + // two pointers, there can never be more nodes than: + // + // 1 << ((sizeof(void*)<<3) - (log2(sizeof(void*))+1)) + // + // Since the depth of a tree is limited to 3*lg(#nodes), the maximum depth + // is: + // + // (3 * ((sizeof(void*)<<3) - (log2(sizeof(void*))+1))) + // + // This works out to a maximum depth of 87 and 180 for 32- and 64-bit + // systems, respectively (approximately 348 and 1440 bytes, respectively). + public: + class Iterator { + TreeNode mPath[3 * ((sizeof(void*) << 3) - (LOG2(sizeof(void*)) + 1))]; + unsigned mDepth; + + public: + explicit Iterator(RedBlackTree<T, Trait>* aTree) : mDepth(0) { + // Initialize the path to contain the left spine. + if (aTree->mRoot) { + TreeNode node; + mPath[mDepth++] = aTree->mRoot; + while ((node = mPath[mDepth - 1].Left())) { + mPath[mDepth++] = node; + } + } + } + + template <typename Iterator> + class Item { + Iterator* mIterator; + T* mItem; + + public: + Item(Iterator* aIterator, T* aItem) + : mIterator(aIterator), mItem(aItem) {} + + bool operator!=(const Item& aOther) const { + return (mIterator != aOther.mIterator) || (mItem != aOther.mItem); + } + + T* operator*() const { return mItem; } + + const Item& operator++() { + mItem = mIterator->Next(); + return *this; + } + }; + + Item<Iterator> begin() { + return Item<Iterator>(this, + mDepth > 0 ? mPath[mDepth - 1].Get() : nullptr); + } + + Item<Iterator> end() { return Item<Iterator>(this, nullptr); } + + T* Next() { + TreeNode node; + if ((node = mPath[mDepth - 1].Right())) { + // The successor is the left-most node in the right subtree. + mPath[mDepth++] = node; + while ((node = mPath[mDepth - 1].Left())) { + mPath[mDepth++] = node; + } + } else { + // The successor is above the current node. Unwind until a + // left-leaning edge is removed from the path, of the path is empty. + for (mDepth--; mDepth > 0; mDepth--) { + if (mPath[mDepth - 1].Left() == mPath[mDepth]) { + break; + } + } + } + return mDepth > 0 ? mPath[mDepth - 1].Get() : nullptr; + } + }; + + Iterator iter() { return Iterator(this); } +}; + +#endif // RB_H_ diff --git a/memory/build/replace_malloc.h b/memory/build/replace_malloc.h new file mode 100644 index 0000000000..9ea3493f99 --- /dev/null +++ b/memory/build/replace_malloc.h @@ -0,0 +1,113 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef replace_malloc_h +#define replace_malloc_h + +// The replace_malloc facility allows an external library to replace or +// supplement the jemalloc implementation. +// +// The external library may be hooked by setting one of the following +// environment variables to the library path: +// - LD_PRELOAD on Linux, +// - DYLD_INSERT_LIBRARIES on OSX, +// - MOZ_REPLACE_MALLOC_LIB on Windows and Android. +// +// An initialization function is called before any malloc replacement +// function, and has the following declaration: +// +// void replace_init(malloc_table_t*, ReplaceMallocBridge**) +// +// The malloc_table_t pointer given to that function is a table containing +// pointers to the original allocator implementation, so that replacement +// functions can call them back if they need to. The initialization function +// needs to alter that table to replace the function it wants to replace. +// If it needs the original implementation, it thus needs a copy of the +// original table. +// +// The ReplaceMallocBridge* pointer is an outparam that allows the +// replace_init function to return a pointer to its ReplaceMallocBridge +// (see replace_malloc_bridge.h). +// +// The functions to be implemented in the external library are of the form: +// +// void* replace_malloc(size_t size) +// { +// // Fiddle with the size if necessary. +// // orig->malloc doesn't have to be called if the external library +// // provides its own allocator, but in this case it will have to +// // implement all functions. +// void *ptr = orig->malloc(size); +// // Do whatever you want with the ptr. +// return ptr; +// } +// +// where "orig" is a pointer to a copy of the table replace_init got. +// +// See malloc_decls.h for a list of functions that can be replaced this +// way. The implementations are all in the form: +// return_type replace_name(arguments [,...]) +// +// They don't all need to be provided. +// +// Building a replace-malloc library is like rocket science. It can end up +// with things blowing up, especially when trying to use complex types, and +// even more especially when these types come from XPCOM or other parts of the +// Mozilla codebase. +// It is recommended to add the following to a replace-malloc implementation's +// moz.build: +// DISABLE_STL_WRAPPING = True # Avoid STL wrapping +// +// If your replace-malloc implementation lives under memory/replace, these +// are taken care of by memory/replace/defs.mk. + +#ifdef replace_malloc_bridge_h +# error Do not include replace_malloc_bridge.h before replace_malloc.h. \ + In fact, you only need the latter. +#endif + +#define REPLACE_MALLOC_IMPL + +#include "replace_malloc_bridge.h" + +// Implementing a replace-malloc library is incompatible with using mozalloc. +#define MOZ_NO_MOZALLOC 1 + +#include "mozilla/MacroArgs.h" +#include "mozilla/Types.h" + +MOZ_BEGIN_EXTERN_C + +// MOZ_REPLACE_WEAK is only defined in mozjemalloc.cpp. Normally including +// this header will add function definitions. +#ifndef MOZ_REPLACE_WEAK +# define MOZ_REPLACE_WEAK +#endif + +// When building a replace-malloc library for static linking, we want +// each to have a different name for their "public" functions. +// The build system defines MOZ_REPLACE_MALLOC_PREFIX in that case. +#ifdef MOZ_REPLACE_MALLOC_PREFIX +# define replace_init MOZ_CONCAT(MOZ_REPLACE_MALLOC_PREFIX, _init) +# define MOZ_REPLACE_PUBLIC +#else +# define MOZ_REPLACE_PUBLIC MOZ_EXPORT +#endif + +struct ReplaceMallocBridge; +typedef void (*jemalloc_init_func)(malloc_table_t*, + struct ReplaceMallocBridge**); + +// Replace-malloc library initialization function. See top of this file +MOZ_REPLACE_PUBLIC void replace_init( + malloc_table_t*, struct ReplaceMallocBridge**) MOZ_REPLACE_WEAK; + +// ensure this is visible and libxul/etc reference it with a weak ref +MFBT_API void jemalloc_replace_dynamic(jemalloc_init_func); + +MOZ_END_EXTERN_C + +#endif // replace_malloc_h diff --git a/memory/build/replace_malloc_bridge.h b/memory/build/replace_malloc_bridge.h new file mode 100644 index 0000000000..6a0604d896 --- /dev/null +++ b/memory/build/replace_malloc_bridge.h @@ -0,0 +1,212 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef replace_malloc_bridge_h +#define replace_malloc_bridge_h + +// The replace-malloc bridge allows bidirectional method calls between +// a program and the replace-malloc library that has been loaded for it. +// In Firefox, this is used to allow method calls between code in libxul +// and code in the replace-malloc library, without libxul needing to link +// against that library or vice-versa. +// +// Subsystems can add methods for their own need. Replace-malloc libraries +// can decide to implement those methods or not. +// +// Replace-malloc libraries can provide such a bridge by implementing +// a ReplaceMallocBridge-derived class, and a get_bridge function +// returning an instance of that class. The default methods in +// ReplaceMallocBridge are expected to return values that callers would +// understand as "the bridge doesn't implement this method", so that a +// replace-malloc library doesn't have to implement all methods. +// +// The ReplaceMallocBridge class contains definitions for methods for +// all replace-malloc libraries. Each library picks the methods it wants +// to reply to in its ReplaceMallocBridge-derived class instance. +// All methods of ReplaceMallocBridge must be virtual. Similarly, +// anything passed as an argument to those methods must be plain data, or +// an instance of a class with only virtual methods. +// +// Binary compatibility is expected to be maintained, such that a newer +// Firefox can be used with an old replace-malloc library, or an old +// Firefox can be used with a newer replace-malloc library. As such, only +// new virtual methods should be added to ReplaceMallocBridge, and +// each change should have a corresponding bump of the mVersion value. +// At the same time, each virtual method should have a corresponding +// wrapper calling the virtual method on the instance from +// ReplaceMallocBridge::Get(), giving it the version the virtual method +// was added. +// +// Parts that are not relevant to the replace-malloc library end of the +// bridge are hidden when REPLACE_MALLOC_IMPL is not defined, which is +// the case when including replace_malloc.h. + +struct ReplaceMallocBridge; + +#include "mozilla/Types.h" + +MOZ_BEGIN_EXTERN_C + +#ifndef REPLACE_MALLOC_IMPL +// Returns the replace-malloc bridge if there is one to be returned. +MFBT_API ReplaceMallocBridge* get_bridge(); +#endif + +// Table of malloc functions. +// e.g. void* (*malloc)(size_t), etc. + +#define MALLOC_DECL(name, return_type, ...) \ + typedef return_type(name##_impl_t)(__VA_ARGS__); + +#include "malloc_decls.h" + +#define MALLOC_DECL(name, return_type, ...) name##_impl_t* name; + +typedef struct { +#include "malloc_decls.h" +} malloc_table_t; + +MOZ_END_EXTERN_C + +#ifdef __cplusplus + +// Table of malloc hook functions. +// Those functions are called with the arguments and results of malloc +// functions after they are called. +// e.g. void* (*malloc_hook)(void*, size_t), etc. +// They can either return the result they're given, or alter it before +// returning it. +// The hooks corresponding to functions, like free(void*), that return no +// value, don't take an extra argument. +// The table must at least contain a pointer for malloc_hook and free_hook +// functions. They will be used as fallback if no pointer is given for +// other allocation functions, like calloc_hook. +namespace mozilla { +namespace detail { +template <typename R, typename... Args> +struct AllocHookType { + using Type = R (*)(R, Args...); +}; + +template <typename... Args> +struct AllocHookType<void, Args...> { + using Type = void (*)(Args...); +}; + +} // namespace detail +} // namespace mozilla + +# define MALLOC_DECL(name, return_type, ...) \ + typename mozilla::detail::AllocHookType<return_type, ##__VA_ARGS__>::Type \ + name##_hook; + +typedef struct { +# include "malloc_decls.h" + // Like free_hook, but called before realloc_hook. free_hook is called + // instead of not given. + void (*realloc_hook_before)(void* aPtr); +} malloc_hook_table_t; + +namespace mozilla { +namespace dmd { +struct DMDFuncs; +} // namespace dmd + +namespace phc { + +class AddrInfo; + +} // namespace phc + +// Callbacks to register debug file handles for Poison IO interpose. +// See Mozilla(|Un)RegisterDebugHandle in xpcom/build/PoisonIOInterposer.h +struct DebugFdRegistry { + virtual void RegisterHandle(intptr_t aFd); + + virtual void UnRegisterHandle(intptr_t aFd); +}; +} // namespace mozilla + +struct ReplaceMallocBridge { + ReplaceMallocBridge() : mVersion(6) {} + + // This method was added in version 1 of the bridge. + virtual mozilla::dmd::DMDFuncs* GetDMDFuncs() { return nullptr; } + + // Send a DebugFdRegistry instance to the replace-malloc library so that + // it can register/unregister file descriptors whenever needed. The + // instance is valid until the process dies. + // This method was added in version 2 of the bridge. + virtual void InitDebugFd(mozilla::DebugFdRegistry&) {} + + // Register a list of malloc functions and hook functions to the + // replace-malloc library so that it can choose to dispatch to them + // when needed. The details of what is dispatched when is left to the + // replace-malloc library. + // Passing a nullptr for either table will unregister a previously + // registered table under the same name. + // Returns nullptr if registration failed. + // If registration succeeded, a table of "pure" malloc functions is + // returned. Those "pure" malloc functions won't call hooks. + // /!\ Do not rely on registration/unregistration to be instantaneous. + // Functions from a previously registered table may still be called for + // a brief time after RegisterHook returns. + // This method was added in version 3 of the bridge. + virtual const malloc_table_t* RegisterHook( + const char* aName, const malloc_table_t* aTable, + const malloc_hook_table_t* aHookTable) { + return nullptr; + } + +# ifndef REPLACE_MALLOC_IMPL + // Returns the replace-malloc bridge if its version is at least the + // requested one. + static ReplaceMallocBridge* Get(int aMinimumVersion) { + static ReplaceMallocBridge* sSingleton = get_bridge(); + return (sSingleton && sSingleton->mVersion >= aMinimumVersion) ? sSingleton + : nullptr; + } +# endif + + protected: + const int mVersion; +}; + +# ifndef REPLACE_MALLOC_IMPL +// Class containing wrappers for calls to ReplaceMallocBridge methods. +// Those wrappers need to be static methods in a class because compilers +// complain about unused static global functions, and linkers complain +// about multiple definitions of non-static global functions. +// Using a separate class from ReplaceMallocBridge allows the function +// names to be identical. +struct ReplaceMalloc { + // Don't call this method from performance critical code. Use + // mozilla::dmd::DMDFuncs::Get() instead, it has less overhead. + static mozilla::dmd::DMDFuncs* GetDMDFuncs() { + auto singleton = ReplaceMallocBridge::Get(/* minimumVersion */ 1); + return singleton ? singleton->GetDMDFuncs() : nullptr; + } + + static void InitDebugFd(mozilla::DebugFdRegistry& aRegistry) { + auto singleton = ReplaceMallocBridge::Get(/* minimumVersion */ 2); + if (singleton) { + singleton->InitDebugFd(aRegistry); + } + } + + static const malloc_table_t* RegisterHook( + const char* aName, const malloc_table_t* aTable, + const malloc_hook_table_t* aHookTable) { + auto singleton = ReplaceMallocBridge::Get(/* minimumVersion */ 3); + return singleton ? singleton->RegisterHook(aName, aTable, aHookTable) + : nullptr; + } +}; +# endif + +#endif // __cplusplus + +#endif // replace_malloc_bridge_h diff --git a/memory/build/test/TestMozJemallocUtils.cpp b/memory/build/test/TestMozJemallocUtils.cpp new file mode 100644 index 0000000000..06a1a3b5a5 --- /dev/null +++ b/memory/build/test/TestMozJemallocUtils.cpp @@ -0,0 +1,152 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This is a cppunittest, rather than a gtest, in order to assert that no +// additional DLL needs to be linked in to use the function(s) tested herein. + +#include <algorithm> +#include <iostream> +#include <optional> +#include <sstream> +#include <type_traits> + +#include "mozmemory_utils.h" +#include "mozilla/Likely.h" + +static bool TESTS_FAILED = false; + +// Introduce iostream output operators for std::optional, for convenience's +// sake. +// +// (This is technically undefined behavior per [namespace.std], but it's +// unlikely to have any surprising effects when confined to this compilation +// unit.) +namespace std { +template <typename T> +std::ostream& operator<<(std::ostream& o, std::optional<T> const& s) { + if (s) { + return o << "std::optional{" << s.value() << "}"; + } + return o << "std::nullopt"; +} +std::ostream& operator<<(std::ostream& o, std::nullopt_t const& s) { + return o << "std::nullopt"; +} +} // namespace std + +// EXPECT_EQ +// +// Assert that two expressions are equal. Print them, and their values, on +// failure. (Based on the GTest macro of the same name.) +template <typename X, typename Y, size_t Xn, size_t Yn> +void AssertEqualImpl_(X&& x, Y&& y, const char* file, size_t line, + const char (&xStr)[Xn], const char (&yStr)[Yn], + const char* explanation = nullptr) { + if (MOZ_LIKELY(x == y)) return; + + TESTS_FAILED = true; + + std::stringstream sstr; + sstr << file << ':' << line << ": "; + if (explanation) sstr << explanation << "\n\t"; + sstr << "expected " << xStr << " (" << x << ") == " << yStr << " (" << y + << ")\n"; + std::cerr << sstr.str() << std::flush; +} + +#define EXPECT_EQ(x, y) \ + do { \ + AssertEqualImpl_(x, y, __FILE__, __LINE__, #x, #y); \ + } while (0) + +// STATIC_ASSERT_VALUE_IS_OF_TYPE +// +// Assert that a value `v` is of type `t` (ignoring cv-qualification). +#define STATIC_ASSERT_VALUE_IS_OF_TYPE(v, t) \ + static_assert(std::is_same_v<std::remove_cv_t<decltype(v)>, t>) + +// MockSleep +// +// Mock replacement for ::Sleep that merely logs its calls. +struct MockSleep { + size_t calls = 0; + size_t sum = 0; + + void operator()(size_t val) { + ++calls; + sum += val; + } + + bool operator==(MockSleep const& that) const { + return calls == that.calls && sum == that.sum; + } +}; +std::ostream& operator<<(std::ostream& o, MockSleep const& s) { + return o << "MockSleep { count: " << s.calls << ", sum: " << s.sum << " }"; +} + +// MockAlloc +// +// Mock memory allocation mechanism. Eventually returns a value. +template <typename T> +struct MockAlloc { + size_t count; + T value; + + std::optional<T> operator()() { + if (!count--) return value; + return std::nullopt; + } +}; + +int main() { + using mozilla::StallSpecs; + + const StallSpecs stall = {.maxAttempts = 10, .delayMs = 50}; + + // semantic test: stalls as requested but still yields a value, + // up until it doesn't + for (size_t i = 0; i < 20; ++i) { + MockSleep sleep; + auto const ret = + stall.StallAndRetry(sleep, MockAlloc<int>{.count = i, .value = 5}); + STATIC_ASSERT_VALUE_IS_OF_TYPE(ret, std::optional<int>); + + if (i < 10) { + EXPECT_EQ(ret, std::optional<int>(5)); + } else { + EXPECT_EQ(ret, std::nullopt); + } + size_t const expectedCalls = std::min<size_t>(i + 1, 10); + EXPECT_EQ(sleep, + (MockSleep{.calls = expectedCalls, .sum = 50 * expectedCalls})); + } + + // syntactic test: inline capturing lambda is accepted for aOperation + { + MockSleep sleep; + std::optional<int> value{42}; + auto const ret = stall.StallAndRetry(sleep, [&]() { return value; }); + + STATIC_ASSERT_VALUE_IS_OF_TYPE(ret, std::optional<int>); + EXPECT_EQ(ret, std::optional(42)); + EXPECT_EQ(sleep, (MockSleep{.calls = 1, .sum = 50})); + } + + // syntactic test: inline capturing lambda is accepted for aDelayFunc + { + MockSleep sleep; + auto const ret = + stall.StallAndRetry([&](size_t time) { sleep(time); }, + MockAlloc<int>{.count = 0, .value = 105}); + + STATIC_ASSERT_VALUE_IS_OF_TYPE(ret, std::optional<int>); + EXPECT_EQ(ret, std::optional(105)); + EXPECT_EQ(sleep, (MockSleep{.calls = 1, .sum = 50})); + } + + return TESTS_FAILED ? 1 : 0; +} diff --git a/memory/build/test/gtest/TestPHC.cpp b/memory/build/test/gtest/TestPHC.cpp new file mode 100644 index 0000000000..58d72631cb --- /dev/null +++ b/memory/build/test/gtest/TestPHC.cpp @@ -0,0 +1,415 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" + +#include "mozmemory.h" +#include "mozilla/Assertions.h" +#include "mozilla/mozalloc.h" +#include "PHC.h" + +using namespace mozilla; + +bool PHCInfoEq(phc::AddrInfo& aInfo, phc::AddrInfo::Kind aKind, void* aBaseAddr, + size_t aUsableSize, bool aHasAllocStack, bool aHasFreeStack) { + return aInfo.mKind == aKind && aInfo.mBaseAddr == aBaseAddr && + aInfo.mUsableSize == aUsableSize && + // Proper stack traces will have at least 3 elements. + (aHasAllocStack ? (aInfo.mAllocStack->mLength > 2) + : (aInfo.mAllocStack.isNothing())) && + (aHasFreeStack ? (aInfo.mFreeStack->mLength > 2) + : (aInfo.mFreeStack.isNothing())); +} + +bool JeInfoEq(jemalloc_ptr_info_t& aInfo, PtrInfoTag aTag, void* aAddr, + size_t aSize, arena_id_t arenaId) { + return aInfo.tag == aTag && aInfo.addr == aAddr && aInfo.size == aSize +#ifdef MOZ_DEBUG + && aInfo.arenaId == arenaId +#endif + ; +} + +uint8_t* GetPHCAllocation(size_t aSize, size_t aAlignment = 1) { + // A crude but effective way to get a PHC allocation. + for (int i = 0; i < 2000000; i++) { + void* p = (aAlignment == 1) ? moz_xmalloc(aSize) + : moz_xmemalign(aAlignment, aSize); + if (mozilla::phc::IsPHCAllocation(p, nullptr)) { + return (uint8_t*)p; + } + free(p); + } + return nullptr; +} + +#if defined(XP_DARWIN) && defined(__aarch64__) +static const size_t kPageSize = 16384; +#else +static const size_t kPageSize = 4096; +#endif + +TEST(PHC, TestPHCAllocations) +{ + mozilla::phc::SetPHCState(phc::PHCState::Enabled); + + // First, check that allocations of various sizes all get put at the end of + // their page as expected. Also, check their sizes are as expected. + +#define ASSERT_POS(n1, n2) \ + p = (uint8_t*)moz_xrealloc(p, (n1)); \ + ASSERT_EQ((reinterpret_cast<uintptr_t>(p) & (kPageSize - 1)), \ + kPageSize - (n2)); \ + ASSERT_EQ(moz_malloc_usable_size(p), (n2)); + + uint8_t* p = GetPHCAllocation(1); + if (!p) { + MOZ_CRASH("failed to get a PHC allocation"); + } + + // On Win64 the smallest possible allocation is 16 bytes. On other platforms + // it is 8 bytes. +#if defined(XP_WIN) && defined(HAVE_64BIT_BUILD) + ASSERT_POS(8U, 16U); +#else + ASSERT_POS(8U, 8U); +#endif + for (unsigned i = 16; i <= kPageSize; i *= 2) { + ASSERT_POS(i, i); + } + + free(p); + +#undef ASSERT_POS + + // Second, do similar checking with allocations of various alignments. Also + // check that their sizes (which are different to allocations with normal + // alignment) are the same as the sizes of equivalent non-PHC allocations. + +#define ASSERT_ALIGN(a1, a2) \ + p = (uint8_t*)GetPHCAllocation(8, (a1)); \ + ASSERT_EQ((reinterpret_cast<uintptr_t>(p) & (kPageSize - 1)), \ + kPageSize - (a2)); \ + ASSERT_EQ(moz_malloc_usable_size(p), (a2)); \ + free(p); \ + p = (uint8_t*)moz_xmemalign((a1), 8); \ + ASSERT_EQ(moz_malloc_usable_size(p), (a2)); \ + free(p); + + // On Win64 the smallest possible allocation is 16 bytes. On other platforms + // it is 8 bytes. +#if defined(XP_WIN) && defined(HAVE_64BIT_BUILD) + ASSERT_ALIGN(8U, 16U); +#else + ASSERT_ALIGN(8U, 8U); +#endif + for (unsigned i = 16; i <= kPageSize; i *= 2) { + ASSERT_ALIGN(i, i); + } + +#undef ASSERT_ALIGN +} + +static void TestInUseAllocation(uint8_t* aPtr, size_t aSize) { + phc::AddrInfo phcInfo; + jemalloc_ptr_info_t jeInfo; + + // Test an in-use PHC allocation: first byte within it. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::InUsePage, aPtr, aSize, + true, false)); + ASSERT_EQ(moz_malloc_usable_size(aPtr), aSize); + jemalloc_ptr_info(aPtr, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagLiveAlloc, aPtr, aSize, 0)); + + // Test an in-use PHC allocation: last byte within it. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr + aSize - 1, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::InUsePage, aPtr, aSize, + true, false)); + ASSERT_EQ(moz_malloc_usable_size(aPtr + aSize - 1), aSize); + jemalloc_ptr_info(aPtr + aSize - 1, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagLiveAlloc, aPtr, aSize, 0)); + + // Test an in-use PHC allocation: last byte before it. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr - 1, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::InUsePage, aPtr, aSize, + true, false)); + ASSERT_EQ(moz_malloc_usable_size(aPtr - 1), 0ul); + jemalloc_ptr_info(aPtr - 1, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: first byte on its allocation page. + ASSERT_TRUE( + mozilla::phc::IsPHCAllocation(aPtr + aSize - kPageSize, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::InUsePage, aPtr, aSize, + true, false)); + jemalloc_ptr_info(aPtr + aSize - kPageSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: first byte in the following guard page. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr + aSize, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, aPtr, aSize, + true, false)); + jemalloc_ptr_info(aPtr + aSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: last byte in the lower half of the + // following guard page. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr + aSize + (kPageSize / 2 - 1), + &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, aPtr, aSize, + true, false)); + jemalloc_ptr_info(aPtr + aSize + (kPageSize / 2 - 1), &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: last byte in the preceding guard page. + ASSERT_TRUE( + mozilla::phc::IsPHCAllocation(aPtr + aSize - 1 - kPageSize, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, aPtr, aSize, + true, false)); + jemalloc_ptr_info(aPtr + aSize - 1 - kPageSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test an in-use PHC allocation: first byte in the upper half of the + // preceding guard page. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation( + aPtr + aSize - 1 - kPageSize - (kPageSize / 2 - 1), &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, aPtr, aSize, + true, false)); + jemalloc_ptr_info(aPtr + aSize - 1 - kPageSize - (kPageSize / 2 - 1), + &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); +} + +static void TestFreedAllocation(uint8_t* aPtr, size_t aSize) { + phc::AddrInfo phcInfo; + jemalloc_ptr_info_t jeInfo; + + // Test a freed PHC allocation: first byte within it. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::FreedPage, aPtr, aSize, + true, true)); + jemalloc_ptr_info(aPtr, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagFreedAlloc, aPtr, aSize, 0)); + + // Test a freed PHC allocation: last byte within it. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr + aSize - 1, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::FreedPage, aPtr, aSize, + true, true)); + jemalloc_ptr_info(aPtr + aSize - 1, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagFreedAlloc, aPtr, aSize, 0)); + + // Test a freed PHC allocation: last byte before it. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr - 1, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::FreedPage, aPtr, aSize, + true, true)); + jemalloc_ptr_info(aPtr - 1, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: first byte on its allocation page. + ASSERT_TRUE( + mozilla::phc::IsPHCAllocation(aPtr + aSize - kPageSize, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::FreedPage, aPtr, aSize, + true, true)); + jemalloc_ptr_info(aPtr + aSize - kPageSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: first byte in the following guard page. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr + aSize, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, aPtr, aSize, + true, true)); + jemalloc_ptr_info(aPtr + aSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: last byte in the lower half of the following + // guard page. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(aPtr + aSize + (kPageSize / 2 - 1), + &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, aPtr, aSize, + true, true)); + jemalloc_ptr_info(aPtr + aSize + (kPageSize / 2 - 1), &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: last byte in the preceding guard page. + ASSERT_TRUE( + mozilla::phc::IsPHCAllocation(aPtr + aSize - 1 - kPageSize, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, aPtr, aSize, + true, true)); + jemalloc_ptr_info(aPtr + aSize - 1 - kPageSize, &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); + + // Test a freed PHC allocation: first byte in the upper half of the preceding + // guard page. + ASSERT_TRUE(mozilla::phc::IsPHCAllocation( + aPtr + aSize - 1 - kPageSize - (kPageSize / 2 - 1), &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::GuardPage, aPtr, aSize, + true, true)); + jemalloc_ptr_info(aPtr + aSize - 1 - kPageSize - (kPageSize / 2 - 1), + &jeInfo); + ASSERT_TRUE(JeInfoEq(jeInfo, TagUnknown, nullptr, 0, 0)); +} + +TEST(PHC, TestPHCInfo) +{ + mozilla::phc::SetPHCState(phc::PHCState::Enabled); + + int stackVar; + phc::AddrInfo phcInfo; + + // Test a default AddrInfo. + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::Unknown, nullptr, 0ul, + false, false)); + + // Test some non-PHC allocation addresses. + ASSERT_FALSE(mozilla::phc::IsPHCAllocation(nullptr, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::Unknown, nullptr, 0, + false, false)); + ASSERT_FALSE(mozilla::phc::IsPHCAllocation(&stackVar, &phcInfo)); + ASSERT_TRUE(PHCInfoEq(phcInfo, phc::AddrInfo::Kind::Unknown, nullptr, 0, + false, false)); + + uint8_t* p = GetPHCAllocation(32); + if (!p) { + MOZ_CRASH("failed to get a PHC allocation"); + } + + TestInUseAllocation(p, 32); + + free(p); + + TestFreedAllocation(p, 32); + + // There are no tests for `mKind == NeverAllocatedPage` because it's not + // possible to reliably get ahold of such a page. +} + +TEST(PHC, TestPHCDisablingThread) +{ + mozilla::phc::SetPHCState(phc::PHCState::Enabled); + + uint8_t* p = GetPHCAllocation(32); + uint8_t* q = GetPHCAllocation(32); + if (!p || !q) { + MOZ_CRASH("failed to get a PHC allocation"); + } + + ASSERT_TRUE(mozilla::phc::IsPHCEnabledOnCurrentThread()); + mozilla::phc::DisablePHCOnCurrentThread(); + ASSERT_FALSE(mozilla::phc::IsPHCEnabledOnCurrentThread()); + + // Test realloc() on a PHC allocation while PHC is disabled on the thread. + uint8_t* p2 = (uint8_t*)realloc(p, 128); + // The small realloc is fulfilled within the same page, but it does move. + ASSERT_TRUE(p2 == p - 96); + ASSERT_TRUE(mozilla::phc::IsPHCAllocation(p2, nullptr)); + uint8_t* p3 = (uint8_t*)realloc(p2, 2 * kPageSize); + // The big realloc is not in-place, and the result is not a PHC allocation. + ASSERT_TRUE(p3 != p2); + ASSERT_FALSE(mozilla::phc::IsPHCAllocation(p3, nullptr)); + free(p3); + + // Test free() on a PHC allocation while PHC is disabled on the thread. + free(q); + + // These must not be PHC allocations. + uint8_t* r = GetPHCAllocation(32); // This will fail. + ASSERT_FALSE(!!r); + + mozilla::phc::ReenablePHCOnCurrentThread(); + ASSERT_TRUE(mozilla::phc::IsPHCEnabledOnCurrentThread()); + + // If it really was reenabled we should be able to get PHC allocations + // again. + uint8_t* s = GetPHCAllocation(32); // This should succeed. + ASSERT_TRUE(!!s); + free(s); +} + +TEST(PHC, TestPHCDisablingGlobal) +{ + mozilla::phc::SetPHCState(phc::PHCState::Enabled); + + uint8_t* p1 = GetPHCAllocation(32); + uint8_t* p2 = GetPHCAllocation(32); + uint8_t* q = GetPHCAllocation(32); + if (!p1 || !p2 || !q) { + MOZ_CRASH("failed to get a PHC allocation"); + } + + mozilla::phc::SetPHCState(phc::PHCState::OnlyFree); + + // Test realloc() on a PHC allocation while PHC is disabled on the thread. + uint8_t* p3 = (uint8_t*)realloc(p1, 128); + // The small realloc is evicted from PHC because in "OnlyFree" state PHC + // tries to reduce its memory impact. + ASSERT_TRUE(p3 != p1); + ASSERT_FALSE(mozilla::phc::IsPHCAllocation(p3, nullptr)); + free(p3); + uint8_t* p4 = (uint8_t*)realloc(p2, 2 * kPageSize); + // The big realloc is not in-place, and the result is not a PHC + // allocation, regardless of PHC's state. + ASSERT_TRUE(p4 != p2); + ASSERT_FALSE(mozilla::phc::IsPHCAllocation(p4, nullptr)); + free(p4); + + // Test free() on a PHC allocation while PHC is disabled on the thread. + free(q); + + // These must not be PHC allocations. + uint8_t* r = GetPHCAllocation(32); // This will fail. + ASSERT_FALSE(!!r); + + mozilla::phc::SetPHCState(phc::PHCState::Enabled); + + // If it really was reenabled we should be able to get PHC allocations + // again. + uint8_t* s = GetPHCAllocation(32); // This should succeed. + ASSERT_TRUE(!!s); + free(s); +} + +// This test is disabled for now, see Bug 1845017 and Bug 1845655. +// TEST(PHC, TestPHCExhaustion) +// { +void DisabledPHCExhaustionTest() { + // PHC hardcodes the amount of allocations to track. +#if defined(XP_DARWIN) && defined(__aarch64__) + const unsigned NUM_ALLOCATIONS = 1024; +#else + const unsigned NUM_ALLOCATIONS = 4096; +#endif + uint8_t* allocations[NUM_ALLOCATIONS]; + const unsigned REQUIRED_ALLOCATIONS = NUM_ALLOCATIONS - 50; + + unsigned last_allocation; + for (unsigned i = 0; i < NUM_ALLOCATIONS; i++) { + allocations[i] = GetPHCAllocation(128); + last_allocation = i; + if (i < REQUIRED_ALLOCATIONS) { + // Assert that the first REQUIRED_ALLOCATIONS work. We require only + // REQUIRED_ALLOCATIONS rather than NUM_ALLOCATIONS because sometimes + // some PHC slots are used before the test begins. + ASSERT_TRUE(allocations[i]); + } else if (!allocations[i]) { + // Break the loop if an allocation fails to move to the next phase. + last_allocation--; + break; + } + TestInUseAllocation(allocations[i], 128); + } + + // We should now fail to get an allocation. + ASSERT_FALSE(GetPHCAllocation(128)); + + for (unsigned i = 0; i <= last_allocation; i++) { + free(allocations[i]); + TestFreedAllocation(allocations[i], 128); + } + + // And now that we've released those allocations we should be able to get + // new allocations again. + uint8_t* r = GetPHCAllocation(128); + ASSERT_TRUE(!!r); + free(r); +} diff --git a/memory/build/test/gtest/moz.build b/memory/build/test/gtest/moz.build new file mode 100644 index 0000000000..82ccaaf9c6 --- /dev/null +++ b/memory/build/test/gtest/moz.build @@ -0,0 +1,15 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + "TestPHC.cpp", +] + +LOCAL_INCLUDES += [ + "../../", +] + +FINAL_LIBRARY = "xul-gtest" diff --git a/memory/build/test/moz.build b/memory/build/test/moz.build new file mode 100644 index 0000000000..d93cc48c65 --- /dev/null +++ b/memory/build/test/moz.build @@ -0,0 +1,21 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# We don't link these tests against mozglue, but we do use the STL. Avoid the +# implicit linking of `__imp_moz_xalloc` in our STL wrappers. +DisableStlWrapping() + +# Important: for these tests to be run, they also need to be added +# to testing/cppunittest.ini. +CppUnitTests( + [ + "TestMozJemallocUtils", + ] +) +# The gtests won't work in a SpiderMonkey-only build or a build without +# jemalloc. +if CONFIG["MOZ_PHC"]: + TEST_DIRS += ["gtest"] diff --git a/memory/build/zone.c b/memory/build/zone.c new file mode 100644 index 0000000000..7311ccf27b --- /dev/null +++ b/memory/build/zone.c @@ -0,0 +1,377 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozmemory_wrap.h" + +#include <stdlib.h> +#include <mach/mach_types.h> +#include "mozilla/Assertions.h" + +// Malloc implementation functions are MOZ_MEMORY_API, and jemalloc +// specific functions MOZ_JEMALLOC_API; see mozmemory_wrap.h + +#define MALLOC_DECL(name, return_type, ...) \ + MOZ_MEMORY_API return_type name##_impl(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC +#include "malloc_decls.h" + +#define MALLOC_DECL(name, return_type, ...) \ + MOZ_JEMALLOC_API return_type name##_impl(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC +#include "malloc_decls.h" + +// Definitions of the following structs in malloc/malloc.h might be too old +// for the built binary to run on newer versions of OSX. So use the newest +// possible version of those structs. + +typedef struct _malloc_zone_t { + void* reserved1; + void* reserved2; + size_t (*size)(struct _malloc_zone_t*, const void*); + void* (*malloc)(struct _malloc_zone_t*, size_t); + void* (*calloc)(struct _malloc_zone_t*, size_t, size_t); + void* (*valloc)(struct _malloc_zone_t*, size_t); + void (*free)(struct _malloc_zone_t*, void*); + void* (*realloc)(struct _malloc_zone_t*, void*, size_t); + void (*destroy)(struct _malloc_zone_t*); + const char* zone_name; + unsigned (*batch_malloc)(struct _malloc_zone_t*, size_t, void**, unsigned); + void (*batch_free)(struct _malloc_zone_t*, void**, unsigned); + struct malloc_introspection_t* introspect; + unsigned version; + void* (*memalign)(struct _malloc_zone_t*, size_t, size_t); + void (*free_definite_size)(struct _malloc_zone_t*, void*, size_t); + size_t (*pressure_relief)(struct _malloc_zone_t*, size_t); +} malloc_zone_t; + +typedef struct { + vm_address_t address; + vm_size_t size; +} vm_range_t; + +typedef struct malloc_statistics_t { + unsigned blocks_in_use; + size_t size_in_use; + size_t max_size_in_use; + size_t size_allocated; +} malloc_statistics_t; + +typedef kern_return_t memory_reader_t(task_t, vm_address_t, vm_size_t, void**); + +typedef void vm_range_recorder_t(task_t, void*, unsigned type, vm_range_t*, + unsigned); + +typedef struct malloc_introspection_t { + kern_return_t (*enumerator)(task_t, void*, unsigned, vm_address_t, + memory_reader_t, vm_range_recorder_t); + size_t (*good_size)(malloc_zone_t*, size_t); + boolean_t (*check)(malloc_zone_t*); + void (*print)(malloc_zone_t*, boolean_t); + void (*log)(malloc_zone_t*, void*); + void (*force_lock)(malloc_zone_t*); + void (*force_unlock)(malloc_zone_t*); + void (*statistics)(malloc_zone_t*, malloc_statistics_t*); + boolean_t (*zone_locked)(malloc_zone_t*); + boolean_t (*enable_discharge_checking)(malloc_zone_t*); + boolean_t (*disable_discharge_checking)(malloc_zone_t*); + void (*discharge)(malloc_zone_t*, void*); +#ifdef __BLOCKS__ + void (*enumerate_discharged_pointers)(malloc_zone_t*, void (^)(void*, void*)); +#else + void* enumerate_unavailable_without_blocks; +#endif + void (*reinit_lock)(malloc_zone_t*); +} malloc_introspection_t; + +extern kern_return_t malloc_get_all_zones(task_t, memory_reader_t, + vm_address_t**, unsigned*); + +extern malloc_zone_t* malloc_default_zone(void); + +extern void malloc_zone_register(malloc_zone_t* zone); + +extern void malloc_zone_unregister(malloc_zone_t* zone); + +extern malloc_zone_t* malloc_default_purgeable_zone(void); + +extern malloc_zone_t* malloc_zone_from_ptr(const void* ptr); + +extern void malloc_zone_free(malloc_zone_t* zone, void* ptr); + +extern void* malloc_zone_realloc(malloc_zone_t* zone, void* ptr, size_t size); + +// The following is a OSX zone allocator implementation. +// /!\ WARNING. It assumes the underlying malloc implementation's +// malloc_usable_size returns 0 when the given pointer is not owned by +// the allocator. Sadly, OSX does call zone_size with pointers not +// owned by the allocator. + +static size_t zone_size(malloc_zone_t* zone, const void* ptr) { + return malloc_usable_size_impl(ptr); +} + +static void* zone_malloc(malloc_zone_t* zone, size_t size) { + return malloc_impl(size); +} + +static void* zone_calloc(malloc_zone_t* zone, size_t num, size_t size) { + return calloc_impl(num, size); +} + +static void* zone_realloc(malloc_zone_t* zone, void* ptr, size_t size) { + if (malloc_usable_size_impl(ptr)) return realloc_impl(ptr, size); + + // Sometimes, system libraries call malloc_zone_* functions with the wrong + // zone (e.g. CoreFoundation does). In that case, we need to find the real + // one. We can't call libSystem's realloc directly because we're exporting + // realloc from libmozglue and we'd pick that one, so we manually find the + // right zone and realloc with it. + malloc_zone_t* real_zone = malloc_zone_from_ptr(ptr); + // The system allocator crashes voluntarily by default when a pointer can't + // be traced back to a zone. Do the same. + MOZ_RELEASE_ASSERT(real_zone); + MOZ_RELEASE_ASSERT(real_zone != zone); + return malloc_zone_realloc(real_zone, ptr, size); +} + +static void other_zone_free(malloc_zone_t* original_zone, void* ptr) { + // Sometimes, system libraries call malloc_zone_* functions with the wrong + // zone (e.g. CoreFoundation does). In that case, we need to find the real + // one. We can't call libSystem's free directly because we're exporting + // free from libmozglue and we'd pick that one, so we manually find the + // right zone and free with it. + if (!ptr) { + return; + } + malloc_zone_t* zone = malloc_zone_from_ptr(ptr); + // The system allocator crashes voluntarily by default when a pointer can't + // be traced back to a zone. Do the same. + MOZ_RELEASE_ASSERT(zone); + MOZ_RELEASE_ASSERT(zone != original_zone); + return malloc_zone_free(zone, ptr); +} + +static void zone_free(malloc_zone_t* zone, void* ptr) { + if (malloc_usable_size_impl(ptr)) { + free_impl(ptr); + return; + } + other_zone_free(zone, ptr); +} + +static void zone_free_definite_size(malloc_zone_t* zone, void* ptr, + size_t size) { + size_t current_size = malloc_usable_size_impl(ptr); + if (current_size) { + MOZ_ASSERT(current_size == size); + free_impl(ptr); + return; + } + other_zone_free(zone, ptr); +} + +static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { + void* ptr; + if (posix_memalign_impl(&ptr, alignment, size) == 0) return ptr; + return NULL; +} + +static void* zone_valloc(malloc_zone_t* zone, size_t size) { + return valloc_impl(size); +} + +static void zone_destroy(malloc_zone_t* zone) { + // This function should never be called. + MOZ_CRASH(); +} + +static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, + void** results, unsigned num_requested) { + unsigned i; + + for (i = 0; i < num_requested; i++) { + results[i] = malloc_impl(size); + if (!results[i]) break; + } + + return i; +} + +static void zone_batch_free(malloc_zone_t* zone, void** to_be_freed, + unsigned num_to_be_freed) { + unsigned i; + + for (i = 0; i < num_to_be_freed; i++) { + zone_free(zone, to_be_freed[i]); + to_be_freed[i] = NULL; + } +} + +static size_t zone_pressure_relief(malloc_zone_t* zone, size_t goal) { + return 0; +} + +static size_t zone_good_size(malloc_zone_t* zone, size_t size) { + return malloc_good_size_impl(size); +} + +static kern_return_t zone_enumerator(task_t task, void* data, + unsigned type_mask, + vm_address_t zone_address, + memory_reader_t reader, + vm_range_recorder_t recorder) { + return KERN_SUCCESS; +} + +static boolean_t zone_check(malloc_zone_t* zone) { return true; } + +static void zone_print(malloc_zone_t* zone, boolean_t verbose) {} + +static void zone_log(malloc_zone_t* zone, void* address) {} + +extern void _malloc_prefork(void); +extern void _malloc_postfork_child(void); + +static void zone_force_lock(malloc_zone_t* zone) { + // /!\ This calls into mozjemalloc. It works because we're linked in the + // same library. + _malloc_prefork(); +} + +static void zone_force_unlock(malloc_zone_t* zone) { + // /!\ This calls into mozjemalloc. It works because we're linked in the + // same library. + _malloc_postfork_child(); +} + +static void zone_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { + // We make no effort to actually fill the values + stats->blocks_in_use = 0; + stats->size_in_use = 0; + stats->max_size_in_use = 0; + stats->size_allocated = 0; +} + +static boolean_t zone_locked(malloc_zone_t* zone) { + // Pretend no lock is being held + return false; +} + +static void zone_reinit_lock(malloc_zone_t* zone) { + // As of OSX 10.12, this function is only used when force_unlock would + // be used if the zone version were < 9. So just use force_unlock. + zone_force_unlock(zone); +} + +static malloc_zone_t zone; +static struct malloc_introspection_t zone_introspect; + +static malloc_zone_t* get_default_zone() { + malloc_zone_t** zones = NULL; + unsigned int num_zones = 0; + + // On OSX 10.12, malloc_default_zone returns a special zone that is not + // present in the list of registered zones. That zone uses a "lite zone" + // if one is present (apparently enabled when malloc stack logging is + // enabled), or the first registered zone otherwise. In practice this + // means unless malloc stack logging is enabled, the first registered + // zone is the default. + // So get the list of zones to get the first one, instead of relying on + // malloc_default_zone. + if (KERN_SUCCESS != + malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &num_zones)) { + // Reset the value in case the failure happened after it was set. + num_zones = 0; + } + if (num_zones) { + return zones[0]; + } + return malloc_default_zone(); +} + +__attribute__((constructor)) static void register_zone(void) { + malloc_zone_t* default_zone = get_default_zone(); + + zone.size = zone_size; + zone.malloc = zone_malloc; + zone.calloc = zone_calloc; + zone.valloc = zone_valloc; + zone.free = zone_free; + zone.realloc = zone_realloc; + zone.destroy = zone_destroy; +#ifdef MOZ_REPLACE_MALLOC + zone.zone_name = "replace_malloc_zone"; +#else + zone.zone_name = "jemalloc_zone"; +#endif + zone.batch_malloc = zone_batch_malloc; + zone.batch_free = zone_batch_free; + zone.introspect = &zone_introspect; + zone.version = 9; + zone.memalign = zone_memalign; + zone.free_definite_size = zone_free_definite_size; + zone.pressure_relief = zone_pressure_relief; + zone_introspect.enumerator = zone_enumerator; + zone_introspect.good_size = zone_good_size; + zone_introspect.check = zone_check; + zone_introspect.print = zone_print; + zone_introspect.log = zone_log; + zone_introspect.force_lock = zone_force_lock; + zone_introspect.force_unlock = zone_force_unlock; + zone_introspect.statistics = zone_statistics; + zone_introspect.zone_locked = zone_locked; + zone_introspect.enable_discharge_checking = NULL; + zone_introspect.disable_discharge_checking = NULL; + zone_introspect.discharge = NULL; +#ifdef __BLOCKS__ + zone_introspect.enumerate_discharged_pointers = NULL; +#else + zone_introspect.enumerate_unavailable_without_blocks = NULL; +#endif + zone_introspect.reinit_lock = zone_reinit_lock; + + // The default purgeable zone is created lazily by OSX's libc. It uses + // the default zone when it is created for "small" allocations + // (< 15 KiB), but assumes the default zone is a scalable_zone. This + // obviously fails when the default zone is the jemalloc zone, so + // malloc_default_purgeable_zone is called beforehand so that the + // default purgeable zone is created when the default zone is still + // a scalable_zone. + malloc_zone_t* purgeable_zone = malloc_default_purgeable_zone(); + + // There is a problem related to the above with the system nano zone, which + // is hard to work around from here, and that is instead worked around by + // disabling the nano zone through an environment variable + // (MallocNanoZone=0). In Firefox, we do that through + // browser/app/macbuild/Contents/Info.plist.in. + + // Register the custom zone. At this point it won't be the default. + malloc_zone_register(&zone); + + do { + // Unregister and reregister the default zone. On OSX >= 10.6, + // unregistering takes the last registered zone and places it at the + // location of the specified zone. Unregistering the default zone thus + // makes the last registered one the default. On OSX < 10.6, + // unregistering shifts all registered zones. The first registered zone + // then becomes the default. + malloc_zone_unregister(default_zone); + malloc_zone_register(default_zone); + + // On OSX 10.6, having the default purgeable zone appear before the default + // zone makes some things crash because it thinks it owns the default + // zone allocated pointers. We thus unregister/re-register it in order to + // ensure it's always after the default zone. On OSX < 10.6, as + // unregistering shifts registered zones, this simply removes the purgeable + // zone from the list and adds it back at the end, after the default zone. + // On OSX >= 10.6, unregistering replaces the purgeable zone with the last + // registered zone above, i.e the default zone. Registering it again then + // puts it at the end, obviously after the default zone. + malloc_zone_unregister(purgeable_zone); + malloc_zone_register(purgeable_zone); + default_zone = get_default_zone(); + } while (default_zone != &zone); +} diff --git a/memory/gtest/TestJemalloc.cpp b/memory/gtest/TestJemalloc.cpp new file mode 100644 index 0000000000..7cb19674cc --- /dev/null +++ b/memory/gtest/TestJemalloc.cpp @@ -0,0 +1,766 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/Literals.h" +#include "mozilla/mozalloc.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Unused.h" +#include "mozilla/Vector.h" +#include "mozilla/gtest/MozHelpers.h" +#include "mozmemory.h" +#include "nsCOMPtr.h" +#include "Utils.h" + +#include "gtest/gtest.h" + +#ifdef MOZ_PHC +# include "PHC.h" +#endif + +using namespace mozilla; + +class AutoDisablePHCOnCurrentThread { + public: + AutoDisablePHCOnCurrentThread() { +#ifdef MOZ_PHC + mozilla::phc::DisablePHCOnCurrentThread(); +#endif + } + + ~AutoDisablePHCOnCurrentThread() { +#ifdef MOZ_PHC + mozilla::phc::ReenablePHCOnCurrentThread(); +#endif + } +}; + +static inline void TestOne(size_t size) { + size_t req = size; + size_t adv = malloc_good_size(req); + char* p = (char*)malloc(req); + size_t usable = moz_malloc_usable_size(p); + // NB: Using EXPECT here so that we still free the memory on failure. + EXPECT_EQ(adv, usable) << "malloc_good_size(" << req << ") --> " << adv + << "; " + "malloc_usable_size(" + << req << ") --> " << usable; + free(p); +} + +static inline void TestThree(size_t size) { + ASSERT_NO_FATAL_FAILURE(TestOne(size - 1)); + ASSERT_NO_FATAL_FAILURE(TestOne(size)); + ASSERT_NO_FATAL_FAILURE(TestOne(size + 1)); +} + +TEST(Jemalloc, UsableSizeInAdvance) +{ + /* + * Test every size up to a certain point, then (N-1, N, N+1) triplets for a + * various sizes beyond that. + */ + + for (size_t n = 0; n < 16_KiB; n++) ASSERT_NO_FATAL_FAILURE(TestOne(n)); + + for (size_t n = 16_KiB; n < 1_MiB; n += 4_KiB) + ASSERT_NO_FATAL_FAILURE(TestThree(n)); + + for (size_t n = 1_MiB; n < 8_MiB; n += 128_KiB) + ASSERT_NO_FATAL_FAILURE(TestThree(n)); +} + +static int gStaticVar; + +bool InfoEq(jemalloc_ptr_info_t& aInfo, PtrInfoTag aTag, void* aAddr, + size_t aSize, arena_id_t arenaId) { + return aInfo.tag == aTag && aInfo.addr == aAddr && aInfo.size == aSize +#ifdef MOZ_DEBUG + && aInfo.arenaId == arenaId +#endif + ; +} + +bool InfoEqFreedPage(jemalloc_ptr_info_t& aInfo, void* aAddr, size_t aPageSize, + arena_id_t arenaId) { + size_t pageSizeMask = aPageSize - 1; + + return jemalloc_ptr_is_freed_page(&aInfo) && + aInfo.addr == (void*)(uintptr_t(aAddr) & ~pageSizeMask) && + aInfo.size == aPageSize +#ifdef MOZ_DEBUG + && aInfo.arenaId == arenaId +#endif + ; +} + +TEST(Jemalloc, PtrInfo) +{ + arena_id_t arenaId = moz_create_arena(); + ASSERT_TRUE(arenaId != 0); + + jemalloc_stats_t stats; + jemalloc_stats(&stats); + + jemalloc_ptr_info_t info; + Vector<char*> small, large, huge; + + // For small (less than half the page size) allocations, test every position + // within many possible sizes. + size_t small_max = + stats.subpage_max ? stats.subpage_max : stats.quantum_wide_max; + for (size_t n = 0; n <= small_max; n += 8) { + auto p = (char*)moz_arena_malloc(arenaId, n); + size_t usable = moz_malloc_size_of(p); + ASSERT_TRUE(small.append(p)); + for (size_t j = 0; j < usable; j++) { + jemalloc_ptr_info(&p[j], &info); + ASSERT_TRUE(InfoEq(info, TagLiveAlloc, p, usable, arenaId)); + } + } + + // Similar for large (small_max + 1 KiB .. 1MiB - 8KiB) allocations. + for (size_t n = small_max + 1_KiB; n <= stats.large_max; n += 1_KiB) { + auto p = (char*)moz_arena_malloc(arenaId, n); + size_t usable = moz_malloc_size_of(p); + ASSERT_TRUE(large.append(p)); + for (size_t j = 0; j < usable; j += 347) { + jemalloc_ptr_info(&p[j], &info); + ASSERT_TRUE(InfoEq(info, TagLiveAlloc, p, usable, arenaId)); + } + } + + // Similar for huge (> 1MiB - 8KiB) allocations. + for (size_t n = stats.chunksize; n <= 10_MiB; n += 512_KiB) { + auto p = (char*)moz_arena_malloc(arenaId, n); + size_t usable = moz_malloc_size_of(p); + ASSERT_TRUE(huge.append(p)); + for (size_t j = 0; j < usable; j += 567) { + jemalloc_ptr_info(&p[j], &info); + ASSERT_TRUE(InfoEq(info, TagLiveAlloc, p, usable, arenaId)); + } + } + + // The following loops check freed allocations. We step through the vectors + // using prime-sized steps, which gives full coverage of the arrays while + // avoiding deallocating in the same order we allocated. + size_t len; + + // Free the small allocations and recheck them. + int isFreedAlloc = 0, isFreedPage = 0; + len = small.length(); + for (size_t i = 0, j = 0; i < len; i++, j = (j + 19) % len) { + char* p = small[j]; + size_t usable = moz_malloc_size_of(p); + free(p); + for (size_t k = 0; k < usable; k++) { + jemalloc_ptr_info(&p[k], &info); + // There are two valid outcomes here. + if (InfoEq(info, TagFreedAlloc, p, usable, arenaId)) { + isFreedAlloc++; + } else if (InfoEqFreedPage(info, &p[k], stats.page_size, arenaId)) { + isFreedPage++; + } else { + ASSERT_TRUE(false); + } + } + } + // There should be both FreedAlloc and FreedPage results, but a lot more of + // the former. + ASSERT_TRUE(isFreedAlloc != 0); + ASSERT_TRUE(isFreedPage != 0); + ASSERT_TRUE(isFreedAlloc / isFreedPage > 8); + + // Free the large allocations and recheck them. + len = large.length(); + for (size_t i = 0, j = 0; i < len; i++, j = (j + 31) % len) { + char* p = large[j]; + size_t usable = moz_malloc_size_of(p); + free(p); + for (size_t k = 0; k < usable; k += 357) { + jemalloc_ptr_info(&p[k], &info); + ASSERT_TRUE(InfoEqFreedPage(info, &p[k], stats.page_size, arenaId)); + } + } + + // Free the huge allocations and recheck them. + len = huge.length(); + for (size_t i = 0, j = 0; i < len; i++, j = (j + 7) % len) { + char* p = huge[j]; + size_t usable = moz_malloc_size_of(p); + free(p); + for (size_t k = 0; k < usable; k += 587) { + jemalloc_ptr_info(&p[k], &info); + ASSERT_TRUE(InfoEq(info, TagUnknown, nullptr, 0U, 0U)); + } + } + + // Null ptr. + jemalloc_ptr_info(nullptr, &info); + ASSERT_TRUE(InfoEq(info, TagUnknown, nullptr, 0U, 0U)); + + // Near-null ptr. + jemalloc_ptr_info((void*)0x123, &info); + ASSERT_TRUE(InfoEq(info, TagUnknown, nullptr, 0U, 0U)); + + // Maximum address. + jemalloc_ptr_info((void*)uintptr_t(-1), &info); + ASSERT_TRUE(InfoEq(info, TagUnknown, nullptr, 0U, 0U)); + + // Stack memory. + int stackVar; + jemalloc_ptr_info(&stackVar, &info); + ASSERT_TRUE(InfoEq(info, TagUnknown, nullptr, 0U, 0U)); + + // Code memory. + jemalloc_ptr_info((const void*)&jemalloc_ptr_info, &info); + ASSERT_TRUE(InfoEq(info, TagUnknown, nullptr, 0U, 0U)); + + // Static memory. + jemalloc_ptr_info(&gStaticVar, &info); + ASSERT_TRUE(InfoEq(info, TagUnknown, nullptr, 0U, 0U)); + + // Chunk header. + UniquePtr<int> p = MakeUnique<int>(); + size_t chunksizeMask = stats.chunksize - 1; + char* chunk = (char*)(uintptr_t(p.get()) & ~chunksizeMask); + size_t chunkHeaderSize = stats.chunksize - stats.large_max - stats.page_size; + for (size_t i = 0; i < chunkHeaderSize; i += 64) { + jemalloc_ptr_info(&chunk[i], &info); + ASSERT_TRUE(InfoEq(info, TagUnknown, nullptr, 0U, 0U)); + } + + // Run header. + size_t page_sizeMask = stats.page_size - 1; + char* run = (char*)(uintptr_t(p.get()) & ~page_sizeMask); + for (size_t i = 0; i < 4 * sizeof(void*); i++) { + jemalloc_ptr_info(&run[i], &info); + ASSERT_TRUE(InfoEq(info, TagUnknown, nullptr, 0U, 0U)); + } + + // Entire chunk. It's impossible to check what is put into |info| for all of + // these addresses; this is more about checking that we don't crash. + for (size_t i = 0; i < stats.chunksize; i += 256) { + jemalloc_ptr_info(&chunk[i], &info); + } + + moz_dispose_arena(arenaId); +} + +size_t sSizes[] = {1, 42, 79, 918, 1.4_KiB, + 73_KiB, 129_KiB, 1.1_MiB, 2.6_MiB, 5.1_MiB}; + +TEST(Jemalloc, Arenas) +{ + arena_id_t arena = moz_create_arena(); + ASSERT_TRUE(arena != 0); + void* ptr = moz_arena_malloc(arena, 42); + ASSERT_TRUE(ptr != nullptr); + ptr = moz_arena_realloc(arena, ptr, 64); + ASSERT_TRUE(ptr != nullptr); + moz_arena_free(arena, ptr); + ptr = moz_arena_calloc(arena, 24, 2); + // For convenience, free can be used to free arena pointers. + free(ptr); + moz_dispose_arena(arena); + + // Avoid death tests adding some unnecessary (long) delays. + SAVE_GDB_SLEEP_LOCAL(); + + // Can't use an arena after it's disposed. + // ASSERT_DEATH_WRAP(moz_arena_malloc(arena, 80), ""); + + // Arena id 0 can't be used to somehow get to the main arena. + ASSERT_DEATH_WRAP(moz_arena_malloc(0, 80), ""); + + arena = moz_create_arena(); + arena_id_t arena2 = moz_create_arena(); + // Ensure arena2 is used to prevent OSX errors: + (void)arena2; + + // For convenience, realloc can also be used to reallocate arena pointers. + // The result should be in the same arena. Test various size class + // transitions. + for (size_t from_size : sSizes) { + SCOPED_TRACE(testing::Message() << "from_size = " << from_size); + for (size_t to_size : sSizes) { + SCOPED_TRACE(testing::Message() << "to_size = " << to_size); + ptr = moz_arena_malloc(arena, from_size); + ptr = realloc(ptr, to_size); + // Freeing with the wrong arena should crash. + ASSERT_DEATH_WRAP(moz_arena_free(arena2, ptr), ""); + // Likewise for moz_arena_realloc. + ASSERT_DEATH_WRAP(moz_arena_realloc(arena2, ptr, from_size), ""); + // The following will crash if it's not in the right arena. + moz_arena_free(arena, ptr); + } + } + + moz_dispose_arena(arena2); + moz_dispose_arena(arena); + + RESTORE_GDB_SLEEP_LOCAL(); +} + +// Check that a buffer aPtr is entirely filled with a given character from +// aOffset to aSize. For faster comparison, the caller is required to fill a +// reference buffer with the wanted character, and give the size of that +// reference buffer. +static void bulk_compare(char* aPtr, size_t aOffset, size_t aSize, + char* aReference, size_t aReferenceSize) { + for (size_t i = aOffset; i < aSize; i += aReferenceSize) { + size_t length = std::min(aSize - i, aReferenceSize); + if (memcmp(aPtr + i, aReference, length)) { + // We got a mismatch, we now want to report more precisely where. + for (size_t j = i; j < i + length; j++) { + ASSERT_EQ(aPtr[j], *aReference); + } + } + } +} + +// A range iterator for size classes between two given values. +class SizeClassesBetween { + public: + SizeClassesBetween(size_t aStart, size_t aEnd) : mStart(aStart), mEnd(aEnd) {} + + class Iterator { + public: + explicit Iterator(size_t aValue) : mValue(malloc_good_size(aValue)) {} + + operator size_t() const { return mValue; } + size_t operator*() const { return mValue; } + Iterator& operator++() { + mValue = malloc_good_size(mValue + 1); + return *this; + } + + private: + size_t mValue; + }; + + Iterator begin() { return Iterator(mStart); } + Iterator end() { return Iterator(mEnd); } + + private: + size_t mStart, mEnd; +}; + +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + ((alignment)-1)) & (~((alignment)-1))) + +#define ALIGNMENT_FLOOR(s, alignment) ((s) & (~((alignment)-1))) + +static bool IsSameRoundedHugeClass(size_t aSize1, size_t aSize2, + jemalloc_stats_t& aStats) { + return (aSize1 > aStats.large_max && aSize2 > aStats.large_max && + ALIGNMENT_CEILING(aSize1 + aStats.page_size, aStats.chunksize) == + ALIGNMENT_CEILING(aSize2 + aStats.page_size, aStats.chunksize)); +} + +static bool CanReallocInPlace(size_t aFromSize, size_t aToSize, + jemalloc_stats_t& aStats) { + // PHC allocations must be disabled because PHC reallocs differently to + // mozjemalloc. +#ifdef MOZ_PHC + MOZ_RELEASE_ASSERT(!mozilla::phc::IsPHCEnabledOnCurrentThread()); +#endif + + if (aFromSize == malloc_good_size(aToSize)) { + // Same size class: in-place. + return true; + } + if (aFromSize >= aStats.page_size && aFromSize <= aStats.large_max && + aToSize >= aStats.page_size && aToSize <= aStats.large_max) { + // Any large class to any large class: in-place when there is space to. + return true; + } + if (IsSameRoundedHugeClass(aFromSize, aToSize, aStats)) { + // Huge sizes that round up to the same multiple of the chunk size: + // in-place. + return true; + } + return false; +} + +TEST(Jemalloc, InPlace) +{ + // Disable PHC allocations for this test, because CanReallocInPlace() isn't + // valid for PHC allocations. + AutoDisablePHCOnCurrentThread disable; + + jemalloc_stats_t stats; + jemalloc_stats(&stats); + + // Using a separate arena, which is always emptied after an iteration, ensures + // that in-place reallocation happens in all cases it can happen. This test is + // intended for developers to notice they may have to adapt other tests if + // they change the conditions for in-place reallocation. + arena_id_t arena = moz_create_arena(); + + for (size_t from_size : SizeClassesBetween(1, 2 * stats.chunksize)) { + SCOPED_TRACE(testing::Message() << "from_size = " << from_size); + for (size_t to_size : sSizes) { + SCOPED_TRACE(testing::Message() << "to_size = " << to_size); + char* ptr = (char*)moz_arena_malloc(arena, from_size); + char* ptr2 = (char*)moz_arena_realloc(arena, ptr, to_size); + if (CanReallocInPlace(from_size, to_size, stats)) { + EXPECT_EQ(ptr, ptr2); + } else { + EXPECT_NE(ptr, ptr2); + } + moz_arena_free(arena, ptr2); + } + } + + moz_dispose_arena(arena); +} + +// Bug 1474254: disable this test for windows ccov builds because it leads to +// timeout. +#if !defined(XP_WIN) || !defined(MOZ_CODE_COVERAGE) +TEST(Jemalloc, JunkPoison) +{ + // Disable PHC allocations for this test, because CanReallocInPlace() isn't + // valid for PHC allocations, and the testing UAFs aren't valid. + AutoDisablePHCOnCurrentThread disable; + + jemalloc_stats_t stats; + jemalloc_stats(&stats); + + // Avoid death tests adding some unnecessary (long) delays. + SAVE_GDB_SLEEP_LOCAL(); + + // Create buffers in a separate arena, for faster comparisons with + // bulk_compare. + arena_id_t buf_arena = moz_create_arena(); + char* junk_buf = (char*)moz_arena_malloc(buf_arena, stats.page_size); + // Depending on its configuration, the allocator will either fill the + // requested allocation with the junk byte (0xe4) or with zeroes, or do + // nothing, in which case, since we're allocating in a fresh arena, + // we'll be getting zeroes. + char junk = stats.opt_junk ? '\xe4' : '\0'; + for (size_t i = 0; i < stats.page_size; i++) { + ASSERT_EQ(junk_buf[i], junk); + } + + char* poison_buf = (char*)moz_arena_malloc(buf_arena, stats.page_size); + memset(poison_buf, 0xe5, stats.page_size); + + static const char fill = 0x42; + char* fill_buf = (char*)moz_arena_malloc(buf_arena, stats.page_size); + memset(fill_buf, fill, stats.page_size); + + arena_params_t params; + // Allow as many dirty pages in the arena as possible, so that purge never + // happens in it. Purge breaks some of the tests below randomly depending on + // what other things happen on other threads. + params.mMaxDirty = size_t(-1); + arena_id_t arena = moz_create_arena_with_params(¶ms); + + // Mozjemalloc is configured to only poison the first four cache lines. + const size_t poison_check_len = 256; + + // Allocating should junk the buffer, and freeing should poison the buffer. + for (size_t size : sSizes) { + if (size <= stats.large_max) { + SCOPED_TRACE(testing::Message() << "size = " << size); + char* buf = (char*)moz_arena_malloc(arena, size); + size_t allocated = moz_malloc_usable_size(buf); + if (stats.opt_junk || stats.opt_zero) { + ASSERT_NO_FATAL_FAILURE( + bulk_compare(buf, 0, allocated, junk_buf, stats.page_size)); + } + moz_arena_free(arena, buf); + // We purposefully do a use-after-free here, to check that the data was + // poisoned. + ASSERT_NO_FATAL_FAILURE( + bulk_compare(buf, 0, std::min(allocated, poison_check_len), + poison_buf, stats.page_size)); + } + } + + // Shrinking in the same size class should be in place and poison between the + // new allocation size and the old one. + size_t prev = 0; + for (size_t size : SizeClassesBetween(1, 2 * stats.chunksize)) { + SCOPED_TRACE(testing::Message() << "size = " << size); + SCOPED_TRACE(testing::Message() << "prev = " << prev); + char* ptr = (char*)moz_arena_malloc(arena, size); + memset(ptr, fill, moz_malloc_usable_size(ptr)); + char* ptr2 = (char*)moz_arena_realloc(arena, ptr, prev + 1); + ASSERT_EQ(ptr, ptr2); + ASSERT_NO_FATAL_FAILURE( + bulk_compare(ptr, 0, prev + 1, fill_buf, stats.page_size)); + ASSERT_NO_FATAL_FAILURE(bulk_compare(ptr, prev + 1, + std::min(size, poison_check_len), + poison_buf, stats.page_size)); + moz_arena_free(arena, ptr); + prev = size; + } + + // In-place realloc should junk the new bytes when growing and poison the old + // bytes when shrinking. + for (size_t from_size : SizeClassesBetween(1, 2 * stats.chunksize)) { + SCOPED_TRACE(testing::Message() << "from_size = " << from_size); + for (size_t to_size : sSizes) { + SCOPED_TRACE(testing::Message() << "to_size = " << to_size); + if (CanReallocInPlace(from_size, to_size, stats)) { + char* ptr = (char*)moz_arena_malloc(arena, from_size); + memset(ptr, fill, moz_malloc_usable_size(ptr)); + char* ptr2 = (char*)moz_arena_realloc(arena, ptr, to_size); + ASSERT_EQ(ptr, ptr2); + // Shrinking allocation + if (from_size >= to_size) { + ASSERT_NO_FATAL_FAILURE( + bulk_compare(ptr, 0, to_size, fill_buf, stats.page_size)); + // Huge allocations have guards and will crash when accessing + // beyond the valid range. + if (to_size > stats.large_max) { + size_t page_limit = ALIGNMENT_CEILING(to_size, stats.page_size); + ASSERT_NO_FATAL_FAILURE(bulk_compare( + ptr, to_size, std::min(page_limit, poison_check_len), + poison_buf, stats.page_size)); + ASSERT_DEATH_WRAP(ptr[page_limit] = 0, ""); + } else { + ASSERT_NO_FATAL_FAILURE(bulk_compare( + ptr, to_size, std::min(from_size, poison_check_len), poison_buf, + stats.page_size)); + } + } else { + // Enlarging allocation + ASSERT_NO_FATAL_FAILURE( + bulk_compare(ptr, 0, from_size, fill_buf, stats.page_size)); + if (stats.opt_junk || stats.opt_zero) { + ASSERT_NO_FATAL_FAILURE(bulk_compare(ptr, from_size, to_size, + junk_buf, stats.page_size)); + } + // Huge allocation, so should have a guard page following + if (to_size > stats.large_max) { + ASSERT_DEATH_WRAP( + ptr[ALIGNMENT_CEILING(to_size, stats.page_size)] = 0, ""); + } + } + moz_arena_free(arena, ptr2); + } + } + } + + // Growing to a different size class should poison the old allocation, + // preserve the original bytes, and junk the new bytes in the new allocation. + for (size_t from_size : SizeClassesBetween(1, 2 * stats.chunksize)) { + SCOPED_TRACE(testing::Message() << "from_size = " << from_size); + for (size_t to_size : sSizes) { + if (from_size < to_size && malloc_good_size(to_size) != from_size && + !IsSameRoundedHugeClass(from_size, to_size, stats)) { + SCOPED_TRACE(testing::Message() << "to_size = " << to_size); + char* ptr = (char*)moz_arena_malloc(arena, from_size); + memset(ptr, fill, moz_malloc_usable_size(ptr)); + // Avoid in-place realloc by allocating a buffer, expecting it to be + // right after the buffer we just received. Buffers smaller than the + // page size and exactly or larger than the size of the largest large + // size class can't be reallocated in-place. + char* avoid_inplace = nullptr; + if (from_size >= stats.page_size && from_size < stats.large_max) { + avoid_inplace = (char*)moz_arena_malloc(arena, stats.page_size); + ASSERT_EQ(ptr + from_size, avoid_inplace); + } + char* ptr2 = (char*)moz_arena_realloc(arena, ptr, to_size); + ASSERT_NE(ptr, ptr2); + if (from_size <= stats.large_max) { + ASSERT_NO_FATAL_FAILURE( + bulk_compare(ptr, 0, std::min(from_size, poison_check_len), + poison_buf, stats.page_size)); + } + ASSERT_NO_FATAL_FAILURE( + bulk_compare(ptr2, 0, from_size, fill_buf, stats.page_size)); + if (stats.opt_junk || stats.opt_zero) { + size_t rounded_to_size = malloc_good_size(to_size); + ASSERT_NE(to_size, rounded_to_size); + ASSERT_NO_FATAL_FAILURE(bulk_compare(ptr2, from_size, rounded_to_size, + junk_buf, stats.page_size)); + } + moz_arena_free(arena, ptr2); + moz_arena_free(arena, avoid_inplace); + } + } + } + + // Shrinking to a different size class should poison the old allocation, + // preserve the original bytes, and junk the extra bytes in the new + // allocation. + for (size_t from_size : SizeClassesBetween(1, 2 * stats.chunksize)) { + SCOPED_TRACE(testing::Message() << "from_size = " << from_size); + for (size_t to_size : sSizes) { + if (from_size > to_size && + !CanReallocInPlace(from_size, to_size, stats)) { + SCOPED_TRACE(testing::Message() << "to_size = " << to_size); + char* ptr = (char*)moz_arena_malloc(arena, from_size); + memset(ptr, fill, from_size); + char* ptr2 = (char*)moz_arena_realloc(arena, ptr, to_size); + ASSERT_NE(ptr, ptr2); + if (from_size <= stats.large_max) { + ASSERT_NO_FATAL_FAILURE( + bulk_compare(ptr, 0, std::min(from_size, poison_check_len), + poison_buf, stats.page_size)); + } + ASSERT_NO_FATAL_FAILURE( + bulk_compare(ptr2, 0, to_size, fill_buf, stats.page_size)); + if (stats.opt_junk || stats.opt_zero) { + size_t rounded_to_size = malloc_good_size(to_size); + ASSERT_NE(to_size, rounded_to_size); + ASSERT_NO_FATAL_FAILURE(bulk_compare(ptr2, from_size, rounded_to_size, + junk_buf, stats.page_size)); + } + moz_arena_free(arena, ptr2); + } + } + } + + moz_dispose_arena(arena); + + moz_arena_free(buf_arena, poison_buf); + moz_arena_free(buf_arena, junk_buf); + moz_arena_free(buf_arena, fill_buf); + moz_dispose_arena(buf_arena); + + RESTORE_GDB_SLEEP_LOCAL(); +} +#endif // !defined(XP_WIN) || !defined(MOZ_CODE_COVERAGE) + +TEST(Jemalloc, TrailingGuard) +{ + // Disable PHC allocations for this test, because even a single PHC + // allocation occurring can throw it off. + AutoDisablePHCOnCurrentThread disable; + + jemalloc_stats_t stats; + jemalloc_stats(&stats); + + // Avoid death tests adding some unnecessary (long) delays. + SAVE_GDB_SLEEP_LOCAL(); + + arena_id_t arena = moz_create_arena(); + ASSERT_TRUE(arena != 0); + + // Do enough large allocations to fill a chunk, and then one additional one, + // and check that the guard page is still present after the one-but-last + // allocation, i.e. that we didn't allocate the guard. + Vector<void*> ptr_list; + for (size_t cnt = 0; cnt < stats.large_max / stats.page_size; cnt++) { + void* ptr = moz_arena_malloc(arena, stats.page_size); + ASSERT_TRUE(ptr != nullptr); + ASSERT_TRUE(ptr_list.append(ptr)); + } + + void* last_ptr_in_chunk = ptr_list[ptr_list.length() - 1]; + void* extra_ptr = moz_arena_malloc(arena, stats.page_size); + void* guard_page = (void*)ALIGNMENT_CEILING( + (uintptr_t)last_ptr_in_chunk + stats.page_size, stats.page_size); + jemalloc_ptr_info_t info; + jemalloc_ptr_info(guard_page, &info); + ASSERT_TRUE(jemalloc_ptr_is_freed_page(&info)); + + ASSERT_DEATH_WRAP(*(char*)guard_page = 0, ""); + + for (void* ptr : ptr_list) { + moz_arena_free(arena, ptr); + } + moz_arena_free(arena, extra_ptr); + + moz_dispose_arena(arena); + + RESTORE_GDB_SLEEP_LOCAL(); +} + +TEST(Jemalloc, LeadingGuard) +{ + // Disable PHC allocations for this test, because even a single PHC + // allocation occurring can throw it off. + AutoDisablePHCOnCurrentThread disable; + + jemalloc_stats_t stats; + jemalloc_stats(&stats); + + // Avoid death tests adding some unnecessary (long) delays. + SAVE_GDB_SLEEP_LOCAL(); + + arena_id_t arena = moz_create_arena(); + ASSERT_TRUE(arena != 0); + + // Do a simple normal allocation, but force all the allocation space + // in the chunk to be used up. This allows us to check that we get + // the safe area right in the logic that follows (all memory will be + // committed and initialized), and it forces this pointer to the start + // of the zone to sit at the very start of the usable chunk area. + void* ptr = moz_arena_malloc(arena, stats.large_max); + ASSERT_TRUE(ptr != nullptr); + // If ptr is chunk-aligned, the above allocation went wrong. + void* chunk_start = (void*)ALIGNMENT_FLOOR((uintptr_t)ptr, stats.chunksize); + ASSERT_NE((uintptr_t)ptr, (uintptr_t)chunk_start); + // If ptr is 1 page after the chunk start (so right after the header), + // we must have missed adding the guard page. + ASSERT_NE((uintptr_t)ptr, (uintptr_t)chunk_start + stats.page_size); + // The actual start depends on the amount of metadata versus the page + // size, so we can't check equality without pulling in too many + // implementation details. + + // Guard page should be right before data area + void* guard_page = (void*)(((uintptr_t)ptr) - sizeof(void*)); + jemalloc_ptr_info_t info; + jemalloc_ptr_info(guard_page, &info); + ASSERT_TRUE(info.tag == TagUnknown); + ASSERT_DEATH_WRAP(*(char*)guard_page = 0, ""); + + moz_arena_free(arena, ptr); + moz_dispose_arena(arena); + + RESTORE_GDB_SLEEP_LOCAL(); +} + +TEST(Jemalloc, DisposeArena) +{ + jemalloc_stats_t stats; + jemalloc_stats(&stats); + + // Avoid death tests adding some unnecessary (long) delays. + SAVE_GDB_SLEEP_LOCAL(); + + arena_id_t arena = moz_create_arena(); + void* ptr = moz_arena_malloc(arena, 42); + // Disposing of the arena when it's not empty is a MOZ_CRASH-worthy error. + ASSERT_DEATH_WRAP(moz_dispose_arena(arena), ""); + moz_arena_free(arena, ptr); + moz_dispose_arena(arena); + + arena = moz_create_arena(); + ptr = moz_arena_malloc(arena, stats.page_size * 2); + // Disposing of the arena when it's not empty is a MOZ_CRASH-worthy error. + ASSERT_DEATH_WRAP(moz_dispose_arena(arena), ""); + moz_arena_free(arena, ptr); + moz_dispose_arena(arena); + + arena = moz_create_arena(); + ptr = moz_arena_malloc(arena, stats.chunksize * 2); +#ifdef MOZ_DEBUG + // On debug builds, we do the expensive check that arenas are empty. + ASSERT_DEATH_WRAP(moz_dispose_arena(arena), ""); + moz_arena_free(arena, ptr); + moz_dispose_arena(arena); +#else + // Currently, the allocator can't trivially check whether the arena is empty + // of huge allocations, so disposing of it works. + moz_dispose_arena(arena); + // But trying to free a pointer that belongs to it will MOZ_CRASH. + ASSERT_DEATH_WRAP(free(ptr), ""); + // Likewise for realloc + ASSERT_DEATH_WRAP(ptr = realloc(ptr, stats.chunksize * 3), ""); +#endif + + // Using the arena after it's been disposed of is MOZ_CRASH-worthy. + ASSERT_DEATH_WRAP(moz_arena_malloc(arena, 42), ""); + + RESTORE_GDB_SLEEP_LOCAL(); +} diff --git a/memory/gtest/moz.build b/memory/gtest/moz.build new file mode 100644 index 0000000000..973d0b4dda --- /dev/null +++ b/memory/gtest/moz.build @@ -0,0 +1,22 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# skip the test on windows10-aarch64 due to perma-crash - bug 1544961 +if CONFIG["OS_TARGET"] != "Android" and not ( + CONFIG["OS_TARGET"] == "WINNT" and CONFIG["TARGET_CPU"] == "aarch64" +): + UNIFIED_SOURCES += [ + "TestJemalloc.cpp", + ] + + if CONFIG["MOZ_PHC"]: + DEFINES["MOZ_PHC"] = True + +FINAL_LIBRARY = "xul-gtest" + +LOCAL_INCLUDES += [ + "../build", +] diff --git a/memory/moz.build b/memory/moz.build new file mode 100644 index 0000000000..e1da146a3c --- /dev/null +++ b/memory/moz.build @@ -0,0 +1,31 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +with Files("**"): + BUG_COMPONENT = ("Core", "Memory Allocator") + +DIRS += [ + "build", +] + +if CONFIG["MOZ_BUILD_APP"] == "memory": + DIRS += [ + "mozjemalloc_info", + ] +else: + # For now, don't build mozalloc when building with --enable-project=memory + DIRS += [ + "mozalloc", + ] + +if CONFIG["MOZ_WIDGET_TOOLKIT"]: + DIRS += ["volatile"] + +# NB: gtest dir is included in toolkit/toolkit.build due to its dependency +# on libxul. + +if CONFIG["MOZ_REPLACE_MALLOC"]: + DIRS += ["replace"] diff --git a/memory/moz.configure b/memory/moz.configure new file mode 100644 index 0000000000..9745b253cd --- /dev/null +++ b/memory/moz.configure @@ -0,0 +1,7 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +imply_option("--enable-jemalloc", True) diff --git a/memory/mozalloc/cxxalloc.cpp b/memory/mozalloc/cxxalloc.cpp new file mode 100644 index 0000000000..41f419fe2d --- /dev/null +++ b/memory/mozalloc/cxxalloc.cpp @@ -0,0 +1,26 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#define MOZ_MEMORY_IMPL +#include "mozmemory_wrap.h" +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC +// See mozmemory_wrap.h for more details. Files that are part of libmozglue, +// need to use _impl suffixes, which is becoming cumbersome. We'll have to use +// something like a malloc.h wrapper and allow the use of the functions without +// a _impl suffix. In the meanwhile, this is enough to get by for C++ code. +#define MALLOC_DECL(name, return_type, ...) \ + MOZ_MEMORY_API return_type name##_impl(__VA_ARGS__); +#include "malloc_decls.h" + +#include "mozilla/Attributes.h" + +extern "C" MFBT_API void* moz_xmalloc(size_t size) MOZ_INFALLIBLE_ALLOCATOR; + +namespace std { +struct nothrow_t; +} + +#define MOZALLOC_EXPORT_NEW MFBT_API + +#include "mozilla/cxxalloc.h" diff --git a/memory/mozalloc/cxxalloc.h b/memory/mozalloc/cxxalloc.h new file mode 100644 index 0000000000..c6fb4bb1dc --- /dev/null +++ b/memory/mozalloc/cxxalloc.h @@ -0,0 +1,82 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_cxxalloc_h +#define mozilla_cxxalloc_h + +/* + * We implement the default operators new/delete as part of + * libmozalloc, replacing their definitions in libstdc++. The + * operator new* definitions in libmozalloc will never return a NULL + * pointer. + * + * Each operator new immediately below returns a pointer to memory + * that can be delete'd by any of + * + * (1) the matching infallible operator delete immediately below + * (2) the matching system |operator delete(void*, std::nothrow)| + * (3) the matching system |operator delete(void*) noexcept(false)| + * + * NB: these are declared |noexcept(false)|, though they will never + * throw that exception. This declaration is consistent with the rule + * that |::operator new() noexcept(false)| will never return NULL. + * + * NB: mozilla::fallible can be used instead of std::nothrow. + */ + +#ifndef MOZALLOC_EXPORT_NEW +# define MOZALLOC_EXPORT_NEW MFBT_API +#endif + +MOZALLOC_EXPORT_NEW void* operator new(size_t size) noexcept(false) { + return moz_xmalloc(size); +} + +MOZALLOC_EXPORT_NEW void* operator new(size_t size, + const std::nothrow_t&) noexcept(true) { + return malloc_impl(size); +} + +MOZALLOC_EXPORT_NEW void* operator new[](size_t size) noexcept(false) { + return moz_xmalloc(size); +} + +MOZALLOC_EXPORT_NEW void* operator new[](size_t size, + const std::nothrow_t&) noexcept(true) { + return malloc_impl(size); +} + +MOZALLOC_EXPORT_NEW void operator delete(void* ptr) noexcept(true) { + return free_impl(ptr); +} + +MOZALLOC_EXPORT_NEW void operator delete(void* ptr, + const std::nothrow_t&) noexcept(true) { + return free_impl(ptr); +} + +MOZALLOC_EXPORT_NEW void operator delete[](void* ptr) noexcept(true) { + return free_impl(ptr); +} + +MOZALLOC_EXPORT_NEW void operator delete[]( + void* ptr, const std::nothrow_t&) noexcept(true) { + return free_impl(ptr); +} + +#if defined(XP_WIN) +// We provide the global sized delete overloads unconditionally because the +// MSVC runtime headers do, despite compiling with /Zc:sizedDealloc- +MOZALLOC_EXPORT_NEW void operator delete(void* ptr, + size_t /*size*/) noexcept(true) { + return free_impl(ptr); +} + +MOZALLOC_EXPORT_NEW void operator delete[](void* ptr, + size_t /*size*/) noexcept(true) { + return free_impl(ptr); +} +#endif + +#endif /* mozilla_cxxalloc_h */ diff --git a/memory/mozalloc/moz.build b/memory/mozalloc/moz.build new file mode 100644 index 0000000000..c5cd784607 --- /dev/null +++ b/memory/mozalloc/moz.build @@ -0,0 +1,59 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +NoVisibilityFlags() + +EXPORTS.mozilla += [ + "cxxalloc.h", + "mozalloc.h", + "mozalloc_abort.h", + "mozalloc_oom.h", +] + +if CONFIG["WRAP_STL_INCLUDES"]: + if CONFIG["CC_TYPE"] in ("clang", "gcc"): + EXPORTS.mozilla += ["throw_gcc.h"] + elif CONFIG["CC_TYPE"] == "clang-cl": + DEFINES["_HAS_EXCEPTIONS"] = 0 + SOURCES += [ + "msvc_raise_wrappers.cpp", + ] + +if CONFIG["OS_TARGET"] == "WINNT": + # Don't build winheap.cpp when mozglue is a static library. + if CONFIG["MOZ_MEMORY"] or not CONFIG["JS_STANDALONE"]: + # Keep this file separate to avoid #include'ing windows.h everywhere. + SOURCES += [ + "winheap.cpp", + ] + +UNIFIED_SOURCES += [ + "mozalloc.cpp", + "mozalloc_abort.cpp", + "mozalloc_oom.cpp", +] + +if CONFIG["MOZ_MEMORY"]: + # In MinGW, we don't want to actually export these functions out of the library + # as the functions in libc++ correctly forward to jemalloc and exporting them + # produces duplicate symbol errors. + if not (CONFIG["CC_TYPE"] == "clang" and CONFIG["OS_TARGET"] == "WINNT"): + SOURCES += [ + "cxxalloc.cpp", + ] + +FINAL_LIBRARY = "mozglue" + +# The strndup declaration in string.h is in an ifdef __USE_GNU section +DEFINES["_GNU_SOURCE"] = True + +DisableStlWrapping() + +LOCAL_INCLUDES += [ + "!/xpcom", + "/memory/build", +] + +DIST_INSTALL = True diff --git a/memory/mozalloc/mozalloc.cpp b/memory/mozalloc/mozalloc.cpp new file mode 100644 index 0000000000..aef8ab943a --- /dev/null +++ b/memory/mozalloc/mozalloc.cpp @@ -0,0 +1,159 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: sw=2 ts=4 et : + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <stddef.h> // for size_t + +#if defined(MALLOC_H) +# include MALLOC_H // for memalign, malloc_size, malloc_us +#endif // if defined(MALLOC_H) + +#if !defined(MOZ_MEMORY) +// When jemalloc is disabled, or when building the static runtime variant, +// we need not to use the suffixes. + +# include <stdlib.h> // for malloc, free +# if defined(XP_UNIX) +# include <unistd.h> +# endif // if defined(XP_UNIX) + +# define malloc_impl malloc +# define calloc_impl calloc +# define realloc_impl realloc +# define free_impl free +# define memalign_impl memalign +# define malloc_usable_size_impl malloc_usable_size +# define strdup_impl strdup +# define strndup_impl strndup + +#endif + +#include <errno.h> +#include <new> // for std::bad_alloc +#include <cstring> + +#include <sys/types.h> + +#include "mozilla/Assertions.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/Likely.h" +#include "mozilla/mozalloc.h" +#include "mozilla/mozalloc_oom.h" // for mozalloc_handle_oom + +#if defined(MOZ_MEMORY) +MOZ_MEMORY_API char* strdup_impl(const char*); +MOZ_MEMORY_API char* strndup_impl(const char*, size_t); +#endif + +void* moz_xmalloc(size_t size) { + void* ptr = malloc_impl(size); + if (MOZ_UNLIKELY(!ptr && size)) { + mozalloc_handle_oom(size); + return moz_xmalloc(size); + } + return ptr; +} + +void* moz_xcalloc(size_t nmemb, size_t size) { + void* ptr = calloc_impl(nmemb, size); + if (MOZ_UNLIKELY(!ptr && nmemb && size)) { + mozilla::CheckedInt<size_t> totalSize = + mozilla::CheckedInt<size_t>(nmemb) * size; + mozalloc_handle_oom(totalSize.isValid() ? totalSize.value() : SIZE_MAX); + return moz_xcalloc(nmemb, size); + } + return ptr; +} + +void* moz_xrealloc(void* ptr, size_t size) { + void* newptr = realloc_impl(ptr, size); + if (MOZ_UNLIKELY(!newptr && size)) { + mozalloc_handle_oom(size); + return moz_xrealloc(ptr, size); + } + return newptr; +} + +char* moz_xstrdup(const char* str) { + char* dup = strdup_impl(str); + if (MOZ_UNLIKELY(!dup)) { + mozalloc_handle_oom(0); + return moz_xstrdup(str); + } + return dup; +} + +#if defined(HAVE_STRNDUP) +char* moz_xstrndup(const char* str, size_t strsize) { + char* dup = strndup_impl(str, strsize); + if (MOZ_UNLIKELY(!dup)) { + mozalloc_handle_oom(strsize); + return moz_xstrndup(str, strsize); + } + return dup; +} +#endif // if defined(HAVE_STRNDUP) + +void* moz_xmemdup(const void* ptr, size_t size) { + void* newPtr = moz_xmalloc(size); + memcpy(newPtr, ptr, size); + return newPtr; +} + +#ifndef __wasm__ +# ifndef HAVE_MEMALIGN +// We always have a definition of memalign, but system headers don't +// necessarily come with a declaration. +extern "C" void* memalign(size_t, size_t); +# endif + +void* moz_xmemalign(size_t boundary, size_t size) { + void* ptr = memalign_impl(boundary, size); + if (MOZ_UNLIKELY(!ptr && EINVAL != errno)) { + mozalloc_handle_oom(size); + return moz_xmemalign(boundary, size); + } + // non-NULL ptr or errno == EINVAL + return ptr; +} +#endif + +size_t moz_malloc_usable_size(void* ptr) { + if (!ptr) return 0; + +#if defined(XP_DARWIN) + return malloc_size(ptr); +#elif defined(HAVE_MALLOC_USABLE_SIZE) || defined(MOZ_MEMORY) + return malloc_usable_size_impl(ptr); +#elif defined(XP_WIN) + return _msize(ptr); +#else + return 0; +#endif +} + +size_t moz_malloc_size_of(const void* ptr) { + return moz_malloc_usable_size((void*)ptr); +} + +#if defined(MOZ_MEMORY) +# include "mozjemalloc_types.h" +// mozmemory.h declares jemalloc_ptr_info(), but including that header in this +// file is complicated. So we just redeclare it here instead, and include +// mozjemalloc_types.h for jemalloc_ptr_info_t. +MOZ_JEMALLOC_API void jemalloc_ptr_info(const void* ptr, + jemalloc_ptr_info_t* info); +#endif + +size_t moz_malloc_enclosing_size_of(const void* ptr) { +#if defined(MOZ_MEMORY) + jemalloc_ptr_info_t info; + jemalloc_ptr_info(ptr, &info); + return jemalloc_ptr_is_live(&info) ? info.size : 0; +#else + return 0; +#endif +} diff --git a/memory/mozalloc/mozalloc.h b/memory/mozalloc/mozalloc.h new file mode 100644 index 0000000000..1ebbb83237 --- /dev/null +++ b/memory/mozalloc/mozalloc.h @@ -0,0 +1,198 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: sw=2 ts=4 et : + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_mozalloc_h +#define mozilla_mozalloc_h + +/* + * https://bugzilla.mozilla.org/show_bug.cgi?id=427099 + */ + +#if defined(__cplusplus) +# include <new> +// Since libstdc++ 6, including the C headers (e.g. stdlib.h) instead of the +// corresponding C++ header (e.g. cstdlib) can cause confusion in C++ code +// using things defined there. Specifically, with stdlib.h, the use of abs() +// in gfx/graphite2/src/inc/UtfCodec.h somehow ends up picking the wrong abs() +# include <cstdlib> +#else +# include <stdlib.h> +#endif + +#if defined(MOZ_MEMORY) && defined(IMPL_MFBT) +# define MOZ_MEMORY_IMPL +# include "mozmemory_wrap.h" +# define MALLOC_FUNCS MALLOC_FUNCS_MALLOC +// See mozmemory_wrap.h for more details. Files that are part of libmozglue, +// need to use _impl suffixes, which is becoming cumbersome. We'll have to use +// something like a malloc.h wrapper and allow the use of the functions without +// a _impl suffix. In the meanwhile, this is enough to get by for C++ code. +# define NOTHROW_MALLOC_DECL(name, return_type, ...) \ + MOZ_MEMORY_API return_type name##_impl(__VA_ARGS__) noexcept(true); +# define MALLOC_DECL(name, return_type, ...) \ + MOZ_MEMORY_API return_type name##_impl(__VA_ARGS__); +# include "malloc_decls.h" +#endif + +#if defined(__cplusplus) +# include "mozilla/fallible.h" +# include "mozilla/mozalloc_abort.h" +# include "mozilla/TemplateLib.h" +#endif +#include "mozilla/Attributes.h" +#include "mozilla/Types.h" + +MOZ_BEGIN_EXTERN_C + +/* + * We need to use malloc_impl and free_impl in this file when they are + * defined, because of how mozglue.dll is linked on Windows, where using + * malloc/free would end up using the symbols from the MSVCRT instead of + * ours. + */ +#ifndef free_impl +# define free_impl free +# define free_impl_ +#endif +#ifndef malloc_impl +# define malloc_impl malloc +# define malloc_impl_ +#endif + +/* + * Each declaration below is analogous to a "standard" allocation + * function, except that the out-of-memory handling is made explicit. + * The |moz_x| versions will never return a NULL pointer; if memory + * is exhausted, they abort. The |moz_| versions may return NULL + * pointers if memory is exhausted: their return value must be checked. + * + * All these allocation functions are *guaranteed* to return a pointer + * to memory allocated in such a way that that memory can be freed by + * passing that pointer to |free()|. + */ + +MFBT_API void* moz_xmalloc(size_t size) MOZ_INFALLIBLE_ALLOCATOR; + +MFBT_API void* moz_xcalloc(size_t nmemb, size_t size) MOZ_INFALLIBLE_ALLOCATOR; + +MFBT_API void* moz_xrealloc(void* ptr, size_t size) MOZ_INFALLIBLE_ALLOCATOR; + +MFBT_API char* moz_xstrdup(const char* str) MOZ_INFALLIBLE_ALLOCATOR; + +#if defined(HAVE_STRNDUP) +MFBT_API char* moz_xstrndup(const char* str, + size_t strsize) MOZ_INFALLIBLE_ALLOCATOR; +#endif /* if defined(HAVE_STRNDUP) */ + +MFBT_API void* moz_xmemdup(const void* ptr, + size_t size) MOZ_INFALLIBLE_ALLOCATOR; + +MFBT_API void* moz_xmemalign(size_t boundary, + size_t size) MOZ_INFALLIBLE_ALLOCATOR; + +MFBT_API size_t moz_malloc_usable_size(void* ptr); + +MFBT_API size_t moz_malloc_size_of(const void* ptr); + +/* + * Like moz_malloc_size_of(), but works reliably with interior pointers, i.e. + * pointers into the middle of a live allocation. + */ +MFBT_API size_t moz_malloc_enclosing_size_of(const void* ptr); + +MOZ_END_EXTERN_C + +#ifdef __cplusplus + +/* NB: This is defined with MFBT_API just to silence vacuous warnings + * about symbol visibility on OS X/gcc. + * These symbols are force-inline mainly for performance reasons, and + * not exported. While the standard doesn't allow that, we are in a + * controlled environment where the issues the standard tries to + * prevent don't apply, and we can't end up in situations where + * operator new and operator delete are inconsistent. */ +# ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Winline-new-delete" +# endif + +# if defined(XP_MACOSX) +# define MOZALLOC_EXPORT_NEW MFBT_API MOZ_ALWAYS_INLINE_EVEN_DEBUG +# else +# define MOZALLOC_EXPORT_NEW MOZ_ALWAYS_INLINE_EVEN_DEBUG +# endif + +# include "mozilla/cxxalloc.h" +# ifdef __clang__ +# pragma clang diagnostic pop +# endif + +/* + * This policy is identical to MallocAllocPolicy, except it uses + * moz_xmalloc/moz_xcalloc/moz_xrealloc instead of + * malloc/calloc/realloc. + */ +class InfallibleAllocPolicy { + public: + template <typename T> + T* maybe_pod_malloc(size_t aNumElems) { + return pod_malloc<T>(aNumElems); + } + + template <typename T> + T* maybe_pod_calloc(size_t aNumElems) { + return pod_calloc<T>(aNumElems); + } + + template <typename T> + T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + return pod_realloc<T>(aPtr, aOldSize, aNewSize); + } + + template <typename T> + T* pod_malloc(size_t aNumElems) { + if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) { + reportAllocOverflow(); + } + return static_cast<T*>(moz_xmalloc(aNumElems * sizeof(T))); + } + + template <typename T> + T* pod_calloc(size_t aNumElems) { + return static_cast<T*>(moz_xcalloc(aNumElems, sizeof(T))); + } + + template <typename T> + T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) { + reportAllocOverflow(); + } + return static_cast<T*>(moz_xrealloc(aPtr, aNewSize * sizeof(T))); + } + + template <typename T> + void free_(T* aPtr, size_t aNumElems = 0) { + free_impl(aPtr); + } + + void reportAllocOverflow() const { mozalloc_abort("alloc overflow"); } + + bool checkSimulatedOOM() const { return true; } +}; + +#endif /* ifdef __cplusplus */ + +#ifdef malloc_impl_ +# undef malloc_impl_ +# undef malloc_impl +#endif +#ifdef free_impl_ +# undef free_impl_ +# undef free_impl +#endif + +#endif /* ifndef mozilla_mozalloc_h */ diff --git a/memory/mozalloc/mozalloc_abort.cpp b/memory/mozalloc/mozalloc_abort.cpp new file mode 100644 index 0000000000..3cfc92533e --- /dev/null +++ b/memory/mozalloc/mozalloc_abort.cpp @@ -0,0 +1,96 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: sw=2 ts=4 et : + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/mozalloc_abort.h" + +#ifdef ANDROID +# include <android/log.h> +#endif +#ifdef MOZ_WIDGET_ANDROID +# include "APKOpen.h" +# include "dlfcn.h" +#endif +#include <stdio.h> +#include <string.h> + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +void mozalloc_abort(const char* const msg) { +#ifndef ANDROID + fputs(msg, stderr); + fputs("\n", stderr); +#else + __android_log_print(ANDROID_LOG_ERROR, "Gecko", "mozalloc_abort: %s", msg); +#endif + +#ifdef MOZ_WIDGET_ANDROID + abortThroughJava(msg); +#endif + + MOZ_CRASH_UNSAFE(msg); +} + +#ifdef MOZ_WIDGET_ANDROID +template <size_t N> +void fillAbortMessage(char (&msg)[N], uintptr_t retAddress) { + /* + * On Android, we often don't have reliable backtrace when crashing inside + * abort(). Therefore, we try to find out who is calling abort() and add + * that to the message. + */ + Dl_info info = {}; + dladdr(reinterpret_cast<void*>(retAddress), &info); + + const char* const module = info.dli_fname ? info.dli_fname : ""; + const char* const base_module = strrchr(module, '/'); + const void* const module_offset = + reinterpret_cast<void*>(retAddress - uintptr_t(info.dli_fbase)); + const char* const sym = info.dli_sname ? info.dli_sname : ""; + + SprintfLiteral(msg, "abort() called from %s:%p (%s)", + base_module ? base_module + 1 : module, module_offset, sym); +} +#endif + +#if defined(XP_UNIX) && !defined(MOZ_ASAN) && !defined(MOZ_TSAN) && \ + !defined(LIBFUZZER) +// Define abort() here, so that it is used instead of the system abort(). This +// lets us control the behavior when aborting, in order to get better results +// on *NIX platforms. See mozalloc_abort for details. +// +// For AddressSanitizer, we must not redefine system abort because the ASan +// option "abort_on_error=1" calls abort() and therefore causes the following +// call chain with our redefined abort: +// +// ASan -> abort() -> moz_abort() -> MOZ_CRASH() -> Segmentation fault +// +// That segmentation fault will be interpreted as another bug by ASan and as a +// result, ASan will just exit(1) instead of aborting. +// +// The same applies to ThreadSanitizer when run with "halt_on_error=1" in +// combination with "abort_on_error=1". +// +// When building with libFuzzer, it pulls in the UndefinedBehaviorSanitizer +// runtime which also requires the same workaround as with ASan or TSan. +extern "C" void abort(void) { +# ifdef MOZ_WIDGET_ANDROID + char msg[64] = {}; + fillAbortMessage(msg, uintptr_t(__builtin_return_address(0))); +# else + const char* const msg = "Redirecting call to abort() to mozalloc_abort\n"; +# endif + + mozalloc_abort(msg); + + // We won't reach here because mozalloc_abort() is MOZ_NORETURN. But that + // annotation isn't used on ARM (see mozalloc_abort.h for why) so we add a + // unreachable marker here to avoid a "'noreturn' function does return" + // warning. + MOZ_ASSUME_UNREACHABLE_MARKER(); +} +#endif diff --git a/memory/mozalloc/mozalloc_abort.h b/memory/mozalloc/mozalloc_abort.h new file mode 100644 index 0000000000..b9ff92a18e --- /dev/null +++ b/memory/mozalloc/mozalloc_abort.h @@ -0,0 +1,28 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: sw=2 ts=4 et : + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_mozalloc_abort_h +#define mozilla_mozalloc_abort_h + +#include "mozilla/Attributes.h" +#include "mozilla/Types.h" + +/** + * Terminate this process in such a way that breakpad is triggered, if + * at all possible. + * + * Note: MOZ_NORETURN seems to break crash stacks on ARM, so we don't + * use that annotation there. + */ +extern "C" MFBT_API +#if !defined(__arm__) + MOZ_NORETURN +#endif + void + mozalloc_abort(const char* const msg); + +#endif /* ifndef mozilla_mozalloc_abort_h */ diff --git a/memory/mozalloc/mozalloc_oom.cpp b/memory/mozalloc/mozalloc_oom.cpp new file mode 100644 index 0000000000..efe5dab4a2 --- /dev/null +++ b/memory/mozalloc/mozalloc_oom.cpp @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: sw=2 ts=4 et : + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/mozalloc_abort.h" +#include "mozilla/mozalloc_oom.h" +#include "mozilla/Assertions.h" + +#define OOM_MSG_LEADER "out of memory: 0x" +#define OOM_MSG_DIGITS "0000000000000000" // large enough for 2^64 +#define OOM_MSG_TRAILER " bytes requested" +#define OOM_MSG_FIRST_DIGIT_OFFSET sizeof(OOM_MSG_LEADER) - 1 +#define OOM_MSG_LAST_DIGIT_OFFSET \ + sizeof(OOM_MSG_LEADER) + sizeof(OOM_MSG_DIGITS) - 3 + +MFBT_DATA size_t gOOMAllocationSize = 0; + +static const char* hex = "0123456789ABCDEF"; + +void mozalloc_handle_oom(size_t size) { + char oomMsg[] = OOM_MSG_LEADER OOM_MSG_DIGITS OOM_MSG_TRAILER; + size_t i; + + // NB: this is handle_oom() stage 1, which simply aborts on OOM. + // we might proceed to a stage 2 in which an attempt is made to + // reclaim memory + // Warning: when stage 2 is done by, for example, notifying + // "memory-pressure" synchronously, please audit all + // nsExpirationTrackers and ensure that the actions they take + // on memory-pressure notifications (via NotifyExpired) are safe. + // Note that Document::SelectorCache::NotifyExpired is _known_ + // to not be safe: it will delete the selector it's caching, + // which might be in use at the time under querySelector or + // querySelectorAll. + + gOOMAllocationSize = size; + + static_assert(OOM_MSG_FIRST_DIGIT_OFFSET > 0, + "Loop below will never terminate (i can't go below 0)"); + + // Insert size into the diagnostic message using only primitive operations + for (i = OOM_MSG_LAST_DIGIT_OFFSET; size && i >= OOM_MSG_FIRST_DIGIT_OFFSET; + i--) { + oomMsg[i] = hex[size % 16]; + size /= 16; + } + + mozalloc_abort(oomMsg); +} diff --git a/memory/mozalloc/mozalloc_oom.h b/memory/mozalloc/mozalloc_oom.h new file mode 100644 index 0000000000..87a9c6d7ac --- /dev/null +++ b/memory/mozalloc/mozalloc_oom.h @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: sw=2 ts=4 et : + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_mozalloc_oom_h +#define mozilla_mozalloc_oom_h + +#include "mozalloc.h" + +/** + * Called when memory is critically low. Returns iff it was able to + * remedy the critical memory situation; if not, it will abort(). + */ +#ifdef __wasm__ +__attribute__((import_module("env"))) +__attribute__((import_name("mozalloc_handle_oom"))) +# if defined(__clang__) && (__clang_major__ < 11) +__attribute__((weak)) +# endif +#endif +MFBT_API void +mozalloc_handle_oom(size_t requestedSize); + +extern MFBT_DATA size_t gOOMAllocationSize; + +/* TODO: functions to query system memory usage and register + * critical-memory handlers. */ + +#endif /* ifndef mozilla_mozalloc_oom_h */ diff --git a/memory/mozalloc/msvc_raise_wrappers.cpp b/memory/mozalloc/msvc_raise_wrappers.cpp new file mode 100644 index 0000000000..6eb8bdbe31 --- /dev/null +++ b/memory/mozalloc/msvc_raise_wrappers.cpp @@ -0,0 +1,17 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: sw=2 ts=4 et : + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <exception> +#include "mozalloc_abort.h" + +static void __cdecl RaiseHandler(const std::exception& e) { + mozalloc_abort(e.what()); +} + +static struct StaticScopeStruct final { + StaticScopeStruct() { std::exception::_Set_raise_handler(RaiseHandler); } +} StaticScopeInvoke; diff --git a/memory/mozalloc/throw_gcc.h b/memory/mozalloc/throw_gcc.h new file mode 100644 index 0000000000..6a452ca5fc --- /dev/null +++ b/memory/mozalloc/throw_gcc.h @@ -0,0 +1,152 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: sw=2 ts=4 et : + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_throw_gcc_h +#define mozilla_throw_gcc_h + +#if !defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 14000 + +# include "mozilla/Attributes.h" + +# include <stdio.h> // snprintf +# include <string.h> // strerror + +// For gcc, we define these inline to abort so that we're absolutely +// certain that (i) no exceptions are thrown from Gecko; (ii) these +// errors are always terminal and caught by breakpad. + +# include "mozilla/mozalloc_abort.h" + +// libc++ 4.0.0 and higher use C++11 [[noreturn]] attributes for the functions +// below, and since clang does not allow mixing __attribute__((noreturn)) and +// [[noreturn]], we have to explicitly use the latter here. See bug 1329520. +# if defined(__clang__) +# if __has_feature(cxx_attributes) && defined(_LIBCPP_VERSION) && \ + _LIBCPP_VERSION >= 4000 +# define MOZ_THROW_NORETURN [[noreturn]] +# endif +# endif +# ifndef MOZ_THROW_NORETURN +# define MOZ_THROW_NORETURN MOZ_NORETURN +# endif + +// MinGW doesn't appropriately inline these functions in debug builds, +// so we need to do some extra coercion for it to do so. Bug 1332747 +# ifdef __MINGW32__ +# define MOZ_THROW_INLINE MOZ_ALWAYS_INLINE_EVEN_DEBUG +# define MOZ_THROW_EXPORT +# else +# define MOZ_THROW_INLINE MOZ_ALWAYS_INLINE +# define MOZ_THROW_EXPORT MOZ_EXPORT +# endif + +namespace std { + +// NB: user code is not supposed to touch the std:: namespace. We're +// doing this after careful review because we want to define our own +// exception throwing semantics. Don't try this at home! + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_bad_exception( + void) { + mozalloc_abort("fatal: STL threw bad_exception"); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_bad_alloc( + void) { + mozalloc_abort("fatal: STL threw bad_alloc"); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_bad_cast( + void) { + mozalloc_abort("fatal: STL threw bad_cast"); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_bad_typeid( + void) { + mozalloc_abort("fatal: STL threw bad_typeid"); +} + +// used by <functional> +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void +__throw_bad_function_call(void) { + mozalloc_abort("fatal: STL threw bad_function_call"); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_logic_error( + const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_domain_error( + const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void +__throw_invalid_argument(const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_length_error( + const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_out_of_range( + const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_runtime_error( + const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_range_error( + const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void +__throw_overflow_error(const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void +__throw_underflow_error(const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_ios_failure( + const char* msg) { + mozalloc_abort(msg); +} + +MOZ_THROW_NORETURN MOZ_THROW_EXPORT MOZ_THROW_INLINE void __throw_system_error( + int err) { + char error[128]; + snprintf(error, sizeof(error) - 1, "fatal: STL threw system_error: %s (%d)", + strerror(err), err); + mozalloc_abort(error); +} + +MOZ_THROW_NORETURN MOZ_EXPORT MOZ_ALWAYS_INLINE void __throw_regex_error( + int err) { + char error[128]; + snprintf(error, sizeof(error) - 1, "fatal: STL threw regex_error: %s (%d)", + strerror(err), err); + mozalloc_abort(error); +} + +} // namespace std + +# undef MOZ_THROW_NORETURN +# undef MOZ_THROW_INLINE + +#endif + +#endif // mozilla_throw_gcc_h diff --git a/memory/mozalloc/winheap.cpp b/memory/mozalloc/winheap.cpp new file mode 100644 index 0000000000..1d2e1e5599 --- /dev/null +++ b/memory/mozalloc/winheap.cpp @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: sw=2 ts=4 et : + */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/mozalloc.h" +#include <windows.h> + +#if !defined(MOZ_MEMORY) +# include <malloc.h> +# define malloc_impl malloc +# define calloc_impl calloc +# define realloc_impl realloc +# define free_impl free +#endif + +// Warning: C4273: 'HeapAlloc': inconsistent dll linkage +// The Windows headers define HeapAlloc as dllimport, but we define it as +// dllexport, which is a voluntary inconsistency. +#pragma warning(disable : 4273) + +MFBT_API +LPVOID WINAPI HeapAlloc(_In_ HANDLE hHeap, _In_ DWORD dwFlags, + _In_ SIZE_T dwBytes) { + if (dwFlags & HEAP_ZERO_MEMORY) { + return calloc_impl(1, dwBytes); + } + return malloc_impl(dwBytes); +} + +MFBT_API +LPVOID WINAPI HeapReAlloc(_In_ HANDLE hHeap, _In_ DWORD dwFlags, + _In_ LPVOID lpMem, _In_ SIZE_T dwBytes) { + // The HeapReAlloc contract is that failures preserve the existing + // allocation. We can't try to realloc in-place without possibly + // freeing the original allocation, breaking the contract. + // We also can't guarantee we zero all the memory from the end of + // the original allocation to the end of the new one because of the + // difference between the originally requested size and what + // malloc_usable_size would return us. + // So for both cases, just tell the caller we can't do what they + // requested. + if (dwFlags & (HEAP_REALLOC_IN_PLACE_ONLY | HEAP_ZERO_MEMORY)) { + return NULL; + } + return realloc_impl(lpMem, dwBytes); +} + +MFBT_API +BOOL WINAPI HeapFree(_In_ HANDLE hHeap, _In_ DWORD dwFlags, _In_ LPVOID lpMem) { + free_impl(lpMem); + return true; +} diff --git a/memory/mozjemalloc_info/MozjemallocInfo.cpp b/memory/mozjemalloc_info/MozjemallocInfo.cpp new file mode 100644 index 0000000000..0795be8047 --- /dev/null +++ b/memory/mozjemalloc_info/MozjemallocInfo.cpp @@ -0,0 +1,46 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <stdio.h> +#include <stdlib.h> + +#include "mozmemory.h" + +/* + * Print the configured size classes which we can then use to update + * documentation. + */ +int main() { + jemalloc_stats_t stats; + + const size_t num_bins = jemalloc_stats_num_bins(); + const size_t MAX_NUM_BINS = 100; + if (num_bins > MAX_NUM_BINS) { + fprintf(stderr, "Exceeded maximum number of jemalloc stats bins"); + return 1; + } + jemalloc_bin_stats_t bin_stats[MAX_NUM_BINS] = {{0}}; + jemalloc_stats(&stats, bin_stats); + + printf("Page size: %5zu\n", stats.page_size); + printf("Chunk size: %5zuKiB\n", stats.chunksize / 1024); + + printf("Quantum: %5zu\n", stats.quantum); + printf("Quantum max: %5zu\n", stats.quantum_max); + printf("Sub-page max: %5zu\n", stats.page_size / 2); + printf("Large max: %5zuKiB\n", stats.large_max / 1024); + + printf("\nBin stats:\n"); + for (unsigned i = 0; i < num_bins; i++) { + auto& bin = bin_stats[i]; + if (bin.size) { + printf(" Bin %5zu has runs of %3zuKiB\n", bin.size, + bin.bytes_per_run / 1024); + } + } + + return EXIT_SUCCESS; +} diff --git a/memory/mozjemalloc_info/moz.build b/memory/mozjemalloc_info/moz.build new file mode 100644 index 0000000000..76dd3c5001 --- /dev/null +++ b/memory/mozjemalloc_info/moz.build @@ -0,0 +1,39 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Program("mozjemalloc-info") + +SOURCES += [ + "/mfbt/Assertions.cpp", + "/mfbt/RandomNum.cpp", + "/mozglue/misc/StackWalk.cpp", + "MozjemallocInfo.cpp", +] + +# Link replace-malloc and the default allocator. +USE_LIBS += [ + "memory", +] + +# The memory library defines this, so it's needed here too. +DEFINES["IMPL_MFBT"] = True + +if CONFIG["MOZ_NEEDS_LIBATOMIC"]: + OS_LIBS += ["atomic"] + +UNIFIED_SOURCES += [ + "/mfbt/double-conversion/double-conversion/bignum-dtoa.cc", + "/mfbt/double-conversion/double-conversion/bignum.cc", + "/mfbt/double-conversion/double-conversion/cached-powers.cc", + "/mfbt/double-conversion/double-conversion/double-to-string.cc", + "/mfbt/double-conversion/double-conversion/fast-dtoa.cc", + "/mfbt/double-conversion/double-conversion/fixed-dtoa.cc", + "/mozglue/misc/Printf.cpp", +] + +DisableStlWrapping() + +include("/mozglue/build/replace_malloc.mozbuild") diff --git a/memory/replace/dmd/DMD.cpp b/memory/replace/dmd/DMD.cpp new file mode 100644 index 0000000000..54a154e60e --- /dev/null +++ b/memory/replace/dmd/DMD.cpp @@ -0,0 +1,1884 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if !defined(MOZ_PROFILING) +# error "DMD requires MOZ_PROFILING" +#endif + +#ifdef XP_WIN +# include <windows.h> +# include <process.h> +#else +# include <pthread.h> +# include <sys/types.h> +# include <unistd.h> +#endif + +#ifdef ANDROID +# include <android/log.h> +#endif + +#include "nscore.h" + +#include "mozilla/Assertions.h" +#include "mozilla/FastBernoulliTrial.h" +#include "mozilla/HashFunctions.h" +#include "mozilla/HashTable.h" +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/JSONWriter.h" +#include "mozilla/Likely.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/PodOperations.h" +#include "mozilla/StackWalk.h" +#include "mozilla/ThreadLocal.h" + +// CodeAddressService is defined entirely in the header, so this does not make +// DMD depend on XPCOM's object file. +#include "CodeAddressService.h" + +// replace_malloc.h needs to be included before replace_malloc_bridge.h, +// which DMD.h includes, so DMD.h needs to be included after replace_malloc.h. +#include "replace_malloc.h" +#include "DMD.h" + +namespace mozilla { +namespace dmd { + +class DMDBridge : public ReplaceMallocBridge { + virtual DMDFuncs* GetDMDFuncs() override; +}; + +static DMDBridge* gDMDBridge; +static DMDFuncs gDMDFuncs; + +DMDFuncs* DMDBridge::GetDMDFuncs() { return &gDMDFuncs; } + +MOZ_FORMAT_PRINTF(1, 2) +inline void StatusMsg(const char* aFmt, ...) { + va_list ap; + va_start(ap, aFmt); + gDMDFuncs.StatusMsg(aFmt, ap); + va_end(ap); +} + +//--------------------------------------------------------------------------- +// Utilities +//--------------------------------------------------------------------------- + +#ifndef DISALLOW_COPY_AND_ASSIGN +# define DISALLOW_COPY_AND_ASSIGN(T) \ + T(const T&); \ + void operator=(const T&) +#endif + +static malloc_table_t gMallocTable; + +// This provides infallible allocations (they abort on OOM). We use it for all +// of DMD's own allocations, which fall into the following three cases. +// +// - Direct allocations (the easy case). +// +// - Indirect allocations in mozilla::{Vector,HashSet,HashMap} -- this class +// serves as their AllocPolicy. +// +// - Other indirect allocations (e.g. MozStackWalk) -- see the comments on +// Thread::mBlockIntercepts and in replace_malloc for how these work. +// +// It would be nice if we could use the InfallibleAllocPolicy from mozalloc, +// but DMD cannot use mozalloc. +// +class InfallibleAllocPolicy { + static void ExitOnFailure(const void* aP); + + public: + template <typename T> + static T* maybe_pod_malloc(size_t aNumElems) { + if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) + return nullptr; + return (T*)gMallocTable.malloc(aNumElems * sizeof(T)); + } + + template <typename T> + static T* maybe_pod_calloc(size_t aNumElems) { + return (T*)gMallocTable.calloc(aNumElems, sizeof(T)); + } + + template <typename T> + static T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) + return nullptr; + return (T*)gMallocTable.realloc(aPtr, aNewSize * sizeof(T)); + } + + static void* malloc_(size_t aSize) { + void* p = gMallocTable.malloc(aSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_malloc(size_t aNumElems) { + T* p = maybe_pod_malloc<T>(aNumElems); + ExitOnFailure(p); + return p; + } + + static void* calloc_(size_t aCount, size_t aSize) { + void* p = gMallocTable.calloc(aCount, aSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_calloc(size_t aNumElems) { + T* p = maybe_pod_calloc<T>(aNumElems); + ExitOnFailure(p); + return p; + } + + static void* realloc_(void* aPtr, size_t aNewSize) { + void* p = gMallocTable.realloc(aPtr, aNewSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) { + T* p = maybe_pod_realloc(aPtr, aOldSize, aNewSize); + ExitOnFailure(p); + return p; + } + + static void* memalign_(size_t aAlignment, size_t aSize) { + void* p = gMallocTable.memalign(aAlignment, aSize); + ExitOnFailure(p); + return p; + } + + template <typename T> + static void free_(T* aPtr, size_t aSize = 0) { + gMallocTable.free(aPtr); + } + + static char* strdup_(const char* aStr) { + char* s = (char*)InfallibleAllocPolicy::malloc_(strlen(aStr) + 1); + strcpy(s, aStr); + return s; + } + + template <class T> + static T* new_() { + void* mem = malloc_(sizeof(T)); + return new (mem) T; + } + + template <class T, typename P1> + static T* new_(const P1& aP1) { + void* mem = malloc_(sizeof(T)); + return new (mem) T(aP1); + } + + template <class T> + static void delete_(T* aPtr) { + if (aPtr) { + aPtr->~T(); + InfallibleAllocPolicy::free_(aPtr); + } + } + + static void reportAllocOverflow() { ExitOnFailure(nullptr); } + bool checkSimulatedOOM() const { return true; } +}; + +// This is only needed because of the |const void*| vs |void*| arg mismatch. +static size_t MallocSizeOf(const void* aPtr) { + return gMallocTable.malloc_usable_size(const_cast<void*>(aPtr)); +} + +void DMDFuncs::StatusMsg(const char* aFmt, va_list aAp) { +#ifdef ANDROID + __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, aAp); +#else + // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL. + size_t size = strlen(aFmt) + 64; + char* fmt = (char*)InfallibleAllocPolicy::malloc_(size); + snprintf(fmt, size, "DMD[%d] %s", getpid(), aFmt); + vfprintf(stderr, fmt, aAp); + InfallibleAllocPolicy::free_(fmt); +#endif +} + +/* static */ +void InfallibleAllocPolicy::ExitOnFailure(const void* aP) { + if (!aP) { + MOZ_CRASH("DMD out of memory; aborting"); + } +} + +static double Percent(size_t part, size_t whole) { + return (whole == 0) ? 0 : 100 * (double)part / whole; +} + +// Commifies the number. +static char* Show(size_t n, char* buf, size_t buflen) { + int nc = 0, i = 0, lasti = buflen - 2; + buf[lasti + 1] = '\0'; + if (n == 0) { + buf[lasti - i] = '0'; + i++; + } else { + while (n > 0) { + if (((i - nc) % 3) == 0 && i != 0) { + buf[lasti - i] = ','; + i++; + nc++; + } + buf[lasti - i] = static_cast<char>((n % 10) + '0'); + i++; + n /= 10; + } + } + int firstCharIndex = lasti - i + 1; + + MOZ_ASSERT(firstCharIndex >= 0); + return &buf[firstCharIndex]; +} + +//--------------------------------------------------------------------------- +// Options (Part 1) +//--------------------------------------------------------------------------- + +class Options { + template <typename T> + struct NumOption { + const T mDefault; + const T mMax; + T mActual; + NumOption(T aDefault, T aMax) + : mDefault(aDefault), mMax(aMax), mActual(aDefault) {} + }; + + // DMD has several modes. These modes affect what data is recorded and + // written to the output file, and the written data affects the + // post-processing that dmd.py can do. + // + // Users specify the mode as soon as DMD starts. This leads to minimal memory + // usage and log file size. It has the disadvantage that is inflexible -- if + // you want to change modes you have to re-run DMD. But in practice changing + // modes seems to be rare, so it's not much of a problem. + // + // An alternative possibility would be to always record and output *all* the + // information needed for all modes. This would let you choose the mode when + // running dmd.py, and so you could do multiple kinds of profiling on a + // single DMD run. But if you are only interested in one of the simpler + // modes, you'd pay the price of (a) increased memory usage and (b) *very* + // large log files. + // + // Finally, another alternative possibility would be to do mode selection + // partly at DMD startup or recording, and then partly in dmd.py. This would + // give some extra flexibility at moderate memory and file size cost. But + // certain mode pairs wouldn't work, which would be confusing. + // + enum class Mode { + // For each live block, this mode outputs: size (usable and slop) and + // (possibly) and allocation stack. This mode is good for live heap + // profiling. + Live, + + // Like "Live", but for each live block it also outputs: zero or more + // report stacks. This mode is good for identifying where memory reporters + // should be added. This is the default mode. + DarkMatter, + + // Like "Live", but also outputs the same data for dead blocks. This mode + // does cumulative heap profiling, which is good for identifying where large + // amounts of short-lived allocations ("heap churn") occur. + Cumulative, + + // Like "Live", but this mode also outputs for each live block the address + // of the block and the values contained in the blocks. This mode is useful + // for investigating leaks, by helping to figure out which blocks refer to + // other blocks. This mode force-enables full stacks coverage. + Scan + }; + + // With full stacks, every heap block gets a stack trace recorded for it. + // This is complete but slow. + // + // With partial stacks, not all heap blocks will get a stack trace recorded. + // A Bernoulli trial (see mfbt/FastBernoulliTrial.h for details) is performed + // for each heap block to decide if it gets one. Because bigger heap blocks + // are more likely to get a stack trace, even though most heap *blocks* won't + // get a stack trace, most heap *bytes* will. + enum class Stacks { Full, Partial }; + + char* mDMDEnvVar; // a saved copy, for later printing + + Mode mMode; + Stacks mStacks; + bool mShowDumpStats; + + void BadArg(const char* aArg); + static const char* ValueIfMatch(const char* aArg, const char* aOptionName); + static bool GetLong(const char* aArg, const char* aOptionName, long aMin, + long aMax, long* aValue); + static bool GetBool(const char* aArg, const char* aOptionName, bool* aValue); + + public: + explicit Options(const char* aDMDEnvVar); + + bool IsLiveMode() const { return mMode == Mode::Live; } + bool IsDarkMatterMode() const { return mMode == Mode::DarkMatter; } + bool IsCumulativeMode() const { return mMode == Mode::Cumulative; } + bool IsScanMode() const { return mMode == Mode::Scan; } + + const char* ModeString() const; + + const char* DMDEnvVar() const { return mDMDEnvVar; } + + bool DoFullStacks() const { return mStacks == Stacks::Full; } + size_t ShowDumpStats() const { return mShowDumpStats; } +}; + +static Options* gOptions; + +//--------------------------------------------------------------------------- +// The global lock +//--------------------------------------------------------------------------- + +// MutexBase implements the platform-specific parts of a mutex. + +#ifdef XP_WIN + +class MutexBase { + CRITICAL_SECTION mCS; + + DISALLOW_COPY_AND_ASSIGN(MutexBase); + + public: + MutexBase() { InitializeCriticalSection(&mCS); } + ~MutexBase() { DeleteCriticalSection(&mCS); } + + void Lock() { EnterCriticalSection(&mCS); } + void Unlock() { LeaveCriticalSection(&mCS); } +}; + +#else + +class MutexBase { + pthread_mutex_t mMutex; + + MutexBase(const MutexBase&) = delete; + + const MutexBase& operator=(const MutexBase&) = delete; + + public: + MutexBase() { pthread_mutex_init(&mMutex, nullptr); } + + void Lock() { pthread_mutex_lock(&mMutex); } + void Unlock() { pthread_mutex_unlock(&mMutex); } +}; + +#endif + +class Mutex : private MutexBase { + bool mIsLocked; + + Mutex(const Mutex&) = delete; + + const Mutex& operator=(const Mutex&) = delete; + + public: + Mutex() : mIsLocked(false) {} + + void Lock() { + MutexBase::Lock(); + MOZ_ASSERT(!mIsLocked); + mIsLocked = true; + } + + void Unlock() { + MOZ_ASSERT(mIsLocked); + mIsLocked = false; + MutexBase::Unlock(); + } + + bool IsLocked() { return mIsLocked; } +}; + +// This lock must be held while manipulating global state such as +// gStackTraceTable, gLiveBlockTable, gDeadBlockTable. Note that gOptions is +// *not* protected by this lock because it is only written to by Options(), +// which is only invoked at start-up and in ResetEverything(), which is only +// used by SmokeDMD.cpp. +static Mutex* gStateLock = nullptr; + +class AutoLockState { + AutoLockState(const AutoLockState&) = delete; + + const AutoLockState& operator=(const AutoLockState&) = delete; + + public: + AutoLockState() { gStateLock->Lock(); } + ~AutoLockState() { gStateLock->Unlock(); } +}; + +class AutoUnlockState { + AutoUnlockState(const AutoUnlockState&) = delete; + + const AutoUnlockState& operator=(const AutoUnlockState&) = delete; + + public: + AutoUnlockState() { gStateLock->Unlock(); } + ~AutoUnlockState() { gStateLock->Lock(); } +}; + +//--------------------------------------------------------------------------- +// Per-thread blocking of intercepts +//--------------------------------------------------------------------------- + +// On MacOS, the first __thread/thread_local access calls malloc, which leads +// to an infinite loop. So we use pthread-based TLS instead, which somehow +// doesn't have this problem. +#if !defined(XP_DARWIN) +# define DMD_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T) +#else +# define DMD_THREAD_LOCAL(T) \ + detail::ThreadLocal<T, detail::ThreadLocalKeyStorage> +#endif + +class Thread { + // Required for allocation via InfallibleAllocPolicy::new_. + friend class InfallibleAllocPolicy; + + // When true, this blocks intercepts, which allows malloc interception + // functions to themselves call malloc. (Nb: for direct calls to malloc we + // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes + // indirectly call vanilla malloc via functions like MozStackWalk.) + bool mBlockIntercepts; + + Thread() : mBlockIntercepts(false) {} + + Thread(const Thread&) = delete; + + const Thread& operator=(const Thread&) = delete; + + static DMD_THREAD_LOCAL(Thread*) tlsThread; + + public: + static void Init() { + if (!tlsThread.init()) { + MOZ_CRASH(); + } + } + + static Thread* Fetch() { + Thread* t = tlsThread.get(); + if (MOZ_UNLIKELY(!t)) { + // This memory is never freed, even if the thread dies. It's a leak, but + // only a tiny one. + t = InfallibleAllocPolicy::new_<Thread>(); + tlsThread.set(t); + } + + return t; + } + + bool BlockIntercepts() { + MOZ_ASSERT(!mBlockIntercepts); + return mBlockIntercepts = true; + } + + bool UnblockIntercepts() { + MOZ_ASSERT(mBlockIntercepts); + return mBlockIntercepts = false; + } + + bool InterceptsAreBlocked() const { return mBlockIntercepts; } +}; + +DMD_THREAD_LOCAL(Thread*) Thread::tlsThread; + +// An object of this class must be created (on the stack) before running any +// code that might allocate. +class AutoBlockIntercepts { + Thread* const mT; + + AutoBlockIntercepts(const AutoBlockIntercepts&) = delete; + + const AutoBlockIntercepts& operator=(const AutoBlockIntercepts&) = delete; + + public: + explicit AutoBlockIntercepts(Thread* aT) : mT(aT) { mT->BlockIntercepts(); } + ~AutoBlockIntercepts() { + MOZ_ASSERT(mT->InterceptsAreBlocked()); + mT->UnblockIntercepts(); + } +}; + +//--------------------------------------------------------------------------- +// Location service +//--------------------------------------------------------------------------- + +struct DescribeCodeAddressLock { + static void Unlock() { gStateLock->Unlock(); } + static void Lock() { gStateLock->Lock(); } + static bool IsLocked() { return gStateLock->IsLocked(); } +}; + +typedef CodeAddressService<InfallibleAllocPolicy, DescribeCodeAddressLock> + CodeAddressService; + +//--------------------------------------------------------------------------- +// Stack traces +//--------------------------------------------------------------------------- + +class StackTrace { + public: + static const uint32_t MaxFrames = 24; + + private: + uint32_t mLength; // The number of PCs. + const void* mPcs[MaxFrames]; // The PCs themselves. + + public: + StackTrace() : mLength(0) {} + StackTrace(const StackTrace& aOther) : mLength(aOther.mLength) { + PodCopy(mPcs, aOther.mPcs, mLength); + } + + uint32_t Length() const { return mLength; } + const void* Pc(uint32_t i) const { + MOZ_ASSERT(i < mLength); + return mPcs[i]; + } + + uint32_t Size() const { return mLength * sizeof(mPcs[0]); } + + // The stack trace returned by this function is interned in gStackTraceTable, + // and so is immortal and unmovable. + static const StackTrace* Get(Thread* aT); + + // Hash policy. + + typedef StackTrace* Lookup; + + static mozilla::HashNumber hash(const StackTrace* const& aSt) { + return mozilla::HashBytes(aSt->mPcs, aSt->Size()); + } + + static bool match(const StackTrace* const& aA, const StackTrace* const& aB) { + return aA->mLength == aB->mLength && + memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0; + } + + private: + static void StackWalkCallback(uint32_t aFrameNumber, void* aPc, void* aSp, + void* aClosure) { + StackTrace* st = (StackTrace*)aClosure; + MOZ_ASSERT(st->mLength < MaxFrames); + st->mPcs[st->mLength] = aPc; + st->mLength++; + MOZ_ASSERT(st->mLength == aFrameNumber); + } +}; + +typedef mozilla::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy> + StackTraceTable; +static StackTraceTable* gStackTraceTable = nullptr; + +typedef mozilla::HashSet<const StackTrace*, + mozilla::DefaultHasher<const StackTrace*>, + InfallibleAllocPolicy> + StackTraceSet; + +typedef mozilla::HashSet<const void*, mozilla::DefaultHasher<const void*>, + InfallibleAllocPolicy> + PointerSet; +typedef mozilla::HashMap<const void*, uint32_t, + mozilla::DefaultHasher<const void*>, + InfallibleAllocPolicy> + PointerIdMap; + +// We won't GC the stack trace table until it this many elements. +static uint32_t gGCStackTraceTableWhenSizeExceeds = 4 * 1024; + +/* static */ const StackTrace* StackTrace::Get(Thread* aT) { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(aT->InterceptsAreBlocked()); + + // On Windows, MozStackWalk can acquire a lock from the shared library + // loader. Another thread might call malloc while holding that lock (when + // loading a shared library). So we can't be in gStateLock during the call + // to MozStackWalk. For details, see + // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8 + // On Linux, something similar can happen; see bug 824340. + // So let's just release it on all platforms. + StackTrace tmp; + { + AutoUnlockState unlock; + // In each of the following cases, skipFrames is chosen so that the + // first frame in each stack trace is a replace_* function (or as close as + // possible, given the vagaries of inlining on different platforms). +#if defined(XP_WIN) && defined(_M_IX86) + // This avoids MozStackWalk(), which causes unusably slow startup on Win32 + // when it is called during static initialization (see bug 1241684). + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Win32: REGISTERS_SYNC_POPULATE() for the + // frame pointer, and GetStackTop() for the stack end. + CONTEXT context; + RtlCaptureContext(&context); + void** fp = reinterpret_cast<void**>(context.Ebp); + + PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb()); + void* stackEnd = static_cast<void*>(pTib->StackBase); + FramePointerStackWalk(StackWalkCallback, MaxFrames, &tmp, fp, stackEnd); +#elif defined(XP_MACOSX) + // This avoids MozStackWalk(), which has become unusably slow on Mac due to + // changes in libunwind. + // + // This code is cribbed from the Gecko Profiler, which also uses + // FramePointerStackWalk() on Mac: REGISTERS_SYNC_POPULATE() for the frame + // pointer, and GetStackTop() for the stack end. +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wframe-address" + void** fp = reinterpret_cast<void**>(__builtin_frame_address(1)); +# pragma GCC diagnostic pop + void* stackEnd = pthread_get_stackaddr_np(pthread_self()); + FramePointerStackWalk(StackWalkCallback, MaxFrames, &tmp, fp, stackEnd); +#else + MozStackWalk(StackWalkCallback, nullptr, MaxFrames, &tmp); +#endif + } + + StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp); + if (!p) { + StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp); + MOZ_ALWAYS_TRUE(gStackTraceTable->add(p, stnew)); + } + return *p; +} + +//--------------------------------------------------------------------------- +// Heap blocks +//--------------------------------------------------------------------------- + +// This class combines a 2-byte-aligned pointer (i.e. one whose bottom bit +// is zero) with a 1-bit tag. +// +// |T| is the pointer type, e.g. |int*|, not the pointed-to type. This makes +// is easier to have const pointers, e.g. |TaggedPtr<const int*>|. +template <typename T> +class TaggedPtr { + union { + T mPtr; + uintptr_t mUint; + }; + + static const uintptr_t kTagMask = uintptr_t(0x1); + static const uintptr_t kPtrMask = ~kTagMask; + + static bool IsTwoByteAligned(T aPtr) { + return (uintptr_t(aPtr) & kTagMask) == 0; + } + + public: + TaggedPtr() : mPtr(nullptr) {} + + TaggedPtr(T aPtr, bool aBool) : mPtr(aPtr) { + MOZ_ASSERT(IsTwoByteAligned(aPtr)); + uintptr_t tag = uintptr_t(aBool); + MOZ_ASSERT(tag <= kTagMask); + mUint |= (tag & kTagMask); + } + + void Set(T aPtr, bool aBool) { + MOZ_ASSERT(IsTwoByteAligned(aPtr)); + mPtr = aPtr; + uintptr_t tag = uintptr_t(aBool); + MOZ_ASSERT(tag <= kTagMask); + mUint |= (tag & kTagMask); + } + + T Ptr() const { return reinterpret_cast<T>(mUint & kPtrMask); } + + bool Tag() const { return bool(mUint & kTagMask); } +}; + +// A live heap block. Stores both basic data and data about reports, if we're +// in DarkMatter mode. +class LiveBlock { + const void* mPtr; + const size_t mReqSize; // size requested + + // The stack trace where this block was allocated, or nullptr if we didn't + // record one. + const StackTrace* const mAllocStackTrace; + + // This array has two elements because we record at most two reports of a + // block. + // - Ptr: |mReportStackTrace| - stack trace where this block was reported. + // nullptr if not reported. + // - Tag bit 0: |mReportedOnAlloc| - was the block reported immediately on + // allocation? If so, DMD must not clear the report at the end of + // Analyze(). Only relevant if |mReportStackTrace| is non-nullptr. + // + // |mPtr| is used as the key in LiveBlockTable, so it's ok for this member + // to be |mutable|. + // + // Only used in DarkMatter mode. + mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2]; + + public: + LiveBlock(const void* aPtr, size_t aReqSize, + const StackTrace* aAllocStackTrace) + : mPtr(aPtr), mReqSize(aReqSize), mAllocStackTrace(aAllocStackTrace) {} + + const void* Address() const { return mPtr; } + + size_t ReqSize() const { return mReqSize; } + + size_t SlopSize() const { return MallocSizeOf(mPtr) - mReqSize; } + + const StackTrace* AllocStackTrace() const { return mAllocStackTrace; } + + const StackTrace* ReportStackTrace1() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[0].Ptr(); + } + + const StackTrace* ReportStackTrace2() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[1].Ptr(); + } + + bool ReportedOnAlloc1() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[0].Tag(); + } + + bool ReportedOnAlloc2() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + return mReportStackTrace_mReportedOnAlloc[1].Tag(); + } + + void AddStackTracesToTable(StackTraceSet& aStackTraces) const { + if (AllocStackTrace()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(AllocStackTrace())); + } + if (gOptions->IsDarkMatterMode()) { + if (ReportStackTrace1()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(ReportStackTrace1())); + } + if (ReportStackTrace2()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(ReportStackTrace2())); + } + } + } + + uint32_t NumReports() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + if (ReportStackTrace2()) { + MOZ_ASSERT(ReportStackTrace1()); + return 2; + } + if (ReportStackTrace1()) { + return 1; + } + return 0; + } + + // This is |const| thanks to the |mutable| fields above. + void Report(Thread* aT, bool aReportedOnAlloc) const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + // We don't bother recording reports after the 2nd one. + uint32_t numReports = NumReports(); + if (numReports < 2) { + mReportStackTrace_mReportedOnAlloc[numReports].Set(StackTrace::Get(aT), + aReportedOnAlloc); + } + } + + void UnreportIfNotReportedOnAlloc() const { + MOZ_ASSERT(gOptions->IsDarkMatterMode()); + if (!ReportedOnAlloc1() && !ReportedOnAlloc2()) { + mReportStackTrace_mReportedOnAlloc[0].Set(nullptr, 0); + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); + + } else if (!ReportedOnAlloc1() && ReportedOnAlloc2()) { + // Shift the 2nd report down to the 1st one. + mReportStackTrace_mReportedOnAlloc[0] = + mReportStackTrace_mReportedOnAlloc[1]; + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); + + } else if (ReportedOnAlloc1() && !ReportedOnAlloc2()) { + mReportStackTrace_mReportedOnAlloc[1].Set(nullptr, 0); + } + } + + // Hash policy. + + typedef const void* Lookup; + + static mozilla::HashNumber hash(const void* const& aPtr) { + return mozilla::HashGeneric(aPtr); + } + + static bool match(const LiveBlock& aB, const void* const& aPtr) { + return aB.mPtr == aPtr; + } +}; + +// A table of live blocks where the lookup key is the block address. +typedef mozilla::HashSet<LiveBlock, LiveBlock, InfallibleAllocPolicy> + LiveBlockTable; +static LiveBlockTable* gLiveBlockTable = nullptr; + +class AggregatedLiveBlockHashPolicy { + public: + typedef const LiveBlock* const Lookup; + + static mozilla::HashNumber hash(const LiveBlock* const& aB) { + return gOptions->IsDarkMatterMode() + ? mozilla::HashGeneric( + aB->ReqSize(), aB->SlopSize(), aB->AllocStackTrace(), + aB->ReportedOnAlloc1(), aB->ReportedOnAlloc2()) + : mozilla::HashGeneric(aB->ReqSize(), aB->SlopSize(), + aB->AllocStackTrace()); + } + + static bool match(const LiveBlock* const& aA, const LiveBlock* const& aB) { + return gOptions->IsDarkMatterMode() + ? aA->ReqSize() == aB->ReqSize() && + aA->SlopSize() == aB->SlopSize() && + aA->AllocStackTrace() == aB->AllocStackTrace() && + aA->ReportStackTrace1() == aB->ReportStackTrace1() && + aA->ReportStackTrace2() == aB->ReportStackTrace2() + : aA->ReqSize() == aB->ReqSize() && + aA->SlopSize() == aB->SlopSize() && + aA->AllocStackTrace() == aB->AllocStackTrace(); + } +}; + +// A table of live blocks where the lookup key is everything but the block +// address. For aggregating similar live blocks at output time. +typedef mozilla::HashMap<const LiveBlock*, size_t, + AggregatedLiveBlockHashPolicy, InfallibleAllocPolicy> + AggregatedLiveBlockTable; + +// A freed heap block. +class DeadBlock { + const size_t mReqSize; // size requested + const size_t mSlopSize; // slop above size requested + + // The stack trace where this block was allocated. + const StackTrace* const mAllocStackTrace; + + public: + DeadBlock() : mReqSize(0), mSlopSize(0), mAllocStackTrace(nullptr) {} + + explicit DeadBlock(const LiveBlock& aLb) + : mReqSize(aLb.ReqSize()), + mSlopSize(aLb.SlopSize()), + mAllocStackTrace(aLb.AllocStackTrace()) {} + + ~DeadBlock() = default; + + size_t ReqSize() const { return mReqSize; } + size_t SlopSize() const { return mSlopSize; } + + const StackTrace* AllocStackTrace() const { return mAllocStackTrace; } + + void AddStackTracesToTable(StackTraceSet& aStackTraces) const { + if (AllocStackTrace()) { + MOZ_ALWAYS_TRUE(aStackTraces.put(AllocStackTrace())); + } + } + + // Hash policy. + + typedef DeadBlock Lookup; + + static mozilla::HashNumber hash(const DeadBlock& aB) { + return mozilla::HashGeneric(aB.ReqSize(), aB.SlopSize(), + aB.AllocStackTrace()); + } + + static bool match(const DeadBlock& aA, const DeadBlock& aB) { + return aA.ReqSize() == aB.ReqSize() && aA.SlopSize() == aB.SlopSize() && + aA.AllocStackTrace() == aB.AllocStackTrace(); + } +}; + +// For each unique DeadBlock value we store a count of how many actual dead +// blocks have that value. +typedef mozilla::HashMap<DeadBlock, size_t, DeadBlock, InfallibleAllocPolicy> + DeadBlockTable; +static DeadBlockTable* gDeadBlockTable = nullptr; + +// Add the dead block to the dead block table, if that's appropriate. +void MaybeAddToDeadBlockTable(const DeadBlock& aDb) { + if (gOptions->IsCumulativeMode() && aDb.AllocStackTrace()) { + AutoLockState lock; + if (DeadBlockTable::AddPtr p = gDeadBlockTable->lookupForAdd(aDb)) { + p->value() += 1; + } else { + MOZ_ALWAYS_TRUE(gDeadBlockTable->add(p, aDb, 1)); + } + } +} + +// Add a pointer to each live stack trace into the given StackTraceSet. (A +// stack trace is live if it's used by one of the live blocks.) +static void GatherUsedStackTraces(StackTraceSet& aStackTraces) { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); + + aStackTraces.clear(); + MOZ_ALWAYS_TRUE(aStackTraces.reserve(512)); + + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + iter.get().AddStackTracesToTable(aStackTraces); + } + + for (auto iter = gDeadBlockTable->iter(); !iter.done(); iter.next()) { + iter.get().key().AddStackTracesToTable(aStackTraces); + } +} + +// Delete stack traces that we aren't using, and compact our hashtable. +static void GCStackTraces() { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); + + StackTraceSet usedStackTraces; + GatherUsedStackTraces(usedStackTraces); + + // Delete all unused stack traces from gStackTraceTable. The ModIterator + // destructor will automatically rehash and compact the table. + for (auto iter = gStackTraceTable->modIter(); !iter.done(); iter.next()) { + StackTrace* const& st = iter.get(); + if (!usedStackTraces.has(st)) { + iter.remove(); + InfallibleAllocPolicy::delete_(st); + } + } + + // Schedule a GC when we have twice as many stack traces as we had right after + // this GC finished. + gGCStackTraceTableWhenSizeExceeds = 2 * gStackTraceTable->count(); +} + +//--------------------------------------------------------------------------- +// malloc/free callbacks +//--------------------------------------------------------------------------- + +static FastBernoulliTrial* gBernoulli; + +// In testing, a probability of 0.003 resulted in ~25% of heap blocks getting +// a stack trace and ~80% of heap bytes getting a stack trace. (This is +// possible because big heap blocks are more likely to get a stack trace.) +// +// We deliberately choose not to give the user control over this probability +// (other than effectively setting it to 1 via --stacks=full) because it's +// quite inscrutable and generally the user just wants "faster and imprecise" +// or "slower and precise". +// +// The random number seeds are arbitrary and were obtained from random.org. If +// you change them you'll need to change the tests as well, because their +// expected output is based on the particular sequence of trial results that we +// get with these seeds. +static void ResetBernoulli() { + new (gBernoulli) + FastBernoulliTrial(0.003, 0x8e26eeee166bc8ca, 0x56820f304a9c9ae0); +} + +static void AllocCallback(void* aPtr, size_t aReqSize, Thread* aT) { + if (!aPtr) { + return; + } + + AutoLockState lock; + AutoBlockIntercepts block(aT); + + size_t actualSize = gMallocTable.malloc_usable_size(aPtr); + + // We may or may not record the allocation stack trace, depending on the + // options and the outcome of a Bernoulli trial. + bool getTrace = gOptions->DoFullStacks() || gBernoulli->trial(actualSize); + LiveBlock b(aPtr, aReqSize, getTrace ? StackTrace::Get(aT) : nullptr); + LiveBlockTable::AddPtr p = gLiveBlockTable->lookupForAdd(aPtr); + if (!p) { + // Most common case: there wasn't a record already. + MOZ_ALWAYS_TRUE(gLiveBlockTable->add(p, b)); + } else { + // Edge-case: there was a record for the same address. We'll assume the + // allocator is not giving out a pointer to an existing allocation, so + // this means the previously recorded allocation was freed while we were + // blocking interceptions. This can happen while processing the data in + // e.g. AnalyzeImpl. + if (gOptions->IsCumulativeMode()) { + // Copy it out so it can be added to the dead block list later. + DeadBlock db(*p); + MaybeAddToDeadBlockTable(db); + } + gLiveBlockTable->remove(p); + MOZ_ALWAYS_TRUE(gLiveBlockTable->putNew(aPtr, b)); + } +} + +static void FreeCallback(void* aPtr, Thread* aT, DeadBlock* aDeadBlock) { + if (!aPtr) { + return; + } + + AutoLockState lock; + AutoBlockIntercepts block(aT); + + if (LiveBlockTable::Ptr lb = gLiveBlockTable->lookup(aPtr)) { + if (gOptions->IsCumulativeMode()) { + // Copy it out so it can be added to the dead block list later. + new (aDeadBlock) DeadBlock(*lb); + } + gLiveBlockTable->remove(lb); + } else { + // We have no record of the block. It must be a bogus pointer, or one that + // DMD wasn't able to see allocated. This should be extremely rare. + } + + if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) { + GCStackTraces(); + } +} + +//--------------------------------------------------------------------------- +// malloc/free interception +//--------------------------------------------------------------------------- + +static bool Init(malloc_table_t* aMallocTable); + +} // namespace dmd +} // namespace mozilla + +static void* replace_malloc(size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + // Intercepts are blocked, which means this must be a call to malloc + // triggered indirectly by DMD (e.g. via MozStackWalk). Be infallible. + return InfallibleAllocPolicy::malloc_(aSize); + } + + // This must be a call to malloc from outside DMD. Intercept it. + void* ptr = gMallocTable.malloc(aSize); + AllocCallback(ptr, aSize, t); + return ptr; +} + +static void* replace_calloc(size_t aCount, size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::calloc_(aCount, aSize); + } + + // |aCount * aSize| could overflow, but if that happens then + // |gMallocTable.calloc()| will return nullptr and |AllocCallback()| will + // return immediately without using the overflowed value. + void* ptr = gMallocTable.calloc(aCount, aSize); + AllocCallback(ptr, aCount * aSize, t); + return ptr; +} + +static void* replace_realloc(void* aOldPtr, size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::realloc_(aOldPtr, aSize); + } + + // If |aOldPtr| is nullptr, the call is equivalent to |malloc(aSize)|. + if (!aOldPtr) { + return replace_malloc(aSize); + } + + // Be very careful here! Must remove the block from the table before doing + // the realloc to avoid races, just like in replace_free(). + // Nb: This does an unnecessary hashtable remove+add if the block doesn't + // move, but doing better isn't worth the effort. + DeadBlock db; + FreeCallback(aOldPtr, t, &db); + void* ptr = gMallocTable.realloc(aOldPtr, aSize); + if (ptr) { + AllocCallback(ptr, aSize, t); + MaybeAddToDeadBlockTable(db); + } else { + // If realloc fails, we undo the prior operations by re-inserting the old + // pointer into the live block table. We don't have to do anything with the + // dead block list because the dead block hasn't yet been inserted. The + // block will end up looking like it was allocated for the first time here, + // which is untrue, and the slop bytes will be zero, which may be untrue. + // But this case is rare and doing better isn't worth the effort. + AllocCallback(aOldPtr, gMallocTable.malloc_usable_size(aOldPtr), t); + } + return ptr; +} + +static void* replace_memalign(size_t aAlignment, size_t aSize) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::memalign_(aAlignment, aSize); + } + + void* ptr = gMallocTable.memalign(aAlignment, aSize); + AllocCallback(ptr, aSize, t); + return ptr; +} + +static void replace_free(void* aPtr) { + using namespace mozilla::dmd; + + Thread* t = Thread::Fetch(); + if (t->InterceptsAreBlocked()) { + return InfallibleAllocPolicy::free_(aPtr); + } + + // Do the actual free after updating the table. Otherwise, another thread + // could call malloc and get the freed block and update the table, and then + // our update here would remove the newly-malloc'd block. + DeadBlock db; + FreeCallback(aPtr, t, &db); + MaybeAddToDeadBlockTable(db); + gMallocTable.free(aPtr); +} + +void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) { + if (mozilla::dmd::Init(aMallocTable)) { +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#define MALLOC_DECL(name, ...) aMallocTable->name = replace_##name; +#include "malloc_decls.h" + *aBridge = mozilla::dmd::gDMDBridge; + } +} + +namespace mozilla { +namespace dmd { + +//--------------------------------------------------------------------------- +// Options (Part 2) +//--------------------------------------------------------------------------- + +// Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah" +// (where "blah" is non-empty) and return the pointer to "blah". |aArg| can +// have leading space chars (but not other whitespace). +const char* Options::ValueIfMatch(const char* aArg, const char* aOptionName) { + MOZ_ASSERT(!isspace(*aArg)); // any leading whitespace should not remain + size_t optionLen = strlen(aOptionName); + if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' && + aArg[optionLen + 1]) { + return aArg + optionLen + 1; + } + return nullptr; +} + +// Extracts a |long| value for an option from an argument. It must be within +// the range |aMin..aMax| (inclusive). +bool Options::GetLong(const char* aArg, const char* aOptionName, long aMin, + long aMax, long* aValue) { + if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) { + char* endPtr; + *aValue = strtol(optionValue, &endPtr, /* base */ 10); + if (!*endPtr && aMin <= *aValue && *aValue <= aMax && *aValue != LONG_MIN && + *aValue != LONG_MAX) { + return true; + } + } + return false; +} + +// Extracts a |bool| value for an option -- encoded as "yes" or "no" -- from an +// argument. +bool Options::GetBool(const char* aArg, const char* aOptionName, bool* aValue) { + if (const char* optionValue = ValueIfMatch(aArg, aOptionName)) { + if (strcmp(optionValue, "yes") == 0) { + *aValue = true; + return true; + } + if (strcmp(optionValue, "no") == 0) { + *aValue = false; + return true; + } + } + return false; +} + +Options::Options(const char* aDMDEnvVar) + : mDMDEnvVar(aDMDEnvVar ? InfallibleAllocPolicy::strdup_(aDMDEnvVar) + : nullptr), + mMode(Mode::DarkMatter), + mStacks(Stacks::Partial), + mShowDumpStats(false) { + char* e = mDMDEnvVar; + if (e && strcmp(e, "1") != 0) { + bool isEnd = false; + while (!isEnd) { + // Consume leading whitespace. + while (isspace(*e)) { + e++; + } + + // Save the start of the arg. + const char* arg = e; + + // Find the first char after the arg, and temporarily change it to '\0' + // to isolate the arg. + while (!isspace(*e) && *e != '\0') { + e++; + } + char replacedChar = *e; + isEnd = replacedChar == '\0'; + *e = '\0'; + + // Handle arg + bool myBool; + if (strcmp(arg, "--mode=live") == 0) { + mMode = Mode::Live; + } else if (strcmp(arg, "--mode=dark-matter") == 0) { + mMode = Mode::DarkMatter; + } else if (strcmp(arg, "--mode=cumulative") == 0) { + mMode = Mode::Cumulative; + } else if (strcmp(arg, "--mode=scan") == 0) { + mMode = Mode::Scan; + + } else if (strcmp(arg, "--stacks=full") == 0) { + mStacks = Stacks::Full; + } else if (strcmp(arg, "--stacks=partial") == 0) { + mStacks = Stacks::Partial; + + } else if (GetBool(arg, "--show-dump-stats", &myBool)) { + mShowDumpStats = myBool; + + } else if (strcmp(arg, "") == 0) { + // This can only happen if there is trailing whitespace. Ignore. + MOZ_ASSERT(isEnd); + + } else { + BadArg(arg); + } + + // Undo the temporary isolation. + *e = replacedChar; + } + } + + if (mMode == Mode::Scan) { + mStacks = Stacks::Full; + } +} + +void Options::BadArg(const char* aArg) { + StatusMsg("\n"); + StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg); + StatusMsg("See the output of |mach help run| for the allowed options.\n"); + exit(1); +} + +const char* Options::ModeString() const { + switch (mMode) { + case Mode::Live: + return "live"; + case Mode::DarkMatter: + return "dark-matter"; + case Mode::Cumulative: + return "cumulative"; + case Mode::Scan: + return "scan"; + default: + MOZ_ASSERT(false); + return "(unknown DMD mode)"; + } +} + +//--------------------------------------------------------------------------- +// DMD start-up +//--------------------------------------------------------------------------- + +#ifndef XP_WIN +static void prefork() { + if (gStateLock) { + gStateLock->Lock(); + } +} + +static void postfork() { + if (gStateLock) { + gStateLock->Unlock(); + } +} +#endif + +// WARNING: this function runs *very* early -- before all static initializers +// have run. For this reason, non-scalar globals such as gStateLock and +// gStackTraceTable are allocated dynamically (so we can guarantee their +// construction in this function) rather than statically. +static bool Init(malloc_table_t* aMallocTable) { + // DMD is controlled by the |DMD| environment variable. + const char* e = getenv("DMD"); + + if (!e) { + return false; + } + // Initialize the function table first, because StatusMsg uses + // InfallibleAllocPolicy::malloc_, which uses it. + gMallocTable = *aMallocTable; + + StatusMsg("$DMD = '%s'\n", e); + + gDMDBridge = InfallibleAllocPolicy::new_<DMDBridge>(); + +#ifndef XP_WIN + // Avoid deadlocks when forking by acquiring our state lock prior to forking + // and releasing it after forking. See |LogAlloc|'s |replace_init| for + // in-depth details. + // + // Note: This must run after attempting an allocation so as to give the + // system malloc a chance to insert its own atfork handler. + pthread_atfork(prefork, postfork, postfork); +#endif + // Parse $DMD env var. + gOptions = InfallibleAllocPolicy::new_<Options>(e); + + gStateLock = InfallibleAllocPolicy::new_<Mutex>(); + + gBernoulli = (FastBernoulliTrial*)InfallibleAllocPolicy::malloc_( + sizeof(FastBernoulliTrial)); + ResetBernoulli(); + + Thread::Init(); + + { + AutoLockState lock; + + gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>(8192); + gLiveBlockTable = InfallibleAllocPolicy::new_<LiveBlockTable>(8192); + + // Create this even if the mode isn't Cumulative (albeit with a small + // size), in case the mode is changed later on (as is done by SmokeDMD.cpp, + // for example). + size_t tableSize = gOptions->IsCumulativeMode() ? 8192 : 4; + gDeadBlockTable = InfallibleAllocPolicy::new_<DeadBlockTable>(tableSize); + } + + return true; +} + +//--------------------------------------------------------------------------- +// Block reporting and unreporting +//--------------------------------------------------------------------------- + +static void ReportHelper(const void* aPtr, bool aReportedOnAlloc) { + if (!gOptions->IsDarkMatterMode() || !aPtr) { + return; + } + + Thread* t = Thread::Fetch(); + + AutoBlockIntercepts block(t); + AutoLockState lock; + + if (LiveBlockTable::Ptr p = gLiveBlockTable->lookup(aPtr)) { + p->Report(t, aReportedOnAlloc); + } else { + // We have no record of the block. It must be a bogus pointer. This should + // be extremely rare because Report() is almost always called in + // conjunction with a malloc_size_of-style function. Print a message so + // that we get some feedback. + StatusMsg("Unknown pointer %p\n", aPtr); + } +} + +void DMDFuncs::Report(const void* aPtr) { + ReportHelper(aPtr, /* onAlloc */ false); +} + +void DMDFuncs::ReportOnAlloc(const void* aPtr) { + ReportHelper(aPtr, /* onAlloc */ true); +} + +//--------------------------------------------------------------------------- +// DMD output +//--------------------------------------------------------------------------- + +// The version number of the output format. Increment this if you make +// backwards-incompatible changes to the format. See DMD.h for the version +// history. +static const int kOutputVersionNumber = 5; + +// Note that, unlike most SizeOf* functions, this function does not take a +// |mozilla::MallocSizeOf| argument. That's because those arguments are +// primarily to aid DMD track heap blocks... but DMD deliberately doesn't track +// heap blocks it allocated for itself! +// +// SizeOfInternal should be called while you're holding the state lock and +// while intercepts are blocked; SizeOf acquires the lock and blocks +// intercepts. + +static void SizeOfInternal(Sizes* aSizes) { + MOZ_ASSERT(gStateLock->IsLocked()); + MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked()); + + aSizes->Clear(); + + StackTraceSet usedStackTraces; + GatherUsedStackTraces(usedStackTraces); + + for (auto iter = gStackTraceTable->iter(); !iter.done(); iter.next()) { + StackTrace* const& st = iter.get(); + + if (usedStackTraces.has(st)) { + aSizes->mStackTracesUsed += MallocSizeOf(st); + } else { + aSizes->mStackTracesUnused += MallocSizeOf(st); + } + } + + aSizes->mStackTraceTable = + gStackTraceTable->shallowSizeOfIncludingThis(MallocSizeOf); + + aSizes->mLiveBlockTable = + gLiveBlockTable->shallowSizeOfIncludingThis(MallocSizeOf); + + aSizes->mDeadBlockTable = + gDeadBlockTable->shallowSizeOfIncludingThis(MallocSizeOf); +} + +void DMDFuncs::SizeOf(Sizes* aSizes) { + aSizes->Clear(); + + AutoBlockIntercepts block(Thread::Fetch()); + AutoLockState lock; + SizeOfInternal(aSizes); +} + +void DMDFuncs::ClearReports() { + if (!gOptions->IsDarkMatterMode()) { + return; + } + + AutoLockState lock; + + // Unreport all blocks that were marked reported by a memory reporter. This + // excludes those that were reported on allocation, because they need to keep + // their reported marking. + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + iter.get().UnreportIfNotReportedOnAlloc(); + } +} + +class ToIdStringConverter final { + public: + ToIdStringConverter() : mIdMap(512), mNextId(0) {} + + // Converts a pointer to a unique ID. Reuses the existing ID for the pointer + // if it's been seen before. + const char* ToIdString(const void* aPtr) { + uint32_t id; + PointerIdMap::AddPtr p = mIdMap.lookupForAdd(aPtr); + if (!p) { + id = mNextId++; + MOZ_ALWAYS_TRUE(mIdMap.add(p, aPtr, id)); + } else { + id = p->value(); + } + return Base32(id); + } + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const { + return mIdMap.shallowSizeOfExcludingThis(aMallocSizeOf); + } + + private: + // This function converts an integer to base-32. We use base-32 values for + // indexing into the traceTable and the frameTable, for the following reasons. + // + // - Base-32 gives more compact indices than base-16. + // + // - 32 is a power-of-two, which makes the necessary div/mod calculations + // fast. + // + // - We can (and do) choose non-numeric digits for base-32. When + // inspecting/debugging the JSON output, non-numeric indices are easier to + // search for than numeric indices. + // + char* Base32(uint32_t aN) { + static const char digits[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef"; + + char* b = mIdBuf + kIdBufLen - 1; + *b = '\0'; + do { + b--; + if (b == mIdBuf) { + MOZ_CRASH("Base32 buffer too small"); + } + *b = digits[aN % 32]; + aN /= 32; + } while (aN); + + return b; + } + + PointerIdMap mIdMap; + uint32_t mNextId; + + // |mIdBuf| must have space for at least eight chars, which is the space + // needed to hold 'Dffffff' (including the terminating null char), which is + // the base-32 representation of 0xffffffff. + static const size_t kIdBufLen = 16; + char mIdBuf[kIdBufLen]; +}; + +// Helper class for converting a pointer value to a string. +class ToStringConverter { + public: + const char* ToPtrString(const void* aPtr) { + snprintf(kPtrBuf, sizeof(kPtrBuf) - 1, "%" PRIxPTR, (uintptr_t)aPtr); + return kPtrBuf; + } + + private: + char kPtrBuf[32]; +}; + +static void WriteBlockContents(JSONWriter& aWriter, const LiveBlock& aBlock) { + size_t numWords = aBlock.ReqSize() / sizeof(uintptr_t*); + if (numWords == 0) { + return; + } + + aWriter.StartArrayProperty("contents", aWriter.SingleLineStyle); + { + const uintptr_t** block = (const uintptr_t**)aBlock.Address(); + ToStringConverter sc; + for (size_t i = 0; i < numWords; ++i) { + aWriter.StringElement(MakeStringSpan(sc.ToPtrString(block[i]))); + } + } + aWriter.EndArray(); +} + +static void AnalyzeImpl(UniquePtr<JSONWriteFunc> aWriter) { + // Some blocks may have been allocated while creating |aWriter|. Those blocks + // will be freed at the end of this function when |write| is destroyed. The + // allocations will have occurred while intercepts were not blocked, so the + // frees better be as well, otherwise we'll get assertion failures. + // Therefore, this declaration must precede the AutoBlockIntercepts + // declaration, to ensure that |write| is destroyed *after* intercepts are + // unblocked. + JSONWriter writer(std::move(aWriter)); + + AutoBlockIntercepts block(Thread::Fetch()); + AutoLockState lock; + + // Allocate this on the heap instead of the stack because it's fairly large. + auto locService = InfallibleAllocPolicy::new_<CodeAddressService>(); + + StackTraceSet usedStackTraces(512); + PointerSet usedPcs(512); + + size_t iscSize; + + static int analysisCount = 1; + StatusMsg("Dump %d {\n", analysisCount++); + + writer.Start(); + { + writer.IntProperty("version", kOutputVersionNumber); + + writer.StartObjectProperty("invocation"); + { + const char* var = gOptions->DMDEnvVar(); + if (var) { + writer.StringProperty("dmdEnvVar", MakeStringSpan(var)); + } else { + writer.NullProperty("dmdEnvVar"); + } + + writer.StringProperty("mode", MakeStringSpan(gOptions->ModeString())); + } + writer.EndObject(); + + StatusMsg(" Constructing the heap block list...\n"); + + ToIdStringConverter isc; + ToStringConverter sc; + + writer.StartArrayProperty("blockList"); + { + // Lambda that writes out a live block. + auto writeLiveBlock = [&](const LiveBlock& aB, size_t aNum) { + aB.AddStackTracesToTable(usedStackTraces); + + MOZ_ASSERT_IF(gOptions->IsScanMode(), aNum == 1); + + writer.StartObjectElement(writer.SingleLineStyle); + { + if (gOptions->IsScanMode()) { + writer.StringProperty("addr", + MakeStringSpan(sc.ToPtrString(aB.Address()))); + WriteBlockContents(writer, aB); + } + writer.IntProperty("req", aB.ReqSize()); + if (aB.SlopSize() > 0) { + writer.IntProperty("slop", aB.SlopSize()); + } + + if (aB.AllocStackTrace()) { + writer.StringProperty( + "alloc", MakeStringSpan(isc.ToIdString(aB.AllocStackTrace()))); + } + + if (gOptions->IsDarkMatterMode() && aB.NumReports() > 0) { + writer.StartArrayProperty("reps"); + { + if (aB.ReportStackTrace1()) { + writer.StringElement( + MakeStringSpan(isc.ToIdString(aB.ReportStackTrace1()))); + } + if (aB.ReportStackTrace2()) { + writer.StringElement( + MakeStringSpan(isc.ToIdString(aB.ReportStackTrace2()))); + } + } + writer.EndArray(); + } + + if (aNum > 1) { + writer.IntProperty("num", aNum); + } + } + writer.EndObject(); + }; + + // Live blocks. + if (!gOptions->IsScanMode()) { + // At this point we typically have many LiveBlocks that differ only in + // their address. Aggregate them to reduce the size of the output file. + AggregatedLiveBlockTable agg(8192); + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + const LiveBlock& b = iter.get(); + b.AddStackTracesToTable(usedStackTraces); + + if (AggregatedLiveBlockTable::AddPtr p = agg.lookupForAdd(&b)) { + p->value() += 1; + } else { + MOZ_ALWAYS_TRUE(agg.add(p, &b, 1)); + } + } + + // Now iterate over the aggregated table. + for (auto iter = agg.iter(); !iter.done(); iter.next()) { + const LiveBlock& b = *iter.get().key(); + size_t num = iter.get().value(); + writeLiveBlock(b, num); + } + + } else { + // In scan mode we cannot aggregate because we print each live block's + // address and contents. + for (auto iter = gLiveBlockTable->iter(); !iter.done(); iter.next()) { + const LiveBlock& b = iter.get(); + b.AddStackTracesToTable(usedStackTraces); + + writeLiveBlock(b, 1); + } + } + + // Dead blocks. + for (auto iter = gDeadBlockTable->iter(); !iter.done(); iter.next()) { + const DeadBlock& b = iter.get().key(); + b.AddStackTracesToTable(usedStackTraces); + + size_t num = iter.get().value(); + MOZ_ASSERT(num > 0); + + writer.StartObjectElement(writer.SingleLineStyle); + { + writer.IntProperty("req", b.ReqSize()); + if (b.SlopSize() > 0) { + writer.IntProperty("slop", b.SlopSize()); + } + if (b.AllocStackTrace()) { + writer.StringProperty( + "alloc", MakeStringSpan(isc.ToIdString(b.AllocStackTrace()))); + } + + if (num > 1) { + writer.IntProperty("num", num); + } + } + writer.EndObject(); + } + } + writer.EndArray(); + + StatusMsg(" Constructing the stack trace table...\n"); + + writer.StartObjectProperty("traceTable"); + { + for (auto iter = usedStackTraces.iter(); !iter.done(); iter.next()) { + const StackTrace* const st = iter.get(); + writer.StartArrayProperty(MakeStringSpan(isc.ToIdString(st)), + writer.SingleLineStyle); + { + for (uint32_t i = 0; i < st->Length(); i++) { + const void* pc = st->Pc(i); + writer.StringElement(MakeStringSpan(isc.ToIdString(pc))); + MOZ_ALWAYS_TRUE(usedPcs.put(pc)); + } + } + writer.EndArray(); + } + } + writer.EndObject(); + + StatusMsg(" Constructing the stack frame table...\n"); + + writer.StartObjectProperty("frameTable"); + { + static const size_t locBufLen = 1024; + char locBuf[locBufLen]; + + for (auto iter = usedPcs.iter(); !iter.done(); iter.next()) { + const void* const pc = iter.get(); + + // Use 0 for the frame number. See the JSON format description comment + // in DMD.h to understand why. + locService->GetLocation(0, pc, locBuf, locBufLen); + writer.StringProperty(MakeStringSpan(isc.ToIdString(pc)), + MakeStringSpan(locBuf)); + } + } + writer.EndObject(); + + iscSize = isc.sizeOfExcludingThis(MallocSizeOf); + } + writer.End(); + + if (gOptions->ShowDumpStats()) { + Sizes sizes; + SizeOfInternal(&sizes); + + static const size_t kBufLen = 64; + char buf1[kBufLen]; + char buf2[kBufLen]; + char buf3[kBufLen]; + + StatusMsg(" Execution measurements {\n"); + + StatusMsg(" Data structures that persist after Dump() ends {\n"); + + StatusMsg(" Used stack traces: %10s bytes\n", + Show(sizes.mStackTracesUsed, buf1, kBufLen)); + + StatusMsg(" Unused stack traces: %10s bytes\n", + Show(sizes.mStackTracesUnused, buf1, kBufLen)); + + StatusMsg(" Stack trace table: %10s bytes (%s entries, %s used)\n", + Show(sizes.mStackTraceTable, buf1, kBufLen), + Show(gStackTraceTable->capacity(), buf2, kBufLen), + Show(gStackTraceTable->count(), buf3, kBufLen)); + + StatusMsg(" Live block table: %10s bytes (%s entries, %s used)\n", + Show(sizes.mLiveBlockTable, buf1, kBufLen), + Show(gLiveBlockTable->capacity(), buf2, kBufLen), + Show(gLiveBlockTable->count(), buf3, kBufLen)); + + StatusMsg(" Dead block table: %10s bytes (%s entries, %s used)\n", + Show(sizes.mDeadBlockTable, buf1, kBufLen), + Show(gDeadBlockTable->capacity(), buf2, kBufLen), + Show(gDeadBlockTable->count(), buf3, kBufLen)); + + StatusMsg(" }\n"); + StatusMsg(" Data structures that are destroyed after Dump() ends {\n"); + + StatusMsg( + " Location service: %10s bytes\n", + Show(locService->SizeOfIncludingThis(MallocSizeOf), buf1, kBufLen)); + StatusMsg(" Used stack traces set: %10s bytes\n", + Show(usedStackTraces.shallowSizeOfExcludingThis(MallocSizeOf), + buf1, kBufLen)); + StatusMsg( + " Used PCs set: %10s bytes\n", + Show(usedPcs.shallowSizeOfExcludingThis(MallocSizeOf), buf1, kBufLen)); + StatusMsg(" Pointer ID map: %10s bytes\n", + Show(iscSize, buf1, kBufLen)); + + StatusMsg(" }\n"); + StatusMsg(" Counts {\n"); + + size_t hits = locService->NumCacheHits(); + size_t misses = locService->NumCacheMisses(); + size_t requests = hits + misses; + StatusMsg(" Location service: %10s requests\n", + Show(requests, buf1, kBufLen)); + + size_t count = locService->CacheCount(); + size_t capacity = locService->CacheCapacity(); + StatusMsg( + " Location service cache: " + "%4.1f%% hit rate, %.1f%% occupancy at end\n", + Percent(hits, requests), Percent(count, capacity)); + + StatusMsg(" }\n"); + StatusMsg(" }\n"); + } + + InfallibleAllocPolicy::delete_(locService); + + StatusMsg("}\n"); +} + +void DMDFuncs::Analyze(UniquePtr<JSONWriteFunc> aWriter) { + AnalyzeImpl(std::move(aWriter)); + ClearReports(); +} + +//--------------------------------------------------------------------------- +// Testing +//--------------------------------------------------------------------------- + +void DMDFuncs::ResetEverything(const char* aOptions) { + AutoLockState lock; + + // Reset options. + InfallibleAllocPolicy::delete_(gOptions); + gOptions = InfallibleAllocPolicy::new_<Options>(aOptions); + + // Clear all existing blocks. + gLiveBlockTable->clear(); + gDeadBlockTable->clear(); + + // Reset gBernoulli to a deterministic state. (Its current state depends on + // all previous trials.) + ResetBernoulli(); +} + +} // namespace dmd +} // namespace mozilla diff --git a/memory/replace/dmd/DMD.h b/memory/replace/dmd/DMD.h new file mode 100644 index 0000000000..c057047800 --- /dev/null +++ b/memory/replace/dmd/DMD.h @@ -0,0 +1,291 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef DMD_h___ +#define DMD_h___ + +#include <stdarg.h> +#include <string.h> + +#include <utility> + +#include "mozilla/DebugOnly.h" +#include "mozilla/Types.h" +#include "mozilla/UniquePtr.h" +#include "replace_malloc_bridge.h" + +namespace mozilla { + +class JSONWriteFunc; + +namespace dmd { + +struct Sizes { + size_t mStackTracesUsed; + size_t mStackTracesUnused; + size_t mStackTraceTable; + size_t mLiveBlockTable; + size_t mDeadBlockTable; + + Sizes() { Clear(); } + void Clear() { memset(this, 0, sizeof(Sizes)); } +}; + +// See further below for a description of each method. The DMDFuncs class +// should contain a virtual method for each of them (except IsRunning, +// which can be inferred from the DMDFuncs singleton existing). +struct DMDFuncs { + virtual void Report(const void*); + + virtual void ReportOnAlloc(const void*); + + virtual void ClearReports(); + + virtual void Analyze(UniquePtr<JSONWriteFunc>); + + virtual void SizeOf(Sizes*); + + virtual void StatusMsg(const char*, va_list) MOZ_FORMAT_PRINTF(2, 0); + + virtual void ResetEverything(const char*); + +#ifndef REPLACE_MALLOC_IMPL + // We deliberately don't use ReplaceMalloc::GetDMDFuncs here, because if we + // did, the following would happen. + // - The code footprint of each call to Get() larger as GetDMDFuncs ends + // up inlined. + // - When no replace-malloc library is loaded, the number of instructions + // executed is equivalent, but don't necessarily fit in the same cache + // line. + // - When a non-DMD replace-malloc library is loaded, the overhead is + // higher because there is first a check for the replace malloc bridge + // and then for the DMDFuncs singleton. + // Initializing the DMDFuncs singleton on the first access makes the + // overhead even worse. Either Get() is inlined and massive, or it isn't + // and a simple value check becomes a function call. + static DMDFuncs* Get() { return sSingleton.Get(); } + + private: + // Wrapper class keeping a pointer to the DMD functions. It is statically + // initialized because it needs to be set early enough. + // Debug builds also check that it's never accessed before the static + // initialization actually occured, which could be the case if some other + // static initializer ended up calling into DMD. + class Singleton { + public: + Singleton() + : mValue(ReplaceMalloc::GetDMDFuncs()) +# ifdef DEBUG + , + mInitialized(true) +# endif + { + } + + DMDFuncs* Get() { + MOZ_ASSERT(mInitialized); + return mValue; + } + + private: + DMDFuncs* mValue; +# ifdef DEBUG + bool mInitialized; +# endif + }; + + // This singleton pointer must be defined on the program side. In Gecko, + // this is done in xpcom/base/nsMemoryInfoDumper.cpp. + static /* DMDFuncs:: */ Singleton sSingleton; +#endif +}; + +#ifndef REPLACE_MALLOC_IMPL +// Mark a heap block as reported by a memory reporter. +inline void Report(const void* aPtr) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->Report(aPtr); + } +} + +// Mark a heap block as reported immediately on allocation. +inline void ReportOnAlloc(const void* aPtr) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->ReportOnAlloc(aPtr); + } +} + +// Clears existing reportedness data from any prior runs of the memory +// reporters. The following sequence should be used. +// - ClearReports() +// - run the memory reporters +// - Analyze() +// This sequence avoids spurious twice-reported warnings. +inline void ClearReports() { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->ClearReports(); + } +} + +// Determines which heap blocks have been reported, and dumps JSON output +// (via |aWriter|) describing the heap. +// +// The following sample output contains comments that explain the format and +// design choices. The output files can be quite large, so a number of +// decisions were made to minimize size, such as using short property names and +// omitting properties whenever possible. +// +// { +// // The version number of the format, which will be incremented each time +// // backwards-incompatible changes are made. A mandatory integer. +// // +// // Version history: +// // - 1: Bug 1044709 +// // - 2: Bug 1094552 +// // - 3: Bug 1100851 +// // - 4: Bug 1121830 +// // - 5: Bug 1253512 +// "version": 5, +// +// // Information about how DMD was invoked. A mandatory object. +// "invocation": { +// // The contents of the $DMD environment variable. A string, or |null| if +// // $DMD is undefined. +// "dmdEnvVar": "--mode=dark-matter", +// +// // The profiling mode. A mandatory string taking one of the following +// // values: "live", "dark-matter", "cumulative", "scan". +// "mode": "dark-matter", +// }, +// +// // Details of all analyzed heap blocks. A mandatory array. +// "blockList": [ +// // An example of a heap block. +// { +// // Requested size, in bytes. This is a mandatory integer. +// "req": 3584, +// +// // Requested slop size, in bytes. This is mandatory if it is non-zero, +// // but omitted otherwise. +// "slop": 512, +// +// // The stack trace at which the block was allocated. An optional +// // string that indexes into the "traceTable" object. If omitted, no +// // allocation stack trace was recorded for the block. +// "alloc": "A", +// +// // One or more stack traces at which this heap block was reported by a +// // memory reporter. An optional array that will only be present in +// // "dark-matter" mode. The elements are strings that index into +// // the "traceTable" object. +// "reps": ["B"] +// +// // The number of heap blocks with exactly the above properties. This +// // is mandatory if it is greater than one, but omitted otherwise. +// // (Blocks with identical properties don't have to be aggregated via +// // this property, but it can greatly reduce output file size.) +// "num": 5, +// +// // The address of the block. This is mandatory in "scan" mode, but +// // omitted otherwise. +// "addr": "4e4e4e4e", +// +// // The contents of the block, read one word at a time. This is +// // mandatory in "scan" mode for blocks at least one word long, but +// // omitted otherwise. +// "contents": ["0", "6", "7f7f7f7f", "0"] +// } +// ], +// +// // The stack traces referenced by elements of the "blockList" array. This +// // could be an array, but making it an object makes it easier to see +// // which stacks correspond to which references in the "blockList" array. +// "traceTable": { +// // Each property corresponds to a stack trace mentioned in the "blocks" +// // object. Each element is an index into the "frameTable" object. +// "A": ["D", "E"], +// "B": ["F", "G"] +// }, +// +// // The stack frames referenced by the "traceTable" object. The +// // descriptions can be quite long, so they are stored separately from the +// // "traceTable" object so that each one only has to be written once. +// // This could also be an array, but again, making it an object makes it +// // easier to see which frames correspond to which references in the +// // "traceTable" object. +// "frameTable": { +// // Each property key is a frame key mentioned in the "traceTable" object. +// // Each property value is a string containing a frame description. Each +// // frame description must be in a format recognized by `fix_stacks.py`, +// // which requires a frame number at the start. Because each stack frame +// // description in this table can be shared between multiple stack +// // traces, we use a dummy value of #00. The proper frame number can be +// // reconstructed later by scripts that output stack traces in a +// // conventional non-shared format. +// "D": "#00: foo (Foo.cpp:123)", +// "E": "#00: bar (Bar.cpp:234)", +// "F": "#00: baz (Baz.cpp:345)", +// "G": "#00: quux (Quux.cpp:456)" +// } +// } +// +// Implementation note: normally, this function wouldn't be templated, but in +// that case, the function is compiled, which makes the destructor for the +// UniquePtr fire up, and that needs JSONWriteFunc to be fully defined. That, +// in turn, requires to include JSONWriter.h, which includes +// double-conversion.h, which ends up breaking various things built with +// -Werror for various reasons. +// +template <typename JSONWriteFunc> +inline void Analyze(UniquePtr<JSONWriteFunc> aWriteFunc) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->Analyze(std::move(aWriteFunc)); + } +} + +// Gets the size of various data structures. Used to implement a memory +// reporter for DMD. +inline void SizeOf(Sizes* aSizes) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->SizeOf(aSizes); + } +} + +// Prints a status message prefixed with "DMD[<pid>]". Use sparingly. +MOZ_FORMAT_PRINTF(1, 2) +inline void StatusMsg(const char* aFmt, ...) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + va_list ap; + va_start(ap, aFmt); + funcs->StatusMsg(aFmt, ap); + va_end(ap); + } +} + +// Indicates whether or not DMD is running. +inline bool IsRunning() { return !!DMDFuncs::Get(); } + +// Resets all DMD options and then sets new ones according to those specified +// in |aOptions|. Also clears all recorded data about allocations. Only used +// for testing purposes. +inline void ResetEverything(const char* aOptions) { + DMDFuncs* funcs = DMDFuncs::Get(); + if (funcs) { + funcs->ResetEverything(aOptions); + } +} +#endif + +} // namespace dmd +} // namespace mozilla + +#endif /* DMD_h___ */ diff --git a/memory/replace/dmd/README b/memory/replace/dmd/README new file mode 100644 index 0000000000..537893358a --- /dev/null +++ b/memory/replace/dmd/README @@ -0,0 +1,3 @@ +This is DMD. See +https://firefox-source-docs.mozilla.org/performance/memory/dmd.html for +details on how to use it. diff --git a/memory/replace/dmd/block_analyzer.py b/memory/replace/dmd/block_analyzer.py new file mode 100644 index 0000000000..1f907b38a7 --- /dev/null +++ b/memory/replace/dmd/block_analyzer.py @@ -0,0 +1,292 @@ +#!/usr/bin/env python3 + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# From a scan mode DMD log, extract some information about a +# particular block, such as its allocation stack or which other blocks +# contain pointers to it. This can be useful when investigating leaks +# caused by unknown references to refcounted objects. + +import argparse +import gzip +import json +import re +import sys + +# The DMD output version this script handles. +outputVersion = 5 + +# If --ignore-alloc-fns is specified, stack frames containing functions that +# match these strings will be removed from the *start* of stack traces. (Once +# we hit a non-matching frame, any subsequent frames won't be removed even if +# they do match.) +allocatorFns = [ + "malloc (", + "replace_malloc", + "replace_calloc", + "replace_realloc", + "replace_memalign", + "replace_posix_memalign", + "malloc_zone_malloc", + "moz_xmalloc", + "moz_xcalloc", + "moz_xrealloc", + "operator new(", + "operator new[](", + "g_malloc", + "g_slice_alloc", + "callocCanGC", + "reallocCanGC", + "vpx_malloc", + "vpx_calloc", + "vpx_realloc", + "vpx_memalign", + "js_malloc", + "js_calloc", + "js_realloc", + "pod_malloc", + "pod_calloc", + "pod_realloc", + "nsTArrayInfallibleAllocator::Malloc", + "Allocator<ReplaceMallocBase>::malloc(", + "mozilla::dmd::StackTrace::Get(", + "mozilla::dmd::AllocCallback(", + "mozilla::dom::DOMArena::Allocate(", + # This one necessary to fully filter some sequences of allocation functions + # that happen in practice. Note that ??? entries that follow non-allocation + # functions won't be stripped, as explained above. + "???", +] + +#### + +# Command line arguments + + +def range_1_24(string): + value = int(string) + if value < 1 or value > 24: + msg = "{:s} is not in the range 1..24".format(string) + raise argparse.ArgumentTypeError(msg) + return value + + +parser = argparse.ArgumentParser( + description="Analyze the heap graph to find out things about an object. \ +By default this prints out information about blocks that point to the given block." +) + +parser.add_argument("dmd_log_file_name", help="clamped DMD log file name") + +parser.add_argument("block", help="address of the block of interest") + +parser.add_argument( + "--info", + dest="info", + action="store_true", + default=False, + help="Print out information about the block.", +) + +parser.add_argument( + "-sfl", + "--max-stack-frame-length", + type=int, + default=300, + help="Maximum number of characters to print from each stack frame", +) + +parser.add_argument( + "-a", + "--ignore-alloc-fns", + action="store_true", + help="ignore allocation functions at the start of traces", +) + +parser.add_argument( + "-f", + "--max-frames", + type=range_1_24, + default=8, + help="maximum number of frames to consider in each trace", +) + +parser.add_argument( + "-c", + "--chain-reports", + action="store_true", + help="if only one block is found to hold onto the object, report " + "the next one, too", +) + + +#### + + +class BlockData: + def __init__(self, json_block): + self.addr = json_block["addr"] + + if "contents" in json_block: + contents = json_block["contents"] + else: + contents = [] + self.contents = [] + for c in contents: + self.contents.append(int(c, 16)) + + self.req_size = json_block["req"] + + self.alloc_stack = json_block["alloc"] + + +def print_trace_segment(args, stacks, block): + (traceTable, frameTable) = stacks + + for l in traceTable[block.alloc_stack]: + # The 5: is to remove the bogus leading "#00: " from the stack frame. + print(" " + frameTable[l][5 : args.max_stack_frame_length]) + + +def show_referrers(args, blocks, stacks, block): + visited = set([]) + + anyFound = False + + while True: + referrers = {} + + for b, data in blocks.items(): + which_edge = 0 + for e in data.contents: + if e == block: + # 8 is the number of bytes per word on a 64-bit system. + # XXX This means that this output will be wrong for logs from 32-bit systems! + referrers.setdefault(b, []).append(8 * which_edge) + anyFound = True + which_edge += 1 + + for r in referrers: + sys.stdout.write( + "0x{} size = {} bytes".format(blocks[r].addr, blocks[r].req_size) + ) + plural = "s" if len(referrers[r]) > 1 else "" + print( + " at byte offset" + + plural + + " " + + (", ".join(str(x) for x in referrers[r])) + ) + print_trace_segment(args, stacks, blocks[r]) + print("") + + if args.chain_reports: + if len(referrers) == 0: + sys.stdout.write("Found no more referrers.\n") + break + if len(referrers) > 1: + sys.stdout.write("Found too many referrers.\n") + break + + sys.stdout.write("Chaining to next referrer.\n\n") + for r in referrers: + block = r + if block in visited: + sys.stdout.write("Found a loop.\n") + break + visited.add(block) + else: + break + + if not anyFound: + print("No referrers found.") + + +def show_block_info(args, blocks, stacks, block): + b = blocks[block] + sys.stdout.write("block: 0x{}\n".format(b.addr)) + sys.stdout.write("requested size: {} bytes\n".format(b.req_size)) + sys.stdout.write("\n") + sys.stdout.write("block contents: ") + for c in b.contents: + v = "0" if c == 0 else blocks[c].addr + sys.stdout.write("0x{} ".format(v)) + sys.stdout.write("\n\n") + sys.stdout.write("allocation stack:\n") + print_trace_segment(args, stacks, b) + return + + +def cleanupTraceTable(args, frameTable, traceTable): + # Remove allocation functions at the start of traces. + if args.ignore_alloc_fns: + # Build a regexp that matches every function in allocatorFns. + escapedAllocatorFns = map(re.escape, allocatorFns) + fn_re = re.compile("|".join(escapedAllocatorFns)) + + # Remove allocator fns from each stack trace. + for traceKey, frameKeys in traceTable.items(): + numSkippedFrames = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if re.search(fn_re, frameDesc): + numSkippedFrames += 1 + else: + break + if numSkippedFrames > 0: + traceTable[traceKey] = frameKeys[numSkippedFrames:] + + # Trim the number of frames. + for traceKey, frameKeys in traceTable.items(): + if len(frameKeys) > args.max_frames: + traceTable[traceKey] = frameKeys[: args.max_frames] + + +def loadGraph(options): + # Handle gzipped input if necessary. + isZipped = options.dmd_log_file_name.endswith(".gz") + opener = gzip.open if isZipped else open + + with opener(options.dmd_log_file_name, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + block_list = j["blockList"] + blocks = {} + + for json_block in block_list: + blocks[int(json_block["addr"], 16)] = BlockData(json_block) + + traceTable = j["traceTable"] + frameTable = j["frameTable"] + + cleanupTraceTable(options, frameTable, traceTable) + + return (blocks, (traceTable, frameTable)) + + +def analyzeLogs(): + options = parser.parse_args() + + (blocks, stacks) = loadGraph(options) + + block = int(options.block, 16) + + if block not in blocks: + print("Object " + options.block + " not found in traces.") + print("It could still be the target of some nodes.") + return + + if options.info: + show_block_info(options, blocks, stacks, block) + return + + show_referrers(options, blocks, stacks, block) + + +if __name__ == "__main__": + analyzeLogs() diff --git a/memory/replace/dmd/dmd.py b/memory/replace/dmd/dmd.py new file mode 100755 index 0000000000..3e20d8d6bd --- /dev/null +++ b/memory/replace/dmd/dmd.py @@ -0,0 +1,1028 @@ +#! /usr/bin/env python3 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""This script analyzes a JSON file emitted by DMD.""" + +import argparse +import collections +import gzip +import io +import json +import os +import platform +import re +import shutil +import sys +import tempfile +from bisect import bisect_right +from functools import cmp_to_key +from typing import Callable + +# The DMD output version this script handles. +outputVersion = 5 + +# If --ignore-alloc-fns is specified, stack frames containing functions that +# match these strings will be removed from the *start* of stack traces. (Once +# we hit a non-matching frame, any subsequent frames won't be removed even if +# they do match.) +allocatorFns = [ + # Matches malloc, replace_malloc, moz_xmalloc, vpx_malloc, js_malloc, + # pod_malloc, malloc_zone_*, g_malloc. + "malloc", + # Matches calloc, replace_calloc, moz_xcalloc, vpx_calloc, js_calloc, + # pod_calloc, malloc_zone_calloc, pod_callocCanGC. + "calloc", + # Matches realloc, replace_realloc, moz_xrealloc, vpx_realloc, js_realloc, + # pod_realloc, pod_reallocCanGC. + "realloc", + # Matches memalign, posix_memalign, replace_memalign, replace_posix_memalign, + # moz_xmemalign, vpx_memalign, malloc_zone_memalign. + "memalign", + "operator new(", + "operator new[](", + "g_slice_alloc", + # This one is necessary to fully filter some sequences of allocation + # functions that happen in practice. Note that ??? entries that follow + # non-allocation functions won't be stripped, as explained above. + "???", + # Match DMD internals. + "mozilla::dmd::AllocCallback", + "mozilla::dmd::StackTrace::Get", +] + + +def cmp(a, b): + return (a > b) - (a < b) + + +class Record(object): + """A record is an aggregation of heap blocks that have identical stack + traces. It can also be used to represent the difference between two + records.""" + + def __init__(self): + self.numBlocks = 0 + self.reqSize = 0 + self.slopSize = 0 + self.usableSize = 0 + self.allocatedAtDesc = None + self.reportedAtDescs = [] + self.usableSizes = collections.defaultdict(int) + + def isZero(self, args): + return ( + self.numBlocks == 0 + and self.reqSize == 0 + and self.slopSize == 0 + and self.usableSize == 0 + and len(self.usableSizes) == 0 + ) + + def negate(self): + self.numBlocks = -self.numBlocks + self.reqSize = -self.reqSize + self.slopSize = -self.slopSize + self.usableSize = -self.usableSize + + negatedUsableSizes = collections.defaultdict(int) + for usableSize, count in self.usableSizes.items(): + negatedUsableSizes[-usableSize] = count + self.usableSizes = negatedUsableSizes + + def subtract(self, r): + # We should only be calling this on records with matching stack traces. + # Check this. + assert self.allocatedAtDesc == r.allocatedAtDesc + assert self.reportedAtDescs == r.reportedAtDescs + + self.numBlocks -= r.numBlocks + self.reqSize -= r.reqSize + self.slopSize -= r.slopSize + self.usableSize -= r.usableSize + + usableSizes1 = self.usableSizes + usableSizes2 = r.usableSizes + usableSizes3 = collections.defaultdict(int) + for usableSize in usableSizes1: + counts1 = usableSizes1[usableSize] + if usableSize in usableSizes2: + counts2 = usableSizes2[usableSize] + del usableSizes2[usableSize] + counts3 = counts1 - counts2 + if counts3 != 0: + if counts3 < 0: + usableSize = -usableSize + counts3 = -counts3 + usableSizes3[usableSize] = counts3 + else: + usableSizes3[usableSize] = counts1 + + for usableSize in usableSizes2: + usableSizes3[-usableSize] = usableSizes2[usableSize] + + self.usableSizes = usableSizes3 + + @staticmethod + def cmpByUsableSize(r1, r2): + # Sort by usable size, then by req size. + return cmp(abs(r1.usableSize), abs(r2.usableSize)) or Record.cmpByReqSize( + r1, r2 + ) + + @staticmethod + def cmpByReqSize(r1, r2): + # Sort by req size. + return cmp(abs(r1.reqSize), abs(r2.reqSize)) + + @staticmethod + def cmpBySlopSize(r1, r2): + # Sort by slop size. + return cmp(abs(r1.slopSize), abs(r2.slopSize)) + + @staticmethod + def cmpByNumBlocks(r1, r2): + # Sort by block counts, then by usable size. + return cmp(abs(r1.numBlocks), abs(r2.numBlocks)) or Record.cmpByUsableSize( + r1, r2 + ) + + +sortByChoices = { + "usable": Record.cmpByUsableSize, # the default + "req": Record.cmpByReqSize, + "slop": Record.cmpBySlopSize, + "num-blocks": Record.cmpByNumBlocks, +} + + +def parseCommandLine(): + # 24 is the maximum number of frames that DMD will produce. + def range_1_24(string): + value = int(string) + if value < 1 or value > 24: + msg = "{:s} is not in the range 1..24".format(string) + raise argparse.ArgumentTypeError(msg) + return value + + description = """ +Analyze heap data produced by DMD. +If one file is specified, analyze it; if two files are specified, analyze the +difference. +Input files can be gzipped. +Write to stdout unless -o/--output is specified. +Stack traces are fixed to show function names, filenames and line numbers +unless --no-fix-stacks is specified; stack fixing modifies the original file +and may take some time. If specified, the BREAKPAD_SYMBOLS_PATH environment +variable is used to find breakpad symbols for stack fixing. +""" + p = argparse.ArgumentParser(description=description) + + p.add_argument( + "-o", + "--output", + type=argparse.FileType("w"), + help="output file; stdout if unspecified", + ) + + p.add_argument( + "-f", + "--max-frames", + type=range_1_24, + default=8, + help="maximum number of frames to consider in each trace", + ) + + p.add_argument( + "-s", + "--sort-by", + choices=sortByChoices.keys(), + default="usable", + help="sort the records by a particular metric", + ) + + p.add_argument( + "-a", + "--ignore-alloc-fns", + action="store_true", + help="ignore allocation functions at the start of traces", + ) + + p.add_argument("--no-fix-stacks", action="store_true", help="do not fix stacks") + + p.add_argument( + "--clamp-contents", + action="store_true", + help="for a scan mode log, clamp addresses to the start of live blocks, " + "or zero if not in one", + ) + + p.add_argument( + "--print-clamp-stats", + action="store_true", + help="print information about the results of pointer clamping; mostly " + "useful for debugging clamping", + ) + + p.add_argument( + "--filter-stacks-for-testing", + action="store_true", + help="filter stack traces; only useful for testing purposes", + ) + + p.add_argument( + "--filter", + default=[], + action="append", + help="Only print entries that have a stack that matches the filter. " + "A filter may be negated by prefixing it with `!`. " + "If multiple filters are specified, all of them must match.", + ) + + p.add_argument("input_file", help="a file produced by DMD") + + p.add_argument( + "input_file2", + nargs="?", + help="a file produced by DMD; if present, it is diff'd with input_file", + ) + + return p.parse_args(sys.argv[1:]) + + +# Fix stacks if necessary: first write the output to a tempfile, then replace +# the original file with it. +def fixStackTraces(inputFilename, isZipped, opener): + # This append() call is needed to make the import statements work when this + # script is installed as a symlink. + sys.path.append(os.path.dirname(__file__)) + + bpsyms = os.environ.get("BREAKPAD_SYMBOLS_PATH", None) + sysname = platform.system() + if bpsyms and os.path.exists(bpsyms): + import fix_stacks as fixModule + + def fix(line): + return fixModule.fixSymbols(line, jsonMode=True, breakpadSymsDir=bpsyms) + + elif sysname in ("Linux", "Darwin", "Windows"): + import fix_stacks as fixModule + + def fix(line): + return fixModule.fixSymbols(line, jsonMode=True) + + else: + return + + # Fix stacks, writing output to a temporary file, and then overwrite the + # original file. + tmpFile = tempfile.NamedTemporaryFile(delete=False) + + # If the input is gzipped, then the output (written initially to |tmpFile|) + # should be gzipped as well. + # + # And we want to set its pre-gzipped filename to '' rather than the name of + # the temporary file, so that programs like the Unix 'file' utility don't + # say that it was called 'tmp6ozTxE' (or something like that) before it was + # zipped. So that explains the |filename=''| parameter. + # + # But setting the filename like that clobbers |tmpFile.name|, so we must + # get that now in order to move |tmpFile| at the end. + tmpFilename = tmpFile.name + if isZipped: + tmpFile = gzip.GzipFile(filename="", fileobj=tmpFile, mode="wb") + + with opener(inputFilename, "rb") as inputFile: + for line in inputFile: + tmpFile.write(fix(line)) + + tmpFile.close() + + shutil.move(tmpFilename, inputFilename) + + +def getDigestFromFile(args, inputFile): + # Handle gzipped input if necessary. + isZipped = inputFile.endswith(".gz") + opener = gzip.open if isZipped else open + + # Fix stack traces unless otherwise instructed. + if not args.no_fix_stacks: + fixStackTraces(inputFile, isZipped, opener) + + if args.clamp_contents: + clampBlockList(args, inputFile, isZipped, opener) + + with opener(inputFile, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + # Extract the main parts of the JSON object. + invocation = j["invocation"] + dmdEnvVar = invocation["dmdEnvVar"] + mode = invocation["mode"] + blockList = j["blockList"] + traceTable = j["traceTable"] + frameTable = j["frameTable"] + + # Insert the necessary entries for unrecorded stack traces. Note that 'ut' + # and 'uf' will not overlap with any keys produced by DMD's + # ToIdStringConverter::Base32() function. + unrecordedTraceID = "ut" + unrecordedFrameID = "uf" + traceTable[unrecordedTraceID] = [unrecordedFrameID] + frameTable[ + unrecordedFrameID + ] = "#00: (no stack trace recorded due to --stacks=partial)" + + # For the purposes of this script, 'scan' behaves like 'live'. + if mode == "scan": + mode = "live" + + if mode not in ["live", "dark-matter", "cumulative"]: + raise Exception("bad 'mode' property: '{:s}'".format(mode)) + + # Remove allocation functions at the start of traces. + if args.ignore_alloc_fns: + # Build a regexp that matches every function in allocatorFns. + escapedAllocatorFns = map(re.escape, allocatorFns) + fn_re = re.compile("|".join(escapedAllocatorFns)) + + # Remove allocator fns from each stack trace. + for traceKey, frameKeys in traceTable.items(): + numSkippedFrames = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if re.search(fn_re, frameDesc): + numSkippedFrames += 1 + else: + break + if numSkippedFrames > 0: + traceTable[traceKey] = frameKeys[numSkippedFrames:] + + # Trim the number of frames. + for traceKey, frameKeys in traceTable.items(): + if len(frameKeys) > args.max_frames: + del frameKeys[args.max_frames :] + + def buildTraceDescription(traceTable, frameTable, traceKey): + frameKeys = traceTable[traceKey] + fmt = " #{:02d}{:}" + + if args.filter_stacks_for_testing: + # This option is used by `test_dmd.js`, which runs the code in + # `SmokeDMD.cpp`. When running that test, there is too much + # variation in the stack traces across different machines and + # platforms to do exact output matching. However, every stack trace + # should have at least three frames that contain `DMD` (in one of + # `DMD.cpp`, `SmokeDMD.cpp`, `SmokeDMD`, or `SmokeDMD.exe`). Some + # example frames from automation (where `..` indicates excised path + # segments): + # + # Linux debug, with stack fixing using breakpad syms: + # `#01: replace_realloc(void*, unsigned long) [../dmd/DMD.cpp:1110]` + # + # Linux opt, with native stack fixing: + # `#02: TestFull(char const*, int, char const*, int) (../dmd/test/SmokeDMD.cpp:165)` + # + # Mac opt, with native stack fixing: + # `#03: RunTests() (../build/tests/bin/SmokeDMD + 0x21f9)` + # + # Windows opt, with native stack fixing failing due to a missing PDB: + # `#04: ??? (..\\build\\tests\\bin\\SmokeDMD.exe + 0x1c58)` + # + # If we see three such frames, we replace the entire stack trace + # with a single, predictable frame. This imprecise matching will at + # least detect if stack fixing fails completely. + dmd_frame_matches = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if "DMD" in frameDesc: + dmd_frame_matches += 1 + if dmd_frame_matches >= 3: + return [fmt.format(1, ": ... DMD.cpp ...")] + + # The frame number is always '#00' (see DMD.h for why), so we have to + # replace that with the correct frame number. + desc = [] + for n, frameKey in enumerate(traceTable[traceKey], start=1): + desc.append(fmt.format(n, frameTable[frameKey][3:])) + return desc + + # Aggregate blocks into records. All sufficiently similar blocks go into a + # single record. + + if mode in ["live", "cumulative"]: + liveOrCumulativeRecords = collections.defaultdict(Record) + elif mode == "dark-matter": + unreportedRecords = collections.defaultdict(Record) + onceReportedRecords = collections.defaultdict(Record) + twiceReportedRecords = collections.defaultdict(Record) + + heapUsableSize = 0 + heapBlocks = 0 + + recordKeyPartCache = {} + + for block in blockList: + # For each block we compute a |recordKey|, and all blocks with the same + # |recordKey| are aggregated into a single record. The |recordKey| is + # derived from the block's 'alloc' and 'reps' (if present) stack + # traces. + # + # We use frame descriptions (e.g. "#00: foo (X.cpp:99)") when comparing + # traces for equality. We can't use trace keys or frame keys because + # they're not comparable across different DMD runs (which is relevant + # when doing diffs). + # + # Using frame descriptions also fits in with the stack trimming done + # for --max-frames, which requires that stack traces with common + # beginnings but different endings to be considered equivalent. E.g. if + # we have distinct traces T1:[A:D1,B:D2,C:D3] and T2:[X:D1,Y:D2,Z:D4] + # and we trim the final frame of each they should be considered + # equivalent because the untrimmed frame descriptions (D1 and D2) + # match. + # + # Having said all that, during a single invocation of dmd.py on a + # single DMD file, for a single frameKey value the record key will + # always be the same, and we might encounter it 1000s of times. So we + # cache prior results for speed. + def makeRecordKeyPart(traceKey): + if traceKey in recordKeyPartCache: + return recordKeyPartCache[traceKey] + + recordKeyPart = str( + list(map(lambda frameKey: frameTable[frameKey], traceTable[traceKey])) + ) + recordKeyPartCache[traceKey] = recordKeyPart + return recordKeyPart + + allocatedAtTraceKey = block.get("alloc", unrecordedTraceID) + if mode in ["live", "cumulative"]: + recordKey = makeRecordKeyPart(allocatedAtTraceKey) + records = liveOrCumulativeRecords + elif mode == "dark-matter": + recordKey = makeRecordKeyPart(allocatedAtTraceKey) + if "reps" in block: + reportedAtTraceKeys = block["reps"] + for reportedAtTraceKey in reportedAtTraceKeys: + recordKey += makeRecordKeyPart(reportedAtTraceKey) + if len(reportedAtTraceKeys) == 1: + records = onceReportedRecords + else: + records = twiceReportedRecords + else: + records = unreportedRecords + + record = records[recordKey] + + if "req" not in block: + raise Exception("'req' property missing in block'") + + reqSize = block["req"] + slopSize = block.get("slop", 0) + + if "num" in block: + num = block["num"] + else: + num = 1 + + usableSize = reqSize + slopSize + heapUsableSize += num * usableSize + heapBlocks += num + + record.numBlocks += num + record.reqSize += num * reqSize + record.slopSize += num * slopSize + record.usableSize += num * usableSize + if record.allocatedAtDesc is None: + record.allocatedAtDesc = buildTraceDescription( + traceTable, frameTable, allocatedAtTraceKey + ) + + if mode in ["live", "cumulative"]: + pass + elif mode == "dark-matter": + if "reps" in block and record.reportedAtDescs == []: + + def f(k): + return buildTraceDescription(traceTable, frameTable, k) + + record.reportedAtDescs = list(map(f, reportedAtTraceKeys)) + record.usableSizes[usableSize] += num + + # All the processed data for a single DMD file is called a "digest". + digest = {} + digest["dmdEnvVar"] = dmdEnvVar + digest["mode"] = mode + digest["heapUsableSize"] = heapUsableSize + digest["heapBlocks"] = heapBlocks + if mode in ["live", "cumulative"]: + digest["liveOrCumulativeRecords"] = liveOrCumulativeRecords + elif mode == "dark-matter": + digest["unreportedRecords"] = unreportedRecords + digest["onceReportedRecords"] = onceReportedRecords + digest["twiceReportedRecords"] = twiceReportedRecords + return digest + + +def diffRecords(args, records1, records2): + records3 = {} + + # Process records1. + for k in records1: + r1 = records1[k] + if k in records2: + # This record is present in both records1 and records2. + r2 = records2[k] + del records2[k] + r2.subtract(r1) + if not r2.isZero(args): + records3[k] = r2 + else: + # This record is present only in records1. + r1.negate() + records3[k] = r1 + + for k in records2: + # This record is present only in records2. + records3[k] = records2[k] + + return records3 + + +def diffDigests(args, d1, d2): + if d1["mode"] != d2["mode"]: + raise Exception("the input files have different 'mode' properties") + + d3 = {} + d3["dmdEnvVar"] = (d1["dmdEnvVar"], d2["dmdEnvVar"]) + d3["mode"] = d1["mode"] + d3["heapUsableSize"] = d2["heapUsableSize"] - d1["heapUsableSize"] + d3["heapBlocks"] = d2["heapBlocks"] - d1["heapBlocks"] + if d1["mode"] in ["live", "cumulative"]: + d3["liveOrCumulativeRecords"] = diffRecords( + args, d1["liveOrCumulativeRecords"], d2["liveOrCumulativeRecords"] + ) + elif d1["mode"] == "dark-matter": + d3["unreportedRecords"] = diffRecords( + args, d1["unreportedRecords"], d2["unreportedRecords"] + ) + d3["onceReportedRecords"] = diffRecords( + args, d1["onceReportedRecords"], d2["onceReportedRecords"] + ) + d3["twiceReportedRecords"] = diffRecords( + args, d1["twiceReportedRecords"], d2["twiceReportedRecords"] + ) + return d3 + + +def printDigest(args, digest): + dmdEnvVar = digest["dmdEnvVar"] + mode = digest["mode"] + heapUsableSize = digest["heapUsableSize"] + heapBlocks = digest["heapBlocks"] + if mode in ["live", "cumulative"]: + liveOrCumulativeRecords = digest["liveOrCumulativeRecords"] + elif mode == "dark-matter": + unreportedRecords = digest["unreportedRecords"] + onceReportedRecords = digest["onceReportedRecords"] + twiceReportedRecords = digest["twiceReportedRecords"] + + separator = "#" + "-" * 65 + "\n" + + def number(n): + """Format a number with comma as a separator.""" + return "{:,d}".format(n) + + def perc(m, n): + return 0 if n == 0 else (100 * m / n) + + def plural(n): + return "" if n == 1 else "s" + + # Prints to stdout, or to file if -o/--output was specified. + def out(*arguments, **kwargs): + print(*arguments, file=args.output, **kwargs) + + def printStack(traceDesc): + for frameDesc in traceDesc: + out(frameDesc) + + def printRecords(recordKind, records, heapUsableSize): + RecordKind = recordKind.capitalize() + out(separator) + numRecords = len(records) + cmpRecords = sortByChoices[args.sort_by] + sortedRecords = sorted( + records.values(), key=cmp_to_key(cmpRecords), reverse=True + ) + kindBlocks = 0 + kindUsableSize = 0 + maxRecord = 1000 + + def is_match(rec: Record, key: str): + return any(key in desc for desc in rec.allocatedAtDesc) + + for arg in args.filter: + key: str + cond: Callable[[Record], bool] + if arg.startswith("\\"): + # just in case you really need to start a filter with '!' (or '\') + key = arg[1:] + cond = is_match + elif arg.startswith("!"): + key = arg[1:] + + def cond(rec, key): + return not is_match(rec, key) # noqa: E731 + + else: + key = arg + cond = is_match + sortedRecords = [rec for rec in sortedRecords if cond(rec, key)] + + # First iteration: get totals, etc. + for record in sortedRecords: + kindBlocks += record.numBlocks + kindUsableSize += record.usableSize + + # Second iteration: print. + if numRecords == 0: + out("# no {:} heap blocks\n".format(recordKind)) + + kindCumulativeUsableSize = 0 + for i, record in enumerate(sortedRecords, start=1): + # Stop printing at the |maxRecord|th record. + if i == maxRecord: + out( + "# {:}: stopping after {:,d} heap block records\n".format( + RecordKind, i + ) + ) + break + + kindCumulativeUsableSize += record.usableSize + + out(RecordKind + " {") + out( + " {:} block{:} in heap block record {:,d} of {:,d}".format( + number(record.numBlocks), plural(record.numBlocks), i, numRecords + ) + ) + out( + " {:} bytes ({:} requested / {:} slop)".format( + number(record.usableSize), + number(record.reqSize), + number(record.slopSize), + ) + ) + + usableSizes = sorted( + record.usableSizes.items(), key=lambda x: abs(x[0]), reverse=True + ) + hasSingleBlock = len(usableSizes) == 1 and usableSizes[0][1] == 1 + + if not hasSingleBlock: + out(" Individual block sizes: ", end="") + if len(usableSizes) == 0: + out("(no change)", end="") + else: + isFirst = True + for usableSize, count in usableSizes: + if not isFirst: + out("; ", end="") + out("{:}".format(number(usableSize)), end="") + if count > 1: + out(" x {:,d}".format(count), end="") + isFirst = False + out() + + out( + " {:4.2f}% of the heap ({:4.2f}% cumulative)".format( + perc(record.usableSize, heapUsableSize), + perc(kindCumulativeUsableSize, heapUsableSize), + ) + ) + if mode in ["live", "cumulative"]: + pass + elif mode == "dark-matter": + out( + " {:4.2f}% of {:} ({:4.2f}% cumulative)".format( + perc(record.usableSize, kindUsableSize), + recordKind, + perc(kindCumulativeUsableSize, kindUsableSize), + ) + ) + out(" Allocated at {") + printStack(record.allocatedAtDesc) + out(" }") + if mode in ["live", "cumulative"]: + pass + elif mode == "dark-matter": + for n, reportedAtDesc in enumerate(record.reportedAtDescs): + again = "again " if n > 0 else "" + out(" Reported {:}at {{".format(again)) + printStack(reportedAtDesc) + out(" }") + out("}\n") + + return (kindUsableSize, kindBlocks) + + def printInvocation(n, dmdEnvVar, mode): + out("Invocation{:} {{".format(n)) + if dmdEnvVar is None: + out(" $DMD is undefined") + else: + out(" $DMD = '" + dmdEnvVar + "'") + out(" Mode = '" + mode + "'") + out("}\n") + + # Print command line. Strip dirs so the output is deterministic, which is + # needed for testing. + out(separator, end="") + out("# " + " ".join(map(os.path.basename, sys.argv)) + "\n") + + # Print invocation(s). + if type(dmdEnvVar) is not tuple: + printInvocation("", dmdEnvVar, mode) + else: + printInvocation(" 1", dmdEnvVar[0], mode) + printInvocation(" 2", dmdEnvVar[1], mode) + + # Print records. + if mode in ["live", "cumulative"]: + liveOrCumulativeUsableSize, liveOrCumulativeBlocks = printRecords( + mode, liveOrCumulativeRecords, heapUsableSize + ) + elif mode == "dark-matter": + twiceReportedUsableSize, twiceReportedBlocks = printRecords( + "twice-reported", twiceReportedRecords, heapUsableSize + ) + + unreportedUsableSize, unreportedBlocks = printRecords( + "unreported", unreportedRecords, heapUsableSize + ) + + onceReportedUsableSize, onceReportedBlocks = printRecords( + "once-reported", onceReportedRecords, heapUsableSize + ) + + # Print summary. + out(separator) + out("Summary {") + if mode in ["live", "cumulative"]: + out( + " Total: {:} bytes in {:} blocks".format( + number(liveOrCumulativeUsableSize), number(liveOrCumulativeBlocks) + ) + ) + elif mode == "dark-matter": + fmt = " {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)" + out(fmt.format("Total:", number(heapUsableSize), 100, number(heapBlocks), 100)) + out( + fmt.format( + "Unreported:", + number(unreportedUsableSize), + perc(unreportedUsableSize, heapUsableSize), + number(unreportedBlocks), + perc(unreportedBlocks, heapBlocks), + ) + ) + out( + fmt.format( + "Once-reported:", + number(onceReportedUsableSize), + perc(onceReportedUsableSize, heapUsableSize), + number(onceReportedBlocks), + perc(onceReportedBlocks, heapBlocks), + ) + ) + out( + fmt.format( + "Twice-reported:", + number(twiceReportedUsableSize), + perc(twiceReportedUsableSize, heapUsableSize), + number(twiceReportedBlocks), + perc(twiceReportedBlocks, heapBlocks), + ) + ) + out("}\n") + + +############################# +# Pretty printer for DMD JSON +############################# + + +def prettyPrintDmdJson(out, j): + out.write("{\n") + + out.write(' "version": {0},\n'.format(j["version"])) + out.write(' "invocation": ') + json.dump(j["invocation"], out, sort_keys=True) + out.write(",\n") + + out.write(' "blockList": [') + first = True + for b in j["blockList"]: + out.write("" if first else ",") + out.write("\n ") + json.dump(b, out, sort_keys=True) + first = False + out.write("\n ],\n") + + out.write(' "traceTable": {') + first = True + for k, l in j["traceTable"].items(): + out.write("" if first else ",") + out.write('\n "{0}": {1}'.format(k, json.dumps(l))) + first = False + out.write("\n },\n") + + out.write(' "frameTable": {') + first = True + for k, v in j["frameTable"].items(): + out.write("" if first else ",") + out.write('\n "{0}": {1}'.format(k, json.dumps(v))) + first = False + out.write("\n }\n") + + out.write("}\n") + + +################################################################## +# Code for clamping addresses using conservative pointer analysis. +################################################################## + + +# Start is the address of the first byte of the block, while end is +# the address of the first byte after the final byte in the block. +class AddrRange: + def __init__(self, block, length): + self.block = block + self.start = int(block, 16) + self.length = length + self.end = self.start + self.length + + assert self.start > 0 + assert length >= 0 + + +class ClampStats: + def __init__(self): + # Number of pointers already pointing to the start of a block. + self.startBlockPtr = 0 + + # Number of pointers pointing to the middle of a block. These + # are clamped to the start of the block they point into. + self.midBlockPtr = 0 + + # Number of null pointers. + self.nullPtr = 0 + + # Number of non-null pointers that didn't point into the middle + # of any blocks. These are clamped to null. + self.nonNullNonBlockPtr = 0 + + def clampedBlockAddr(self, sameAddress): + if sameAddress: + self.startBlockPtr += 1 + else: + self.midBlockPtr += 1 + + def nullAddr(self): + self.nullPtr += 1 + + def clampedNonBlockAddr(self): + self.nonNullNonBlockPtr += 1 + + def log(self): + sys.stderr.write("Results:\n") + sys.stderr.write( + " Number of pointers already pointing to start of blocks: " + + str(self.startBlockPtr) + + "\n" + ) + sys.stderr.write( + " Number of pointers clamped to start of blocks: " + + str(self.midBlockPtr) + + "\n" + ) + sys.stderr.write( + " Number of non-null pointers not pointing into blocks " + "clamped to null: " + str(self.nonNullNonBlockPtr) + "\n" + ) + sys.stderr.write(" Number of null pointers: " + str(self.nullPtr) + "\n") + + +# Search the block ranges array for a block that address points into. +# The search is carried out in an array of starting addresses for each blocks +# because it is faster. +def clampAddress(blockRanges, blockStarts, clampStats, address): + i = bisect_right(blockStarts, address) + + # Any addresses completely out of the range should have been eliminated already. + assert i > 0 + r = blockRanges[i - 1] + assert r.start <= address + + if address >= r.end: + assert address < blockRanges[i].start + clampStats.clampedNonBlockAddr() + return "0" + + clampStats.clampedBlockAddr(r.start == address) + return r.block + + +def clampBlockList(args, inputFileName, isZipped, opener): + # XXX This isn't very efficient because we end up reading and writing + # the file multiple times. + with opener(inputFileName, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + # Check that the invocation is reasonable for contents clamping. + invocation = j["invocation"] + if invocation["mode"] != "scan": + raise Exception("Log was taken in mode " + invocation["mode"] + " not scan") + + sys.stderr.write("Creating block range list.\n") + blockList = j["blockList"] + blockRanges = [] + for block in blockList: + blockRanges.append(AddrRange(block["addr"], block["req"])) + blockRanges.sort(key=lambda r: r.start) + + # Make sure there are no overlapping blocks. + prevRange = blockRanges[0] + for currRange in blockRanges[1:]: + assert prevRange.end <= currRange.start + prevRange = currRange + + sys.stderr.write("Clamping block contents.\n") + clampStats = ClampStats() + firstAddr = blockRanges[0].start + lastAddr = blockRanges[-1].end + + blockStarts = [] + for r in blockRanges: + blockStarts.append(r.start) + + for block in blockList: + # Small blocks don't have any contents. + if "contents" not in block: + continue + + cont = block["contents"] + for i in range(len(cont)): + address = int(cont[i], 16) + + if address == 0: + clampStats.nullAddr() + continue + + # If the address is before the first block or after the last + # block then it can't be within a block. + if address < firstAddr or address >= lastAddr: + clampStats.clampedNonBlockAddr() + cont[i] = "0" + continue + + cont[i] = clampAddress(blockRanges, blockStarts, clampStats, address) + + # Remove any trailing nulls. + while len(cont) and cont[-1] == "0": + cont.pop() + + if args.print_clamp_stats: + clampStats.log() + + sys.stderr.write("Saving file.\n") + tmpFile = tempfile.NamedTemporaryFile(delete=False) + tmpFilename = tmpFile.name + if isZipped: + tmpFile = gzip.GzipFile(filename="", fileobj=tmpFile, mode="wb") + prettyPrintDmdJson(io.TextIOWrapper(tmpFile, encoding="utf-8"), j) + tmpFile.close() + shutil.move(tmpFilename, inputFileName) + + +def main(): + args = parseCommandLine() + digest = getDigestFromFile(args, args.input_file) + if args.input_file2: + digest2 = getDigestFromFile(args, args.input_file2) + digest = diffDigests(args, digest, digest2) + printDigest(args, digest) + + +if __name__ == "__main__": + main() diff --git a/memory/replace/dmd/moz.build b/memory/replace/dmd/moz.build new file mode 100644 index 0000000000..6f3121df48 --- /dev/null +++ b/memory/replace/dmd/moz.build @@ -0,0 +1,37 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS += [ + "DMD.h", +] + +UNIFIED_SOURCES += [ + "DMD.cpp", +] + +if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + UNIFIED_SOURCES += [ + "/mfbt/HashFunctions.cpp", + "/mfbt/JSONWriter.cpp", + "/mfbt/Poison.cpp", + "/mozglue/misc/StackWalk.cpp", + ] + if CONFIG["OS_ARCH"] == "WINNT": + OS_LIBS += [ + "dbghelp", + ] + +ReplaceMalloc("dmd") + +DEFINES["MOZ_NO_MOZALLOC"] = True +DEFINES["IMPL_MFBT"] = True + +if CONFIG["MOZ_OPTIMIZE"]: + DEFINES["MOZ_OPTIMIZE"] = True + +DisableStlWrapping() + +TEST_DIRS += ["test"] diff --git a/memory/replace/dmd/test/SmokeDMD.cpp b/memory/replace/dmd/test/SmokeDMD.cpp new file mode 100644 index 0000000000..c72e92a543 --- /dev/null +++ b/memory/replace/dmd/test/SmokeDMD.cpp @@ -0,0 +1,378 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This program is used by the DMD xpcshell test. It is run under DMD and +// produces some output. The xpcshell test then post-processes and checks this +// output. +// +// Note that this file does not have "Test" or "test" in its name, because that +// will cause the build system to not record breakpad symbols for it, which +// will stop the post-processing (which includes stack fixing) from working +// correctly. + +// This is required on some systems such as Fedora to allow +// building with -O0 together with --warnings-as-errors due to +// a check in /usr/include/features.h +#undef _FORTIFY_SOURCE + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mozilla/Assertions.h" +#include "mozilla/JSONWriter.h" +#include "mozilla/Sprintf.h" +#include "mozilla/UniquePtr.h" +#include "DMD.h" + +using mozilla::MakeUnique; +using namespace mozilla::dmd; + +DMDFuncs::Singleton DMDFuncs::sSingleton; + +class FpWriteFunc final : public mozilla::JSONWriteFunc { + public: + explicit FpWriteFunc(const char* aFilename) { + mFp = fopen(aFilename, "w"); + if (!mFp) { + fprintf(stderr, "SmokeDMD: can't create %s file: %s\n", aFilename, + strerror(errno)); + exit(1); + } + } + + ~FpWriteFunc() { fclose(mFp); } + + void Write(const mozilla::Span<const char>& aStr) final { + for (const char c : aStr) { + fputc(c, mFp); + } + } + + private: + FILE* mFp; +}; + +// This stops otherwise-unused variables from being optimized away. +static void UseItOrLoseIt(void* aPtr, int aSeven) { + char buf[64]; + int n = SprintfLiteral(buf, "%p\n", aPtr); + if (n == 20 + aSeven) { + fprintf(stderr, "well, that is surprising"); + } +} + +// This function checks that heap blocks that have the same stack trace but +// different (or no) reporters get aggregated separately. +void Foo(int aSeven) { + char* a[6]; + for (int i = 0; i < aSeven - 1; i++) { + a[i] = (char*)malloc(128 - 16 * i); + UseItOrLoseIt(a[i], aSeven); + } + + // Oddly, some versions of clang will cause identical stack traces to be + // generated for adjacent calls to Report(), which breaks the test. Inserting + // the UseItOrLoseIt() calls in between is enough to prevent this. + + Report(a[2]); // reported + + UseItOrLoseIt(a[2], aSeven); + + for (int i = 0; i < aSeven - 5; i++) { + Report(a[i]); // reported + UseItOrLoseIt(a[i], aSeven); + } + + UseItOrLoseIt(a[2], aSeven); + + Report(a[3]); // reported + + // a[4], a[5] unreported +} + +void TestEmpty(const char* aTestName, const char* aMode) { + char filename[128]; + SprintfLiteral(filename, "complete-%s-%s.json", aTestName, aMode); + auto f = MakeUnique<FpWriteFunc>(filename); + + char options[128]; + SprintfLiteral(options, "--mode=%s --stacks=full", aMode); + ResetEverything(options); + + // Zero for everything. + Analyze(std::move(f)); +} + +void TestFull(const char* aTestName, int aNum, const char* aMode, int aSeven) { + char filename[128]; + SprintfLiteral(filename, "complete-%s%d-%s.json", aTestName, aNum, aMode); + auto f = MakeUnique<FpWriteFunc>(filename); + + // The --show-dump-stats=yes is there just to give that option some basic + // testing, e.g. ensure it doesn't crash. It's hard to test much beyond that. + char options[128]; + SprintfLiteral(options, "--mode=%s --stacks=full --show-dump-stats=yes", + aMode); + ResetEverything(options); + + // Analyze 1: 1 freed, 9 out of 10 unreported. + // Analyze 2: still present and unreported. + int i; + char* a = nullptr; + for (i = 0; i < aSeven + 3; i++) { + a = (char*)malloc(100); + UseItOrLoseIt(a, aSeven); + } + free(a); + + // A no-op. + free(nullptr); + + // Note: 16 bytes is the smallest requested size that gives consistent + // behaviour across all platforms with jemalloc. + // Analyze 1: reported. + // Analyze 2: thrice-reported. + char* a2 = (char*)malloc(16); + Report(a2); + + // Analyze 1: reported. + // Analyze 2: reportedness carries over, due to ReportOnAlloc. + char* b = (char*)malloc(10); + ReportOnAlloc(b); + + // ReportOnAlloc, then freed. + // Analyze 1: freed, irrelevant. + // Analyze 2: freed, irrelevant. + char* b2 = (char*)malloc(16); + ReportOnAlloc(b2); + free(b2); + + // Analyze 1: reported 4 times. + // Analyze 2: freed, irrelevant. + char* c = (char*)calloc(10, 3); + Report(c); + for (int i = 0; i < aSeven - 4; i++) { + Report(c); + } + + // Analyze 1: ignored. + // Analyze 2: irrelevant. + Report((void*)(intptr_t)i); + + // jemalloc rounds this up to 8192. + // Analyze 1: reported. + // Analyze 2: freed. + char* e = (char*)malloc(4096); + e = (char*)realloc(e, 7169); + Report(e); + + // First realloc is like malloc; second realloc is shrinking. + // Analyze 1: reported. + // Analyze 2: re-reported. + char* e2 = (char*)realloc(nullptr, 1024); + e2 = (char*)realloc(e2, 512); + Report(e2); + + // First realloc is like malloc; second realloc creates a min-sized block. + // XXX: on Windows, second realloc frees the block. + // Analyze 1: reported. + // Analyze 2: freed, irrelevant. + char* e3 = (char*)realloc(nullptr, 1023); + // e3 = (char*) realloc(e3, 0); + MOZ_ASSERT(e3); + Report(e3); + + // Analyze 1: freed, irrelevant. + // Analyze 2: freed, irrelevant. + char* f1 = (char*)malloc(64); + UseItOrLoseIt(f1, aSeven); + free(f1); + + // Analyze 1: ignored. + // Analyze 2: irrelevant. + Report((void*)(intptr_t)0x0); + + // Analyze 1: mixture of reported and unreported. + // Analyze 2: all unreported. + Foo(aSeven); + + // Analyze 1: twice-reported. + // Analyze 2: twice-reported. + char* g1 = (char*)malloc(77); + ReportOnAlloc(g1); + ReportOnAlloc(g1); + + // Analyze 1: mixture of reported and unreported. + // Analyze 2: all unreported. + // Nb: this Foo() call is deliberately not adjacent to the previous one. See + // the comment about adjacent calls in Foo() for more details. + Foo(aSeven); + + // Analyze 1: twice-reported. + // Analyze 2: once-reported. + char* g2 = (char*)malloc(78); + Report(g2); + ReportOnAlloc(g2); + + // Analyze 1: twice-reported. + // Analyze 2: once-reported. + char* g3 = (char*)malloc(79); + ReportOnAlloc(g3); + Report(g3); + + // All the odd-ball ones. + // Analyze 1: all unreported. + // Analyze 2: all freed, irrelevant. + // XXX: no memalign on Mac + // void* w = memalign(64, 65); // rounds up to 128 + // UseItOrLoseIt(w, aSeven); + + // XXX: posix_memalign doesn't work on B2G + // void* x; + // posix_memalign(&y, 128, 129); // rounds up to 256 + // UseItOrLoseIt(x, aSeven); + + // XXX: valloc doesn't work on Windows. + // void* y = valloc(1); // rounds up to 4096 + // UseItOrLoseIt(y, aSeven); + + // XXX: C11 only + // void* z = aligned_alloc(64, 256); + // UseItOrLoseIt(z, aSeven); + + if (aNum == 1) { + // Analyze 1. + Analyze(std::move(f)); + } + + ClearReports(); + + //--------- + + Report(a2); + Report(a2); + free(c); + free(e); + Report(e2); + free(e3); + // free(w); + // free(x); + // free(y); + // free(z); + + // Do some allocations that will only show up in cumulative mode. + for (int i = 0; i < 100; i++) { + void* v = malloc(128); + UseItOrLoseIt(v, aSeven); + free(v); + } + + if (aNum == 2) { + // Analyze 2. + Analyze(std::move(f)); + } +} + +void TestPartial(const char* aTestName, const char* aMode, int aSeven) { + char filename[128]; + SprintfLiteral(filename, "complete-%s-%s.json", aTestName, aMode); + auto f = MakeUnique<FpWriteFunc>(filename); + + char options[128]; + SprintfLiteral(options, "--mode=%s", aMode); + ResetEverything(options); + + int kTenThousand = aSeven + 9993; + char* s; + + // The output of this function is deterministic but it relies on the + // probability and seeds given to the FastBernoulliTrial instance in + // ResetBernoulli(). If they change, the output will change too. + + // Expected fraction with stacks: (1 - (1 - 0.003) ** 16) = 0.0469. + // So we expect about 0.0469 * 10000 == 469. + // We actually get 511. + for (int i = 0; i < kTenThousand; i++) { + s = (char*)malloc(16); + UseItOrLoseIt(s, aSeven); + } + + // Expected fraction with stacks: (1 - (1 - 0.003) ** 128) = 0.3193. + // So we expect about 0.3193 * 10000 == 3193. + // We actually get 3136. + for (int i = 0; i < kTenThousand; i++) { + s = (char*)malloc(128); + UseItOrLoseIt(s, aSeven); + } + + // Expected fraction with stacks: (1 - (1 - 0.003) ** 1024) = 0.9539. + // So we expect about 0.9539 * 10000 == 9539. + // We actually get 9531. + for (int i = 0; i < kTenThousand; i++) { + s = (char*)malloc(1024); + UseItOrLoseIt(s, aSeven); + } + + Analyze(std::move(f)); +} + +void TestScan(int aSeven) { + auto f = MakeUnique<FpWriteFunc>("basic-scan.json"); + + ResetEverything("--mode=scan"); + + uintptr_t* p = (uintptr_t*)malloc(6 * sizeof(uintptr_t)); + UseItOrLoseIt(p, aSeven); + + // Hard-coded values checked by scan-test.py + p[0] = 0x123; // outside a block, small value + p[1] = 0x0; // null + p[2] = (uintptr_t)((uint8_t*)p - 1); // pointer outside a block, but nearby + p[3] = (uintptr_t)p; // pointer to start of a block + p[4] = (uintptr_t)((uint8_t*)p + 1); // pointer into a block + p[5] = 0x0; // trailing null + + Analyze(std::move(f)); +} + +void RunTests() { + // This test relies on the compiler not doing various optimizations, such as + // eliding unused malloc() calls or unrolling loops with fixed iteration + // counts. So we compile it with -O0 (or equivalent), which probably prevents + // that. We also use the following variable for various loop iteration + // counts, just in case compilers might unroll very small loops even with + // -O0. + int seven = 7; + + // Make sure that DMD is actually running; it is initialized on the first + // allocation. + int* x = (int*)malloc(100); + UseItOrLoseIt(x, seven); + MOZ_RELEASE_ASSERT(IsRunning()); + + // Please keep this in sync with run_test in test_dmd.js. + + TestEmpty("empty", "live"); + TestEmpty("empty", "dark-matter"); + TestEmpty("empty", "cumulative"); + + TestFull("full", 1, "live", seven); + TestFull("full", 1, "dark-matter", seven); + + TestFull("full", 2, "dark-matter", seven); + TestFull("full", 2, "cumulative", seven); + + TestPartial("partial", "live", seven); + + TestScan(seven); +} + +int main() { + RunTests(); + + return 0; +} diff --git a/memory/replace/dmd/test/basic-scan-32-expected.txt b/memory/replace/dmd/test/basic-scan-32-expected.txt new file mode 100644 index 0000000000..9f6f4db325 --- /dev/null +++ b/memory/replace/dmd/test/basic-scan-32-expected.txt @@ -0,0 +1,25 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o basic-scan-32-actual.txt --clamp-contents basic-scan.json + +Invocation { + $DMD = '--mode=scan' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 1 + 32 bytes (24 requested / 8 slop) + 100.00% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 32 bytes in 1 blocks +} + diff --git a/memory/replace/dmd/test/basic-scan-64-expected.txt b/memory/replace/dmd/test/basic-scan-64-expected.txt new file mode 100644 index 0000000000..59effc07b7 --- /dev/null +++ b/memory/replace/dmd/test/basic-scan-64-expected.txt @@ -0,0 +1,25 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o basic-scan-64-actual.txt --clamp-contents basic-scan.json + +Invocation { + $DMD = '--mode=scan' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 1 + 48 bytes (48 requested / 0 slop) + 100.00% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 48 bytes in 1 blocks +} + diff --git a/memory/replace/dmd/test/complete-empty-cumulative-expected.txt b/memory/replace/dmd/test/complete-empty-cumulative-expected.txt new file mode 100644 index 0000000000..2486015d0b --- /dev/null +++ b/memory/replace/dmd/test/complete-empty-cumulative-expected.txt @@ -0,0 +1,18 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-empty-cumulative-actual.txt complete-empty-cumulative.json + +Invocation { + $DMD = '--mode=cumulative --stacks=full' + Mode = 'cumulative' +} + +#----------------------------------------------------------------- + +# no cumulative heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 0 bytes in 0 blocks +} + diff --git a/memory/replace/dmd/test/complete-empty-dark-matter-expected.txt b/memory/replace/dmd/test/complete-empty-dark-matter-expected.txt new file mode 100644 index 0000000000..0020cddde3 --- /dev/null +++ b/memory/replace/dmd/test/complete-empty-dark-matter-expected.txt @@ -0,0 +1,29 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-empty-dark-matter-actual.txt complete-empty-dark-matter.json + +Invocation { + $DMD = '--mode=dark-matter --stacks=full' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +# no twice-reported heap blocks + +#----------------------------------------------------------------- + +# no unreported heap blocks + +#----------------------------------------------------------------- + +# no once-reported heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 0 bytes (100.00%) in 0 blocks (100.00%) + Unreported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) + Once-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) + Twice-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) +} + diff --git a/memory/replace/dmd/test/complete-empty-live-expected.txt b/memory/replace/dmd/test/complete-empty-live-expected.txt new file mode 100644 index 0000000000..d0d1721965 --- /dev/null +++ b/memory/replace/dmd/test/complete-empty-live-expected.txt @@ -0,0 +1,18 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-empty-live-actual.txt complete-empty-live.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +# no live heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 0 bytes in 0 blocks +} + diff --git a/memory/replace/dmd/test/complete-full1-dark-matter-expected.txt b/memory/replace/dmd/test/complete-full1-dark-matter-expected.txt new file mode 100644 index 0000000000..2c7d6b6343 --- /dev/null +++ b/memory/replace/dmd/test/complete-full1-dark-matter-expected.txt @@ -0,0 +1,265 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full1-dark-matter-actual.txt complete-full1-dark-matter.json + +Invocation { + $DMD = '--mode=dark-matter --stacks=full --show-dump-stats=yes' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +Twice-reported { + 1 block in heap block record 1 of 4 + 80 bytes (79 requested / 1 slop) + 0.66% of the heap (0.66% cumulative) + 29.41% of twice-reported (29.41% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 2 of 4 + 80 bytes (78 requested / 2 slop) + 0.66% of the heap (1.32% cumulative) + 29.41% of twice-reported (58.82% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 3 of 4 + 80 bytes (77 requested / 3 slop) + 0.66% of the heap (1.98% cumulative) + 29.41% of twice-reported (88.24% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 4 of 4 + 32 bytes (30 requested / 2 slop) + 0.26% of the heap (2.25% cumulative) + 11.76% of twice-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Unreported { + 9 blocks in heap block record 1 of 3 + 1,008 bytes (900 requested / 108 slop) + Individual block sizes: 112 x 9 + 8.33% of the heap (8.33% cumulative) + 81.82% of unreported (81.82% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 2 blocks in heap block record 2 of 3 + 112 bytes (112 requested / 0 slop) + Individual block sizes: 64; 48 + 0.93% of the heap (9.26% cumulative) + 9.09% of unreported (90.91% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 2 blocks in heap block record 3 of 3 + 112 bytes (112 requested / 0 slop) + Individual block sizes: 64; 48 + 0.93% of the heap (10.19% cumulative) + 9.09% of unreported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Once-reported { + 1 block in heap block record 1 of 11 + 8,192 bytes (7,169 requested / 1,023 slop) + 67.72% of the heap (67.72% cumulative) + 77.34% of once-reported (77.34% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 2 of 11 + 1,024 bytes (1,023 requested / 1 slop) + 8.47% of the heap (76.19% cumulative) + 9.67% of once-reported (87.01% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 3 of 11 + 512 bytes (512 requested / 0 slop) + 4.23% of the heap (80.42% cumulative) + 4.83% of once-reported (91.84% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 2 blocks in heap block record 4 of 11 + 240 bytes (240 requested / 0 slop) + Individual block sizes: 128; 112 + 1.98% of the heap (82.41% cumulative) + 2.27% of once-reported (94.11% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 2 blocks in heap block record 5 of 11 + 240 bytes (240 requested / 0 slop) + Individual block sizes: 128; 112 + 1.98% of the heap (84.39% cumulative) + 2.27% of once-reported (96.37% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 6 of 11 + 96 bytes (96 requested / 0 slop) + 0.79% of the heap (85.19% cumulative) + 0.91% of once-reported (97.28% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 7 of 11 + 96 bytes (96 requested / 0 slop) + 0.79% of the heap (85.98% cumulative) + 0.91% of once-reported (98.19% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 8 of 11 + 80 bytes (80 requested / 0 slop) + 0.66% of the heap (86.64% cumulative) + 0.76% of once-reported (98.94% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 9 of 11 + 80 bytes (80 requested / 0 slop) + 0.66% of the heap (87.30% cumulative) + 0.76% of once-reported (99.70% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 10 of 11 + 16 bytes (16 requested / 0 slop) + 0.13% of the heap (87.43% cumulative) + 0.15% of once-reported (99.85% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 11 of 11 + 16 bytes (10 requested / 6 slop) + 0.13% of the heap (87.57% cumulative) + 0.15% of once-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 12,096 bytes (100.00%) in 30 blocks (100.00%) + Unreported: 1,232 bytes ( 10.19%) in 13 blocks ( 43.33%) + Once-reported: 10,592 bytes ( 87.57%) in 13 blocks ( 43.33%) + Twice-reported: 272 bytes ( 2.25%) in 4 blocks ( 13.33%) +} + diff --git a/memory/replace/dmd/test/complete-full1-live-expected.txt b/memory/replace/dmd/test/complete-full1-live-expected.txt new file mode 100644 index 0000000000..eaa1883e1f --- /dev/null +++ b/memory/replace/dmd/test/complete-full1-live-expected.txt @@ -0,0 +1,127 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full1-live-actual.txt complete-full1-live.json + +Invocation { + $DMD = '--mode=live --stacks=full --show-dump-stats=yes' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 12 + 8,192 bytes (7,169 requested / 1,023 slop) + 67.72% of the heap (67.72% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 2 of 12 + 1,024 bytes (1,023 requested / 1 slop) + 8.47% of the heap (76.19% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 9 blocks in heap block record 3 of 12 + 1,008 bytes (900 requested / 108 slop) + Individual block sizes: 112 x 9 + 8.33% of the heap (84.52% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 6 blocks in heap block record 4 of 12 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 4.37% of the heap (88.89% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 6 blocks in heap block record 5 of 12 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 4.37% of the heap (93.25% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 6 of 12 + 512 bytes (512 requested / 0 slop) + 4.23% of the heap (97.49% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 7 of 12 + 80 bytes (79 requested / 1 slop) + 0.66% of the heap (98.15% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 8 of 12 + 80 bytes (78 requested / 2 slop) + 0.66% of the heap (98.81% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 9 of 12 + 80 bytes (77 requested / 3 slop) + 0.66% of the heap (99.47% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 10 of 12 + 32 bytes (30 requested / 2 slop) + 0.26% of the heap (99.74% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 11 of 12 + 16 bytes (16 requested / 0 slop) + 0.13% of the heap (99.87% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 1 block in heap block record 12 of 12 + 16 bytes (10 requested / 6 slop) + 0.13% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 12,096 bytes in 30 blocks +} + diff --git a/memory/replace/dmd/test/complete-full2-cumulative-expected.txt b/memory/replace/dmd/test/complete-full2-cumulative-expected.txt new file mode 100644 index 0000000000..5a225b9b8e --- /dev/null +++ b/memory/replace/dmd/test/complete-full2-cumulative-expected.txt @@ -0,0 +1,173 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full2-cumulative-actual.txt complete-full2-cumulative.json + +Invocation { + $DMD = '--mode=cumulative --stacks=full --show-dump-stats=yes' + Mode = 'cumulative' +} + +#----------------------------------------------------------------- + +Cumulative { + 100 blocks in heap block record 1 of 17 + 12,800 bytes (12,800 requested / 0 slop) + Individual block sizes: 128 x 100 + 42.37% of the heap (42.37% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 2 of 17 + 8,192 bytes (7,169 requested / 1,023 slop) + 27.12% of the heap (69.49% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 3 of 17 + 4,096 bytes (4,096 requested / 0 slop) + 13.56% of the heap (83.05% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 10 blocks in heap block record 4 of 17 + 1,120 bytes (1,000 requested / 120 slop) + Individual block sizes: 112 x 10 + 3.71% of the heap (86.76% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 5 of 17 + 1,024 bytes (1,024 requested / 0 slop) + 3.39% of the heap (90.15% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 6 of 17 + 1,024 bytes (1,023 requested / 1 slop) + 3.39% of the heap (93.54% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 6 blocks in heap block record 7 of 17 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 1.75% of the heap (95.29% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 6 blocks in heap block record 8 of 17 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 1.75% of the heap (97.03% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 9 of 17 + 512 bytes (512 requested / 0 slop) + 1.69% of the heap (98.73% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 10 of 17 + 80 bytes (79 requested / 1 slop) + 0.26% of the heap (98.99% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 11 of 17 + 80 bytes (78 requested / 2 slop) + 0.26% of the heap (99.26% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 12 of 17 + 80 bytes (77 requested / 3 slop) + 0.26% of the heap (99.52% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 13 of 17 + 64 bytes (64 requested / 0 slop) + 0.21% of the heap (99.74% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 14 of 17 + 32 bytes (30 requested / 2 slop) + 0.11% of the heap (99.84% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 15 of 17 + 16 bytes (16 requested / 0 slop) + 0.05% of the heap (99.89% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 16 of 17 + 16 bytes (16 requested / 0 slop) + 0.05% of the heap (99.95% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Cumulative { + 1 block in heap block record 17 of 17 + 16 bytes (10 requested / 6 slop) + 0.05% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 30,208 bytes in 135 blocks +} + diff --git a/memory/replace/dmd/test/complete-full2-dark-matter-expected.txt b/memory/replace/dmd/test/complete-full2-dark-matter-expected.txt new file mode 100644 index 0000000000..5f9585a8c6 --- /dev/null +++ b/memory/replace/dmd/test/complete-full2-dark-matter-expected.txt @@ -0,0 +1,140 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-full2-dark-matter-actual.txt complete-full2-dark-matter.json + +Invocation { + $DMD = '--mode=dark-matter --stacks=full --show-dump-stats=yes' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +Twice-reported { + 1 block in heap block record 1 of 2 + 80 bytes (77 requested / 3 slop) + 2.81% of the heap (2.81% cumulative) + 83.33% of twice-reported (83.33% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +Twice-reported { + 1 block in heap block record 2 of 2 + 16 bytes (16 requested / 0 slop) + 0.56% of the heap (3.37% cumulative) + 16.67% of twice-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } + Reported again at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Unreported { + 9 blocks in heap block record 1 of 3 + 1,008 bytes (900 requested / 108 slop) + Individual block sizes: 112 x 9 + 35.39% of the heap (35.39% cumulative) + 48.84% of unreported (48.84% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 6 blocks in heap block record 2 of 3 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 18.54% of the heap (53.93% cumulative) + 25.58% of unreported (74.42% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Unreported { + 6 blocks in heap block record 3 of 3 + 528 bytes (528 requested / 0 slop) + Individual block sizes: 128; 112; 96; 80; 64; 48 + 18.54% of the heap (72.47% cumulative) + 25.58% of unreported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Once-reported { + 1 block in heap block record 1 of 4 + 512 bytes (512 requested / 0 slop) + 17.98% of the heap (17.98% cumulative) + 74.42% of once-reported (74.42% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 2 of 4 + 80 bytes (79 requested / 1 slop) + 2.81% of the heap (20.79% cumulative) + 11.63% of once-reported (86.05% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 3 of 4 + 80 bytes (78 requested / 2 slop) + 2.81% of the heap (23.60% cumulative) + 11.63% of once-reported (97.67% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +Once-reported { + 1 block in heap block record 4 of 4 + 16 bytes (10 requested / 6 slop) + 0.56% of the heap (24.16% cumulative) + 2.33% of once-reported (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } + Reported at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 2,848 bytes (100.00%) in 27 blocks (100.00%) + Unreported: 2,064 bytes ( 72.47%) in 21 blocks ( 77.78%) + Once-reported: 688 bytes ( 24.16%) in 4 blocks ( 14.81%) + Twice-reported: 96 bytes ( 3.37%) in 2 blocks ( 7.41%) +} + diff --git a/memory/replace/dmd/test/complete-partial-live-expected.txt b/memory/replace/dmd/test/complete-partial-live-expected.txt new file mode 100644 index 0000000000..e7f27b0ee6 --- /dev/null +++ b/memory/replace/dmd/test/complete-partial-live-expected.txt @@ -0,0 +1,56 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o complete-partial-live-actual.txt complete-partial-live.json + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 9,531 blocks in heap block record 1 of 4 + 9,759,744 bytes (9,759,744 requested / 0 slop) + Individual block sizes: 1,024 x 9,531 + 83.56% of the heap (83.56% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 16,822 blocks in heap block record 2 of 4 + 1,510,672 bytes (1,510,672 requested / 0 slop) + Individual block sizes: 1,024 x 469; 128 x 6,864; 16 x 9,489 + 12.93% of the heap (96.49% cumulative) + Allocated at { + #01: (no stack trace recorded due to --stacks=partial) + } +} + +Live { + 3,136 blocks in heap block record 3 of 4 + 401,408 bytes (401,408 requested / 0 slop) + Individual block sizes: 128 x 3,136 + 3.44% of the heap (99.93% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +Live { + 511 blocks in heap block record 4 of 4 + 8,176 bytes (8,176 requested / 0 slop) + Individual block sizes: 16 x 511 + 0.07% of the heap (100.00% cumulative) + Allocated at { + #01: ... DMD.cpp ... + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 11,680,000 bytes in 30,000 blocks +} + diff --git a/memory/replace/dmd/test/moz.build b/memory/replace/dmd/test/moz.build new file mode 100644 index 0000000000..77e4fa12a0 --- /dev/null +++ b/memory/replace/dmd/test/moz.build @@ -0,0 +1,26 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +GeckoSimplePrograms( + [ + "SmokeDMD", + ], + linkage=None, +) + +# See the comment at the top of SmokeDMD.cpp:RunTests(). +if CONFIG["CC_TYPE"] == "clang-cl": + CXXFLAGS += ["-Od", "-clang:-fno-lto"] +else: + CXXFLAGS += ["-O0", "-fno-lto"] + +DEFINES["MOZ_NO_MOZALLOC"] = True + +DisableStlWrapping() + +XPCSHELL_TESTS_MANIFESTS += [ + "xpcshell.toml", +] diff --git a/memory/replace/dmd/test/scan-test.py b/memory/replace/dmd/test/scan-test.py new file mode 100644 index 0000000000..c282b02693 --- /dev/null +++ b/memory/replace/dmd/test/scan-test.py @@ -0,0 +1,102 @@ +#! /usr/bin/env python +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +"""Testing for the JSON file emitted by DMD heap scan mode when running SmokeDMD.""" + +import argparse +import gzip +import json +import sys + +# The DMD output version this script handles. +outputVersion = 5 + + +def parseCommandLine(): + description = """ +Ensure that DMD heap scan mode creates the correct output when run with SmokeDMD. +This is only for testing. Input files can be gzipped. +""" + p = argparse.ArgumentParser(description=description) + + p.add_argument( + "--clamp-contents", + action="store_true", + help="expect that the contents of the JSON input file have had " + "their addresses clamped", + ) + + p.add_argument("input_file", help="a file produced by DMD") + + return p.parse_args(sys.argv[1:]) + + +def checkScanContents(contents, expected): + if len(contents) != len(expected): + raise Exception( + "Expected " + + str(len(expected)) + + " things in contents but found " + + str(len(contents)) + ) + + for i in range(len(expected)): + if contents[i] != expected[i]: + raise Exception( + "Expected to find " + + expected[i] + + " at offset " + + str(i) + + " but found " + + contents[i] + ) + + +def main(): + args = parseCommandLine() + + # Handle gzipped input if necessary. + isZipped = args.input_file.endswith(".gz") + opener = gzip.open if isZipped else open + + with opener(args.input_file, "rb") as f: + j = json.load(f) + + if j["version"] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + invocation = j["invocation"] + + mode = invocation["mode"] + if mode != "scan": + raise Exception("bad 'mode' property: '{:s}'".format(mode)) + + blockList = j["blockList"] + + if len(blockList) != 1: + raise Exception("Expected only one block") + + b = blockList[0] + + # The expected values are based on hard-coded values in SmokeDMD.cpp. + if args.clamp_contents: + expected = ["0", "0", "0", b["addr"], b["addr"]] + else: + addr = int(b["addr"], 16) + expected = [ + "123", + "0", + str(format(addr - 1, "x")), + b["addr"], + str(format(addr + 1, "x")), + "0", + ] + + checkScanContents(b["contents"], expected) + + +if __name__ == "__main__": + main() diff --git a/memory/replace/dmd/test/script-diff-dark-matter-expected.txt b/memory/replace/dmd/test/script-diff-dark-matter-expected.txt new file mode 100644 index 0000000000..b1fc28bac5 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-dark-matter-expected.txt @@ -0,0 +1,127 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-diff-dark-matter-actual.txt script-diff-dark-matter1.json script-diff-dark-matter2.json + +Invocation 1 { + $DMD = '--mode=dark-matter' + Mode = 'dark-matter' +} + +Invocation 2 { + $DMD = '1' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +Twice-reported { + -1 blocks in heap block record 1 of 1 + -1,088 bytes (-1,064 requested / -24 slop) + Individual block sizes: -1,024; -127; 63 + 15.46% of the heap (15.46% cumulative) + 100.00% of twice-reported (100.00% cumulative) + Allocated at { + #01: F (F.cpp:99) + } + Reported at { + #01: R1 (R1.cpp:99) + } + Reported again at { + #01: R2 (R2.cpp:99) + } +} + +#----------------------------------------------------------------- + +Unreported { + 4 blocks in heap block record 1 of 5 + 16,384 bytes (16,384 requested / 0 slop) + Individual block sizes: 4,096 x 4 + -232.76% of the heap (-232.76% cumulative) + 371.01% of unreported (371.01% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +Unreported { + 7 blocks in heap block record 2 of 5 + -11,968 bytes (-12,016 requested / 48 slop) + Individual block sizes: -15,360; 2,048; 512 x 2; 128; -127; 64 x 4; 63 + 170.02% of the heap (-62.74% cumulative) + -271.01% of unreported (100.00% cumulative) + Allocated at { + #01: F (F.cpp:99) + } +} + +Unreported { + 0 blocks in heap block record 3 of 5 + 0 bytes (-384 requested / 384 slop) + Individual block sizes: (no change) + -0.00% of the heap (-62.74% cumulative) + 0.00% of unreported (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Unreported { + -2 blocks in heap block record 4 of 5 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 8,192 x 2; -4,096 x 4 + -0.00% of the heap (-62.74% cumulative) + 0.00% of unreported (100.00% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Unreported { + 0 blocks in heap block record 5 of 5 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 20,480; -16,384; -8,192; 4,096 + -0.00% of the heap (-62.74% cumulative) + 0.00% of unreported (100.00% cumulative) + Allocated at { + #01: (no stack trace recorded due to --stacks=partial) + } +} + +#----------------------------------------------------------------- + +Once-reported { + -3 blocks in heap block record 1 of 2 + -10,240 bytes (-10,192 requested / -48 slop) + Individual block sizes: -4,096 x 2; -2,048 + 145.48% of the heap (145.48% cumulative) + 98.77% of once-reported (98.77% cumulative) + Allocated at { + #01: D (D.cpp:99) + } + Reported at { + #01: R1 (R1.cpp:99) + } +} + +Once-reported { + -1 blocks in heap block record 2 of 2 + -127 bytes (-151 requested / 24 slop) + 1.80% of the heap (147.28% cumulative) + 1.23% of once-reported (100.00% cumulative) + Allocated at { + #01: F (F.cpp:99) + } + Reported at { + #01: R1 (R1.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: -7,039 bytes (100.00%) in 4 blocks (100.00%) + Unreported: 4,416 bytes (-62.74%) in 9 blocks (225.00%) + Once-reported: -10,367 bytes (147.28%) in -4 blocks (-100.00%) + Twice-reported: -1,088 bytes ( 15.46%) in -1 blocks (-25.00%) +} + diff --git a/memory/replace/dmd/test/script-diff-dark-matter1.json b/memory/replace/dmd/test/script-diff-dark-matter1.json new file mode 100644 index 0000000000..1175394150 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-dark-matter1.json @@ -0,0 +1,51 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=dark-matter", + "mode": "dark-matter" + }, + "blockList": [ + { "req": 4096, "alloc": "A", "num": 4 }, + + { "req": 4096, "alloc": "B", "num": 3 }, + { "req": 4096, "alloc": "B" }, + + { "req": 4096, "alloc": "C", "num": 2 }, + { "req": 4096, "alloc": "C", "num": 2 }, + + { "req": 4096, "alloc": "D", "reps": ["R1"], "num": 2 }, + { "req": 2000, "slop": 48, "alloc": "D", "reps": ["R1"] }, + + { "req": 15360, "alloc": "F" }, + { "req": 512, "alloc": "F", "num": 2 }, + { "req": 127, "alloc": "F" }, + { "req": 1024, "alloc": "F", "reps": ["R1"] }, + { "req": 127, "alloc": "F", "reps": ["R1"] }, + { "req": 1000, "slop": 24, "alloc": "F", "reps": ["R1", "R2"] }, + { "req": 127, "alloc": "F", "reps": ["R1", "R2"] }, + + { "req": 4096 }, + { "req": 8192 }, + { "req": 16384 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-diff-dark-matter2.json b/memory/replace/dmd/test/script-diff-dark-matter2.json new file mode 100644 index 0000000000..2c3061223f --- /dev/null +++ b/memory/replace/dmd/test/script-diff-dark-matter2.json @@ -0,0 +1,51 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "1", + "mode": "dark-matter" + }, + "blockList": [ + { "req": 4096, "alloc": "A", "num": 4 }, + + { "req": 8192, "alloc": "B" }, + { "req": 8192, "alloc": "B" }, + + { "req": 4000, "slop": 96, "alloc": "C", "num": 4 }, + + { "req": 4096, "alloc": "E", "num": 4 }, + + { "req": 2000, "slop": 48, "alloc": "F" }, + { "req": 1000, "slop": 24, "alloc": "F", "reps": ["R1"] }, + { "req": 512, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 128, "alloc": "F" }, + { "req": 63, "alloc": "F", "reps": ["R1", "R2"] }, + { "req": 64, "alloc": "F", "num": 4 }, + { "req": 63, "alloc": "F" }, + + { "req": 4096, "num": 2 }, + { "req": 20480 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-diff-live-expected.txt b/memory/replace/dmd/test/script-diff-live-expected.txt new file mode 100644 index 0000000000..20208c0768 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-live-expected.txt @@ -0,0 +1,81 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-diff-live-actual.txt script-diff-live1.json script-diff-live2.json + +Invocation 1 { + $DMD = '--mode=live' + Mode = 'live' +} + +Invocation 2 { + $DMD = '--mode=live --stacks=partial' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 4 blocks in heap block record 1 of 6 + 16,384 bytes (16,384 requested / 0 slop) + Individual block sizes: 4,096 x 4 + -232.76% of the heap (-232.76% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 6 + -13,183 bytes (-13,231 requested / 48 slop) + Individual block sizes: -15,360; 2,048; -1,024; 512 x 2; 128; -127 x 3; 64 x 4; 63 x 2 + 187.29% of the heap (-45.48% cumulative) + Allocated at { + #01: F (F.cpp:99) + } +} + +Live { + -3 blocks in heap block record 3 of 6 + -10,240 bytes (-10,192 requested / -48 slop) + Individual block sizes: -4,096 x 2; -2,048 + 145.48% of the heap (100.00% cumulative) + Allocated at { + #01: D (D.cpp:99) + } +} + +Live { + 0 blocks in heap block record 4 of 6 + 0 bytes (-384 requested / 384 slop) + Individual block sizes: (no change) + -0.00% of the heap (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Live { + -2 blocks in heap block record 5 of 6 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 8,192 x 2; -4,096 x 4 + -0.00% of the heap (100.00% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 0 blocks in heap block record 6 of 6 + 0 bytes (0 requested / 0 slop) + Individual block sizes: 20,480; -16,384; -8,192; 4,096 + -0.00% of the heap (100.00% cumulative) + Allocated at { + #01: (no stack trace recorded due to --stacks=partial) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: -7,039 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-diff-live1.json b/memory/replace/dmd/test/script-diff-live1.json new file mode 100644 index 0000000000..1296b9ea09 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-live1.json @@ -0,0 +1,51 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=live", + "mode": "live" + }, + "blockList": [ + { "req": 4096, "alloc": "A", "num": 4 }, + + { "req": 4096, "alloc": "B", "num": 4 }, + + { "req": 4096, "alloc": "C", "num": 4 }, + + { "req": 4096, "alloc": "D" }, + { "req": 4096, "alloc": "D" }, + { "req": 2000, "slop": 48, "alloc": "D" }, + + { "req": 15360, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 512, "alloc": "F" }, + { "req": 127, "alloc": "F" }, + { "req": 1024, "alloc": "F" }, + { "req": 127, "alloc": "F" }, + { "req": 1000, "slop": 24, "alloc": "F" }, + { "req": 127, "alloc": "F" }, + + { "req": 4096 }, + { "req": 8192 }, + { "req": 16384 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-diff-live2.json b/memory/replace/dmd/test/script-diff-live2.json new file mode 100644 index 0000000000..723ea5ff35 --- /dev/null +++ b/memory/replace/dmd/test/script-diff-live2.json @@ -0,0 +1,53 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=live --stacks=partial", + "mode": "live" + }, + "blockList": [ + { "req": 4096, "alloc": "A", "num": 3 }, + { "req": 4096, "alloc": "A" }, + + { "req": 8192, "alloc": "B" }, + { "req": 8192, "alloc": "B" }, + + { "req": 4000, "slop": 96, "alloc": "C", "num": 4 }, + + { "req": 4096, "alloc": "E" }, + { "req": 4096, "alloc": "E" }, + { "req": 4096, "alloc": "E" }, + { "req": 4096, "alloc": "E" }, + + { "req": 2000, "slop": 48, "alloc": "F" }, + { "req": 1000, "slop": 24, "alloc": "F" }, + { "req": 512, "alloc": "F", "num": 4 }, + { "req": 128, "alloc": "F" }, + { "req": 63, "alloc": "F" }, + { "req": 64, "alloc": "F", "num": 4 }, + { "req": 63, "alloc": "F" }, + + { "req": 4096 }, + { "req": 4096 }, + { "req": 20480 } + ], + "traceTable": { + "A": ["AA"], + "B": ["BB"], + "C": ["CC"], + "D": ["DD"], + "E": ["EE"], + "F": ["FF"], + "R1": ["RR1"], + "R2": ["RR2"] + }, + "frameTable": { + "AA": "#00: A (A.cpp:99)", + "BB": "#00: B (B.cpp:99)", + "CC": "#00: C (C.cpp:99)", + "DD": "#00: D (D.cpp:99)", + "EE": "#00: E (E.cpp:99)", + "FF": "#00: F (F.cpp:99)", + "RR1": "#00: R1 (R1.cpp:99)", + "RR2": "#00: R2 (R2.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt b/memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt new file mode 100644 index 0000000000..9428ef45fb --- /dev/null +++ b/memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt @@ -0,0 +1,72 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-ignore-alloc-fns-actual.txt --ignore-alloc-fns script-ignore-alloc-fns.json + +Invocation { + $DMD = '1' + Mode = 'dark-matter' +} + +#----------------------------------------------------------------- + +# no twice-reported heap blocks + +#----------------------------------------------------------------- + +Unreported { + 1 block in heap block record 1 of 4 + 1,048,576 bytes (1,048,576 requested / 0 slop) + 93.22% of the heap (93.22% cumulative) + 93.22% of unreported (93.22% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +Unreported { + 1 block in heap block record 2 of 4 + 65,536 bytes (65,536 requested / 0 slop) + 5.83% of the heap (99.05% cumulative) + 5.83% of unreported (99.05% cumulative) + Allocated at { + #01: js::jit::JitRuntime::initialize(JSContext*) (Ion.cpp:301) + } +} + +Unreported { + 1 block in heap block record 3 of 4 + 8,192 bytes (8,000 requested / 192 slop) + 0.73% of the heap (99.78% cumulative) + 0.73% of unreported (99.78% cumulative) + Allocated at { + #01: mozilla::Vector::growStorageBy(unsigned long) (Vector.h:802) + #02: D (D.cpp:99) + } +} + +Unreported { + 1 block in heap block record 4 of 4 + 2,500 bytes (2,500 requested / 0 slop) + 0.22% of the heap (100.00% cumulative) + 0.22% of unreported (100.00% cumulative) + Allocated at { + #01: g_type_create_instance (/usr/lib/x86_64-linux-gnu/libgobject-2.0.so.0) + #02: not_an_alloc_function_so_alloc_functions_below_here_will_not_be_stripped (blah) + #03: replace_posix_memalign (replace_malloc.h:120) + #04: ??? (/lib/x86_64-linux-gnu/libglib-2.0.so.0) + #05: another_non_alloc_function (blah) + } +} + +#----------------------------------------------------------------- + +# no once-reported heap blocks + +#----------------------------------------------------------------- + +Summary { + Total: 1,124,804 bytes (100.00%) in 4 blocks (100.00%) + Unreported: 1,124,804 bytes (100.00%) in 4 blocks (100.00%) + Once-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) + Twice-reported: 0 bytes ( 0.00%) in 0 blocks ( 0.00%) +} + diff --git a/memory/replace/dmd/test/script-ignore-alloc-fns.json b/memory/replace/dmd/test/script-ignore-alloc-fns.json new file mode 100644 index 0000000000..a6c7c8419a --- /dev/null +++ b/memory/replace/dmd/test/script-ignore-alloc-fns.json @@ -0,0 +1,45 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "1", + "mode": "dark-matter" + }, + "blockList": [ + { "req": 1048576, "alloc": "A" }, + { "req": 65536, "alloc": "B" }, + { "req": 8000, "slop": 192, "alloc": "C" }, + { "req": 2500, "alloc": "D" } + ], + "traceTable": { + "A": ["AA", "AB", "AC", "AD"], + "B": ["BA", "BB", "BC"], + "C": ["CA", "CB", "CC", "CD"], + "D": ["DA", "DB", "DD", "DD", "DE", "DF", "DG", "DH", "DI", "DJ"] + }, + "frameTable": { + "AA": "#00: replace_malloc (DMD.cpp:1106)", + "AB": "#00: moz_xmalloc (mozalloc.cpp:68)", + "AC": "#00: operator new(unsigned long) (mozalloc.h:208)", + "AD": "#00: A (A.cpp:99)", + + "BA": "#00: replace_calloc (DMD.cpp:1125)", + "BB": "#00: js_calloc(unsigned long) (Utility.h:107)", + "BC": "#06: js::jit::JitRuntime::initialize(JSContext*) (Ion.cpp:301)", + + "CA": "#00: replace_realloc (DMD.cpp:1153)", + "CB": "#00: bool* mozilla::MallocAllocPolicy::pod_realloc<bool>(bool*, unsigned long, unsigned long) (AllocPolicy.h:74)", + "CC": "#00: mozilla::Vector::growStorageBy(unsigned long) (Vector.h:802)", + "CD": "#00: D (D.cpp:99)", + + "DA": "#00: replace_memalign (DMD.cpp:1181)", + "DB": "#00: replace_posix_memalign (replace_malloc.h:120)", + "DC": "#00: ??? (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DD": "#00: g_slice_alloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DE": "#00: g_slice_alloc0 (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DF": "#00: g_type_create_instance (/usr/lib/x86_64-linux-gnu/libgobject-2.0.so.0)", + "DG": "#00: not_an_alloc_function_so_alloc_functions_below_here_will_not_be_stripped (blah)", + "DH": "#00: replace_posix_memalign (replace_malloc.h:120)", + "DI": "#00: ??? (/lib/x86_64-linux-gnu/libglib-2.0.so.0)", + "DJ": "#00: another_non_alloc_function (blah)" + } +} diff --git a/memory/replace/dmd/test/script-max-frames-1-expected.txt b/memory/replace/dmd/test/script-max-frames-1-expected.txt new file mode 100644 index 0000000000..65a00762bb --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames-1-expected.txt @@ -0,0 +1,26 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-max-frames-1-actual.txt --max-frames=1 script-max-frames.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 4 blocks in heap block record 1 of 1 + 4,416 bytes (4,404 requested / 12 slop) + Individual block sizes: 4,096; 128; 112; 80 + 100.00% of the heap (100.00% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 4,416 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-max-frames-3-expected.txt b/memory/replace/dmd/test/script-max-frames-3-expected.txt new file mode 100644 index 0000000000..5df4914738 --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames-3-expected.txt @@ -0,0 +1,48 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-max-frames-3-actual.txt --max-frames=3 --no-fix-stacks script-max-frames.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 2 blocks in heap block record 1 of 3 + 4,224 bytes (4,224 requested / 0 slop) + Individual block sizes: 4,096; 128 + 95.65% of the heap (95.65% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: F (F.cpp:99) + #03: G (G.cpp:99) + } +} + +Live { + 1 block in heap block record 2 of 3 + 112 bytes (100 requested / 12 slop) + 2.54% of the heap (98.19% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: X (X.cpp:99) + #03: Y (Y.cpp:99) + } +} + +Live { + 1 block in heap block record 3 of 3 + 80 bytes (80 requested / 0 slop) + 1.81% of the heap (100.00% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 4,416 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-max-frames-8-expected.txt b/memory/replace/dmd/test/script-max-frames-8-expected.txt new file mode 100644 index 0000000000..174992d5b8 --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames-8-expected.txt @@ -0,0 +1,69 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-max-frames-8-actual.txt script-max-frames.json + +Invocation { + $DMD = '--mode=live --stacks=full' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 1 block in heap block record 1 of 4 + 4,096 bytes (4,096 requested / 0 slop) + 92.75% of the heap (92.75% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: F (F.cpp:99) + #03: G (G.cpp:99) + #04: H (H.cpp:99) + #05: I (I.cpp:99) + #06: J (J.cpp:99) + #07: K (K.cpp:99) + #08: L (L.cpp:99) + } +} + +Live { + 1 block in heap block record 2 of 4 + 128 bytes (128 requested / 0 slop) + 2.90% of the heap (95.65% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: F (F.cpp:99) + #03: G (G.cpp:99) + #04: R (R.cpp:99) + #05: S (S.cpp:99) + #06: T (T.cpp:99) + #07: U (U.cpp:99) + #08: V (V.cpp:99) + } +} + +Live { + 1 block in heap block record 3 of 4 + 112 bytes (100 requested / 12 slop) + 2.54% of the heap (98.19% cumulative) + Allocated at { + #01: E (E.cpp:99) + #02: X (X.cpp:99) + #03: Y (Y.cpp:99) + #04: Z (Z.cpp:99) + } +} + +Live { + 1 block in heap block record 4 of 4 + 80 bytes (80 requested / 0 slop) + 1.81% of the heap (100.00% cumulative) + Allocated at { + #01: E (E.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 4,416 bytes in 4 blocks +} + diff --git a/memory/replace/dmd/test/script-max-frames.json b/memory/replace/dmd/test/script-max-frames.json new file mode 100644 index 0000000000..6de17a88c3 --- /dev/null +++ b/memory/replace/dmd/test/script-max-frames.json @@ -0,0 +1,43 @@ +{ + "version": 5, + "invocation": { + "dmdEnvVar": "--mode=live --stacks=full", + "mode": "live" + }, + "blockList": [ + { "req": 4096, "alloc": "A" }, + { "req": 128, "alloc": "B" }, + { "req": 100, "slop": 12, "alloc": "C" }, + { "req": 80, "alloc": "D" } + ], + "traceTable": { + "A": ["E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P"], + "B": ["E", "F", "G", "R", "S", "T", "U", "V"], + "C": ["E", "X", "Y", "Z"], + "D": ["E"] + }, + "frameTable": { + "E": "#00: E (E.cpp:99)", + "F": "#00: F (F.cpp:99)", + "G": "#00: G (G.cpp:99)", + "H": "#00: H (H.cpp:99)", + "I": "#00: I (I.cpp:99)", + "J": "#00: J (J.cpp:99)", + "K": "#00: K (K.cpp:99)", + "L": "#00: L (L.cpp:99)", + "M": "#00: M (M.cpp:99)", + "N": "#00: N (N.cpp:99)", + "O": "#00: O (O.cpp:99)", + "P": "#00: P (P.cpp:99)", + "Q": "#00: Q (Q.cpp:99)", + "R": "#00: R (R.cpp:99)", + "S": "#00: S (S.cpp:99)", + "T": "#00: T (T.cpp:99)", + "U": "#00: U (U.cpp:99)", + "V": "#00: V (V.cpp:99)", + "W": "#00: W (W.cpp:99)", + "X": "#00: X (X.cpp:99)", + "Y": "#00: Y (Y.cpp:99)", + "Z": "#00: Z (Z.cpp:99)" + } +} diff --git a/memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt b/memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt new file mode 100644 index 0000000000..8de03d953b --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-num-blocks-actual.txt --sort-by=num-blocks script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 8 blocks in heap block record 1 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (33.32% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (66.67% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 5 blocks in heap block record 3 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (100.00% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by-req-expected.txt b/memory/replace/dmd/test/script-sort-by-req-expected.txt new file mode 100644 index 0000000000..3ab21ba8f7 --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-req-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-req-actual.txt --sort-by=req --no-fix-stacks script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 5 blocks in heap block record 1 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (33.33% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (66.68% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 8 blocks in heap block record 3 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by-slop-expected.txt b/memory/replace/dmd/test/script-sort-by-slop-expected.txt new file mode 100644 index 0000000000..c325c7ed40 --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-slop-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-slop-actual.txt --sort-by=slop script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 8 blocks in heap block record 1 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (33.32% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (66.67% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 5 blocks in heap block record 3 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (100.00% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by-usable-expected.txt b/memory/replace/dmd/test/script-sort-by-usable-expected.txt new file mode 100644 index 0000000000..8239a4759e --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by-usable-expected.txt @@ -0,0 +1,46 @@ +#----------------------------------------------------------------- +# dmd.py --filter-stacks-for-testing -o script-sort-by-usable-actual.txt --sort-by=usable script-sort-by.json.gz + +Invocation { + $DMD = '--mode=live' + Mode = 'live' +} + +#----------------------------------------------------------------- + +Live { + 5 blocks in heap block record 1 of 3 + 16,400 bytes (12,016 requested / 4,384 slop) + Individual block sizes: 4,096 x 4; 16 + 33.35% of the heap (33.35% cumulative) + Allocated at { + #01: B (B.cpp:99) + } +} + +Live { + 5 blocks in heap block record 2 of 3 + 16,392 bytes (16,392 requested / 0 slop) + Individual block sizes: 4,096 x 4; 8 + 33.33% of the heap (66.68% cumulative) + Allocated at { + #01: A (A.cpp:99) + } +} + +Live { + 8 blocks in heap block record 3 of 3 + 16,384 bytes (8,200 requested / 8,184 slop) + Individual block sizes: 2,048 x 8 + 33.32% of the heap (100.00% cumulative) + Allocated at { + #01: C (C.cpp:99) + } +} + +#----------------------------------------------------------------- + +Summary { + Total: 49,176 bytes in 18 blocks +} + diff --git a/memory/replace/dmd/test/script-sort-by.json.gz b/memory/replace/dmd/test/script-sort-by.json.gz Binary files differnew file mode 100644 index 0000000000..f665c945c2 --- /dev/null +++ b/memory/replace/dmd/test/script-sort-by.json.gz diff --git a/memory/replace/dmd/test/test_dmd.js b/memory/replace/dmd/test/test_dmd.js new file mode 100644 index 0000000000..e092c79a02 --- /dev/null +++ b/memory/replace/dmd/test/test_dmd.js @@ -0,0 +1,232 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*-*/ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const { FileUtils } = ChromeUtils.importESModule( + "resource://gre/modules/FileUtils.sys.mjs" +); + +// The xpcshell test harness sets PYTHON so we can read it here. +var gPythonName = Services.env.get("PYTHON"); + +const gCwd = Services.dirsvc.get("CurWorkD", Ci.nsIFile); + +function getRelativeFile(...components) { + return new FileUtils.File(PathUtils.join(gCwd.path, ...components)); +} + +// If we're testing locally, the executable file is in "CurProcD". Otherwise, +// it is in another location that we have to find. +function getExecutable(aFilename) { + let file = new FileUtils.File( + PathUtils.join(Services.dirsvc.get("CurProcD", Ci.nsIFile).path, aFilename) + ); + if (!file.exists()) { + file = gCwd.clone(); + while (file.path.includes("xpcshell")) { + file = file.parent; + } + file.append("bin"); + file.append(aFilename); + } + return file; +} + +var gIsWindows = Services.appinfo.OS === "WINNT"; +var gDmdTestFile = getExecutable("SmokeDMD" + (gIsWindows ? ".exe" : "")); + +var gDmdScriptFile = getExecutable("dmd.py"); + +var gScanTestFile = getRelativeFile("scan-test.py"); + +function readFile(aFile) { + let fstream = Cc["@mozilla.org/network/file-input-stream;1"].createInstance( + Ci.nsIFileInputStream + ); + let cstream = Cc["@mozilla.org/intl/converter-input-stream;1"].createInstance( + Ci.nsIConverterInputStream + ); + fstream.init(aFile, -1, 0, 0); + cstream.init(fstream, "UTF-8", 0, 0); + + let data = ""; + let str = {}; + let read = 0; + do { + // Read as much as we can and put it in str.value. + read = cstream.readString(0xffffffff, str); + data += str.value; + } while (read != 0); + + cstream.close(); // this closes fstream + return data.replace(/\r/g, ""); // normalize line endings +} + +function runProcess(aExeFile, aArgs) { + let process = Cc["@mozilla.org/process/util;1"].createInstance(Ci.nsIProcess); + process.init(aExeFile); + process.run(/* blocking = */ true, aArgs, aArgs.length); + return process.exitValue; +} + +function test(aPrefix, aArgs) { + // DMD writes the JSON files to CurWorkD, so we do likewise here with + // |actualFile| for consistency. It is removed once we've finished. + let expectedFile = getRelativeFile(`${aPrefix}-expected.txt`); + let actualFile = getRelativeFile(`${aPrefix}-actual.txt`); + + // Run dmd.py on the JSON file, producing |actualFile|. + + let args = [ + gDmdScriptFile.path, + "--filter-stacks-for-testing", + "-o", + actualFile.path, + ].concat(aArgs); + + runProcess(new FileUtils.File(gPythonName), args); + + // Compare |expectedFile| with |actualFile|. We produce nice diffs with + // /usr/bin/diff on systems that have it (Mac and Linux). Otherwise (Windows) + // we do a string compare of the file contents and then print them both if + // they don't match. + + let success; + try { + let rv = runProcess(new FileUtils.File("/usr/bin/diff"), [ + "-u", + expectedFile.path, + actualFile.path, + ]); + success = rv == 0; + } catch (e) { + let expectedData = readFile(expectedFile); + let actualData = readFile(actualFile); + success = expectedData === actualData; + if (!success) { + expectedData = expectedData.split("\n"); + actualData = actualData.split("\n"); + for (let i = 0; i < expectedData.length; i++) { + print("EXPECTED:" + expectedData[i]); + } + for (let i = 0; i < actualData.length; i++) { + print(" ACTUAL:" + actualData[i]); + } + } + } + + ok(success, aPrefix); + + actualFile.remove(true); +} + +// Run scan-test.py on the JSON file and see if it succeeds. +function scanTest(aJsonFilePath, aExtraArgs) { + let args = [gScanTestFile.path, aJsonFilePath].concat(aExtraArgs); + + return runProcess(new FileUtils.File(gPythonName), args) == 0; +} + +function run_test() { + let jsonFile, jsonFile2; + + // These tests do complete end-to-end testing of DMD, i.e. both the C++ code + // that generates the JSON output, and the script that post-processes that + // output. + // + // Run these synchronously, because test() updates the complete*.json files + // in-place (to fix stacks) when it runs dmd.py, and that's not safe to do + // asynchronously. + + Services.env.set("DMD", "1"); + + runProcess(gDmdTestFile, []); + + function test2(aTestName, aMode) { + let name = "complete-" + aTestName + "-" + aMode; + jsonFile = getRelativeFile(`${name}.json`); + test(name, [jsonFile.path]); + jsonFile.remove(true); + } + + // Please keep this in sync with RunTests() in SmokeDMD.cpp. + + test2("empty", "live"); + test2("empty", "dark-matter"); + test2("empty", "cumulative"); + + test2("full1", "live"); + test2("full1", "dark-matter"); + + test2("full2", "dark-matter"); + test2("full2", "cumulative"); + + test2("partial", "live"); + + // Heap scan testing. + jsonFile = getRelativeFile("basic-scan.json"); + ok(scanTest(jsonFile.path), "Basic scan test"); + + let is64Bit = Services.appinfo.is64Bit; + let basicScanFileName = "basic-scan-" + (is64Bit ? "64" : "32"); + test(basicScanFileName, ["--clamp-contents", jsonFile.path]); + ok( + scanTest(jsonFile.path, ["--clamp-contents"]), + "Scan with address clamping" + ); + + // Run the generic test a second time to ensure that the first time produced + // valid JSON output. "--clamp-contents" is passed in so we don't have to have + // more variants of the files. + test(basicScanFileName, ["--clamp-contents", jsonFile.path]); + jsonFile.remove(true); + + // These tests only test the post-processing script. They use hand-written + // JSON files as input. Ideally the JSON files would contain comments + // explaining how they work, but JSON doesn't allow comments, so I've put + // explanations here. + + // This just tests that stack traces of various lengths are truncated + // appropriately. The number of records in the output is different for each + // of the tested values. + jsonFile = getRelativeFile("script-max-frames.json"); + test("script-max-frames-8", [jsonFile.path]); // --max-frames=8 is the default + test("script-max-frames-3", [ + "--max-frames=3", + "--no-fix-stacks", + jsonFile.path, + ]); + test("script-max-frames-1", ["--max-frames=1", jsonFile.path]); + + // This file has three records that are shown in a different order for each + // of the different sort values. It also tests the handling of gzipped JSON + // files. + jsonFile = getRelativeFile("script-sort-by.json.gz"); + test("script-sort-by-usable", ["--sort-by=usable", jsonFile.path]); + test("script-sort-by-req", [ + "--sort-by=req", + "--no-fix-stacks", + jsonFile.path, + ]); + test("script-sort-by-slop", ["--sort-by=slop", jsonFile.path]); + test("script-sort-by-num-blocks", ["--sort-by=num-blocks", jsonFile.path]); + + // This file has several real stack traces taken from Firefox execution, each + // of which tests a different allocator function (or functions). + jsonFile = getRelativeFile("script-ignore-alloc-fns.json"); + test("script-ignore-alloc-fns", ["--ignore-alloc-fns", jsonFile.path]); + + // This tests "live"-mode diffs. + jsonFile = getRelativeFile("script-diff-live1.json"); + jsonFile2 = getRelativeFile("script-diff-live2.json"); + test("script-diff-live", [jsonFile.path, jsonFile2.path]); + + // This tests "dark-matter"-mode diffs. + jsonFile = getRelativeFile("script-diff-dark-matter1.json"); + jsonFile2 = getRelativeFile("script-diff-dark-matter2.json"); + test("script-diff-dark-matter", [jsonFile.path, jsonFile2.path]); +} diff --git a/memory/replace/dmd/test/xpcshell.toml b/memory/replace/dmd/test/xpcshell.toml new file mode 100644 index 0000000000..ef05a8ec26 --- /dev/null +++ b/memory/replace/dmd/test/xpcshell.toml @@ -0,0 +1,35 @@ +[DEFAULT] +support-files = [ + "basic-scan-32-expected.txt", + "basic-scan-64-expected.txt", + "complete-empty-live-expected.txt", + "complete-empty-dark-matter-expected.txt", + "complete-empty-cumulative-expected.txt", + "complete-full1-live-expected.txt", + "complete-full1-dark-matter-expected.txt", + "complete-full2-dark-matter-expected.txt", + "complete-full2-cumulative-expected.txt", + "complete-partial-live-expected.txt", + "scan-test.py", + "script-max-frames.json", + "script-max-frames-8-expected.txt", + "script-max-frames-3-expected.txt", + "script-max-frames-1-expected.txt", + "script-sort-by.json.gz", + "script-sort-by-usable-expected.txt", + "script-sort-by-req-expected.txt", + "script-sort-by-slop-expected.txt", + "script-sort-by-num-blocks-expected.txt", + "script-ignore-alloc-fns.json", + "script-ignore-alloc-fns-expected.txt", + "script-diff-live1.json", + "script-diff-live2.json", + "script-diff-live-expected.txt", + "script-diff-dark-matter1.json", + "script-diff-dark-matter2.json", + "script-diff-dark-matter-expected.txt", +] + +["test_dmd.js"] +dmd = true +skip-if = ["!(os=='linux' || os=='mac' || (os=='win' && !pgo))"] diff --git a/memory/replace/logalloc/LogAlloc.cpp b/memory/replace/logalloc/LogAlloc.cpp new file mode 100644 index 0000000000..a976b0c674 --- /dev/null +++ b/memory/replace/logalloc/LogAlloc.cpp @@ -0,0 +1,238 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <cstdlib> +#include <fcntl.h> + +#ifdef _WIN32 +# include <windows.h> +# include <io.h> +# include <process.h> +#else +# include <unistd.h> +# include <pthread.h> +#endif + +#include "replace_malloc.h" +#include "FdPrintf.h" +#include "Mutex.h" + +static malloc_table_t sFuncs; +static intptr_t sFd = 0; +static bool sStdoutOrStderr = false; + +static Mutex sMutex MOZ_UNANNOTATED; + +#ifndef _WIN32 +static void prefork() MOZ_NO_THREAD_SAFETY_ANALYSIS { sMutex.Lock(); } +static void postfork_parent() MOZ_NO_THREAD_SAFETY_ANALYSIS { sMutex.Unlock(); } +static void postfork_child() { sMutex.Init(); } +#endif + +static size_t GetPid() { return size_t(getpid()); } + +static size_t GetTid() { +#if defined(_WIN32) + return size_t(GetCurrentThreadId()); +#else + return size_t(pthread_self()); +#endif +} + +#ifdef ANDROID +/* Android doesn't have pthread_atfork defined in pthread.h */ +extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void), + void (*)(void)); +#endif + +class LogAllocBridge : public ReplaceMallocBridge { + virtual void InitDebugFd(mozilla::DebugFdRegistry& aRegistry) override { + if (!sStdoutOrStderr) { + aRegistry.RegisterHandle(sFd); + } + } +}; + +/* Do a simple, text-form, log of all calls to replace-malloc functions. + * Use locking to guarantee that an allocation that did happen is logged + * before any other allocation/free happens. + */ + +static void* replace_malloc(size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.malloc(aSize); + FdPrintf(sFd, "%zu %zu malloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr); + return ptr; +} + +static int replace_posix_memalign(void** aPtr, size_t aAlignment, + size_t aSize) { + MutexAutoLock lock(sMutex); + int ret = sFuncs.posix_memalign(aPtr, aAlignment, aSize); + FdPrintf(sFd, "%zu %zu posix_memalign(%zu,%zu)=%p\n", GetPid(), GetTid(), + aAlignment, aSize, (ret == 0) ? *aPtr : nullptr); + return ret; +} + +static void* replace_aligned_alloc(size_t aAlignment, size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.aligned_alloc(aAlignment, aSize); + FdPrintf(sFd, "%zu %zu aligned_alloc(%zu,%zu)=%p\n", GetPid(), GetTid(), + aAlignment, aSize, ptr); + return ptr; +} + +static void* replace_calloc(size_t aNum, size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.calloc(aNum, aSize); + FdPrintf(sFd, "%zu %zu calloc(%zu,%zu)=%p\n", GetPid(), GetTid(), aNum, aSize, + ptr); + return ptr; +} + +static void* replace_realloc(void* aPtr, size_t aSize) { + MutexAutoLock lock(sMutex); + void* new_ptr = sFuncs.realloc(aPtr, aSize); + FdPrintf(sFd, "%zu %zu realloc(%p,%zu)=%p\n", GetPid(), GetTid(), aPtr, aSize, + new_ptr); + return new_ptr; +} + +static void replace_free(void* aPtr) { + MutexAutoLock lock(sMutex); + FdPrintf(sFd, "%zu %zu free(%p)\n", GetPid(), GetTid(), aPtr); + sFuncs.free(aPtr); +} + +static void* replace_memalign(size_t aAlignment, size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.memalign(aAlignment, aSize); + FdPrintf(sFd, "%zu %zu memalign(%zu,%zu)=%p\n", GetPid(), GetTid(), + aAlignment, aSize, ptr); + return ptr; +} + +static void* replace_valloc(size_t aSize) { + MutexAutoLock lock(sMutex); + void* ptr = sFuncs.valloc(aSize); + FdPrintf(sFd, "%zu %zu valloc(%zu)=%p\n", GetPid(), GetTid(), aSize, ptr); + return ptr; +} + +static void replace_jemalloc_stats(jemalloc_stats_t* aStats, + jemalloc_bin_stats_t* aBinStats) { + MutexAutoLock lock(sMutex); + sFuncs.jemalloc_stats_internal(aStats, aBinStats); + FdPrintf(sFd, "%zu %zu jemalloc_stats()\n", GetPid(), GetTid()); +} + +void replace_init(malloc_table_t* aTable, ReplaceMallocBridge** aBridge) { + /* Initialize output file descriptor from the MALLOC_LOG environment + * variable. Numbers up to 9999 are considered as a preopened file + * descriptor number. Other values are considered as a file name. */ +#ifdef _WIN32 + wchar_t* log = _wgetenv(L"MALLOC_LOG"); +#else + char* log = getenv("MALLOC_LOG"); +#endif + if (log && *log) { + int fd = 0; + const auto* fd_num = log; + while (*fd_num) { + /* Reject non digits. */ + if (*fd_num < '0' || *fd_num > '9') { + fd = -1; + break; + } + fd = fd * 10 + (*fd_num - '0'); + /* Reject values >= 10000. */ + if (fd >= 10000) { + fd = -1; + break; + } + fd_num++; + } + if (fd == 1 || fd == 2) { + sStdoutOrStderr = true; + } +#ifdef _WIN32 + // See comment in FdPrintf.h as to why CreateFile is used. + HANDLE handle; + if (fd > 0) { + handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); + } else { + handle = + CreateFileW(log, FILE_APPEND_DATA, FILE_SHARE_READ | FILE_SHARE_WRITE, + nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr); + } + if (handle != INVALID_HANDLE_VALUE) { + sFd = reinterpret_cast<intptr_t>(handle); + } +#else + if (fd == -1) { + fd = open(log, O_WRONLY | O_CREAT | O_APPEND, 0644); + } + if (fd > 0) { + sFd = fd; + } +#endif + } + + // Don't initialize if we weren't passed a valid MALLOC_LOG. + if (sFd == 0) { + return; + } + + sMutex.Init(); + static LogAllocBridge bridge; + sFuncs = *aTable; +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE +#define MALLOC_DECL(name, ...) aTable->name = replace_##name; +#include "malloc_decls.h" + aTable->jemalloc_stats_internal = replace_jemalloc_stats; + if (!getenv("MALLOC_LOG_MINIMAL")) { + aTable->posix_memalign = replace_posix_memalign; + aTable->aligned_alloc = replace_aligned_alloc; + aTable->valloc = replace_valloc; + } + *aBridge = &bridge; + +#ifndef _WIN32 + /* When another thread has acquired a lock before forking, the child + * process will inherit the lock state but the thread, being nonexistent + * in the child process, will never release it, leading to a dead-lock + * whenever the child process gets the lock. We thus need to ensure no + * other thread is holding the lock before forking, by acquiring it + * ourselves, and releasing it after forking in the parent process and + * resetting it to its initial state in the child process. The latter is + * important because some implementations (notably macOS) prevent a lock from + * being unlocked by a different thread than the one which locked it in the + * first place. + * Windows doesn't have this problem since there is no fork(). + * The real allocator, however, might be doing the same thing (jemalloc + * does). But pthread_atfork `prepare` handlers (first argument) are + * processed in reverse order they were established. But replace_init + * runs before the real allocator has had any chance to initialize and + * call pthread_atfork itself. This leads to its prefork running before + * ours. This leads to a race condition that can lead to a deadlock like + * the following: + * - thread A forks. + * - libc calls real allocator's prefork, so thread A holds the real + * allocator lock. + * - thread B calls malloc, which calls our replace_malloc. + * - consequently, thread B holds our lock. + * - thread B then proceeds to call the real allocator's malloc, and + * waits for the real allocator's lock, which thread A holds. + * - libc calls our prefork, so thread A waits for our lock, which + * thread B holds. + * To avoid this race condition, the real allocator's prefork must be + * called after ours, which means it needs to be registered before ours. + * So trick the real allocator into initializing itself without more side + * effects by calling malloc with a size it can't possibly allocate. */ + sFuncs.malloc(-1); + pthread_atfork(prefork, postfork_parent, postfork_child); +#endif +} diff --git a/memory/replace/logalloc/README b/memory/replace/logalloc/README new file mode 100644 index 0000000000..c2e8cf66ce --- /dev/null +++ b/memory/replace/logalloc/README @@ -0,0 +1,95 @@ +Logalloc is a replace-malloc library for Firefox (see +memory/build/replace_malloc.h) that dumps a log of memory allocations to a +given file descriptor or file name. That log can then be replayed against +Firefox's default memory allocator independently or through another +replace-malloc library, allowing the testing of other allocators under the +exact same workload. + +To get an allocation log the following environment variable when starting +Firefox: + MALLOC_LOG=/path/to/log-file + or + MALLOC_LOG=number + +When MALLOC_LOG is a number below 10000, it is considered as a file +descriptor number that is fed to Firefox when it is started. Otherwise, +it is considered as a file name. + +As those allocation logs can grow large quite quickly, it can be useful +to pipe the output to a compression tool. + +MALLOC_LOG=1 would send to Firefox's stdout, MALLOC_LOG=2 would send to +its stderr. Since in both cases that could be mixed with other output +from Firefox, it is usually better to use another file descriptor +by shell redirections, such as: + + MALLOC_LOG=3 firefox 3>&1 1>&2 | gzip -c > log.gz + +(3>&1 copies the `| gzip` pipe file descriptor to file descriptor #3, 1>&2 +then copies stderr to stdout. This leads to: fd1 and fd2 sending to stderr +of the parent process (the shell), and fd3 sending to gzip.) + +Each line of the allocations log is formatted as follows: + <pid> <tid> <function>([<args>])[=<result>] +where <args> is a comma separated list of values. The number of <args> and +the presence of <result> depend on the <function>. + +Example log: + 18545 18545 malloc(32)=0x7f90495120e0 + 18545 18545 calloc(1,148)=0x7f9049537480 + 18545 18545 realloc(0x7f90495120e0,64)=0x7f9049536680 + 18545 18545 posix_memalign(256,240)=0x7f9049583300 + 18545 18545 jemalloc_stats() + 18545 18545 free(0x7f9049536680) + +This log can be replayed with the logalloc-replay tool in +memory/replace/logalloc/replay. However, as the goal of that tool is to +reproduce the recorded memory allocations, it needs to avoid as much as +possible doing its own allocations for bookkeeping. Reading the logs as +they are would require data structures and memory allocations. As a +consequence, the logs need to be preprocessed beforehand. + +The logalloc_munge.py script is responsible for that preprocessing. It simply +takes a raw log on its stdin, and outputs the preprocessed log on its stdout. +It replaces pointer addresses with indexes the logalloc-replay tool can use +in a large (almost) linear array of allocation tracking slots (prefixed with +'#'). It also replaces the pids with numbers starting from 1 (such as the +first seen pid number is 1, the second is 2, etc.). + +The above example log would become the following, once preprocessed: + 1 1 malloc(32)=#1 + 1 1 calloc(1,148)=#2 + 1 1 realloc(#1,64)=#1 + 1 1 posix_memalign(256,240)=#3 + 1 1 jemalloc_stats() + 1 1 free(#1) + +The logalloc-replay tool then takes the preprocessed log on its stdin and +replays the allocations printed there, but will only replay those with the +same process id as the first line (which normally is 1). + +As the log files are simple text files, though, it is easy to separate out +the different processes log with e.g. grep, and feed the separate processes +logs to logalloc-replay. + +The logalloc-replay program won't output anything unless jemalloc_stats +records appears in the log. You can expect those to be recorded when going +to about:memory in Firefox, but they can also be added after preprocessing. + +Here is an example of what one can do: + + gunzip -c log.gz | python logalloc_munge.py | \ + awk '$1 == "2" { print $0 } !(NR % 10000) { print "2 1 jemalloc_stats()" }' | \ + ./logalloc-replay + +The above command replays the allocations of process #2, with some stats +output every 10000 records. + +The logalloc-replay tool itself being hooked with replace-malloc, it is possible +to set LD_PRELOAD/DYLD_INSERT_LIBRARIES/MOZ_REPLACE_MALLOC_LIB and replay a log +through a different allocator. For example: + + LD_PRELOAD=libreplace_jemalloc.so logalloc-replay < log + +Will replay the log against jemalloc4 (which is, as of writing, what +libreplace_jemalloc.so contains). diff --git a/memory/replace/logalloc/moz.build b/memory/replace/logalloc/moz.build new file mode 100644 index 0000000000..e0588cfb16 --- /dev/null +++ b/memory/replace/logalloc/moz.build @@ -0,0 +1,36 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ReplaceMalloc("logalloc") + +SOURCES += [ + "LogAlloc.cpp", +] + +# FdPrintf is statically linked if we're using static linking to mozjemalloc +# and PHC is also compiled-in. +if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"] or not CONFIG["MOZ_PHC"]: + SOURCES += [ + "/memory/build/FdPrintf.cpp", + ] + +DisableStlWrapping() +NO_PGO = True +DEFINES["MOZ_NO_MOZALLOC"] = True + +LOCAL_INCLUDES += [ + "/memory/build", +] + +# Android doesn't have pthread_atfork, but we have our own in mozglue. +if CONFIG["OS_TARGET"] == "Android" and FORCE_SHARED_LIB: + USE_LIBS += [ + "mozglue", + ] + +DIRS += [ + "replay", +] diff --git a/memory/replace/logalloc/replay/Makefile.in b/memory/replace/logalloc/replay/Makefile.in new file mode 100644 index 0000000000..73659add98 --- /dev/null +++ b/memory/replace/logalloc/replay/Makefile.in @@ -0,0 +1,48 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ifdef MOZ_CODE_COVERAGE +SKIP = 1 +endif + +ifdef CROSS_COMPILE +SKIP = 1 +endif + +ifneq ($(SKIP),1) + +ifeq ($(OS_TARGET),WINNT) +LOGALLOC_VAR = MOZ_REPLACE_MALLOC_LIB +else +ifeq ($(OS_TARGET),Darwin) +LOGALLOC_VAR = DYLD_INSERT_LIBRARIES +else +LOGALLOC_VAR = LD_PRELOAD +endif +endif + +ifndef MOZ_REPLACE_MALLOC_STATIC +LOGALLOC = $(LOGALLOC_VAR)=$(CURDIR)/../$(DLL_PREFIX)logalloc$(DLL_SUFFIX) +endif + +expected_output.log: $(srcdir)/replay.log +# The logalloc-replay program will only replay entries from the first pid, +# so the expected output only contains entries beginning with "1 " + grep "^1 " $< > $@ + +check:: $(srcdir)/replay.log expected_output.log $(srcdir)/expected_output_minimal.log +# Test with MALLOC_LOG as a file descriptor number +# We filter out anything happening before the first jemalloc_stats (first +# command in replay.log) because starting with libstdc++ 5, a static +# initializer in the STL allocates memory, which we obviously don't have +# in expected_output.log. + MALLOC_LOG=1 $(LOGALLOC) ./$(PROGRAM) < $< | sed -n '/jemalloc_stats/,$$p' | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - expected_output.log +# Test with MALLOC_LOG as a file name + $(RM) test_output.log + MALLOC_LOG=test_output.log $(LOGALLOC) ./$(PROGRAM) < $< + sed -n '/jemalloc_stats/,$$p' test_output.log | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - expected_output.log + + MALLOC_LOG=1 MALLOC_LOG_MINIMAL=1 $(LOGALLOC) ./$(PROGRAM) < $< | sed -n '/jemalloc_stats/,$$p' | $(PYTHON3) $(srcdir)/logalloc_munge.py | diff -w - $(srcdir)/expected_output_minimal.log + +endif diff --git a/memory/replace/logalloc/replay/Replay.cpp b/memory/replace/logalloc/replay/Replay.cpp new file mode 100644 index 0000000000..b5ad0c540e --- /dev/null +++ b/memory/replace/logalloc/replay/Replay.cpp @@ -0,0 +1,1159 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#define MOZ_MEMORY_IMPL +#include "mozmemory_wrap.h" + +#ifdef _WIN32 +# include <windows.h> +# include <io.h> +typedef intptr_t ssize_t; +#else +# include <sys/mman.h> +# include <unistd.h> +#endif +#ifdef XP_LINUX +# include <fcntl.h> +# include <stdlib.h> +#endif +#include <algorithm> +#include <cmath> +#include <cstdio> +#include <cstring> + +#include "mozilla/Assertions.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/Maybe.h" +#include "FdPrintf.h" + +using namespace mozilla; + +static void die(const char* message) { + /* Here, it doesn't matter that fprintf may allocate memory. */ + fprintf(stderr, "%s\n", message); + exit(1); +} + +#ifdef XP_LINUX +static size_t sPageSize = []() { return sysconf(_SC_PAGESIZE); }(); +#endif + +/* We don't want to be using malloc() to allocate our internal tracking + * data, because that would change the parameters of what is being measured, + * so we want to use data types that directly use mmap/VirtualAlloc. */ +template <typename T, size_t Len> +class MappedArray { + public: + MappedArray() : mPtr(nullptr) { +#ifdef XP_LINUX + MOZ_RELEASE_ASSERT(!((sizeof(T) * Len) & (sPageSize - 1)), + "MappedArray size must be a multiple of the page size"); +#endif + } + + ~MappedArray() { + if (mPtr) { +#ifdef _WIN32 + VirtualFree(mPtr, sizeof(T) * Len, MEM_RELEASE); +#elif defined(XP_LINUX) + munmap(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(mPtr) - + sPageSize), + sizeof(T) * Len + sPageSize * 2); +#else + munmap(mPtr, sizeof(T) * Len); +#endif + } + } + + T& operator[](size_t aIndex) const { + if (mPtr) { + return mPtr[aIndex]; + } + +#ifdef _WIN32 + mPtr = reinterpret_cast<T*>(VirtualAlloc( + nullptr, sizeof(T) * Len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE)); + if (mPtr == nullptr) { + die("VirtualAlloc error"); + } +#else + size_t data_size = sizeof(T) * Len; + size_t size = data_size; +# ifdef XP_LINUX + // See below + size += sPageSize * 2; +# endif + mPtr = reinterpret_cast<T*>(mmap(nullptr, size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0)); + if (mPtr == MAP_FAILED) { + die("Mmap error"); + } +# ifdef XP_LINUX + // On Linux we request a page on either side of the allocation and + // mprotect them. This prevents mappings in /proc/self/smaps from being + // merged and allows us to parse this file to calculate the allocator's RSS. + MOZ_ASSERT(0 == mprotect(mPtr, sPageSize, 0)); + MOZ_ASSERT(0 == mprotect(reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(mPtr) + data_size + + sPageSize), + sPageSize, 0)); + mPtr = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(mPtr) + sPageSize); +# endif +#endif + return mPtr[aIndex]; + } + + bool ownsMapping(uintptr_t addr) const { return addr == (uintptr_t)mPtr; } + + bool allocated() const { return !!mPtr; } + + private: + mutable T* mPtr; +}; + +/* Type for records of allocations. */ +struct MemSlot { + void* mPtr; + + // mRequest is only valid if mPtr is non-null. It doesn't need to be cleared + // when memory is freed or realloc()ed. + size_t mRequest; +}; + +/* An almost infinite list of slots. + * In essence, this is a linked list of arrays of groups of slots. + * Each group is 1MB. On 64-bits, one group allows to store 64k allocations. + * Each MemSlotList instance can store 1023 such groups, which means more + * than 67M allocations. In case more would be needed, we chain to another + * MemSlotList, and so on. + * Using 1023 groups makes the MemSlotList itself page sized on 32-bits + * and 2 pages-sized on 64-bits. + */ +class MemSlotList { + static constexpr size_t kGroups = 1024 - 1; + static constexpr size_t kGroupSize = (1024 * 1024) / sizeof(MemSlot); + + MappedArray<MemSlot, kGroupSize> mSlots[kGroups]; + MappedArray<MemSlotList, 1> mNext; + + public: + MemSlot& operator[](size_t aIndex) const { + if (aIndex < kGroupSize * kGroups) { + return mSlots[aIndex / kGroupSize][aIndex % kGroupSize]; + } + aIndex -= kGroupSize * kGroups; + return mNext[0][aIndex]; + } + + // Ask if any of the memory-mapped buffers use this range. + bool ownsMapping(uintptr_t aStart) const { + for (const auto& slot : mSlots) { + if (slot.allocated() && slot.ownsMapping(aStart)) { + return true; + } + } + return mNext.ownsMapping(aStart) || + (mNext.allocated() && mNext[0].ownsMapping(aStart)); + } +}; + +/* Helper class for memory buffers */ +class Buffer { + public: + Buffer() : mBuf(nullptr), mLength(0) {} + + Buffer(const void* aBuf, size_t aLength) + : mBuf(reinterpret_cast<const char*>(aBuf)), mLength(aLength) {} + + /* Constructor for string literals. */ + template <size_t Size> + explicit Buffer(const char (&aStr)[Size]) : mBuf(aStr), mLength(Size - 1) {} + + /* Returns a sub-buffer up-to but not including the given aNeedle character. + * The "parent" buffer itself is altered to begin after the aNeedle + * character. + * If the aNeedle character is not found, return the entire buffer, and empty + * the "parent" buffer. */ + Buffer SplitChar(char aNeedle) { + char* buf = const_cast<char*>(mBuf); + char* c = reinterpret_cast<char*>(memchr(buf, aNeedle, mLength)); + if (!c) { + return Split(mLength); + } + + Buffer result = Split(c - buf); + // Remove the aNeedle character itself. + Split(1); + return result; + } + + // Advance to the position after aNeedle. This is like SplitChar but does not + // return the skipped portion. + void Skip(char aNeedle, unsigned nTimes = 1) { + for (unsigned i = 0; i < nTimes; i++) { + SplitChar(aNeedle); + } + } + + void SkipWhitespace() { + while (mLength > 0) { + if (!IsSpace(mBuf[0])) { + break; + } + mBuf++; + mLength--; + } + } + + static bool IsSpace(char c) { + switch (c) { + case ' ': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + return true; + } + return false; + } + + /* Returns a sub-buffer of at most aLength characters. The "parent" buffer is + * amputated of those aLength characters. If the "parent" buffer is smaller + * than aLength, then its length is used instead. */ + Buffer Split(size_t aLength) { + Buffer result(mBuf, std::min(aLength, mLength)); + mLength -= result.mLength; + mBuf += result.mLength; + return result; + } + + /* Move the buffer (including its content) to the memory address of the aOther + * buffer. */ + void Slide(Buffer aOther) { + memmove(const_cast<char*>(aOther.mBuf), mBuf, mLength); + mBuf = aOther.mBuf; + } + + /* Returns whether the two involved buffers have the same content. */ + bool operator==(Buffer aOther) { + return mLength == aOther.mLength && + (mBuf == aOther.mBuf || !strncmp(mBuf, aOther.mBuf, mLength)); + } + + bool operator!=(Buffer aOther) { return !(*this == aOther); } + + /* Returns true if the buffer is not empty. */ + explicit operator bool() { return mLength; } + + char operator[](size_t n) const { return mBuf[n]; } + + /* Returns the memory location of the buffer. */ + const char* get() { return mBuf; } + + /* Returns the memory location of the end of the buffer (technically, the + * first byte after the buffer). */ + const char* GetEnd() { return mBuf + mLength; } + + /* Extend the buffer over the content of the other buffer, assuming it is + * adjacent. */ + void Extend(Buffer aOther) { + MOZ_ASSERT(aOther.mBuf == GetEnd()); + mLength += aOther.mLength; + } + + size_t Length() const { return mLength; } + + private: + const char* mBuf; + size_t mLength; +}; + +/* Helper class to read from a file descriptor line by line. */ +class FdReader { + public: + explicit FdReader(int aFd, bool aNeedClose = false) + : mFd(aFd), + mNeedClose(aNeedClose), + mData(&mRawBuf, 0), + mBuf(&mRawBuf, sizeof(mRawBuf)) {} + + FdReader(FdReader&& aOther) noexcept + : mFd(aOther.mFd), + mNeedClose(aOther.mNeedClose), + mData(&mRawBuf, 0), + mBuf(&mRawBuf, sizeof(mRawBuf)) { + memcpy(mRawBuf, aOther.mRawBuf, sizeof(mRawBuf)); + aOther.mFd = -1; + aOther.mNeedClose = false; + aOther.mData = Buffer(); + aOther.mBuf = Buffer(); + } + + FdReader& operator=(const FdReader&) = delete; + FdReader(const FdReader&) = delete; + + ~FdReader() { + if (mNeedClose) { + close(mFd); + } + } + + /* Read a line from the file descriptor and returns it as a Buffer instance */ + Buffer ReadLine() { + while (true) { + Buffer result = mData.SplitChar('\n'); + + /* There are essentially three different cases here: + * - '\n' was found "early". In this case, the end of the result buffer + * is before the beginning of the mData buffer (since SplitChar + * amputated it). + * - '\n' was found as the last character of mData. In this case, mData + * is empty, but still points at the end of mBuf. result points to what + * used to be in mData, without the last character. + * - '\n' was not found. In this case too, mData is empty and points at + * the end of mBuf. But result points to the entire buffer that used to + * be pointed by mData. + * Only in the latter case do both result and mData's end match, and it's + * the only case where we need to refill the buffer. + */ + if (result.GetEnd() != mData.GetEnd()) { + return result; + } + + /* Since SplitChar emptied mData, make it point to what it had before. */ + mData = result; + + /* And move it to the beginning of the read buffer. */ + mData.Slide(mBuf); + + FillBuffer(); + + if (!mData) { + return Buffer(); + } + } + } + + private: + /* Fill the read buffer. */ + void FillBuffer() { + size_t size = mBuf.GetEnd() - mData.GetEnd(); + Buffer remainder(mData.GetEnd(), size); + + ssize_t len = 1; + while (remainder && len > 0) { + len = ::read(mFd, const_cast<char*>(remainder.get()), size); + if (len < 0) { + die("Read error"); + } + size -= len; + mData.Extend(remainder.Split(len)); + } + } + + /* File descriptor to read from. */ + int mFd; + bool mNeedClose; + + /* Part of data that was read from the file descriptor but not returned with + * ReadLine yet. */ + Buffer mData; + /* Buffer representation of mRawBuf */ + Buffer mBuf; + /* read() buffer */ + char mRawBuf[4096]; +}; + +MOZ_BEGIN_EXTERN_C + +/* Function declarations for all the replace_malloc _impl functions. + * See memory/build/replace_malloc.c */ +#define MALLOC_DECL(name, return_type, ...) \ + return_type name##_impl(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC +#include "malloc_decls.h" + +#define MALLOC_DECL(name, return_type, ...) return_type name(__VA_ARGS__); +#define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC +#include "malloc_decls.h" + +#ifdef ANDROID + +/* mozjemalloc and jemalloc use pthread_atfork, which Android doesn't have. + * While gecko has one in libmozglue, the replay program can't use that. + * Since we're not going to fork anyways, make it a dummy function. */ +int pthread_atfork(void (*aPrepare)(void), void (*aParent)(void), + void (*aChild)(void)) { + return 0; +} +#endif + +MOZ_END_EXTERN_C + +template <unsigned Base = 10> +size_t parseNumber(Buffer aBuf) { + if (!aBuf) { + die("Malformed input"); + } + + size_t result = 0; + for (const char *c = aBuf.get(), *end = aBuf.GetEnd(); c < end; c++) { + result *= Base; + if ((*c >= '0' && *c <= '9')) { + result += *c - '0'; + } else if (Base == 16 && *c >= 'a' && *c <= 'f') { + result += *c - 'a' + 10; + } else if (Base == 16 && *c >= 'A' && *c <= 'F') { + result += *c - 'A' + 10; + } else { + die("Malformed input"); + } + } + return result; +} + +static size_t percent(size_t a, size_t b) { + if (!b) { + return 0; + } + return size_t(round(double(a) / double(b) * 100.0)); +} + +class Distribution { + public: + // Default constructor used for array initialisation. + Distribution() + : mMaxSize(0), + mNextSmallest(0), + mShift(0), + mArrayOffset(0), + mArraySlots(0), + mTotalRequests(0), + mRequests{0} {} + + Distribution(size_t max_size, size_t next_smallest, size_t bucket_size) + : mMaxSize(max_size), + mNextSmallest(next_smallest), + mShift(CeilingLog2(bucket_size)), + mArrayOffset(1 + next_smallest), + mArraySlots((max_size - next_smallest) >> mShift), + mTotalRequests(0), + mRequests{ + 0, + } { + MOZ_ASSERT(mMaxSize); + MOZ_RELEASE_ASSERT(mArraySlots <= MAX_NUM_BUCKETS); + } + + Distribution& operator=(const Distribution& aOther) = default; + + void addRequest(size_t request) { + MOZ_ASSERT(mMaxSize); + + mRequests[(request - mArrayOffset) >> mShift]++; + mTotalRequests++; + } + + void printDist(intptr_t std_err) { + MOZ_ASSERT(mMaxSize); + + // The translation to turn a slot index into a memory request size. + const size_t array_offset_add = (1 << mShift) + mNextSmallest; + + FdPrintf(std_err, "\n%zu-bin Distribution:\n", mMaxSize); + FdPrintf(std_err, " request : count percent\n"); + size_t range_start = mNextSmallest + 1; + for (size_t j = 0; j < mArraySlots; j++) { + size_t range_end = (j << mShift) + array_offset_add; + FdPrintf(std_err, "%5zu - %5zu: %6zu %6zu%%\n", range_start, range_end, + mRequests[j], percent(mRequests[j], mTotalRequests)); + range_start = range_end + 1; + } + } + + size_t maxSize() const { return mMaxSize; } + + private: + static constexpr size_t MAX_NUM_BUCKETS = 16; + + // If size is zero this distribution is uninitialised. + size_t mMaxSize; + size_t mNextSmallest; + + // Parameters to convert a size into a slot number. + unsigned mShift; + unsigned mArrayOffset; + + // The number of slots. + unsigned mArraySlots; + + size_t mTotalRequests; + size_t mRequests[MAX_NUM_BUCKETS]; +}; + +#ifdef XP_LINUX +struct MemoryMap { + uintptr_t mStart; + uintptr_t mEnd; + bool mReadable; + bool mPrivate; + bool mAnon; + bool mIsStack; + bool mIsSpecial; + size_t mRSS; + + bool IsCandidate() const { + // Candidates mappings are: + // * anonymous + // * they are private (not shared), + // * anonymous or "[heap]" (not another area such as stack), + // + // The only mappings we're falsely including are the .bss segments for + // shared libraries. + return mReadable && mPrivate && mAnon && !mIsStack && !mIsSpecial; + } +}; + +class SMapsReader : private FdReader { + private: + explicit SMapsReader(FdReader&& reader) : FdReader(std::move(reader)) {} + + public: + static Maybe<SMapsReader> open() { + int fd = ::open(FILENAME, O_RDONLY); + if (fd < 0) { + perror(FILENAME); + return mozilla::Nothing(); + } + + return Some(SMapsReader(FdReader(fd, true))); + } + + Maybe<MemoryMap> readMap(intptr_t aStdErr) { + // This is not very tolerant of format changes because things like + // parseNumber will crash if they get a bad value. TODO: make this + // soft-fail. + + Buffer line = ReadLine(); + if (!line) { + return Nothing(); + } + + // We're going to be at the start of an entry, start tokenising the first + // line. + + // Range + Buffer range = line.SplitChar(' '); + uintptr_t range_start = parseNumber<16>(range.SplitChar('-')); + uintptr_t range_end = parseNumber<16>(range); + + // Mode. + Buffer mode = line.SplitChar(' '); + if (mode.Length() != 4) { + FdPrintf(aStdErr, "Couldn't parse SMAPS file\n"); + return Nothing(); + } + bool readable = mode[0] == 'r'; + bool private_ = mode[3] == 'p'; + + // Offset, device and inode. + line.SkipWhitespace(); + bool zero_offset = !parseNumber<16>(line.SplitChar(' ')); + line.SkipWhitespace(); + bool no_device = line.SplitChar(' ') == Buffer("00:00"); + line.SkipWhitespace(); + bool zero_inode = !parseNumber(line.SplitChar(' ')); + bool is_anon = zero_offset && no_device && zero_inode; + + // Filename, or empty for anon mappings. + line.SkipWhitespace(); + Buffer filename = line.SplitChar(' '); + + bool is_stack; + bool is_special; + if (filename && filename[0] == '[') { + is_stack = filename == Buffer("[stack]"); + is_special = filename == Buffer("[vdso]") || + filename == Buffer("[vvar]") || + filename == Buffer("[vsyscall]"); + } else { + is_stack = false; + is_special = false; + } + + size_t rss = 0; + while ((line = ReadLine())) { + Buffer field = line.SplitChar(':'); + if (field == Buffer("VmFlags")) { + // This is the last field, at least in the current format. Break this + // loop to read the next mapping. + break; + } + + if (field == Buffer("Rss")) { + line.SkipWhitespace(); + Buffer value = line.SplitChar(' '); + rss = parseNumber(value) * 1024; + } + } + + return Some(MemoryMap({range_start, range_end, readable, private_, is_anon, + is_stack, is_special, rss})); + } + + static constexpr char FILENAME[] = "/proc/self/smaps"; +}; +#endif // XP_LINUX + +/* Class to handle dispatching the replay function calls to replace-malloc. */ +class Replay { + public: + Replay() { +#ifdef _WIN32 + // See comment in FdPrintf.h as to why native win32 handles are used. + mStdErr = reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE)); +#else + mStdErr = fileno(stderr); +#endif +#ifdef XP_LINUX + BuildInitialMapInfo(); +#endif + } + + void enableSlopCalculation() { mCalculateSlop = true; } + void enableMemset() { mDoMemset = true; } + + MemSlot& operator[](size_t index) const { return mSlots[index]; } + + void malloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::malloc_impl(size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void posix_memalign(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t alignment = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + void* ptr; + if (::posix_memalign_impl(&ptr, alignment, size) == 0) { + aSlot.mPtr = ptr; + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } else { + aSlot.mPtr = nullptr; + } + } + + void aligned_alloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t alignment = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::aligned_alloc_impl(alignment, size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void calloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t num = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::calloc_impl(num, size); + if (aSlot.mPtr) { + aSlot.mRequest = num * size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += num * size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void realloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + Buffer dummy = aArgs.SplitChar('#'); + if (dummy) { + die("Malformed input"); + } + size_t slot_id = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + MemSlot& old_slot = (*this)[slot_id]; + void* old_ptr = old_slot.mPtr; + old_slot.mPtr = nullptr; + aSlot.mPtr = ::realloc_impl(old_ptr, size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void free(Buffer& aArgs, Buffer& aResult) { + if (aResult) { + die("Malformed input"); + } + mOps++; + Buffer dummy = aArgs.SplitChar('#'); + if (dummy) { + die("Malformed input"); + } + size_t slot_id = parseNumber(aArgs); + MemSlot& slot = (*this)[slot_id]; + ::free_impl(slot.mPtr); + slot.mPtr = nullptr; + } + + void memalign(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t alignment = parseNumber(aArgs.SplitChar(',')); + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::memalign_impl(alignment, size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void valloc(Buffer& aArgs, Buffer& aResult) { + MemSlot& aSlot = SlotForResult(aResult); + mOps++; + size_t size = parseNumber(aArgs); + aSlot.mPtr = ::valloc_impl(size); + if (aSlot.mPtr) { + aSlot.mRequest = size; + MaybeCommit(aSlot); + if (mCalculateSlop) { + mTotalRequestedSize += size; + mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr); + } + } + } + + void jemalloc_stats(Buffer& aArgs, Buffer& aResult) { + if (aArgs || aResult) { + die("Malformed input"); + } + mOps++; + jemalloc_stats_t stats; + // Using a variable length array here is a GCC & Clang extension. But it + // allows us to place this on the stack and not alter jemalloc's profiling. + const size_t num_bins = ::jemalloc_stats_num_bins(); + const size_t MAX_NUM_BINS = 100; + if (num_bins > MAX_NUM_BINS) { + die("Exceeded maximum number of jemalloc stats bins"); + } + jemalloc_bin_stats_t bin_stats[MAX_NUM_BINS] = {{0}}; + ::jemalloc_stats_internal(&stats, bin_stats); + +#ifdef XP_LINUX + size_t rss = get_rss(); +#endif + + size_t num_objects = 0; + size_t num_sloppy_objects = 0; + size_t total_allocated = 0; + size_t total_slop = 0; + size_t large_slop = 0; + size_t large_used = 0; + size_t huge_slop = 0; + size_t huge_used = 0; + size_t bin_slop[MAX_NUM_BINS] = {0}; + + for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) { + MemSlot& slot = mSlots[slot_id]; + if (slot.mPtr) { + size_t used = ::malloc_usable_size_impl(slot.mPtr); + size_t slop = used - slot.mRequest; + total_allocated += used; + total_slop += slop; + num_objects++; + if (slop) { + num_sloppy_objects++; + } + + if (used <= + (stats.subpage_max ? stats.subpage_max : stats.quantum_wide_max)) { + // We know that this is an inefficient linear search, but there's a + // small number of bins and this is simple. + for (unsigned i = 0; i < num_bins; i++) { + auto& bin = bin_stats[i]; + if (used == bin.size) { + bin_slop[i] += slop; + break; + } + } + } else if (used <= stats.large_max) { + large_slop += slop; + large_used += used; + } else { + huge_slop += slop; + huge_used += used; + } + } + } + + // This formula corresponds to the calculation of wasted (from committed and + // the other parameters) within jemalloc_stats() + size_t committed = stats.allocated + stats.waste + stats.page_cache + + stats.bookkeeping + stats.bin_unused; + + FdPrintf(mStdErr, "\n"); + FdPrintf(mStdErr, "Objects: %9zu\n", num_objects); + FdPrintf(mStdErr, "Slots: %9zu\n", mNumUsedSlots); + FdPrintf(mStdErr, "Ops: %9zu\n", mOps); + FdPrintf(mStdErr, "mapped: %9zu\n", stats.mapped); + FdPrintf(mStdErr, "committed: %9zu\n", committed); +#ifdef XP_LINUX + if (rss) { + FdPrintf(mStdErr, "rss: %9zu\n", rss); + } +#endif + FdPrintf(mStdErr, "allocated: %9zu\n", stats.allocated); + FdPrintf(mStdErr, "waste: %9zu\n", stats.waste); + FdPrintf(mStdErr, "dirty: %9zu\n", stats.page_cache); + FdPrintf(mStdErr, "bookkeep: %9zu\n", stats.bookkeeping); + FdPrintf(mStdErr, "bin-unused: %9zu\n", stats.bin_unused); + FdPrintf(mStdErr, "quantum-max: %9zu\n", stats.quantum_max); + FdPrintf(mStdErr, "quantum-wide-max: %9zu\n", stats.quantum_wide_max); + FdPrintf(mStdErr, "subpage-max: %9zu\n", stats.subpage_max); + FdPrintf(mStdErr, "large-max: %9zu\n", stats.large_max); + if (mCalculateSlop) { + size_t slop = mTotalAllocatedSize - mTotalRequestedSize; + FdPrintf(mStdErr, + "Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n", + slop / 1024, mTotalAllocatedSize / 1024, + percent(slop, mTotalAllocatedSize)); + } + FdPrintf(mStdErr, "Live sloppy objects: %zu/%zu (%zu%%)\n", + num_sloppy_objects, num_objects, + percent(num_sloppy_objects, num_objects)); + FdPrintf(mStdErr, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n", + total_slop / 1024, total_allocated / 1024, + percent(total_slop, total_allocated)); + + FdPrintf(mStdErr, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size", + "unused (c)", "total (c)", "used (c)", "non-full (r)", "total (r)", + "used (r)"); + for (unsigned i = 0; i < num_bins; i++) { + auto& bin = bin_stats[i]; + MOZ_ASSERT(bin.size); + FdPrintf(mStdErr, "%8zu %8zuKiB %7zuKiB %7zu%% %12zu %9zu %7zu%%\n", + bin.size, bin.bytes_unused / 1024, bin.bytes_total / 1024, + percent(bin.bytes_total - bin.bytes_unused, bin.bytes_total), + bin.num_non_full_runs, bin.num_runs, + percent(bin.num_runs - bin.num_non_full_runs, bin.num_runs)); + } + + FdPrintf(mStdErr, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent"); + for (unsigned i = 0; i < num_bins; i++) { + auto& bin = bin_stats[i]; + size_t used = bin.bytes_total - bin.bytes_unused; + FdPrintf(mStdErr, "%5zu %8zu %9zu %6zu%%\n", bin.size, bin_slop[i], used, + percent(bin_slop[i], used)); + } + FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "large", large_slop, large_used, + percent(large_slop, large_used)); + FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop, huge_used, + percent(huge_slop, huge_used)); + + print_distributions(stats, bin_stats); + } + + private: + /* + * Create and print frequency distributions of memory requests. + */ + void print_distributions(jemalloc_stats_t& stats, + jemalloc_bin_stats_t* bin_stats) { + const size_t num_bins = ::jemalloc_stats_num_bins(); + + // We compute distributions for all of the bins for small allocations + // (num_bins) plus two more distributions for larger allocations. + Distribution dists[num_bins + 2]; + + unsigned last_size = 0; + unsigned num_dists = 0; + for (unsigned i = 0; i < num_bins; i++) { + auto& bin = bin_stats[i]; + auto& dist = dists[num_dists++]; + + MOZ_ASSERT(bin.size); + if (bin.size <= 16) { + // 1 byte buckets. + dist = Distribution(bin.size, last_size, 1); + } else if (bin.size <= stats.quantum_max) { + // 4 buckets, (4 bytes per bucket with a 16 byte quantum). + dist = Distribution(bin.size, last_size, stats.quantum / 4); + } else if (bin.size <= stats.quantum_wide_max) { + // 8 buckets, (32 bytes per bucket with a 256 byte quantum-wide). + dist = Distribution(bin.size, last_size, stats.quantum_wide / 8); + } else { + // 16 buckets. + dist = Distribution(bin.size, last_size, (bin.size - last_size) / 16); + } + last_size = bin.size; + } + + // 16 buckets. + dists[num_dists] = Distribution(stats.page_size, last_size, + (stats.page_size - last_size) / 16); + num_dists++; + + // Buckets are 1/4 of the page size (12 buckets). + dists[num_dists] = + Distribution(stats.page_size * 4, stats.page_size, stats.page_size / 4); + num_dists++; + + MOZ_RELEASE_ASSERT(num_dists <= num_bins + 2); + + for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) { + MemSlot& slot = mSlots[slot_id]; + if (slot.mPtr) { + for (size_t i = 0; i < num_dists; i++) { + if (slot.mRequest <= dists[i].maxSize()) { + dists[i].addRequest(slot.mRequest); + break; + } + } + } + } + + for (unsigned i = 0; i < num_dists; i++) { + dists[i].printDist(mStdErr); + } + } + +#ifdef XP_LINUX + size_t get_rss() { + if (mGetRSSFailed) { + return 0; + } + + // On Linux we can determine the RSS of the heap area by examining the + // smaps file. + mozilla::Maybe<SMapsReader> reader = SMapsReader::open(); + if (!reader) { + mGetRSSFailed = true; + return 0; + } + + size_t rss = 0; + while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) { + if (map->IsCandidate() && !mSlots.ownsMapping(map->mStart) && + !InitialMapsContains(map->mStart)) { + rss += map->mRSS; + } + } + + return rss; + } + + bool InitialMapsContains(uintptr_t aRangeStart) { + for (unsigned i = 0; i < mNumInitialMaps; i++) { + MOZ_ASSERT(i < MAX_INITIAL_MAPS); + + if (mInitialMaps[i] == aRangeStart) { + return true; + } + } + return false; + } + + public: + void BuildInitialMapInfo() { + if (mGetRSSFailed) { + return; + } + + Maybe<SMapsReader> reader = SMapsReader::open(); + if (!reader) { + mGetRSSFailed = true; + return; + } + + while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) { + if (map->IsCandidate()) { + if (mNumInitialMaps >= MAX_INITIAL_MAPS) { + FdPrintf(mStdErr, "Too many initial mappings, can't compute RSS\n"); + mGetRSSFailed = false; + return; + } + + mInitialMaps[mNumInitialMaps++] = map->mStart; + } + } + } +#endif + + private: + MemSlot& SlotForResult(Buffer& aResult) { + /* Parse result value and get the corresponding slot. */ + Buffer dummy = aResult.SplitChar('='); + Buffer dummy2 = aResult.SplitChar('#'); + if (dummy || dummy2) { + die("Malformed input"); + } + + size_t slot_id = parseNumber(aResult); + mNumUsedSlots = std::max(mNumUsedSlots, slot_id + 1); + + return mSlots[slot_id]; + } + + void MaybeCommit(MemSlot& aSlot) { + if (mDoMemset) { + // Write any byte, 0x55 isn't significant. + memset(aSlot.mPtr, 0x55, aSlot.mRequest); + } + } + + intptr_t mStdErr; + size_t mOps = 0; + + // The number of slots that have been used. It is used to iterate over slots + // without accessing those we haven't initialised. + size_t mNumUsedSlots = 0; + + MemSlotList mSlots; + size_t mTotalRequestedSize = 0; + size_t mTotalAllocatedSize = 0; + // Whether to calculate slop for all allocations over the runtime of a + // process. + bool mCalculateSlop = false; + bool mDoMemset = false; + +#ifdef XP_LINUX + // If we have a failure reading smaps info then this is used to disable that + // feature. + bool mGetRSSFailed = false; + + // The initial memory mappings are recorded here at start up. We exclude + // memory in these mappings when computing RSS. We assume they do not grow + // and that no regions are allocated near them, this is true because they'll + // only record the .bss and .data segments from our binary and shared objects + // or regions that logalloc-replay has created for MappedArrays. + // + // 64 should be enough for anybody. + static constexpr unsigned MAX_INITIAL_MAPS = 64; + uintptr_t mInitialMaps[MAX_INITIAL_MAPS]; + unsigned mNumInitialMaps = 0; +#endif // XP_LINUX +}; + +static Replay replay; + +int main(int argc, const char* argv[]) { + size_t first_pid = 0; + FdReader reader(0); + + for (int i = 1; i < argc; i++) { + const char* option = argv[i]; + if (strcmp(option, "-s") == 0) { + // Do accounting to calculate allocation slop. + replay.enableSlopCalculation(); + } else if (strcmp(option, "-c") == 0) { + // Touch memory as we allocate it. + replay.enableMemset(); + } else { + fprintf(stderr, "Unknown command line option: %s\n", option); + return EXIT_FAILURE; + } + } + + /* Read log from stdin and dispatch function calls to the Replay instance. + * The log format is essentially: + * <pid> <tid> <function>([<args>])[=<result>] + * <args> is a comma separated list of arguments. + * + * The logs are expected to be preprocessed so that allocations are + * attributed a tracking slot. The input is trusted not to have crazy + * values for these slot numbers. + * + * <result>, as well as some of the args to some of the function calls are + * such slot numbers. + */ + while (true) { + Buffer line = reader.ReadLine(); + + if (!line) { + break; + } + + size_t pid = parseNumber(line.SplitChar(' ')); + if (!first_pid) { + first_pid = pid; + } + + /* The log may contain data for several processes, only entries for the + * very first that appears are treated. */ + if (first_pid != pid) { + continue; + } + + /* The log contains thread ids for manual analysis, but we just ignore them + * for now. */ + parseNumber(line.SplitChar(' ')); + + Buffer func = line.SplitChar('('); + Buffer args = line.SplitChar(')'); + + if (func == Buffer("jemalloc_stats")) { + replay.jemalloc_stats(args, line); + } else if (func == Buffer("free")) { + replay.free(args, line); + } else if (func == Buffer("malloc")) { + replay.malloc(args, line); + } else if (func == Buffer("posix_memalign")) { + replay.posix_memalign(args, line); + } else if (func == Buffer("aligned_alloc")) { + replay.aligned_alloc(args, line); + } else if (func == Buffer("calloc")) { + replay.calloc(args, line); + } else if (func == Buffer("realloc")) { + replay.realloc(args, line); + } else if (func == Buffer("memalign")) { + replay.memalign(args, line); + } else if (func == Buffer("valloc")) { + replay.valloc(args, line); + } else { + die("Malformed input"); + } + } + + return 0; +} diff --git a/memory/replace/logalloc/replay/expected_output_minimal.log b/memory/replace/logalloc/replay/expected_output_minimal.log new file mode 100644 index 0000000000..332fe20957 --- /dev/null +++ b/memory/replace/logalloc/replay/expected_output_minimal.log @@ -0,0 +1,17 @@ +1 1 jemalloc_stats() +1 1 malloc(42)=#1 +1 1 malloc(24)=#2 +1 1 free(#1) +1 1 memalign(4096,1024)=#1 +1 1 calloc(4,42)=#3 +1 1 free(#2) +1 1 realloc(#3,84)=#2 +1 1 memalign(256,1024)=#3 +1 1 memalign(512,1024)=#4 +1 1 memalign(4096,1024)=#5 +1 1 jemalloc_stats() +1 1 free(#5) +1 1 free(#4) +1 1 free(#3) +1 1 free(#2) +1 1 free(#1) diff --git a/memory/replace/logalloc/replay/logalloc_munge.py b/memory/replace/logalloc/replay/logalloc_munge.py new file mode 100644 index 0000000000..52d0032463 --- /dev/null +++ b/memory/replace/logalloc/replay/logalloc_munge.py @@ -0,0 +1,147 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +This script takes a log from the replace-malloc logalloc library on stdin +and munges it so that it can be used with the logalloc-replay tool. + +Given the following output: + 13663 malloc(42)=0x7f0c33502040 + 13663 malloc(24)=0x7f0c33503040 + 13663 free(0x7f0c33502040) +The resulting output is: + 1 malloc(42)=#1 + 1 malloc(24)=#2 + 1 free(#1) + +See README for more details. +""" + +import sys +from collections import defaultdict, deque + + +class IdMapping(object): + """Class to map values to ids. + + Each value is associated to an increasing id, starting from 1. + When a value is removed, its id is recycled and will be reused for + subsequent values. + """ + + def __init__(self): + self.id = 1 + self._values = {} + self._recycle = deque() + + def __getitem__(self, value): + if value not in self._values: + if self._recycle: + self._values[value] = self._recycle.popleft() + else: + self._values[value] = self.id + self.id += 1 + return self._values[value] + + def __delitem__(self, value): + if value == 0: + return + self._recycle.append(self._values[value]) + del self._values[value] + + def __contains__(self, value): + return value == 0 or value in self._values + + +class Ignored(Exception): + pass + + +def split_log_line(line): + try: + # The format for each line is: + # <pid> [<tid>] <function>([<args>])[=<result>] + # + # The original format didn't include the tid, so we try to parse + # lines whether they have one or not. + pid, func_call = line.split(" ", 1) + call, result = func_call.split(")") + func, args = call.split("(") + args = args.split(",") if args else [] + if result: + if result[0] != "=": + raise Ignored("Malformed input") + result = result[1:] + if " " in func: + tid, func = func.split(" ", 1) + else: + tid = pid + return pid, tid, func, args, result + except Exception: + raise Ignored("Malformed input") + + +NUM_ARGUMENTS = { + "jemalloc_stats": 0, + "free": 1, + "malloc": 1, + "posix_memalign": 2, + "aligned_alloc": 2, + "calloc": 2, + "realloc": 2, + "memalign": 2, + "valloc": 1, +} + + +def main(): + pids = IdMapping() + processes = defaultdict(lambda: {"pointers": IdMapping(), "tids": IdMapping()}) + for line in sys.stdin: + line = line.strip() + + try: + pid, tid, func, args, result = split_log_line(line) + + # Replace pid with an id. + pid = pids[int(pid)] + + process = processes[pid] + tid = process["tids"][int(tid)] + + pointers = process["pointers"] + + if func not in NUM_ARGUMENTS: + raise Ignored("Unknown function") + + if len(args) != NUM_ARGUMENTS[func]: + raise Ignored("Malformed input") + + if func in ("jemalloc_stats", "free") and result: + raise Ignored("Malformed input") + + if func in ("free", "realloc"): + ptr = int(args[0], 16) + if ptr and ptr not in pointers: + raise Ignored("Did not see an alloc for pointer") + args[0] = "#%d" % pointers[ptr] + del pointers[ptr] + + if result: + result = int(result, 16) + if not result: + raise Ignored("Result is NULL") + result = "#%d" % pointers[result] + + print( + "%d %d %s(%s)%s" + % (pid, tid, func, ",".join(args), "=%s" % result if result else "") + ) + + except Exception as e: + print('Ignored "%s": %s' % (line, e), file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/memory/replace/logalloc/replay/moz.build b/memory/replace/logalloc/replay/moz.build new file mode 100644 index 0000000000..4ebf48f842 --- /dev/null +++ b/memory/replace/logalloc/replay/moz.build @@ -0,0 +1,87 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Program("logalloc-replay") + +SOURCES += [ + "/mfbt/Assertions.cpp", + "/mfbt/Poison.cpp", + "/mfbt/RandomNum.cpp", + "/mfbt/TaggedAnonymousMemory.cpp", + "/mfbt/Unused.cpp", + "/mozglue/misc/StackWalk.cpp", + "Replay.cpp", +] + +if CONFIG["OS_TARGET"] == "WINNT": + SOURCES += [ + "/mozglue/misc/ProcessType.cpp", + ] + +if CONFIG["OS_TARGET"] == "Linux": + LDFLAGS += ["-static-libstdc++"] + +if CONFIG["OS_TARGET"] == "Darwin": + # Work around "warning: 'aligned_alloc' is only available on macOS 10.15 or newer" + # when building with MACOSX_DEPLOYMENT_TARGET < 10.15 with >= 10.15 SDK. + # We have our own definition of the function, so it doesn't matter what the SDK says. + SOURCES["Replay.cpp"].flags += ["-Wno-unguarded-availability-new"] + +if CONFIG["MOZ_REPLACE_MALLOC_STATIC"] and (CONFIG["MOZ_DMD"] or CONFIG["MOZ_PHC"]): + UNIFIED_SOURCES += [ + "/mfbt/HashFunctions.cpp", + "/mfbt/JSONWriter.cpp", + ] + +if CONFIG["OS_ARCH"] == "WINNT": + OS_LIBS += [ + "advapi32", + "dbghelp", + ] + +if CONFIG["MOZ_LINKER"] and CONFIG["MOZ_WIDGET_TOOLKIT"] == "android": + LOCAL_INCLUDES += [ + "/mozglue/linker", + ] + DEFINES["__wrap_dladdr"] = "dladdr" + + +if CONFIG["MOZ_BUILD_APP"] == "memory": + EXPORTS.mozilla += [ + "/mozglue/misc/StackWalk.h", + ] + +if CONFIG["MOZ_BUILD_APP"] == "memory" or CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + UNIFIED_SOURCES += [ + "/mfbt/double-conversion/double-conversion/bignum-dtoa.cc", + "/mfbt/double-conversion/double-conversion/bignum.cc", + "/mfbt/double-conversion/double-conversion/cached-powers.cc", + "/mfbt/double-conversion/double-conversion/double-to-string.cc", + "/mfbt/double-conversion/double-conversion/fast-dtoa.cc", + "/mfbt/double-conversion/double-conversion/fixed-dtoa.cc", + "/mfbt/double-conversion/double-conversion/string-to-double.cc", + "/mfbt/double-conversion/double-conversion/strtod.cc", + "/mozglue/misc/Printf.cpp", + ] + +LOCAL_INCLUDES += [ + "/memory/build", +] + +# Link replace-malloc and the default allocator. +USE_LIBS += [ + "memory", +] + +# The memory library defines this, so it's needed here too. +DEFINES["IMPL_MFBT"] = True + +if CONFIG["MOZ_NEEDS_LIBATOMIC"]: + OS_LIBS += ["atomic"] + +DisableStlWrapping() + +include("/mozglue/build/replace_malloc.mozbuild") diff --git a/memory/replace/logalloc/replay/replay.log b/memory/replace/logalloc/replay/replay.log new file mode 100644 index 0000000000..f1e6de788b --- /dev/null +++ b/memory/replace/logalloc/replay/replay.log @@ -0,0 +1,18 @@ +1 1 jemalloc_stats() +1 1 malloc(42)=#1 +1 1 malloc(24)=#2 +2 2 malloc(42)=#1 +1 1 free(#1) +1 1 posix_memalign(4096,1024)=#1 +1 1 calloc(4,42)=#3 +1 1 free(#2) +1 1 realloc(#3,84)=#2 +1 1 aligned_alloc(256,1024)=#3 +1 1 memalign(512,1024)=#4 +1 1 valloc(1024)=#5 +1 1 jemalloc_stats() +1 1 free(#5) +1 1 free(#4) +1 1 free(#3) +1 1 free(#2) +1 1 free(#1) diff --git a/memory/replace/moz.build b/memory/replace/moz.build new file mode 100644 index 0000000000..7385faceb5 --- /dev/null +++ b/memory/replace/moz.build @@ -0,0 +1,22 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +@template +def ReplaceMalloc(name): + if CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + DEFINES["MOZ_REPLACE_MALLOC_PREFIX"] = name.replace("-", "_") + FINAL_LIBRARY = "memory" + else: + SharedLibrary(name) + + +DIRS += [ + "logalloc", +] + +if CONFIG["MOZ_DMD"]: + DIRS += ["dmd"] diff --git a/memory/replace/phc/moz.build b/memory/replace/phc/moz.build new file mode 100644 index 0000000000..e2d233642c --- /dev/null +++ b/memory/replace/phc/moz.build @@ -0,0 +1,50 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ReplaceMalloc("phc") + +DEFINES["MOZ_NO_MOZALLOC"] = True +DEFINES["IMPL_MFBT"] = True + +LOCAL_INCLUDES += [ + "../logalloc", + "/memory/build", +] + +EXPORTS += [ + "PHC.h", +] + +UNIFIED_SOURCES += [ + "PHC.cpp", +] + +if CONFIG["MOZ_BUILD_APP"] == "memory": + UNIFIED_SOURCES += [ + "/mfbt/double-conversion/double-conversion/bignum-dtoa.cc", + "/mfbt/double-conversion/double-conversion/bignum.cc", + "/mfbt/double-conversion/double-conversion/cached-powers.cc", + "/mfbt/double-conversion/double-conversion/double-to-string.cc", + "/mfbt/double-conversion/double-conversion/fast-dtoa.cc", + "/mfbt/double-conversion/double-conversion/fixed-dtoa.cc", + "/mfbt/double-conversion/double-conversion/string-to-double.cc", + "/mfbt/double-conversion/double-conversion/strtod.cc", + "/mozglue/misc/Printf.cpp", + ] + +if not CONFIG["MOZ_REPLACE_MALLOC_STATIC"]: + UNIFIED_SOURCES += [ + "../logalloc/FdPrintf.cpp", + "/mozglue/misc/StackWalk.cpp", + ] + if CONFIG["OS_ARCH"] == "WINNT": + OS_LIBS += [ + "dbghelp", + ] + +TEST_DIRS += ["test"] + +DisableStlWrapping() diff --git a/memory/replace/phc/test/moz.build b/memory/replace/phc/test/moz.build new file mode 100644 index 0000000000..8208ae849d --- /dev/null +++ b/memory/replace/phc/test/moz.build @@ -0,0 +1,9 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# The gtests won't work in a SpiderMonkey-only build. +if CONFIG["MOZ_WIDGET_TOOLKIT"]: + TEST_DIRS += ["gtest"] diff --git a/memory/volatile/VolatileBuffer.h b/memory/volatile/VolatileBuffer.h new file mode 100644 index 0000000000..badd7f3cec --- /dev/null +++ b/memory/volatile/VolatileBuffer.h @@ -0,0 +1,161 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozalloc_VolatileBuffer_h +#define mozalloc_VolatileBuffer_h + +#include "mozilla/mozalloc.h" +#include "mozilla/Mutex.h" +#include "mozilla/RefPtr.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/RefCounted.h" + +/* VolatileBuffer + * + * This class represents a piece of memory that can potentially be reclaimed + * by the OS when not in use. As long as there are one or more + * VolatileBufferPtrs holding on to a VolatileBuffer, the memory will remain + * available. However, when there are no VolatileBufferPtrs holding a + * VolatileBuffer, the OS can purge the pages if it wants to. The OS can make + * better decisions about what pages to purge than we can. + * + * VolatileBuffers may not always be volatile - if the allocation is too small, + * or if the OS doesn't support the feature, or if the OS doesn't want to, + * the buffer will be allocated on heap. + * + * VolatileBuffer allocations are fallible. They are intended for uses where + * one may allocate large buffers for caching data. Init() must be called + * exactly once. + * + * After getting a reference to VolatileBuffer using VolatileBufferPtr, + * WasPurged() can be used to check if the OS purged any pages in the buffer. + * The OS cannot purge a buffer immediately after a VolatileBuffer is + * initialized. At least one VolatileBufferPtr must be created before the + * buffer can be purged, so the first use of VolatileBufferPtr does not need + * to check WasPurged(). + * + * When a buffer is purged, some or all of the buffer is zeroed out. This + * API cannot tell which parts of the buffer were lost. + * + * VolatileBuffer and VolatileBufferPtr are threadsafe. + */ + +namespace mozilla { + +class VolatileBuffer { + friend class VolatileBufferPtr_base; + + public: + MOZ_DECLARE_REFCOUNTED_TYPENAME(VolatileBuffer) + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(VolatileBuffer) + + VolatileBuffer(); + + /* aAlignment must be a multiple of the pointer size */ + bool Init(size_t aSize, size_t aAlignment = sizeof(void*)); + + size_t HeapSizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const; + size_t NonHeapSizeOfExcludingThis() const; + bool OnHeap() const; + + protected: + bool Lock(void** aBuf); + void Unlock(); + + private: + ~VolatileBuffer(); + + /** + * Protects mLockCount, mFirstLock, and changes to the volatility of our + * buffer. Other member variables are read-only except in Init() and the + * destructor. + */ + Mutex mMutex MOZ_UNANNOTATED; + + void* mBuf; + size_t mSize; + int mLockCount; +#if defined(ANDROID) + int mFd; +#elif defined(XP_DARWIN) + bool mHeap; +#elif defined(XP_WIN) + bool mHeap; + bool mFirstLock; +#endif +}; + +class VolatileBufferPtr_base { + public: + explicit VolatileBufferPtr_base(VolatileBuffer* vbuf) + : mVBuf(vbuf), mMapping(nullptr), mPurged(false) { + Lock(); + } + + ~VolatileBufferPtr_base() { Unlock(); } + + bool WasBufferPurged() const { return mPurged; } + + protected: + RefPtr<VolatileBuffer> mVBuf; + void* mMapping; + + void Set(VolatileBuffer* vbuf) { + Unlock(); + mVBuf = vbuf; + Lock(); + } + + private: + bool mPurged; + + void Lock() { + if (mVBuf) { + mPurged = !mVBuf->Lock(&mMapping); + } else { + mMapping = nullptr; + mPurged = false; + } + } + + void Unlock() { + if (mVBuf) { + mVBuf->Unlock(); + } + } +}; + +template <class T> +class VolatileBufferPtr : public VolatileBufferPtr_base { + public: + explicit VolatileBufferPtr(VolatileBuffer* vbuf) + : VolatileBufferPtr_base(vbuf) {} + VolatileBufferPtr() : VolatileBufferPtr_base(nullptr) {} + + VolatileBufferPtr(VolatileBufferPtr&& aOther) + : VolatileBufferPtr_base(aOther.mVBuf) { + aOther.Set(nullptr); + } + + operator T*() const { return (T*)mMapping; } + + VolatileBufferPtr& operator=(VolatileBuffer* aVBuf) { + Set(aVBuf); + return *this; + } + + VolatileBufferPtr& operator=(VolatileBufferPtr&& aOther) { + MOZ_ASSERT(this != &aOther, "Self-moves are prohibited"); + Set(aOther.mVBuf); + aOther.Set(nullptr); + return *this; + } + + private: + VolatileBufferPtr(VolatileBufferPtr const& vbufptr) = delete; +}; + +} // namespace mozilla + +#endif /* mozalloc_VolatileBuffer_h */ diff --git a/memory/volatile/VolatileBufferAshmem.cpp b/memory/volatile/VolatileBufferAshmem.cpp new file mode 100644 index 0000000000..99a0d1307f --- /dev/null +++ b/memory/volatile/VolatileBufferAshmem.cpp @@ -0,0 +1,120 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "VolatileBuffer.h" +#include "mozilla/Assertions.h" +#include "mozilla/mozalloc.h" + +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "mozilla/Ashmem.h" + +#ifdef MOZ_MEMORY +extern "C" int posix_memalign(void** memptr, size_t alignment, size_t size); +#endif + +#define MIN_VOLATILE_ALLOC_SIZE 8192 + +namespace mozilla { + +VolatileBuffer::VolatileBuffer() + : mMutex("VolatileBuffer"), + mBuf(nullptr), + mSize(0), + mLockCount(0), + mFd(-1) {} + +bool VolatileBuffer::Init(size_t aSize, size_t aAlignment) { + MOZ_ASSERT(!mSize && !mBuf, "Init called twice"); + MOZ_ASSERT(!(aAlignment % sizeof(void*)), + "Alignment must be multiple of pointer size"); + + mSize = aSize; + if (aSize < MIN_VOLATILE_ALLOC_SIZE) { + goto heap_alloc; + } + + mFd = mozilla::android::ashmem_create(nullptr, mSize); + if (mFd < 0) { + goto heap_alloc; + } + + mBuf = mmap(nullptr, mSize, PROT_READ | PROT_WRITE, MAP_SHARED, mFd, 0); + if (mBuf != MAP_FAILED) { + return true; + } + +heap_alloc: + mBuf = nullptr; + if (mFd >= 0) { + close(mFd); + mFd = -1; + } + +#ifdef MOZ_MEMORY + posix_memalign(&mBuf, aAlignment, aSize); +#else + mBuf = memalign(aAlignment, aSize); +#endif + return !!mBuf; +} + +VolatileBuffer::~VolatileBuffer() { + MOZ_ASSERT(mLockCount == 0, "Being destroyed with non-zero lock count?"); + + if (OnHeap()) { + free(mBuf); + } else { + munmap(mBuf, mSize); + close(mFd); + } +} + +bool VolatileBuffer::Lock(void** aBuf) { + MutexAutoLock lock(mMutex); + + MOZ_ASSERT(mBuf, "Attempting to lock an uninitialized VolatileBuffer"); + + *aBuf = mBuf; + if (++mLockCount > 1 || OnHeap()) { + return true; + } + + // Zero offset and zero length means we want to pin/unpin the entire thing. + struct ashmem_pin pin = {0, 0}; + return ioctl(mFd, ASHMEM_PIN, &pin) == ASHMEM_NOT_PURGED; +} + +void VolatileBuffer::Unlock() { + MutexAutoLock lock(mMutex); + + MOZ_ASSERT(mLockCount > 0, "VolatileBuffer unlocked too many times!"); + if (--mLockCount || OnHeap()) { + return; + } + + struct ashmem_pin pin = {0, 0}; + ioctl(mFd, ASHMEM_UNPIN, &pin); +} + +bool VolatileBuffer::OnHeap() const { return mFd < 0; } + +size_t VolatileBuffer::HeapSizeOfExcludingThis( + MallocSizeOf aMallocSizeOf) const { + return OnHeap() ? aMallocSizeOf(mBuf) : 0; +} + +size_t VolatileBuffer::NonHeapSizeOfExcludingThis() const { + if (OnHeap()) { + return 0; + } + + return (mSize + (PAGE_SIZE - 1)) & PAGE_MASK; +} + +} // namespace mozilla diff --git a/memory/volatile/VolatileBufferFallback.cpp b/memory/volatile/VolatileBufferFallback.cpp new file mode 100644 index 0000000000..d4311788ff --- /dev/null +++ b/memory/volatile/VolatileBufferFallback.cpp @@ -0,0 +1,67 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "VolatileBuffer.h" +#include "mozilla/Assertions.h" +#include "mozilla/mozalloc.h" + +#ifdef MOZ_MEMORY +int posix_memalign(void** memptr, size_t alignment, size_t size); +#endif + +namespace mozilla { + +VolatileBuffer::VolatileBuffer() + : mMutex("VolatileBuffer"), mBuf(nullptr), mSize(0), mLockCount(0) {} + +bool VolatileBuffer::Init(size_t aSize, size_t aAlignment) { + MOZ_ASSERT(!mSize && !mBuf, "Init called twice"); + MOZ_ASSERT(!(aAlignment % sizeof(void*)), + "Alignment must be multiple of pointer size"); + + mSize = aSize; +#if defined(MOZ_MEMORY) || defined(HAVE_POSIX_MEMALIGN) + if (posix_memalign(&mBuf, aAlignment, aSize) != 0) { + return false; + } +#else +# error "No memalign implementation found" +#endif + return !!mBuf; +} + +VolatileBuffer::~VolatileBuffer() { + MOZ_ASSERT(mLockCount == 0, "Being destroyed with non-zero lock count?"); + + free(mBuf); +} + +bool VolatileBuffer::Lock(void** aBuf) { + MutexAutoLock lock(mMutex); + + MOZ_ASSERT(mBuf, "Attempting to lock an uninitialized VolatileBuffer"); + + *aBuf = mBuf; + mLockCount++; + + return true; +} + +void VolatileBuffer::Unlock() { + MutexAutoLock lock(mMutex); + + mLockCount--; + MOZ_ASSERT(mLockCount >= 0, "VolatileBuffer unlocked too many times!"); +} + +bool VolatileBuffer::OnHeap() const { return true; } + +size_t VolatileBuffer::HeapSizeOfExcludingThis( + MallocSizeOf aMallocSizeOf) const { + return aMallocSizeOf(mBuf); +} + +size_t VolatileBuffer::NonHeapSizeOfExcludingThis() const { return 0; } + +} // namespace mozilla diff --git a/memory/volatile/VolatileBufferOSX.cpp b/memory/volatile/VolatileBufferOSX.cpp new file mode 100644 index 0000000000..c6299d1db0 --- /dev/null +++ b/memory/volatile/VolatileBufferOSX.cpp @@ -0,0 +1,105 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "VolatileBuffer.h" +#include "mozilla/Assertions.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/mozalloc.h" + +#include <mach/mach.h> +#include <sys/mman.h> +#include <unistd.h> + +#define MIN_VOLATILE_ALLOC_SIZE 8192 + +namespace mozilla { + +VolatileBuffer::VolatileBuffer() + : mMutex("VolatileBuffer"), + mBuf(nullptr), + mSize(0), + mLockCount(0), + mHeap(false) {} + +bool VolatileBuffer::Init(size_t aSize, size_t aAlignment) { + MOZ_ASSERT(!mSize && !mBuf, "Init called twice"); + MOZ_ASSERT(!(aAlignment % sizeof(void*)), + "Alignment must be multiple of pointer size"); + + mSize = aSize; + + kern_return_t ret = 0; + if (aSize < MIN_VOLATILE_ALLOC_SIZE) { + goto heap_alloc; + } + + ret = vm_allocate(mach_task_self(), (vm_address_t*)&mBuf, mSize, + VM_FLAGS_PURGABLE | VM_FLAGS_ANYWHERE); + if (ret == KERN_SUCCESS) { + return true; + } + +heap_alloc: + (void)posix_memalign(&mBuf, aAlignment, aSize); + mHeap = true; + return !!mBuf; +} + +VolatileBuffer::~VolatileBuffer() { + MOZ_ASSERT(mLockCount == 0, "Being destroyed with non-zero lock count?"); + + if (OnHeap()) { + free(mBuf); + } else { + vm_deallocate(mach_task_self(), (vm_address_t)mBuf, mSize); + } +} + +bool VolatileBuffer::Lock(void** aBuf) { + MutexAutoLock lock(mMutex); + + MOZ_ASSERT(mBuf, "Attempting to lock an uninitialized VolatileBuffer"); + + *aBuf = mBuf; + if (++mLockCount > 1 || OnHeap()) { + return true; + } + + int state = VM_PURGABLE_NONVOLATILE; + kern_return_t ret = vm_purgable_control(mach_task_self(), (vm_address_t)mBuf, + VM_PURGABLE_SET_STATE, &state); + return ret == KERN_SUCCESS && !(state & VM_PURGABLE_EMPTY); +} + +void VolatileBuffer::Unlock() { + MutexAutoLock lock(mMutex); + + MOZ_ASSERT(mLockCount > 0, "VolatileBuffer unlocked too many times!"); + if (--mLockCount || OnHeap()) { + return; + } + + int state = VM_PURGABLE_VOLATILE | VM_VOLATILE_GROUP_DEFAULT; + DebugOnly<kern_return_t> ret = vm_purgable_control( + mach_task_self(), (vm_address_t)mBuf, VM_PURGABLE_SET_STATE, &state); + MOZ_ASSERT(ret == KERN_SUCCESS, "Failed to set buffer as purgable"); +} + +bool VolatileBuffer::OnHeap() const { return mHeap; } + +size_t VolatileBuffer::HeapSizeOfExcludingThis( + MallocSizeOf aMallocSizeOf) const { + return OnHeap() ? aMallocSizeOf(mBuf) : 0; +} + +size_t VolatileBuffer::NonHeapSizeOfExcludingThis() const { + if (OnHeap()) { + return 0; + } + + unsigned long pagemask = getpagesize() - 1; + return (mSize + pagemask) & ~pagemask; +} + +} // namespace mozilla diff --git a/memory/volatile/VolatileBufferWindows.cpp b/memory/volatile/VolatileBufferWindows.cpp new file mode 100644 index 0000000000..fbd4eec07a --- /dev/null +++ b/memory/volatile/VolatileBufferWindows.cpp @@ -0,0 +1,130 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "VolatileBuffer.h" +#include "mozilla/Assertions.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/mozalloc.h" + +#include <windows.h> + +#ifdef MOZ_MEMORY +extern "C" int posix_memalign(void** memptr, size_t alignment, size_t size); +#endif + +#ifndef MEM_RESET_UNDO +# define MEM_RESET_UNDO 0x1000000 +#endif + +#define MIN_VOLATILE_ALLOC_SIZE 8192 + +namespace mozilla { + +VolatileBuffer::VolatileBuffer() + : mMutex("VolatileBuffer"), + mBuf(nullptr), + mSize(0), + mLockCount(0), + mHeap(false), + mFirstLock(true) {} + +bool VolatileBuffer::Init(size_t aSize, size_t aAlignment) { + MOZ_ASSERT(!mSize && !mBuf, "Init called twice"); + MOZ_ASSERT(!(aAlignment % sizeof(void*)), + "Alignment must be multiple of pointer size"); + + mSize = aSize; + if (aSize < MIN_VOLATILE_ALLOC_SIZE) { + goto heap_alloc; + } + + mBuf = VirtualAllocEx(GetCurrentProcess(), nullptr, mSize, + MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (mBuf) { + return true; + } + +heap_alloc: +#ifdef MOZ_MEMORY + posix_memalign(&mBuf, aAlignment, aSize); +#else + mBuf = _aligned_malloc(aSize, aAlignment); +#endif + mHeap = true; + return !!mBuf; +} + +VolatileBuffer::~VolatileBuffer() { + MOZ_ASSERT(mLockCount == 0, "Being destroyed with non-zero lock count?"); + + if (OnHeap()) { +#ifdef MOZ_MEMORY + free(mBuf); +#else + _aligned_free(mBuf); +#endif + } else { + VirtualFreeEx(GetCurrentProcess(), mBuf, 0, MEM_RELEASE); + } +} + +bool VolatileBuffer::Lock(void** aBuf) { + MutexAutoLock lock(mMutex); + + MOZ_ASSERT(mBuf, "Attempting to lock an uninitialized VolatileBuffer"); + + *aBuf = mBuf; + if (++mLockCount > 1 || OnHeap()) { + return true; + } + + // MEM_RESET_UNDO's behavior is undefined when called on memory that + // hasn't been MEM_RESET. + if (mFirstLock) { + mFirstLock = false; + return true; + } + + void* addr = VirtualAllocEx(GetCurrentProcess(), mBuf, mSize, MEM_RESET_UNDO, + PAGE_READWRITE); + return !!addr; +} + +void VolatileBuffer::Unlock() { + MutexAutoLock lock(mMutex); + + MOZ_ASSERT(mLockCount > 0, "VolatileBuffer unlocked too many times!"); + if (--mLockCount || OnHeap()) { + return; + } + + DebugOnly<void*> addr = VirtualAllocEx(GetCurrentProcess(), mBuf, mSize, + MEM_RESET, PAGE_READWRITE); + MOZ_ASSERT(addr, "Failed to MEM_RESET"); +} + +bool VolatileBuffer::OnHeap() const { return mHeap; } + +size_t VolatileBuffer::HeapSizeOfExcludingThis( + MallocSizeOf aMallocSizeOf) const { + if (OnHeap()) { +#ifdef MOZ_MEMORY + return aMallocSizeOf(mBuf); +#else + return mSize; +#endif + } + + return 0; +} + +size_t VolatileBuffer::NonHeapSizeOfExcludingThis() const { + if (OnHeap()) { + return 0; + } + + return (mSize + 4095) & ~4095; +} + +} // namespace mozilla diff --git a/memory/volatile/moz.build b/memory/volatile/moz.build new file mode 100644 index 0000000000..9cfe43e5d0 --- /dev/null +++ b/memory/volatile/moz.build @@ -0,0 +1,31 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +NoVisibilityFlags() + +EXPORTS.mozilla += [ + "VolatileBuffer.h", +] + +if CONFIG["OS_TARGET"] == "Android": + UNIFIED_SOURCES += [ + "VolatileBufferAshmem.cpp", + ] +elif CONFIG["OS_TARGET"] == "Darwin": + UNIFIED_SOURCES += [ + "VolatileBufferOSX.cpp", + ] +elif CONFIG["OS_TARGET"] == "WINNT": + UNIFIED_SOURCES += [ + "VolatileBufferWindows.cpp", + ] +else: + UNIFIED_SOURCES += [ + "VolatileBufferFallback.cpp", + ] + +FINAL_LIBRARY = "xul" + +TEST_DIRS += ["tests"] diff --git a/memory/volatile/tests/TestVolatileBuffer.cpp b/memory/volatile/tests/TestVolatileBuffer.cpp new file mode 100644 index 0000000000..e9e9699ec6 --- /dev/null +++ b/memory/volatile/tests/TestVolatileBuffer.cpp @@ -0,0 +1,100 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" +#include "mozilla/VolatileBuffer.h" +#include <string.h> + +#if defined(ANDROID) +# include <fcntl.h> +# include <linux/ashmem.h> +# include <sys/ioctl.h> +# include <sys/stat.h> +# include <sys/types.h> +#elif defined(XP_DARWIN) +# include <mach/mach.h> +#endif + +using namespace mozilla; + +TEST(VolatileBufferTest, HeapVolatileBuffersWork) +{ + RefPtr<VolatileBuffer> heapbuf = new VolatileBuffer(); + + ASSERT_TRUE(heapbuf) + << "Failed to create VolatileBuffer"; + ASSERT_TRUE(heapbuf->Init(512)) + << "Failed to initialize VolatileBuffer"; + + VolatileBufferPtr<char> ptr(heapbuf); + + EXPECT_FALSE(ptr.WasBufferPurged()) + << "Buffer should not be purged immediately after initialization"; + EXPECT_TRUE(ptr) << "Couldn't get pointer from VolatileBufferPtr"; +} + +TEST(VolatileBufferTest, RealVolatileBuffersWork) +{ + RefPtr<VolatileBuffer> buf = new VolatileBuffer(); + + ASSERT_TRUE(buf) + << "Failed to create VolatileBuffer"; + ASSERT_TRUE(buf->Init(16384)) + << "Failed to initialize VolatileBuffer"; + + const char teststr[] = "foobar"; + + { + VolatileBufferPtr<char> ptr(buf); + + EXPECT_FALSE(ptr.WasBufferPurged()) + << "Buffer should not be purged immediately after initialization"; + EXPECT_TRUE(ptr) << "Couldn't get pointer from VolatileBufferPtr"; + + { + VolatileBufferPtr<char> ptr2(buf); + + EXPECT_FALSE(ptr.WasBufferPurged()) + << "Failed to lock buffer again while currently locked"; + ASSERT_TRUE(ptr2) + << "Didn't get a pointer on the second lock"; + + strcpy(ptr2, teststr); + } + } + + { + VolatileBufferPtr<char> ptr(buf); + + EXPECT_FALSE(ptr.WasBufferPurged()) + << "Buffer was immediately purged after unlock"; + EXPECT_STREQ(ptr, teststr) << "Buffer failed to retain data after unlock"; + } + + // Test purging if we know how to +#if defined(XP_DARWIN) + int state; + vm_purgable_control(mach_task_self(), (vm_address_t)NULL, + VM_PURGABLE_PURGE_ALL, &state); +#else + return; +#endif + + EXPECT_GT(buf->NonHeapSizeOfExcludingThis(), 0ul) + << "Buffer should not be allocated on heap"; + + { + VolatileBufferPtr<char> ptr(buf); + + EXPECT_TRUE(ptr.WasBufferPurged()) + << "Buffer should not be unpurged after forced purge"; + EXPECT_STRNE(ptr, teststr) << "Purge did not actually purge data"; + } + + { + VolatileBufferPtr<char> ptr(buf); + + EXPECT_FALSE(ptr.WasBufferPurged()) << "Buffer still purged after lock"; + } +} diff --git a/memory/volatile/tests/moz.build b/memory/volatile/tests/moz.build new file mode 100644 index 0000000000..31201db9d4 --- /dev/null +++ b/memory/volatile/tests/moz.build @@ -0,0 +1,11 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES = [ + "TestVolatileBuffer.cpp", +] + +FINAL_LIBRARY = "xul-gtest" |